/// <summary>
/// Reads a text model dump and builds one <c>XgbTree</c> for every group of node lines,
/// where groups are separated by lines starting with "booster" or by blank lines.
/// </summary>
/// <param name="fileName">Path of the dump file to parse.</param>
/// <param name="maxTrees">
/// Parsing stops as soon as this many trees have been built. A negative value never
/// matches the tree counter, so every tree in the file is read.
/// NOTE(review): with a value of 0 the in-loop limit never triggers and only the trailing
/// tree is skipped; this pre-existing behaviour is kept as-is - confirm it is intended.
/// </param>
/// <returns>
/// The populated model, or <c>null</c> when the file does not exist or when
/// <c>ParseXgbTreeNode</c> returns <c>null</c> for a line.
/// </returns>
public static XgbModel GetXgbModelFromFile(string fileName, int maxTrees)
{
    XgbModel xgbModel = new XgbModel();

    if (!File.Exists(fileName))
    {
        Console.ForegroundColor = ConsoleColor.Yellow;
        Console.WriteLine(String.Format("Error: File {0} does not exist.", fileName));
        Console.ResetColor();
        return null;
    }

    Console.ResetColor();
    Console.WriteLine(String.Format("Parsing {0}", fileName));

    // xgbNodeList is static state shared between calls. An earlier call that bailed out
    // (parse failure or exception) can leave nodes behind, which would otherwise be mixed
    // into the first tree of this file, so always start from an empty buffer.
    xgbNodeList.Clear();

    int numTree = 0;

    var fileInfo = new FileInfo(fileName);
    GlobalStats.ModelFileSize = fileInfo.Length;

    using (StreamReader sr = new StreamReader(fileName))
    {
        while (!sr.EndOfStream)
        {
            var line = sr.ReadLine().Trim();

            // "booster" is a fixed ASCII marker, so compare ordinally rather than by culture.
            bool isTreeSeparator = line.Length == 0 || line.StartsWith("booster", StringComparison.Ordinal);

            if (!isTreeSeparator)
            {
                var node = ParseXgbTreeNode(line);
                if (node == null)
                {
                    // Do not leave a half-read tree in the shared buffer.
                    xgbNodeList.Clear();
                    return null;
                }

                xgbNodeList.Add(node.Number, node);
                continue;
            }

            // A separator closes the tree whose nodes have been buffered so far (if any).
            if (xgbNodeList.Count > 0)
            {
                numTree = BuildTreeFromBufferedNodes(xgbModel, numTree);
                if (numTree == maxTrees)
                {
                    break;
                }
            }
        }
    }

    // The last tree in the file is not followed by a separator, so flush it here
    // unless the tree limit has already been reached.
    if (xgbNodeList.Count > 0 && (maxTrees < 0 || numTree < maxTrees))
    {
        numTree = BuildTreeFromBufferedNodes(xgbModel, numTree);
    }

    Console.ForegroundColor = ConsoleColor.Green;
    Console.WriteLine(String.Format("{0} trees has been constructed.\n", xgbModel.NumTrees));
    Console.ResetColor();

    return xgbModel;
}

/// <summary>
/// Builds a tree from the nodes currently buffered in <c>xgbNodeList</c>, reports it on the
/// console, appends it to <paramref name="xgbModel"/> and empties the buffer.
/// </summary>
/// <param name="xgbModel">Model that receives the new tree.</param>
/// <param name="numTree">Number of trees built before this call.</param>
/// <returns>The updated tree count (<paramref name="numTree"/> + 1).</returns>
private static int BuildTreeFromBufferedNodes(XgbModel xgbModel, int numTree)
{
    numTree++;

    Console.ForegroundColor = ConsoleColor.DarkGreen;
    Console.Write(String.Format("Constructing tree #{0} ", numTree));

    // The entry stored under 0 is used as the root of the tree.
    XgbTree tree = new XgbTree(xgbNodeList[0]);
    ConstructXgbTree(tree);

    Console.WriteLine(String.Format("=> depth: {0} ({1} nodes)", tree.Height, xgbNodeList.Count));
    Console.ResetColor();

    xgbModel.XgbTrees.Add(tree);
    xgbNodeList.Clear();

    return numTree;
}
/// <summary>
/// Walks <paramref name="tree"/> recursively and records, in <c>_treeFeatureInteractions</c>,
/// one <c>FeatureInteraction</c> per distinct chain of split nodes, keyed by the interaction's
/// <c>Name</c>. Leaf nodes end the recursion without recording anything.
/// </summary>
/// <param name="tree">Subtree whose root is visited by this call.</param>
/// <param name="currentInteraction">Nodes collected on the current chain; the visited node is added to it.</param>
/// <param name="currentGain">Gain accumulated along the chain before this node.</param>
/// <param name="currentCover">Cover accumulated along the chain before this node.</param>
/// <param name="pathProbability">Product of child/parent cover ratios on the way down to this node.</param>
/// <param name="depth">Depth value passed to the interaction and compared against <c>_maxDeepening</c>.</param>
/// <param name="deepening">
/// Incremented each time a fresh chain is started below a node; leaf statistics are only
/// recorded while it is 0.
/// </param>
private void CollectFeatureInteractions(XgbTree tree, HashSet<XgbTreeNode> currentInteraction, double currentGain, double currentCover, double pathProbability, int depth, int deepening)
{
    if (tree.IsLeafNode)
    {
        return;
    }

    var leftTree = (XgbTree)tree.Left;
    var rightTree = (XgbTree)tree.Right;

    // Extend the running chain with this split node.
    currentInteraction.Add(tree.Data);
    currentGain += tree.Data.Gain;
    currentCover += tree.Data.Cover;

    // Each child's share of this node's cover scales the probability of reaching it.
    var probabilityLeft = pathProbability * (leftTree.Data.Cover / tree.Data.Cover);
    var probabilityRight = pathProbability * (rightTree.Data.Cover / tree.Data.Cover);

    var fi = new FeatureInteraction(currentInteraction, currentGain, currentCover, pathProbability, depth, _treeIndex, 1);

    // Start brand-new chains at both children (empty set, zero gain/cover) while
    // deepening is still allowed; a negative _maxDeepening means no limit.
    bool deepeningAllowed = _maxDeepening < 0 || depth < _maxDeepening;
    if (deepeningAllowed)
    {
        var freshLeft = new HashSet<XgbTreeNode>();
        var freshRight = new HashSet<XgbTreeNode>();

        CollectFeatureInteractions(leftTree, freshLeft, 0, 0, probabilityLeft, depth + 1, deepening + 1);
        CollectFeatureInteractions(rightTree, freshRight, 0, 0, probabilityRight, depth + 1, deepening + 1);
    }

    // The joined node numbers identify this exact chain.
    var path = string.Join("-", currentInteraction.Select(node => node.Number));

    if (_treeFeatureInteractions.ContainsKey(fi.Name))
    {
        // Same name seen before: a chain that was already counted is ignored entirely.
        if (_pathMemo.Contains(path))
        {
            return;
        }

        _pathMemo.Add(path);

        // Fold this occurrence into the existing entry; the averages are
        // recomputed from the updated totals, so the order below matters.
        var existing = _treeFeatureInteractions[fi.Name];
        existing.Gain += currentGain;
        existing.Cover += currentCover;
        existing.FScore += 1;
        existing.FScoreWeighted += pathProbability;
        existing.AverageFScoreWeighted = existing.FScoreWeighted / existing.FScore;
        existing.AverageGain = existing.Gain / existing.FScore;
        existing.ExpectedGain += currentGain * pathProbability;
        existing.TreeDepth += depth;
        existing.AverageTreeDepth = existing.TreeDepth / existing.FScore;
        existing.TreeIndex += _treeIndex;
        existing.AverageTreeIndex = existing.TreeIndex / existing.FScore;
        existing.SplitValueHistogram.Merge(fi.SplitValueHistogram);
    }
    else
    {
        // First time this interaction name is seen.
        _treeFeatureInteractions.Add(fi.Name, fi);
        _pathMemo.Add(path);
    }

    // Stop extending the chain once it holds _maxInteractionDepth + 1 nodes.
    if (currentInteraction.Count - 1 == _maxInteractionDepth)
    {
        return;
    }

    // Each child continues with its own copy of the chain.
    var chainForLeft = new HashSet<XgbTreeNode>(currentInteraction);
    var chainForRight = new HashSet<XgbTreeNode>(currentInteraction);

    // Leaf statistics are only gathered on chains that were not started by deepening.
    if (deepening == 0)
    {
        if (leftTree.IsLeafNode)
        {
            var leftStats = _treeFeatureInteractions[fi.Name];
            leftStats.SumLeafValuesLeft += leftTree.Data.LeafValue;
            leftStats.SumLeafCoversLeft += leftTree.Data.Cover;
            leftStats.HasLeafStatistics = true;
        }

        if (rightTree.IsLeafNode)
        {
            var rightStats = _treeFeatureInteractions[fi.Name];
            rightStats.SumLeafValuesRight += rightTree.Data.LeafValue;
            rightStats.SumLeafCoversRight += rightTree.Data.Cover;
            rightStats.HasLeafStatistics = true;
        }
    }

    CollectFeatureInteractions(leftTree, chainForLeft, currentGain, currentCover, probabilityLeft, depth + 1, deepening);
    CollectFeatureInteractions(rightTree, chainForRight, currentGain, currentCover, probabilityRight, depth + 1, deepening);
}
/// <summary>
/// Recursively attaches child subtrees to <paramref name="tree"/>, looking each child up
/// in <c>xgbNodeList</c> by the id stored in the node's <c>LeftChild</c> / <c>RightChild</c>.
/// A node with neither id set is left without children.
/// </summary>
/// <param name="tree">Tree node to expand; its <c>Data</c> supplies the child ids.</param>
private static void ConstructXgbTree(XgbTree tree)
{
    if (tree.Data.LeftChild != null)
    {
        var leftId = (int)tree.Data.LeftChild;
        tree.Add(new XgbTree(xgbNodeList[leftId]));

        // Recurse into the subtree as exposed by the parent after Add.
        // NOTE(review): presumably the first Add fills Left - confirm against XgbTree.Add.
        ConstructXgbTree((XgbTree)tree.Left);
    }

    if (tree.Data.RightChild != null)
    {
        var rightId = (int)tree.Data.RightChild;
        tree.Add(new XgbTree(xgbNodeList[rightId]));

        ConstructXgbTree((XgbTree)tree.Right);
    }
}
/// <summary>
/// Walks <paramref name="tree"/> recursively and records, in <c>_treeFeatureInteractions</c>,
/// one <c>FeatureInteraction</c> per distinct chain of split nodes, keyed by the interaction's
/// <c>Name</c>. When a name is met again the existing entry is updated (once per distinct
/// chain) and the chain is not extended any further.
/// </summary>
/// <param name="tree">Subtree whose root is visited by this call.</param>
/// <param name="currentInteraction">Nodes collected on the current chain; the visited node is added to it.</param>
/// <param name="currentGain">Gain accumulated along the chain before this node.</param>
/// <param name="currentCover">Cover accumulated along the chain before this node.</param>
/// <param name="pathProbability">Product of child/parent cover ratios on the way down to this node.</param>
/// <param name="depth">Depth of the visited node, compared against <c>_maxDeepening</c>.</param>
private void CollectFeatureInteractions(XgbTree tree, HashSet<XgbTreeNode> currentInteraction, double currentGain, double currentCover, double pathProbability, int depth)
{
    if (tree.IsLeafNode)
    {
        return;
    }

    var leftTree = (XgbTree)tree.Left;
    var rightTree = (XgbTree)tree.Right;

    // Extend the running chain with this split node.
    currentInteraction.Add(tree.Data);
    currentGain += tree.Data.Gain;
    currentCover += tree.Data.Cover;

    // Each child's share of this node's cover scales the probability of reaching it.
    var probabilityLeft = pathProbability * (leftTree.Data.Cover / tree.Data.Cover);
    var probabilityRight = pathProbability * (rightTree.Data.Cover / tree.Data.Cover);

    var fi = new FeatureInteraction(currentInteraction, currentGain, currentCover, pathProbability, 1);

    // Start brand-new chains at both children (empty set, zero gain/cover) while
    // deepening is still allowed; a negative _maxDeepening means no limit.
    bool deepeningAllowed = _maxDeepening < 0 || depth < _maxDeepening;
    if (deepeningAllowed)
    {
        var freshLeft = new HashSet<XgbTreeNode>();
        var freshRight = new HashSet<XgbTreeNode>();

        CollectFeatureInteractions(leftTree, freshLeft, 0, 0, probabilityLeft, depth + 1);
        CollectFeatureInteractions(rightTree, freshRight, 0, 0, probabilityRight, depth + 1);
    }

    // The joined node numbers identify this exact chain.
    var path = string.Join("-", currentInteraction.Select(node => node.Number));

    if (_treeFeatureInteractions.ContainsKey(fi.Name))
    {
        // Known interaction name: count this chain once, then stop in either case.
        if (_pathMemo.Contains(path))
        {
            return;
        }

        _pathMemo.Add(path);

        // Averages are recomputed from the updated totals, so the order below matters.
        var existing = _treeFeatureInteractions[fi.Name];
        existing.Gain += currentGain;
        existing.Cover += currentCover;
        existing.FScore += 1;
        existing.FScoreWeighted += pathProbability;
        existing.AverageFScoreWeighted = existing.FScoreWeighted / existing.FScore;
        existing.AverageGain = existing.Gain / existing.FScore;
        existing.ExpectedGain += currentGain * pathProbability;
        return;
    }

    // First time this interaction name is seen.
    _treeFeatureInteractions.Add(fi.Name, fi);
    _pathMemo.Add(path);

    // Stop extending the chain once it holds _maxInteractionDepth + 1 nodes.
    if (currentInteraction.Count - 1 == _maxInteractionDepth)
    {
        return;
    }

    // Each child continues with its own copy of the chain.
    var chainForLeft = new HashSet<XgbTreeNode>(currentInteraction);
    var chainForRight = new HashSet<XgbTreeNode>(currentInteraction);

    CollectFeatureInteractions(leftTree, chainForLeft, currentGain, currentCover, probabilityLeft, depth + 1);
    CollectFeatureInteractions(rightTree, chainForRight, currentGain, currentCover, probabilityRight, depth + 1);
}