Example #1
0
        /// <summary>
        /// Parses a text model dump into an <see cref="XgbModel"/>, building one tree per
        /// group of node lines. A line starting with "booster" or an empty line ends the
        /// current group; every other line is handed to <c>ParseXgbTreeNode</c>.
        /// </summary>
        /// <param name="fileName">Path of the dump file to read.</param>
        /// <param name="maxTrees">
        /// Upper bound on the number of trees to build; parsing stops as soon as this many
        /// trees have been constructed. A negative value means no limit.
        /// NOTE(review): 0 is not treated as "no trees" - the loop never hits the limit and
        /// only the final tree of the file is dropped. Behaviour kept as-is; confirm intent.
        /// </param>
        /// <returns>
        /// The populated model, or <c>null</c> when the file does not exist or a node line
        /// could not be parsed (<c>ParseXgbTreeNode</c> returned <c>null</c>).
        /// </returns>
        public static XgbModel GetXgbModelFromFile(string fileName, int maxTrees)
        {
            XgbModel xgbModel = new XgbModel();

            if (!File.Exists(fileName))
            {
                Console.ForegroundColor = ConsoleColor.Yellow;
                Console.WriteLine(String.Format("Error: File {0} does not exist.",fileName));
                Console.ResetColor();
                return null;
            }

            Console.ResetColor();
            Console.WriteLine(String.Format("Parsing {0}", fileName));

            int numTree = 0;

            var fileInfo = new FileInfo(fileName);
            GlobalStats.ModelFileSize = fileInfo.Length;

            // xgbNodeList lives outside this method. Start from a clean slate so nodes left
            // behind by an earlier, aborted parse cannot leak into the first tree of this one.
            xgbNodeList.Clear();

            try
            {
                using (StreamReader sr = new StreamReader(fileName))
                {
                    while (!sr.EndOfStream)
                    {
                        var line = sr.ReadLine().Trim();

                        // Ordinal comparison: "booster" is a file-format keyword, not linguistic text.
                        if (line.StartsWith("booster", StringComparison.Ordinal) || line == String.Empty)
                        {
                            // A separator closes the tree whose nodes have been collected so far.
                            if (xgbNodeList.Count > 0)
                            {
                                numTree++;
                                AppendTreeFromNodeList(xgbModel, numTree);
                                if (numTree == maxTrees) break;
                            }
                        }
                        else
                        {
                            var node = ParseXgbTreeNode(line);
                            if (node == null)
                            {
                                return null;
                            }
                            xgbNodeList.Add(node.Number, node);
                        }
                    }
                }

                // The last tree of the file is usually not followed by a separator line.
                if (xgbNodeList.Count > 0 && (maxTrees < 0 || numTree < maxTrees))
                {
                    numTree++;
                    AppendTreeFromNodeList(xgbModel, numTree);
                }
            }
            finally
            {
                // Covers the parse-failure return, exceptions, and the maxTrees == 0 case where
                // the trailing nodes are never consumed: never leave stale nodes behind.
                xgbNodeList.Clear();
            }

            Console.ForegroundColor = ConsoleColor.Green;
            Console.WriteLine(String.Format("{0} trees has been constructed.\n", xgbModel.NumTrees));
            Console.ResetColor();
            return xgbModel;
        }

        /// <summary>
        /// Builds a tree from the nodes currently held in <c>xgbNodeList</c> (rooted at the
        /// entry stored under key 0), reports progress on the console, appends the tree to
        /// <paramref name="xgbModel"/> and empties the node list for the next tree.
        /// </summary>
        /// <param name="xgbModel">Model that receives the constructed tree.</param>
        /// <param name="treeNumber">1-based ordinal of the tree, used only for the progress message.</param>
        private static void AppendTreeFromNodeList(XgbModel xgbModel, int treeNumber)
        {
            Console.ForegroundColor = ConsoleColor.DarkGreen;
            Console.Write(String.Format("Constructing tree #{0} ", treeNumber));

            XgbTree tree = new XgbTree(xgbNodeList[0]);
            ConstructXgbTree(tree);

            Console.WriteLine(String.Format("=> depth: {0} ({1} nodes)", tree.Height, xgbNodeList.Count));
            Console.ResetColor();

            xgbModel.XgbTrees.Add(tree);
            xgbNodeList.Clear();
        }
Example #2
0
        /// <summary>
        /// Recursively walks <paramref name="tree"/> and registers or updates entries in
        /// <c>_treeFeatureInteractions</c> for the split nodes accumulated along the path.
        /// Leaf nodes are ignored. <c>_pathMemo</c> (keyed by the "-"-joined node numbers)
        /// prevents the same node path from being counted twice.
        /// </summary>
        /// <param name="tree">Sub-tree whose root is examined in this call.</param>
        /// <param name="currentInteraction">Split nodes gathered so far; the current node is added to this set.</param>
        /// <param name="currentGain">Gain summed over the nodes already in the interaction.</param>
        /// <param name="currentCover">Cover summed over the nodes already in the interaction.</param>
        /// <param name="pathProbability">Product of child-cover / parent-cover ratios along the path to this node.</param>
        /// <param name="depth">Depth of this node, incremented by one per recursion level.</param>
        /// <param name="deepening">
        /// Number of times the walk has been restarted with an empty interaction further down
        /// the tree; leaf statistics are only recorded while this is 0.
        /// </param>
        private void CollectFeatureInteractions(XgbTree tree, HashSet<XgbTreeNode> currentInteraction, double currentGain, double currentCover, double pathProbability, int depth, int deepening)
        {
            // Leaves carry no split, so there is nothing to add to an interaction.
            if (tree.IsLeafNode)
            {
                return;
            }

            var split = tree.Data;
            currentInteraction.Add(split);
            currentGain += split.Gain;
            currentCover += split.Cover;

            // Probability of following each branch = share of this node's cover that goes there.
            var leftTree = (XgbTree)tree.Left;
            var pathProbabilityLeft = pathProbability * (leftTree.Data.Cover / split.Cover);
            var rightTree = (XgbTree)tree.Right;
            var pathProbabilityRight = pathProbability * (rightTree.Data.Cover / split.Cover);

            var candidate = new FeatureInteraction(currentInteraction, currentGain, currentCover, pathProbability, depth, _treeIndex, 1);

            // Restart the walk from each child with an empty interaction, unless the
            // deepening limit has been reached (a negative limit disables the check).
            bool mayDeepen = _maxDeepening < 0 || depth < _maxDeepening;
            if (mayDeepen)
            {
                CollectFeatureInteractions(leftTree, new HashSet<XgbTreeNode>(), 0, 0, pathProbabilityLeft, depth + 1, deepening + 1);
                CollectFeatureInteractions(rightTree, new HashSet<XgbTreeNode>(), 0, 0, pathProbabilityRight, depth + 1, deepening + 1);
            }

            var nodeNumbers = currentInteraction.Select(x => x.Number);
            var path = string.Join("-", nodeNumbers);

            if (_treeFeatureInteractions.ContainsKey(candidate.Name))
            {
                // Same interaction reached through a node path that was already counted: stop here.
                if (_pathMemo.Contains(path))
                {
                    return;
                }

                _pathMemo.Add(path);

                // Known interaction, new path: fold this occurrence into the running totals.
                var known = _treeFeatureInteractions[candidate.Name];
                known.Gain += currentGain;
                known.Cover += currentCover;
                known.FScore += 1;
                known.FScoreWeighted += pathProbability;
                known.AverageFScoreWeighted = known.FScoreWeighted / known.FScore;
                known.AverageGain = known.Gain / known.FScore;
                known.ExpectedGain += currentGain * pathProbability;
                known.TreeDepth += depth;
                known.AverageTreeDepth = known.TreeDepth / known.FScore;
                known.TreeIndex += _treeIndex;
                known.AverageTreeIndex = known.TreeIndex / known.FScore;
                known.SplitValueHistogram.Merge(candidate.SplitValueHistogram);
            }
            else
            {
                // First sighting of this interaction.
                _treeFeatureInteractions.Add(candidate.Name, candidate);
                _pathMemo.Add(path);
            }

            // Interaction already holds the maximum number of nodes: do not extend it further.
            if (_maxInteractionDepth == currentInteraction.Count - 1)
            {
                return;
            }

            // Leaf statistics are only gathered on the non-deepened walk.
            if (deepening == 0)
            {
                if (leftTree.IsLeafNode)
                {
                    var leftStats = _treeFeatureInteractions[candidate.Name];
                    leftStats.SumLeafValuesLeft += leftTree.Data.LeafValue;
                    leftStats.SumLeafCoversLeft += leftTree.Data.Cover;
                    leftStats.HasLeafStatistics = true;
                }

                if (rightTree.IsLeafNode)
                {
                    var rightStats = _treeFeatureInteractions[candidate.Name];
                    rightStats.SumLeafValuesRight += rightTree.Data.LeafValue;
                    rightStats.SumLeafCoversRight += rightTree.Data.Cover;
                    rightStats.HasLeafStatistics = true;
                }
            }

            // Each branch extends its own copy so siblings do not see each other's nodes.
            var interactionForLeft = new HashSet<XgbTreeNode>(currentInteraction);
            var interactionForRight = new HashSet<XgbTreeNode>(currentInteraction);

            CollectFeatureInteractions(leftTree, interactionForLeft, currentGain, currentCover, pathProbabilityLeft, depth + 1, deepening);
            CollectFeatureInteractions(rightTree, interactionForRight, currentGain, currentCover, pathProbabilityRight, depth + 1, deepening);
        }
Example #3
0
        /// <summary>
        /// Recursively attaches child sub-trees to <paramref name="tree"/>, looking the
        /// child nodes up in <c>xgbNodeList</c> by the child numbers stored on the node.
        /// The left child (if any) is attached and fully expanded before the right one.
        /// </summary>
        /// <param name="tree">Tree node to expand; its <c>Data</c> names the children to attach.</param>
        private static void ConstructXgbTree(XgbTree tree)
        {
            var nodeData = tree.Data;

            // Left branch first: attach, then expand depth-first.
            if (null != nodeData.LeftChild)
            {
                var leftNode = xgbNodeList[(int)nodeData.LeftChild];
                tree.Add(new XgbTree(leftNode));
                ConstructXgbTree((XgbTree)tree.Left);
            }

            // Then the right branch, same procedure.
            if (null != nodeData.RightChild)
            {
                var rightNode = xgbNodeList[(int)nodeData.RightChild];
                tree.Add(new XgbTree(rightNode));
                ConstructXgbTree((XgbTree)tree.Right);
            }
        }
Example #4
0
        /// <summary>
        /// Recursively walks <paramref name="tree"/> and registers or updates entries in
        /// <c>_treeFeatureInteractions</c> for the split nodes accumulated along the path.
        /// Leaf nodes are ignored. When the interaction name is already registered the walk
        /// along this path ends after (at most) updating the existing entry.
        /// </summary>
        /// <param name="tree">Sub-tree whose root is examined in this call.</param>
        /// <param name="currentInteraction">Split nodes gathered so far; the current node is added to this set.</param>
        /// <param name="currentGain">Gain summed over the nodes already in the interaction.</param>
        /// <param name="currentCover">Cover summed over the nodes already in the interaction.</param>
        /// <param name="pathProbability">Product of child-cover / parent-cover ratios along the path to this node.</param>
        /// <param name="depth">Depth of this node, incremented by one per recursion level.</param>
        private void CollectFeatureInteractions(XgbTree tree, HashSet<XgbTreeNode> currentInteraction, double currentGain, double currentCover, double pathProbability, int depth)
        {
            // Leaves carry no split, so there is nothing to add to an interaction.
            if (tree.IsLeafNode)
            {
                return;
            }

            var split = tree.Data;
            currentInteraction.Add(split);
            currentGain += split.Gain;
            currentCover += split.Cover;

            // Probability of following each branch = share of this node's cover that goes there.
            var leftTree = (XgbTree)tree.Left;
            var pathProbabilityLeft = pathProbability * (leftTree.Data.Cover / split.Cover);
            var rightTree = (XgbTree)tree.Right;
            var pathProbabilityRight = pathProbability * (rightTree.Data.Cover / split.Cover);

            var candidate = new FeatureInteraction(currentInteraction, currentGain, currentCover, pathProbability, 1);

            // Restart the walk from each child with an empty interaction, unless the
            // deepening limit has been reached (a negative limit disables the check).
            bool mayDeepen = _maxDeepening < 0 || depth < _maxDeepening;
            if (mayDeepen)
            {
                CollectFeatureInteractions(leftTree, new HashSet<XgbTreeNode>(), 0, 0, pathProbabilityLeft, depth + 1);
                CollectFeatureInteractions(rightTree, new HashSet<XgbTreeNode>(), 0, 0, pathProbabilityRight, depth + 1);
            }

            var nodeNumbers = currentInteraction.Select(x => x.Number);
            var path = string.Join("-", nodeNumbers);

            if (_treeFeatureInteractions.ContainsKey(candidate.Name))
            {
                // Already-registered interaction: count it once per distinct node path,
                // then stop descending along this path either way.
                if (!_pathMemo.Contains(path))
                {
                    _pathMemo.Add(path);

                    var known = _treeFeatureInteractions[candidate.Name];
                    known.Gain += currentGain;
                    known.Cover += currentCover;
                    known.FScore += 1;
                    known.FScoreWeighted += pathProbability;
                    known.AverageFScoreWeighted = known.FScoreWeighted / known.FScore;
                    known.AverageGain = known.Gain / known.FScore;
                    known.ExpectedGain += currentGain * pathProbability;
                }

                return;
            }

            // First sighting of this interaction.
            _treeFeatureInteractions.Add(candidate.Name, candidate);
            _pathMemo.Add(path);

            // Interaction already holds the maximum number of nodes: do not extend it further.
            if (_maxInteractionDepth == currentInteraction.Count - 1)
            {
                return;
            }

            // Each branch extends its own copy so siblings do not see each other's nodes.
            var interactionForLeft = new HashSet<XgbTreeNode>(currentInteraction);
            var interactionForRight = new HashSet<XgbTreeNode>(currentInteraction);

            CollectFeatureInteractions(leftTree, interactionForLeft, currentGain, currentCover, pathProbabilityLeft, depth + 1);
            CollectFeatureInteractions(rightTree, interactionForRight, currentGain, currentCover, pathProbabilityRight, depth + 1);
        }