예제 #1
0
        /// <summary>
        /// Recursively expands <paramref name="tree"/> by attaching a subtree for each
        /// child index recorded on its node data, looking the child nodes up in
        /// <c>xgbNodeList</c>. The left child is handled (and fully expanded) before the
        /// right child.
        /// </summary>
        /// <param name="tree">The tree whose node data names the children to attach.</param>
        private static void ConstructXgbTree(XgbTree tree)
        {
            // Left side: attach the child node, then descend into it.
            var leftIndex = tree.Data.LeftChild;
            if (leftIndex != null)
            {
                var leftNode = xgbNodeList[(int)leftIndex];
                tree.Add(new XgbTree(leftNode));
                ConstructXgbTree((XgbTree)tree.Left);
            }

            // Right side: same procedure, performed after the left subtree is complete.
            var rightIndex = tree.Data.RightChild;
            if (rightIndex != null)
            {
                var rightNode = xgbNodeList[(int)rightIndex];
                tree.Add(new XgbTree(rightNode));
                ConstructXgbTree((XgbTree)tree.Right);
            }
        }
예제 #2
0
        /// <summary>
        /// Reads a model dump file line by line and builds an <see cref="XgbModel"/> from it.
        /// Lines that start with "booster" and blank lines end the tree being collected;
        /// every other line is parsed as a tree node via <c>ParseXgbTreeNode</c>.
        /// Progress is reported on the console.
        /// </summary>
        /// <param name="fileName">Path of the model file to parse.</param>
        /// <param name="maxTrees">
        /// Parsing stops as soon as this many trees have been built. A negative value never
        /// triggers the limit. NOTE(review): with a value of 0 the limit is never hit inside
        /// the loop and the trailing tree is skipped - this pre-existing behaviour is kept
        /// unchanged; confirm whether 0 is a supported input.
        /// </param>
        /// <returns>
        /// The populated model, or <c>null</c> when the file does not exist or a node line
        /// cannot be parsed.
        /// </returns>
        public static XgbModel GetXgbModelFromFile(string fileName, int maxTrees)
        {
            XgbModel xgbModel = new XgbModel();

            if (!File.Exists(fileName))
            {
                Console.ForegroundColor = ConsoleColor.Yellow;
                Console.WriteLine("Error: File {0} does not exist.", fileName);
                Console.ResetColor();
                return null;
            }

            Console.ResetColor();
            Console.WriteLine("Parsing {0}", fileName);

            int numTree = 0;

            var fileInfo = new FileInfo(fileName);

            GlobalStats.ModelFileSize = fileInfo.Length;

            try
            {
                using (StreamReader sr = new StreamReader(fileName))
                {
                    while (!sr.EndOfStream)
                    {
                        var line = sr.ReadLine().Trim();

                        // Ordinal comparison: "booster" is a file-format keyword, not linguistic text.
                        bool isTreeBoundary = line.Length == 0 || line.StartsWith("booster", StringComparison.Ordinal);

                        if (!isTreeBoundary)
                        {
                            var node = ParseXgbTreeNode(line);
                            if (node == null)
                            {
                                // The finally block below discards the partially collected nodes.
                                return null;
                            }
                            xgbNodeList.Add(node.Number, node);
                            continue;
                        }

                        // A boundary with no collected nodes (e.g. the first header) has nothing to flush.
                        if (xgbNodeList.Count == 0)
                        {
                            continue;
                        }

                        numTree++;
                        AppendTreeFromCollectedNodes(xgbModel, numTree);

                        if (numTree == maxTrees)
                        {
                            break;
                        }
                    }
                }

                // The file may end without a trailing boundary line; flush the last tree
                // unless the tree limit has already been reached.
                if (xgbNodeList.Count > 0 && (maxTrees < 0 || numTree < maxTrees))
                {
                    numTree++;
                    AppendTreeFromCollectedNodes(xgbModel, numTree);
                }
            }
            finally
            {
                // xgbNodeList is shared state: never leave stale nodes behind for the next
                // call, whether we finished normally, bailed out on a bad line, or threw.
                xgbNodeList.Clear();
            }

            Console.ForegroundColor = ConsoleColor.Green;
            Console.WriteLine("{0} trees has been constructed.\n", xgbModel.NumTrees);
            Console.ResetColor();
            return xgbModel;
        }

        /// <summary>
        /// Builds one tree from the nodes currently held in <c>xgbNodeList</c> (rooted at
        /// the node stored under key 0), appends it to <paramref name="xgbModel"/>, reports
        /// it on the console and empties the node list.
        /// </summary>
        /// <param name="xgbModel">Model that receives the finished tree.</param>
        /// <param name="treeNumber">1-based ordinal of the tree, used only for the console message.</param>
        private static void AppendTreeFromCollectedNodes(XgbModel xgbModel, int treeNumber)
        {
            Console.ForegroundColor = ConsoleColor.DarkGreen;
            Console.Write("Constructing tree #{0} ", treeNumber);

            XgbTree tree = new XgbTree(xgbNodeList[0]);
            ConstructXgbTree(tree);

            Console.WriteLine("=> depth: {0} ({1} nodes)", tree.Height, xgbNodeList.Count);
            Console.ResetColor();

            xgbModel.XgbTrees.Add(tree);
            xgbNodeList.Clear();
        }
예제 #3
0
        /// <summary>
        /// Walks <paramref name="tree"/> recursively and records, in
        /// <c>_treeFeatureInteractions</c>, one <c>FeatureInteraction</c> per distinct
        /// interaction name found along the visited paths. Leaf nodes are ignored.
        /// </summary>
        /// <param name="tree">Subtree whose root is the node being visited.</param>
        /// <param name="currentInteraction">
        /// Nodes collected so far on the current path. This set is modified: the visited
        /// node's data is added to it.
        /// </param>
        /// <param name="currentGain">Sum of the gains of the nodes collected so far (this node's gain is added here).</param>
        /// <param name="currentCover">Sum of the covers of the nodes collected so far (this node's cover is added here).</param>
        /// <param name="pathProbability">Probability weight of reaching this node; scaled by child cover / node cover for each child.</param>
        /// <param name="depth">Depth of this node in the walk; incremented by one on every recursive call.</param>
        /// <param name="deepening">
        /// Number of times the walk has been restarted with an empty interaction on the way
        /// to this node. Leaf statistics are only collected when this is 0.
        /// </param>
        private void CollectFeatureInteractions(XgbTree tree, HashSet <XgbTreeNode> currentInteraction, double currentGain, double currentCover, double pathProbability, int depth, int deepening)
        {
            // Leaves carry no split, so they contribute no interaction.
            if (tree.IsLeafNode)
            {
                return;
            }

            // Extend the running interaction with this node and accumulate its statistics.
            currentInteraction.Add(tree.Data);
            currentGain  += tree.Data.Gain;
            currentCover += tree.Data.Cover;

            // Each child's probability is the parent's, scaled by the child's share of the node's cover.
            var pathProbabilityLeft  = pathProbability * (((XgbTree)tree.Left).Data.Cover / tree.Data.Cover);
            var pathProbabilityRight = pathProbability * (((XgbTree)tree.Right).Data.Cover / tree.Data.Cover);

            // Built before the deepening recursion below; the trailing 1 is presumably the
            // initial FScore - TODO confirm against the FeatureInteraction constructor.
            var fi = new FeatureInteraction(currentInteraction, currentGain, currentCover, pathProbability, 1);

            // Deepening: restart the walk at both children with an empty interaction and zeroed
            // gain/cover. A negative _maxDeepening makes this condition always true.
            if (depth < _maxDeepening || _maxDeepening < 0)
            {
                var newInteractionLeft = new HashSet <XgbTreeNode>()
                {
                };
                var newInteractionRight = new HashSet <XgbTreeNode>()
                {
                };

                CollectFeatureInteractions((XgbTree)tree.Left, newInteractionLeft, 0, 0, pathProbabilityLeft, depth + 1, deepening + 1);
                CollectFeatureInteractions((XgbTree)tree.Right, newInteractionRight, 0, 0, pathProbabilityRight, depth + 1, deepening + 1);
            }

            // Identifies this exact node sequence by joining the node numbers with "-".
            var path = string.Join("-", currentInteraction.Select(x => x.Number));

            if (!_treeFeatureInteractions.ContainsKey(fi.Name))
            {
                // First time this interaction name is seen: store it and remember the path.
                _treeFeatureInteractions.Add(fi.Name, fi);
                _pathMemo.Add(path);
            }
            else
            {
                // Same node path already counted: stop here. Note that this also skips the
                // leaf statistics and the recursion into the children further down.
                if (_pathMemo.Contains(path))
                {
                    return;
                }

                // Known interaction name reached via a new path: fold this occurrence into
                // the existing entry and refresh its derived averages.
                _pathMemo.Add(path);
                var tfi = _treeFeatureInteractions[fi.Name];
                tfi.Gain                 += currentGain;
                tfi.Cover                += currentCover;
                tfi.FScore               += 1;
                tfi.FScoreWeighted       += pathProbability;
                tfi.AverageFScoreWeighted = tfi.FScoreWeighted / tfi.FScore;
                tfi.AverageGain           = tfi.Gain / tfi.FScore;
                tfi.ExpectedGain         += currentGain * pathProbability;
            }

            // Do not extend the interaction further once it holds _maxInteractionDepth + 1 nodes.
            if (currentInteraction.Count - 1 == _maxInteractionDepth)
            {
                return;
            }


            // Each child gets its own copy so the two branches do not share additions.
            var currentInteractionLeft  = new HashSet <XgbTreeNode>(currentInteraction);
            var currentInteractionRight = new HashSet <XgbTreeNode>(currentInteraction);

            var leftTree  = (XgbTree)(tree.Left);
            var rightTree = (XgbTree)(tree.Right);

            // Leaf statistics are only accumulated on walks that were never restarted (deepening == 0).
            if (leftTree.IsLeafNode && deepening == 0)
            {
                var tfi = _treeFeatureInteractions[fi.Name];
                tfi.SumLeafValuesLeft += leftTree.Data.LeafValue;
                tfi.SumLeafCoversLeft += leftTree.Data.Cover;
                tfi.HasLeafStatistics  = true;
            }

            if (rightTree.IsLeafNode && deepening == 0)
            {
                var tfi = _treeFeatureInteractions[fi.Name];
                tfi.SumLeafValuesRight += rightTree.Data.LeafValue;
                tfi.SumLeafCoversRight += rightTree.Data.Cover;
                tfi.HasLeafStatistics   = true;
            }

            // Continue the current interaction into both children, carrying the accumulated gain and cover.
            CollectFeatureInteractions((XgbTree)tree.Left, currentInteractionLeft, currentGain, currentCover, pathProbabilityLeft, depth + 1, deepening);
            CollectFeatureInteractions((XgbTree)tree.Right, currentInteractionRight, currentGain, currentCover, pathProbabilityRight, depth + 1, deepening);
        }