public void AddTree(RegressionTree tree) => _trees.Add(tree);

public void AddTreeAt(RegressionTree tree, int index) => _trees.Insert(index, tree);

        public IPredictorProducing<float> CombineModels(IEnumerable<IPredictorProducing<float>> models)
        {
            _host.CheckValue(models, nameof(models));

            var  ensemble         = new Ensemble();
            int  modelCount       = 0;
            int  featureCount     = -1;
            bool binaryClassifier = false;

            foreach (var model in models)
            {
                modelCount++;

                var predictor = model;
                _host.CheckValue(predictor, nameof(models), "One of the models is null");

                var    calibrated = predictor as CalibratedPredictorBase;
                double paramA     = 1;
                if (calibrated != null)
                {
                    _host.Check(calibrated.Calibrator is PlattCalibrator,
                                "Combining FastTree models can only be done when the models are calibrated with Platt calibrator");
                    predictor = calibrated.SubPredictor;
                    paramA    = -(calibrated.Calibrator as PlattCalibrator).ParamA;
                }
                var tree = predictor as FastTreePredictionWrapper;
                if (tree == null)
                {
                    throw _host.Except("Model is not a tree ensemble");
                }
                foreach (var t in tree.TrainedEnsemble.Trees)
                {
                    // Clone the tree by round-tripping it through its byte representation.
                    var bytes    = new byte[t.SizeInBytes()];
                    int position = -1;
                    t.ToByteArray(bytes, ref position);
                    position = -1;
                    var tNew = new RegressionTree(bytes, ref position);
                    if (paramA != 1)
                    {
                        // Fold the Platt calibrator's scale parameter into the leaf outputs.
                        for (int i = 0; i < tNew.NumLeaves; i++)
                        {
                            tNew.SetOutput(i, tNew.LeafValues[i] * paramA);
                        }
                    }
                    ensemble.AddTree(tNew);
                }

                if (modelCount == 1)
                {
                    binaryClassifier = calibrated != null;
                    featureCount     = tree.InputType.ValueCount;
                }
                else
                {
                    _host.Check((calibrated != null) == binaryClassifier, "Ensemble contains both calibrated and uncalibrated models");
                    _host.Check(featureCount == tree.InputType.ValueCount, "Found models with different number of features");
                }
            }

            // Average the ensemble: scale every leaf output by 1 / modelCount.
            var scale = 1 / (double)modelCount;

            foreach (var t in ensemble.Trees)
            {
                for (int i = 0; i < t.NumLeaves; i++)
                {
                    t.SetOutput(i, t.LeafValues[i] * scale);
                }
            }

            switch (_kind)
            {
            case PredictionKind.BinaryClassification:
                if (!binaryClassifier)
                {
                    return(new FastTreeBinaryPredictor(_host, ensemble, featureCount, null));
                }

                var cali = new PlattCalibrator(_host, -1, 0);
                return(new FeatureWeightsCalibratedPredictor(_host, new FastTreeBinaryPredictor(_host, ensemble, featureCount, null), cali));

            case PredictionKind.Regression:
                return(new FastTreeRegressionPredictor(_host, ensemble, featureCount, null));

            case PredictionKind.Ranking:
                return(new FastTreeRankingPredictor(_host, ensemble, featureCount, null));

            default:
                _host.Assert(false);
                throw _host.ExceptNotSupp();
            }
        }
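
A minimal usage sketch, assuming combiner is an instance of the class above (constructed for PredictionKind.Regression) and predictorA / predictorB are placeholders for previously trained FastTree models; none of these names appear in the snippet itself:

// Hypothetical usage: average two trained FastTree models into a single ensemble.
// combiner, predictorA, and predictorB are assumptions for illustration.
IPredictorProducing<float> combined =
    combiner.CombineModels(new IPredictorProducing<float>[] { predictorA, predictorB });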
Example #4
 public virtual void AddScores(RegressionTree tree, double multiplier)
 {
     tree.AddOutputsToScores(Dataset, Scores, multiplier);
     SendScoresUpdatedMessage();
 }
Example #5
 public RegressionTreeVisualizator(RegressionTree tree, Chart chart)
 {
     Tree            = tree;
     RegressionChart = chart;
 }
Example #6
        /// <summary>
        /// Constructs a partitioning object based on the documents and the RegressionTree splits.
        /// NOTE: It has been optimized for speed and multiple processors, with a ~10x gain over a naive LINQ implementation.
        /// </summary>
        public DocumentPartitioning(RegressionTree tree, Dataset dataset)
            : this(dataset.NumDocs, tree.NumLeaves)
        {
            using (Timer.Time(TimerEvent.DocumentPartitioningConstruction))
            {
                // figure out which leaf each document belongs to
                // NOTE: break it up into NumThreads chunks. This minimizes the number of re-computations necessary in
                // the row-wise indexer.
                int innerLoopSize = 1 + dataset.NumDocs / BlockingThreadPool.NumThreads; // +1 is to make sure we don't have a few left over at the end

                // figure out the exact number of chunks, needed in pathological cases when NumDocs < NumThreads
                int numChunks = dataset.NumDocs / innerLoopSize;
                if (dataset.NumDocs % innerLoopSize != 0)
                {
                    ++numChunks;
                }
                var perChunkDocumentLists = new List<int>[numChunks][];
                // REVIEW: This partitioning doesn't look optimal.
                // It probably makes sense to investigate better ways of splitting the data.
                var actions     = new Action[(int)Math.Ceiling(1.0 * dataset.NumDocs / innerLoopSize)];
                var actionIndex = 0;
                for (int docStart = 0; docStart < dataset.NumDocs; docStart += innerLoopSize)
                {
                    var fromDoc    = docStart;
                    var toDoc      = Math.Min(docStart + innerLoopSize, dataset.NumDocs);
                    var chunkIndex = docStart / innerLoopSize;
                    actions[actionIndex++] = () =>
                    {
                        Contracts.Assert(perChunkDocumentLists[chunkIndex] == null);

                        var featureBins = dataset.GetFeatureBinRowwiseIndexer();

                        List<int>[] perLeafDocumentLists = Enumerable.Range(0, tree.NumLeaves)
                                                           .Select(x => new List<int>(innerLoopSize / tree.NumLeaves))
                                                           .ToArray();

                        for (int d = fromDoc; d < toDoc; d++)
                        {
                            int leaf = tree.GetLeaf(featureBins[d]);
                            perLeafDocumentLists[leaf].Add(d);
                        }

                        perChunkDocumentLists[chunkIndex] = perLeafDocumentLists;
                    };
                }
                Parallel.Invoke(new ParallelOptions {
                    MaxDegreeOfParallelism = BlockingThreadPool.NumThreads
                }, actions);

                // establish leaf starts and document counts
                _leafCount = Enumerable.Range(0, tree.NumLeaves)
                             .Select(leaf => Enumerable.Range(0, perChunkDocumentLists.Length)
                                     .Select(thread => perChunkDocumentLists[thread][leaf].Count)
                                     .Sum())
                             .ToArray();

                var cumulativeLength = _leafCount.CumulativeSum<int>().Take(tree.NumLeaves - 1);
                _leafBegin = Enumerable.Range(0, 1).Concat(cumulativeLength).ToArray();

                // move all documents that belong to the same leaf together
                Contracts.Assert(_documents.Length == _leafBegin[tree.NumLeaves - 1] + _leafCount[tree.NumLeaves - 1]);
                actions     = new Action[tree.NumLeaves];
                actionIndex = 0;
                for (int leaf = 0; leaf < tree.NumLeaves; leaf++)
                {
                    var l = leaf;
                    actions[actionIndex++] = () =>
                    {
                        int documentPos = _leafBegin[l];
                        for (int chunkIndex = 0; chunkIndex < perChunkDocumentLists.Length; chunkIndex++)
                        {
                            foreach (int d in perChunkDocumentLists[chunkIndex][l])
                            {
                                _documents[documentPos++] = d;
                            }
                            perChunkDocumentLists[chunkIndex][l] = null;
                        }
                    };
                }
                Parallel.Invoke(new ParallelOptions {
                    MaxDegreeOfParallelism = BlockingThreadPool.NumThreads
                }, actions);
            }
        }
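
To make the chunking arithmetic in the constructor concrete, here is the same ceiling-division logic in isolation (a sketch; the document and thread counts are made up):

// Sketch of the chunk computation above, with made-up sizes.
int numDocs = 10, numThreads = 4;
int innerLoopSize = 1 + numDocs / numThreads; // 3; the +1 avoids a tiny leftover chunk
int numChunks = numDocs / innerLoopSize;      // 3
if (numDocs % innerLoopSize != 0)
{
    ++numChunks;                              // 4; ceiling division
}
// Pathological case noted in the comment (NumDocs < NumThreads):
// numDocs = 2, numThreads = 8 gives innerLoopSize = 1 and numChunks = 2.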
Example #7
 public void Initialize(RegressionTree tree, DocumentPartitioning partitioning, ScoreTracker previousScores)
 {
     _tree           = tree;
     _partitioning   = partitioning;
     _previousScores = previousScores;
 }
Example #8
        public void AdjustTreeOutputs(IChannel ch, RegressionTree tree, DocumentPartitioning partitioning,
                                      ScoreTracker previousScores)
        {
            _lo.Initialize(tree, partitioning, previousScores);
            _hi.Initialize(tree, partitioning, previousScores);
            _left.Initialize(tree, partitioning, previousScores);
            _right.Initialize(tree, partitioning, previousScores);

            _lo.Step   = _historicStepSize / _phi;
            _left.Step = _historicStepSize;

            if (_lo.Loss.CompareTo(_left.Loss) == 1) // backtrack
            {
                do
                {
                    Rotate(ref _hi, ref _left, ref _lo);
                    if (_hi.Step <= _minStepSize)
                    {
                        goto FINISHED;
                    }
                    _lo.Step = _left.Step / _phi;
                } while (_lo.Loss.CompareTo(_left.Loss) == 1);
            }
            else // extend (or stay)
            {
                _hi.Step = _historicStepSize * _phi;
                while (_hi.Loss.CompareTo(_left.Loss) == 1)
                {
                    Rotate(ref _lo, ref _left, ref _hi);
                    _hi.Step = _left.Step * _phi;
                }
            }

            if (_numPostbracketSteps > 0)
            {
                _right.Step = _lo.Step + (_hi.Step - _lo.Step) / _phi;
                for (int step = 0; step < _numPostbracketSteps; ++step)
                {
                    int cmp = _right.Loss.CompareTo(_left.Loss);
                    if (cmp == 0)
                    {
                        break;
                    }

                    if (cmp == 1) // move right
                    {
                        Rotate(ref _lo, ref _left, ref _right);
                        _right.Step = _lo.Step + (_hi.Step - _lo.Step) / _phi;
                    }
                    else // move left
                    {
                        Rotate(ref _hi, ref _right, ref _left);
                        if (_hi.Step <= _minStepSize)
                        {
                            goto FINISHED;
                        }
                        _left.Step = _hi.Step - (_hi.Step - _lo.Step) / _phi;
                    }
                }

                // prepare to return _left
                if (_right.Loss.CompareTo(_left.Loss) == 1)
                {
                    Swap(ref _left, ref _right);
                }
            }

FINISHED:
            if (_hi.Step < _minStepSize)
            {
                _left.Step = _minStepSize;
            }
            else if (_hi.Step == _minStepSize)
            {
                Swap(ref _hi, ref _left);
            }

            double bestStep = _left.Step;

            ch.Info("multiplier: {0}", bestStep);
            _historicStepSize = bestStep;
            tree.ScaleOutputsBy(bestStep);
        }
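
The bracketing and rotation above implement a golden-section style line search over the tree's output multiplier. Below is a minimal self-contained sketch of the same idea; the quadratic loss and the search interval are illustrative stand-ins, not the trainer's loss:

using System;

class GoldenSectionSketch
{
    static void Main()
    {
        double phi = (1 + Math.Sqrt(5)) / 2;              // golden ratio
        Func<double, double> loss = step => (step - 0.7) * (step - 0.7);
        double lo = 0.0, hi = 4.0;                        // initial bracket
        while (hi - lo > 1e-6)
        {
            double left  = hi - (hi - lo) / phi;
            double right = lo + (hi - lo) / phi;
            if (loss(left) < loss(right))
            {
                hi = right;                               // minimum lies in [lo, right]
            }
            else
            {
                lo = left;                                // minimum lies in [left, hi]
            }
        }
        Console.WriteLine((lo + hi) / 2);                 // ~0.7, the best multiplier
    }
}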
        public TreeEnsemble GetModel(int[] categoricalFeatureBoundaries)
        {
            TreeEnsemble res         = new TreeEnsemble();
            string       modelString = GetModelString();

            string[] lines = modelString.Split('\n');
            int      i     = 0;

            for (; i < lines.Length;)
            {
                if (lines[i].StartsWith("Tree="))
                {
                    Dictionary<string, string> kvPairs = new Dictionary<string, string>();
                    ++i;
                    while (!lines[i].StartsWith("Tree=") && lines[i].Trim().Length != 0)
                    {
                        string[] kv = lines[i].Split('=');
                        Contracts.Check(kv.Length == 2);
                        kvPairs[kv[0].Trim()] = kv[1].Trim();
                        ++i;
                    }
                    int numLeaves = int.Parse(kvPairs["num_leaves"], CultureInfo.InvariantCulture);
                    int numCat    = int.Parse(kvPairs["num_cat"], CultureInfo.InvariantCulture);
                    if (numLeaves > 1)
                    {
                        var leftChild                = Str2IntArray(kvPairs["left_child"], ' ');
                        var rightChild               = Str2IntArray(kvPairs["right_child"], ' ');
                        var splitFeature             = Str2IntArray(kvPairs["split_feature"], ' ');
                        var threshold                = Str2DoubleArray(kvPairs["threshold"], ' ');
                        var splitGain                = Str2DoubleArray(kvPairs["split_gain"], ' ');
                        var leafOutput               = Str2DoubleArray(kvPairs["leaf_value"], ' ');
                        var decisionType             = Str2UIntArray(kvPairs["decision_type"], ' ');
                        var defaultValue             = GetDefaultValue(threshold, decisionType);
                        var categoricalSplitFeatures = new int[numLeaves - 1][];
                        var categoricalSplit         = new bool[numLeaves - 1];
                        if (categoricalFeatureBoundaries != null)
                        {
                            // Add offsets to split features.
                            for (int node = 0; node < numLeaves - 1; ++node)
                            {
                                splitFeature[node] = categoricalFeatureBoundaries[splitFeature[node]];
                            }
                        }

                        if (numCat > 0)
                        {
                            var catBoundaries = Str2IntArray(kvPairs["cat_boundaries"], ' ');
                            var catThreshold  = Str2UIntArray(kvPairs["cat_threshold"], ' ');
                            for (int node = 0; node < numLeaves - 1; ++node)
                            {
                                if (GetIsCategoricalSplit(decisionType[node]))
                                {
                                    int catIdx = (int)threshold[node];
                                    var cats   = GetCatThresholds(catThreshold, catBoundaries[catIdx], catBoundaries[catIdx + 1]);
                                    categoricalSplitFeatures[node] = new int[cats.Length];
                                    // Convert Cat thresholds to feature indices.
                                    for (int j = 0; j < cats.Length; ++j)
                                    {
                                        categoricalSplitFeatures[node][j] = splitFeature[node] + cats[j] - 1;
                                    }

                                    splitFeature[node]     = -1;
                                    categoricalSplit[node] = true;
                                    // Swap left and right child.
                                    int t = leftChild[node];
                                    leftChild[node]  = rightChild[node];
                                    rightChild[node] = t;
                                }
                                else
                                {
                                    categoricalSplit[node] = false;
                                }
                            }
                        }
                        RegressionTree tree = RegressionTree.Create(numLeaves, splitFeature, splitGain,
                                                                    threshold.Select(x => (float)(x)).ToArray(), defaultValue.Select(x => (float)(x)).ToArray(), leftChild, rightChild, leafOutput,
                                                                    categoricalSplitFeatures, categoricalSplit);
                        res.AddTree(tree);
                    }
                    else
                    {
                        RegressionTree tree       = new RegressionTree(2);
                        var            leafOutput = Str2DoubleArray(kvPairs["leaf_value"], ' ');
                        if (leafOutput[0] != 0)
                        {
                            // Convert the constant tree to a two-leaf tree so it is not filtered out by TLC.
                            var categoricalSplitFeatures = new int[1][];
                            var categoricalSplit         = new bool[1];
                            tree = RegressionTree.Create(2, new int[] { 0 }, new double[] { 0 },
                                                         new float[] { 0 }, new float[] { 0 }, new int[] { -1 }, new int[] { -2 }, new double[] { leafOutput[0], leafOutput[0] },
                                                         categoricalSplitFeatures, categoricalSplit);
                        }
                        res.AddTree(tree);
                    }
                }
                else
                {
                    ++i;
                }
            }
            return(res);
        }
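
For reference, the model text consumed by this parser looks like the following sketch. The numbers are made up for a three-leaf tree, but the keys match the ones read above (negative child indices denote leaves):

Tree=0
num_leaves=3
num_cat=0
split_feature=4 2
split_gain=0.81 0.34
threshold=1.5 0.25
decision_type=2 2
left_child=1 -1
right_child=-2 -3
leaf_value=0.12 -0.05 0.20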
 public void AdjustTreeOutputs(IChannel ch, RegressionTree tree, DocumentPartitioning partitioning, ScoreTracker trainingScores)
 {
     // Intentionally a no-op: this implementation leaves the tree outputs unchanged.
 }
        private RegressionTree CreateTree(string name, Test testSample)
        {
            RegressionTree tree = new RegressionTree(testSample, name, Penalty);

            return(tree);
        }