Example #1
public void AdjustTreeOutputs(IChannel ch, RegressionTree tree,
                                          DocumentPartitioning partitioning, ScoreTracker trainingScores)
            {
                const double epsilon    = 1.4e-45; // ~float.Epsilon; guards the division below against a zero mean
                double       multiplier = LearningRate * Shrinkage;

                // For non-best-step trees, each leaf output is normalized by the
                // global mean of that leaf before the learning rate is applied.
                double[] means = null;
                if (!BestStepRankingRegressionTrees)
                {
                    means = _parallelTraining.GlobalMean(Dataset, tree, partitioning, Weights, false);
                }
                for (int l = 0; l < tree.NumLeaves; ++l)
                {
                    double output = tree.GetOutput(l);

                    if (BestStepRankingRegressionTrees)
                    {
                        output *= multiplier;
                    }
                    else
                    {
                        output = multiplier * (output + epsilon) / (means[l] + epsilon);
                    }

                    // Clamp the adjusted output to [-MaxTreeOutput, MaxTreeOutput].
                    if (output > MaxTreeOutput)
                    {
                        output = MaxTreeOutput;
                    }
                    else if (output < -MaxTreeOutput)
                    {
                        output = -MaxTreeOutput;
                    }
                    tree.SetOutput(l, output);
                }
            }
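
In effect, the non-best-step branch of Example #1 rescales each leaf by the ratio of its output to the leaf's global mean and then clamps. Written out, with η = LearningRate, σ = Shrinkage, μ_l the global mean of leaf l, and ε = 1.4e-45 acting only as a divide-by-zero guard:

$$ \mathrm{output}_l \leftarrow \mathrm{clamp}\!\left( \eta\,\sigma\,\frac{\mathrm{output}_l + \varepsilon}{\mu_l + \varepsilon},\; -\mathrm{MaxTreeOutput},\; \mathrm{MaxTreeOutput} \right) $$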
Example #2
        internal override void AddScores(InternalRegressionTree tree, DocumentPartitioning partitioning, double multiplier)
        {
            _k++;
            // Momentum coefficient for the accelerated (Nesterov-style) update below.
            double coeff = (_k - 1.0) / (_k + 2.0);

            Parallel.For(0, tree.NumLeaves, new ParallelOptions {
                MaxDegreeOfParallelism = BlockingThreadPool.NumThreads
            }, (leaf) =>
            {
                int[] documents;
                int begin;
                int count;
                partitioning.ReferenceLeafDocuments(leaf, out documents, out begin, out count);
                double output = tree.LeafValue(leaf) * multiplier;
                for (int i = begin; i < begin + count; ++i)
                {
                    int doc      = documents[i];
                    double newXK = YK[doc] + output;                  // x_k = y_{k-1} + step
                    double newYK = newXK + coeff * (newXK - XK[doc]); // y_k = x_k + coeff * (x_k - x_{k-1})
                    XK[doc]      = newXK;
                    YK[doc]      = newYK;
                }
            });
            SendScoresUpdatedMessage();
        }
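
The XK/YK pair above is the standard two-sequence form of an accelerated (Nesterov-style) gradient update, with (_k − 1)/(_k + 2) as the momentum coefficient. A minimal self-contained sketch of the same recurrence on plain arrays (hypothetical names, not this tracker's API):

// xk/yk play the roles of XK/YK; 'output' is the leaf value times the multiplier.
static void AcceleratedUpdate(double[] xk, double[] yk, int[] docs, double output, int k)
{
    double coeff = (k - 1.0) / (k + 2.0);            // momentum coefficient, as in AddScores
    foreach (int doc in docs)
    {
        double newX = yk[doc] + output;              // x_k = y_{k-1} + step
        yk[doc] = newX + coeff * (newX - xk[doc]);   // y_k = x_k + coeff * (x_k - x_{k-1})
        xk[doc] = newX;
    }
}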
Example #3
 internal RegressionTreeNodeDocuments(InternalRegressionTree tree, DocumentPartitioning partitioning, int nodeIndex)
 {
     Tree           = tree;
     Partitioning   = partitioning;
     NodeIndex      = nodeIndex;
     _documentCount = -1;
 }
Example #4
 public double[] GlobalMean(Dataset dataset, RegressionTree tree, DocumentPartitioning partitioning, double[] weights, bool filterZeroLambdas)
 {
     double[] means = new double[tree.NumLeaves];
     for (int l = 0; l < tree.NumLeaves; ++l)
     {
         means[l] = partitioning.Mean(weights, dataset.SampleWeights, l, filterZeroLambdas);
     }
     return means;
 }
Example #5
            public void AdjustTreeOutputs(IChannel ch, RegressionTree tree, DocumentPartitioning partitioning, ScoreTracker trainingScores)
            {
                double shrinkage = LearningRate * Shrinkage;

                for (int l = 0; l < tree.NumLeaves; ++l)
                {
                    double output = tree.GetOutput(l) * shrinkage;
                    tree.SetOutput(l, output);
                }
            }
Example #6
            public void AdjustTreeOutputs(IChannel ch, InternalRegressionTree tree, DocumentPartitioning partitioning, ScoreTracker trainingScores)
            {
                double shrinkage = LearningRate * Shrinkage;
                var    scores    = trainingScores.Scores;
                var    weights   = trainingScores.Dataset.SampleWeights;

                // Following equation 18, and line 2c of algorithm 1 in the source paper.
                for (int l = 0; l < tree.NumLeaves; ++l)
                {
                    double num   = 0;
                    double denom = 0;

                    if (_index1 == 0)
                    {
                        // The index == 1 Poisson case.
                        foreach (int i in partitioning.DocumentsInLeaf(l))
                        {
                            var s = scores[i];
                            var w = weights == null ? 1 : weights[i];
                            num   += w * _labels[i];
                            denom += w * Math.Exp(s);
                        }
                    }
                    else
                    {
                        // The index in (1,2] case.
                        foreach (int i in partitioning.DocumentsInLeaf(l))
                        {
                            var s = scores[i];
                            var w = weights == null ? 1 : weights[i];
                            num   += w * _labels[i] * Math.Exp(_index1 * s);
                            denom += w * Math.Exp(_index2 * s);
                        }
                    }

                    var step = shrinkage * (Math.Log(num) - Math.Log(denom));
                    if (num == 0 && denom == 0)
                    {
                        step = 0; // both sums vanish, so the logs would produce NaN; define the step as 0
                    }
                    // If we do not clamp, it is entirely possible for num to be 0 (with 0 labels), which
                    // means that we will have negative infinities in the leaf nodes. This has a number of
                    // bad negative effects we'd prefer to avoid. Nonetheless, we do give up a substantial
                    // amount of "gain" for those examples.
                    if (step < -_maxClamp)
                    {
                        step = -_maxClamp;
                    }
                    else if (step > _maxClamp)
                    {
                        step = _maxClamp;
                    }
                    tree.SetOutput(l, step);
                }
            }
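
Reading the two branches of Example #6 together (the _index1 == 0 branch is the ρ = 1 Poisson special case), and assuming _index1 = 1 − ρ and _index2 = 2 − ρ for a Tweedie power ρ, the leaf step computed above is

$$ \mathrm{step}_l = \eta \,\log\frac{\sum_{i \in l} w_i\, y_i\, e^{(1-\rho)\, s_i}}{\sum_{i \in l} w_i\, e^{(2-\rho)\, s_i}} $$

clamped to [−maxClamp, maxClamp], where η = LearningRate × Shrinkage, s_i is the current score, y_i the label, and w_i the sample weight (1 when absent).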
Example #7
 internal RecursiveRegressionTree(InternalRegressionTree t, DocumentPartitioning p, int n)
     : base(t, p, n)
 {
     _weightedOutput = double.NaN;
     _nodeCount      = int.MaxValue;
     if (!IsLeaf)
     {
         LteNode = new RecursiveRegressionTree(Tree, Partitioning, Tree.GetLteChildForNode(NodeIndex));
         GtNode  = new RecursiveRegressionTree(Tree, Partitioning, Tree.GetGtChildForNode(NodeIndex));
     }
 }
Example #8
 // Creates the linear combination: scores1.Scores + (per-document outputs of tree) * multiplier.
 internal void Initialize(ScoreTracker scores1, InternalRegressionTree tree, DocumentPartitioning partitioning, double multiplier)
 {
     InitScores = null;
     if (Scores == null || Scores.Length != scores1.Scores.Length)
     {
         Scores = (double[])scores1.Scores.Clone();
     }
     else
     {
         Array.Copy(scores1.Scores, Scores, Scores.Length);
     }
     AddScores(tree, partitioning, multiplier);
     SendScoresUpdatedMessage();
 }
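
A sketch of the intended call pattern (the tracker variable names here are hypothetical): after Initialize, Scores holds the baseline tracker's scores with the tree's per-document outputs mixed in at the given multiplier.

// combined = baseline.Scores + 0.5 * (leaf output of 'tree' for each document)
candidate.Initialize(baseline, tree, partitioning, 0.5);
double[] combined = candidate.Scores;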
Example #9
        // Faster score-update path: uses the partitioning to visit each leaf's
        // documents directly, so it is suitable for the training set.
        internal virtual void AddScores(InternalRegressionTree tree, DocumentPartitioning partitioning, double multiplier)
        {
            Parallel.For(0, tree.NumLeaves, new ParallelOptions {
                MaxDegreeOfParallelism = BlockingThreadPool.NumThreads
            }, (leaf) =>
            {
                int[] documents;
                int begin;
                int count;
                partitioning.ReferenceLeafDocuments(leaf, out documents, out begin, out count);
                double output = tree.LeafValue(leaf) * multiplier;
                for (int i = begin; i < begin + count; ++i)
                {
                    Scores[documents[i]] += output;
                }
            });

            SendScoresUpdatedMessage();
        }
Example #10
 protected TreeLearner(Dataset trainData, int numLeaves)
 {
     TrainData    = trainData;
     NumLeaves    = numLeaves;
     Partitioning = new DocumentPartitioning(TrainData.NumDocs, numLeaves);
 }
Example #11
        public override void GenerateNewBag()
        {
            // Sample whole queries into the training bag with probability TrainFraction;
            // the queries left out form the out-of-bag partition.
            int[] trainDocs    = new int[CompleteTrainingSet.NumDocs];
            int[] outOfBagDocs = new int[CompleteTrainingSet.NumDocs];
            int   trainSize    = 0;
            int   outOfBagSize = 0;

            int[]  tmpTrainQueryIndices = new int[CompleteTrainingSet.NumQueries];
            bool[] selectedTrainQueries = new bool[CompleteTrainingSet.NumQueries];

            int qIdx = 0;

            for (int i = 0; i < CompleteTrainingSet.NumQueries; i++)
            {
                int begin        = CompleteTrainingSet.Boundaries[i];
                int numDocuments = CompleteTrainingSet.Boundaries[i + 1] - begin;

                if (RndGenerator.NextDouble() < TrainFraction)
                {
                    for (int d = 0; d < numDocuments; d++)
                    {
                        trainDocs[trainSize] = begin + d;
                        trainSize++;
                    }
                    tmpTrainQueryIndices[qIdx] = i;
                    qIdx++;
                    selectedTrainQueries[i] = true;
                }
            }

            int outOfBagQueriesCount = CompleteTrainingSet.NumQueries - qIdx;

            var currentTrainQueryIndices = new int[CompleteTrainingSet.NumQueries - outOfBagQueriesCount];

            Array.Copy(tmpTrainQueryIndices, currentTrainQueryIndices, currentTrainQueryIndices.Length);

            var currentOutOfBagQueryIndices = new int[outOfBagQueriesCount];
            int outOfBagQIdx = 0;

            for (int q = 0; q < CompleteTrainingSet.NumQueries; q++)
            {
                if (!selectedTrainQueries[q])
                {
                    int begin        = CompleteTrainingSet.Boundaries[q];
                    int numDocuments = CompleteTrainingSet.Boundaries[q + 1] - begin;

                    for (int d = 0; d < numDocuments; d++)
                    {
                        outOfBagDocs[outOfBagSize] = begin + d;
                        outOfBagSize++;
                    }
                    currentOutOfBagQueryIndices[outOfBagQIdx] = q;
                    outOfBagQIdx++;
                }
            }

            CurrentTrainPartition    = new DocumentPartitioning(trainDocs, trainSize, MaxLeaves);
            CurrentOutOfBagPartition = new DocumentPartitioning(outOfBagDocs, outOfBagSize, MaxLeaves);
            CurrentTrainPartition.Initialize();
            CurrentOutOfBagPartition.Initialize();
        }
Example #12
 public void Initialize(InternalRegressionTree tree, DocumentPartitioning partitioning, ScoreTracker previousScores)
 {
     _tree           = tree;
     _partitioning   = partitioning;
     _previousScores = previousScores;
 }
Example #13
        // Line search over the tree's output multiplier, using golden-section-style bracketing.
        void IStepSearch.AdjustTreeOutputs(IChannel ch, InternalRegressionTree tree, DocumentPartitioning partitioning,
                                           ScoreTracker previousScores)
        {
            _lo.Initialize(tree, partitioning, previousScores);
            _hi.Initialize(tree, partitioning, previousScores);
            _left.Initialize(tree, partitioning, previousScores);
            _right.Initialize(tree, partitioning, previousScores);

            _lo.Step   = _historicStepSize / _phi;
            _left.Step = _historicStepSize;

            if (_lo.Loss.CompareTo(_left.Loss) > 0) // backtrack
            {
                do
                {
                    Rotate(ref _hi, ref _left, ref _lo);
                    if (_hi.Step <= _minStepSize)
                    {
                        goto FINISHED;
                    }
                    _lo.Step = _left.Step / _phi;
                } while (_lo.Loss.CompareTo(_left.Loss) > 0);
            }
            else // extend (or stay)
            {
                _hi.Step = _historicStepSize * _phi;
                while (_hi.Loss.CompareTo(_left.Loss) > 0)
                {
                    Rotate(ref _lo, ref _left, ref _hi);
                    _hi.Step = _left.Step * _phi;
                }
            }

            if (_numPostbracketSteps > 0)
            {
                _right.Step = _lo.Step + (_hi.Step - _lo.Step) / _phi;
                for (int step = 0; step < _numPostbracketSteps; ++step)
                {
                    int cmp = _right.Loss.CompareTo(_left.Loss);
                    if (cmp == 0)
                    {
                        break;
                    }

                    if (cmp > 0) // move right
                    {
                        Rotate(ref _lo, ref _left, ref _right);
                        _right.Step = _lo.Step + (_hi.Step - _lo.Step) / _phi;
                    }
                    else // move left
                    {
                        Rotate(ref _hi, ref _right, ref _left);
                        if (_hi.Step <= _minStepSize)
                        {
                            goto FINISHED;
                        }
                        _left.Step = _hi.Step - (_hi.Step - _lo.Step) / _phi;
                    }
                }

                // prepare to return _left
                if (_right.Loss.CompareTo(_left.Loss) > 0)
                {
                    Swap(ref _left, ref _right);
                }
            }

FINISHED:
            if (_hi.Step < _minStepSize)
            {
                _left.Step = _minStepSize;
            }
            else if (_hi.Step == _minStepSize)
            {
                Swap(ref _hi, ref _left);
            }

            double bestStep = _left.Step;

            ch.Info("multiplier: {0}", bestStep);
            _historicStepSize = bestStep;
            tree.ScaleOutputsBy(bestStep);
        }
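
The _phi constant and the lo/left/right/hi bookkeeping in Example #13 implement golden-section-style bracketing of the best multiplier. A simplified standalone sketch of golden-section search on an arbitrary 1-D loss, re-evaluating both probes each iteration (hypothetical names, not this class's API):

using System;

// Minimize f over [lo, hi] to within tol by golden-section bracketing.
static double GoldenSectionMinimize(Func<double, double> f, double lo, double hi, double tol)
{
    const double phi = 1.6180339887498949;
    while (hi - lo > tol)
    {
        double left  = hi - (hi - lo) / phi;   // interior probes, as _left/_right above
        double right = lo + (hi - lo) / phi;
        if (f(left) < f(right))
            hi = right;                        // minimum lies in [lo, right]
        else
            lo = left;                         // minimum lies in [left, hi]
    }
    return (lo + hi) / 2;
}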