public void AdjustTreeOutputs(IChannel ch, RegressionTree tree, DocumentPartitioning partitioning, ScoreTracker trainingScores)
{
    const double epsilon = 1.4e-45;
    double multiplier = LearningRate * Shrinkage;

    // The per-leaf means are only needed when rescaling outputs relative to the leaf mean.
    double[] means = null;
    if (!BestStepRankingRegressionTrees)
        means = _parallelTraining.GlobalMean(Dataset, tree, partitioning, Weights, false);

    for (int l = 0; l < tree.NumLeaves; ++l)
    {
        double output = tree.GetOutput(l);

        if (BestStepRankingRegressionTrees)
            output *= multiplier;
        else
        {
            // epsilon keeps the ratio finite when a leaf mean is zero.
            output = multiplier * (output + epsilon) / (means[l] + epsilon);
        }

        // Clamp to [-MaxTreeOutput, MaxTreeOutput].
        if (output > MaxTreeOutput)
            output = MaxTreeOutput;
        else if (output < -MaxTreeOutput)
            output = -MaxTreeOutput;
        tree.SetOutput(l, output);
    }
}
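// Illustrative sketch (not part of the trainer above; all names here are hypothetical):
// the adjustment either scales each leaf by LearningRate * Shrinkage directly, or rescales
// it by the ratio of the leaf output to the weighted leaf mean, then clamps the result.
// This helper reproduces that arithmetic for a single leaf on plain doubles.
internal static class LeafAdjustmentSketch
{
    public static double AdjustLeaf(double output, double mean, double multiplier,
        double maxTreeOutput, bool bestStep)
    {
        const double epsilon = 1.4e-45; // same zero-mean guard as in the method above
        double adjusted = bestStep
            ? output * multiplier
            : multiplier * (output + epsilon) / (mean + epsilon);
        // Clamp to [-maxTreeOutput, maxTreeOutput].
        return Math.Max(-maxTreeOutput, Math.Min(maxTreeOutput, adjusted));
    }
}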
internal override void AddScores(InternalRegressionTree tree, DocumentPartitioning partitioning, double multiplier)
{
    _k++;
    // Momentum coefficient of the accelerated update; approaches 1 as _k grows.
    double coeff = (_k - 1.0) / (_k + 2.0);
    Parallel.For(0, tree.NumLeaves,
        new ParallelOptions { MaxDegreeOfParallelism = BlockingThreadPool.NumThreads },
        (int leaf) =>
        {
            int[] documents;
            int begin;
            int count;
            partitioning.ReferenceLeafDocuments(leaf, out documents, out begin, out count);
            double output = tree.LeafValue(leaf) * multiplier;
            for (int i = begin; i < begin + count; ++i)
            {
                int doc = documents[i];
                // Take the step from the lookahead scores YK, then extrapolate.
                double newXK = YK[doc] + output;
                double newYK = newXK + coeff * (newXK - XK[doc]);
                XK[doc] = newXK;
                YK[doc] = newYK;
            }
        });
    SendScoresUpdatedMessage();
}
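// Illustrative sketch: the XK/YK update above is a Nesterov-style accelerated step.
// With x_k the "real" score and y_k the lookahead score, each boosting iteration does
//   x_{k+1} = y_k + step,   y_{k+1} = x_{k+1} + (k - 1)/(k + 2) * (x_{k+1} - x_k).
// A standalone version for a single document; the names below are hypothetical.
internal static class AcceleratedStepSketch
{
    public static (double XNext, double YNext) Apply(double xk, double yk, double leafOutput, int k)
    {
        double coeff = (k - 1.0) / (k + 2.0); // momentum weight grows toward 1
        double xNext = yk + leafOutput;       // gradient step taken from the lookahead point
        double yNext = xNext + coeff * (xNext - xk);
        return (xNext, yNext);
    }
}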
internal RegressionTreeNodeDocuments(InternalRegressionTree tree, DocumentPartitioning partitioning, int nodeIndex)
{
    Tree = tree;
    Partitioning = partitioning;
    NodeIndex = nodeIndex;
    _documentCount = -1;
}
public double[] GlobalMean(Dataset dataset, RegressionTree tree, DocumentPartitioning partitioning, double[] weights, bool filterZeroLambdas)
{
    double[] means = new double[tree.NumLeaves];
    for (int l = 0; l < tree.NumLeaves; ++l)
        means[l] = partitioning.Mean(weights, dataset.SampleWeights, l, filterZeroLambdas);
    return means;
}
public void AdjustTreeOutputs(IChannel ch, RegressionTree tree, DocumentPartitioning partitioning, ScoreTracker trainingScores)
{
    double shrinkage = LearningRate * Shrinkage;
    for (int l = 0; l < tree.NumLeaves; ++l)
    {
        double output = tree.GetOutput(l) * shrinkage;
        tree.SetOutput(l, output);
    }
}
public void AdjustTreeOutputs(IChannel ch, InternalRegressionTree tree, DocumentPartitioning partitioning, ScoreTracker trainingScores)
{
    double shrinkage = LearningRate * Shrinkage;
    var scores = trainingScores.Scores;
    var weights = trainingScores.Dataset.SampleWeights;

    // Following equation 18, and line 2c of algorithm 1, in the source paper.
    for (int l = 0; l < tree.NumLeaves; ++l)
    {
        Double num = 0;
        Double denom = 0;
        if (_index1 == 0)
        {
            // The index == 1 Poisson case.
            foreach (int i in partitioning.DocumentsInLeaf(l))
            {
                var s = scores[i];
                var w = weights == null ? 1 : weights[i];
                num += w * _labels[i];
                denom += w * Math.Exp(s);
            }
        }
        else
        {
            // The index in (1,2] case.
            foreach (int i in partitioning.DocumentsInLeaf(l))
            {
                var s = scores[i];
                var w = weights == null ? 1 : weights[i];
                num += w * _labels[i] * Math.Exp(_index1 * s);
                denom += w * Math.Exp(_index2 * s);
            }
        }

        var step = shrinkage * (Math.Log(num) - Math.Log(denom));
        if (num == 0 && denom == 0)
            step = 0;

        // If we do not clamp, it is entirely possible for num to be 0 (with 0 labels), which
        // means that we will have negative infinities in the leaf nodes. This has a number of
        // bad effects we'd prefer to avoid. Nonetheless, we do give up a substantial amount
        // of "gain" for those examples.
        if (step < -_maxClamp)
            step = -_maxClamp;
        else if (step > _maxClamp)
            step = _maxClamp;
        tree.SetOutput(l, step);
    }
}
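// Illustrative sketch: per the equation-18 reference above, the leaf step is
//   log(sum_i w_i * y_i * exp(index1 * s_i)) - log(sum_i w_i * exp(index2 * s_i))
// scaled by the learning rate. Judging from the Poisson comment above, _index1 and _index2
// presumably encode the Tweedie power parameter (e.g. 1 - rho and 2 - rho), but that is an
// assumption here. Standalone version with hypothetical names:
internal static class TweedieLeafStepSketch
{
    public static double Compute(double[] labels, double[] scores, double[] weights,
        int[] leafDocs, double index1, double index2, double shrinkage, double maxClamp)
    {
        double num = 0, denom = 0;
        foreach (int i in leafDocs)
        {
            double w = weights == null ? 1 : weights[i];
            num += w * labels[i] * Math.Exp(index1 * scores[i]);
            denom += w * Math.Exp(index2 * scores[i]);
        }
        double step = shrinkage * (Math.Log(num) - Math.Log(denom));
        if (num == 0 && denom == 0)
            step = 0; // 0/0 leaf: take no step rather than propagate NaN
        // Clamp so an all-zero-label leaf yields -maxClamp instead of negative infinity.
        return Math.Max(-maxClamp, Math.Min(maxClamp, step));
    }
}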
internal RecursiveRegressionTree(InternalRegressionTree t, DocumentPartitioning p, int n)
    : base(t, p, n)
{
    _weightedOutput = double.NaN;
    _nodeCount = int.MaxValue;
    if (!IsLeaf)
    {
        LteNode = new RecursiveRegressionTree(Tree, Partitioning, Tree.GetLteChildForNode(NodeIndex));
        GtNode = new RecursiveRegressionTree(Tree, Partitioning, Tree.GetGtChildForNode(NodeIndex));
    }
}
// Creates a linear combination of scores1 + tree * multiplier.
internal void Initialize(ScoreTracker scores1, InternalRegressionTree tree, DocumentPartitioning partitioning, double multiplier)
{
    InitScores = null;
    if (Scores == null || Scores.Length != scores1.Scores.Length)
        Scores = (double[])scores1.Scores.Clone();
    else
        Array.Copy(scores1.Scores, Scores, Scores.Length);
    AddScores(tree, partitioning, multiplier);
    SendScoresUpdatedMessage();
}
// Uses the faster score-update path available when a DocumentPartitioning exists;
// suitable for the training set.
internal virtual void AddScores(InternalRegressionTree tree, DocumentPartitioning partitioning, double multiplier)
{
    Parallel.For(0, tree.NumLeaves,
        new ParallelOptions { MaxDegreeOfParallelism = BlockingThreadPool.NumThreads },
        (leaf) =>
        {
            int[] documents;
            int begin;
            int count;
            partitioning.ReferenceLeafDocuments(leaf, out documents, out begin, out count);
            double output = tree.LeafValue(leaf) * multiplier;
            for (int i = begin; i < begin + count; ++i)
                Scores[documents[i]] += output;
        });
    SendScoresUpdatedMessage();
}
protected TreeLearner(Dataset trainData, int numLeaves)
{
    TrainData = trainData;
    NumLeaves = numLeaves;
    Partitioning = new DocumentPartitioning(TrainData.NumDocs, numLeaves);
}
public override void GenerateNewBag()
{
    int[] trainDocs = new int[CompleteTrainingSet.NumDocs];
    int[] outOfBagDocs = new int[CompleteTrainingSet.NumDocs];
    int trainSize = 0;
    int outOfBagSize = 0;
    int[] tmpTrainQueryIndices = new int[CompleteTrainingSet.NumQueries];
    bool[] selectedTrainQueries = new bool[CompleteTrainingSet.NumQueries];

    // Sample whole queries: with probability TrainFraction a query and all of its
    // documents go into the training bag.
    int qIdx = 0;
    for (int i = 0; i < CompleteTrainingSet.NumQueries; i++)
    {
        int begin = CompleteTrainingSet.Boundaries[i];
        int numDocuments = CompleteTrainingSet.Boundaries[i + 1] - begin;
        if (RndGenerator.NextDouble() < TrainFraction)
        {
            for (int d = 0; d < numDocuments; d++)
                trainDocs[trainSize++] = begin + d;
            tmpTrainQueryIndices[qIdx++] = i;
            selectedTrainQueries[i] = true;
        }
    }

    int outOfBagQueriesCount = CompleteTrainingSet.NumQueries - qIdx;
    var currentTrainQueryIndices = new int[qIdx];
    Array.Copy(tmpTrainQueryIndices, currentTrainQueryIndices, currentTrainQueryIndices.Length);

    // Every query that was not selected goes, with all of its documents, into the
    // out-of-bag set.
    var currentOutOfBagQueryIndices = new int[outOfBagQueriesCount];
    int outOfBagQIdx = 0;
    for (int q = 0; q < CompleteTrainingSet.NumQueries; q++)
    {
        if (!selectedTrainQueries[q])
        {
            int begin = CompleteTrainingSet.Boundaries[q];
            int numDocuments = CompleteTrainingSet.Boundaries[q + 1] - begin;
            for (int d = 0; d < numDocuments; d++)
                outOfBagDocs[outOfBagSize++] = begin + d;
            currentOutOfBagQueryIndices[outOfBagQIdx++] = q;
        }
    }

    CurrentTrainPartition = new DocumentPartitioning(trainDocs, trainSize, MaxLeaves);
    CurrentOutOfBagPartition = new DocumentPartitioning(outOfBagDocs, outOfBagSize, MaxLeaves);
    CurrentTrainPartition.Initialize();
    CurrentOutOfBagPartition.Initialize();
}
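// Illustrative sketch: bagging here samples whole queries, not individual documents, so
// per-query document groups stay intact. A minimal standalone version of the same split,
// with hypothetical names (requires System and System.Collections.Generic):
internal static class QueryBaggingSketch
{
    public static (List<int> InBag, List<int> OutOfBag) Split(int[] boundaries, double trainFraction, Random rnd)
    {
        var inBag = new List<int>();
        var outOfBag = new List<int>();
        for (int q = 0; q < boundaries.Length - 1; q++)
        {
            // One Bernoulli draw per query decides the fate of all its documents.
            var target = rnd.NextDouble() < trainFraction ? inBag : outOfBag;
            for (int d = boundaries[q]; d < boundaries[q + 1]; d++)
                target.Add(d);
        }
        return (inBag, outOfBag);
    }
}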
public void Initialize(InternalRegressionTree tree, DocumentPartitioning partitioning, ScoreTracker previousScores)
{
    _tree = tree;
    _partitioning = partitioning;
    _previousScores = previousScores;
}
void IStepSearch.AdjustTreeOutputs(IChannel ch, InternalRegressionTree tree, DocumentPartitioning partitioning, ScoreTracker previousScores)
{
    _lo.Initialize(tree, partitioning, previousScores);
    _hi.Initialize(tree, partitioning, previousScores);
    _left.Initialize(tree, partitioning, previousScores);
    _right.Initialize(tree, partitioning, previousScores);

    // Bracket the best step multiplier around the historic step size, growing or
    // shrinking by the ratio _phi.
    _lo.Step = _historicStepSize / _phi;
    _left.Step = _historicStepSize;
    if (_lo.Loss.CompareTo(_left.Loss) == 1) // backtrack
    {
        do
        {
            Rotate(ref _hi, ref _left, ref _lo);
            if (_hi.Step <= _minStepSize)
                goto FINISHED;
            _lo.Step = _left.Step / _phi;
        } while (_lo.Loss.CompareTo(_left.Loss) == 1);
    }
    else // extend (or stay)
    {
        _hi.Step = _historicStepSize * _phi;
        while (_hi.Loss.CompareTo(_left.Loss) == 1)
        {
            Rotate(ref _lo, ref _left, ref _hi);
            _hi.Step = _left.Step * _phi;
        }
    }

    // Narrow the bracket with golden-section-style interior probes.
    if (_numPostbracketSteps > 0)
    {
        _right.Step = _lo.Step + (_hi.Step - _lo.Step) / _phi;
        for (int step = 0; step < _numPostbracketSteps; ++step)
        {
            int cmp = _right.Loss.CompareTo(_left.Loss);
            if (cmp == 0)
                break;

            if (cmp == 1) // move right
            {
                Rotate(ref _lo, ref _left, ref _right);
                _right.Step = _lo.Step + (_hi.Step - _lo.Step) / _phi;
            }
            else // move left
            {
                Rotate(ref _hi, ref _right, ref _left);
                if (_hi.Step <= _minStepSize)
                    goto FINISHED;
                _left.Step = _hi.Step - (_hi.Step - _lo.Step) / _phi;
            }
        }

        // Prepare to return _left.
        if (_right.Loss.CompareTo(_left.Loss) == 1)
            Swap(ref _left, ref _right);
    }

    FINISHED:
    if (_hi.Step < _minStepSize)
        _left.Step = _minStepSize;
    else if (_hi.Step == _minStepSize)
        Swap(ref _hi, ref _left);

    double bestStep = _left.Step;
    ch.Info("multiplier: {0}", bestStep);
    _historicStepSize = bestStep;
    tree.ScaleOutputsBy(bestStep);
}
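// Illustrative sketch: the bracket-and-narrow pattern above, with probes placed at a
// fixed ratio inside the interval, is the shape of a golden-section line search. The
// production code compares opaque loss objects; this standalone variant minimizes a
// plain Func over [lo, hi] with phi = (1 + sqrt(5)) / 2. Names are hypothetical.
internal static class GoldenSectionSketch
{
    private static readonly double Phi = (1 + Math.Sqrt(5)) / 2;

    public static double Minimize(Func<double, double> loss, double lo, double hi, int iterations)
    {
        double left = hi - (hi - lo) / Phi;
        double right = lo + (hi - lo) / Phi;
        for (int i = 0; i < iterations; i++)
        {
            if (loss(left) < loss(right))
                hi = right; // minimum lies in [lo, right]
            else
                lo = left;  // minimum lies in [left, hi]
            left = hi - (hi - lo) / Phi;
            right = lo + (hi - lo) / Phi;
        }
        return (lo + hi) / 2;
    }
}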