/// <summary>
/// Wraps node <paramref name="n"/> of tree <paramref name="t"/> and, when that node is
/// not a leaf, recursively wraps its "less-than-or-equal" and "greater-than" children.
/// </summary>
/// <param name="t">Tree being wrapped; forwarded to the base constructor.</param>
/// <param name="p">Document partitioning; forwarded to the base constructor.</param>
/// <param name="n">Index of the node this instance represents.</param>
public RecursiveRegressionTree(RegressionTree t, DocumentPartitioning p, int n)
    : base(t, p, n)
{
    // Both fields start at placeholder values (int.MaxValue / NaN).
    // NOTE(review): presumably they are filled in later by other members - confirm.
    _nodeCount = int.MaxValue;
    _weightedOutput = double.NaN;

    if (IsLeaf)
    {
        // Leaves have no children to wrap.
        return;
    }

    int lteChildIndex = Tree.GetLteChildForNode(NodeIndex);
    LteNode = new RecursiveRegressionTree(Tree, Partitioning, lteChildIndex);

    int gtChildIndex = Tree.GetGtChildForNode(NodeIndex);
    GtNode = new RecursiveRegressionTree(Tree, Partitioning, gtChildIndex);
}
/// <summary>
/// Creates the linear combination <c>scores1 + tree * multiplier</c> in this tracker's
/// <c>Scores</c> array.
/// </summary>
/// <param name="scores1">Tracker whose scores are copied as the starting point.</param>
/// <param name="tree">Tree whose leaf values are added on top of the copied scores.</param>
/// <param name="partitioning">Assignment of documents to the leaves of <paramref name="tree"/>.</param>
/// <param name="multiplier">Factor applied to each leaf value before it is added.</param>
public void Initialize(ScoreTracker scores1, RegressionTree tree, DocumentPartitioning partitioning, double multiplier)
{
    InitScores = null;

    // Reuse the existing buffer when it already has the right length; otherwise allocate a copy.
    bool canReuseBuffer = Scores != null && Scores.Length == scores1.Scores.Length;
    if (canReuseBuffer)
    {
        Array.Copy(scores1.Scores, Scores, Scores.Length);
    }
    else
    {
        Scores = (double[])scores1.Scores.Clone();
    }

    AddScores(tree, partitioning, multiplier);

    // NOTE(review): AddScores is virtual and may already have raised the notification;
    // the explicit call is kept so the message is sent regardless of the override in use.
    SendScoresUpdatedMessage();
}
/// <summary>
/// Adds <c>tree.LeafValue(leaf) * multiplier</c> to the score of every document that
/// <paramref name="partitioning"/> reports for each leaf, processing leaves in parallel.
/// Uses the faster partitioning-based score update; suitable for the training set.
/// </summary>
/// <param name="tree">Tree supplying the number of leaves and the leaf values.</param>
/// <param name="partitioning">Source of the document indices belonging to each leaf.</param>
/// <param name="multiplier">Factor applied to each leaf value before it is added.</param>
public virtual void AddScores(RegressionTree tree, DocumentPartitioning partitioning, double multiplier)
{
    int leafCount = tree.NumLeaves;
    var parallelOptions = new ParallelOptions { MaxDegreeOfParallelism = BlockingThreadPool.NumThreads };

    // NOTE(review): presumably each document appears in exactly one leaf, so the
    // concurrent "+=" updates never target the same element - confirm.
    Parallel.For(0, leafCount, parallelOptions, leafIndex =>
    {
        int[] docIndices;
        int start;
        int length;
        partitioning.ReferenceLeafDocuments(leafIndex, out docIndices, out start, out length);

        double delta = tree.LeafValue(leafIndex) * multiplier;
        int end = start + length;
        for (int pos = start; pos < end; pos++)
        {
            Scores[docIndices[pos]] += delta;
        }
    });

    SendScoresUpdatedMessage();
}
/// <summary>
/// Stores the tree, the document partitioning and the previous scores on this instance.
/// No other work is done here.
/// </summary>
/// <param name="tree">Tree to remember.</param>
/// <param name="partitioning">Document partitioning to remember.</param>
/// <param name="previousScores">Score tracker to remember.</param>
public void Initialize(RegressionTree tree, DocumentPartitioning partitioning, ScoreTracker previousScores)
{
    // The three assignments are independent of one another.
    _previousScores = previousScores;
    _partitioning = partitioning;
    _tree = tree;
}
/// <summary>
/// Searches for a step size by comparing the <c>Loss</c> of four candidate points
/// (<c>_lo</c>, <c>_left</c>, <c>_right</c>, <c>_hi</c>), logs the chosen step as
/// "multiplier", remembers it in <c>_historicStepSize</c> and scales the outputs of
/// <paramref name="tree"/> by it.
/// </summary>
/// <param name="ch">Channel that receives the informational "multiplier" message.</param>
/// <param name="tree">Tree whose outputs are scaled by the chosen step.</param>
/// <param name="partitioning">Passed through to each candidate point's <c>Initialize</c>.</param>
/// <param name="previousScores">Passed through to each candidate point's <c>Initialize</c>.</param>
public void AdjustTreeOutputs(IChannel ch, RegressionTree tree, DocumentPartitioning partitioning, ScoreTracker previousScores)
{
    _lo.Initialize(tree, partitioning, previousScores);
    _hi.Initialize(tree, partitioning, previousScores);
    _left.Initialize(tree, partitioning, previousScores);
    _right.Initialize(tree, partitioning, previousScores);

    // Start from the step used last time and one point a factor of _phi below it.
    _lo.Step = _historicStepSize / _phi;
    _left.Step = _historicStepSize;

    // All comparisons test the sign of CompareTo ("> 0") rather than "== 1":
    // the IComparable contract only guarantees the sign of the result.
    if (_lo.Loss.CompareTo(_left.Loss) > 0) // backtrack
    {
        do
        {
            Rotate(ref _hi, ref _left, ref _lo);
            if (_hi.Step <= _minStepSize)
            {
                goto FINISHED;
            }

            _lo.Step = _left.Step / _phi;
        }
        while (_lo.Loss.CompareTo(_left.Loss) > 0);
    }
    else // extend (or stay)
    {
        _hi.Step = _historicStepSize * _phi;
        while (_hi.Loss.CompareTo(_left.Loss) > 0)
        {
            Rotate(ref _lo, ref _left, ref _hi);
            _hi.Step = _left.Step * _phi;
        }
    }

    // Optional refinement inside the interval [_lo.Step, _hi.Step].
    if (_numPostbracketSteps > 0)
    {
        _right.Step = _lo.Step + (_hi.Step - _lo.Step) / _phi;
        for (int step = 0; step < _numPostbracketSteps; ++step)
        {
            int cmp = _right.Loss.CompareTo(_left.Loss);
            if (cmp == 0)
            {
                break;
            }

            if (cmp > 0) // move right
            {
                Rotate(ref _lo, ref _left, ref _right);
                _right.Step = _lo.Step + (_hi.Step - _lo.Step) / _phi;
            }
            else // move left
            {
                Rotate(ref _hi, ref _right, ref _left);
                if (_hi.Step <= _minStepSize)
                {
                    goto FINISHED;
                }

                _left.Step = _hi.Step - (_hi.Step - _lo.Step) / _phi;
            }
        }

        // prepare to return _left
        if (_right.Loss.CompareTo(_left.Loss) > 0)
        {
            Swap(ref _left, ref _right);
        }
    }

FINISHED:
    // Never report a step below _minStepSize: clamp when the upper point fell under it,
    // and take the upper point itself when it sits exactly on the minimum.
    if (_hi.Step < _minStepSize)
    {
        _left.Step = _minStepSize;
    }
    else if (_hi.Step == _minStepSize)
    {
        Swap(ref _hi, ref _left);
    }

    double bestStep = _left.Step;
    ch.Info("multiplier: {0}", bestStep);
    _historicStepSize = bestStep;
    tree.ScaleOutputsBy(bestStep);
}
/// <summary>
/// Draws a new bag at query granularity: each query of <c>CompleteTrainingSet</c> goes,
/// with all of its documents, into the training partition when
/// <c>RndGenerator.NextDouble() &lt; TrainFraction</c> and into the out-of-bag partition
/// otherwise. The resulting partitions are stored in <c>CurrentTrainPartition</c> and
/// <c>CurrentOutOfBagPartition</c> and then initialized.
/// </summary>
public override void GenerateNewBag()
{
    // Both buffers are sized for the worst case (every document lands in the same bag);
    // the number of valid entries is tracked separately.
    int[] trainDocs = new int[CompleteTrainingSet.NumDocs];
    int[] outOfBagDocs = new int[CompleteTrainingSet.NumDocs];
    int trainSize = 0;
    int outOfBagSize = 0;

    int numQueries = CompleteTrainingSet.NumQueries;
    for (int q = 0; q < numQueries; q++)
    {
        // Documents of query q occupy the index range [begin, end).
        int begin = CompleteTrainingSet.Boundaries[q];
        int end = CompleteTrainingSet.Boundaries[q + 1];

        // Exactly one random draw per query, in query order.
        if (RndGenerator.NextDouble() < TrainFraction)
        {
            for (int doc = begin; doc < end; doc++)
            {
                trainDocs[trainSize] = doc;
                trainSize++;
            }
        }
        else
        {
            for (int doc = begin; doc < end; doc++)
            {
                outOfBagDocs[outOfBagSize] = doc;
                outOfBagSize++;
            }
        }
    }

    CurrentTrainPartition = new DocumentPartitioning(trainDocs, trainSize, MaxLeaves);
    CurrentOutOfBagPartition = new DocumentPartitioning(outOfBagDocs, outOfBagSize, MaxLeaves);
    CurrentTrainPartition.Initialize();
    CurrentOutOfBagPartition.Initialize();
}
/// <summary>
/// Stores the training data and leaf count, and creates a document partitioning sized
/// for the number of documents in the training data and the number of leaves.
/// </summary>
/// <param name="trainData">Training dataset; its <c>NumDocs</c> sizes the partitioning.</param>
/// <param name="numLeaves">Number of leaves, stored and passed to the partitioning.</param>
protected TreeLearner(Dataset trainData, int numLeaves)
{
    NumLeaves = numLeaves;
    TrainData = trainData;

    var documentCount = TrainData.NumDocs;
    Partitioning = new DocumentPartitioning(documentCount, numLeaves);
}
/// <summary>
/// No-op implementation: the body is empty, so the outputs of <paramref name="tree"/>
/// are left unchanged and none of the arguments are used.
/// </summary>
public void AdjustTreeOutputs(IChannel ch, RegressionTree tree, DocumentPartitioning partitioning, ScoreTracker trainingScores) { }