public void AddTree(RegressionTree tree) => _trees.Add(tree);
public void AddTreeAt(RegressionTree tree, int index) => _trees.Insert(index, tree);
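// Usage sketch for the two mutators above, based on how Ensemble is used in CombineModels below
// (tree construction details are elided, since the RegressionTree constructors aren't shown here):
// var ensemble = new Ensemble();
// ensemble.AddTree(tree);        // append to the end of the ensemble
// ensemble.AddTreeAt(tree, 0);   // insert at a given index, e.g. the front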
public IPredictorProducing<float> CombineModels(IEnumerable<IPredictorProducing<float>> models)
{
    _host.CheckValue(models, nameof(models));

    var ensemble = new Ensemble();
    int modelCount = 0;
    int featureCount = -1;
    bool binaryClassifier = false;
    foreach (var model in models)
    {
        modelCount++;

        var predictor = model;
        _host.CheckValue(predictor, nameof(models), "One of the models is null");

        var calibrated = predictor as CalibratedPredictorBase;
        double paramA = 1;
        if (calibrated != null)
        {
            _host.Check(calibrated.Calibrator is PlattCalibrator,
                "Combining FastTree models can only be done when the models are calibrated with Platt calibrator");
            predictor = calibrated.SubPredictor;
            paramA = -(calibrated.Calibrator as PlattCalibrator).ParamA;
        }

        var tree = predictor as FastTreePredictionWrapper;
        if (tree == null)
            throw _host.Except("Model is not a tree ensemble");

        foreach (var t in tree.TrainedEnsemble.Trees)
        {
            // Round-trip the tree through its byte representation to get an independent copy.
            var bytes = new byte[t.SizeInBytes()];
            int position = -1;
            t.ToByteArray(bytes, ref position);
            position = -1;
            var tNew = new RegressionTree(bytes, ref position);
            if (paramA != 1)
            {
                // Fold the Platt calibration slope into the leaf outputs.
                for (int i = 0; i < tNew.NumLeaves; i++)
                    tNew.SetOutput(i, tNew.LeafValues[i] * paramA);
            }
            ensemble.AddTree(tNew);
        }

        if (modelCount == 1)
        {
            binaryClassifier = calibrated != null;
            featureCount = tree.InputType.ValueCount;
        }
        else
        {
            _host.Check((calibrated != null) == binaryClassifier, "Ensemble contains both calibrated and uncalibrated models");
            _host.Check(featureCount == tree.InputType.ValueCount, "Found models with different number of features");
        }
    }

    // Scale every leaf by 1/modelCount so the combined score is the average of the input models.
    var scale = 1 / (double)modelCount;
    foreach (var t in ensemble.Trees)
    {
        for (int i = 0; i < t.NumLeaves; i++)
            t.SetOutput(i, t.LeafValues[i] * scale);
    }

    switch (_kind)
    {
        case PredictionKind.BinaryClassification:
            if (!binaryClassifier)
                return new FastTreeBinaryPredictor(_host, ensemble, featureCount, null);

            var cali = new PlattCalibrator(_host, -1, 0);
            return new FeatureWeightsCalibratedPredictor(_host,
                new FastTreeBinaryPredictor(_host, ensemble, featureCount, null), cali);
        case PredictionKind.Regression:
            return new FastTreeRegressionPredictor(_host, ensemble, featureCount, null);
        case PredictionKind.Ranking:
            return new FastTreeRankingPredictor(_host, ensemble, featureCount, null);
        default:
            _host.Assert(false);
            throw _host.ExceptNotSupp();
    }
}
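// The core of the combiner above is averaging: every tree from every input model goes into one
// ensemble, and each leaf is scaled by 1/modelCount so the combined score is the mean of the
// individual model scores. A minimal self-contained sketch of that idea; ToyTree and ToyCombiner
// are hypothetical stand-ins, not the FastTree types:
using System.Collections.Generic;
using System.Linq;

internal sealed class ToyTree
{
    public double[] LeafValues;
}

internal static class ToyCombiner
{
    // Merge the trees of several models and scale leaves so the combined output is the average.
    public static List<ToyTree> Average(IReadOnlyList<List<ToyTree>> models)
    {
        var combined = models.SelectMany(m => m).ToList();
        double scale = 1.0 / models.Count;
        foreach (var tree in combined)
            for (int i = 0; i < tree.LeafValues.Length; i++)
                tree.LeafValues[i] *= scale;
        return combined;
    }
}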
public virtual void AddScores(RegressionTree tree, double multiplier)
{
    tree.AddOutputsToScores(Dataset, Scores, multiplier);
    SendScoresUpdatedMessage();
}
public RegressionTreeVisualizator(RegressionTree tree, Chart chart)
{
    Tree = tree;
    RegressionChart = chart;
}
/// <summary>
/// Constructs the partitioning object based on the documents and the RegressionTree splits.
/// NOTE: It has been optimized for speed and multiple processors, with a 10x gain over a naive LINQ implementation.
/// </summary>
public DocumentPartitioning(RegressionTree tree, Dataset dataset)
    : this(dataset.NumDocs, tree.NumLeaves)
{
    using (Timer.Time(TimerEvent.DocumentPartitioningConstruction))
    {
        // Figure out which leaf each document belongs to.
        // NOTE: Break it up into NumThreads chunks. This minimizes the number of re-computations necessary in
        // the row-wise indexer.
        int innerLoopSize = 1 + dataset.NumDocs / BlockingThreadPool.NumThreads; // +1 is to make sure we don't have a few left over at the end

        // Figure out the exact number of chunks, needed in pathological cases when NumDocs < NumThreads.
        int numChunks = dataset.NumDocs / innerLoopSize;
        if (dataset.NumDocs % innerLoopSize != 0)
            ++numChunks;
        var perChunkDocumentLists = new List<int>[numChunks][];

        // REVIEW: This partitioning doesn't look optimal.
        // It probably makes sense to investigate better ways of splitting the data.
        var actions = new Action[(int)Math.Ceiling(1.0 * dataset.NumDocs / innerLoopSize)];
        var actionIndex = 0;
        for (int docStart = 0; docStart < dataset.NumDocs; docStart += innerLoopSize)
        {
            var fromDoc = docStart;
            var toDoc = Math.Min(docStart + innerLoopSize, dataset.NumDocs);
            var chunkIndex = docStart / innerLoopSize;
            actions[actionIndex++] = () =>
            {
                Contracts.Assert(perChunkDocumentLists[chunkIndex] == null);
                var featureBins = dataset.GetFeatureBinRowwiseIndexer();

                List<int>[] perLeafDocumentLists = Enumerable.Range(0, tree.NumLeaves)
                    .Select(x => new List<int>(innerLoopSize / tree.NumLeaves))
                    .ToArray();

                for (int d = fromDoc; d < toDoc; d++)
                {
                    int leaf = tree.GetLeaf(featureBins[d]);
                    perLeafDocumentLists[leaf].Add(d);
                }

                perChunkDocumentLists[chunkIndex] = perLeafDocumentLists;
            };
        }
        Parallel.Invoke(new ParallelOptions { MaxDegreeOfParallelism = BlockingThreadPool.NumThreads }, actions);

        // Establish leaf starts and document counts.
        _leafCount = Enumerable.Range(0, tree.NumLeaves)
            .Select(leaf => Enumerable.Range(0, perChunkDocumentLists.Length)
                .Select(thread => perChunkDocumentLists[thread][leaf].Count)
                .Sum())
            .ToArray();

        var cumulativeLength = _leafCount.CumulativeSum<int>().Take(tree.NumLeaves - 1);
        _leafBegin = Enumerable.Range(0, 1).Concat(cumulativeLength).ToArray();

        // Move all documents that belong to the same leaf together.
        Contracts.Assert(_documents.Length == _leafBegin[tree.NumLeaves - 1] + _leafCount[tree.NumLeaves - 1]);
        actions = new Action[tree.NumLeaves];
        actionIndex = 0;
        for (int leaf = 0; leaf < tree.NumLeaves; leaf++)
        {
            var l = leaf;
            actions[actionIndex++] = () =>
            {
                int documentPos = _leafBegin[l];
                for (int chunkIndex = 0; chunkIndex < perChunkDocumentLists.Length; chunkIndex++)
                {
                    foreach (int d in perChunkDocumentLists[chunkIndex][l])
                        _documents[documentPos++] = d;
                    perChunkDocumentLists[chunkIndex][l] = null;
                }
            };
        }
        Parallel.Invoke(new ParallelOptions { MaxDegreeOfParallelism = BlockingThreadPool.NumThreads }, actions);
    }
}
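// The pattern above — each worker buckets its own chunk into per-leaf lists, then the chunks are
// concatenated per leaf — avoids locking entirely because no two workers write the same list.
// A minimal sketch of the same two-phase idea with plain integers; ChunkedPartitionDemo and
// getBucket are hypothetical stand-ins for the partitioning class and tree.GetLeaf:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;

internal static class ChunkedPartitionDemo
{
    public static int[][] Partition(int[] items, int numBuckets, Func<int, int> getBucket, int numThreads)
    {
        int chunkSize = 1 + items.Length / numThreads;
        int numChunks = (items.Length + chunkSize - 1) / chunkSize;
        var perChunk = new List<int>[numChunks][];

        // Phase 1: each chunk buckets its slice independently (no shared writes).
        Parallel.For(0, numChunks, chunk =>
        {
            var buckets = Enumerable.Range(0, numBuckets).Select(_ => new List<int>()).ToArray();
            int from = chunk * chunkSize;
            int to = Math.Min(from + chunkSize, items.Length);
            for (int i = from; i < to; i++)
                buckets[getBucket(items[i])].Add(items[i]);
            perChunk[chunk] = buckets;
        });

        // Phase 2: concatenate the per-chunk results bucket by bucket.
        return Enumerable.Range(0, numBuckets)
            .Select(b => perChunk.SelectMany(c => c[b]).ToArray())
            .ToArray();
    }
}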
public void Initialize(RegressionTree tree, DocumentPartitioning partitioning, ScoreTracker previousScores)
{
    _tree = tree;
    _partitioning = partitioning;
    _previousScores = previousScores;
}
public void AdjustTreeOutputs(IChannel ch, RegressionTree tree, DocumentPartitioning partitioning, ScoreTracker previousScores)
{
    _lo.Initialize(tree, partitioning, previousScores);
    _hi.Initialize(tree, partitioning, previousScores);
    _left.Initialize(tree, partitioning, previousScores);
    _right.Initialize(tree, partitioning, previousScores);

    _lo.Step = _historicStepSize / _phi;
    _left.Step = _historicStepSize;
    if (_lo.Loss.CompareTo(_left.Loss) == 1)
    {
        // backtrack
        do
        {
            Rotate(ref _hi, ref _left, ref _lo);
            if (_hi.Step <= _minStepSize)
                goto FINISHED;
            _lo.Step = _left.Step / _phi;
        } while (_lo.Loss.CompareTo(_left.Loss) == 1);
    }
    else
    {
        // extend (or stay)
        _hi.Step = _historicStepSize * _phi;
        while (_hi.Loss.CompareTo(_left.Loss) == 1)
        {
            Rotate(ref _lo, ref _left, ref _hi);
            _hi.Step = _left.Step * _phi;
        }
    }

    if (_numPostbracketSteps > 0)
    {
        _right.Step = _lo.Step + (_hi.Step - _lo.Step) / _phi;
        for (int step = 0; step < _numPostbracketSteps; ++step)
        {
            int cmp = _right.Loss.CompareTo(_left.Loss);
            if (cmp == 0)
                break;

            if (cmp == 1)
            {
                // move right
                Rotate(ref _lo, ref _left, ref _right);
                _right.Step = _lo.Step + (_hi.Step - _lo.Step) / _phi;
            }
            else
            {
                // move left
                Rotate(ref _hi, ref _right, ref _left);
                if (_hi.Step <= _minStepSize)
                    goto FINISHED;
                _left.Step = _hi.Step - (_hi.Step - _lo.Step) / _phi;
            }
        }

        // prepare to return _left
        if (_right.Loss.CompareTo(_left.Loss) == 1)
            Swap(ref _left, ref _right);
    }

FINISHED:
    if (_hi.Step < _minStepSize)
        _left.Step = _minStepSize;
    else if (_hi.Step == _minStepSize)
        Swap(ref _hi, ref _left);

    double bestStep = _left.Step;
    ch.Info("multiplier: {0}", bestStep);
    _historicStepSize = bestStep;
    tree.ScaleOutputsBy(bestStep);
}
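// The bracket/shrink pattern above resembles a golden-section line search over the step
// multiplier (assuming _phi is the golden ratio; the snippet itself doesn't show its value).
// A minimal standalone sketch of golden-section minimization of a 1-D function, for reference:
using System;

internal static class GoldenSectionDemo
{
    private static readonly double Phi = (1 + Math.Sqrt(5)) / 2;

    // Minimize f on [lo, hi] by repeatedly shrinking the bracket by a factor of 1/Phi.
    public static double Minimize(Func<double, double> f, double lo, double hi, int steps)
    {
        for (int i = 0; i < steps; i++)
        {
            double left = hi - (hi - lo) / Phi;
            double right = lo + (hi - lo) / Phi;
            if (f(left) < f(right))
                hi = right;   // minimum lies in [lo, right]
            else
                lo = left;    // minimum lies in [left, hi]
        }
        return (lo + hi) / 2;
    }
}

// Example: GoldenSectionDemo.Minimize(x => (x - 1.5) * (x - 1.5), 0, 4, 50) returns approximately 1.5.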
public TreeEnsemble GetModel(int[] categoricalFeatureBoudaries)
{
    TreeEnsemble res = new TreeEnsemble();
    string modelString = GetModelString();
    string[] lines = modelString.Split('\n');
    int i = 0;
    for (; i < lines.Length;)
    {
        if (lines[i].StartsWith("Tree="))
        {
            // Parse the key=value lines of this tree section.
            Dictionary<string, string> kvPairs = new Dictionary<string, string>();
            ++i;
            while (!lines[i].StartsWith("Tree=") && lines[i].Trim().Length != 0)
            {
                string[] kv = lines[i].Split('=');
                Contracts.Check(kv.Length == 2);
                kvPairs[kv[0].Trim()] = kv[1].Trim();
                ++i;
            }

            int numLeaves = int.Parse(kvPairs["num_leaves"], CultureInfo.InvariantCulture);
            int numCat = int.Parse(kvPairs["num_cat"], CultureInfo.InvariantCulture);
            if (numLeaves > 1)
            {
                var leftChild = Str2IntArray(kvPairs["left_child"], ' ');
                var rightChild = Str2IntArray(kvPairs["right_child"], ' ');
                var splitFeature = Str2IntArray(kvPairs["split_feature"], ' ');
                var threshold = Str2DoubleArray(kvPairs["threshold"], ' ');
                var splitGain = Str2DoubleArray(kvPairs["split_gain"], ' ');
                var leafOutput = Str2DoubleArray(kvPairs["leaf_value"], ' ');
                var decisionType = Str2UIntArray(kvPairs["decision_type"], ' ');
                var defaultValue = GetDefalutValue(threshold, decisionType);
                var categoricalSplitFeatures = new int[numLeaves - 1][];
                var categoricalSplit = new bool[numLeaves - 1];
                if (categoricalFeatureBoudaries != null)
                {
                    // Add offsets to split features.
                    for (int node = 0; node < numLeaves - 1; ++node)
                        splitFeature[node] = categoricalFeatureBoudaries[splitFeature[node]];
                }

                if (numCat > 0)
                {
                    var catBoundaries = Str2IntArray(kvPairs["cat_boundaries"], ' ');
                    var catThreshold = Str2UIntArray(kvPairs["cat_threshold"], ' ');
                    for (int node = 0; node < numLeaves - 1; ++node)
                    {
                        if (GetIsCategoricalSplit(decisionType[node]))
                        {
                            int catIdx = (int)threshold[node];
                            var cats = GetCatThresholds(catThreshold, catBoundaries[catIdx], catBoundaries[catIdx + 1]);
                            categoricalSplitFeatures[node] = new int[cats.Length];

                            // Convert categorical thresholds to feature indices.
                            for (int j = 0; j < cats.Length; ++j)
                                categoricalSplitFeatures[node][j] = splitFeature[node] + cats[j] - 1;

                            splitFeature[node] = -1;
                            categoricalSplit[node] = true;

                            // Swap left and right child.
                            int t = leftChild[node];
                            leftChild[node] = rightChild[node];
                            rightChild[node] = t;
                        }
                        else
                        {
                            categoricalSplit[node] = false;
                        }
                    }
                }
                RegressionTree tree = RegressionTree.Create(numLeaves, splitFeature, splitGain,
                    threshold.Select(x => (float)x).ToArray(), defaultValue.Select(x => (float)x).ToArray(),
                    leftChild, rightChild, leafOutput, categoricalSplitFeatures, categoricalSplit);
                res.AddTree(tree);
            }
            else
            {
                RegressionTree tree = new RegressionTree(2);
                var leafOutput = Str2DoubleArray(kvPairs["leaf_value"], ' ');
                if (leafOutput[0] != 0)
                {
                    // Convert a constant tree to a two-leaf tree, to avoid it being filtered out by TLC.
                    var categoricalSplitFeatures = new int[1][];
                    var categoricalSplit = new bool[1];
                    tree = RegressionTree.Create(2, new int[] { 0 }, new double[] { 0 },
                        new float[] { 0 }, new float[] { 0 }, new int[] { -1 }, new int[] { -2 },
                        new double[] { leafOutput[0], leafOutput[0] },
                        categoricalSplitFeatures, categoricalSplit);
                }
                res.AddTree(tree);
            }
        }
        else
        {
            ++i;
        }
    }
    return res;
}
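// The parser above walks the LightGBM text model format: sections introduced by a "Tree=" line,
// followed by key=value lines until a blank line. A minimal standalone sketch of that scan;
// ModelTextDemo and ParseTreeSections are hypothetical helpers, not part of the wrapper:
using System.Collections.Generic;

internal static class ModelTextDemo
{
    public static List<Dictionary<string, string>> ParseTreeSections(string modelString)
    {
        var sections = new List<Dictionary<string, string>>();
        string[] lines = modelString.Split('\n');
        for (int i = 0; i < lines.Length; i++)
        {
            if (!lines[i].StartsWith("Tree="))
                continue;
            var kv = new Dictionary<string, string>();
            // Collect key=value pairs until a blank line, the next tree, or end of input.
            for (i++; i < lines.Length && !lines[i].StartsWith("Tree=") && lines[i].Trim().Length != 0; i++)
            {
                int eq = lines[i].IndexOf('=');
                if (eq > 0)
                    kv[lines[i].Substring(0, eq).Trim()] = lines[i].Substring(eq + 1).Trim();
            }
            i--; // let the outer loop re-examine the terminating line
            sections.Add(kv);
        }
        return sections;
    }
}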
public void AdjustTreeOutputs(IChannel ch, RegressionTree tree, DocumentPartitioning partitioning, ScoreTracker trainingScores)
{
    // Intentionally a no-op: this implementation leaves the tree outputs unchanged.
}
private RegressionTree createTree(string name, Test testSample)
{
    RegressionTree tree = new RegressionTree(testSample, name, Penalty);
    return tree;
}