/// <summary>
/// Rescales each leaf output of <paramref name="tree"/> by the combined
/// learning-rate/shrinkage factor and clamps the result to [-MaxTreeOutput, MaxTreeOutput].
/// When best-step ranking trees are disabled, each leaf is additionally normalized
/// by the globally-computed mean for that leaf.
/// </summary>
public void AdjustTreeOutputs(IChannel ch, InternalRegressionTree tree, DocumentPartitioning partitioning, ScoreTracker trainingScores)
{
    // Tiny constant guarding the ratio below against a 0/0 when both the leaf
    // output and its global mean are zero.
    const double epsilon = 1.4e-45;
    double scale = LearningRate * Shrinkage;

    // Global per-leaf means are only needed for the normalized (non-best-step) path.
    double[] leafMeans = BestStepRankingRegressionTrees
        ? null
        : _parallelTraining.GlobalMean(Dataset, tree, partitioning, Weights, false);

    for (int leaf = 0; leaf < tree.NumLeaves; ++leaf)
    {
        double value = tree.GetOutput(leaf);
        value = BestStepRankingRegressionTrees
            ? value * scale
            : scale * (value + epsilon) / (leafMeans[leaf] + epsilon);

        // Clamp the adjusted output into the configured range.
        if (value > MaxTreeOutput)
            value = MaxTreeOutput;
        else if (value < -MaxTreeOutput)
            value = -MaxTreeOutput;

        tree.SetOutput(leaf, value);
    }
}
/// <summary>
/// Scales every leaf output of <paramref name="tree"/> by the combined
/// learning-rate/shrinkage factor.
/// </summary>
public void AdjustTreeOutputs(IChannel ch, InternalRegressionTree tree, DocumentPartitioning partitioning, ScoreTracker trainingScores)
{
    double factor = LearningRate * Shrinkage;
    for (int leaf = 0; leaf < tree.NumLeaves; ++leaf)
        tree.SetOutput(leaf, tree.GetOutput(leaf) * factor);
}
/// <summary>
/// Computes the optimal step for each leaf of <paramref name="tree"/> from the
/// per-document scores and labels, applies learning-rate shrinkage, clamps the
/// step to [-_maxClamp, _maxClamp], and writes it back into the leaf.
/// The two branches distinguish the Poisson case (_index1 == 0, i.e. index == 1)
/// from the general index-in-(1,2] case. NOTE(review): _index1/_index2 appear to
/// be derived from the loss's index parameter — confirm against the constructor.
/// </summary>
public void AdjustTreeOutputs(IChannel ch, InternalRegressionTree tree, DocumentPartitioning partitioning, ScoreTracker trainingScores)
{
    double shrinkage = LearningRate * Shrinkage;
    var scores = trainingScores.Scores;
    // SampleWeights may be null, in which case every document has weight 1.
    var weights = trainingScores.Dataset.SampleWeights;

    // Following equation 18, and line 2c of algorithm 1 in the source paper.
    for (int l = 0; l < tree.NumLeaves; ++l)
    {
        // Weighted numerator and denominator of the per-leaf step ratio.
        Double num = 0;
        Double denom = 0;
        if (_index1 == 0)
        {
            // The index == 1 Poisson case.
            foreach (int i in partitioning.DocumentsInLeaf(l))
            {
                var s = scores[i];
                var w = weights == null ? 1 : weights[i];
                num += w * _labels[i];
                denom += w * Math.Exp(s);
            }
        }
        else
        {
            // The index in (1,2] case.
            foreach (int i in partitioning.DocumentsInLeaf(l))
            {
                var s = scores[i];
                var w = weights == null ? 1 : weights[i];
                num += w * _labels[i] * Math.Exp(_index1 * s);
                denom += w * Math.Exp(_index2 * s);
            }
        }

        // Log(0) - Log(0) would be NaN; an entirely empty/zero leaf takes no step.
        var step = shrinkage * (Math.Log(num) - Math.Log(denom));
        if (num == 0 && denom == 0)
        {
            step = 0;
        }

        // If we do not clamp, it is entirely possible for num to be 0 (with 0 labels), which
        // means that we will have negative infinities in the leaf nodes. This has a number of
        // bad negative effects we'd prefer to avoid. Nonetheless, we do give up a substantial
        // amount of "gain" for those examples.
        // (The comparisons below also catch the -Infinity produced when only num is 0.)
        if (step < -_maxClamp)
        {
            step = -_maxClamp;
        }
        else if (step > _maxClamp)
        {
            step = _maxClamp;
        }
        tree.SetOutput(l, step);
    }
}
/// <summary>
/// Combines a collection of FastTree-based models into a single averaged ensemble.
/// All models must share the same feature count and must be uniformly calibrated
/// (all Platt-calibrated) or uniformly uncalibrated. Leaf outputs are rescaled by
/// the (negated) Platt slope where applicable and averaged across models.
/// </summary>
/// <param name="models">The models to combine; must be non-null and non-empty.</param>
/// <returns>A combined model whose flavor depends on <c>_kind</c>.</returns>
IPredictor IModelCombiner.CombineModels(IEnumerable<IPredictor> models)
{
    _host.CheckValue(models, nameof(models));
    var ensemble = new InternalTreeEnsemble();
    int modelCount = 0;
    int featureCount = -1;
    bool binaryClassifier = false;
    foreach (var model in models)
    {
        modelCount++;
        var predictor = model;
        _host.CheckValue(predictor, nameof(models), "One of the models is null");
        var calibrated = predictor as IWeaklyTypedCalibratedModelParameters;
        double paramA = 1;
        if (calibrated != null)
        {
            _host.Check(calibrated.WeeklyTypedCalibrator is PlattCalibrator,
                "Combining FastTree models can only be done when the models are calibrated with Platt calibrator");
            // BUGFIX: these two statements were previously outside the null check,
            // which threw NullReferenceException for any uncalibrated model.
            predictor = calibrated.WeeklyTypedSubModel;
            paramA = -((PlattCalibrator)calibrated.WeeklyTypedCalibrator).Slope;
        }
        var tree = predictor as TreeEnsembleModelParameters;
        if (tree == null)
            throw _host.Except("Model is not a tree ensemble");
        foreach (var t in tree.TrainedEnsemble.Trees)
        {
            // Round-trip each tree through its byte representation to get an
            // independent copy we can safely rescale.
            var bytes = new byte[t.SizeInBytes()];
            int position = -1;
            t.ToByteArray(bytes, ref position);
            position = -1;
            var tNew = new InternalRegressionTree(bytes, ref position);
            if (paramA != 1)
            {
                // Fold the Platt slope into the leaf outputs.
                for (int i = 0; i < tNew.NumLeaves; i++)
                    tNew.SetOutput(i, tNew.LeafValues[i] * paramA);
            }
            ensemble.AddTree(tNew);
        }
        if (modelCount == 1)
        {
            binaryClassifier = calibrated != null;
            featureCount = tree.InputType.GetValueCount();
        }
        else
        {
            _host.Check((calibrated != null) == binaryClassifier,
                "Ensemble contains both calibrated and uncalibrated models");
            _host.Check(featureCount == tree.InputType.GetValueCount(),
                "Found models with different number of features");
        }
    }

    // Average the combined trees across the number of source models.
    var scale = 1 / (double)modelCount;
    foreach (var t in ensemble.Trees)
    {
        for (int i = 0; i < t.NumLeaves; i++)
            t.SetOutput(i, t.LeafValues[i] * scale);
    }

    switch (_kind)
    {
        case PredictionKind.BinaryClassification:
            if (!binaryClassifier)
                return new FastTreeBinaryModelParameters(_host, ensemble, featureCount, null);
            // Re-attach a standard Platt calibrator (slope -1, offset 0) since the
            // individual slopes were folded into the leaf values above.
            var cali = new PlattCalibrator(_host, -1, 0);
            var fastTreeModel = new FastTreeBinaryModelParameters(_host, ensemble, featureCount, null);
            return new FeatureWeightsCalibratedModelParameters<FastTreeBinaryModelParameters, PlattCalibrator>(_host, fastTreeModel, cali);
        case PredictionKind.Regression:
            return new FastTreeRegressionModelParameters(_host, ensemble, featureCount, null);
        case PredictionKind.Ranking:
            return new FastTreeRankingModelParameters(_host, ensemble, featureCount, null);
        default:
            _host.Assert(false);
            throw _host.ExceptNotSupp();
    }
}