/// <summary>
/// Initializes the optimization algorithm for the given ensemble and starts tracking
/// scores on the training data.
/// </summary>
/// <param name="ensemble">The tree ensemble stored in <c>Ensemble</c>.</param>
/// <param name="trainData">Training dataset handed to <c>ConstructScoreTracker</c>.</param>
/// <param name="initTrainScores">Initial training scores handed to <c>ConstructScoreTracker</c>.</param>
public OptimizationAlgorithm(InternalTreeEnsemble ensemble, Dataset trainData, double[] initTrainScores)
{
    Ensemble = ensemble;

    // The training-score tracker is created first and becomes the first tracked entry.
    TrainingScores = ConstructScoreTracker("train", trainData, initTrainScores);
    TrackedScores = new List<ScoreTracker> { TrainingScores };

    // Parameterless Random: no fixed seed is supplied here.
    DropoutRng = new Random();
    UseFastTrainingScoresUpdate = true;
}
/// <summary>
/// Divides the output values of the leaves of every tree in <paramref name="ensemble"/> by the
/// bag count. This brings the final scores generated by the model back onto the same range
/// as when bagging is not used.
/// </summary>
/// <param name="numTrees">Number of trees, forwarded to <c>GetBagCount</c>.</param>
/// <param name="bagSize">Bag size, forwarded to <c>GetBagCount</c>.</param>
/// <param name="ensemble">The ensemble whose trees are rescaled in place.</param>
internal void ScaleEnsembleLeaves(int numTrees, int bagSize, InternalTreeEnsemble ensemble)
{
    int bags = GetBagCount(numTrees, bagSize);

    // Every tree receives the same multiplicative factor.
    double leafFactor = 1.0 / bags;

    for (int treeIndex = 0; treeIndex < ensemble.NumTrees; treeIndex++)
    {
        ensemble.GetTreeAt(treeIndex).ScaleOutputsBy(leafFactor);
    }
}
/// <summary>
/// Produces the INI text for a tree ensemble and then passes it through
/// <c>AddCalibrationToIni</c> together with <paramref name="calibrator"/>.
/// </summary>
/// <param name="host">Host used for argument validation and forwarded to <c>AddCalibrationToIni</c>.</param>
/// <param name="ensemble">The ensemble to serialize; validated with <c>CheckValue</c>.</param>
/// <param name="schema">Schema used to build the <c>FeaturesToContentMap</c>; validated with <c>CheckValue</c>.</param>
/// <param name="calibrator">Calibrator forwarded to <c>AddCalibrationToIni</c>; not validated here.</param>
/// <param name="trainingParams">Forwarded to <c>ToTreeEnsembleIni</c>.</param>
/// <param name="appendFeatureGain">Forwarded to <c>ToTreeEnsembleIni</c>.</param>
/// <param name="includeZeroGainFeatures">Forwarded to <c>ToTreeEnsembleIni</c>.</param>
/// <returns>The string returned by <c>AddCalibrationToIni</c>.</returns>
public static string TreeEnsembleToIni(
    IHost host,
    InternalTreeEnsemble ensemble,
    RoleMappedSchema schema,
    ICalibrator calibrator,
    string trainingParams,
    bool appendFeatureGain,
    bool includeZeroGainFeatures)
{
    host.CheckValue(ensemble, nameof(ensemble));
    host.CheckValue(schema, nameof(schema));

    var featureMap = new FeaturesToContentMap(schema);
    string treeSection = ensemble.ToTreeEnsembleIni(
        featureMap, trainingParams, appendFeatureGain, includeZeroGainFeatures);

    return AddCalibrationToIni(host, treeSection, calibrator);
}
/// <summary>
/// Fits a lasso path over the ensemble, logs every solution on the path, and stores the
/// ensemble built from the last solution in <c>_compressedEnsemble</c>.
/// </summary>
/// <param name="ch">Channel used for logging and forwarded to <c>GetLassoFit</c>.</param>
/// <param name="ensemble">The original ensemble the compressed one is built from.</param>
/// <param name="trainScores">Training scores forwarded to <c>LoadTargets</c>.</param>
/// <param name="bestIteration">Forwarded to <c>LoadTargets</c>.</param>
/// <param name="maxTreesAfterCompression">Forwarded to <c>GetLassoFit</c>.</param>
/// <returns>Always <c>true</c>.</returns>
bool IEnsembleCompressor<short>.Compress(IChannel ch, InternalTreeEnsemble ensemble, double[] trainScores, int bestIteration, int maxTreesAfterCompression)
{
    LoadTargets(trainScores, bestIteration);
    LassoFit fit = GetLassoFit(ch, maxTreesAfterCompression);
    int solutionCount = fit.NumberOfLambdas;

    ch.Info("Compression R2 values:");
    for (int ordinal = 1; ordinal <= solutionCount; ordinal++)
    {
        // Solutions are reported 1-based; the fit arrays are 0-based.
        ch.Info("Solution {0}:\t{1}\t{2}", ordinal, fit.NonZeroWeights[ordinal - 1], fit.Rsquared[ordinal - 1]);
    }

    // The last solution on the path is the one that is kept.
    // NOTE(review): if NumberOfLambdas is 0 this index is -1 - confirm GetLassoFit never returns an empty path.
    int chosenSolution = solutionCount - 1;
    _compressedEnsemble = GetEnsembleFromSolution(fit, chosenSolution, ensemble);
    return true;
}
/// <summary>
/// Builds a new ensemble from one lasso solution: every tree with a non-zero weight in the
/// solution is added with that weight, and the solution's intercept becomes the bias.
/// </summary>
/// <param name="fit">The lasso fit holding weights, indices and intercepts.</param>
/// <param name="solutionIdx">Index of the solution to materialize.</param>
/// <param name="originalEnsemble">Ensemble the selected trees are taken from.</param>
/// <returns>The newly created ensemble.</returns>
private InternalTreeEnsemble GetEnsembleFromSolution(LassoFit fit, int solutionIdx, InternalTreeEnsemble originalEnsemble)
{
    var compressed = new InternalTreeEnsemble();

    int entryCount = fit.NumberOfWeights[solutionIdx];
    for (int entry = 0; entry < entryCount; entry++)
    {
        double treeWeight = fit.CompressedWeights[solutionIdx][entry];
        if (treeWeight == 0)
        {
            // Trees with a zero weight are left out of the compressed ensemble.
            continue;
        }

        // NOTE(review): the weight is assigned on the tree object returned by the original
        // ensemble - confirm callers do not rely on the original weights afterwards.
        InternalRegressionTree selected = originalEnsemble.GetTreeAt(fit.Indices[entry]);
        selected.Weight = treeWeight;
        compressed.AddTree(selected);
    }

    compressed.Bias = fit.Intercepts[solutionIdx];
    return compressed;
}
/// <summary>
/// Creates a gradient-descent optimizer; the ensemble, training data and initial scores are
/// passed to the base constructor.
/// </summary>
/// <param name="ensemble">Forwarded to the base constructor.</param>
/// <param name="trainData">Forwarded to the base constructor.</param>
/// <param name="initTrainScores">Forwarded to the base constructor.</param>
/// <param name="gradientWrapper">Gradient adjuster stored in <c>_gradientWrapper</c>.</param>
internal GradientDescent(
    InternalTreeEnsemble ensemble,
    Dataset trainData,
    double[] initTrainScores,
    IGradientAdjuster gradientWrapper)
    : base(ensemble, trainData, initTrainScores)
{
    // Starts with an empty list of per-tree score arrays.
    _treeScores = new List<double[]>();
    _gradientWrapper = gradientWrapper;
}
/// <summary>
/// Combines several tree-ensemble predictors into a single predictor. Every tree of every
/// input model is copied into one ensemble and its leaf outputs are multiplied by
/// <c>1 / modelCount</c>.
/// </summary>
/// <param name="models">
/// The predictors to combine. Each entry must be a <c>TreeEnsembleModelParameters</c>, or a
/// <c>CalibratedPredictorBase</c> whose calibrator is a <c>PlattCalibrator</c> and whose
/// sub-predictor is a <c>TreeEnsembleModelParameters</c>. The sequence must be non-null, must not
/// contain null entries, and must contain at least one model. All models must agree on being
/// calibrated or not and on their feature count.
/// </param>
/// <returns>
/// Depending on <c>_kind</c>: a binary model (wrapped in a <c>FeatureWeightsCalibratedPredictor</c>
/// when the inputs were calibrated), a regression model, or a ranking model.
/// </returns>
/// <remarks>
/// Validation failures are reported through the <c>_host</c> check helpers.
/// </remarks>
public IPredictor CombineModels(IEnumerable<IPredictor> models)
{
    _host.CheckValue(models, nameof(models));

    var ensemble = new InternalTreeEnsemble();
    int modelCount = 0;
    int featureCount = -1;
    bool binaryClassifier = false;

    foreach (var model in models)
    {
        modelCount++;

        var predictor = model;
        _host.CheckValue(predictor, nameof(models), "One of the models is null");

        // For calibrated models, unwrap the sub-predictor and fold the negated Platt slope
        // into the leaf outputs. Only the slope is read here; no other calibrator parameter is used.
        var calibrated = predictor as CalibratedPredictorBase;
        double paramA = 1;
        if (calibrated != null)
        {
            _host.Check(calibrated.Calibrator is PlattCalibrator, "Combining FastTree models can only be done when the models are calibrated with Platt calibrator");
            predictor = calibrated.SubPredictor;
            // The check above established the calibrator type, so a direct cast is used.
            paramA = -((PlattCalibrator)calibrated.Calibrator).Slope;
        }

        var tree = predictor as TreeEnsembleModelParameters;
        if (tree == null)
        {
            throw _host.Except("Model is not a tree ensemble");
        }

        foreach (var t in tree.TrainedEnsemble.Trees)
        {
            // Copy the tree through a byte-array round trip so that the rescaling below is
            // applied to the copy (tNew) rather than to the tree held by the input model.
            // NOTE(review): both calls start at position -1 - confirm this is the convention
            // expected by ToByteArray and the InternalRegressionTree(byte[], ref int) constructor.
            var bytes = new byte[t.SizeInBytes()];
            int position = -1;
            t.ToByteArray(bytes, ref position);
            position = -1;
            var tNew = new InternalRegressionTree(bytes, ref position);

            if (paramA != 1)
            {
                for (int i = 0; i < tNew.NumLeaves; i++)
                {
                    tNew.SetOutput(i, tNew.LeafValues[i] * paramA);
                }
            }

            ensemble.AddTree(tNew);
        }

        if (modelCount == 1)
        {
            // The first model fixes what every later model must match.
            binaryClassifier = calibrated != null;
            featureCount = tree.InputType.GetValueCount();
        }
        else
        {
            _host.Check((calibrated != null) == binaryClassifier, "Ensemble contains both calibrated and uncalibrated models");
            _host.Check(featureCount == tree.InputType.GetValueCount(), "Found models with different number of features");
        }
    }

    // Without this check an empty sequence would produce an infinite scale factor
    // (1 / 0.0) and pass featureCount == -1 on to the model constructors below.
    _host.Check(modelCount > 0, "At least one model is required to combine");

    // Scale every leaf by 1 / modelCount.
    var scale = 1 / (double)modelCount;
    foreach (var t in ensemble.Trees)
    {
        for (int i = 0; i < t.NumLeaves; i++)
        {
            t.SetOutput(i, t.LeafValues[i] * scale);
        }
    }

    switch (_kind)
    {
        case PredictionKind.BinaryClassification:
            if (!binaryClassifier)
            {
                return new FastTreeBinaryModelParameters(_host, ensemble, featureCount, null);
            }

            // The input slopes were already folded into the leaves with a negated sign, so the
            // combined model gets a fixed calibrator.
            // NOTE(review): arguments (-1, 0) are presumably slope and offset - confirm.
            var cali = new PlattCalibrator(_host, -1, 0);
            return new FeatureWeightsCalibratedPredictor(
                _host,
                new FastTreeBinaryModelParameters(_host, ensemble, featureCount, null),
                cali);

        case PredictionKind.Regression:
            return new FastTreeRegressionModelParameters(_host, ensemble, featureCount, null);

        case PredictionKind.Ranking:
            return new FastTreeRankingModelParameters(_host, ensemble, featureCount, null);

        default:
            _host.Assert(false);
            throw _host.ExceptNotSupp();
    }
}