/// <summary>
/// Trains the FastTree binary classification ensemble and wraps it in a
/// Platt-calibrated predictor so raw scores map to probabilities.
/// </summary>
protected override IPredictorWithFeatureWeights<float> TrainModelCore(TrainContext context)
{
    Host.CheckValue(context, nameof(context));
    var trainData = context.TrainingSet;
    ValidData = context.ValidationSet;

    using (var ch = Host.Start("Training"))
    {
        ch.CheckValue(trainData, nameof(trainData));
        trainData.CheckBinaryLabel();
        trainData.CheckFeatureFloatVector();
        trainData.CheckOptFloatWeight();
        FeatureCount = trainData.Schema.Feature.Type.ValueCount;
        ConvertData(trainData);
        TrainCore(ch);
    }

    // The FastTree binary classification boosting is naturally calibrated to
    // output probabilities when transformed using a scaled logistic function,
    // so transform the scores using that. This probabilistic interpretation is
    // explained in "From RankNet to LambdaRank to LambdaMART: An Overview" by
    // Chris Burges. The correctness of this scaling depends upon the gradient
    // calculation in BinaryClassificationObjectiveFunction.GetGradientInOneQuery
    // being consistent with the description in section 6 of the paper.
    var predictor = new FastTreeBinaryPredictor(Host, TrainedEnsemble, FeatureCount, InnerArgs);
    var calibrator = new PlattCalibrator(Host, -1 * _sigmoidParameter, 0);
    return new FeatureWeightsCalibratedPredictor(Host, predictor, calibrator);
}
/// <summary>
/// Builds a one-versus-all model from the trained LightGBM ensemble: one
/// Platt-calibrated binary predictor per class. When the boosting objective
/// was "multiclass", the per-class scores are combined with a softmax.
/// </summary>
private protected override OneVersusAllModelParameters CreatePredictor()
{
    Host.Check(TrainedEnsemble != null, "The predictor cannot be created before training is complete.");
    Host.Assert(_numClass > 1, "Must know the number of classes before creating a predictor.");
    Host.Assert(TrainedEnsemble.NumTrees % _numClass == 0, "Number of trees should be a multiple of number of classes.");

    var joinedParameters = LightGbmInterfaceUtils.JoinParameters(GbmOptions);
    var perClassPredictors = new IPredictorProducing<float>[_tlcNumClass];
    for (int classIndex = 0; classIndex < _tlcNumClass; ++classIndex)
    {
        var binaryPredictor = CreateBinaryPredictor(classIndex, joinedParameters);
        var calibrator = new PlattCalibrator(Host, -0.5, 0);
        perClassPredictors[classIndex] =
            new FeatureWeightsCalibratedModelParameters<LightGbmBinaryModelParameters, PlattCalibrator>(Host, binaryPredictor, calibrator);
    }

    string objective = (string)GetGbmParameters()["objective"];
    return objective == "multiclass"
        ? OneVersusAllModelParameters.Create(Host, OneVersusAllModelParameters.OutputFormula.Softmax, perClassPredictors)
        : OneVersusAllModelParameters.Create(Host, perClassPredictors);
}
/// <summary>
/// Output the weights of a linear model to a given writer: a header line
/// describing the trainer (user name, load name, settings) followed by one
/// tab-separated "name\tweight" line per entry produced by
/// SaveLinearModelWeightsInKeyValuePairs.
/// </summary>
public static string LinearModelAsText(
    string userName, string loadName, string settings, ref VBuffer<Float> weights, Float bias,
    RoleMappedSchema schema = null, PlattCalibrator calibrator = null)
{
    // Review: added a text description for each calibrator (not only Platt), would be nice to add to this method.
    // Would it mess with the baselines a lot?
    var builder = new StringBuilder();
    if (!string.IsNullOrWhiteSpace(userName))
        builder.Append(userName).Append(" ");
    builder.Append("non-zero weights");
    if (!string.IsNullOrWhiteSpace(loadName))
    {
        builder.Append(" trained as /cl ").Append(loadName);
        if (!string.IsNullOrWhiteSpace(settings))
            builder.Append(" { ").Append(settings).Append(" }");
    }
    builder.AppendLine();

    var namedWeights = new List<KeyValuePair<string, object>>();
    SaveLinearModelWeightsInKeyValuePairs(ref weights, bias, schema, namedWeights);
    foreach (var namedWeight in namedWeights)
    {
        Contracts.Assert(namedWeight.Value is Float);
        builder.AppendLine().AppendFormat("{0}\t{1}", namedWeight.Key, (Float)namedWeight.Value);
    }

    return builder.ToString();
}
/// <summary>
/// Wraps the trained ensemble in a binary predictor paired with a Platt
/// calibrator whose slope is the negated objective sigmoid parameter.
/// </summary>
private protected override IPredictorWithFeatureWeights<double> CreateManagedPredictor()
{
    var predictor = new BinaryPredictor(TrainedEnsemble, FeatureCount, AverageOutput);
    var calibrator = new PlattCalibrator(-Objective.Sigmoid);
    return new CalibratedPredictor(predictor, calibrator);
}
/// <summary>
/// Trains the GAM binary classifier and returns it wrapped in a Platt
/// calibrator (slope = -sigmoid parameter) that maps scores to probabilities.
/// </summary>
private protected override IPredictorProducing<float> TrainModelCore(TrainContext context)
{
    TrainBase(context);
    var gamPredictor = new BinaryClassGamPredictor(Host, InputLength, TrainSet, MeanEffect, BinEffects, FeatureMap);
    var plattCalibrator = new PlattCalibrator(Host, -1.0 * _sigmoidParameter, 0);
    return new CalibratedPredictor(Host, gamPredictor, plattCalibrator);
}
/// <summary>
/// Wraps the trained LightGBM ensemble in a binary model calibrated with a
/// fixed Platt slope of -0.5.
/// </summary>
private protected override IPredictorWithFeatureWeights<float> CreatePredictor()
{
    Host.Check(TrainedEnsemble != null, "The predictor cannot be created before training is complete");
    var joinedParameters = LightGbmInterfaceUtils.JoinParameters(Options);
    var binaryModel = new LightGbmBinaryModelParameters(Host, TrainedEnsemble, FeatureCount, joinedParameters);
    var calibrator = new PlattCalibrator(Host, -0.5, 0);
    return new FeatureWeightsCalibratedPredictor(Host, binaryModel, calibrator);
}
/// <summary>
/// Trains the GAM binary classification model and pairs it with a Platt
/// calibrator (slope = -sigmoid parameter) to produce probability outputs.
/// </summary>
private protected override CalibratedPredictorBase TrainModelCore(TrainContext context)
{
    TrainBase(context);
    var gamModel = new BinaryClassificationGamModelParameters(Host, BinUpperBounds, BinEffects, MeanEffect, InputLength, FeatureMap);
    var plattCalibrator = new PlattCalibrator(Host, -1.0 * _sigmoidParameter, 0);
    return new CalibratedPredictor(Host, gamModel, plattCalibrator);
}
/// <summary>
/// Builds the ini text of a sigmoid "Aggregator" evaluator that applies the
/// given Platt calibration on top of the model's existing evaluator(s).
/// </summary>
public static string GetCalibratorEvaluatorIni(string originalIni, PlattCalibrator calibrator)
{
    // Bing-style output as a second evaluator
    // Sigmoid: P(z) = 1/(1+exp(-z)).
    // Calibrator: P(x) = 1/(1+exp(ax+b)), where x is output of model (evaluator 1)
    // => z = -ax + -b
    var evaluator = new StringBuilder();
    evaluator.AppendLine("EvaluatorType=Aggregator");
    evaluator.AppendLine("Type=Sigmoid");
    evaluator.AppendLine($"Bias={-calibrator.Offset}");
    evaluator.AppendLine("NumNodes=1");
    evaluator.AppendLine($"Nodes=E:{NumEvaluators(originalIni)}");
    evaluator.AppendLine($"Weights={-calibrator.Slope}");
    return evaluator.ToString();
}
/// <summary>
/// Builds a one-versus-all predictor from the trained multiclass ensemble:
/// one Platt-calibrated binary predictor per class.
/// </summary>
private protected override OvaPredictor CreatePredictor()
{
    Host.Check(TrainedEnsemble != null, "The predictor cannot be created before training is complete.");
    Host.Assert(_numClass > 1, "Must know the number of classes before creating a predictor.");
    Host.Assert(TrainedEnsemble.NumTrees % _numClass == 0, "Number of trees should be a multiple of number of classes.");

    var joinedParameters = LightGbmInterfaceUtils.JoinParameters(Options);
    var perClassPredictors = new IPredictorProducing<float>[_tlcNumClass];
    for (int classIndex = 0; classIndex < _tlcNumClass; ++classIndex)
    {
        var binaryPredictor = CreateBinaryPredictor(classIndex, joinedParameters);
        var calibrator = new PlattCalibrator(Host, -0.5, 0);
        perClassPredictors[classIndex] = new FeatureWeightsCalibratedPredictor(Host, binaryPredictor, calibrator);
    }
    return OvaPredictor.Create(Host, perClassPredictors);
}
/// <summary>
/// Creates a calibrated binary classification predictor from the trained
/// FastTree ensemble.
/// </summary>
public override IPredictorWithFeatureWeights<Float> CreatePredictor()
{
    Host.Check(TrainedEnsemble != null, "The predictor cannot be created before training is complete");

    // The FastTree binary classification boosting is naturally calibrated to
    // output probabilities when transformed using a scaled logistic function,
    // so transform the scores using that. This interpretation is explained in
    // "From RankNet to LambdaRank to LambdaMART: An Overview" by Chris Burges.
    // The correctness of this scaling depends upon the gradient calculation in
    // BinaryClassificationObjectiveFunction.GetGradientInOneQuery being
    // consistent with the description in section 6 of the paper.
    var predictor = new FastTreeBinaryPredictor(Host, TrainedEnsemble, FeatureCount, InnerArgs);
    var calibrator = new PlattCalibrator(Host, -2 * Args.LearningRates, 0);
    return new FeatureWeightsCalibratedPredictor(Host, predictor, calibrator);
}
/// <summary>
/// Combines several FastTree models into a single averaged tree ensemble.
/// Calibrated models are unwrapped and their Platt slope folded into the leaf
/// values; all models must agree on feature count and calibration state.
/// </summary>
IPredictor IModelCombiner.CombineModels(IEnumerable<IPredictor> models)
{
    _host.CheckValue(models, nameof(models));
    var ensemble = new InternalTreeEnsemble();
    int modelCount = 0;
    int featureCount = -1;
    bool binaryClassifier = false;
    foreach (var model in models)
    {
        modelCount++;
        var predictor = model;
        _host.CheckValue(predictor, nameof(models), "One of the models is null");

        var calibrated = predictor as IWeaklyTypedCalibratedModelParameters;
        double paramA = 1;
        if (calibrated != null)
        {
            _host.Check(calibrated.WeeklyTypedCalibrator is PlattCalibrator,
                "Combining FastTree models can only be done when the models are calibrated with Platt calibrator");

            // BUGFIX: these two statements previously sat outside the null check
            // and dereferenced 'calibrated' even for uncalibrated models, throwing
            // NullReferenceException. They now run only when the cast succeeded,
            // matching the sibling (non-interface) CombineModels implementation.
            predictor = calibrated.WeeklyTypedSubModel;
            paramA = -((PlattCalibrator)calibrated.WeeklyTypedCalibrator).Slope;
        }

        var tree = predictor as TreeEnsembleModelParameters;
        if (tree == null)
        {
            throw _host.Except("Model is not a tree ensemble");
        }

        foreach (var t in tree.TrainedEnsemble.Trees)
        {
            // Round-trip the tree through its byte representation to get a deep
            // copy that can be mutated without affecting the source model.
            var bytes = new byte[t.SizeInBytes()];
            int position = -1;
            t.ToByteArray(bytes, ref position);
            position = -1;
            var tNew = new InternalRegressionTree(bytes, ref position);
            // Fold the Platt slope into the leaf values so the combined ensemble
            // needs only a single shared calibrator.
            if (paramA != 1)
            {
                for (int i = 0; i < tNew.NumLeaves; i++)
                {
                    tNew.SetOutput(i, tNew.LeafValues[i] * paramA);
                }
            }
            ensemble.AddTree(tNew);
        }

        if (modelCount == 1)
        {
            // The first model fixes the expected shape of the rest.
            binaryClassifier = calibrated != null;
            featureCount = tree.InputType.GetValueCount();
        }
        else
        {
            _host.Check((calibrated != null) == binaryClassifier, "Ensemble contains both calibrated and uncalibrated models");
            _host.Check(featureCount == tree.InputType.GetValueCount(), "Found models with different number of features");
        }
    }

    // Average the models by scaling every leaf output by 1/modelCount.
    var scale = 1 / (double)modelCount;
    foreach (var t in ensemble.Trees)
    {
        for (int i = 0; i < t.NumLeaves; i++)
        {
            t.SetOutput(i, t.LeafValues[i] * scale);
        }
    }

    switch (_kind)
    {
        case PredictionKind.BinaryClassification:
            if (!binaryClassifier)
            {
                return new FastTreeBinaryModelParameters(_host, ensemble, featureCount, null);
            }
            // Calibrated inputs already had their slopes folded into the leaves,
            // so the combined model gets a unit-slope Platt calibrator.
            var cali = new PlattCalibrator(_host, -1, 0);
            var fastTreeModel = new FastTreeBinaryModelParameters(_host, ensemble, featureCount, null);
            return new FeatureWeightsCalibratedModelParameters<FastTreeBinaryModelParameters, PlattCalibrator>(_host, fastTreeModel, cali);
        case PredictionKind.Regression:
            return new FastTreeRegressionModelParameters(_host, ensemble, featureCount, null);
        case PredictionKind.Ranking:
            return new FastTreeRankingModelParameters(_host, ensemble, featureCount, null);
        default:
            _host.Assert(false);
            throw _host.ExceptNotSupp();
    }
}
/// <summary>
/// Build a Bing TreeEnsemble .ini representation of the given predictor.
/// Each non-zero weight becomes one pass-through linear [Input:N] section plus
/// an entry in a single linear "Aggregator" evaluator; if a calibrator is
/// supplied, a sigmoid evaluator is appended on top.
/// </summary>
public static string LinearModelAsIni(ref VBuffer<Float> weights, Float bias, IPredictor predictor = null,
    RoleMappedSchema schema = null, PlattCalibrator calibrator = null)
{
    // TODO: Might need to consider a max line length for the Weights list, requiring us to split it up into
    // multiple evaluators
    StringBuilder inputBuilder = new StringBuilder();
    StringBuilder aggregatedNodesBuilder = new StringBuilder("Nodes=");
    StringBuilder weightsBuilder = new StringBuilder("Weights=");

    // Slot names (if available from the schema) supply the Name= of each input.
    var featureNames = default(VBuffer<ReadOnlyMemory<char>>);
    MetadataUtils.GetSlotNames(schema, RoleMappedSchema.ColumnRole.Feature, weights.Length, ref featureNames);

    int numNonZeroWeights = 0;
    const string weightsSep = "\t";
    // Visit only the defined (sparse) entries; weights within Epsilon of zero are skipped.
    VBufferUtils.ForEachDefined(ref weights, (idx, value) =>
    {
        if (Math.Abs(value - 0) >= Epsilon)
        {
            numNonZeroWeights++;
            var name = featureNames.GetItemOrDefault(idx);
            inputBuilder.AppendLine("[Input:" + numNonZeroWeights + "]");
            // Fall back to a synthesized name when slot names are absent or the slot name is empty.
            inputBuilder.AppendLine("Name=" + (featureNames.Count == 0 ? "Feature_" + idx : name.IsEmpty ? $"f{idx}" : name.ToString()));
            inputBuilder.AppendLine("Transform=linear");
            inputBuilder.AppendLine("Slope=1");
            inputBuilder.AppendLine("Intercept=0");
            inputBuilder.AppendLine();
            aggregatedNodesBuilder.Append("I:" + numNonZeroWeights + weightsSep);
            weightsBuilder.Append(value + weightsSep);
        }
    });

    // Assemble the full ini: ensemble header, input sections, then the single
    // linear aggregator evaluator carrying the bias and all node weights.
    StringBuilder builder = new StringBuilder();
    builder.AppendLine("[TreeEnsemble]");
    builder.AppendLine("Inputs=" + numNonZeroWeights);
    builder.AppendLine("Evaluators=1");
    builder.AppendLine();
    builder.AppendLine(inputBuilder.ToString());
    builder.AppendLine("[Evaluator:1]");
    builder.AppendLine("EvaluatorType=Aggregator");
    builder.AppendLine("Type=Linear");
    builder.AppendLine("Bias=" + bias);
    builder.AppendLine("NumNodes=" + numNonZeroWeights);
    builder.AppendLine(aggregatedNodesBuilder.ToString().Trim());
    builder.AppendLine(weightsBuilder.ToString().Trim());

#if false
    // REVIEW: This should be done by the caller using the actual training args!
    builder.AppendLine();
    builder.AppendLine("[Comments]");
    builder.Append("Trained by TLC");
    if (predictor != null)
    {
        builder.Append(" as /cl " + predictor.GetType().Name);
        if (predictor is IInitializable)
        {
            string settings = string.Join(";", (predictor as IInitializable).GetSettings());
            if (!string.IsNullOrEmpty(settings))
            {
                builder.Append(" /cls " + settings);
            }
        }
    }
#endif

    string ini = builder.ToString();

    // Add the calibration if the model was trained with calibration
    if (calibrator != null)
    {
        string calibratorEvaluatorIni = IniFileUtils.GetCalibratorEvaluatorIni(ini, calibrator);
        ini = IniFileUtils.AddEvaluator(ini, calibratorEvaluatorIni);
    }
    return (ini);
}
/// <summary>
/// Combines several FastTree models into a single averaged tree ensemble.
/// Calibrated models are unwrapped and their Platt slope folded into the leaf
/// values; all models must agree on feature count and calibration state.
/// </summary>
public IPredictor CombineModels(IEnumerable<IPredictor> models)
{
    _host.CheckValue(models, nameof(models));
    var ensemble = new Ensemble();
    int modelCount = 0;
    int featureCount = -1;
    bool binaryClassifier = false;
    foreach (var model in models)
    {
        modelCount++;
        var predictor = model;
        _host.CheckValue(predictor, nameof(models), "One of the models is null");
        var calibrated = predictor as CalibratedPredictorBase;
        double paramA = 1;
        if (calibrated != null)
        {
            _host.Check(calibrated.Calibrator is PlattCalibrator,
                "Combining FastTree models can only be done when the models are calibrated with Platt calibrator");
            // Unwrap the calibrated model; the negated Platt slope is folded into
            // the leaf values below so one shared calibrator suffices at the end.
            predictor = calibrated.SubPredictor;
            paramA = -(calibrated.Calibrator as PlattCalibrator).ParamA;
        }
        var tree = predictor as FastTreePredictionWrapper;
        if (tree == null)
            throw _host.Except("Model is not a tree ensemble");
        foreach (var t in tree.TrainedEnsemble.Trees)
        {
            // Round-trip each tree through its byte representation to obtain a
            // deep copy that can be mutated without affecting the source model.
            var bytes = new byte[t.SizeInBytes()];
            int position = -1;
            t.ToByteArray(bytes, ref position);
            position = -1;
            var tNew = new RegressionTree(bytes, ref position);
            if (paramA != 1)
            {
                // Scale leaf outputs by the calibrator slope.
                for (int i = 0; i < tNew.NumLeaves; i++)
                    tNew.SetOutput(i, tNew.LeafValues[i] * paramA);
            }
            ensemble.AddTree(tNew);
        }
        if (modelCount == 1)
        {
            // The first model fixes the expected shape of the rest.
            binaryClassifier = calibrated != null;
            featureCount = tree.InputType.ValueCount;
        }
        else
        {
            _host.Check((calibrated != null) == binaryClassifier, "Ensemble contains both calibrated and uncalibrated models");
            _host.Check(featureCount == tree.InputType.ValueCount, "Found models with different number of features");
        }
    }

    // Average the models by scaling every leaf output by 1/modelCount.
    var scale = 1 / (double)modelCount;
    foreach (var t in ensemble.Trees)
    {
        for (int i = 0; i < t.NumLeaves; i++)
            t.SetOutput(i, t.LeafValues[i] * scale);
    }

    switch (_kind)
    {
        case PredictionKind.BinaryClassification:
            if (!binaryClassifier)
                return new FastTreeBinaryPredictor(_host, ensemble, featureCount, null);
            // Calibrated inputs already had their slopes folded into the leaves,
            // so the combined model gets a unit-slope Platt calibrator.
            var cali = new PlattCalibrator(_host, -1, 0);
            return new FeatureWeightsCalibratedPredictor(_host,
                new FastTreeBinaryPredictor(_host, ensemble, featureCount, null),
                cali);
        case PredictionKind.Regression:
            return new FastTreeRegressionPredictor(_host, ensemble, featureCount, null);
        case PredictionKind.Ranking:
            return new FastTreeRankingPredictor(_host, ensemble, featureCount, null);
        default:
            _host.Assert(false);
            throw _host.ExceptNotSupp();
    }
}