Example #1
0
        protected override IPredictorWithFeatureWeights <float> TrainModelCore(TrainContext context)
        {
            Host.CheckValue(context, nameof(context));

            var trainData = context.TrainingSet;
            ValidData = context.ValidationSet;

            // Validate the training data and run the boosting loop inside a host channel.
            using (var channel = Host.Start("Training"))
            {
                channel.CheckValue(trainData, nameof(trainData));
                trainData.CheckBinaryLabel();
                trainData.CheckFeatureFloatVector();
                trainData.CheckOptFloatWeight();
                FeatureCount = trainData.Schema.Feature.Type.ValueCount;
                ConvertData(trainData);
                TrainCore(channel);
            }

            var predictor = new FastTreeBinaryPredictor(Host, TrainedEnsemble, FeatureCount, InnerArgs);

            // FastTree's binary boosting is naturally calibrated: a scaled logistic
            // transform of the raw score yields a probability. See "From RankNet to
            // LambdaRank to LambdaMART: An Overview" (Chris Burges); the scaling here is
            // only correct while it stays in sync with the gradient computed in
            // BinaryClassificationObjectiveFunction.GetGradientInOneQuery (section 6 of
            // the paper).
            var calibrator = new PlattCalibrator(Host, -_sigmoidParameter, 0);

            return new FeatureWeightsCalibratedPredictor(Host, predictor, calibrator);
        }
Example #2
0
        private protected override OneVersusAllModelParameters CreatePredictor()
        {
            Host.Check(TrainedEnsemble != null, "The predictor cannot be created before training is complete.");

            Host.Assert(_numClass > 1, "Must know the number of classes before creating a predictor.");
            Host.Assert(TrainedEnsemble.NumTrees % _numClass == 0, "Number of trees should be a multiple of number of classes.");

            var innerArgs = LightGbmInterfaceUtils.JoinParameters(GbmOptions);

            // One Platt-calibrated binary predictor per class, combined one-versus-all.
            var binaryPredictors = new IPredictorProducing<float>[_tlcNumClass];
            for (int classIndex = 0; classIndex < _tlcNumClass; ++classIndex)
            {
                var binary = CreateBinaryPredictor(classIndex, innerArgs);
                var platt = new PlattCalibrator(Host, -0.5, 0);
                binaryPredictors[classIndex] =
                    new FeatureWeightsCalibratedModelParameters <LightGbmBinaryModelParameters, PlattCalibrator>(Host, binary, platt);
            }

            // The native "multiclass" objective combines per-class scores via softmax;
            // any other objective uses the default OVA output formula.
            string objective = (string)GetGbmParameters()["objective"];
            return objective == "multiclass"
                ? OneVersusAllModelParameters.Create(Host, OneVersusAllModelParameters.OutputFormula.Softmax, binaryPredictors)
                : OneVersusAllModelParameters.Create(Host, binaryPredictors);
        }
Example #3
0
        /// <summary>
        /// Output the weights of a linear model to a given writer
        /// </summary>
        /// <summary>
        /// Render a linear model's non-zero weights, preceded by an optional header
        /// describing the trainer name and settings, as a plain-text listing.
        /// </summary>
        public static string LinearModelAsText(
            string userName, string loadName, string settings, ref VBuffer <Float> weights, Float bias,
            RoleMappedSchema schema = null, PlattCalibrator calibrator = null)
        {
            // Review: added a text description for each calibrator (not only Platt), would be nice to add to this method.
            // Would it mess with the baselines a lot?
            var builder = new StringBuilder();

            if (!string.IsNullOrWhiteSpace(userName))
                builder.Append(userName).Append(" ");

            builder.Append("non-zero weights");
            if (!string.IsNullOrWhiteSpace(loadName))
            {
                builder.Append(" trained as /cl ").Append(loadName);
                if (!string.IsNullOrWhiteSpace(settings))
                    builder.Append(" { ").Append(settings).Append(" }");
            }
            builder.AppendLine();

            // Collect (name, value) pairs for every weight plus the bias, then emit one per line.
            var weightValues = new List <KeyValuePair <string, object> >();
            SaveLinearModelWeightsInKeyValuePairs(ref weights, bias, schema, weightValues);
            foreach (var pair in weightValues)
            {
                Contracts.Assert(pair.Value is Float);
                builder.AppendLine().AppendFormat("{0}\t{1}", pair.Key, (Float)pair.Value);
            }

            return builder.ToString();
        }
Example #4
0
        private protected override IPredictorWithFeatureWeights <double> CreateManagedPredictor()
        {
            // Wrap the trained ensemble in a Platt (sigmoid) calibrator whose slope is
            // taken from the training objective's sigmoid parameter.
            var predictor = new BinaryPredictor(TrainedEnsemble, FeatureCount, AverageOutput);
            var calibrator = new PlattCalibrator(-Objective.Sigmoid);
            return new CalibratedPredictor(predictor, calibrator);
        }
        private protected override IPredictorProducing <float> TrainModelCore(TrainContext context)
        {
            TrainBase(context);

            // Build the GAM predictor from the trained bin effects, then attach a Platt
            // calibrator scaled by the configured sigmoid parameter.
            var gamPredictor = new BinaryClassGamPredictor(Host, InputLength, TrainSet,
                                                           MeanEffect, BinEffects, FeatureMap);
            var plattCalibrator = new PlattCalibrator(Host, -_sigmoidParameter, 0);
            return new CalibratedPredictor(Host, gamPredictor, plattCalibrator);
        }
Example #6
0
        private protected override IPredictorWithFeatureWeights <float> CreatePredictor()
        {
            Host.Check(TrainedEnsemble != null, "The predictor cannot be created before training is complete");

            // LightGBM binary scores are mapped to probabilities via a fixed-slope sigmoid.
            var innerArgs = LightGbmInterfaceUtils.JoinParameters(Options);
            var binaryModel = new LightGbmBinaryModelParameters(Host, TrainedEnsemble, FeatureCount, innerArgs);
            var platt = new PlattCalibrator(Host, -0.5, 0);
            return new FeatureWeightsCalibratedPredictor(Host, binaryModel, platt);
        }
        private protected override CalibratedPredictorBase TrainModelCore(TrainContext context)
        {
            TrainBase(context);

            // Build the GAM model parameters from the trained bins, then attach a Platt
            // calibrator scaled by the configured sigmoid parameter.
            var gamModel = new BinaryClassificationGamModelParameters(Host,
                                                                      BinUpperBounds, BinEffects, MeanEffect, InputLength, FeatureMap);
            var plattCalibrator = new PlattCalibrator(Host, -_sigmoidParameter, 0);
            return new CalibratedPredictor(Host, gamModel, plattCalibrator);
        }
Example #8
0
 public static string GetCalibratorEvaluatorIni(string originalIni, PlattCalibrator calibrator)
 {
     // Bing-style output as a second evaluator
     // Sigmoid: P(z) = 1/(1+exp(-z)).
     // Calibrator: P(x) = 1/(1+exp(ax+b)), where x is output of model (evaluator 1)
     //  => z = -ax + -b
     var ini = new StringBuilder()
               .AppendLine("EvaluatorType=Aggregator")
               .AppendLine("Type=Sigmoid")
               .AppendLine("Bias=" + -calibrator.Offset)
               .AppendLine("NumNodes=1")
               .AppendLine("Nodes=E:" + NumEvaluators(originalIni))
               .AppendLine("Weights=" + -calibrator.Slope);
     return ini.ToString();
 }
        private protected override OvaPredictor CreatePredictor()
        {
            Host.Check(TrainedEnsemble != null, "The predictor cannot be created before training is complete.");

            Host.Assert(_numClass > 1, "Must know the number of classes before creating a predictor.");
            Host.Assert(TrainedEnsemble.NumTrees % _numClass == 0, "Number of trees should be a multiple of number of classes.");

            // Build one Platt-calibrated binary predictor per class and combine them
            // into a one-versus-all predictor.
            var innerArgs = LightGbmInterfaceUtils.JoinParameters(Options);
            var perClass = new IPredictorProducing<float>[_tlcNumClass];
            for (int classIndex = 0; classIndex < _tlcNumClass; ++classIndex)
            {
                var binary = CreateBinaryPredictor(classIndex, innerArgs);
                var platt = new PlattCalibrator(Host, -0.5, 0);
                perClass[classIndex] = new FeatureWeightsCalibratedPredictor(Host, binary, platt);
            }
            return OvaPredictor.Create(Host, perClass);
        }
Example #10
0
        public override IPredictorWithFeatureWeights <Float> CreatePredictor()
        {
            Host.Check(TrainedEnsemble != null,
                       "The predictor cannot be created before training is complete");

            var predictor = new FastTreeBinaryPredictor(Host, TrainedEnsemble, FeatureCount, InnerArgs);

            // FastTree's binary boosting is naturally calibrated: a scaled logistic
            // transform of the raw score yields a probability. See "From RankNet to
            // LambdaRank to LambdaMART: An Overview" (Chris Burges); the scaling here is
            // only correct while it stays in sync with the gradient computed in
            // BinaryClassificationObjectiveFunction.GetGradientInOneQuery (section 6 of
            // the paper).
            var calibrator = new PlattCalibrator(Host, -2 * Args.LearningRates, 0);

            return new FeatureWeightsCalibratedPredictor(Host, predictor, calibrator);
        }
Example #11
0
        /// <summary>
        /// Combines several tree-ensemble models into a single averaged ensemble.
        /// Calibrated (Platt) models have the calibrator slope folded into the leaf
        /// values; all models must be consistently calibrated or uncalibrated and share
        /// the same feature count. The result type follows <c>_kind</c>.
        /// </summary>
        IPredictor IModelCombiner.CombineModels(IEnumerable <IPredictor> models)
        {
            _host.CheckValue(models, nameof(models));

            var  ensemble         = new InternalTreeEnsemble();
            int  modelCount       = 0;
            int  featureCount     = -1;
            bool binaryClassifier = false;

            foreach (var model in models)
            {
                modelCount++;

                var predictor = model;
                _host.CheckValue(predictor, nameof(models), "One of the models is null");

                var    calibrated = predictor as IWeaklyTypedCalibratedModelParameters;
                double paramA     = 1;
                if (calibrated != null)
                {
                    _host.Check(calibrated.WeeklyTypedCalibrator is PlattCalibrator,
                                "Combining FastTree models can only be done when the models are calibrated with Platt calibrator");

                    // BUG FIX: unwrap the sub-model and read the calibrator slope only
                    // when a calibrator is actually present. These two statements used to
                    // run unconditionally, throwing NullReferenceException for any
                    // uncalibrated model (a case the rest of this method supports).
                    predictor = calibrated.WeeklyTypedSubModel;
                    paramA    = -((PlattCalibrator)calibrated.WeeklyTypedCalibrator).Slope;
                }

                var tree = predictor as TreeEnsembleModelParameters;

                if (tree == null)
                {
                    throw _host.Except("Model is not a tree ensemble");
                }
                foreach (var t in tree.TrainedEnsemble.Trees)
                {
                    // Round-trip each tree through its byte representation to obtain an
                    // independent copy we can safely rescale.
                    var bytes    = new byte[t.SizeInBytes()];
                    int position = -1;
                    t.ToByteArray(bytes, ref position);
                    position = -1;
                    var tNew = new InternalRegressionTree(bytes, ref position);
                    if (paramA != 1)
                    {
                        // Fold the Platt slope into the leaf outputs so the combined
                        // ensemble needs only a single trivial calibrator.
                        for (int i = 0; i < tNew.NumLeaves; i++)
                        {
                            tNew.SetOutput(i, tNew.LeafValues[i] * paramA);
                        }
                    }
                    ensemble.AddTree(tNew);
                }

                if (modelCount == 1)
                {
                    // The first model fixes the expected calibration mode and feature count.
                    binaryClassifier = calibrated != null;
                    featureCount     = tree.InputType.GetValueCount();
                }
                else
                {
                    _host.Check((calibrated != null) == binaryClassifier, "Ensemble contains both calibrated and uncalibrated models");
                    _host.Check(featureCount == tree.InputType.GetValueCount(), "Found models with different number of features");
                }
            }

            // Average the models by scaling every leaf by 1/modelCount.
            var scale = 1 / (double)modelCount;

            foreach (var t in ensemble.Trees)
            {
                for (int i = 0; i < t.NumLeaves; i++)
                {
                    t.SetOutput(i, t.LeafValues[i] * scale);
                }
            }

            switch (_kind)
            {
            case PredictionKind.BinaryClassification:
                if (!binaryClassifier)
                {
                    return(new FastTreeBinaryModelParameters(_host, ensemble, featureCount, null));
                }

                // Slopes were already folded into the leaves, so a unit-slope Platt
                // calibrator suffices here.
                var cali          = new PlattCalibrator(_host, -1, 0);
                var fastTreeModel = new FastTreeBinaryModelParameters(_host, ensemble, featureCount, null);
                return(new FeatureWeightsCalibratedModelParameters <FastTreeBinaryModelParameters, PlattCalibrator>(_host, fastTreeModel, cali));

            case PredictionKind.Regression:
                return(new FastTreeRegressionModelParameters(_host, ensemble, featureCount, null));

            case PredictionKind.Ranking:
                return(new FastTreeRankingModelParameters(_host, ensemble, featureCount, null));

            default:
                _host.Assert(false);
                throw _host.ExceptNotSupp();
            }
        }
Example #12
0
        /// <summary>
        /// Build a Bing TreeEnsemble .ini representation of the given predictor
        /// </summary>
        /// <param name="weights">Linear model weights; only entries with magnitude at least Epsilon are emitted.</param>
        /// <param name="bias">Model bias, written as the linear evaluator's Bias line.</param>
        /// <param name="predictor">Optional predictor; only referenced by the disabled [Comments] section below.</param>
        /// <param name="schema">Optional schema used to resolve feature (slot) names.</param>
        /// <param name="calibrator">Optional Platt calibrator; when present, a sigmoid evaluator is appended to the ini.</param>
        public static string LinearModelAsIni(ref VBuffer <Float> weights, Float bias, IPredictor predictor = null,
                                              RoleMappedSchema schema = null, PlattCalibrator calibrator = null)
        {
            // TODO: Might need to consider a max line length for the Weights list, requiring us to split it up into
            //   multiple evaluators
            StringBuilder inputBuilder           = new StringBuilder();
            StringBuilder aggregatedNodesBuilder = new StringBuilder("Nodes=");
            StringBuilder weightsBuilder         = new StringBuilder("Weights=");

            var featureNames = default(VBuffer <ReadOnlyMemory <char> >);

            MetadataUtils.GetSlotNames(schema, RoleMappedSchema.ColumnRole.Feature, weights.Length, ref featureNames);

            int          numNonZeroWeights = 0;
            const string weightsSep        = "\t";

            // Emit one [Input:k] section per weight above the Epsilon threshold, while
            // accumulating the matching node references and weight values for the
            // evaluator section written below.
            VBufferUtils.ForEachDefined(ref weights,
                                        (idx, value) =>
            {
                if (Math.Abs(value - 0) >= Epsilon)
                {
                    numNonZeroWeights++;

                    var name = featureNames.GetItemOrDefault(idx);

                    inputBuilder.AppendLine("[Input:" + numNonZeroWeights + "]");
                    // Fall back to a generated name when no slot names exist, or when this slot's name is empty.
                    inputBuilder.AppendLine("Name=" + (featureNames.Count == 0 ? "Feature_" + idx : name.IsEmpty ? $"f{idx}" : name.ToString()));
                    inputBuilder.AppendLine("Transform=linear");
                    inputBuilder.AppendLine("Slope=1");
                    inputBuilder.AppendLine("Intercept=0");
                    inputBuilder.AppendLine();

                    aggregatedNodesBuilder.Append("I:" + numNonZeroWeights + weightsSep);
                    weightsBuilder.Append(value + weightsSep);
                }
            });

            StringBuilder builder = new StringBuilder();

            // Header: total input count and a single linear evaluator.
            builder.AppendLine("[TreeEnsemble]");
            builder.AppendLine("Inputs=" + numNonZeroWeights);
            builder.AppendLine("Evaluators=1");
            builder.AppendLine();

            builder.AppendLine(inputBuilder.ToString());

            builder.AppendLine("[Evaluator:1]");
            builder.AppendLine("EvaluatorType=Aggregator");
            builder.AppendLine("Type=Linear");
            builder.AppendLine("Bias=" + bias);
            builder.AppendLine("NumNodes=" + numNonZeroWeights);
            builder.AppendLine(aggregatedNodesBuilder.ToString().Trim());
            builder.AppendLine(weightsBuilder.ToString().Trim());

#if false // REVIEW: This should be done by the caller using the actual training args!
            builder.AppendLine();
            builder.AppendLine("[Comments]");
            builder.Append("Trained by TLC");
            if (predictor != null)
            {
                builder.Append(" as /cl " + predictor.GetType().Name);
                if (predictor is IInitializable)
                {
                    string settings = string.Join(";", (predictor as IInitializable).GetSettings());
                    if (!string.IsNullOrEmpty(settings))
                    {
                        builder.Append(" /cls " + settings);
                    }
                }
            }
#endif

            string ini = builder.ToString();

            // Add the calibration if the model was trained with calibration
            if (calibrator != null)
            {
                string calibratorEvaluatorIni = IniFileUtils.GetCalibratorEvaluatorIni(ini, calibrator);
                ini = IniFileUtils.AddEvaluator(ini, calibratorEvaluatorIni);
            }
            return(ini);
        }
Example #13
0
        /// <summary>
        /// Combines several tree-ensemble models into a single averaged ensemble.
        /// Platt-calibrated models have the calibrator slope folded into the leaf
        /// values; all models must be consistently calibrated or uncalibrated and
        /// share the same feature count. The result type follows <c>_kind</c>.
        /// </summary>
        public IPredictor CombineModels(IEnumerable<IPredictor> models)
        {
            _host.CheckValue(models, nameof(models));

            var ensemble = new Ensemble();
            int modelCount = 0;
            int featureCount = -1;
            bool binaryClassifier = false;
            foreach (var model in models)
            {
                modelCount++;

                var predictor = model;
                _host.CheckValue(predictor, nameof(models), "One of the models is null");

                var calibrated = predictor as CalibratedPredictorBase;
                double paramA = 1;
                if (calibrated != null)
                {
                    _host.Check(calibrated.Calibrator is PlattCalibrator,
                        "Combining FastTree models can only be done when the models are calibrated with Platt calibrator");
                    // Unwrap the calibrated model; the Platt slope is folded into the
                    // leaf outputs below so a single trivial calibrator suffices.
                    predictor = calibrated.SubPredictor;
                    paramA = -(calibrated.Calibrator as PlattCalibrator).ParamA;
                }
                var tree = predictor as FastTreePredictionWrapper;
                if (tree == null)
                    throw _host.Except("Model is not a tree ensemble");
                foreach (var t in tree.TrainedEnsemble.Trees)
                {
                    // Round-trip each tree through its byte representation to obtain an
                    // independent copy we can safely rescale.
                    var bytes = new byte[t.SizeInBytes()];
                    int position = -1;
                    t.ToByteArray(bytes, ref position);
                    position = -1;
                    var tNew = new RegressionTree(bytes, ref position);
                    if (paramA != 1)
                    {
                        // Fold the calibrator slope into the leaf values.
                        for (int i = 0; i < tNew.NumLeaves; i++)
                            tNew.SetOutput(i, tNew.LeafValues[i] * paramA);
                    }
                    ensemble.AddTree(tNew);
                }

                if (modelCount == 1)
                {
                    // The first model fixes the expected calibration mode and feature count.
                    binaryClassifier = calibrated != null;
                    featureCount = tree.InputType.ValueCount;
                }
                else
                {
                    _host.Check((calibrated != null) == binaryClassifier, "Ensemble contains both calibrated and uncalibrated models");
                    _host.Check(featureCount == tree.InputType.ValueCount, "Found models with different number of features");
                }
            }

            // Average the models by scaling every leaf output by 1/modelCount.
            var scale = 1 / (double)modelCount;

            foreach (var t in ensemble.Trees)
            {
                for (int i = 0; i < t.NumLeaves; i++)
                    t.SetOutput(i, t.LeafValues[i] * scale);
            }

            switch (_kind)
            {
                case PredictionKind.BinaryClassification:
                    if (!binaryClassifier)
                        return new FastTreeBinaryPredictor(_host, ensemble, featureCount, null);

                    // Slopes were already folded into the leaves, so a unit-slope Platt
                    // calibrator suffices here.
                    var cali = new PlattCalibrator(_host, -1, 0);
                    return new FeatureWeightsCalibratedPredictor(_host, new FastTreeBinaryPredictor(_host, ensemble, featureCount, null), cali);
                case PredictionKind.Regression:
                    return new FastTreeRegressionPredictor(_host, ensemble, featureCount, null);
                case PredictionKind.Ranking:
                    return new FastTreeRankingPredictor(_host, ensemble, featureCount, null);
                default:
                    _host.Assert(false);
                    throw _host.ExceptNotSupp();
            }
        }