Example #1
 public OptimizationAlgorithm(TreeEnsemble ensemble, Dataset trainData, double[] initTrainScores)
 {
     Ensemble       = ensemble;
     TrainingScores = ConstructScoreTracker("train", trainData, initTrainScores);
     TrackedScores  = new List<ScoreTracker>();
     TrackedScores.Add(TrainingScores);
     DropoutRng = new Random();
     UseFastTrainingScoresUpdate = true;
 }
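The base-class constructor registers the training-set score tracker before any boosting iterations run. A minimal construction sketch, assuming the GradientDescent subclass from Example #9 and placeholder ensemble, trainData, initScores, and adjuster values from the surrounding trainer:

 // Hypothetical call site: concrete optimizers forward their arguments to this base constructor.
 OptimizationAlgorithm optimizer = new GradientDescent(ensemble, trainData, initScores, adjuster);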
Example #2
        // Divides the output values of the leaves by the bag count.
        // This brings the final scores generated by the model back to the same
        // range as when bagging was not used.
        public void ScaleEnsembleLeaves(int numTrees, int bagSize, TreeEnsemble ensemble)
        {
            int bagCount = GetBagCount(numTrees, bagSize);

            for (int t = 0; t < ensemble.NumTrees; t++)
            {
                RegressionTree tree = ensemble.GetTreeAt(t);
                tree.ScaleOutputsBy(1.0 / bagCount);
            }
        }
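Without this rescaling, a bagged ensemble would produce scores roughly bagCount times larger than an unbagged one, because every bag contributes a full complement of trees. A hedged usage sketch; the numbers are illustrative and GetBagCount is assumed to derive the bag count from numTrees and bagSize:

        // Hypothetical call: 300 trees trained with a bag size of 100 implies a bag
        // count of 3, so each leaf output is scaled by 1/3.
        ScaleEnsembleLeaves(numTrees: 300, bagSize: 100, ensemble: trainedEnsemble);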
Example #3
        public static string TreeEnsembleToIni(
            IHost host, TreeEnsemble ensemble, RoleMappedSchema schema, ICalibrator calibrator,
            string trainingParams, bool appendFeatureGain, bool includeZeroGainFeatures)
        {
            host.CheckValue(ensemble, nameof(ensemble));
            host.CheckValue(schema, nameof(schema));

            string ensembleIni = ensemble.ToTreeEnsembleIni(new FeaturesToContentMap(schema),
                                                            trainingParams, appendFeatureGain, includeZeroGainFeatures);

            ensembleIni = AddCalibrationToIni(host, ensembleIni, calibrator);
            return ensembleIni;
        }
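A hedged sketch of serializing a trained ensemble to INI text; host, schema, and calibrator are assumed to come from the surrounding training context, and the parameter values are illustrative:

        // Hypothetical call site: emit the ensemble as INI, appending per-feature
        // gains but omitting features whose gain is zero.
        string ini = TreeEnsembleToIni(host, trainedEnsemble, schema, calibrator,
                                       trainingParams: "", appendFeatureGain: true,
                                       includeZeroGainFeatures: false);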
Example #4
        public bool Compress(IChannel ch, TreeEnsemble ensemble, double[] trainScores, int bestIteration, int maxTreesAfterCompression)
        {
            LoadTargets(trainScores, bestIteration);

            LassoFit fit = GetLassoFit(ch, maxTreesAfterCompression);
            int      numberOfSolutions = fit.NumberOfLambdas;
            int      bestSolutionIdx   = 0;

            ch.Info("Compression R2 values:");
            for (int i = 0; i < numberOfSolutions; i++)
            {
                ch.Info("Solution {0}:\t{1}\t{2}", i + 1, fit.NonZeroWeights[i], fit.Rsquared[i]);
            }
            // Pick the last solution on the lambda path.
            bestSolutionIdx     = numberOfSolutions - 1;
            _compressedEnsemble = GetEnsembleFromSolution(fit, bestSolutionIdx, ensemble);
            return true;
        }
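Compression fits a lasso over the per-tree score contributions and keeps the last solution on the lambda path, so the surviving trees are those with nonzero fitted weights. A hedged invocation sketch, assuming ch is an open IChannel and trainScores holds the per-document scores at bestIteration:

        // Hypothetical usage: shrink the ensemble to at most 100 trees.
        bool compressed = Compress(ch, ensemble, trainScores, bestIteration: 500,
                                   maxTreesAfterCompression: 100);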
Example #5
        private TreeEnsemble GetEnsembleFromSolution(LassoFit fit, int solutionIdx, TreeEnsemble originalEnsemble)
        {
            TreeEnsemble ensemble = new TreeEnsemble();

            int weightsCount = fit.NumberOfWeights[solutionIdx];

            for (int i = 0; i < weightsCount; i++)
            {
                double weight = fit.CompressedWeights[solutionIdx][i];
                if (weight != 0)
                {
                    RegressionTree tree = originalEnsemble.GetTreeAt(fit.Indices[i]);
                    tree.Weight = weight;
                    ensemble.AddTree(tree);
                }
            }

            ensemble.Bias = fit.Intercepts[solutionIdx];
            return ensemble;
        }
Example #6
 public AcceleratedGradientDescent(TreeEnsemble ensemble, Dataset trainData, double[] initTrainScores, IGradientAdjuster gradientWrapper)
     : base(ensemble, trainData, initTrainScores, gradientWrapper)
 {
     UseFastTrainingScoresUpdate = false;
 }
Example #7
 // REVIEW: When the FastTree application is decoupled from the tree learner and boosting logic, this class should be removed.
 public RandomForestOptimizer(TreeEnsemble ensemble, Dataset trainData, double[] initTrainScores, IGradientAdjuster gradientWrapper)
     : base(ensemble, trainData, initTrainScores, gradientWrapper)
 {
     _gradientWrapper = gradientWrapper;
 }
Example #8
        public IPredictor CombineModels(IEnumerable<IPredictor> models)
        {
            _host.CheckValue(models, nameof(models));

            var  ensemble         = new TreeEnsemble();
            int  modelCount       = 0;
            int  featureCount     = -1;
            bool binaryClassifier = false;

            foreach (var model in models)
            {
                modelCount++;

                var predictor = model;
                _host.CheckValue(predictor, nameof(models), "One of the models is null");

                var    calibrated = predictor as CalibratedPredictorBase;
                double paramA     = 1;
                if (calibrated != null)
                {
                    _host.Check(calibrated.Calibrator is PlattCalibrator,
                                "Combining FastTree models can only be done when the models are calibrated with Platt calibrator");
                    predictor = calibrated.SubPredictor;
                    paramA    = -(calibrated.Calibrator as PlattCalibrator).Slope;
                }
                var tree = predictor as TreeEnsembleModelParameters;
                if (tree == null)
                {
                    throw _host.Except("Model is not a tree ensemble");
                }
                foreach (var t in tree.TrainedEnsemble.Trees)
                {
                    var bytes    = new byte[t.SizeInBytes()];
                    int position = -1;
                    t.ToByteArray(bytes, ref position);
                    position = -1;
                    var tNew = new RegressionTree(bytes, ref position);
                    if (paramA != 1)
                    {
                        for (int i = 0; i < tNew.NumLeaves; i++)
                        {
                            tNew.SetOutput(i, tNew.LeafValues[i] * paramA);
                        }
                    }
                    ensemble.AddTree(tNew);
                }

                if (modelCount == 1)
                {
                    binaryClassifier = calibrated != null;
                    featureCount     = tree.InputType.ValueCount;
                }
                else
                {
                    _host.Check((calibrated != null) == binaryClassifier, "Ensemble contains both calibrated and uncalibrated models");
                    _host.Check(featureCount == tree.InputType.ValueCount, "Found models with different number of features");
                }
            }

            var scale = 1 / (double)modelCount;

            foreach (var t in ensemble.Trees)
            {
                for (int i = 0; i < t.NumLeaves; i++)
                {
                    t.SetOutput(i, t.LeafValues[i] * scale);
                }
            }

            switch (_kind)
            {
            case PredictionKind.BinaryClassification:
                if (!binaryClassifier)
                {
                    return new FastTreeBinaryModelParameters(_host, ensemble, featureCount, null);
                }

                var cali = new PlattCalibrator(_host, -1, 0);
                return new FeatureWeightsCalibratedPredictor(_host, new FastTreeBinaryModelParameters(_host, ensemble, featureCount, null), cali);

            case PredictionKind.Regression:
                return new FastTreeRegressionModelParameters(_host, ensemble, featureCount, null);

            case PredictionKind.Ranking:
                return new FastTreeRankingModelParameters(_host, ensemble, featureCount, null);

            default:
                _host.Assert(false);
                throw _host.ExceptNotSupp();
            }
        }
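A hedged sketch of combining several trained models; combiner, model1, model2, and model3 are hypothetical, and it assumes this combiner was created with _kind = PredictionKind.Regression so that each input predictor wraps a TreeEnsembleModelParameters:

        // Hypothetical usage: average three FastTree regression models into a
        // single ensemble whose leaf outputs are scaled by 1/3.
        IPredictor combined = combiner.CombineModels(new IPredictor[] { model1, model2, model3 });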
Example #9
 public GradientDescent(TreeEnsemble ensemble, Dataset trainData, double[] initTrainScores, IGradientAdjuster gradientWrapper)
     : base(ensemble, trainData, initTrainScores)
 {
     _gradientWrapper = gradientWrapper;
     _treeScores      = new List<double[]>();
 }
Example #10
 public ConjugateGradientDescent(TreeEnsemble ensemble, Dataset trainData, double[] initTrainScores, IGradientAdjuster gradientWrapper)
     : base(ensemble, trainData, initTrainScores, gradientWrapper)
 {
     _currentDk = new double[trainData.NumDocs];
 }
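These optimizer constructors share one signature and differ only in how they adjust gradients and track scores. A hedged selection sketch, assuming a hypothetical useAcceleratedGradientDescent flag in the surrounding trainer:

 // Hypothetical selection between the plain and accelerated optimizers.
 OptimizationAlgorithm optimizer = useAcceleratedGradientDescent
     ? new AcceleratedGradientDescent(ensemble, trainData, initScores, adjuster)
     : new GradientDescent(ensemble, trainData, initScores, adjuster);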