Ejemplo n.º 1
0
        // Runs early stopping on a learner whose first constructor argument is 5 while also
        // passing 5 as the last LearnWithEarlyStopping argument. The body asserts nothing;
        // presumably an expected-exception attribute outside this view checks the
        // failure (TODO confirm).
        public void RegressionGradientBoostLearner_LearnWithEarlyStopping_ToFewIterations()
        {
            var (features, labels) = DataSetUtilities.LoadDecisionTreeDataSet();

            // Random split with a fixed seed so the run is repeatable.
            var partition  = new RandomTrainingTestIndexSplitter<double>(0.6, 1234).SplitSet(features, labels);
            var training   = partition.TrainingSet;
            var validation = partition.TestSet;

            var learner = new RegressionSquareLossGradientBoostLearner(5, 0.1, 3, 1, 1e-6, 1.0, 0, false);
            var metric  = new MeanSquaredErrorRegressionMetric();

            var model = learner.LearnWithEarlyStopping(
                training.Observations, training.Targets,
                validation.Observations, validation.Targets,
                metric, 5);
        }
Ejemplo n.º 2
0
        // Trains on the decision tree data set with the sixth constructor argument at .5
        // (presumably the sub-sample ratio, hence "Stochastic" - TODO confirm) and checks
        // the mean squared error of the predictions made on the training data itself.
        public void RegressionSquareLossGradientBoostLearner_Stochastic_Learn()
        {
            var (features, labels) = DataSetUtilities.LoadDecisionTreeDataSet();

            var learner = new RegressionSquareLossGradientBoostLearner(50, 0.1, 3, 1, 1e-6, .5, 0, false);
            var fitted  = learner.Learn(features, labels).Predict(features);

            var error = new MeanSquaredErrorRegressionMetric().Error(labels, fitted);

            Assert.AreEqual(0.025391913155163696, error, 0.0001);
        }
Ejemplo n.º 3
0
        // Trains a square-loss gradient boost learner on the decision tree data set with the
        // seventh constructor argument set to 1 (presumably featuresPrSplit - TODO confirm
        // against the constructor signature) and checks the mean squared error of the
        // predictions made on the training data itself.
        public void RegressionSquareLossGradientBoostLearner_FeaturesPrSplit_Learn()
        {
            var(observations, targets) = DataSetUtilities.LoadDecisionTreeDataSet();

            var sut   = new RegressionSquareLossGradientBoostLearner(50, 0.1, 3, 1, 1e-6, 1.0, 1, false);
            var model = sut.Learn(observations, targets);

            var predictions = model.Predict(observations);

            var evaluator = new MeanSquaredErrorRegressionMetric();
            var actual    = evaluator.Error(targets, predictions);

            // Compare with a tolerance: exact equality on a computed double is brittle,
            // since the last bits can differ between runtimes and floating-point code paths.
            Assert.AreEqual(0.074376126071145687, actual, 0.0001);
        }
        // Runs early stopping on a learner whose first constructor argument is 5 while also
        // passing 5 as the last LearnWithEarlyStopping argument. The body asserts nothing;
        // presumably an expected-exception attribute outside this view checks the
        // failure (TODO confirm).
        public void RegressionGradientBoostLearner_LearnWithEarlyStopping_ToFewIterations()
        {
            // Columns F1 and F2 are the features, column T is the regression target.
            var csv      = new CsvParser(() => new StringReader(Resources.DecisionTreeData));
            var features = csv.EnumerateRows("F1", "F2").ToF64Matrix();
            var labels   = csv.EnumerateRows("T").ToF64Vector();

            // Random split with a fixed seed so the run is repeatable.
            var partition  = new RandomTrainingTestIndexSplitter<double>(0.6, 1234).SplitSet(features, labels);
            var training   = partition.TrainingSet;
            var validation = partition.TestSet;

            var learner = new RegressionSquareLossGradientBoostLearner(5, 0.1, 3, 1, 1e-6, 1.0, 0, false);
            var metric  = new MeanSquaredErrorRegressionMetric();

            var model = learner.LearnWithEarlyStopping(
                training.Observations, training.Targets,
                validation.Observations, validation.Targets,
                metric, 5);
        }
        // Trains a square-loss gradient boost learner on the embedded decision tree data with
        // the seventh constructor argument set to 1 (presumably featuresPrSplit - TODO confirm
        // against the constructor signature) and checks the mean squared error of the
        // predictions made on the training data itself.
        public void RegressionSquareLossGradientBoostLearner_FeaturesPrSplit_Learn()
        {
            // Columns F1 and F2 are the features, column T is the regression target.
            var parser       = new CsvParser(() => new StringReader(Resources.DecisionTreeData));
            var observations = parser.EnumerateRows("F1", "F2").ToF64Matrix();
            var targets      = parser.EnumerateRows("T").ToF64Vector();

            var sut   = new RegressionSquareLossGradientBoostLearner(50, 0.1, 3, 1, 1e-6, 1.0, 1, false);
            var model = sut.Learn(observations, targets);

            var predictions = model.Predict(observations);

            var evaluator = new MeanSquaredErrorRegressionMetric();
            var actual    = evaluator.Error(targets, predictions);

            // Compare with a tolerance: exact equality on a computed double is brittle,
            // since the last bits can differ between runtimes and floating-point code paths.
            Assert.AreEqual(0.074376126071145687, actual, 0.0001);
        }
        // Trains on the embedded decision tree data with the sixth constructor argument at .5
        // (presumably the sub-sample ratio, hence "Stochastic" - TODO confirm) and checks
        // the mean squared error of the predictions made on the training data itself.
        public void RegressionSquareLossGradientBoostLearner_Stochastic_Learn()
        {
            // Columns F1 and F2 are the features, column T is the regression target.
            var csv      = new CsvParser(() => new StringReader(Resources.DecisionTreeData));
            var features = csv.EnumerateRows("F1", "F2").ToF64Matrix();
            var labels   = csv.EnumerateRows("T").ToF64Vector();

            var learner = new RegressionSquareLossGradientBoostLearner(50, 0.1, 3, 1, 1e-6, .5, 0, false);
            var fitted  = learner.Learn(features, labels).Predict(features);

            var error = new MeanSquaredErrorRegressionMetric().Error(labels, fitted);

            Assert.AreEqual(0.025391913155163696, error, 0.0001);
        }
        // Example: fits a gradient boost regressor with its default parameters on the white
        // wine quality data and traces the mean squared error on the training and test sets.
        public void GradientBoost_Default_Parameters()
        {
            #region read and split data
            // Use StreamReader(filepath) when running from filesystem
            var csv          = new CsvParser(() => new StringReader(Resources.winequality_white));
            var targetColumn = "quality";

            // every column except the target is a feature
            var features = csv.EnumerateRows(name => name != targetColumn).ToF64Matrix();

            // the target column holds the regression values
            var labels = csv.EnumerateRows(targetColumn).ToF64Vector();

            // Regression problem, so a plain random splitter is used:
            // 70 % of the rows go to training, the remaining 30 % to the test set.
            var partition = new RandomTrainingTestIndexSplitter<double>(trainingPercentage: 0.7, seed: 24)
                            .SplitSet(features, labels);
            var training = partition.TrainingSet;
            var holdOut  = partition.TestSet;
            #endregion

            // only runParallel is specified; everything else keeps its default value
            var regressor = new RegressionSquareLossGradientBoostLearner(runParallel: false);

            // fit on the training portion only
            var fitted = regressor.Learn(training.Observations, training.Targets);

            // predictions for both portions
            var trainingOutput = fitted.Predict(training.Observations);
            var holdOutOutput  = fitted.Predict(holdOut.Observations);

            // mean squared error is the evaluation metric for this regression task
            var mse = new MeanSquaredErrorRegressionMetric();

            var trainingMse = mse.Error(training.Targets, trainingOutput);
            var holdOutMse  = mse.Error(holdOut.Targets, holdOutOutput);

            TraceTrainingAndTestError(trainingMse, holdOutMse);
        }
Ejemplo n.º 8
0
        // Trains with early stopping (first constructor argument 1000, last
        // LearnWithEarlyStopping argument 5) and checks both the test-set mean squared
        // error and the number of trees kept in the resulting model.
        public void RegressionGradientBoostLearner_LearnWithEarlyStopping()
        {
            var (features, labels) = DataSetUtilities.LoadDecisionTreeDataSet();

            // Random split with a fixed seed so the expected values below are repeatable.
            var partition  = new RandomTrainingTestIndexSplitter<double>(0.6, 1234).SplitSet(features, labels);
            var training   = partition.TrainingSet;
            var validation = partition.TestSet;

            var learner = new RegressionSquareLossGradientBoostLearner(1000, 0.1, 3, 1, 1e-6, 1.0, 0, false);
            var metric  = new MeanSquaredErrorRegressionMetric();

            var stopped = learner.LearnWithEarlyStopping(
                training.Observations, training.Targets,
                validation.Observations, validation.Targets,
                metric, 5);

            var error = metric.Error(validation.Targets, stopped.Predict(validation.Observations));

            Assert.AreEqual(0.061035472792879512, error, 0.000001);
            Assert.AreEqual(40, stopped.Trees.Length);
        }
        // Trains with early stopping (first constructor argument 1000, last
        // LearnWithEarlyStopping argument 5) on the embedded decision tree data and checks
        // both the test-set mean squared error and the number of trees kept in the model.
        public void RegressionGradientBoostLearner_LearnWithEarlyStopping()
        {
            // Columns F1 and F2 are the features, column T is the regression target.
            var csv      = new CsvParser(() => new StringReader(Resources.DecisionTreeData));
            var features = csv.EnumerateRows("F1", "F2").ToF64Matrix();
            var labels   = csv.EnumerateRows("T").ToF64Vector();

            // Random split with a fixed seed so the expected values below are repeatable.
            var partition  = new RandomTrainingTestIndexSplitter<double>(0.6, 1234).SplitSet(features, labels);
            var training   = partition.TrainingSet;
            var validation = partition.TestSet;

            var learner = new RegressionSquareLossGradientBoostLearner(1000, 0.1, 3, 1, 1e-6, 1.0, 0, false);
            var metric  = new MeanSquaredErrorRegressionMetric();

            var stopped = learner.LearnWithEarlyStopping(
                training.Observations, training.Targets,
                validation.Observations, validation.Targets,
                metric, 5);

            var error = metric.Error(validation.Targets, stopped.Predict(validation.Observations));

            Assert.AreEqual(0.061035472792879512, error, 0.000001);
            Assert.AreEqual(40, stopped.Trees.Length);
        }
        // Calls LearnWithEarlyStopping with featuresPrSplit set to 4 on matrices that only
        // have 3 columns. The body asserts nothing; going by the method name the call is
        // presumably expected to throw, checked by an attribute outside this view
        // (TODO confirm).
        public void RegressionGradientBoostLearner_LearnWithEarlyStopping_when_more_featuresPerSlit_than_featureCount_Throw()
        {
            var learner = new RegressionSquareLossGradientBoostLearner(
                500, 0.1, 10, 15, 0.01, 0.8,
                featuresPrSplit: 4);

            IRegressionMetric mse = new MeanSquaredErrorRegressionMetric();

            // Freshly allocated placeholder data: only the dimensions are chosen here.
            const int trainRowCount = 5;
            const int testRowCount  = 6;
            const int columnCount   = 3;

            var trainMatrix = new F64Matrix(trainRowCount, columnCount);
            var trainLabels = new double[trainRowCount];
            var testMatrix  = new F64Matrix(testRowCount, columnCount);
            var testLabels  = new double[testRowCount];

            var data = new TrainingTestSetSplit(trainMatrix, trainLabels, testMatrix, testLabels);

            var model = learner.LearnWithEarlyStopping(
                data.TrainingSet.Observations, data.TrainingSet.Targets,
                data.TestSet.Observations, data.TestSet.Targets,
                mse,
                earlyStoppingRounds: 20);
        }
Ejemplo n.º 11
0
        // Trains on a shuffled 70 % subset of the glass data set, selected through an index
        // array, and checks the mean absolute error of the predictions on that same subset.
        public void RegressionSquareLossGradientBoostLearner_Learn_Indexed()
        {
            var (features, labels) = DataSetUtilities.LoadGlassDataSet();

            var learner = new RegressionSquareLossGradientBoostLearner(50, 0.1, 3, 1, 1e-6, 1.0, 0, false);

            // Shuffle all row indices with a fixed seed, then keep the first 70 %.
            var shuffled = Enumerable.Range(0, labels.Length).ToArray();
            shuffled.Shuffle(new Random(42));

            var sampleSize = (int)(labels.Length * 0.7);
            var sample     = shuffled.Take(sampleSize).ToArray();

            // Learn only from the sampled rows, predict every row, then score the sampled rows.
            var fitted        = learner.Learn(features, labels, sample);
            var allOutput     = fitted.Predict(features);
            var sampledOutput = allOutput.GetIndices(sample);
            var sampledLabels = labels.GetIndices(sample);

            var error = new MeanAbsolutErrorRegressionMetric().Error(sampledLabels, sampledOutput);

            Assert.AreEqual(0.23625469946001074, error, 0.0001);
        }
        // Trains on a shuffled 70 % subset of the embedded glass data, selected through an
        // index array, and checks the mean absolute error of the predictions on that subset.
        public void RegressionSquareLossGradientBoostLearner_Learn_Indexed()
        {
            // Every column except "Target" is a feature.
            var csv      = new CsvParser(() => new StringReader(Resources.Glass));
            var features = csv.EnumerateRows(name => name != "Target").ToF64Matrix();
            var labels   = csv.EnumerateRows("Target").ToF64Vector();

            var learner = new RegressionSquareLossGradientBoostLearner(50, 0.1, 3, 1, 1e-6, 1.0, 0, false);

            // Shuffle all row indices with a fixed seed, then keep the first 70 %.
            var shuffled = Enumerable.Range(0, labels.Length).ToArray();
            shuffled.Shuffle(new Random(42));

            var sampleSize = (int)(labels.Length * 0.7);
            var sample     = shuffled.Take(sampleSize).ToArray();

            // Learn only from the sampled rows, predict every row, then score the sampled rows.
            var fitted        = learner.Learn(features, labels, sample);
            var allOutput     = fitted.Predict(features);
            var sampledOutput = allOutput.GetIndices(sample);
            var sampledLabels = labels.GetIndices(sample);

            var error = new MeanAbsolutErrorRegressionMetric().Error(sampledLabels, sampledOutput);

            Assert.AreEqual(0.23625469946001074, error, 0.0001);
        }
        // Example: tunes five gradient boost hyperparameters with a random search on the white
        // wine quality data, retrains on the full training set with the best parameter set
        // found, and traces the resulting training and test error.
        public void GradientBoost_Optimize_Hyperparameters()
        {
            #region read and split data
            // Use StreamReader(filepath) when running from filesystem
            var csv          = new CsvParser(() => new StringReader(Resources.winequality_white));
            var targetColumn = "quality";

            // every column except the target is a feature
            var features = csv.EnumerateRows(name => name != targetColumn).ToF64Matrix();

            // the target column holds the regression values
            var labels = csv.EnumerateRows(targetColumn).ToF64Vector();

            // Regression problem, so a plain random splitter is used:
            // 70 % of the rows go to training, the remaining 30 % to the test set.
            var partition = new RandomTrainingTestIndexSplitter<double>(trainingPercentage: 0.7, seed: 24)
                            .SplitSet(features, labels);
            var training = partition.TrainingSet;
            var holdOut  = partition.TestSet;
            #endregion

            // mean squared error is the evaluation metric for this regression task
            var mse = new MeanSquaredErrorRegressionMetric();

            var featureCount = training.Observations.ColumnCount;

            // Search space for the optimizer. The position in this array is the position
            // of the value in the parameter vector handed to the objective below.
            var searchSpace = new IParameterSpec[]
            {
                // [0] iterations
                new MinMaxParameterSpec(min: 80, max: 300,
                                        transform: Transform.Linear, parameterType: ParameterType.Discrete),

                // [1] learning rate
                new MinMaxParameterSpec(min: 0.02, max:  0.2,
                                        transform: Transform.Logarithmic, parameterType: ParameterType.Continuous),

                // [2] maximumTreeDepth
                new MinMaxParameterSpec(min: 8, max: 15,
                                        transform: Transform.Linear, parameterType: ParameterType.Discrete),

                // [3] subSampleRatio
                new MinMaxParameterSpec(min: 0.5, max: 0.9,
                                        transform: Transform.Linear, parameterType: ParameterType.Continuous),

                // [4] featuresPrSplit
                new MinMaxParameterSpec(min: 1, max: featureCount,
                                        transform: Transform.Linear, parameterType: ParameterType.Discrete),
            };

            // Carve a validation set out of the training data, so candidates are scored
            // on rows they were not fitted on and the test set stays untouched.
            var tuning = new RandomTrainingTestIndexSplitter<double>(trainingPercentage: 0.7, seed: 24)
                         .SplitSet(training.Observations, training.Targets);

            // Builds a learner from a parameter vector laid out like searchSpace.
            Func<double[], RegressionSquareLossGradientBoostLearner> createLearner = values =>
                new RegressionSquareLossGradientBoostLearner(
                    iterations: (int)values[0],
                    learningRate: values[1],
                    maximumTreeDepth: (int)values[2],
                    subSampleRatio: values[3],
                    featuresPrSplit: (int)values[4],
                    runParallel: false);

            // Objective to minimize: the validation error of a candidate parameter vector.
            Func<double[], OptimizerResult> objective = candidate =>
            {
                var candidateModel = createLearner(candidate)
                                     .Learn(tuning.TrainingSet.Observations, tuning.TrainingSet.Targets);

                var candidateOutput = candidateModel.Predict(tuning.TestSet.Observations);
                var candidateMse    = mse.Error(tuning.TestSet.Targets, candidateOutput);

                // report the score of this candidate
                Trace.WriteLine(string.Format("Candidate Error: {0:0.0000}, Candidate Parameters: {1}",
                                              candidateMse, string.Join(", ", candidate)));

                return new OptimizerResult(candidate, candidateMse);
            };

            // random search over the space: 30 candidates
            var search = new RandomSearchOptimizer(searchSpace, iterations: 30, runParallel: true);

            // the parameter set of the best result
            var winner = search.OptimizeBest(objective).ParameterSet;

            // retrain with the winning parameters, this time on the whole training set
            var fitted = createLearner(winner).Learn(training.Observations, training.Targets);

            // predictions for both portions
            var trainingOutput = fitted.Predict(training.Observations);
            var holdOutOutput  = fitted.Predict(holdOut.Observations);

            var trainingMse = mse.Error(training.Targets, trainingOutput);
            var holdOutMse  = mse.Error(holdOut.Targets, holdOutOutput);

            // report the hyperparameters that were found
            Trace.WriteLine(string.Format("Found parameters, iterations:  {0}, learning rate {1:0.000}:  maximumTreeDepth: {2}, subSampleRatio {3:0.000}, featuresPrSplit: {4} ",
                                          (int)winner[0], winner[1], (int)winner[2], winner[3], (int)winner[4]));
            TraceTrainingAndTestError(trainingMse, holdOutMse);
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Reads "dataset.csv", tunes a square-loss gradient boost regressor with a random
        /// search scored on a 70/30 split, retrains on every row with the best parameter set
        /// found, and returns the prediction for <paramref name="pred_Features"/>.
        /// Each call repeats the file read and the whole search.
        /// </summary>
        /// <param name="pred_Features">Feature values of the single observation to predict.</param>
        /// <returns>The value the retrained model predicts for <paramref name="pred_Features"/>.</returns>
        public static double FitGBT(double[] pred_Features)
        {
            var csv          = new CsvParser(() => new StreamReader("dataset.csv"), separator: ',');
            var targetColumn = "Y";

            // every column except the target is a feature
            var features = csv.EnumerateRows(name => name != targetColumn).ToF64Matrix();

            // the target column holds the regression values
            var labels = csv.EnumerateRows(targetColumn).ToF64Vector();

            var mse = new MeanSquaredErrorRegressionMetric();

            // { min, max } per hyperparameter. The position in this array is the position
            // of the value in the parameter vector handed to the objective below.
            var searchSpace = new double[][]
            {
                new double[] { 80, 300 },                 // [0] iterations
                new double[] { 0.02, 0.2 },               // [1] learning rate
                new double[] { 8, 15 },                   // [2] maximumTreeDepth
                new double[] { 0.5, 0.9 },                // [3] subSampleRatio
                new double[] { 1, features.ColumnCount }, // [4] featuresPrSplit, up to the number of features
            };

            // 70 % of the rows fit each candidate, the remaining 30 % score it
            var tuning = new RandomTrainingTestIndexSplitter<double>(trainingPercentage: 0.7, seed: 24)
                         .SplitSet(features, labels);

            // Builds a learner from a parameter vector laid out like searchSpace.
            Func<double[], RegressionSquareLossGradientBoostLearner> createLearner = values =>
                new RegressionSquareLossGradientBoostLearner(
                    iterations: (int)values[0],
                    learningRate: values[1],
                    maximumTreeDepth: (int)values[2],
                    subSampleRatio: values[3],
                    featuresPrSplit: (int)values[4],
                    runParallel: false);

            // Objective to minimize: the validation error of a candidate parameter vector.
            Func<double[], OptimizerResult> objective = candidate =>
            {
                var candidateModel = createLearner(candidate)
                                     .Learn(tuning.TrainingSet.Observations, tuning.TrainingSet.Targets);

                var candidateOutput = candidateModel.Predict(tuning.TestSet.Observations);
                var candidateMse    = mse.Error(tuning.TestSet.Targets, candidateOutput);

                return new OptimizerResult(candidate, candidateMse);
            };

            // random search over the space: 30 candidates
            var search = new RandomSearchOptimizer(searchSpace, iterations: 30, runParallel: true);

            // the parameter set of the best result
            var winner = search.OptimizeBest(objective).ParameterSet;

            // retrain with the winning parameters on all rows, then predict the requested observation
            var fitted = createLearner(winner).Learn(features, labels);
            var output = fitted.Predict(pred_Features);

            return output;
        }