コード例 #1
0
        // Trains a mean-strategy ensemble of four regression decision tree learners on the
        // aptitude data set, predicts every observation row one at a time, and compares the
        // resulting mean squared error with a fixed reference value.
        public void RegressionEnsembleModel_Predict_single()
        {
            var (observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var ensembleLearner = new RegressionEnsembleLearner(
                new IIndexedLearner<double>[]
                {
                    new RegressionDecisionTreeLearner(2),
                    new RegressionDecisionTreeLearner(5),
                    new RegressionDecisionTreeLearner(7),
                    new RegressionDecisionTreeLearner(9)
                },
                new MeanRegressionEnsembleStrategy());

            var sut = ensembleLearner.Learn(observations, targets);

            // Exercise the single-observation Predict overload: one call per row.
            var predictions = new double[targets.Length];
            for (var row = 0; row < predictions.Length; row++)
            {
                predictions[row] = sut.Predict(observations.Row(row));
            }

            var error = new MeanSquaredErrorRegressionMetric().Error(targets, predictions);

            Assert.AreEqual(0.033195970695970689, error, 0.0000001);
        }
コード例 #2
0
        // Trains a mean-strategy ensemble of four regression decision tree learners on the
        // aptitude data set and checks the named variable importances it reports.
        public void RegressionEnsembleModel_GetVariableImportance()
        {
            var (observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            // Maps each feature name to the index passed to GetVariableImportance.
            var featureNameToIndex = new Dictionary<string, int>
            {
                ["AptitudeTestScore"] = 0,
                ["PreviousExperience_month"] = 1
            };

            var ensembleLearner = new RegressionEnsembleLearner(
                new IIndexedLearner<double>[]
                {
                    new RegressionDecisionTreeLearner(2),
                    new RegressionDecisionTreeLearner(5),
                    new RegressionDecisionTreeLearner(7),
                    new RegressionDecisionTreeLearner(9)
                },
                new MeanRegressionEnsembleStrategy());

            var sut = ensembleLearner.Learn(observations, targets);

            var actual = sut.GetVariableImportance(featureNameToIndex);
            var expected = new Dictionary<string, double>
            {
                ["PreviousExperience_month"] = 100.0,
                ["AptitudeTestScore"] = 3.46067371526717
            };

            Assert.AreEqual(expected.Count, actual.Count);

            // Entries are compared position by position, so the enumeration order of the
            // returned dictionary has to match the expected one, not just its contents.
            var expectedEntries = expected.ToList();
            var actualEntries = actual.ToList();

            for (var position = 0; position < expectedEntries.Count; position++)
            {
                Assert.AreEqual(expectedEntries[position].Key, actualEntries[position].Key);
                Assert.AreEqual(expectedEntries[position].Value, actualEntries[position].Value, 0.000001);
            }
        }
コード例 #3
0
        // Trains a mean-strategy ensemble of four regression decision tree learners on the
        // aptitude CSV resource, predicts all observations with a single Predict call on the
        // full matrix, and compares the mean squared error with a fixed reference value.
        public void RegressionEnsembleModel_Predict_Multiple()
        {
            var parser       = new CsvParser(() => new StringReader(Resources.AptitudeData));
            // Observations: everything that passes the `v != "Pass"` filter; targets: "Pass".
            var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
            var targets      = parser.EnumerateRows("Pass").ToF64Vector();

            var learners = new IIndexedLearner <double>[]
            {
                new RegressionDecisionTreeLearner(2),
                new RegressionDecisionTreeLearner(5),
                new RegressionDecisionTreeLearner(7),
                new RegressionDecisionTreeLearner(9)
            };

            var learner = new RegressionEnsembleLearner(learners, new MeanRegressionEnsembleStrategy());
            var sut     = learner.Learn(observations, targets);

            // The matrix overload of Predict is the behavior under test here.
            var predictions = sut.Predict(observations);

            var metric = new MeanSquaredErrorRegressionMetric();
            var actual = metric.Error(targets, predictions);

            Assert.AreEqual(0.033195970695970689, actual, 0.0000001);
        }
コード例 #4
0
        // Trains a mean-strategy ensemble of four regression decision tree learners on the
        // aptitude data set and checks the raw (index-ordered) variable importance array.
        public void RegressionEnsembleModel_GetRawVariableImportance()
        {
            var (observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var ensembleLearner = new RegressionEnsembleLearner(
                new IIndexedLearner<double>[]
                {
                    new RegressionDecisionTreeLearner(2),
                    new RegressionDecisionTreeLearner(5),
                    new RegressionDecisionTreeLearner(7),
                    new RegressionDecisionTreeLearner(9)
                },
                new MeanRegressionEnsembleStrategy());

            var sut = ensembleLearner.Learn(observations, targets);

            var actual = sut.GetRawVariableImportance();
            double[] expected = { 100.0, 3.46067371526717 };

            Assert.AreEqual(expected.Length, actual.Length);

            // Element-wise comparison with an absolute tolerance of 1e-6.
            var index = 0;
            foreach (var expectedImportance in expected)
            {
                Assert.AreEqual(expectedImportance, actual[index], 0.000001);
                index++;
            }
        }
コード例 #5
0
        // Loads the aptitude CSV resource, trains a mean-strategy ensemble of four regression
        // decision tree learners on it, and checks the raw variable importance array.
        public void RegressionEnsembleModel_GetRawVariableImportance()
        {
            var csv = new CsvParser(() => new StringReader(Resources.AptitudeData));

            // Observations: everything that passes the `!= "Pass"` filter; targets: "Pass".
            var observations = csv.EnumerateRows(name => name != "Pass").ToF64Matrix();
            var targets = csv.EnumerateRows("Pass").ToF64Vector();

            var treeLearners = new IIndexedLearner<double>[]
            {
                new RegressionDecisionTreeLearner(2),
                new RegressionDecisionTreeLearner(5),
                new RegressionDecisionTreeLearner(7),
                new RegressionDecisionTreeLearner(9)
            };

            var sut = new RegressionEnsembleLearner(treeLearners, new MeanRegressionEnsembleStrategy())
                .Learn(observations, targets);

            var actual = sut.GetRawVariableImportance();
            double[] expected = { 100.0, 3.46067371526717 };

            Assert.AreEqual(expected.Length, actual.Length);

            // Element-wise comparison with an absolute tolerance of 1e-6.
            var position = 0;
            while (position < expected.Length)
            {
                Assert.AreEqual(expected[position], actual[position], 0.000001);
                position++;
            }
        }
コード例 #6
0
        // Trains the ensemble learner through the indexed Learn overload, using only the
        // indices 0..24 of the decision tree CSV resource, then predicts all observations and
        // compares the mean squared error with a fixed reference value.
        public void RegressionEnsembleLearner_Learn_Indexed()
        {
            var sut = new RegressionEnsembleLearner(
                new IIndexedLearner<double>[]
                {
                    new RegressionDecisionTreeLearner(2),
                    new RegressionDecisionTreeLearner(5),
                    new RegressionDecisionTreeLearner(7),
                    new RegressionDecisionTreeLearner(9)
                },
                new MeanRegressionEnsembleStrategy());

            var csv = new CsvParser(() => new StringReader(Resources.DecisionTreeData));
            var observations = csv.EnumerateRows("F1", "F2").ToF64Matrix();
            var targets = csv.EnumerateRows("T").ToF64Vector();

            // Only these 25 indices are handed to Learn; prediction below uses every row.
            var trainingIndices = Enumerable.Range(0, 25).ToArray();

            var model = sut.Learn(observations, targets, trainingIndices);
            var predictions = model.Predict(observations);

            var error = new MeanSquaredErrorRegressionMetric().Error(targets, predictions);

            Assert.AreEqual(0.13909386278812202, error, 0.0001);
        }
コード例 #7
0
        // Trains the ensemble learner constructed with the extra third argument 0.7 on the
        // decision tree data set, predicts all observations, and compares the mean squared
        // error with a fixed reference value.
        public void RegressionEnsembleLearner_Learn_Bagging()
        {
            var treeLearners = new IIndexedLearner<double>[]
            {
                new RegressionDecisionTreeLearner(2),
                new RegressionDecisionTreeLearner(5),
                new RegressionDecisionTreeLearner(7),
                new RegressionDecisionTreeLearner(9)
            };

            // NOTE(review): 0.7 is presumably the sub-sample ratio used for bagging - confirm
            // against the RegressionEnsembleLearner constructor.
            var sut = new RegressionEnsembleLearner(
                treeLearners,
                new MeanRegressionEnsembleStrategy(),
                0.7);

            var (observations, targets) = DataSetUtilities.LoadDecisionTreeDataSet();

            var predictions = sut.Learn(observations, targets).Predict(observations);

            var error = new MeanSquaredErrorRegressionMetric().Error(targets, predictions);

            Assert.AreEqual(0.0318163057544871, error, 0.0001);
        }
コード例 #8
0
        // Trains the ensemble learner through the indexed Learn overload, using only the
        // indices 0..24 of the decision tree data set, then predicts all observations and
        // compares the mean squared error with a fixed reference value.
        public void RegressionEnsembleLearner_Learn_Indexed()
        {
            var sut = new RegressionEnsembleLearner(
                new IIndexedLearner<double>[]
                {
                    new RegressionDecisionTreeLearner(2),
                    new RegressionDecisionTreeLearner(5),
                    new RegressionDecisionTreeLearner(7),
                    new RegressionDecisionTreeLearner(9)
                },
                new MeanRegressionEnsembleStrategy());

            var (observations, targets) = DataSetUtilities.LoadDecisionTreeDataSet();

            // Only these 25 indices are handed to Learn; prediction below uses every row.
            var trainingIndices = Enumerable.Range(0, 25).ToArray();

            var model = sut.Learn(observations, targets, trainingIndices);
            var predictions = model.Predict(observations);

            var error = new MeanSquaredErrorRegressionMetric().Error(targets, predictions);

            Assert.AreEqual(0.13909386278812202, error, 0.0001);
        }
コード例 #9
0
        // Loads the aptitude CSV resource, trains a mean-strategy ensemble of four regression
        // decision tree learners on it, and checks the named variable importances it reports.
        public void RegressionEnsembleModel_GetVariableImportance()
        {
            var csv = new CsvParser(() => new StringReader(Resources.AptitudeData));

            // Observations: everything that passes the `!= "Pass"` filter; targets: "Pass".
            var observations = csv.EnumerateRows(name => name != "Pass").ToF64Matrix();
            var targets = csv.EnumerateRows("Pass").ToF64Vector();

            // Maps each feature name to the index passed to GetVariableImportance.
            var featureNameToIndex = new Dictionary<string, int>
            {
                { "AptitudeTestScore", 0 },
                { "PreviousExperience_month", 1 }
            };

            var treeLearners = new IIndexedLearner<double>[]
            {
                new RegressionDecisionTreeLearner(2),
                new RegressionDecisionTreeLearner(5),
                new RegressionDecisionTreeLearner(7),
                new RegressionDecisionTreeLearner(9)
            };

            var sut = new RegressionEnsembleLearner(treeLearners, new MeanRegressionEnsembleStrategy())
                .Learn(observations, targets);

            var actual = sut.GetVariableImportance(featureNameToIndex);
            var expected = new Dictionary<string, double>
            {
                { "PreviousExperience_month", 100.0 },
                { "AptitudeTestScore", 3.46067371526717 }
            };

            Assert.AreEqual(expected.Count, actual.Count);

            // Entries are compared position by position, so the enumeration order of the
            // returned dictionary has to match the expected one, not just its contents.
            var expectedEntries = expected.ToList();
            var actualEntries = actual.ToList();

            for (var position = 0; position < expectedEntries.Count; position++)
            {
                var wanted = expectedEntries[position];
                var got = actualEntries[position];

                Assert.AreEqual(wanted.Key, got.Key);
                Assert.AreEqual(wanted.Value, got.Value, 0.000001);
            }
        }
コード例 #10
0
        // Example: combines AdaBoost, random forest and square-loss gradient boost regression
        // learners into one ensemble on the white wine quality data, then traces the mean
        // squared error on the training and test splits.
        public void RegressionEnsembleLearner()
        {
            #region read and split data
            // Swap in a StreamReader(filepath) to read the data from the file system instead.
            var parser = new CsvParser(() => new StringReader(Resources.winequality_white));
            const string targetName = "quality";

            // Feature matrix: everything except the target.
            var observations = parser
                .EnumerateRows(c => c != targetName)
                .ToF64Matrix();

            // Regression targets.
            var targets = parser
                .EnumerateRows(targetName)
                .ToF64Vector();

            // Regression problem, so a random training/test index splitter is used.
            // 70 % of the data goes to training, the remaining 30 % to the test set.
            var splitter = new RandomTrainingTestIndexSplitter<double>(trainingPercentage: 0.7, seed: 24);

            var split = splitter.SplitSet(observations, targets);
            var training = split.TrainingSet;
            var holdout = split.TestSet;
            #endregion

            // Learners that make up the ensemble.
            var ensembleLearners = new IIndexedLearner<double>[]
            {
                new RegressionAdaBoostLearner(maximumTreeDepth: 15),
                new RegressionRandomForestLearner(runParallel: false),
                new RegressionSquareLossGradientBoostLearner(
                    iterations: 198,
                    learningRate: 0.028,
                    maximumTreeDepth: 12,
                    subSampleRatio: 0.559,
                    featuresPrSplit: 10,
                    runParallel: false)
            };

            // The ensemble learner combines all the provided learners
            // into a single ensemble model.
            var learner = new RegressionEnsembleLearner(learners: ensembleLearners);
            var model = learner.Learn(training.Observations, training.Targets);

            // Predict both the training and the test set.
            var trainPredictions = model.Predict(training.Observations);
            var testPredictions = model.Predict(holdout.Observations);

            // Mean squared error is the evaluation metric for this regression problem.
            var metric = new MeanSquaredErrorRegressionMetric();

            var trainError = metric.Error(training.Targets, trainPredictions);
            var testError = metric.Error(holdout.Targets, testPredictions);

            // The ensemble model achieves a lower test error
            // than any of the individual models:

            // RegressionAdaBoostLearner: 0.4005
            // RegressionRandomForestLearner: 0.4037
            // RegressionSquareLossGradientBoostLearner: 0.3936
            TraceTrainingAndTestError(trainError, testError);
        }