Esempio n. 1
0
        // Exercises the two-step API of RegressionBackwardEliminationModelSelectingEnsembleLearner:
        // LearnMetaFeatures followed by SelectModels. The selected model then predicts the data
        // it was trained on and its mean squared error is compared with a recorded value.
        public void RegressionBackwardEliminationModelSelectingEnsembleLearner_CreateMetaFeatures_Then_Learn()
        {
            // One decision tree learner per value; the value is the learner's single
            // constructor argument (presumably the maximum tree depth - confirm).
            var learnerArguments = new int[] { 2, 5, 7, 9, 11, 21, 23, 1, 14, 17, 19, 33 };
            var candidates = new IIndexedLearner<double>[learnerArguments.Length];
            for (var i = 0; i < candidates.Length; i++)
            {
                candidates[i] = new RegressionDecisionTreeLearner(learnerArguments[i]);
            }

            var sut = new RegressionBackwardEliminationModelSelectingEnsembleLearner(candidates, 5);

            var (observations, targets) = DataSetUtilities.LoadDecisionTreeDataSet();

            var metaFeatures = sut.LearnMetaFeatures(observations, targets);
            var ensemble = sut.SelectModels(observations, metaFeatures, targets);

            var error = new MeanSquaredErrorRegressionMetric()
                .Error(targets, ensemble.Predict(observations));

            Assert.AreEqual(0.010316259438112841, error, 0.0001);
        }
        // Exercises the two-step API of RegressionStackingEnsembleLearner: LearnMetaFeatures
        // followed by LearnStackingModel. The stacked model then predicts the data it was
        // trained on and its mean squared error is compared with a recorded value.
        public void RegressionStackingEnsembleLearner_CreateMetaFeatures_Then_Learn()
        {
            var baseLearners = new IIndexedLearner<double>[]
            {
                new RegressionDecisionTreeLearner(2), new RegressionDecisionTreeLearner(5),
                new RegressionDecisionTreeLearner(7), new RegressionDecisionTreeLearner(9)
            };

            // NOTE(review): the second constructor argument is presumably the learner that is
            // trained on the meta features - confirm against RegressionStackingEnsembleLearner.
            var secondLevelLearner = new RegressionDecisionTreeLearner(9);
            var crossValidation = new RandomCrossValidation<double>(5, 23);
            var sut = new RegressionStackingEnsembleLearner(baseLearners, secondLevelLearner,
                crossValidation, false);

            var (observations, targets) = DataSetUtilities.LoadDecisionTreeDataSet();

            var metaFeatures = sut.LearnMetaFeatures(observations, targets);
            var stackedModel = sut.LearnStackingModel(observations, metaFeatures, targets);

            var error = new MeanSquaredErrorRegressionMetric()
                .Error(targets, stackedModel.Predict(observations));

            Assert.AreEqual(0.06951934687172627, error, 0.0001);
        }
Esempio n. 3
0
        // Learns a single tree with GBMDecisionTreeLearner on the decision tree data set and
        // compares its mean squared error on that same data with a recorded value.
        public void GBMDecisionTreeLearner_Learn()
        {
            var (observations, targets) = DataSetUtilities.LoadDecisionTreeDataSet();

            // Every row is flagged as in-sample.
            var inSample        = targets.Select(t => true).ToArray();
            var orderedElements = new int[observations.ColumnCount][];
            var rows            = observations.RowCount;

            // Build one index array per feature column. Each array starts as 0..rows-1 and is
            // handed to SortWith together with the column values (presumably leaving the
            // indices ordered by ascending feature value - confirm against SortWith).
            for (int i = 0; i < observations.ColumnCount; i++)
            {
                var feature = observations.Column(i);
                var indices = Enumerable.Range(0, rows).ToArray();
                feature.SortWith(indices);
                orderedElements[i] = indices;
            }

            var sut  = new GBMDecisionTreeLearner(10);

            // The targets array is passed as the second, third and fourth argument of Learn.
            var tree = sut.Learn(observations, targets, targets, targets, orderedElements, inSample);

            var predictions = tree.Predict(observations);
            var evaluator   = new MeanSquaredErrorRegressionMetric();
            var actual      = evaluator.Error(targets, predictions);

            // Compare with a delta: the expected value is a recorded floating-point result, and
            // exact equality on doubles can fail on a different runtime or platform even when
            // the computation is correct.
            Assert.AreEqual(0.0046122425037232661, actual, 0.000001);
        }
        // Trains RegressionStackingEnsembleLearner through the Learn overload that takes an
        // index array (here 0..24), predicts all observations with the resulting model and
        // compares the mean squared error against all targets with a recorded value.
        public void RegressionStackingEnsembleLearner_Learn_Indexed()
        {
            var baseLearners = new IIndexedLearner<double>[]
            {
                new RegressionDecisionTreeLearner(2), new RegressionDecisionTreeLearner(5),
                new RegressionDecisionTreeLearner(7), new RegressionDecisionTreeLearner(9)
            };

            var secondLevelLearner = new RegressionDecisionTreeLearner(9);
            var crossValidation = new RandomCrossValidation<double>(5, 23);
            var sut = new RegressionStackingEnsembleLearner(baseLearners, secondLevelLearner,
                crossValidation, false);

            var (observations, targets) = DataSetUtilities.LoadDecisionTreeDataSet();

            // Only the first 25 indices are handed to Learn.
            var trainingIndices = Enumerable.Range(0, 25).ToArray();
            var model = sut.Learn(observations, targets, trainingIndices);

            var error = new MeanSquaredErrorRegressionMetric()
                .Error(targets, model.Predict(observations));

            Assert.AreEqual(0.133930222950635, error, 0.0001);
        }
        // Trains RegressionRandomModelSelectingEnsembleLearner through the Learn overload that
        // takes an index array (here 0..24), predicts all observations with the resulting model
        // and compares the mean squared error against all targets with a recorded value.
        public void RegressionRandomModelSelectingEnsembleLearner_Learn_Indexed()
        {
            // One decision tree learner per value; the value is the learner's single
            // constructor argument (presumably the maximum tree depth - confirm).
            var learnerArguments = new int[] { 2, 5, 7, 9, 11, 21, 23, 1, 14, 17, 19, 33 };
            var candidates = new IIndexedLearner<double>[learnerArguments.Length];
            for (var i = 0; i < candidates.Length; i++)
            {
                candidates[i] = new RegressionDecisionTreeLearner(learnerArguments[i]);
            }

            var sut = new RegressionRandomModelSelectingEnsembleLearner(candidates, 5);

            var (observations, targets) = DataSetUtilities.LoadDecisionTreeDataSet();

            // Only the first 25 indices are handed to Learn.
            var trainingIndices = Enumerable.Range(0, 25).ToArray();
            var model = sut.Learn(observations, targets, trainingIndices);

            var error = new MeanSquaredErrorRegressionMetric()
                .Error(targets, model.Predict(observations));

            Assert.AreEqual(0.13601421174394385, error, 0.0001);
        }
        // Trains RegressionForwardSearchModelSelectingEnsembleLearner with an explicit cross
        // validation, ensemble strategy and metric, then compares the model's mean squared
        // error on the training data with a recorded value.
        public void RegressionForwardSearchModelSelectingEnsembleLearner_Learn_Without_Replacement()
        {
            // One decision tree learner per value; the value is the learner's single
            // constructor argument (presumably the maximum tree depth - confirm).
            var learnerArguments = new int[] { 2, 5, 7, 9, 11, 21, 23, 1, 14, 17, 19, 33 };
            var candidates = new IIndexedLearner<double>[learnerArguments.Length];
            for (var i = 0; i < candidates.Length; i++)
            {
                candidates[i] = new RegressionDecisionTreeLearner(learnerArguments[i]);
            }

            // The same metric instance is given to the learner and used for the final score.
            var metric = new MeanSquaredErrorRegressionMetric();
            var crossValidation = new RandomCrossValidation<double>(5, 42);
            var strategy = new MeanRegressionEnsembleStrategy();

            // NOTE(review): judging by the test name, the trailing `false` presumably turns off
            // selection with replacement - confirm against the constructor signature.
            var sut = new RegressionForwardSearchModelSelectingEnsembleLearner(
                candidates, 5, crossValidation, strategy, metric, 1, false);

            var (observations, targets) = DataSetUtilities.LoadDecisionTreeDataSet();

            var model = sut.Learn(observations, targets);
            var error = metric.Error(targets, model.Predict(observations));

            Assert.AreEqual(0.010316259438112841, error, 0.0001);
        }
        // Trains RegressionRandomModelSelectingEnsembleLearner on the full decision tree data
        // set and compares the model's mean squared error on that data with a recorded value.
        public void RegressionRandomModelSelectingEnsembleLearner_Learn()
        {
            // One decision tree learner per value; the value is the learner's single
            // constructor argument (presumably the maximum tree depth - confirm).
            var learnerArguments = new int[] { 2, 5, 7, 9, 11, 21, 23, 1, 14, 17, 19, 33 };
            var candidates = new IIndexedLearner<double>[learnerArguments.Length];
            for (var i = 0; i < candidates.Length; i++)
            {
                candidates[i] = new RegressionDecisionTreeLearner(learnerArguments[i]);
            }

            var sut = new RegressionRandomModelSelectingEnsembleLearner(candidates, 5);

            var (observations, targets) = DataSetUtilities.LoadDecisionTreeDataSet();

            var model = sut.Learn(observations, targets);

            var error = new MeanSquaredErrorRegressionMetric()
                .Error(targets, model.Predict(observations));

            Assert.AreEqual(0.017238327841614508, error, 0.0001);
        }
        // Trains RegressionRandomModelSelectingEnsembleLearner with an explicit cross
        // validation, ensemble strategy and metric, then compares the model's mean squared
        // error on the training data with a recorded value.
        public void RegressionRandomModelSelectingEnsembleLearner_Learn_Start_With_3_Models()
        {
            // One decision tree learner per value; the value is the learner's single
            // constructor argument (presumably the maximum tree depth - confirm).
            var learnerArguments = new int[] { 2, 5, 7, 9, 11, 21, 23, 1, 14, 17, 19, 33 };
            var candidates = new IIndexedLearner<double>[learnerArguments.Length];
            for (var i = 0; i < candidates.Length; i++)
            {
                candidates[i] = new RegressionDecisionTreeLearner(learnerArguments[i]);
            }

            // The same metric instance is given to the learner and used for the final score.
            var metric = new MeanSquaredErrorRegressionMetric();
            var crossValidation = new RandomCrossValidation<double>(5, 42);
            var strategy = new MeanRegressionEnsembleStrategy();

            // NOTE(review): judging by the test name, the `3` is presumably the number of
            // models the selection starts with - confirm against the constructor signature.
            var sut = new RegressionRandomModelSelectingEnsembleLearner(
                candidates, 5, crossValidation, strategy, metric, 3, false);

            var (observations, targets) = DataSetUtilities.LoadDecisionTreeDataSet();

            var model = sut.Learn(observations, targets);
            var error = metric.Error(targets, model.Predict(observations));

            Assert.AreEqual(0.0090143589987671056, error, 0.0001);
        }
Esempio n. 9
0
        // Learns a single tree with GBMDecisionTreeLearner and checks the values that
        // AddRawVariableImportances writes into a zero-initialised array with one slot per
        // feature column.
        public void GBMTree_AddRawFeatureImportances()
        {
            var (observations, targets) = DataSetUtilities.LoadDecisionTreeDataSet();

            var columnCount = observations.ColumnCount;
            var rowCount = observations.RowCount;

            // Every row is flagged as in-sample.
            var inSample = Enumerable.Repeat(true, targets.Length).ToArray();

            // One index array per feature column, each starting as 0..rowCount-1 and handed to
            // SortWith together with the column values (presumably leaving the indices ordered
            // by ascending feature value - confirm against SortWith).
            var orderedElements = new int[columnCount][];
            for (var column = 0; column < columnCount; column++)
            {
                var sortedIndices = Enumerable.Range(0, rowCount).ToArray();
                observations.Column(column).SortWith(sortedIndices);
                orderedElements[column] = sortedIndices;
            }

            var learner = new GBMDecisionTreeLearner(10);

            // The targets array is passed as the second, third and fourth argument of Learn.
            var tree = learner.Learn(observations, targets, targets, targets, orderedElements, inSample);

            var actual = new double[columnCount];
            tree.AddRawVariableImportances(actual);

            var expected = new[] { 0.0, 105017.48701572006 };

            Assert.AreEqual(expected.Length, actual.Length);
            for (var i = 0; i < expected.Length; i++)
            {
                Assert.AreEqual(expected[i], actual[i], 0.01);
            }
        }
Esempio n. 10
0
        // Calls Validate on a TimeSeriesCrossValidation configured with an initialTrainingSize
        // of 300, which per the test name exceeds the number of observations in the data set.
        // The body asserts nothing.
        // NOTE(review): presumably the failure is verified by an expected-exception attribute
        // that is outside this view - confirm.
        public void TimeSeriesCrossValidation_Validate_InitialTrainingSize_Is_Larger_Than_Obsevations_Length()
        {
            var (observations, targets) = DataSetUtilities.LoadDecisionTreeDataSet();

            var sut = new TimeSeriesCrossValidation<double>(initialTrainingSize: 300);

            // The returned predictions are not used by this test, so they are not captured.
            sut.Validate(new RegressionDecisionTreeLearner(), observations, targets);
        }
Esempio n. 11
0
        // Calls Validate with a targets array truncated to its first 100 entries while the
        // observations are left untouched, which per the test name makes the two lengths
        // disagree. The body asserts nothing.
        // NOTE(review): presumably the failure is verified by an expected-exception attribute
        // that is outside this view - confirm.
        public void TimeSeriesCrossValidation_Validate_Observations_And_Targets_Length_Does_Not_Match()
        {
            var (observations, targets) = DataSetUtilities.LoadDecisionTreeDataSet();
            targets = targets.Take(100).ToArray();

            var sut = new TimeSeriesCrossValidation<double>(initialTrainingSize: 5);

            // The returned predictions are not used by this test, so they are not captured.
            sut.Validate(new RegressionDecisionTreeLearner(), observations, targets);
        }
Esempio n. 12
0
        // Cross-validates a default RegressionDecisionTreeLearner on the decision tree data set
        // using CrossValidation with a RandomIndexSampler constructed with 42 and the requested
        // number of folds. Returns the mean squared error of the cross-validated predictions.
        double CrossValidate(int folds)
        {
            var (observations, targets) = DataSetUtilities.LoadDecisionTreeDataSet();

            var sampler = new RandomIndexSampler<double>(42);
            var sut = new CrossValidation<double>(sampler, folds);
            var cvPredictions = sut.CrossValidate(new RegressionDecisionTreeLearner(), observations, targets);

            return new MeanSquaredErrorRegressionMetric().Error(targets, cvPredictions);
        }
        // Cross-validates a default RegressionDecisionTreeLearner on the decision tree data set
        // using NoShuffleCrossValidation with the requested number of folds. Returns the mean
        // squared error of the cross-validated predictions.
        double CrossValidate(int folds)
        {
            var (observations, targets) = DataSetUtilities.LoadDecisionTreeDataSet();

            var sut = new NoShuffleCrossValidation<double>(folds);
            var cvPredictions = sut.CrossValidate(new RegressionDecisionTreeLearner(), observations, targets);

            return new MeanSquaredErrorRegressionMetric().Error(targets, cvPredictions);
        }
Esempio n. 14
0
        // Learns a regression decision tree model, saves it into an in-memory StringWriter and
        // compares the written text with the stored m_regressionDecisionTreeModelString.
        public void RegressionDecisionTreeModel_Save()
        {
            var (observations, targets) = DataSetUtilities.LoadDecisionTreeDataSet();

            var sut = new RegressionDecisionTreeLearner(100, 4, 2, 0.1, 42)
                .Learn(observations, targets);

            // Save takes a factory for the writer; the factory returns this one instance so the
            // written text can be read back afterwards.
            var writer = new StringWriter();
            sut.Save(() => writer);

            var serialized = writer.ToString();
            Assert.AreEqual(m_regressionDecisionTreeModelString, serialized);
        }
Esempio n. 15
0
        // Calls LearnWithEarlyStopping on a RegressionSquareLossGradientBoostLearner whose first
        // constructor argument is 5, passing 5 as the last argument of the call as well. The
        // body asserts nothing.
        // NOTE(review): per the test name this combination has too few iterations; presumably
        // the failure is verified by an expected-exception attribute outside this view - confirm.
        public void RegressionGradientBoostLearner_LearnWithEarlyStopping_ToFewIterations()
        {
            var (observations, targets) = DataSetUtilities.LoadDecisionTreeDataSet();

            // Split into a training set and a test set using a seeded random splitter.
            var splitter = new RandomTrainingTestIndexSplitter<double>(0.6, 1234);
            var split    = splitter.SplitSet(observations, targets);

            var sut       = new RegressionSquareLossGradientBoostLearner(5, 0.1, 3, 1, 1e-6, 1.0, 0, false);
            var evaluator = new MeanSquaredErrorRegressionMetric();

            // The returned model is not used by this test, so it is not captured.
            sut.LearnWithEarlyStopping(split.TrainingSet.Observations, split.TrainingSet.Targets,
                                       split.TestSet.Observations, split.TestSet.Targets, evaluator, 5);
        }
Esempio n. 16
0
        // Loads a RegressionDecisionTreeModel from the stored
        // m_regressionDecisionTreeModelString and compares its mean squared error on the
        // decision tree data set with a recorded value.
        public void RegressionDecisionTreeModel_Load()
        {
            var (observations, targets) = DataSetUtilities.LoadDecisionTreeDataSet();

            // Load takes a factory for the reader; the factory returns this one instance.
            var reader = new StringReader(m_regressionDecisionTreeModelString);
            var sut = RegressionDecisionTreeModel.Load(() => reader);

            var actual = new MeanSquaredErrorRegressionMetric()
                .Error(targets, sut.Predict(observations));

            Assert.AreEqual(0.032120286249559482, actual, 0.0000001);
        }
Esempio n. 17
0
        // Learns a regression decision tree model, predicts all observations in one call and
        // compares the mean squared error against the targets with a recorded value.
        public void RegressionDecisionTreeModel_Predict_Multiple()
        {
            var (observations, targets) = DataSetUtilities.LoadDecisionTreeDataSet();

            var sut = new RegressionDecisionTreeLearner(100, 4, 2, 0.1, 42)
                .Learn(observations, targets);

            var actual = new MeanSquaredErrorRegressionMetric()
                .Error(targets, sut.Predict(observations));

            Assert.AreEqual(0.032120286249559482, actual, 0.0000001);
        }
        // Trains RegressionQuantileLossGradientBoostLearner on the decision tree data set and
        // compares the model's mean squared error on that same data with a recorded value.
        public void RegressionQuantileLossGradientBoostLearner_Learn()
        {
            var (observations, targets) = DataSetUtilities.LoadDecisionTreeDataSet();

            var sut   = new RegressionQuantileLossGradientBoostLearner(50, 0.1, 3, 1, 1e-6, 1.0, 0, 0.9, false);
            var model = sut.Learn(observations, targets);

            var predictions = model.Predict(observations);

            var evaluator = new MeanSquaredErrorRegressionMetric();
            var actual    = evaluator.Error(targets, predictions);

            // Compare with a delta: the expected value is a recorded floating-point result, and
            // exact equality on doubles can fail on a different runtime or platform even when
            // the computation is correct. The delta matches the sibling gradient boost tests.
            Assert.AreEqual(0.18540395091912656, actual, 0.0001);
        }
        // Trains RegressionAbsoluteLossGradientBoostLearner with its sixth constructor argument
        // set to .5 and compares the model's mean squared error on the training data with a
        // recorded value.
        // NOTE(review): given the test name, the .5 is presumably the sub-sample ratio that
        // makes the boosting stochastic - confirm against the constructor signature.
        public void RegressionAbsoluteLossGradientBoostLearner_Stochastic_Learn()
        {
            var (observations, targets) = DataSetUtilities.LoadDecisionTreeDataSet();

            var model = new RegressionAbsoluteLossGradientBoostLearner(50, 0.1, 3, 1, 1e-6, .5, 0, false)
                .Learn(observations, targets);

            var error = new MeanSquaredErrorRegressionMetric()
                .Error(targets, model.Predict(observations));

            Assert.AreEqual(0.033412842952357739, error, 0.0001);
        }
        // Trains RegressionAbsoluteLossGradientBoostLearner with its seventh constructor
        // argument set to 1 and compares the model's mean squared error on the training data
        // with a recorded value.
        // NOTE(review): given the test name, the 1 is presumably the number of features
        // considered per split - confirm against the constructor signature.
        public void RegressionAbsoluteLossGradientBoostLearner_FeaturesPrSplit_Learn()
        {
            var (observations, targets) = DataSetUtilities.LoadDecisionTreeDataSet();

            var sut   = new RegressionAbsoluteLossGradientBoostLearner(50, 0.1, 3, 1, 1e-6, 1.0, 1, false);
            var model = sut.Learn(observations, targets);

            var predictions = model.Predict(observations);

            var evaluator = new MeanSquaredErrorRegressionMetric();
            var actual    = evaluator.Error(targets, predictions);

            // Compare with a delta: the expected value is a recorded floating-point result, and
            // exact equality on doubles can fail on a different runtime or platform even when
            // the computation is correct. The delta matches the sibling gradient boost tests.
            Assert.AreEqual(0.0861480348494789, actual, 0.0001);
        }
Esempio n. 21
0
        // Runs TimeSeriesCrossValidation with initialTrainingSize 5, maxTrainingSetSize 30 and
        // retrainInterval 5, then compares the mean squared error between the validation
        // targets and the validation predictions with a recorded value.
        public void TimeSeriesCrossValidation_Validate_MaxTrainingSetSize_And_RetrainInterval()
        {
            var (observations, targets) = DataSetUtilities.LoadDecisionTreeDataSet();

            var sut = new TimeSeriesCrossValidation<double>(
                initialTrainingSize: 5,
                maxTrainingSetSize: 30,
                retrainInterval: 5);

            var validationPredictions = sut.Validate(new RegressionDecisionTreeLearner(), observations, targets);

            // GetValidationTargets supplies the targets that the predictions are scored against.
            var validationTargets = sut.GetValidationTargets(targets);

            var actual = new MeanSquaredErrorRegressionMetric()
                .Error(validationTargets, validationPredictions);

            Assert.AreEqual(0.13010151998135897, actual, 0.00001);
        }
Esempio n. 22
0
        // Runs TimeSeriesCrossValidation with initialTrainingSize 5 and maxTrainingSetSize 10,
        // then compares the mean squared error between the validation targets and the
        // validation predictions with a recorded value.
        public void TimeSeriesCrossValidation_Validate_MaxTrainingSetSize()
        {
            var (observations, targets) = DataSetUtilities.LoadDecisionTreeDataSet();

            var sut = new TimeSeriesCrossValidation<double>(
                initialTrainingSize: 5,
                maxTrainingSetSize: 10);

            var validationPredictions = sut.Validate(new RegressionDecisionTreeLearner(), observations, targets);

            // GetValidationTargets supplies the targets that the predictions are scored against.
            var validationTargets = sut.GetValidationTargets(targets);

            var actual = new MeanSquaredErrorRegressionMetric()
                .Error(validationTargets, validationPredictions);

            Assert.AreEqual(0.27296549371962692, actual, 0.00001);
        }
Esempio n. 23
0
        // Trains a RegressionDecisionTreeLearner with one weight per target and returns the
        // model's mean squared error on the training data. Each weight comes from
        // Weight(target, weight), a helper defined outside this block.
        //   treeDepth: first constructor argument of the learner.
        //   weight:    forwarded unchanged as the second argument of every Weight call.
        private double RegressionDecisionTreeLearner_Learn_Weighted(int treeDepth, double weight)
        {
            var (observations, targets) = DataSetUtilities.LoadDecisionTreeDataSet();

            var sut = new RegressionDecisionTreeLearner(treeDepth, 4, 2, 0.1, 42);
            var sampleWeights = targets.Select(target => Weight(target, weight)).ToArray();
            var model = sut.Learn(observations, targets, sampleWeights);

            return new MeanSquaredErrorRegressionMetric()
                .Error(targets, model.Predict(observations));
        }
        // Runs TimeSeriesCrossValidation with initialTrainingSize 5 and retrainInterval 5, then
        // compares the mean squared error between the validation targets and the validation
        // predictions with a recorded value.
        public void TimeSeriesCrossValidation_Validate_RetrainInterval()
        {
            var (observations, targets) = DataSetUtilities.LoadDecisionTreeDataSet();

            var sut = new TimeSeriesCrossValidation<double>(
                initialTrainingSize: 5,
                retrainInterval: 5);

            var validationPredictions = sut.Validate(new RegressionDecisionTreeLearner(), observations, targets);

            // GetValidationTargets supplies the targets that the predictions are scored against.
            var validationTargets = sut.GetValidationTargets(targets);

            var actual = new MeanSquaredErrorRegressionMetric()
                .Error(validationTargets, validationPredictions);

            Assert.AreEqual(0.096346937132994928, actual, 0.00001);
        }
Esempio n. 25
0
        // Cross-validates a default RegressionDecisionTreeLearner with RandomCrossValidation
        // (constructed with the requested folds and 42) through the overload that takes an
        // index array and a caller-allocated predictions array. Only the first half of the
        // targets takes part. Returns the mean squared error between that half of the targets
        // and the filled predictions array.
        double CrossValidate_Provide_Indices(int folds)
        {
            var (observations, targets) = DataSetUtilities.LoadDecisionTreeDataSet();

            var sut = new RandomCrossValidation<double>(folds, 42);

            var halfCount = targets.Length / 2;
            var firstHalf = Enumerable.Range(0, halfCount).ToArray();

            // Passed to CrossValidate as the output buffer; sized to match the index array.
            var cvPredictions = new double[halfCount];
            sut.CrossValidate(new RegressionDecisionTreeLearner(), observations, targets, firstHalf, cvPredictions);

            var firstHalfTargets = targets.Take(halfCount).ToArray();
            return new MeanSquaredErrorRegressionMetric().Error(firstHalfTargets, cvPredictions);
        }
Esempio n. 26
0
        // Trains a RegressionDecisionTreeLearner on the decision tree data set and returns the
        // model's mean squared error on that same data.
        //   treeDepth: first constructor argument of the learner.
        private static double RegressionDecisionTreeLearner_Learn(int treeDepth)
        {
            var (observations, targets) = DataSetUtilities.LoadDecisionTreeDataSet();

            var model = new RegressionDecisionTreeLearner(treeDepth, 4, 2, 0.1, 42)
                .Learn(observations, targets);

            return new MeanSquaredErrorRegressionMetric()
                .Error(targets, model.Predict(observations));
        }
Esempio n. 27
0
        // Learns a regression decision tree model on the full data set, predicts only a fixed
        // set of indices and compares the mean squared error against the targets at those
        // same indices with a recorded value.
        public void RegressionDecisionTreeModel_Predict_Multiple_Indexed()
        {
            var (observations, targets) = DataSetUtilities.LoadDecisionTreeDataSet();

            var sut = new RegressionDecisionTreeLearner(100, 4, 2, 0.1, 42)
                .Learn(observations, targets);

            var selected = new[] { 0, 3, 4, 5, 6, 7, 8, 9, 20, 21 };
            var selectedPredictions = sut.Predict(observations, selected);

            // Pick out the targets at the same indices so both arrays line up.
            var selectedTargets = targets.GetIndices(selected);

            var actual = new MeanSquaredErrorRegressionMetric()
                .Error(selectedTargets, selectedPredictions);

            Assert.AreEqual(0.023821615502626264, actual, 0.0000001);
        }
Esempio n. 28
0
        // Calculates learning curves for a default RegressionDecisionTreeLearner with
        // NoShuffleLearningCurvesCalculator and compares the returned points with two
        // recorded LearningCurvePoint values.
        public void NoShuffleLearningCurvesCalculator_Calculate()
        {
            var metric = new MeanSquaredErrorRegressionMetric();
            var sut = new NoShuffleLearningCurvesCalculator<double>(metric, new double[] { 0.2, 0.8 }, 0.8);

            var (observations, targets) = DataSetUtilities.LoadDecisionTreeDataSet();

            var expected = new List<LearningCurvePoint>
            {
                new LearningCurvePoint(32, 0, 0.12874833873980004),
                new LearningCurvePoint(128, 0.0, 0.067720786718774989),
            };

            var actual = sut.Calculate(new RegressionDecisionTreeLearner(), observations, targets);

            // Element-wise comparison in order; relies on LearningCurvePoint equality.
            CollectionAssert.AreEqual(expected, actual);
        }
Esempio n. 29
0
        // Learns a regression decision tree model and checks that GetVariableImportance, given
        // a feature-name-to-index map, returns "F2" with 100.0 followed by "F1" with 0.0.
        public void RegressionDecisionTreeModel_GetVariableImportance()
        {
            var (observations, targets) = DataSetUtilities.LoadDecisionTreeDataSet();

            var featureNameToIndex = new Dictionary<string, int>
            {
                ["F1"] = 0,
                ["F2"] = 1,
            };

            var sut = new RegressionDecisionTreeLearner(100, 4, 2, 0.1, 42)
                .Learn(observations, targets);

            var actual = sut.GetVariableImportance(featureNameToIndex);

            // "F2" is listed first on purpose: the comparison below is order-sensitive.
            var expected = new Dictionary<string, double>
            {
                ["F2"] = 100.0,
                ["F1"] = 0.0,
            };

            CollectionAssert.AreEqual(expected, actual);
        }
Esempio n. 30
0
        // Calculates learning curves for a default RegressionDecisionTreeLearner with
        // RandomShuffleLearningCurvesCalculator and compares the returned points with two
        // recorded LearningCurvePoint values.
        public void RandomLearningCurvesCalculator_Calculate()
        {
            var metric = new MeanSquaredErrorRegressionMetric();
            var sut = new RandomShuffleLearningCurvesCalculator<double>(
                metric, new double[] { 0.2, 0.8 }, 0.8, 42, 5);

            var (observations, targets) = DataSetUtilities.LoadDecisionTreeDataSet();

            var expected = new List<LearningCurvePoint>
            {
                new LearningCurvePoint(32, 0, 0.141565953928265),
                new LearningCurvePoint(128, 0.0, 0.068970597423950036),
            };

            var actual = sut.Calculate(new RegressionDecisionTreeLearner(), observations, targets);

            // Element-wise comparison in order; relies on LearningCurvePoint equality.
            CollectionAssert.AreEqual(expected, actual);
        }