Code example #1
        public void RegressionEnsembleModel_GetRawVariableImportance()
        {
            var parser       = new CsvParser(() => new StringReader(Resources.AptitudeData));
            var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
            var targets      = parser.EnumerateRows("Pass").ToF64Vector();

            var learners = new IIndexedLearner <double>[]
            {
                new RegressionDecisionTreeLearner(2),
                new RegressionDecisionTreeLearner(5),
                new RegressionDecisionTreeLearner(7),
                new RegressionDecisionTreeLearner(9)
            };

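            // average the predictions of the four trees via the mean ensemble strategy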
            var learner = new RegressionEnsembleLearner(learners, new MeanRegressionEnsembleStrategy());
            var sut     = learner.Learn(observations, targets);

            var actual   = sut.GetRawVariableImportance();
            var expected = new double[] { 100.0, 3.46067371526717 };

            Assert.AreEqual(expected.Length, actual.Length);

            for (int i = 0; i < expected.Length; i++)
            {
                Assert.AreEqual(expected[i], actual[i], 0.000001);
            }
        }
Code example #2
        public void ClassificationStackingEnsembleModel_PredictProbability_single()
        {
            var parser       = new CsvParser(() => new StringReader(Resources.AptitudeData));
            var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
            var targets      = parser.EnumerateRows("Pass").ToF64Vector();
            var rows         = targets.Length;

            var learners = new IIndexedLearner <ProbabilityPrediction>[]
            {
                new ClassificationDecisionTreeLearner(2),
                new ClassificationDecisionTreeLearner(5),
                new ClassificationDecisionTreeLearner(7),
                new ClassificationDecisionTreeLearner(9)
            };

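            // stack the base learners: the decision tree meta-learner is trained on their cross-validated predictions (5 folds, seed 23)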
            var learner = new ClassificationStackingEnsembleLearner(learners, new ClassificationDecisionTreeLearner(9),
                                                                    new RandomCrossValidation <ProbabilityPrediction>(5, 23), false);

            var sut = learner.Learn(observations, targets);

            var predictions = new ProbabilityPrediction[rows];

            for (int i = 0; i < rows; i++)
            {
                predictions[i] = sut.PredictProbability(observations.Row(i));
            }

            var metric = new LogLossClassificationProbabilityMetric();
            var actual = metric.Error(targets, predictions);

            Assert.AreEqual(0.6696598716465223, actual, 0.0000001);
        }
Code example #3
        public void ClassificationAdaBoostModel_GetVariableImportance()
        {
            var parser             = new CsvParser(() => new StringReader(Resources.AptitudeData));
            var observations       = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
            var targets            = parser.EnumerateRows("Pass").ToF64Vector();
            var featureNameToIndex = new Dictionary <string, int> {
                { "AptitudeTestScore", 0 },
                { "PreviousExperience_month", 1 }
            };

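            // learn an AdaBoost classification model and read its variable importance keyed by the feature names above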
            var learner = new ClassificationAdaBoostLearner(10, 1, 3);
            var sut     = learner.Learn(observations, targets);

            var actual   = sut.GetVariableImportance(featureNameToIndex);
            var expected = new Dictionary <string, double> {
                { "PreviousExperience_month", 100.0 },
                { "AptitudeTestScore", 24.0268096428771 }
            };

            Assert.AreEqual(expected.Count, actual.Count);
            var zip = expected.Zip(actual, (e, a) => new { Expected = e, Actual = a });

            foreach (var item in zip)
            {
                Assert.AreEqual(item.Expected.Key, item.Actual.Key);
                Assert.AreEqual(item.Expected.Value, item.Actual.Value, 0.000001);
            }
        }
Code example #4
        public void RegressionRandomForestLearnerTest_Learn_Glass_100_Indices()
        {
            var parser       = new CsvParser(() => new StringReader(Resources.Glass));
            var observations = parser.EnumerateRows(v => v != "Target").ToF64Matrix();
            var targets      = parser.EnumerateRows("Target").ToF64Vector();
            var rows         = targets.Length;

            var sut = new RegressionRandomForestLearner(100, 1, 100, 1, 0.0001, 1.0, 42, false);

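            // learn on a random 70% subset of the rows by passing an explicit index array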
            var indices = Enumerable.Range(0, targets.Length).ToArray();

            indices.Shuffle(new Random(42));
            indices = indices.Take((int)(targets.Length * 0.7))
                      .ToArray();

            var decisionTreeModels = sut.Learn(observations, targets, indices, out var rawVariableImportance).ToArray();
            var model = new RegressionForestModel(decisionTreeModels, rawVariableImportance);

            var predictions = model.Predict(observations);

            var evaluator = new MeanSquaredErrorRegressionMetric();
            var error     = evaluator.Error(targets, predictions);

            Assert.AreEqual(0.49709813080602938, error, m_delta);
        }
Code example #5
        public void ClassificationStackingEnsembleModel_GetRawVariableImportance()
        {
            var parser       = new CsvParser(() => new StringReader(Resources.AptitudeData));
            var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
            var targets      = parser.EnumerateRows("Pass").ToF64Vector();

            var learners = new IIndexedLearner <ProbabilityPrediction>[]
            {
                new ClassificationDecisionTreeLearner(2),
                new ClassificationDecisionTreeLearner(5),
                new ClassificationDecisionTreeLearner(7),
                new ClassificationDecisionTreeLearner(9)
            };

            var learner = new ClassificationStackingEnsembleLearner(learners, new ClassificationDecisionTreeLearner(9),
                                                                    new RandomCrossValidation <ProbabilityPrediction>(5, 23), false);

            var sut = learner.Learn(observations, targets);

            var actual   = sut.GetRawVariableImportance();
            var expected = new double[] { 0.12545787545787546, 0, 0.16300453932032882, 0.0345479082321188, 0.15036245805476572, 0, 0, 0 };

            Assert.AreEqual(expected.Length, actual.Length);

            for (int i = 0; i < expected.Length; i++)
            {
                Assert.AreEqual(expected[i], actual[i], 0.000001);
            }
        }
Code example #6
        public void RegressionBackwardEliminationModelSelectingEnsembleLearner_Learn_Indexed()
        {
            var learners = new IIndexedLearner <double>[]
            {
                new RegressionDecisionTreeLearner(2),
                new RegressionDecisionTreeLearner(5),
                new RegressionDecisionTreeLearner(7),
                new RegressionDecisionTreeLearner(9),
                new RegressionDecisionTreeLearner(11),
                new RegressionDecisionTreeLearner(21),
                new RegressionDecisionTreeLearner(23),
                new RegressionDecisionTreeLearner(1),
                new RegressionDecisionTreeLearner(14),
                new RegressionDecisionTreeLearner(17),
                new RegressionDecisionTreeLearner(19),
                new RegressionDecisionTreeLearner(33)
            };

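            // select 5 of the candidate models by backward elimination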
            var sut = new RegressionBackwardEliminationModelSelectingEnsembleLearner(learners, 5);

            var parser       = new CsvParser(() => new StringReader(Resources.DecisionTreeData));
            var observations = parser.EnumerateRows("F1", "F2").ToF64Matrix();
            var targets      = parser.EnumerateRows("T").ToF64Vector();
            var indices      = Enumerable.Range(0, 25).ToArray();

            var model       = sut.Learn(observations, targets, indices);
            var predictions = model.Predict(observations);

            var metric = new MeanSquaredErrorRegressionMetric();
            var actual = metric.Error(targets, predictions);

            Assert.AreEqual(0.13601421174394385, actual, 0.0001);
        }
Code example #7
        public void ClassificationForwardSearchModelSelectingEnsembleLearner_Learn()
        {
            var learners = new IIndexedLearner <ProbabilityPrediction>[]
            {
                new ClassificationDecisionTreeLearner(2),
                new ClassificationDecisionTreeLearner(5),
                new ClassificationDecisionTreeLearner(7),
                new ClassificationDecisionTreeLearner(9),
                new ClassificationDecisionTreeLearner(11),
                new ClassificationDecisionTreeLearner(21),
                new ClassificationDecisionTreeLearner(23),
                new ClassificationDecisionTreeLearner(1),
                new ClassificationDecisionTreeLearner(14),
                new ClassificationDecisionTreeLearner(17),
                new ClassificationDecisionTreeLearner(19),
                new ClassificationDecisionTreeLearner(33)
            };

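            // select 5 of the candidate models by forward search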
            var sut = new ClassificationForwardSearchModelSelectingEnsembleLearner(learners, 5);

            var parser       = new CsvParser(() => new StringReader(Resources.Glass));
            var observations = parser.EnumerateRows(v => v != "Target").ToF64Matrix();
            var targets      = parser.EnumerateRows("Target").ToF64Vector();

            var model       = sut.Learn(observations, targets);
            var predictions = model.PredictProbability(observations);

            var metric = new LogLossClassificationProbabilityMetric();
            var actual = metric.Error(targets, predictions);

            Assert.AreEqual(0.51787562976713208, actual, 0.0001);
        }
Code example #8
        public void RegressionStackingEnsembleLearner_Learn_Keep_Original_Features()
        {
            var learners = new IIndexedLearner <double>[]
            {
                new RegressionDecisionTreeLearner(2),
                new RegressionDecisionTreeLearner(5),
                new RegressionDecisionTreeLearner(7),
                new RegressionDecisionTreeLearner(9)
            };

            var sut = new RegressionStackingEnsembleLearner(learners, new RegressionDecisionTreeLearner(9),
                                                            new RandomCrossValidation <double>(5, 23), true);

            var parser       = new CsvParser(() => new StringReader(Resources.DecisionTreeData));
            var observations = parser.EnumerateRows("F1", "F2").ToF64Matrix();
            var targets      = parser.EnumerateRows("T").ToF64Vector();

            var model       = sut.Learn(observations, targets);
            var predictions = model.Predict(observations);

            var evaluator = new MeanSquaredErrorRegressionMetric();
            var actual    = evaluator.Error(targets, predictions);

            Assert.AreEqual(0.066184865331534531, actual, 0.0001);
        }
Code example #9
        public void RegressionForwardSearchModelSelectingEnsembleLearner_Learn()
        {
            var learners = new IIndexedLearner <double>[]
            {
                new RegressionDecisionTreeLearner(2),
                new RegressionDecisionTreeLearner(5),
                new RegressionDecisionTreeLearner(7),
                new RegressionDecisionTreeLearner(9),
                new RegressionDecisionTreeLearner(11),
                new RegressionDecisionTreeLearner(21),
                new RegressionDecisionTreeLearner(23),
                new RegressionDecisionTreeLearner(1),
                new RegressionDecisionTreeLearner(14),
                new RegressionDecisionTreeLearner(17),
                new RegressionDecisionTreeLearner(19),
                new RegressionDecisionTreeLearner(33)
            };

            var sut = new RegressionForwardSearchModelSelectingEnsembleLearner(learners, 5);

            var parser       = new CsvParser(() => new StringReader(Resources.DecisionTreeData));
            var observations = parser.EnumerateRows("F1", "F2").ToF64Matrix();
            var targets      = parser.EnumerateRows("T").ToF64Vector();

            var model       = sut.Learn(observations, targets);
            var predictions = model.Predict(observations);

            var metric = new MeanSquaredErrorRegressionMetric();
            var actual = metric.Error(targets, predictions);

            Assert.AreEqual(0.016150842834795006, actual, 0.0001);
        }
Code example #10
        public void RegressionStackingEnsembleLearner_Learn_Indexed()
        {
            var learners = new IIndexedLearner <double>[]
            {
                new RegressionDecisionTreeLearner(2),
                new RegressionDecisionTreeLearner(5),
                new RegressionDecisionTreeLearner(7),
                new RegressionDecisionTreeLearner(9)
            };

            var sut = new RegressionStackingEnsembleLearner(learners, new RegressionDecisionTreeLearner(9),
                                                            new RandomCrossValidation <double>(5, 23), false);

            var parser       = new CsvParser(() => new StringReader(Resources.DecisionTreeData));
            var observations = parser.EnumerateRows("F1", "F2").ToF64Matrix();
            var targets      = parser.EnumerateRows("T").ToF64Vector();
            var indices      = Enumerable.Range(0, 25).ToArray();

            var model       = sut.Learn(observations, targets, indices);
            var predictions = model.Predict(observations);

            var evaluator = new MeanSquaredErrorRegressionMetric();
            var actual    = evaluator.Error(targets, predictions);

            Assert.AreEqual(0.133930222950635, actual, 0.0001);
        }
Code example #11
        public void RegressionStackingEnsembleLearner_CreateMetaFeatures_Then_Learn()
        {
            var learners = new IIndexedLearner <double>[]
            {
                new RegressionDecisionTreeLearner(2),
                new RegressionDecisionTreeLearner(5),
                new RegressionDecisionTreeLearner(7),
                new RegressionDecisionTreeLearner(9)
            };

            var sut = new RegressionStackingEnsembleLearner(learners, new RegressionDecisionTreeLearner(9),
                                                            new RandomCrossValidation <double>(5, 23), false);

            var parser       = new CsvParser(() => new StringReader(Resources.DecisionTreeData));
            var observations = parser.EnumerateRows("F1", "F2").ToF64Matrix();
            var targets      = parser.EnumerateRows("T").ToF64Vector();

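            // two-step stacking: create the cross-validated meta-features first, then fit the stacking model on them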
            var metaObservations = sut.LearnMetaFeatures(observations, targets);
            var model            = sut.LearnStackingModel(observations, metaObservations, targets);

            var predictions = model.Predict(observations);

            var evaluator = new MeanSquaredErrorRegressionMetric();
            var actual    = evaluator.Error(targets, predictions);

            Assert.AreEqual(0.06951934687172627, actual, 0.0001);
        }
Code example #12
        public void RegressionForestModel_GetVariableImportance()
        {
            var parser             = new CsvParser(() => new StringReader(Resources.AptitudeData));
            var observations       = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
            var targets            = parser.EnumerateRows("Pass").ToF64Vector();
            var featureNameToIndex = new Dictionary <string, int> {
                { "AptitudeTestScore", 0 },
                { "PreviousExperience_month", 1 }
            };

            var learner = new RegressionRandomForestLearner(100, 5, 100, 1, 0.0001, 1.0, 42, false);
            var sut     = learner.Learn(observations, targets);

            var actual   = sut.GetVariableImportance(featureNameToIndex);
            var expected = new Dictionary <string, double> {
                { "PreviousExperience_month", 100 },
                { "AptitudeTestScore", 42.3879919692465 }
            };

            Assert.AreEqual(expected.Count, actual.Count);
            var zip = expected.Zip(actual, (e, a) => new { Expected = e, Actual = a });

            foreach (var item in zip)
            {
                Assert.AreEqual(item.Expected.Key, item.Actual.Key);
                Assert.AreEqual(item.Expected.Value, item.Actual.Value, m_delta);
            }
        }
Code example #13
        public void GBMDecisionTreeLearner_Learn()
        {
            var parser       = new CsvParser(() => new StringReader(Resources.DecisionTreeData));
            var observations = parser.EnumerateRows("F1", "F2").ToF64Matrix();
            var targets      = parser.EnumerateRows("T").ToF64Vector();

            var inSample        = targets.Select(t => true).ToArray();
            var orderedElements = new int[observations.ColumnCount][];
            var rows            = observations.RowCount;

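            // pre-sort each feature column and keep the resulting row orderings; the GBM tree learner consumes these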
            for (int i = 0; i < observations.ColumnCount; i++)
            {
                var feature = observations.Column(i);
                var indices = Enumerable.Range(0, rows).ToArray();
                feature.SortWith(indices);
                orderedElements[i] = indices;
            }

            var sut  = new GBMDecisionTreeLearner(10);
            var tree = sut.Learn(observations, targets, targets, targets, orderedElements, inSample);

            var predictions = tree.Predict(observations);
            var evaluator   = new MeanSquaredErrorRegressionMetric();
            var actual      = evaluator.Error(targets, predictions);

            Assert.AreEqual(0.0046122425037232661, actual);
        }
Code example #14
        public void RegressionEnsembleModel_Predict_Multiple()
        {
            var parser       = new CsvParser(() => new StringReader(Resources.AptitudeData));
            var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
            var targets      = parser.EnumerateRows("Pass").ToF64Vector();
            var rows         = targets.Length;

            var learners = new IIndexedLearner <double>[]
            {
                new RegressionDecisionTreeLearner(2),
                new RegressionDecisionTreeLearner(5),
                new RegressionDecisionTreeLearner(7),
                new RegressionDecisionTreeLearner(9)
            };

            var learner = new RegressionEnsembleLearner(learners, new MeanRegressionEnsembleStrategy());
            var sut     = learner.Learn(observations, targets);

            var predictions = sut.Predict(observations);

            var metric = new MeanSquaredErrorRegressionMetric();
            var actual = metric.Error(targets, predictions);

            Assert.AreEqual(0.033195970695970689, actual, 0.0000001);
        }
Code example #15
        public void RegressionForestModel_PredictCertainty_Single()
        {
            var parser       = new CsvParser(() => new StringReader(Resources.AptitudeData));
            var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
            var targets      = parser.EnumerateRows("Pass").ToF64Vector();
            var rows         = targets.Length;

            var learner = new RegressionRandomForestLearner(100, 5, 100, 1, 0.0001, 1.0, 42, false);
            var sut     = learner.Learn(observations, targets);

            var actual = new CertaintyPrediction[rows];

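            // predict row by row; each CertaintyPrediction holds the point prediction and a variance estimate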
            for (int i = 0; i < rows; i++)
            {
                actual[i] = sut.PredictCertainty(observations.Row(i));
            }

            var evaluator = new MeanSquaredErrorRegressionMetric();
            var error     = evaluator.Error(targets, actual.Select(p => p.Prediction).ToArray());

            Assert.AreEqual(0.15381141277554411, error, 0.0000001);

            var expected = new CertaintyPrediction[]
            {
                new CertaintyPrediction(0.379151515151515, 0.0608255007215007),
                new CertaintyPrediction(0.411071351850763, 0.0831655436577049),
                new CertaintyPrediction(0.243420918950331, 0.0452827034233046),
                new CertaintyPrediction(0.302332251082251, 0.0699917594408057),
                new CertaintyPrediction(0.411071351850763, 0.0831655436577049),
                new CertaintyPrediction(0.175743762773174, 0.0354069437824887),
                new CertaintyPrediction(0.574083361083361, 0.0765858693929188),
                new CertaintyPrediction(0.259063776093188, 0.0491198812971218),
                new CertaintyPrediction(0.163878898878899, 0.0331543420321184),
                new CertaintyPrediction(0.671753996003996, 0.0624466591504497),
                new CertaintyPrediction(0.418472943722944, 0.0607014359023913),
                new CertaintyPrediction(0.243420918950331, 0.0452827034233046),
                new CertaintyPrediction(0.443779942279942, 0.0941961872991865),
                new CertaintyPrediction(0.156999361749362, 0.0435804333960299),
                new CertaintyPrediction(0.591222034501446, 0.0873624628347336),
                new CertaintyPrediction(0.123822406351818, 0.0283119805431255),
                new CertaintyPrediction(0.162873993653405, 0.0333697457759022),
                new CertaintyPrediction(0.596261932511932, 0.0695341060210394),
                new CertaintyPrediction(0.671753996003996, 0.0624466591504497),
                new CertaintyPrediction(0.418472943722944, 0.0607014359023913),
                new CertaintyPrediction(0.329000027750028, 0.0788869852405852),
                new CertaintyPrediction(0.671753996003996, 0.0624466591504497),
                new CertaintyPrediction(0.499770375049787, 0.0913884936411888),
                new CertaintyPrediction(0.140025508804921, 0.0309875116490099),
                new CertaintyPrediction(0.161207326986739, 0.0336321035325246),
                new CertaintyPrediction(0.389553418803419, 0.0744433596104835),
            };

            CollectionAssert.AreEqual(expected, actual);
        }
Code example #16
        public void RegressionForwardSearchModelSelectingEnsembleLearner_Learn_Start_With_3_Models()
        {
            var learners = new IIndexedLearner <double>[]
            {
                new RegressionDecisionTreeLearner(2),
                new RegressionDecisionTreeLearner(5),
                new RegressionDecisionTreeLearner(7),
                new RegressionDecisionTreeLearner(9),
                new RegressionDecisionTreeLearner(11),
                new RegressionDecisionTreeLearner(21),
                new RegressionDecisionTreeLearner(23),
                new RegressionDecisionTreeLearner(1),
                new RegressionDecisionTreeLearner(14),
                new RegressionDecisionTreeLearner(17),
                new RegressionDecisionTreeLearner(19),
                new RegressionDecisionTreeLearner(33)
            };

            var metric = new MeanSquaredErrorRegressionMetric();

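            // forward search selecting 5 models, starting the search from 3 models rather than 1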
            var sut = new RegressionForwardSearchModelSelectingEnsembleLearner(learners, 5,
                                                                               new RandomCrossValidation <double>(5, 42), new MeanRegressionEnsembleStrategy(), metric, 3, false);

            var parser       = new CsvParser(() => new StringReader(Resources.DecisionTreeData));
            var observations = parser.EnumerateRows("F1", "F2").ToF64Matrix();
            var targets      = parser.EnumerateRows("T").ToF64Vector();

            var model       = sut.Learn(observations, targets);
            var predictions = model.Predict(observations);

            var actual = metric.Error(targets, predictions);

            Assert.AreEqual(0.010316259438112848, actual, 0.0001);
        }
Code example #17
        public void RegressionBackwardEliminationModelSelectingEnsembleLearner_CreateMetaFeatures_Then_Learn()
        {
            var learners = new IIndexedLearner <double>[]
            {
                new RegressionDecisionTreeLearner(2),
                new RegressionDecisionTreeLearner(5),
                new RegressionDecisionTreeLearner(7),
                new RegressionDecisionTreeLearner(9),
                new RegressionDecisionTreeLearner(11),
                new RegressionDecisionTreeLearner(21),
                new RegressionDecisionTreeLearner(23),
                new RegressionDecisionTreeLearner(1),
                new RegressionDecisionTreeLearner(14),
                new RegressionDecisionTreeLearner(17),
                new RegressionDecisionTreeLearner(19),
                new RegressionDecisionTreeLearner(33)
            };

            var sut = new RegressionBackwardEliminationModelSelectingEnsembleLearner(learners, 5);

            var parser       = new CsvParser(() => new StringReader(Resources.DecisionTreeData));
            var observations = parser.EnumerateRows("F1", "F2").ToF64Matrix();
            var targets      = parser.EnumerateRows("T").ToF64Vector();

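            // generate the meta-features, then run backward elimination (SelectModels) to pick the final ensemble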
            var metaObservations = sut.LearnMetaFeatures(observations, targets);
            var model            = sut.SelectModels(observations, metaObservations, targets);

            var predictions = model.Predict(observations);

            var metric = new MeanSquaredErrorRegressionMetric();
            var actual = metric.Error(targets, predictions);

            Assert.AreEqual(0.010316259438112841, actual, 0.0001);
        }
Code example #18
        public void Hyper_Parameter_Tuning()
        {
            #region Read data

            // Use StreamReader(filepath) when running from filesystem
            var parser     = new CsvParser(() => new StringReader(Resources.winequality_white));
            var targetName = "quality";

            // read feature matrix
            var observations = parser.EnumerateRows(c => c != targetName)
                               .ToF64Matrix();

            // read regression targets (the quality column)
            var targets = parser.EnumerateRows(targetName)
                          .ToF64Vector();

            #endregion

            // metric to minimize
            var metric = new MeanSquaredErrorRegressionMetric();

            // Parameter ranges for the optimizer
            var parameters = new ParameterBounds[]
            {
                new ParameterBounds(min: 1, max: 100, transform: Transform.Linear), // maximumTreeDepth
                new ParameterBounds(min: 1, max: 16, transform: Transform.Linear),  // minimumSplitSize
            };

            // create random search optimizer
            var optimizer = new RandomSearchOptimizer(parameters, iterations: 30, runParallel: true);

            // other available optimizers
            // GridSearchOptimizer
            // GlobalizedBoundedNelderMeadOptimizer
            // ParticleSwarmOptimizer
            // BayesianOptimizer

            // function to minimize
            Func <double[], OptimizerResult> minimize = p =>
            {
                var cv          = new RandomCrossValidation <double>(crossValidationFolds: 5, seed: 42);
                var optlearner  = new RegressionDecisionTreeLearner(maximumTreeDepth: (int)p[0], minimumSplitSize: (int)p[1]);
                var predictions = cv.CrossValidate(optlearner, observations, targets);
                var error       = metric.Error(targets, predictions);
                Trace.WriteLine("Error: " + error);
                return new OptimizerResult(p, error);
            };

            // run optimizer
            var result         = optimizer.OptimizeBest(minimize);
            var bestParameters = result.ParameterSet;

            Trace.WriteLine("Result: " + result.Error);

            // create learner with found parameters
            var learner = new RegressionDecisionTreeLearner(maximumTreeDepth: (int)bestParameters[0], minimumSplitSize: (int)bestParameters[1]);

            // learn model with found parameters
            var model = learner.Learn(observations, targets);
        }
Code example #19
        public void RegressionDecisionTreeLearner_Learn_Reuse_No_Valid_Split()
        {
            var parser       = new CsvParser(() => new StringReader(Resources.DecisionTreeData));
            var observations = parser.EnumerateRows("F1", "F2").ToF64Matrix();
            var targets      = parser.EnumerateRows("T").ToF64Vector();
            var rows         = targets.Length;

            var sut = new RegressionDecisionTreeLearner();

            // train initial model.
            sut.Learn(observations, targets);

            // reuse learner, with smaller data that provides no valid split.
            var onlyUniqueTargetValue     = 1.0;
            var onlyOneUniqueObservations = (F64Matrix)observations.Rows(0, 1, 2, 3, 4);
            var onlyOneUniquetargets      = Enumerable.Range(0, onlyOneUniqueObservations.RowCount).Select(v => onlyUniqueTargetValue).ToArray();
            var model = sut.Learn(onlyOneUniqueObservations, onlyOneUniquetargets);

            var predictions = model.Predict(onlyOneUniqueObservations);

            // no valid split, so should result in the model always returning the onlyUniqueTargetValue.
            for (int i = 0; i < predictions.Length; i++)
            {
                Assert.AreEqual(onlyUniqueTargetValue, predictions[i], 0.0001);
            }
        }
Code example #20
        public void ClassificationStackingEnsembleLearner_Learn_Indexed()
        {
            var learners = new IIndexedLearner <ProbabilityPrediction>[]
            {
                new ClassificationDecisionTreeLearner(2),
                new ClassificationDecisionTreeLearner(5),
                new ClassificationDecisionTreeLearner(7),
                new ClassificationDecisionTreeLearner(9)
            };

            var sut = new ClassificationStackingEnsembleLearner(learners, new ClassificationDecisionTreeLearner(9),
                                                                new RandomCrossValidation <ProbabilityPrediction>(5, 23), false);

            var parser       = new CsvParser(() => new StringReader(Resources.Glass));
            var observations = parser.EnumerateRows(v => v != "Target").ToF64Matrix();
            var targets      = parser.EnumerateRows("Target").ToF64Vector();
            var indices      = Enumerable.Range(0, 25).ToArray();

            var model       = sut.Learn(observations, targets, indices);
            var predictions = model.Predict(observations);

            var metric = new TotalErrorClassificationMetric <double>();
            var actual = metric.Error(targets, predictions);

            Assert.AreEqual(0.67289719626168221, actual, 0.0001);
        }
Code example #21
        public void ClassificationForwardSearchModelSelectingEnsembleLearner_Learn_Start_With_3_Models()
        {
            var learners = new IIndexedLearner <ProbabilityPrediction>[]
            {
                new ClassificationDecisionTreeLearner(2),
                new ClassificationDecisionTreeLearner(5),
                new ClassificationDecisionTreeLearner(7),
                new ClassificationDecisionTreeLearner(9),
                new ClassificationDecisionTreeLearner(11),
                new ClassificationDecisionTreeLearner(21),
                new ClassificationDecisionTreeLearner(23),
                new ClassificationDecisionTreeLearner(1),
                new ClassificationDecisionTreeLearner(14),
                new ClassificationDecisionTreeLearner(17),
                new ClassificationDecisionTreeLearner(19),
                new ClassificationDecisionTreeLearner(33)
            };

            var metric           = new LogLossClassificationProbabilityMetric();
            var ensembleStrategy = new MeanProbabilityClassificationEnsembleStrategy();

            var sut = new ClassificationForwardSearchModelSelectingEnsembleLearner(learners, 5,
                                                                                   new StratifiedCrossValidation <ProbabilityPrediction>(5, 23), ensembleStrategy, metric, 3, true);

            var parser       = new CsvParser(() => new StringReader(Resources.Glass));
            var observations = parser.EnumerateRows(v => v != "Target").ToF64Matrix();
            var targets      = parser.EnumerateRows("Target").ToF64Vector();

            var model       = sut.Learn(observations, targets);
            var predictions = model.PredictProbability(observations);

            var actual = metric.Error(targets, predictions);

            Assert.AreEqual(0.54434276244488244, actual, 0.0001);
        }
Code example #22
        public void ClassificationStackingEnsembleLearner_CreateMetaFeatures_Then_Learn()
        {
            var learners = new IIndexedLearner <ProbabilityPrediction>[]
            {
                new ClassificationDecisionTreeLearner(2),
                new ClassificationDecisionTreeLearner(5),
                new ClassificationDecisionTreeLearner(7),
                new ClassificationDecisionTreeLearner(9)
            };

            var sut = new ClassificationStackingEnsembleLearner(learners, new ClassificationDecisionTreeLearner(9),
                                                                new RandomCrossValidation <ProbabilityPrediction>(5, 23), false);

            var parser       = new CsvParser(() => new StringReader(Resources.Glass));
            var observations = parser.EnumerateRows(v => v != "Target").ToF64Matrix();
            var targets      = parser.EnumerateRows("Target").ToF64Vector();

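            // split stacking into two explicit steps: generate meta-features, then learn the meta-model on them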
            var metaObservations = sut.LearnMetaFeatures(observations, targets);
            var model            = sut.LearnStackingModel(observations, metaObservations, targets);

            var predictions = model.Predict(observations);

            var metric = new TotalErrorClassificationMetric <double>();
            var actual = metric.Error(targets, predictions);

            Assert.AreEqual(0.63551401869158874, actual, 0.0001);
        }
Code example #23
        public void ClassificationModel_Predict()
        {
            #region learner creation

            // Use StreamReader(filepath) when running from filesystem
            var parser     = new CsvParser(() => new StringReader(Resources.winequality_white));
            var targetName = "quality";

            // read feature matrix
            var observations = parser.EnumerateRows(c => c != targetName)
                               .ToF64Matrix();

            // read classification targets
            var targets = parser.EnumerateRows(targetName)
                          .ToF64Vector();

            // create learner
            var learner = new ClassificationDecisionTreeLearner();
            #endregion

            // learns a ClassificationDecisionTreeModel
            var model = learner.Learn(observations, targets);

            // predict all observations
            var predictions = model.Predict(observations);

            // predict single observation
            var prediction = model.Predict(observations.Row(0));
        }
Code example #24
        public void ClassificationStackingEnsembleLearner_Learn_Include_Original_Features()
        {
            var learners = new IIndexedLearner <ProbabilityPrediction>[]
            {
                new ClassificationDecisionTreeLearner(2),
                new ClassificationDecisionTreeLearner(5),
                new ClassificationDecisionTreeLearner(7),
                new ClassificationDecisionTreeLearner(9)
            };

            var sut = new ClassificationStackingEnsembleLearner(learners, new ClassificationDecisionTreeLearner(9),
                                                                new RandomCrossValidation <ProbabilityPrediction>(5, 23), true);

            var parser       = new CsvParser(() => new StringReader(Resources.Glass));
            var observations = parser.EnumerateRows(v => v != "Target").ToF64Matrix();
            var targets      = parser.EnumerateRows("Target").ToF64Vector();

            var model       = sut.Learn(observations, targets);
            var predictions = model.Predict(observations);

            var metric = new TotalErrorClassificationMetric <double>();
            var actual = metric.Error(targets, predictions);

            Assert.AreEqual(0.26168224299065418, actual, 0.0001);
        }
Code example #25
        public void ClassificationStackingEnsembleModel_Predict_Multiple()
        {
            var parser       = new CsvParser(() => new StringReader(Resources.AptitudeData));
            var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
            var targets      = parser.EnumerateRows("Pass").ToF64Vector();
            var rows         = targets.Length;

            var learners = new IIndexedLearner <ProbabilityPrediction>[]
            {
                new ClassificationDecisionTreeLearner(2),
                new ClassificationDecisionTreeLearner(5),
                new ClassificationDecisionTreeLearner(7),
                new ClassificationDecisionTreeLearner(9)
            };

            var learner = new ClassificationStackingEnsembleLearner(learners, new ClassificationDecisionTreeLearner(9),
                                                                    new RandomCrossValidation <ProbabilityPrediction>(5, 23), false);

            var sut = learner.Learn(observations, targets);

            var predictions = sut.Predict(observations);

            var metric = new TotalErrorClassificationMetric <double>();
            var actual = metric.Error(targets, predictions);

            Assert.AreEqual(0.34615384615384615, actual, 0.0000001);
        }
Code example #26
        public void RegressionStackingEnsembleModel_GetRawVariableImportance()
        {
            var parser       = new CsvParser(() => new StringReader(Resources.AptitudeData));
            var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
            var targets      = parser.EnumerateRows("Pass").ToF64Vector();

            var learners = new IIndexedLearner <double>[]
            {
                new RegressionDecisionTreeLearner(2),
                new RegressionDecisionTreeLearner(5),
                new RegressionDecisionTreeLearner(7),
                new RegressionDecisionTreeLearner(9)
            };

            var learner = new RegressionStackingEnsembleLearner(learners, new RegressionDecisionTreeLearner(9),
                                                                new RandomCrossValidation <double>(5, 23), false);

            var sut = learner.Learn(observations, targets);

            var actual   = sut.GetRawVariableImportance();
            var expected = new double[] { 0.255311355311355, 0.525592463092463, 0.753846153846154, 0.0128205128205128 };

            Assert.AreEqual(expected.Length, actual.Length);

            for (int i = 0; i < expected.Length; i++)
            {
                Assert.AreEqual(expected[i], actual[i], 0.000001);
            }
        }
Code example #27
        public void ClassificationBackwardEliminationModelSelectingEnsembleLearner_CreateMetaFeatures_Then_Learn()
        {
            var learners = new IIndexedLearner <ProbabilityPrediction>[]
            {
                new ClassificationDecisionTreeLearner(2),
                new ClassificationDecisionTreeLearner(5),
                new ClassificationDecisionTreeLearner(7),
                new ClassificationDecisionTreeLearner(9),
                new ClassificationDecisionTreeLearner(11),
                new ClassificationDecisionTreeLearner(21),
                new ClassificationDecisionTreeLearner(23),
                new ClassificationDecisionTreeLearner(1),
                new ClassificationDecisionTreeLearner(14),
                new ClassificationDecisionTreeLearner(17),
                new ClassificationDecisionTreeLearner(19),
                new ClassificationDecisionTreeLearner(33)
            };

            var sut = new ClassificationBackwardEliminationModelSelectingEnsembleLearner(learners, 5);

            var parser       = new CsvParser(() => new StringReader(Resources.Glass));
            var observations = parser.EnumerateRows(v => v != "Target").ToF64Matrix();
            var targets      = parser.EnumerateRows("Target").ToF64Vector();

            var metaObservations = sut.LearnMetaFeatures(observations, targets);
            var model            = sut.SelectModels(observations, metaObservations, targets);

            var predictions = model.PredictProbability(observations);

            var metric = new LogLossClassificationProbabilityMetric();
            var actual = metric.Error(targets, predictions);

            Assert.AreEqual(0.52351727716455632, actual, 0.0001);
        }
Code example #28
        public void RegressionStackingEnsembleModel_Predict_single()
        {
            var parser       = new CsvParser(() => new StringReader(Resources.AptitudeData));
            var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
            var targets      = parser.EnumerateRows("Pass").ToF64Vector();
            var rows         = targets.Length;

            var learners = new IIndexedLearner <double>[]
            {
                new RegressionDecisionTreeLearner(2),
                new RegressionDecisionTreeLearner(5),
                new RegressionDecisionTreeLearner(7),
                new RegressionDecisionTreeLearner(9)
            };

            var learner = new RegressionStackingEnsembleLearner(learners, new RegressionDecisionTreeLearner(9),
                                                                new RandomCrossValidation <double>(5, 23), false);

            var sut = learner.Learn(observations, targets);

            var predictions = new double[rows];

            for (int i = 0; i < rows; i++)
            {
                predictions[i] = sut.Predict(observations.Row(i));
            }

            var metric = new MeanSquaredErrorRegressionMetric();
            var actual = metric.Error(targets, predictions);

            Assert.AreEqual(0.26175213675213671, actual, 0.0000001);
        }
Code example #29
        public void FeatureNormalization_Normalize()
        {
            // Use StreamReader(filepath) when running from filesystem
            var parser     = new CsvParser(() => new StringReader(Resources.winequality_white));
            var targetName = "quality";

            // read feature matrix (all columns different from the targetName)
            var observations = parser.EnumerateRows(c => c != targetName)
                               .ToF64Matrix();

            // create a min-max normalizer (rescales each feature to the range 0.0 to 1.0)
            var minMaxTransformer = new MinMaxTransformer(0.0, 1.0);

            // transforms features using the feature normalization transform
            minMaxTransformer.Transform(observations, observations);

            // read targets
            var targets = parser.EnumerateRows(targetName)
                          .ToF64Vector();

            // create learner
            // the neural net requires features to be normalized;
            // this makes convergence much faster.
            var net = new NeuralNet();

            net.Add(new InputLayer(observations.ColumnCount));
            net.Add(new SoftMaxLayer(targets.Distinct().Count())); // no hidden layer and softmax output corresponds to logistic regression
            var learner = new ClassificationNeuralNetLearner(net, new LogLoss());

            // learns a logistic regression classifier
            var model = learner.Learn(observations, targets);
        }
Code example #30
        public void ClassificationEnsembleModel_Predict_single()
        {
            var parser       = new CsvParser(() => new StringReader(Resources.AptitudeData));
            var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
            var targets      = parser.EnumerateRows("Pass").ToF64Vector();
            var rows         = targets.Length;

            var learners = new IIndexedLearner <ProbabilityPrediction>[]
            {
                new ClassificationDecisionTreeLearner(2),
                new ClassificationDecisionTreeLearner(5),
                new ClassificationDecisionTreeLearner(7),
                new ClassificationDecisionTreeLearner(9)
            };

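            // combine the classification trees by averaging their predicted class probabilities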
            var learner = new ClassificationEnsembleLearner(learners, new MeanProbabilityClassificationEnsembleStrategy());
            var sut     = learner.Learn(observations, targets);

            var predictions = new double[rows];

            for (int i = 0; i < rows; i++)
            {
                predictions[i] = sut.Predict(observations.Row(i));
            }

            var metric = new TotalErrorClassificationMetric <double>();
            var actual = metric.Error(targets, predictions);

            Assert.AreEqual(0.076923076923076927, actual, 0.0000001);
        }