public void ClassificationModelSelectingEnsembleLearner_Learn_Start_With_3_Models()
        {
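            // candidate base learners: decision trees with varying settings (the constructor argument presumably controls the maximum tree depth)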
            var learners = new IIndexedLearner<ProbabilityPrediction>[]
            {
                new ClassificationDecisionTreeLearner(2),
                new ClassificationDecisionTreeLearner(5),
                new ClassificationDecisionTreeLearner(7),
                new ClassificationDecisionTreeLearner(9),
                new ClassificationDecisionTreeLearner(11),
                new ClassificationDecisionTreeLearner(21),
                new ClassificationDecisionTreeLearner(23),
                new ClassificationDecisionTreeLearner(1),
                new ClassificationDecisionTreeLearner(14),
                new ClassificationDecisionTreeLearner(17),
                new ClassificationDecisionTreeLearner(19),
                new ClassificationDecisionTreeLearner(33)
            };

            var metric            = new LogLossClassificationProbabilityMetric();
            var ensembleStrategy  = new MeanProbabilityClassificationEnsembleStrategy();
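            // forward-search selection: pick 5 of the learners, seeded with the 3 best (per the test name);
            // the final flag presumably allows selecting the same model more than once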
            var ensembleSelection = new ForwardSearchClassificationEnsembleSelection(metric, ensembleStrategy, 5, 3, true);

            var sut = new ClassificationModelSelectingEnsembleLearner(learners, new RandomCrossValidation<ProbabilityPrediction>(5, 23),
                                                                      ensembleStrategy, ensembleSelection);

            var (observations, targets) = DataSetUtilities.LoadGlassDataSet();

            var model       = sut.Learn(observations, targets);
            var predictions = model.PredictProbability(observations);

            var actual = metric.Error(targets, predictions);

            Assert.AreEqual(0.55183985816428427, actual, 0.0001);
        }
        public void ClassificationStackingEnsembleModel_PredictProbability_single()
        {
            var parser       = new CsvParser(() => new StringReader(Resources.AptitudeData));
            var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
            var targets      = parser.EnumerateRows("Pass").ToF64Vector();
            var rows         = targets.Length;

            var learners = new IIndexedLearner<ProbabilityPrediction>[]
            {
                new ClassificationDecisionTreeLearner(2),
                new ClassificationDecisionTreeLearner(5),
                new ClassificationDecisionTreeLearner(7),
                new ClassificationDecisionTreeLearner(9)
            };

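            // stacking: cross-validated base-learner probabilities feed a depth-9 decision tree meta-learner;
            // the final flag presumably controls whether the original features are also passed to it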
            var learner = new ClassificationStackingEnsembleLearner(learners, new ClassificationDecisionTreeLearner(9),
                                                                    new RandomCrossValidation<ProbabilityPrediction>(5, 23), false);

            var sut = learner.Learn(observations, targets);

            var predictions = new ProbabilityPrediction[rows];

            for (int i = 0; i < rows; i++)
            {
                predictions[i] = sut.PredictProbability(observations.Row(i));
            }

            var metric = new LogLossClassificationProbabilityMetric();
            var actual = metric.Error(targets, predictions);

            Assert.AreEqual(0.6696598716465223, actual, 0.0000001);
        }
        public void ClassificationBackwardEliminationModelSelectingEnsembleLearner_CreateMetaFeatures_Then_Learn()
        {
            var learners = new IIndexedLearner<ProbabilityPrediction>[]
            {
                new ClassificationDecisionTreeLearner(2),
                new ClassificationDecisionTreeLearner(5),
                new ClassificationDecisionTreeLearner(7),
                new ClassificationDecisionTreeLearner(9),
                new ClassificationDecisionTreeLearner(11),
                new ClassificationDecisionTreeLearner(21),
                new ClassificationDecisionTreeLearner(23),
                new ClassificationDecisionTreeLearner(1),
                new ClassificationDecisionTreeLearner(14),
                new ClassificationDecisionTreeLearner(17),
                new ClassificationDecisionTreeLearner(19),
                new ClassificationDecisionTreeLearner(33)
            };

            var sut = new ClassificationBackwardEliminationModelSelectingEnsembleLearner(learners, 5);

            var parser       = new CsvParser(() => new StringReader(Resources.Glass));
            var observations = parser.EnumerateRows(v => v != "Target").ToF64Matrix();
            var targets      = parser.EnumerateRows("Target").ToF64Vector();

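            // build the cross-validated meta-features first, then run backward elimination to select the final ensemble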
            var metaObservations = sut.LearnMetaFeatures(observations, targets);
            var model            = sut.SelectModels(observations, metaObservations, targets);

            var predictions = model.PredictProbability(observations);

            var metric = new LogLossClassificationProbabilityMetric();
            var actual = metric.Error(targets, predictions);

            Assert.AreEqual(0.52351727716455632, actual, 0.0001);
        }
        public void ClassificationEnsembleModel_PredictProbability_single()
        {
            var (observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var learners = new IIndexedLearner<ProbabilityPrediction>[]
            {
                new ClassificationDecisionTreeLearner(2),
                new ClassificationDecisionTreeLearner(5),
                new ClassificationDecisionTreeLearner(7),
                new ClassificationDecisionTreeLearner(9)
            };

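            // the ensemble averages the probability estimates of the four base learners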
            var learner = new ClassificationEnsembleLearner(learners, new MeanProbabilityClassificationEnsembleStrategy());
            var sut     = learner.Learn(observations, targets);

            var rows        = targets.Length;
            var predictions = new ProbabilityPrediction[rows];

            for (int i = 0; i < rows; i++)
            {
                predictions[i] = sut.PredictProbability(observations.Row(i));
            }

            var metric = new LogLossClassificationProbabilityMetric();
            var actual = metric.Error(targets, predictions);

            Assert.AreEqual(0.32562112824941963, actual, 0.0000001);
        }
        public void ClassificationBackwardEliminationModelSelectingEnsembleLearner_CreateMetaFeatures_Then_Learn()
        {
            var learners = new IIndexedLearner<ProbabilityPrediction>[]
            {
                new ClassificationDecisionTreeLearner(2),
                new ClassificationDecisionTreeLearner(5),
                new ClassificationDecisionTreeLearner(7),
                new ClassificationDecisionTreeLearner(9),
                new ClassificationDecisionTreeLearner(11),
                new ClassificationDecisionTreeLearner(21),
                new ClassificationDecisionTreeLearner(23),
                new ClassificationDecisionTreeLearner(1),
                new ClassificationDecisionTreeLearner(14),
                new ClassificationDecisionTreeLearner(17),
                new ClassificationDecisionTreeLearner(19),
                new ClassificationDecisionTreeLearner(33)
            };

            var sut = new ClassificationBackwardEliminationModelSelectingEnsembleLearner(learners, 5);

            var (observations, targets) = DataSetUtilities.LoadGlassDataSet();

            var metaObservations = sut.LearnMetaFeatures(observations, targets);
            var model            = sut.SelectModels(observations, metaObservations, targets);

            var predictions = model.PredictProbability(observations);

            var metric = new LogLossClassificationProbabilityMetric();
            var actual = metric.Error(targets, predictions);

            Assert.AreEqual(0.52351727716455632, actual, 0.0001);
        }
        public void ClassificationBackwardEliminationModelSelectingEnsembleLearner_Learn_Indexed()
        {
            var learners = new IIndexedLearner<ProbabilityPrediction>[]
            {
                new ClassificationDecisionTreeLearner(2),
                new ClassificationDecisionTreeLearner(5),
                new ClassificationDecisionTreeLearner(7),
                new ClassificationDecisionTreeLearner(9),
                new ClassificationDecisionTreeLearner(11),
                new ClassificationDecisionTreeLearner(21),
                new ClassificationDecisionTreeLearner(23),
                new ClassificationDecisionTreeLearner(1),
                new ClassificationDecisionTreeLearner(14),
                new ClassificationDecisionTreeLearner(17),
                new ClassificationDecisionTreeLearner(19),
                new ClassificationDecisionTreeLearner(33)
            };

            var metric           = new LogLossClassificationProbabilityMetric();
            var ensembleStrategy = new MeanProbabilityClassificationEnsembleStrategy();

            var sut = new ClassificationBackwardEliminationModelSelectingEnsembleLearner(learners, 5,
                                                                                         new RandomCrossValidation<ProbabilityPrediction>(5, 23), ensembleStrategy, metric);

            var (observations, targets) = DataSetUtilities.LoadGlassDataSet();

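            // train on the first 25 observations only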
            var indices = Enumerable.Range(0, 25).ToArray();

            var model       = sut.Learn(observations, targets, indices);
            var predictions = model.PredictProbability(observations);

            var actual = metric.Error(targets, predictions);

            Assert.AreEqual(2.3682546920482164, actual, 0.0001);
        }
        public void ClassificationForwardSearchModelSelectingEnsembleLearner_Learn()
        {
            var learners = new IIndexedLearner<ProbabilityPrediction>[]
            {
                new ClassificationDecisionTreeLearner(2),
                new ClassificationDecisionTreeLearner(5),
                new ClassificationDecisionTreeLearner(7),
                new ClassificationDecisionTreeLearner(9),
                new ClassificationDecisionTreeLearner(11),
                new ClassificationDecisionTreeLearner(21),
                new ClassificationDecisionTreeLearner(23),
                new ClassificationDecisionTreeLearner(1),
                new ClassificationDecisionTreeLearner(14),
                new ClassificationDecisionTreeLearner(17),
                new ClassificationDecisionTreeLearner(19),
                new ClassificationDecisionTreeLearner(33)
            };

            var sut = new ClassificationForwardSearchModelSelectingEnsembleLearner(learners, 5);

            var parser       = new CsvParser(() => new StringReader(Resources.Glass));
            var observations = parser.EnumerateRows(v => v != "Target").ToF64Matrix();
            var targets      = parser.EnumerateRows("Target").ToF64Vector();

            var model       = sut.Learn(observations, targets);
            var predictions = model.PredictProbability(observations);

            var metric = new LogLossClassificationProbabilityMetric();
            var actual = metric.Error(targets, predictions);

            Assert.AreEqual(0.51787562976713208, actual, 0.0001);
        }
        public void ClassificationForwardSearchModelSelectingEnsembleLearner_Learn_Start_With_3_Models()
        {
            var learners = new IIndexedLearner<ProbabilityPrediction>[]
            {
                new ClassificationDecisionTreeLearner(2),
                new ClassificationDecisionTreeLearner(5),
                new ClassificationDecisionTreeLearner(7),
                new ClassificationDecisionTreeLearner(9),
                new ClassificationDecisionTreeLearner(11),
                new ClassificationDecisionTreeLearner(21),
                new ClassificationDecisionTreeLearner(23),
                new ClassificationDecisionTreeLearner(1),
                new ClassificationDecisionTreeLearner(14),
                new ClassificationDecisionTreeLearner(17),
                new ClassificationDecisionTreeLearner(19),
                new ClassificationDecisionTreeLearner(33)
            };

            var metric           = new LogLossClassificationProbabilityMetric();
            var ensembleStrategy = new MeanProbabilityClassificationEnsembleStrategy();

            var sut = new ClassificationForwardSearchModelSelectingEnsembleLearner(learners, 5,
                                                                                   new StratifiedCrossValidation<ProbabilityPrediction>(5, 23), ensembleStrategy, metric, 3, true);

            var parser       = new CsvParser(() => new StringReader(Resources.Glass));
            var observations = parser.EnumerateRows(v => v != "Target").ToF64Matrix();
            var targets      = parser.EnumerateRows("Target").ToF64Vector();

            var model       = sut.Learn(observations, targets);
            var predictions = model.PredictProbability(observations);

            var actual = metric.Error(targets, predictions);

            Assert.AreEqual(0.54434276244488244, actual, 0.0001);
        }
        public void ClassificationRandomModelSelectingEnsembleLearner_Learn()
        {
            var learners = new IIndexedLearner<ProbabilityPrediction>[]
            {
                new ClassificationDecisionTreeLearner(2),
                new ClassificationDecisionTreeLearner(5),
                new ClassificationDecisionTreeLearner(7),
                new ClassificationDecisionTreeLearner(9),
                new ClassificationDecisionTreeLearner(11),
                new ClassificationDecisionTreeLearner(21),
                new ClassificationDecisionTreeLearner(23),
                new ClassificationDecisionTreeLearner(1),
                new ClassificationDecisionTreeLearner(14),
                new ClassificationDecisionTreeLearner(17),
                new ClassificationDecisionTreeLearner(19),
                new ClassificationDecisionTreeLearner(33)
            };

            var sut = new ClassificationRandomModelSelectingEnsembleLearner(learners, 5);

            var (observations, targets) = DataSetUtilities.LoadGlassDataSet();

            var model       = sut.Learn(observations, targets);
            var predictions = model.PredictProbability(observations);

            var metric = new LogLossClassificationProbabilityMetric();
            var actual = metric.Error(targets, predictions);

            Assert.AreEqual(0.60969181130388794, actual, 0.0001);
        }
        public void ClassificationRandomModelSelectingEnsembleLearner_Learn_Without_Replacement()
        {
            var learners = new IIndexedLearner<ProbabilityPrediction>[]
            {
                new ClassificationDecisionTreeLearner(2),
                new ClassificationDecisionTreeLearner(5),
                new ClassificationDecisionTreeLearner(7),
                new ClassificationDecisionTreeLearner(9),
                new ClassificationDecisionTreeLearner(11),
                new ClassificationDecisionTreeLearner(21),
                new ClassificationDecisionTreeLearner(23),
                new ClassificationDecisionTreeLearner(1),
                new ClassificationDecisionTreeLearner(14),
                new ClassificationDecisionTreeLearner(17),
                new ClassificationDecisionTreeLearner(19),
                new ClassificationDecisionTreeLearner(33)
            };

            var metric           = new LogLossClassificationProbabilityMetric();
            var ensembleStrategy = new MeanProbabilityClassificationEnsembleStrategy();

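            // select 5 models at random; per the test name, the final flag disables selection with replacement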
            var sut = new ClassificationRandomModelSelectingEnsembleLearner(learners, 5,
                                                                            new StratifiedCrossValidation<ProbabilityPrediction>(5, 23), ensembleStrategy, metric, 1, false);

            var (observations, targets) = DataSetUtilities.LoadGlassDataSet();

            var model       = sut.Learn(observations, targets);
            var predictions = model.PredictProbability(observations);

            var actual = metric.Error(targets, predictions);

            Assert.AreEqual(0.5805783545646459, actual, 0.0001);
        }
        public void ClassificationStackingEnsembleModel_PredictProbability_single()
        {
            var (observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var learners = new IIndexedLearner<ProbabilityPrediction>[]
            {
                new ClassificationDecisionTreeLearner(2),
                new ClassificationDecisionTreeLearner(5),
                new ClassificationDecisionTreeLearner(7),
                new ClassificationDecisionTreeLearner(9)
            };

            var learner = new ClassificationStackingEnsembleLearner(learners,
                                                                    new ClassificationDecisionTreeLearner(9),
                                                                    new RandomCrossValidation<ProbabilityPrediction>(5, 23), false);

            var sut = learner.Learn(observations, targets);

            var rows        = targets.Length;
            var predictions = new ProbabilityPrediction[rows];

            for (int i = 0; i < rows; i++)
            {
                predictions[i] = sut.PredictProbability(observations.Row(i));
            }

            var metric = new LogLossClassificationProbabilityMetric();
            var actual = metric.Error(targets, predictions);

            Assert.AreEqual(0.6696598716465223, actual, 0.0000001);
        }
        public void ClassificationEnsembleModel_PredictProbability_Multiple()
        {
            var parser       = new CsvParser(() => new StringReader(Resources.AptitudeData));
            var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
            var targets      = parser.EnumerateRows("Pass").ToF64Vector();
            var rows         = targets.Length;

            var learners = new IIndexedLearner<ProbabilityPrediction>[]
            {
                new ClassificationDecisionTreeLearner(2),
                new ClassificationDecisionTreeLearner(5),
                new ClassificationDecisionTreeLearner(7),
                new ClassificationDecisionTreeLearner(9)
            };

            var learner = new ClassificationEnsembleLearner(learners, new MeanProbabilityClassificationEnsembleStrategy());
            var sut     = learner.Learn(observations, targets);

            var predictions = sut.PredictProbability(observations);

            var metric = new LogLossClassificationProbabilityMetric();
            var actual = metric.Error(targets, predictions);

            Assert.AreEqual(0.32562112824941963, actual, 0.0000001);
        }
        public void CrossValidation_CrossValidate_ProbabilityPredictions()
        {
            #region Read data

            // Use StreamReader(filepath) when running from filesystem
            var parser     = new CsvParser(() => new StringReader(Resources.winequality_white));
            var targetName = "quality";

            // read the feature matrix (all columns except the target column)
            var observations = parser.EnumerateRows(c => c != targetName)
                               .ToF64Matrix();

            // read targets
            var targets = parser.EnumerateRows(targetName)
                          .ToF64Vector();

            #endregion

            // create the cross-validator; observations are shuffled randomly
            var cv = new RandomCrossValidation<ProbabilityPrediction>(crossValidationFolds: 5, seed: 42);

            // create learner
            var learner = new ClassificationDecisionTreeLearner();

            // cross-validated predictions
            var cvPredictions = cv.CrossValidate(learner, observations, targets);

            // metric for measuring model error
            var metric = new LogLossClassificationProbabilityMetric();

            // cross-validation provides an estimate of how the model will perform on unseen data
            Trace.WriteLine("Cross-validation error: " + metric.Error(targets, cvPredictions));

            // train and predict on the training set for comparison
            var predictions = learner.Learn(observations, targets).PredictProbability(observations);

            // The training-set error is NOT a good estimate of how well the model will perform on unseen data.
            Trace.WriteLine("Training error: " + metric.Error(targets, predictions));
        }
        public void LogLossClassificationMetric_Error_2()
        {
            var sut         = new LogLossClassificationProbabilityMetric(1e-15);
            var predictions = new ProbabilityPrediction[] {
                new ProbabilityPrediction(0, new Dictionary<double, double> {
                    { 0, 1.0 }, { 1, 1.0 }, { 2, 1.0 }
                }),
                new ProbabilityPrediction(1, new Dictionary<double, double> {
                    { 0, 0.0 }, { 1, 1.0 }, { 2, 0.0 }
                }),
                new ProbabilityPrediction(2, new Dictionary<double, double> {
                    { 0, 0.0 }, { 1, 0.0 }, { 2, 1.0 }
                }),
            };

            var targets = new double[] { 0, 1, 2 };

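            // the first prediction's probabilities sum to 3; assuming the metric normalizes them to 1/3 each,
            // only that sample contributes, so the expected error is ln(3)/3 ≈ 0.3662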
            var actual = sut.Error(targets, predictions);

            Assert.AreEqual(0.36620409622270467, actual, 0.0001);
        }
        public void LogLossClassificationMetric_Error_1()
        {
            var sut         = new LogLossClassificationProbabilityMetric(1e-15);
            var predictions = new ProbabilityPrediction[] {
                new ProbabilityPrediction(0, new Dictionary<double, double> {
                    { 0, 1.0 }, { 1, 0.0 }, { 2, 0.0 }
                }),
                new ProbabilityPrediction(1, new Dictionary<double, double> {
                    { 0, 0.0 }, { 1, 1.0 }, { 2, 0.0 }
                }),
                new ProbabilityPrediction(2, new Dictionary<double, double> {
                    { 0, 0.0 }, { 1, 0.0 }, { 2, 1.0 }
                }),
            };

            var targets = new double[] { 0, 1, 2 };

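            // predicted probabilities are clamped at 1 - 1e-15, so even perfect predictions
            // give a tiny error of roughly -ln(1 - 1e-15) ≈ 1e-15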
            var actual = sut.Error(targets, predictions);

            Assert.AreEqual(9.9920072216264128e-16, actual, 1e-17);
        }
        public void ClassificationModelSelectingEnsembleLearner_Learn_Indexed()
        {
            var learners = new IIndexedLearner<ProbabilityPrediction>[]
            {
                new ClassificationDecisionTreeLearner(2),
                new ClassificationDecisionTreeLearner(5),
                new ClassificationDecisionTreeLearner(7),
                new ClassificationDecisionTreeLearner(9),
                new ClassificationDecisionTreeLearner(11),
                new ClassificationDecisionTreeLearner(21),
                new ClassificationDecisionTreeLearner(23),
                new ClassificationDecisionTreeLearner(1),
                new ClassificationDecisionTreeLearner(14),
                new ClassificationDecisionTreeLearner(17),
                new ClassificationDecisionTreeLearner(19),
                new ClassificationDecisionTreeLearner(33)
            };

            var metric            = new LogLossClassificationProbabilityMetric();
            var ensembleStrategy  = new MeanProbabilityClassificationEnsembleStrategy();
            var ensembleSelection = new ForwardSearchClassificationEnsembleSelection(metric, ensembleStrategy, 5, 1, true);

            var sut = new ClassificationModelSelectingEnsembleLearner(learners, new RandomCrossValidation<ProbabilityPrediction>(5, 23),
                                                                      ensembleStrategy, ensembleSelection);

            var parser       = new CsvParser(() => new StringReader(Resources.Glass));
            var observations = parser.EnumerateRows(v => v != "Target").ToF64Matrix();
            var targets      = parser.EnumerateRows("Target").ToF64Vector();
            var indices      = Enumerable.Range(0, 25).ToArray();

            var model       = sut.Learn(observations, targets, indices);
            var predictions = model.PredictProbability(observations);

            var actual = metric.Error(targets, predictions);

            Assert.AreEqual(2.3682546920482164, actual, 0.0001);
        }