Example #1
        public void ClassificationStackingEnsembleModel_PredictProbability_single()
        {
            var (observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            // base learners with varying maximum tree depth
            var learners = new IIndexedLearner<ProbabilityPrediction>[]
            {
                new ClassificationDecisionTreeLearner(2),
                new ClassificationDecisionTreeLearner(5),
                new ClassificationDecisionTreeLearner(7),
                new ClassificationDecisionTreeLearner(9)
            };

            var learner = new ClassificationStackingEnsembleLearner(learners,
                new ClassificationDecisionTreeLearner(9),
                new RandomCrossValidation<ProbabilityPrediction>(5, 23), false);

            var sut = learner.Learn(observations, targets);

            var rows        = targets.Length;
            var predictions = new ProbabilityPrediction[rows];

            for (int i = 0; i < rows; i++)
            {
                predictions[i] = sut.PredictProbability(observations.Row(i));
            }

            var metric = new LogLossClassificationProbabilityMetric();
            var actual = metric.Error(targets, predictions);

            Assert.AreEqual(0.6696598716465223, actual, 0.0000001);
        }
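
A note on the prediction API: the loop above uses the single-observation overload. Assuming the stacking model exposes the same batch overload used by the other examples in this listing, the loop reduces to a single call. A minimal sketch:

            // batch prediction over the whole observation matrix
            var predictions = sut.PredictProbability(observations);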
        public void ClassificationForwardSearchModelSelectingEnsembleLearner_Learn_Start_With_3_Models()
        {
            var learners = new IIndexedLearner<ProbabilityPrediction>[]
            {
                new ClassificationDecisionTreeLearner(2),
                new ClassificationDecisionTreeLearner(5),
                new ClassificationDecisionTreeLearner(7),
                new ClassificationDecisionTreeLearner(9),
                new ClassificationDecisionTreeLearner(11),
                new ClassificationDecisionTreeLearner(21),
                new ClassificationDecisionTreeLearner(23),
                new ClassificationDecisionTreeLearner(1),
                new ClassificationDecisionTreeLearner(14),
                new ClassificationDecisionTreeLearner(17),
                new ClassificationDecisionTreeLearner(19),
                new ClassificationDecisionTreeLearner(33)
            };

            var metric           = new LogLossClassificationProbabilityMetric();
            var ensembleStrategy = new MeanProbabilityClassificationEnsembleStrategy();

            var sut = new ClassificationForwardSearchModelSelectingEnsembleLearner(learners, 5,
                new StratifiedCrossValidation<ProbabilityPrediction>(5, 23), ensembleStrategy, metric, 3, true);

            var parser       = new CsvParser(() => new StringReader(Resources.Glass));
            var observations = parser.EnumerateRows(v => v != "Target").ToF64Matrix();
            var targets      = parser.EnumerateRows("Target").ToF64Vector();

            var model       = sut.Learn(observations, targets);
            var predictions = model.PredictProbability(observations);

            var actual = metric.Error(targets, predictions);

            Assert.AreEqual(0.54434276244488244, actual, 0.0001);
        }
        public void ClassificationModelSelectingEnsembleLearner_Learn_Start_With_3_Models()
        {
            var learners = new IIndexedLearner<ProbabilityPrediction>[]
            {
                new ClassificationDecisionTreeLearner(2),
                new ClassificationDecisionTreeLearner(5),
                new ClassificationDecisionTreeLearner(7),
                new ClassificationDecisionTreeLearner(9),
                new ClassificationDecisionTreeLearner(11),
                new ClassificationDecisionTreeLearner(21),
                new ClassificationDecisionTreeLearner(23),
                new ClassificationDecisionTreeLearner(1),
                new ClassificationDecisionTreeLearner(14),
                new ClassificationDecisionTreeLearner(17),
                new ClassificationDecisionTreeLearner(19),
                new ClassificationDecisionTreeLearner(33)
            };

            var metric            = new LogLossClassificationProbabilityMetric();
            var ensembleStrategy  = new MeanProbabilityClassificationEnsembleStrategy();
            var ensembleSelection = new ForwardSearchClassificationEnsembleSelection(metric, ensembleStrategy, 5, 3, true);

            var sut = new ClassificationModelSelectingEnsembleLearner(learners,
                new RandomCrossValidation<ProbabilityPrediction>(5, 23), ensembleStrategy, ensembleSelection);

            var (observations, targets) = DataSetUtilities.LoadGlassDataSet();

            var model       = sut.Learn(observations, targets);
            var predictions = model.PredictProbability(observations);

            var actual = metric.Error(targets, predictions);

            Assert.AreEqual(0.55183985816428427, actual, 0.0001);
        }
Example #4
        public void ClassificationBackwardEliminationModelSelectingEnsembleLearner_Learn_Indexed()
        {
            var learners = new IIndexedLearner<ProbabilityPrediction>[]
            {
                new ClassificationDecisionTreeLearner(2),
                new ClassificationDecisionTreeLearner(5),
                new ClassificationDecisionTreeLearner(7),
                new ClassificationDecisionTreeLearner(9),
                new ClassificationDecisionTreeLearner(11),
                new ClassificationDecisionTreeLearner(21),
                new ClassificationDecisionTreeLearner(23),
                new ClassificationDecisionTreeLearner(1),
                new ClassificationDecisionTreeLearner(14),
                new ClassificationDecisionTreeLearner(17),
                new ClassificationDecisionTreeLearner(19),
                new ClassificationDecisionTreeLearner(33)
            };

            var metric           = new LogLossClassificationProbabilityMetric();
            var ensembleStrategy = new MeanProbabilityClassificationEnsembleStrategy();

            var sut = new ClassificationBackwardEliminationModelSelectingEnsembleLearner(learners, 5,
                new RandomCrossValidation<ProbabilityPrediction>(5, 23), ensembleStrategy, metric);

            var (observations, targets) = DataSetUtilities.LoadGlassDataSet();

            var indices = Enumerable.Range(0, 25).ToArray();

            var model       = sut.Learn(observations, targets, indices);
            var predictions = model.PredictProbability(observations);

            var actual = metric.Error(targets, predictions);

            Assert.AreEqual(2.3682546920482164, actual, 0.0001);
        }
        public void ClassificationEnsembleModel_PredictProbability_Multiple()
        {
            var parser       = new CsvParser(() => new StringReader(Resources.AptitudeData));
            var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
            var targets      = parser.EnumerateRows("Pass").ToF64Vector();
            var rows         = targets.Length;

            var learners = new IIndexedLearner<ProbabilityPrediction>[]
            {
                new ClassificationDecisionTreeLearner(2),
                new ClassificationDecisionTreeLearner(5),
                new ClassificationDecisionTreeLearner(7),
                new ClassificationDecisionTreeLearner(9)
            };

            var learner = new ClassificationEnsembleLearner(learners, new MeanProbabilityClassificationEnsembleStrategy());
            var sut     = learner.Learn(observations, targets);

            var predictions = sut.PredictProbability(observations);

            var metric = new LogLossClassificationProbabilityMetric();
            var actual = metric.Error(targets, predictions);

            Assert.AreEqual(0.32562112824941963, actual, 0.0000001);
        }
        public void ClassificationBackwardEliminationModelSelectingEnsembleLearner_CreateMetaFeatures_Then_Learn()
        {
            var learners = new IIndexedLearner<ProbabilityPrediction>[]
            {
                new ClassificationDecisionTreeLearner(2),
                new ClassificationDecisionTreeLearner(5),
                new ClassificationDecisionTreeLearner(7),
                new ClassificationDecisionTreeLearner(9),
                new ClassificationDecisionTreeLearner(11),
                new ClassificationDecisionTreeLearner(21),
                new ClassificationDecisionTreeLearner(23),
                new ClassificationDecisionTreeLearner(1),
                new ClassificationDecisionTreeLearner(14),
                new ClassificationDecisionTreeLearner(17),
                new ClassificationDecisionTreeLearner(19),
                new ClassificationDecisionTreeLearner(33)
            };

            var sut = new ClassificationBackwardEliminationModelSelectingEnsembleLearner(learners, 5);

            var parser       = new CsvParser(() => new StringReader(Resources.Glass));
            var observations = parser.EnumerateRows(v => v != "Target").ToF64Matrix();
            var targets      = parser.EnumerateRows("Target").ToF64Vector();

            var metaObservations = sut.LearnMetaFeatures(observations, targets);
            var model            = sut.SelectModels(observations, metaObservations, targets);

            var predictions = model.PredictProbability(observations);

            var metric = new LogLossClassificationProbabilityMetric();
            var actual = metric.Error(targets, predictions);

            Assert.AreEqual(0.52351727716455632, actual, 0.0001);
        }
        public void LogLossClassificationMetric_ErrorString_TargetStringMapping()
        {
            var sut         = new LogLossClassificationProbabilityMetric(1e-15);
            var predictions = new ProbabilityPrediction[] {
                new ProbabilityPrediction(0, new Dictionary<double, double> {
                    { 0, 1.0 }, { 1, 1.0 }, { 2, 1.0 }
                }),
                new ProbabilityPrediction(1, new Dictionary<double, double> {
                    { 0, 0.0 }, { 1, 1.0 }, { 2, 0.0 }
                }),
                new ProbabilityPrediction(2, new Dictionary<double, double> {
                    { 0, 0.0 }, { 1, 0.0 }, { 2, 1.0 }
                }),
            };

            var targets             = new double[] { 0, 1, 2 };
            var targetStringMapping = new Dictionary<double, string> {
                { 0, "One" }, { 1, "Two" }, { 2, "Three" }
            };

            // ErrorString renders a confusion matrix (counts, then row percentages) followed by
            // the error, with class labels taken from the string mapping
            var actual   = sut.ErrorString(targets, predictions, targetStringMapping);
            var expected = ";One;Two;Three;One;Two;Three\r\nOne;1.000;0.000;0.000;100.000;0.000;0.000\r\nTwo;0.000;1.000;0.000;0.000;100.000;0.000\r\nThree;0.000;0.000;1.000;0.000;0.000;100.000\r\nError: 36.620\r\n";

            Assert.AreEqual(expected, actual);
        }
        public void ClassificationStackingEnsembleModel_PredictProbability_single()
        {
            var parser       = new CsvParser(() => new StringReader(Resources.AptitudeData));
            var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
            var targets      = parser.EnumerateRows("Pass").ToF64Vector();
            var rows         = targets.Length;

            var learners = new IIndexedLearner<ProbabilityPrediction>[]
            {
                new ClassificationDecisionTreeLearner(2),
                new ClassificationDecisionTreeLearner(5),
                new ClassificationDecisionTreeLearner(7),
                new ClassificationDecisionTreeLearner(9)
            };

            var learner = new ClassificationStackingEnsembleLearner(learners, new ClassificationDecisionTreeLearner(9),
                new RandomCrossValidation<ProbabilityPrediction>(5, 23), false);

            var sut = learner.Learn(observations, targets);

            var predictions = new ProbabilityPrediction[rows];

            for (int i = 0; i < rows; i++)
            {
                predictions[i] = sut.PredictProbability(observations.Row(i));
            }

            var metric = new LogLossClassificationProbabilityMetric();
            var actual = metric.Error(targets, predictions);

            Assert.AreEqual(0.6696598716465223, actual, 0.0000001);
        }
Example #9
        public void ClassificationBackwardEliminationModelSelectingEnsembleLearner_CreateMetaFeatures_Then_Learn()
        {
            var learners = new IIndexedLearner<ProbabilityPrediction>[]
            {
                new ClassificationDecisionTreeLearner(2),
                new ClassificationDecisionTreeLearner(5),
                new ClassificationDecisionTreeLearner(7),
                new ClassificationDecisionTreeLearner(9),
                new ClassificationDecisionTreeLearner(11),
                new ClassificationDecisionTreeLearner(21),
                new ClassificationDecisionTreeLearner(23),
                new ClassificationDecisionTreeLearner(1),
                new ClassificationDecisionTreeLearner(14),
                new ClassificationDecisionTreeLearner(17),
                new ClassificationDecisionTreeLearner(19),
                new ClassificationDecisionTreeLearner(33)
            };

            var sut = new ClassificationBackwardEliminationModelSelectingEnsembleLearner(learners, 5);

            var (observations, targets) = DataSetUtilities.LoadGlassDataSet();

            var metaObservations = sut.LearnMetaFeatures(observations, targets);
            var model            = sut.SelectModels(observations, metaObservations, targets);

            var predictions = model.PredictProbability(observations);

            var metric = new LogLossClassificationProbabilityMetric();
            var actual = metric.Error(targets, predictions);

            Assert.AreEqual(0.52351727716455632, actual, 0.0001);
        }
        public void ClassificationForwardSearchModelSelectingEnsembleLearner_Learn()
        {
            var learners = new IIndexedLearner<ProbabilityPrediction>[]
            {
                new ClassificationDecisionTreeLearner(2),
                new ClassificationDecisionTreeLearner(5),
                new ClassificationDecisionTreeLearner(7),
                new ClassificationDecisionTreeLearner(9),
                new ClassificationDecisionTreeLearner(11),
                new ClassificationDecisionTreeLearner(21),
                new ClassificationDecisionTreeLearner(23),
                new ClassificationDecisionTreeLearner(1),
                new ClassificationDecisionTreeLearner(14),
                new ClassificationDecisionTreeLearner(17),
                new ClassificationDecisionTreeLearner(19),
                new ClassificationDecisionTreeLearner(33)
            };

            var sut = new ClassificationForwardSearchModelSelectingEnsembleLearner(learners, 5);

            var parser       = new CsvParser(() => new StringReader(Resources.Glass));
            var observations = parser.EnumerateRows(v => v != "Target").ToF64Matrix();
            var targets      = parser.EnumerateRows("Target").ToF64Vector();

            var model       = sut.Learn(observations, targets);
            var predictions = model.PredictProbability(observations);

            var metric = new LogLossClassificationProbabilityMetric();
            var actual = metric.Error(targets, predictions);

            Assert.AreEqual(0.51787562976713208, actual, 0.0001);
        }
Example #11
        public void ClassificationRandomModelSelectingEnsembleLearner_Learn_Without_Replacement()
        {
            var learners = new IIndexedLearner<ProbabilityPrediction>[]
            {
                new ClassificationDecisionTreeLearner(2),
                new ClassificationDecisionTreeLearner(5),
                new ClassificationDecisionTreeLearner(7),
                new ClassificationDecisionTreeLearner(9),
                new ClassificationDecisionTreeLearner(11),
                new ClassificationDecisionTreeLearner(21),
                new ClassificationDecisionTreeLearner(23),
                new ClassificationDecisionTreeLearner(1),
                new ClassificationDecisionTreeLearner(14),
                new ClassificationDecisionTreeLearner(17),
                new ClassificationDecisionTreeLearner(19),
                new ClassificationDecisionTreeLearner(33)
            };

            var metric           = new LogLossClassificationProbabilityMetric();
            var ensembleStrategy = new MeanProbabilityClassificationEnsembleStrategy();

            var sut = new ClassificationRandomModelSelectingEnsembleLearner(learners, 5,
                new StratifiedCrossValidation<ProbabilityPrediction>(5, 23), ensembleStrategy, metric, 1, false);

            var (observations, targets) = DataSetUtilities.LoadGlassDataSet();

            var model       = sut.Learn(observations, targets);
            var predictions = model.PredictProbability(observations);

            var actual = metric.Error(targets, predictions);

            Assert.AreEqual(0.5805783545646459, actual, 0.0001);
        }
Example #12
        public void ClassificationRandomModelSelectingEnsembleLearner_Learn()
        {
            var learners = new IIndexedLearner<ProbabilityPrediction>[]
            {
                new ClassificationDecisionTreeLearner(2),
                new ClassificationDecisionTreeLearner(5),
                new ClassificationDecisionTreeLearner(7),
                new ClassificationDecisionTreeLearner(9),
                new ClassificationDecisionTreeLearner(11),
                new ClassificationDecisionTreeLearner(21),
                new ClassificationDecisionTreeLearner(23),
                new ClassificationDecisionTreeLearner(1),
                new ClassificationDecisionTreeLearner(14),
                new ClassificationDecisionTreeLearner(17),
                new ClassificationDecisionTreeLearner(19),
                new ClassificationDecisionTreeLearner(33)
            };

            var sut = new ClassificationRandomModelSelectingEnsembleLearner(learners, 5);

            var (observations, targets) = DataSetUtilities.LoadGlassDataSet();

            var model       = sut.Learn(observations, targets);
            var predictions = model.PredictProbability(observations);

            var metric = new LogLossClassificationProbabilityMetric();
            var actual = metric.Error(targets, predictions);

            Assert.AreEqual(0.60969181130388794, actual, 0.0001);
        }
        public void ClassificationEnsembleModel_PredictProbability_single()
        {
            var (observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var learners = new IIndexedLearner<ProbabilityPrediction>[]
            {
                new ClassificationDecisionTreeLearner(2),
                new ClassificationDecisionTreeLearner(5),
                new ClassificationDecisionTreeLearner(7),
                new ClassificationDecisionTreeLearner(9)
            };

            var learner = new ClassificationEnsembleLearner(learners, new MeanProbabilityClassificationEnsembleStrategy());
            var sut     = learner.Learn(observations, targets);

            var rows        = targets.Length;
            var predictions = new ProbabilityPrediction[rows];

            for (int i = 0; i < rows; i++)
            {
                predictions[i] = sut.PredictProbability(observations.Row(i));
            }

            var metric = new LogLossClassificationProbabilityMetric();
            var actual = metric.Error(targets, predictions);

            Assert.AreEqual(0.32562112824941963, actual, 0.0000001);
        }
        public void ClassificationModelSelectingEnsembleLearner_Constructor_CrossValidation_Null()
        {
            var learners          = new IIndexedLearner<ProbabilityPrediction>[4];
            var metric            = new LogLossClassificationProbabilityMetric();
            var ensembleStrategy  = new MeanProbabilityClassificationEnsembleStrategy();
            var ensembleSelection = new ForwardSearchClassificationEnsembleSelection(metric, ensembleStrategy, 5, 1, true);

            // the constructor is expected to throw when crossValidation is null
            var sut = new ClassificationModelSelectingEnsembleLearner(learners, null, ensembleStrategy, ensembleSelection);
        }
        public void ClassificationModelSelectingEnsembleLearner_Constructor_EnsembleSelection_Null()
        {
            var learners         = new IIndexedLearner<ProbabilityPrediction>[4];
            var metric           = new LogLossClassificationProbabilityMetric();
            var ensembleStrategy = new MeanProbabilityClassificationEnsembleStrategy();
            var crossValidation  = new RandomCrossValidation<ProbabilityPrediction>(5);

            // the constructor is expected to throw when ensembleSelection is null
            var sut = new ClassificationModelSelectingEnsembleLearner(learners, crossValidation, ensembleStrategy, null);
        }
        public void ClassificationModelSelectingEnsembleLearner_Constructor_Learners_Null()
        {
            var metric            = new LogLossClassificationProbabilityMetric();
            var ensembleStrategy  = new MeanProbabilityClassificationEnsembleStrategy();
            var ensembleSelection = new ForwardSearchClassificationEnsembleSelection(metric, ensembleStrategy, 5, 1, true);
            var crossValidation   = new RandomCrossValidation<ProbabilityPrediction>(5);

            // the constructor is expected to throw when learners is null
            var sut = new ClassificationModelSelectingEnsembleLearner(null, crossValidation, ensembleStrategy, ensembleSelection);
        }
        public void ClassificationModel_PredictProbability_Threshold_On_Probability()
        {
            #region learner creation

            // Use StreamReader(filepath) when running from filesystem
            var parser     = new CsvParser(() => new StringReader(Resources.winequality_white));
            var targetName = "quality";

            // read feature matrix
            var observations = parser.EnumerateRows(c => c != targetName)
                               .ToF64Matrix();

            // read classification targets and convert to binary problem (low quality/high quality).
            var targets = parser.EnumerateRows(targetName)
                          .ToF64Vector().Select(t => t < 5 ? 0.0 : 1.0).ToArray();

            var translation = new Dictionary<double, string> {
                { 0.0, "Low quality" }, { 1.0, "High quality" }
            };

            // create learner
            var learner = new ClassificationDecisionTreeLearner(maximumTreeDepth: 5);
            #endregion

            // learns a ClassificationDecisionTreeModel
            var model = learner.Learn(observations, targets);

            // predict probabilities for all observations
            var probabilityPredictions = model.PredictProbability(observations);

            // zip target and probabilities to keep order
            var zip = targets.Zip(probabilityPredictions, (t, p) => new { Target = t, Prediction = p });

            // threshold on the probability of the predicted class.
            // This will remove the observations that the model is uncertain about.
            var probabilityThreshold = 0.90;
            var thresholdedResult    = zip.Where(kvp => kvp.Prediction.Probabilities[kvp.Prediction.Prediction] > probabilityThreshold);

            // evaluate the resulting observations
            var thresholdedPredictions = thresholdedResult.Select(p => p.Prediction).ToArray();
            var thresholdedTargets     = thresholdedResult.Select(p => p.Target).ToArray();

            // evaluate only on probability thresholded data
            var metric = new LogLossClassificationProbabilityMetric();
            Trace.WriteLine("ProbabilityThresholded Result:");
            Trace.WriteLine(metric.ErrorString(thresholdedTargets, thresholdedPredictions, translation));
            Trace.WriteLine("");

            // evaluate on all data for comparison
            Trace.WriteLine("All data result:");
            Trace.WriteLine(metric.ErrorString(targets, probabilityPredictions, translation));
        }
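
A possible follow-up, not part of the original test, is to report how much of the data survives the threshold, since raising the threshold trades coverage for confidence. A minimal sketch using only the variables defined above:

            // hypothetical addition: fraction of observations retained at the chosen threshold
            var retainedFraction = thresholdedTargets.Length / (double)targets.Length;
            Trace.WriteLine($"Retained {retainedFraction:P1} of observations at threshold {probabilityThreshold}");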
Example #18
        public void LearningCurves_Calculate_ProbabilityPrediction()
        {
            #region Read data

            // Use StreamReader(filepath) when running from filesystem
            var parser     = new CsvParser(() => new StringReader(Resources.winequality_white));
            var targetName = "quality";

            // read feature matrix
            var observations = parser.EnumerateRows(c => c != targetName)
                               .ToF64Matrix();

            // read classification targets and convert to binary problem (low quality/high quality).
            var targets = parser.EnumerateRows(targetName)
                          .ToF64Vector().Select(t => t < 5 ? 0.0 : 1.0).ToArray();

            #endregion

            // metric for measuring model error
            var metric = new LogLossClassificationProbabilityMetric();

            // creates cross validator, observations are shuffled randomly
            var learningCurveCalculator = new RandomShuffleLearningCurvesCalculator<ProbabilityPrediction>(metric,
                samplePercentages: new double[] { 0.05, 0.1, 0.2, 0.4, 0.8, 1.0 },
                trainingPercentage: 0.7, numberOfShufflesPrSample: 5);

            // create learner
            var learner = new ClassificationDecisionTreeLearner(maximumTreeDepth: 5);

            // calculate learning curve
            var learningCurve = learningCurveCalculator.Calculate(learner, observations, targets);

            // write to csv
            var writer = new StringWriter();
            learningCurve.Write(() => writer);

            // trace result
            // Plotting the learning curves will help determine if the model has high bias or high variance.
            // This information can be used to determine what to try next in order to improve the model.
            Trace.WriteLine(writer.ToString());

            // alternatively, write to file
            //learningCurve.Write(() => new StreamWriter(filePath));
        }
        public void LogLossClassificationMetric_Error_2()
        {
            var sut         = new LogLossClassificationProbabilityMetric(1e-15);
            var predictions = new ProbabilityPrediction[] {
                new ProbabilityPrediction(0, new Dictionary<double, double> {
                    { 0, 1.0 }, { 1, 1.0 }, { 2, 1.0 }
                }),
                new ProbabilityPrediction(1, new Dictionary<double, double> {
                    { 0, 0.0 }, { 1, 1.0 }, { 2, 0.0 }
                }),
                new ProbabilityPrediction(2, new Dictionary<double, double> {
                    { 0, 0.0 }, { 1, 0.0 }, { 2, 1.0 }
                }),
            };

            var targets = new double[] { 0, 1, 2 };

            var actual = sut.Error(targets, predictions);

            Assert.AreEqual(0.36620409622270467, actual, 0.0001);
        }
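
The expected value above can be reproduced by hand: the metric evidently normalizes each probability distribution before applying the log, so the first (all-ones) prediction contributes -ln(1/3) for its true class while the other two contribute -ln(1) = 0, giving ln(3)/3 = 0.36620... A minimal sketch of that arithmetic (an illustration, not the library's implementation):

            // normalized probability of the true class per prediction: 1/3, 1, 1
            var manual = new[] { 1.0 / 3.0, 1.0, 1.0 }
                .Select(p => -Math.Log(p))
                .Average(); // 0.36620409622270467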
        public void LogLossClassificationMetric_Error_1()
        {
            var sut         = new LogLossClassificationProbabilityMetric(1e-15);
            var predictions = new ProbabilityPrediction[] {
                new ProbabilityPrediction(0, new Dictionary<double, double> {
                    { 0, 1.0 }, { 1, 0.0 }, { 2, 0.0 }
                }),
                new ProbabilityPrediction(1, new Dictionary<double, double> {
                    { 0, 0.0 }, { 1, 1.0 }, { 2, 0.0 }
                }),
                new ProbabilityPrediction(2, new Dictionary<double, double> {
                    { 0, 0.0 }, { 1, 0.0 }, { 2, 1.0 }
                }),
            };

            var targets = new double[] { 0, 1, 2 };

            var actual = sut.Error(targets, predictions);

            Assert.AreEqual(9.9920072216264128e-16, actual, 1e-17);
        }
        public void LogLossClassificationMetric_ErrorString()
        {
            var sut         = new LogLossClassificationProbabilityMetric(1e-15);
            var predictions = new ProbabilityPrediction[] {
                new ProbabilityPrediction(0, new Dictionary<double, double> {
                    { 0, 1.0 }, { 1, 1.0 }, { 2, 1.0 }
                }),
                new ProbabilityPrediction(1, new Dictionary<double, double> {
                    { 0, 0.0 }, { 1, 1.0 }, { 2, 0.0 }
                }),
                new ProbabilityPrediction(2, new Dictionary<double, double> {
                    { 0, 0.0 }, { 1, 0.0 }, { 2, 1.0 }
                }),
            };

            var targets = new double[] { 0, 1, 2 };

            var actual   = sut.ErrorString(targets, predictions);
            var expected = ";0;1;2;0;1;2\r\n0;1.000;0.000;0.000;100.000;0.000;0.000\r\n1;0.000;1.000;0.000;0.000;100.000;0.000\r\n2;0.000;0.000;1.000;0.000;0.000;100.000\r\nError: 36.620\r\n";

            Assert.AreEqual(expected, actual);
        }
        public void CrossValidation_CrossValidate_ProbabilityPredictions()
        {
            #region Read data

            // Use StreamReader(filepath) when running from filesystem
            var parser     = new CsvParser(() => new StringReader(Resources.winequality_white));
            var targetName = "quality";

            // read feature matrix (all columns different from the targetName)
            var observations = parser.EnumerateRows(c => c != targetName)
                               .ToF64Matrix();

            // read targets
            var targets = parser.EnumerateRows(targetName)
                          .ToF64Vector();

            #endregion

            // creates cross validator, observations are shuffled randomly
            var cv = new RandomCrossValidation<ProbabilityPrediction>(crossValidationFolds: 5, seed: 42);

            // create learner
            var learner = new ClassificationDecisionTreeLearner();

            // cross-validated predictions
            var cvPredictions = cv.CrossValidate(learner, observations, targets);

            // metric for measuring model error
            var metric = new LogLossClassificationProbabilityMetric();

            // cross-validation provides an estimate on how the model will perform on unseen data
            Trace.WriteLine("Cross-validation error: " + metric.Error(targets, cvPredictions));

            // train and predict training set for comparison
            var predictions = learner.Learn(observations, targets).PredictProbability(observations);

            // The training set is NOT a good estimate of how well the model will perform on unseen data.
            Trace.WriteLine("Training error: " + metric.Error(targets, predictions));
        }
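
As a possible extension, not in the original example, the probability predictions can be reduced to hard class labels via the Prediction property (used in the thresholding example above) when a metric needs discrete predictions. A minimal sketch:

            // hypothetical addition: plain accuracy from the cross-validated probabilities
            var hardPredictions = cvPredictions.Select(p => p.Prediction).ToArray();
            var accuracy = hardPredictions.Zip(targets, (p, t) => p == t ? 1.0 : 0.0).Average();
            Trace.WriteLine("Cross-validation accuracy: " + accuracy);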
        public void ClassificationModelSelectingEnsembleLearner_Learn_Indexed()
        {
            var learners = new IIndexedLearner<ProbabilityPrediction>[]
            {
                new ClassificationDecisionTreeLearner(2),
                new ClassificationDecisionTreeLearner(5),
                new ClassificationDecisionTreeLearner(7),
                new ClassificationDecisionTreeLearner(9),
                new ClassificationDecisionTreeLearner(11),
                new ClassificationDecisionTreeLearner(21),
                new ClassificationDecisionTreeLearner(23),
                new ClassificationDecisionTreeLearner(1),
                new ClassificationDecisionTreeLearner(14),
                new ClassificationDecisionTreeLearner(17),
                new ClassificationDecisionTreeLearner(19),
                new ClassificationDecisionTreeLearner(33)
            };

            var metric            = new LogLossClassificationProbabilityMetric();
            var ensembleStrategy  = new MeanProbabilityClassificationEnsembleStrategy();
            var ensembleSelection = new ForwardSearchClassificationEnsembleSelection(metric, ensembleStrategy, 5, 1, true);

            var sut = new ClassificationModelSelectingEnsembleLearner(learners,
                new RandomCrossValidation<ProbabilityPrediction>(5, 23), ensembleStrategy, ensembleSelection);

            var parser       = new CsvParser(() => new StringReader(Resources.Glass));
            var observations = parser.EnumerateRows(v => v != "Target").ToF64Matrix();
            var targets      = parser.EnumerateRows("Target").ToF64Vector();
            var indices      = Enumerable.Range(0, 25).ToArray();

            var model       = sut.Learn(observations, targets, indices);
            var predictions = model.PredictProbability(observations);

            var actual = metric.Error(targets, predictions);

            Assert.AreEqual(2.3682546920482164, actual, 0.0001);
        }