double ClassificationDecisionTreeLearner_Learn_Aptitude_Weighted(int treeDepth, double weight)
        {
            var parser       = new CsvParser(() => new StringReader(Resources.AptitudeData));
            var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
            var targets      = parser.EnumerateRows("Pass").ToF64Vector();
            var rows         = targets.Length;

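            // Weight() is assumed to be a test helper that up-weights one class:
            // presumably it returns 'weight' when the target equals 0 (the weighted class) and 1.0 otherwise.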
            var weights = targets.Select(v => Weight(v, 0, weight)).ToArray();
            var sut     = new ClassificationDecisionTreeLearner(treeDepth, 1, 2, 0.001, 42);
            var model   = sut.Learn(observations, targets, weights);

            var predictions = model.Predict(observations);

            var evaluator = new TotalErrorClassificationMetric <double>();

            Trace.WriteLine(evaluator.ErrorString(targets, predictions));
            var error = evaluator.Error(targets, predictions);

            return error;
        }
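Every example in this section scores predictions with TotalErrorClassificationMetric<double>. For reference, the sketch below shows what that metric is assumed to compute: the fraction of predictions that disagree with their targets, i.e. 1 - accuracy. The helper name TotalError is hypothetical, not part of the library.
        static double TotalError(double[] targets, double[] predictions)
        {
            // count predictions that do not match the corresponding target
            var errors = 0;
            for (int i = 0; i < targets.Length; i++)
            {
                if (targets[i] != predictions[i]) { errors++; }
            }

            // total error is the misclassified fraction of all samples
            return (double)errors / targets.Length;
        }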
        public void ClassificationAdaBoostModel_Predict_Single()
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var learner = new ClassificationAdaBoostLearner(10);
            var sut     = learner.Learn(observations, targets);

            var rows        = targets.Length;
            var predictions = new double[rows];

            for (int i = 0; i < rows; i++)
            {
                predictions[i] = sut.Predict(observations.Row(i));
            }

            var evaluator = new TotalErrorClassificationMetric <double>();
            var error     = evaluator.Error(targets, predictions);

            Assert.AreEqual(0.038461538461538464, error, 0.0000001);
        }
Example #3
        public void ClassificationForestModel_Predict_Single()
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var learner = new ClassificationRandomForestLearner(100, 5, 100, 1, 0.0001, 1.0, 42, false);
            var sut     = learner.Learn(observations, targets);

            var rows        = targets.Length;
            var predictions = new double[rows];

            for (int i = 0; i < rows; i++)
            {
                predictions[i] = sut.Predict(observations.Row(i));
            }

            var evaluator = new TotalErrorClassificationMetric <double>();
            var error     = evaluator.Error(targets, predictions);

            Assert.AreEqual(0.23076923076923078, error, m_delta);
        }
Example #4
        public void ClassificationGradientBoostLearner_LearnWithEarlyStopping()
        {
            var parser       = new CsvParser(() => new StringReader(Resources.Glass));
            var observations = parser.EnumerateRows(r => r != "Target").ToF64Matrix();
            var targets      = parser.EnumerateRows("Target").ToF64Vector();

            var splitter = new StratifiedTrainingTestIndexSplitter <double>(0.6, 1234);
            var split    = splitter.SplitSet(observations, targets);

            var sut       = new ClassificationBinomialGradientBoostLearner(100, 0.01, 9, 1, 1e-6, .5, 0, false);
            var evaluator = new TotalErrorClassificationMetric <double>();

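            // LearnWithEarlyStopping is assumed to evaluate the metric on the validation set as
            // boosting proceeds (the final argument, 10, is the early-stopping rounds) and to
            // return the model from the best-scoring iteration.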
            var model = sut.LearnWithEarlyStopping(split.TrainingSet.Observations, split.TrainingSet.Targets,
                                                   split.TestSet.Observations, split.TestSet.Targets, evaluator, 10);

            var predictions = model.Predict(split.TestSet.Observations);
            var actual      = evaluator.Error(split.TestSet.Targets, predictions);

            Assert.AreEqual(0.16279069767441862, actual, 0.000001);
            Assert.AreEqual(90, model.Trees.First().ToArray().Length);
        }
        public void ClassificationGradientBoostModel_Save()
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var learner = new ClassificationGradientBoostLearner(5);
            var sut     = learner.Learn(observations, targets);

            // save model.
            var writer = new StringWriter();

            sut.Save(() => writer);

            // load model and assert prediction results.
            sut = ClassificationGradientBoostModel.Load(() => new StringReader(writer.ToString()));
            var predictions = sut.Predict(observations);

            var evaluator = new TotalErrorClassificationMetric <double>();
            var actual    = evaluator.Error(targets, predictions);

            Assert.AreEqual(0.15384615384615385, actual, 0.0000001);
        }
Example #6
        public void ClassificationNeuralNetModel_PredictProbability_Multiple()
        {
            var numberOfObservations = 500;
            var numberOfFeatures     = 5;
            var numberOfClasses      = 5;

            var random       = new Random(32);
            var observations = new F64Matrix(numberOfObservations, numberOfFeatures);

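            // fill the observation matrix with uniform-random feature values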
            observations.Map(() => random.NextDouble());
            var targets = Enumerable.Range(0, numberOfObservations).Select(i => (double)random.Next(0, numberOfClasses)).ToArray();

            var sut = ClassificationNeuralNetModel.Load(() => new StringReader(ClassificationNeuralNetModelText));

            var predictions = sut.PredictProbability(observations);

            var evaluator = new TotalErrorClassificationMetric <double>();
            var actual    = evaluator.Error(targets, predictions.Select(p => p.Prediction).ToArray());

            Assert.AreEqual(0.762, actual);
        }
Example #7
        public void ClassificationRandomForestLearner_Learn_Glass_100_Indices()
        {
            var(observations, targets) = DataSetUtilities.LoadGlassDataSet();

            var sut = new ClassificationRandomForestLearner(100, 1, 100, 1, 0.0001, 1.0, 42, false);

            var indices = Enumerable.Range(0, targets.Length).ToArray();

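            // shuffle deterministically (seed 42) and keep 70% of the row indices;
            // Learn(observations, targets, indices) then trains on that subset only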
            indices.Shuffle(new Random(42));
            indices = indices.Take((int)(targets.Length * 0.7))
                      .ToArray();

            var model = sut.Learn(observations, targets, indices);

            var predictions = model.Predict(observations);

            var evaluator = new TotalErrorClassificationMetric <double>();
            var error     = evaluator.Error(targets, predictions);

            Assert.AreEqual(0.098130841121495324, error, m_delta);
        }
        public void ClassificationDecisionTreeModel_PredictProbability_Multiple_Indexed()
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var learner = new ClassificationDecisionTreeLearner(100, 5, 2, 0.001, 42);
            var sut     = learner.Learn(observations, targets);

            var indices = new int[] { 0, 3, 4, 5, 6, 7, 8, 9, 20, 21 };
            var actual  = sut.PredictProbability(observations, indices);

            var indexedTargets = targets.GetIndices(indices);
            var evaluator      = new TotalErrorClassificationMetric <double>();
            var error          = evaluator.Error(indexedTargets, actual.Select(p => p.Prediction).ToArray());

            Assert.AreEqual(0.1, error, 0.0000001);

            var expected = new ProbabilityPrediction[] { new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.571428571428571 }, { 1, 0.428571428571429 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.571428571428571 }, { 1, 0.428571428571429 },
                }), new ProbabilityPrediction(1, new Dictionary <double, double> {
                    { 0, 0.428571428571429 }, { 1, 0.571428571428571 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.75 }, { 1, 0.25 },
                }), new ProbabilityPrediction(1, new Dictionary <double, double> {
                    { 0, 0.285714285714286 }, { 1, 0.714285714285714 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.75 }, { 1, 0.25 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.857142857142857 }, { 1, 0.142857142857143 },
                }), new ProbabilityPrediction(1, new Dictionary <double, double> {
                    { 0, 0.285714285714286 }, { 1, 0.714285714285714 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.857142857142857 }, { 1, 0.142857142857143 },
                }), new ProbabilityPrediction(1, new Dictionary <double, double> {
                    { 0, 0.285714285714286 }, { 1, 0.714285714285714 },
                }), };

            CollectionAssert.AreEqual(expected, actual);
        }
Example #9
        public void ClassificationAdaBoostLearner_Learn_Glass_Indexed()
        {
            var(observations, targets) = DataSetUtilities.LoadGlassDataSet();

            var sut = new ClassificationAdaBoostLearner(10, 1, 5);

            var indices = Enumerable.Range(0, targets.Length).ToArray();

            indices.Shuffle(new Random(42));
            indices = indices.Take((int)(targets.Length * 0.7))
                      .ToArray();

            var model              = sut.Learn(observations, targets, indices);
            var predictions        = model.Predict(observations);
            var indexedPredictions = predictions.GetIndices(indices);
            var indexedTargets     = targets.GetIndices(indices);

            var evaluator = new TotalErrorClassificationMetric <double>();
            var actual    = evaluator.Error(indexedTargets, indexedPredictions);

            Assert.AreEqual(0.0, actual);
        }
        public void ClassificationXGBoostLearner_Learn_indexed()
        {
            var(observations, targets) = DataSetUtilities.LoadGlassDataSet();

            var indices = Enumerable.Range(0, targets.Length).ToArray();

            indices.Shuffle(new Random(42));
            indices = indices.Take((int)(targets.Length * 0.7))
                      .ToArray();

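            // CreateLearner() is assumed to be a helper on the test class that builds the XGBoost learner under test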
            var sut = CreateLearner();

            using (var model = sut.Learn(observations, targets, indices))
            {
                var predictions = model.Predict(observations);

                var evaluator = new TotalErrorClassificationMetric <double>();
                var error     = evaluator.Error(targets, predictions);

                Assert.AreEqual(0.228971962616822, error, m_delta);
            }
        }
Example #11
        public void ClassificationBinomialGradientBoostLearner_Stochastic_Learn_Indexed()
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var sut = new ClassificationBinomialGradientBoostLearner(30, 0.1, 3, 1, 1e-6, .5, 0, false);

            var indices = Enumerable.Range(0, targets.Length).ToArray();

            indices.Shuffle(new Random(42));
            indices = indices.Take((int)(targets.Length * 0.7))
                      .ToArray();

            var model              = sut.Learn(observations, targets, indices);
            var predictions        = model.Predict(observations);
            var indexedPredictions = predictions.GetIndices(indices);
            var indexedTargets     = targets.GetIndices(indices);

            var evaluator = new TotalErrorClassificationMetric <double>();
            var actual    = evaluator.Error(indexedTargets, indexedPredictions);

            Assert.AreEqual(0.055555555555555552, actual);
        }
        public void ClassificationEnsembleLearner_Learn_Bagging()
        {
            var learners = new IIndexedLearner <ProbabilityPrediction>[]
            {
                new ClassificationDecisionTreeLearner(2),
                new ClassificationDecisionTreeLearner(5),
                new ClassificationDecisionTreeLearner(7),
                new ClassificationDecisionTreeLearner(9)
            };

            var sut = new ClassificationEnsembleLearner(learners, new MeanProbabilityClassificationEnsembleStrategy(), 0.7);

            var(observations, targets) = DataSetUtilities.LoadGlassDataSet();

            var model       = sut.Learn(observations, targets);
            var predictions = model.Predict(observations);

            var metric = new TotalErrorClassificationMetric <double>();
            var actual = metric.Error(targets, predictions);

            Assert.AreEqual(0.16822429906542055, actual, 0.0001);
        }
Example #13
        public void ClassificationAdaBoostModel_Predict_Single()
        {
            var parser       = new CsvParser(() => new StringReader(Resources.AptitudeData));
            var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
            var targets      = parser.EnumerateRows("Pass").ToF64Vector();
            var rows         = targets.Length;

            var learner = new ClassificationAdaBoostLearner(10);
            var sut     = learner.Learn(observations, targets);

            var predictions = new double[rows];

            for (int i = 0; i < rows; i++)
            {
                predictions[i] = sut.Predict(observations.Row(i));
            }

            var evaluator = new TotalErrorClassificationMetric <double>();
            var error     = evaluator.Error(targets, predictions);

            Assert.AreEqual(0.038461538461538464, error, 0.0000001);
        }
        public void ClassificationEnsembleModel_Predict_Multiple()
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var learners = new IIndexedLearner <ProbabilityPrediction>[]
            {
                new ClassificationDecisionTreeLearner(2),
                new ClassificationDecisionTreeLearner(5),
                new ClassificationDecisionTreeLearner(7),
                new ClassificationDecisionTreeLearner(9)
            };

            var learner = new ClassificationEnsembleLearner(learners, new MeanProbabilityClassificationEnsembleStrategy());
            var sut     = learner.Learn(observations, targets);

            var predictions = sut.Predict(observations);

            var metric = new TotalErrorClassificationMetric <double>();
            var actual = metric.Error(targets, predictions);

            Assert.AreEqual(0.076923076923076927, actual, 0.0000001);
        }
Example #15
        public void ClassificationForestModel_Trees()
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var reader = new StringReader(m_classificationForestModelString);
            var sut    = ClassificationForestModel.Load(() => reader);

            var rows        = observations.RowCount;
            var predictions = new double[rows];

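            // predict each row by majority vote across the individual trees in the loaded forest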
            for (int row = 0; row < rows; row++)
            {
                var observation = observations.Row(row);
                predictions[row] = sut.Trees.Select(t => t.Predict(observation))
                                   .GroupBy(p => p).OrderByDescending(g => g.Count())
                                   .First().Key;
            }

            var evaluator = new TotalErrorClassificationMetric <double>();
            var error     = evaluator.Error(targets, predictions);

            Assert.AreEqual(0.42307692307692307, error, m_delta);
        }
Example #16
        public void ClassificationGradientBoostModel_Save()
        {
            var parser       = new CsvParser(() => new StringReader(Resources.AptitudeData));
            var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
            var targets      = parser.EnumerateRows("Pass").ToF64Vector();

            var learner = new ClassificationGradientBoostLearner(5);
            var sut     = learner.Learn(observations, targets);

            // save model.
            var writer = new StringWriter();

            sut.Save(() => writer);

            // load model and assert prediction results.
            sut = ClassificationGradientBoostModel.Load(() => new StringReader(writer.ToString()));
            var predictions = sut.Predict(observations);

            var evaluator = new TotalErrorClassificationMetric <double>();
            var actual    = evaluator.Error(targets, predictions);

            Assert.AreEqual(0.15384615384615385, actual, 0.0000001);
        }
Example #17
        public void ClassificationStackingEnsembleLearner_Learn_Include_Original_Features()
        {
            var learners = new IIndexedLearner <ProbabilityPrediction>[]
            {
                new ClassificationDecisionTreeLearner(2),
                new ClassificationDecisionTreeLearner(5),
                new ClassificationDecisionTreeLearner(7),
                new ClassificationDecisionTreeLearner(9)
            };

            var sut = new ClassificationStackingEnsembleLearner(learners, new ClassificationDecisionTreeLearner(9),
                                                                new RandomCrossValidation <ProbabilityPrediction>(5, 23), true);

            var(observations, targets) = DataSetUtilities.LoadGlassDataSet();

            var model       = sut.Learn(observations, targets);
            var predictions = model.Predict(observations);

            var metric = new TotalErrorClassificationMetric <double>();
            var actual = metric.Error(targets, predictions);

            Assert.AreEqual(0.26168224299065418, actual, 0.0001);
        }
Example #18
        public void ClassificationEnsembleLearner_Learn_Bagging()
        {
            var learners = new IIndexedLearner <ProbabilityPrediction>[]
            {
                new ClassificationDecisionTreeLearner(2),
                new ClassificationDecisionTreeLearner(5),
                new ClassificationDecisionTreeLearner(7),
                new ClassificationDecisionTreeLearner(9)
            };

            var sut = new ClassificationEnsembleLearner(learners, new MeanProbabilityClassificationEnsembleStrategy(), 0.7);

            var parser       = new CsvParser(() => new StringReader(Resources.Glass));
            var observations = parser.EnumerateRows(v => v != "Target").ToF64Matrix();
            var targets      = parser.EnumerateRows("Target").ToF64Vector();

            var model       = sut.Learn(observations, targets);
            var predictions = model.Predict(observations);

            var metric = new TotalErrorClassificationMetric <double>();
            var actual = metric.Error(targets, predictions);

            Assert.AreEqual(0.16822429906542055, actual, 0.0001);
        }
Example #19
        public void ClassificationStackingEnsembleModel_Predict_Multiple()
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var learners = new IIndexedLearner <ProbabilityPrediction>[]
            {
                new ClassificationDecisionTreeLearner(2),
                new ClassificationDecisionTreeLearner(5),
                new ClassificationDecisionTreeLearner(7),
                new ClassificationDecisionTreeLearner(9)
            };

            var learner = new ClassificationStackingEnsembleLearner(learners, new ClassificationDecisionTreeLearner(9),
                                                                    new RandomCrossValidation <ProbabilityPrediction>(5, 23), false);

            var sut = learner.Learn(observations, targets);

            var predictions = sut.Predict(observations);

            var metric = new TotalErrorClassificationMetric <double>();
            var actual = metric.Error(targets, predictions);

            Assert.AreEqual(0.34615384615384615, actual, 0.0000001);
        }
        public void ClassificationAdaBoostLearner_Learn_Glass_Indexed()
        {
            var parser       = new CsvParser(() => new StringReader(Resources.Glass));
            var observations = parser.EnumerateRows(v => v != "Target").ToF64Matrix();
            var targets      = parser.EnumerateRows("Target").ToF64Vector();

            var sut = new ClassificationAdaBoostLearner(10, 1, 5);

            var indices = Enumerable.Range(0, targets.Length).ToArray();

            indices.Shuffle(new Random(42));
            indices = indices.Take((int)(targets.Length * 0.7))
                      .ToArray();

            var model              = sut.Learn(observations, targets, indices);
            var predictions        = model.Predict(observations);
            var indexedPredictions = predictions.GetIndices(indices);
            var indexedTargets     = targets.GetIndices(indices);

            var evaluator = new TotalErrorClassificationMetric <double>();
            var actual    = evaluator.Error(indexedTargets, indexedPredictions);

            Assert.AreEqual(0.0, actual);
        }
        public void ClassificationXGBoostLearner_Learn_indexed()
        {
            var parser       = new CsvParser(() => new StringReader(Resources.Glass));
            var observations = parser.EnumerateRows(v => v != "Target").ToF64Matrix();
            var targets      = parser.EnumerateRows("Target").ToF64Vector();

            var indices = Enumerable.Range(0, targets.Length).ToArray();

            indices.Shuffle(new Random(42));
            indices = indices.Take((int)(targets.Length * 0.7))
                      .ToArray();

            var sut = CreateLearner();

            using (var model = sut.Learn(observations, targets, indices))
            {
                var predictions = model.Predict(observations);

                var evaluator = new TotalErrorClassificationMetric <double>();
                var error     = evaluator.Error(targets, predictions);

                Assert.AreEqual(0.228971962616822, error, m_delta);
            }
        }
        public void ClassificationEnsembleLearner_Learn_Indexed()
        {
            var learners = new IIndexedLearner <ProbabilityPrediction>[]
            {
                new ClassificationDecisionTreeLearner(2),
                new ClassificationDecisionTreeLearner(5),
                new ClassificationDecisionTreeLearner(7),
                new ClassificationDecisionTreeLearner(9)
            };

            var sut = new ClassificationEnsembleLearner(learners, new MeanProbabilityClassificationEnsembleStrategy());

            var(observations, targets) = DataSetUtilities.LoadGlassDataSet();

            var indices = Enumerable.Range(0, 25).ToArray();

            var model       = sut.Learn(observations, targets, indices);
            var predictions = model.Predict(observations);

            var metric = new TotalErrorClassificationMetric <double>();
            var actual = metric.Error(targets, predictions);

            Assert.AreEqual(0.67289719626168221, actual, 0.0001);
        }
        public void ClassificationBinomialGradientBoostLearner_Stochastic_Learn_Indexed()
        {
            var parser       = new CsvParser(() => new StringReader(Resources.AptitudeData));
            var observations = parser.EnumerateRows("AptitudeTestScore", "PreviousExperience_month").ToF64Matrix();
            var targets      = parser.EnumerateRows("Pass").ToF64Vector();

            var sut = new ClassificationBinomialGradientBoostLearner(30, 0.1, 3, 1, 1e-6, .5, 0, false);

            var indices = Enumerable.Range(0, targets.Length).ToArray();

            indices.Shuffle(new Random(42));
            indices = indices.Take((int)(targets.Length * 0.7))
                      .ToArray();

            var model              = sut.Learn(observations, targets, indices);
            var predictions        = model.Predict(observations);
            var indexedPredictions = predictions.GetIndices(indices);
            var indexedTargets     = targets.GetIndices(indices);

            var evaluator = new TotalErrorClassificationMetric <double>();
            var actual    = evaluator.Error(indexedTargets, indexedPredictions);

            Assert.AreEqual(0.055555555555555552, actual);
        }
        public void ClassificationExtremelyRandomizedTreesLearner_Learn_Glass_100_Indices()
        {
            var parser       = new CsvParser(() => new StringReader(Resources.Glass));
            var observations = parser.EnumerateRows(v => v != "Target").ToF64Matrix();
            var targets      = parser.EnumerateRows("Target").ToF64Vector();
            var rows         = targets.Length;

            var sut = new ClassificationExtremelyRandomizedTreesLearner(100, 1, 100, 1, 0.0001, 1.0, 42, false);

            var indices = Enumerable.Range(0, targets.Length).ToArray();

            indices.Shuffle(new Random(42));
            indices = indices.Take((int)(targets.Length * 0.7))
                      .ToArray();

            var model = sut.Learn(observations, targets, indices);

            var predictions = model.Predict(observations);

            var evaluator = new TotalErrorClassificationMetric <double>();
            var error     = evaluator.Error(targets, predictions);

            Assert.AreEqual(0.14485981308411214, error, 0.0000001);
        }
        public void ClassificationBinomialGradientBoostLearner_MultiClass_Stochastic_FeaturePrSplit_Learn_Indexed()
        {
            var parser       = new CsvParser(() => new StringReader(Resources.Glass));
            var observations = parser.EnumerateRows(v => v != "Target").ToF64Matrix();
            var targets      = parser.EnumerateRows("Target").ToF64Vector();

            var sut = new ClassificationBinomialGradientBoostLearner(30, 0.1, 3, 1, 1e-6, 0.5, 3, false);

            var indices = Enumerable.Range(0, targets.Length).ToArray();

            indices.Shuffle(new Random(42));
            indices = indices.Take((int)(targets.Length * 0.7))
                      .ToArray();

            var model              = sut.Learn(observations, targets, indices);
            var predictions        = model.Predict(observations);
            var indexedPredictions = predictions.GetIndices(indices);
            var indexedTargets     = targets.GetIndices(indices);

            var evaluator = new TotalErrorClassificationMetric <double>();
            var actual    = evaluator.Error(indexedTargets, indexedPredictions);

            Assert.AreEqual(0.033557046979865772, actual);
        }
        public void ClassificationNeuralNetModel_Save()
        {
            var numberOfObservations = 500;
            var numberOfFeatures     = 5;
            var numberOfClasses      = 5;

            var random       = new Random(32);
            var observations = new F64Matrix(numberOfObservations, numberOfFeatures);

            observations.Map(() => random.NextDouble());
            var targets = Enumerable.Range(0, numberOfObservations)
                          .Select(i => (double)random.Next(0, numberOfClasses)).ToArray();

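            // a minimal network: input layer -> dense hidden layer (10 units) -> SVM-style output layer with one output per class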
            var net = new NeuralNet();

            net.Add(new InputLayer(numberOfFeatures));
            net.Add(new DenseLayer(10));
            net.Add(new SvmLayer(numberOfClasses));

            var learner = new ClassificationNeuralNetLearner(net, new AccuracyLoss());
            var sut     = learner.Learn(observations, targets);

            // save model.
            var writer = new StringWriter();

            sut.Save(() => writer);

            // load model and assert prediction results.
            sut = ClassificationNeuralNetModel.Load(() => new StringReader(writer.ToString()));
            var predictions = sut.Predict(observations);

            var evaluator = new TotalErrorClassificationMetric <double>();
            var actual    = evaluator.Error(targets, predictions);

            Assert.AreEqual(0.762, actual, 0.0000001);
        }
        public void ClassificationAdaBoostModel_PredictProbability_Single()
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var learner = new ClassificationAdaBoostLearner(10, 1, 3);
            var sut     = learner.Learn(observations, targets);

            var rows   = targets.Length;
            var actual = new ProbabilityPrediction[rows];

            for (int i = 0; i < rows; i++)
            {
                actual[i] = sut.PredictProbability(observations.Row(i));
            }

            var evaluator = new TotalErrorClassificationMetric <double>();
            var error     = evaluator.Error(targets, actual.Select(p => p.Prediction).ToArray());

            Assert.AreEqual(0.038461538461538464, error, 0.0000001);

            var expected = new ProbabilityPrediction[] { new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.553917222019051 }, { 1, 0.446082777980949 },
                }), new ProbabilityPrediction(1, new Dictionary <double, double> {
                    { 0, 0.455270122123639 }, { 1, 0.544729877876361 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.590671208378385 }, { 1, 0.409328791621616 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.564961572849738 }, { 1, 0.435038427150263 },
                }), new ProbabilityPrediction(1, new Dictionary <double, double> {
                    { 0, 0.455270122123639 }, { 1, 0.544729877876361 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.549970403132686 }, { 1, 0.450029596867314 },
                }), new ProbabilityPrediction(1, new Dictionary <double, double> {
                    { 0, 0.417527839140627 }, { 1, 0.582472160859373 },
                }), new ProbabilityPrediction(1, new Dictionary <double, double> {
                    { 0, 0.409988559960094 }, { 1, 0.590011440039906 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.630894242807786 }, { 1, 0.369105757192214 },
                }), new ProbabilityPrediction(1, new Dictionary <double, double> {
                    { 0, 0.436954866525023 }, { 1, 0.563045133474978 },
                }), new ProbabilityPrediction(1, new Dictionary <double, double> {
                    { 0, 0.461264944069783 }, { 1, 0.538735055930217 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.590671208378385 }, { 1, 0.409328791621616 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.549503146925505 }, { 1, 0.450496853074495 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.537653803214063 }, { 1, 0.462346196785938 },
                }), new ProbabilityPrediction(1, new Dictionary <double, double> {
                    { 0, 0.37650723540928 }, { 1, 0.62349276459072 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.573579890413618 }, { 1, 0.426420109586382 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.549970403132686 }, { 1, 0.450029596867314 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.524371409810479 }, { 1, 0.475628590189522 },
                }), new ProbabilityPrediction(1, new Dictionary <double, double> {
                    { 0, 0.436954866525023 }, { 1, 0.563045133474978 },
                }), new ProbabilityPrediction(1, new Dictionary <double, double> {
                    { 0, 0.471117379964633 }, { 1, 0.528882620035367 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.630894242807786 }, { 1, 0.369105757192214 },
                }), new ProbabilityPrediction(1, new Dictionary <double, double> {
                    { 0, 0.436954866525023 }, { 1, 0.563045133474978 },
                }), new ProbabilityPrediction(1, new Dictionary <double, double> {
                    { 0, 0.404976804073458 }, { 1, 0.595023195926542 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.573579890413618 }, { 1, 0.426420109586382 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.549970403132686 }, { 1, 0.450029596867314 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.630894242807786 }, { 1, 0.369105757192214 },
                }), };

            CollectionAssert.AreEqual(expected, actual);
        }
Example #28
        public void Classification_Find_Best_Model_With_Default_Parameters()
        {
            #region Read and Transform Data
            var parser     = new CsvParser(() => new StringReader(Resources.winequality_white));
            var targetName = "quality";

            // read the feature matrix (all columns except targetName)
            var observations = parser.EnumerateRows(c => c != targetName)
                               .ToF64Matrix();

            // create minmax normalizer (normalizes each feature from 0.0 to 1.0)
            var minMaxTransformer = new MinMaxTransformer(0.0, 1.0);

            // transform the features in place using the normalizer
            minMaxTransformer.Transform(observations, observations);

            // read targets
            var targets = parser.EnumerateRows(targetName)
                          .ToF64Vector();
            #endregion

            // split data
            // create a training/test splitter; the training and test sets are split
            // so that both have an equal distribution of classes.
            var splitter = new StratifiedTrainingTestIndexSplitter <double>(trainingPercentage: 0.7, seed: 24);

            var trainingTestSplit = splitter.SplitSet(observations, targets);
            var trainingSet       = trainingTestSplit.TrainingSet;
            var testSet           = trainingTestSplit.TestSet;

            // Create list of all classification learners (with default parameters)
            var learners = new List <ILearner <double> >
            {
                new ClassificationDecisionTreeLearner(),
                new ClassificationRandomForestLearner(),
                new ClassificationExtremelyRandomizedTreesLearner(),
                new ClassificationAdaBoostLearner(),
                new ClassificationBinomialGradientBoostLearner(),
            };

            // metric for measuring the error
            var metric = new TotalErrorClassificationMetric <double>();

            // try all learners
            var testPredictions = new double[testSet.Targets.Length];
            var testObservation = new double[trainingSet.Observations.ColumnCount];
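            // the buffers above are reused across all learners, so predicting each
            // observation avoids a fresh allocation per model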
            foreach (var learner in learners)
            {
                // train model
                var model = learner.Learn(trainingSet.Observations, trainingSet.Targets);

                // iterate over test set and predict each observation
                for (int i = 0; i < testSet.Targets.Length; i++)
                {
                    testSet.Observations.Row(i, testObservation);
                    testPredictions[i] = model.Predict(testObservation);
                }

                // measure error on test set
                var error = metric.Error(testSet.Targets, testPredictions);

                // Trace learner type and error to output window
                Trace.WriteLine(string.Format("{0}: {1:0.0000}", learner.GetType().Name, error));
            }
        }
        public void ClassificationGradientBoostModel_PredictProbability_Single()
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var learner = new ClassificationGradientBoostLearner(100, 0.1, 3, 1, 1e-6, 1, 0, new GradientBoostBinomialLoss(), false);
            var sut     = learner.Learn(observations, targets);

            var rows   = targets.Length;
            var actual = new ProbabilityPrediction[rows];

            for (int i = 0; i < rows; i++)
            {
                actual[i] = sut.PredictProbability(observations.Row(i));
            }

            var evaluator = new TotalErrorClassificationMetric <double>();
            var error     = evaluator.Error(targets, actual.Select(p => p.Prediction).ToArray());

            Assert.AreEqual(0.038461538461538464, error, 0.0000001);

            var expected = new ProbabilityPrediction[] { new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 1, 0.00153419685769873 }, { 0, 0.998465803142301 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 1, 0.497135615200052 }, { 0, 0.502864384799948 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 1, 0.00674291737944022 }, { 0, 0.99325708262056 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 1, 0.00153419685769873 }, { 0, 0.998465803142301 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 1, 0.497135615200052 }, { 0, 0.502864384799948 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 1, 0.00428497228545111 }, { 0, 0.995715027714549 },
                }), new ProbabilityPrediction(1, new Dictionary <double, double> {
                    { 1, 0.987907185249206 }, { 0, 0.0120928147507945 },
                }), new ProbabilityPrediction(1, new Dictionary <double, double> {
                    { 1, 0.982783250692275 }, { 0, 0.0172167493077254 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 1, 0.00262490179961228 }, { 0, 0.997375098200388 },
                }), new ProbabilityPrediction(1, new Dictionary <double, double> {
                    { 1, 0.996417847055106 }, { 0, 0.00358215294489364 },
                }), new ProbabilityPrediction(1, new Dictionary <double, double> {
                    { 1, 0.995341658753364 }, { 0, 0.00465834124663571 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 1, 0.00674291737944022 }, { 0, 0.99325708262056 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 1, 0.0118633115475969 }, { 0, 0.988136688452403 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 1, 0.00048646805791186 }, { 0, 0.999513531942088 },
                }), new ProbabilityPrediction(1, new Dictionary <double, double> {
                    { 1, 0.999891769651047 }, { 0, 0.000108230348952856 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 1, 0.00334655581934884 }, { 0, 0.996653444180651 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 1, 0.00428497228545111 }, { 0, 0.995715027714549 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 1, 0.0118633115475969 }, { 0, 0.988136688452403 },
                }), new ProbabilityPrediction(1, new Dictionary <double, double> {
                    { 1, 0.996417847055106 }, { 0, 0.00358215294489362 },
                }), new ProbabilityPrediction(1, new Dictionary <double, double> {
                    { 1, 0.993419876193791 }, { 0, 0.00658012380620933 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 1, 0.00262490179961228 }, { 0, 0.997375098200388 },
                }), new ProbabilityPrediction(1, new Dictionary <double, double> {
                    { 1, 0.996417847055106 }, { 0, 0.00358215294489362 },
                }), new ProbabilityPrediction(1, new Dictionary <double, double> {
                    { 1, 0.988568859753437 }, { 0, 0.0114311402465632 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 1, 0.00334655581934884 }, { 0, 0.996653444180651 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 1, 0.00428497228545111 }, { 0, 0.995715027714549 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 1, 0.00262490179961228 }, { 0, 0.997375098200388 },
                }), };

            CollectionAssert.AreEqual(expected, actual);
        }
Example #30
        public void ClassificationForestModel_PredictProbability_Single()
        {
            var parser       = new CsvParser(() => new StringReader(Resources.AptitudeData));
            var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
            var targets      = parser.EnumerateRows("Pass").ToF64Vector();
            var rows         = targets.Length;

            var learner = new ClassificationRandomForestLearner(100, 1, 100, 1, 0.0001, 1.0, 42, false);
            var sut     = learner.Learn(observations, targets);

            var actual = new ProbabilityPrediction[rows];

            for (int i = 0; i < rows; i++)
            {
                actual[i] = sut.PredictProbability(observations.Row(i));
            }

            var evaluator = new TotalErrorClassificationMetric <double>();
            var error     = evaluator.Error(targets, actual.Select(p => p.Prediction).ToArray());

            Assert.AreEqual(0.076923076923076927, error, m_delta);

            var expected = new ProbabilityPrediction[] { new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.650149027443145 }, { 1, 0.349850972556855 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.566943847818848 }, { 1, 0.433056152181152 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.726936489980608 }, { 1, 0.273063510019392 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.752781908451026 }, { 1, 0.247218091548974 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.566943847818848 }, { 1, 0.433056152181152 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.792506836300954 }, { 1, 0.207493163699046 },
                }), new ProbabilityPrediction(1, new Dictionary <double, double> {
                    { 0, 0.491736055611056 }, { 1, 0.508263944388944 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.574583315377433 }, { 1, 0.425416684622567 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.838724674018791 }, { 1, 0.161275325981208 },
                }), new ProbabilityPrediction(1, new Dictionary <double, double> {
                    { 0, 0.241480824730825 }, { 1, 0.758519175269175 },
                }), new ProbabilityPrediction(1, new Dictionary <double, double> {
                    { 0, 0.385258186258186 }, { 1, 0.614741813741813 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.726936489980608 }, { 1, 0.273063510019392 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.706733044733045 }, { 1, 0.293266955266955 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.801266011766012 }, { 1, 0.198733988233988 },
                }), new ProbabilityPrediction(1, new Dictionary <double, double> {
                    { 0, 0.294952297702298 }, { 1, 0.705047702297702 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.821706914001031 }, { 1, 0.178293085998968 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.780062391856509 }, { 1, 0.21993760814349 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.554444388944389 }, { 1, 0.445555611055611 },
                }), new ProbabilityPrediction(1, new Dictionary <double, double> {
                    { 0, 0.261349872349872 }, { 1, 0.738650127650127 },
                }), new ProbabilityPrediction(1, new Dictionary <double, double> {
                    { 0, 0.419758186258186 }, { 1, 0.580241813741813 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.71382231249143 }, { 1, 0.28617768750857 },
                }), new ProbabilityPrediction(1, new Dictionary <double, double> {
                    { 0, 0.241480824730825 }, { 1, 0.758519175269175 },
                }), new ProbabilityPrediction(1, new Dictionary <double, double> {
                    { 0, 0.47562148962149 }, { 1, 0.52437851037851 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.821706914001031 }, { 1, 0.178293085998968 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.792506836300954 }, { 1, 0.207493163699046 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.666244987039105 }, { 1, 0.333755012960895 },
                }) };

            CollectionAssert.AreEqual(expected, actual);
        }