double ClassificationDecisionTreeLearner_Learn_Aptitude_Weighted(int treeDepth, double weight)
{
    var parser = new CsvParser(() => new StringReader(Resources.AptitudeData));
    var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
    var targets = parser.EnumerateRows("Pass").ToF64Vector();

    // create a per-observation weight using the Weight helper (not shown in this snippet).
    var weights = targets.Select(v => Weight(v, 0, weight)).ToArray();

    var sut = new ClassificationDecisionTreeLearner(treeDepth, 1, 2, 0.001, 42);
    var model = sut.Learn(observations, targets, weights);

    var predictions = model.Predict(observations);

    var evaluator = new TotalErrorClassificationMetric<double>();
    Trace.WriteLine(evaluator.ErrorString(targets, predictions));
    var error = evaluator.Error(targets, predictions);

    return error;
}
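// The Weight helper used above is not part of this snippet. A minimal
// sketch of what it plausibly does, assuming it assigns `weight` to
// observations whose target equals `targetToWeigh` and 1.0 to the rest
// (the actual fixture helper may differ):
static double Weight(double target, double targetToWeigh, double weight)
    => target == targetToWeigh ? weight : 1.0;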
public void ClassificationAdaBoostModel_Predict_Single()
{
    var (observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

    var learner = new ClassificationAdaBoostLearner(10);
    var sut = learner.Learn(observations, targets);

    var rows = targets.Length;
    var predictions = new double[rows];
    for (int i = 0; i < rows; i++)
    {
        predictions[i] = sut.Predict(observations.Row(i));
    }

    var evaluator = new TotalErrorClassificationMetric<double>();
    var error = evaluator.Error(targets, predictions);

    Assert.AreEqual(0.038461538461538464, error, 0.0000001);
}
public void ClassificationForestModel_Predict_Single()
{
    var (observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

    var learner = new ClassificationRandomForestLearner(100, 5, 100, 1, 0.0001, 1.0, 42, false);
    var sut = learner.Learn(observations, targets);

    var rows = targets.Length;
    var predictions = new double[rows];
    for (int i = 0; i < rows; i++)
    {
        predictions[i] = sut.Predict(observations.Row(i));
    }

    var evaluator = new TotalErrorClassificationMetric<double>();
    var error = evaluator.Error(targets, predictions);

    Assert.AreEqual(0.23076923076923078, error, m_delta);
}
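// Several tests in this collection compare errors against m_delta, a
// tolerance field defined elsewhere in the test fixture. An assumed
// definition, matching the literal tolerance used in the neighboring
// tests (the actual value in the fixture may differ):
readonly double m_delta = 0.0000001;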
public void ClassificationGradientBoostLearner_LearnWithEarlyStopping()
{
    var parser = new CsvParser(() => new StringReader(Resources.Glass));
    var observations = parser.EnumerateRows(r => r != "Target").ToF64Matrix();
    var targets = parser.EnumerateRows("Target").ToF64Vector();

    var splitter = new StratifiedTrainingTestIndexSplitter<double>(0.6, 1234);
    var split = splitter.SplitSet(observations, targets);

    var sut = new ClassificationBinomialGradientBoostLearner(100, 0.01, 9, 1, 1e-6, .5, 0, false);
    var evaluator = new TotalErrorClassificationMetric<double>();

    // train, stopping early when the test-set error stops improving.
    var model = sut.LearnWithEarlyStopping(
        split.TrainingSet.Observations, split.TrainingSet.Targets,
        split.TestSet.Observations, split.TestSet.Targets,
        evaluator, 10);

    var predictions = model.Predict(split.TestSet.Observations);
    var actual = evaluator.Error(split.TestSet.Targets, predictions);

    Assert.AreEqual(0.16279069767441862, actual, 0.000001);

    // early stopping kept 90 of the 100 possible trees.
    Assert.AreEqual(90, model.Trees.First().ToArray().Length);
}
public void ClassificationGradientBoostModel_Save()
{
    var (observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

    var learner = new ClassificationGradientBoostLearner(5);
    var sut = learner.Learn(observations, targets);

    // save model.
    var writer = new StringWriter();
    sut.Save(() => writer);

    // load model and assert prediction results.
    sut = ClassificationGradientBoostModel.Load(() => new StringReader(writer.ToString()));
    var predictions = sut.Predict(observations);

    var evaluator = new TotalErrorClassificationMetric<double>();
    var actual = evaluator.Error(targets, predictions);

    Assert.AreEqual(0.15384615384615385, actual, 0.0000001);
}
public void ClassificationNeuralNetModel_PredictProbability_Multiple()
{
    var numberOfObservations = 500;
    var numberOfFeatures = 5;
    var numberOfClasses = 5;

    var random = new Random(32);
    var observations = new F64Matrix(numberOfObservations, numberOfFeatures);
    observations.Map(() => random.NextDouble());
    var targets = Enumerable.Range(0, numberOfObservations)
        .Select(i => (double)random.Next(0, numberOfClasses)).ToArray();

    var sut = ClassificationNeuralNetModel.Load(() => new StringReader(ClassificationNeuralNetModelText));

    var predictions = sut.PredictProbability(observations);

    var evaluator = new TotalErrorClassificationMetric<double>();
    var actual = evaluator.Error(targets, predictions.Select(p => p.Prediction).ToArray());

    Assert.AreEqual(0.762, actual);
}
public void ClassificationRandomForestLearner_Learn_Glass_100_Indices()
{
    var (observations, targets) = DataSetUtilities.LoadGlassDataSet();

    var sut = new ClassificationRandomForestLearner(100, 1, 100, 1, 0.0001, 1.0, 42, false);

    // train on a random 70% subset of the rows.
    var indices = Enumerable.Range(0, targets.Length).ToArray();
    indices.Shuffle(new Random(42));
    indices = indices.Take((int)(targets.Length * 0.7)).ToArray();

    var model = sut.Learn(observations, targets, indices);

    var predictions = model.Predict(observations);

    var evaluator = new TotalErrorClassificationMetric<double>();
    var error = evaluator.Error(targets, predictions);

    Assert.AreEqual(0.098130841121495324, error, m_delta);
}
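// Shuffle on int[] is an extension method supplied by SharpLearning,
// not defined in this snippet. Conceptually it performs a seeded
// Fisher-Yates shuffle; an illustrative stand-alone sketch, not the
// library's actual implementation (requires using System;):
public static class ShuffleSketch
{
    public static void Shuffle<T>(this T[] array, Random random)
    {
        // walk the array backwards, swapping each element with a
        // randomly chosen element at or before it.
        for (int i = array.Length - 1; i > 0; i--)
        {
            int j = random.Next(i + 1);
            (array[i], array[j]) = (array[j], array[i]);
        }
    }
}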
public void ClassificationDecisionTreeModel_PredictProbability_Multiple_Indexed()
{
    var (observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

    var learner = new ClassificationDecisionTreeLearner(100, 5, 2, 0.001, 42);
    var sut = learner.Learn(observations, targets);

    var indices = new int[] { 0, 3, 4, 5, 6, 7, 8, 9, 20, 21 };
    var actual = sut.PredictProbability(observations, indices);

    var indexedTargets = targets.GetIndices(indices);
    var evaluator = new TotalErrorClassificationMetric<double>();
    var error = evaluator.Error(indexedTargets, actual.Select(p => p.Prediction).ToArray());

    Assert.AreEqual(0.1, error, 0.0000001);

    var expected = new ProbabilityPrediction[]
    {
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 0, 0.571428571428571 }, { 1, 0.428571428571429 }, }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 0, 0.571428571428571 }, { 1, 0.428571428571429 }, }),
        new ProbabilityPrediction(1, new Dictionary<double, double> { { 0, 0.428571428571429 }, { 1, 0.571428571428571 }, }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 0, 0.75 }, { 1, 0.25 }, }),
        new ProbabilityPrediction(1, new Dictionary<double, double> { { 0, 0.285714285714286 }, { 1, 0.714285714285714 }, }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 0, 0.75 }, { 1, 0.25 }, }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 0, 0.857142857142857 }, { 1, 0.142857142857143 }, }),
        new ProbabilityPrediction(1, new Dictionary<double, double> { { 0, 0.285714285714286 }, { 1, 0.714285714285714 }, }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 0, 0.857142857142857 }, { 1, 0.142857142857143 }, }),
        new ProbabilityPrediction(1, new Dictionary<double, double> { { 0, 0.285714285714286 }, { 1, 0.714285714285714 }, }),
    };
    CollectionAssert.AreEqual(expected, actual);
}
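// GetIndices is likewise a SharpLearning extension method: it selects
// the elements at the given positions. An illustrative sketch of the
// behavior, not the library's actual implementation (requires
// using System.Linq;):
public static class GetIndicesSketch
{
    public static T[] GetIndices<T>(this T[] values, int[] indices)
        => indices.Select(i => values[i]).ToArray();
}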
public void ClassificationAdaBoostLearner_Learn_Glass_Indexed()
{
    var (observations, targets) = DataSetUtilities.LoadGlassDataSet();

    var sut = new ClassificationAdaBoostLearner(10, 1, 5);

    var indices = Enumerable.Range(0, targets.Length).ToArray();
    indices.Shuffle(new Random(42));
    indices = indices.Take((int)(targets.Length * 0.7)).ToArray();

    var model = sut.Learn(observations, targets, indices);
    var predictions = model.Predict(observations);
    var indexedPredictions = predictions.GetIndices(indices);
    var indexedTargets = targets.GetIndices(indices);

    var evaluator = new TotalErrorClassificationMetric<double>();
    var actual = evaluator.Error(indexedTargets, indexedPredictions);

    Assert.AreEqual(0.0, actual);
}
public void ClassificationXGBoostLearner_Learn_indexed()
{
    var (observations, targets) = DataSetUtilities.LoadGlassDataSet();

    var indices = Enumerable.Range(0, targets.Length).ToArray();
    indices.Shuffle(new Random(42));
    indices = indices.Take((int)(targets.Length * 0.7)).ToArray();

    var sut = CreateLearner();

    // the learned XGBoost model is IDisposable, so wrap it in a using block.
    using (var model = sut.Learn(observations, targets, indices))
    {
        var predictions = model.Predict(observations);

        var evaluator = new TotalErrorClassificationMetric<double>();
        var error = evaluator.Error(targets, predictions);

        Assert.AreEqual(0.228971962616822, error, m_delta);
    }
}
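// CreateLearner is a fixture helper not included in this snippet. A
// minimal sketch, assuming it simply constructs a
// ClassificationXGBoostLearner and relies on the constructor's optional
// parameters for default hyperparameters; the actual helper may
// configure the learner explicitly:
static ClassificationXGBoostLearner CreateLearner()
    => new ClassificationXGBoostLearner();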
public void ClassificationBinomialGradientBoostLearner_Stochastic_Learn_Indexed()
{
    var (observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

    var sut = new ClassificationBinomialGradientBoostLearner(30, 0.1, 3, 1, 1e-6, .5, 0, false);

    var indices = Enumerable.Range(0, targets.Length).ToArray();
    indices.Shuffle(new Random(42));
    indices = indices.Take((int)(targets.Length * 0.7)).ToArray();

    var model = sut.Learn(observations, targets, indices);
    var predictions = model.Predict(observations);
    var indexedPredictions = predictions.GetIndices(indices);
    var indexedTargets = targets.GetIndices(indices);

    var evaluator = new TotalErrorClassificationMetric<double>();
    var actual = evaluator.Error(indexedTargets, indexedPredictions);

    Assert.AreEqual(0.055555555555555552, actual);
}
public void ClassificationEnsembleLearner_Learn_Bagging()
{
    var learners = new IIndexedLearner<ProbabilityPrediction>[]
    {
        new ClassificationDecisionTreeLearner(2),
        new ClassificationDecisionTreeLearner(5),
        new ClassificationDecisionTreeLearner(7),
        new ClassificationDecisionTreeLearner(9)
    };

    // each base learner is trained on a random 70% subsample of the data.
    var sut = new ClassificationEnsembleLearner(learners,
        new MeanProbabilityClassificationEnsembleStrategy(), 0.7);

    var (observations, targets) = DataSetUtilities.LoadGlassDataSet();

    var model = sut.Learn(observations, targets);
    var predictions = model.Predict(observations);

    var metric = new TotalErrorClassificationMetric<double>();
    var actual = metric.Error(targets, predictions);

    Assert.AreEqual(0.16822429906542055, actual, 0.0001);
}
public void ClassificationAdaBoostModel_Predict_Single()
{
    var parser = new CsvParser(() => new StringReader(Resources.AptitudeData));
    var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
    var targets = parser.EnumerateRows("Pass").ToF64Vector();
    var rows = targets.Length;

    var learner = new ClassificationAdaBoostLearner(10);
    var sut = learner.Learn(observations, targets);

    var predictions = new double[rows];
    for (int i = 0; i < rows; i++)
    {
        predictions[i] = sut.Predict(observations.Row(i));
    }

    var evaluator = new TotalErrorClassificationMetric<double>();
    var error = evaluator.Error(targets, predictions);

    Assert.AreEqual(0.038461538461538464, error, 0.0000001);
}
public void ClassificationEnsembleModel_Predict_Multiple()
{
    var (observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

    var learners = new IIndexedLearner<ProbabilityPrediction>[]
    {
        new ClassificationDecisionTreeLearner(2),
        new ClassificationDecisionTreeLearner(5),
        new ClassificationDecisionTreeLearner(7),
        new ClassificationDecisionTreeLearner(9)
    };

    var learner = new ClassificationEnsembleLearner(learners,
        new MeanProbabilityClassificationEnsembleStrategy());
    var sut = learner.Learn(observations, targets);

    var predictions = sut.Predict(observations);

    var metric = new TotalErrorClassificationMetric<double>();
    var actual = metric.Error(targets, predictions);

    Assert.AreEqual(0.076923076923076927, actual, 0.0000001);
}
public void ClassificationForestModel_Trees()
{
    var (observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

    var reader = new StringReader(m_classificationForestModelString);
    var sut = ClassificationForestModel.Load(() => reader);

    // predict each row by majority vote over the individual trees.
    var rows = observations.RowCount;
    var predictions = new double[rows];
    for (int row = 0; row < rows; row++)
    {
        var observation = observations.Row(row);
        predictions[row] = sut.Trees.Select(t => t.Predict(observation))
            .GroupBy(p => p).OrderByDescending(g => g.Count())
            .First().Key;
    }

    var evaluator = new TotalErrorClassificationMetric<double>();
    var error = evaluator.Error(targets, predictions);

    Assert.AreEqual(0.42307692307692307, error, m_delta);
}
public void ClassificationGradientBoostModel_Save()
{
    var parser = new CsvParser(() => new StringReader(Resources.AptitudeData));
    var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
    var targets = parser.EnumerateRows("Pass").ToF64Vector();

    var learner = new ClassificationGradientBoostLearner(5);
    var sut = learner.Learn(observations, targets);

    // save model.
    var writer = new StringWriter();
    sut.Save(() => writer);

    // load model and assert prediction results.
    sut = ClassificationGradientBoostModel.Load(() => new StringReader(writer.ToString()));
    var predictions = sut.Predict(observations);

    var evaluator = new TotalErrorClassificationMetric<double>();
    var actual = evaluator.Error(targets, predictions);

    Assert.AreEqual(0.15384615384615385, actual, 0.0000001);
}
public void ClassificationStackingEnsembleLearner_Learn_Include_Original_Features()
{
    var learners = new IIndexedLearner<ProbabilityPrediction>[]
    {
        new ClassificationDecisionTreeLearner(2),
        new ClassificationDecisionTreeLearner(5),
        new ClassificationDecisionTreeLearner(7),
        new ClassificationDecisionTreeLearner(9)
    };

    // the final `true` includes the original features alongside the
    // base learners' predictions when training the meta learner.
    var sut = new ClassificationStackingEnsembleLearner(learners,
        new ClassificationDecisionTreeLearner(9),
        new RandomCrossValidation<ProbabilityPrediction>(5, 23), true);

    var (observations, targets) = DataSetUtilities.LoadGlassDataSet();

    var model = sut.Learn(observations, targets);
    var predictions = model.Predict(observations);

    var metric = new TotalErrorClassificationMetric<double>();
    var actual = metric.Error(targets, predictions);

    Assert.AreEqual(0.26168224299065418, actual, 0.0001);
}
public void ClassificationEnsembleLearner_Learn_Bagging()
{
    var learners = new IIndexedLearner<ProbabilityPrediction>[]
    {
        new ClassificationDecisionTreeLearner(2),
        new ClassificationDecisionTreeLearner(5),
        new ClassificationDecisionTreeLearner(7),
        new ClassificationDecisionTreeLearner(9)
    };

    var sut = new ClassificationEnsembleLearner(learners,
        new MeanProbabilityClassificationEnsembleStrategy(), 0.7);

    var parser = new CsvParser(() => new StringReader(Resources.Glass));
    var observations = parser.EnumerateRows(v => v != "Target").ToF64Matrix();
    var targets = parser.EnumerateRows("Target").ToF64Vector();

    var model = sut.Learn(observations, targets);
    var predictions = model.Predict(observations);

    var metric = new TotalErrorClassificationMetric<double>();
    var actual = metric.Error(targets, predictions);

    Assert.AreEqual(0.16822429906542055, actual, 0.0001);
}
public void ClassificationStackingEnsembleModel_Predict_Multiple()
{
    var (observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

    var learners = new IIndexedLearner<ProbabilityPrediction>[]
    {
        new ClassificationDecisionTreeLearner(2),
        new ClassificationDecisionTreeLearner(5),
        new ClassificationDecisionTreeLearner(7),
        new ClassificationDecisionTreeLearner(9)
    };

    var learner = new ClassificationStackingEnsembleLearner(learners,
        new ClassificationDecisionTreeLearner(9),
        new RandomCrossValidation<ProbabilityPrediction>(5, 23), false);
    var sut = learner.Learn(observations, targets);

    var predictions = sut.Predict(observations);

    var metric = new TotalErrorClassificationMetric<double>();
    var actual = metric.Error(targets, predictions);

    Assert.AreEqual(0.34615384615384615, actual, 0.0000001);
}
public void ClassificationAdaBoostLearner_Learn_Glass_Indexed()
{
    var parser = new CsvParser(() => new StringReader(Resources.Glass));
    var observations = parser.EnumerateRows(v => v != "Target").ToF64Matrix();
    var targets = parser.EnumerateRows("Target").ToF64Vector();

    var sut = new ClassificationAdaBoostLearner(10, 1, 5);

    var indices = Enumerable.Range(0, targets.Length).ToArray();
    indices.Shuffle(new Random(42));
    indices = indices.Take((int)(targets.Length * 0.7)).ToArray();

    var model = sut.Learn(observations, targets, indices);
    var predictions = model.Predict(observations);
    var indexedPredictions = predictions.GetIndices(indices);
    var indexedTargets = targets.GetIndices(indices);

    var evaluator = new TotalErrorClassificationMetric<double>();
    var actual = evaluator.Error(indexedTargets, indexedPredictions);

    Assert.AreEqual(0.0, actual);
}
public void ClassificationXGBoostLearner_Learn_indexed()
{
    var parser = new CsvParser(() => new StringReader(Resources.Glass));
    var observations = parser.EnumerateRows(v => v != "Target").ToF64Matrix();
    var targets = parser.EnumerateRows("Target").ToF64Vector();

    var indices = Enumerable.Range(0, targets.Length).ToArray();
    indices.Shuffle(new Random(42));
    indices = indices.Take((int)(targets.Length * 0.7)).ToArray();

    var sut = CreateLearner();

    using (var model = sut.Learn(observations, targets, indices))
    {
        var predictions = model.Predict(observations);

        var evaluator = new TotalErrorClassificationMetric<double>();
        var error = evaluator.Error(targets, predictions);

        Assert.AreEqual(0.228971962616822, error, m_delta);
    }
}
public void ClassificationEnsembleLearner_Learn_Indexed()
{
    var learners = new IIndexedLearner<ProbabilityPrediction>[]
    {
        new ClassificationDecisionTreeLearner(2),
        new ClassificationDecisionTreeLearner(5),
        new ClassificationDecisionTreeLearner(7),
        new ClassificationDecisionTreeLearner(9)
    };

    var sut = new ClassificationEnsembleLearner(learners,
        new MeanProbabilityClassificationEnsembleStrategy());

    var (observations, targets) = DataSetUtilities.LoadGlassDataSet();

    // train on the first 25 rows only.
    var indices = Enumerable.Range(0, 25).ToArray();

    var model = sut.Learn(observations, targets, indices);
    var predictions = model.Predict(observations);

    var metric = new TotalErrorClassificationMetric<double>();
    var actual = metric.Error(targets, predictions);

    Assert.AreEqual(0.67289719626168221, actual, 0.0001);
}
public void ClassificationBinomialGradientBoostLearner_Stochastic_Learn_Indexed()
{
    var parser = new CsvParser(() => new StringReader(Resources.AptitudeData));
    var observations = parser.EnumerateRows("AptitudeTestScore", "PreviousExperience_month").ToF64Matrix();
    var targets = parser.EnumerateRows("Pass").ToF64Vector();

    var sut = new ClassificationBinomialGradientBoostLearner(30, 0.1, 3, 1, 1e-6, .5, 0, false);

    var indices = Enumerable.Range(0, targets.Length).ToArray();
    indices.Shuffle(new Random(42));
    indices = indices.Take((int)(targets.Length * 0.7)).ToArray();

    var model = sut.Learn(observations, targets, indices);
    var predictions = model.Predict(observations);
    var indexedPredictions = predictions.GetIndices(indices);
    var indexedTargets = targets.GetIndices(indices);

    var evaluator = new TotalErrorClassificationMetric<double>();
    var actual = evaluator.Error(indexedTargets, indexedPredictions);

    Assert.AreEqual(0.055555555555555552, actual);
}
public void ClassificationExtremelyRandomizedTreesLearner_Learn_Glass_100_Indices()
{
    var parser = new CsvParser(() => new StringReader(Resources.Glass));
    var observations = parser.EnumerateRows(v => v != "Target").ToF64Matrix();
    var targets = parser.EnumerateRows("Target").ToF64Vector();

    var sut = new ClassificationExtremelyRandomizedTreesLearner(100, 1, 100, 1, 0.0001, 1.0, 42, false);

    var indices = Enumerable.Range(0, targets.Length).ToArray();
    indices.Shuffle(new Random(42));
    indices = indices.Take((int)(targets.Length * 0.7)).ToArray();

    var model = sut.Learn(observations, targets, indices);
    var predictions = model.Predict(observations);

    var evaluator = new TotalErrorClassificationMetric<double>();
    var error = evaluator.Error(targets, predictions);

    Assert.AreEqual(0.14485981308411214, error, 0.0000001);
}
public void ClassificationBinomialGradientBoostLearner_MultiClass_Stochastic_FeaturePrSplit_Learn_Indexed()
{
    var parser = new CsvParser(() => new StringReader(Resources.Glass));
    var observations = parser.EnumerateRows(v => v != "Target").ToF64Matrix();
    var targets = parser.EnumerateRows("Target").ToF64Vector();

    var sut = new ClassificationBinomialGradientBoostLearner(30, 0.1, 3, 1, 1e-6, 0.5, 3, false);

    var indices = Enumerable.Range(0, targets.Length).ToArray();
    indices.Shuffle(new Random(42));
    indices = indices.Take((int)(targets.Length * 0.7)).ToArray();

    var model = sut.Learn(observations, targets, indices);
    var predictions = model.Predict(observations);
    var indexedPredictions = predictions.GetIndices(indices);
    var indexedTargets = targets.GetIndices(indices);

    var evaluator = new TotalErrorClassificationMetric<double>();
    var actual = evaluator.Error(indexedTargets, indexedPredictions);

    Assert.AreEqual(0.033557046979865772, actual);
}
public void ClassificationNeuralNetModel_Save()
{
    var numberOfObservations = 500;
    var numberOfFeatures = 5;
    var numberOfClasses = 5;

    var random = new Random(32);
    var observations = new F64Matrix(numberOfObservations, numberOfFeatures);
    observations.Map(() => random.NextDouble());
    var targets = Enumerable.Range(0, numberOfObservations)
        .Select(i => (double)random.Next(0, numberOfClasses)).ToArray();

    var net = new NeuralNet();
    net.Add(new InputLayer(numberOfFeatures));
    net.Add(new DenseLayer(10));
    net.Add(new SvmLayer(numberOfClasses));

    var learner = new ClassificationNeuralNetLearner(net, new AccuracyLoss());
    var sut = learner.Learn(observations, targets);

    // save model.
    var writer = new StringWriter();
    sut.Save(() => writer);

    // load model and assert prediction results.
    sut = ClassificationNeuralNetModel.Load(() => new StringReader(writer.ToString()));
    var predictions = sut.Predict(observations);

    var evaluator = new TotalErrorClassificationMetric<double>();
    var actual = evaluator.Error(targets, predictions);

    Assert.AreEqual(0.762, actual, 0.0000001);
}
public void ClassificationAdaBoostModel_PredictProbability_Single()
{
    var (observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

    var learner = new ClassificationAdaBoostLearner(10, 1, 3);
    var sut = learner.Learn(observations, targets);

    var rows = targets.Length;
    var actual = new ProbabilityPrediction[rows];
    for (int i = 0; i < rows; i++)
    {
        actual[i] = sut.PredictProbability(observations.Row(i));
    }

    var evaluator = new TotalErrorClassificationMetric<double>();
    var error = evaluator.Error(targets, actual.Select(p => p.Prediction).ToArray());

    Assert.AreEqual(0.038461538461538464, error, 0.0000001);

    var expected = new ProbabilityPrediction[]
    {
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 0, 0.553917222019051 }, { 1, 0.446082777980949 }, }),
        new ProbabilityPrediction(1, new Dictionary<double, double> { { 0, 0.455270122123639 }, { 1, 0.544729877876361 }, }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 0, 0.590671208378385 }, { 1, 0.409328791621616 }, }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 0, 0.564961572849738 }, { 1, 0.435038427150263 }, }),
        new ProbabilityPrediction(1, new Dictionary<double, double> { { 0, 0.455270122123639 }, { 1, 0.544729877876361 }, }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 0, 0.549970403132686 }, { 1, 0.450029596867314 }, }),
        new ProbabilityPrediction(1, new Dictionary<double, double> { { 0, 0.417527839140627 }, { 1, 0.582472160859373 }, }),
        new ProbabilityPrediction(1, new Dictionary<double, double> { { 0, 0.409988559960094 }, { 1, 0.590011440039906 }, }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 0, 0.630894242807786 }, { 1, 0.369105757192214 }, }),
        new ProbabilityPrediction(1, new Dictionary<double, double> { { 0, 0.436954866525023 }, { 1, 0.563045133474978 }, }),
        new ProbabilityPrediction(1, new Dictionary<double, double> { { 0, 0.461264944069783 }, { 1, 0.538735055930217 }, }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 0, 0.590671208378385 }, { 1, 0.409328791621616 }, }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 0, 0.549503146925505 }, { 1, 0.450496853074495 }, }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 0, 0.537653803214063 }, { 1, 0.462346196785938 }, }),
        new ProbabilityPrediction(1, new Dictionary<double, double> { { 0, 0.37650723540928 }, { 1, 0.62349276459072 }, }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 0, 0.573579890413618 }, { 1, 0.426420109586382 }, }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 0, 0.549970403132686 }, { 1, 0.450029596867314 }, }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 0, 0.524371409810479 }, { 1, 0.475628590189522 }, }),
        new ProbabilityPrediction(1, new Dictionary<double, double> { { 0, 0.436954866525023 }, { 1, 0.563045133474978 }, }),
        new ProbabilityPrediction(1, new Dictionary<double, double> { { 0, 0.471117379964633 }, { 1, 0.528882620035367 }, }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 0, 0.630894242807786 }, { 1, 0.369105757192214 }, }),
        new ProbabilityPrediction(1, new Dictionary<double, double> { { 0, 0.436954866525023 }, { 1, 0.563045133474978 }, }),
        new ProbabilityPrediction(1, new Dictionary<double, double> { { 0, 0.404976804073458 }, { 1, 0.595023195926542 }, }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 0, 0.573579890413618 }, { 1, 0.426420109586382 }, }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 0, 0.549970403132686 }, { 1, 0.450029596867314 }, }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 0, 0.630894242807786 }, { 1, 0.369105757192214 }, }),
    };
    CollectionAssert.AreEqual(expected, actual);
}
public void Classification_Find_Best_Model_With_Default_Parameters()
{
    #region Read and Transform Data

    var parser = new CsvParser(() => new StringReader(Resources.winequality_white));
    var targetName = "quality";

    // read the feature matrix (all columns except the target).
    var observations = parser.EnumerateRows(c => c != targetName)
        .ToF64Matrix();

    // create a min-max normalizer (scales each feature to the range 0.0 to 1.0).
    var minMaxTransformer = new MinMaxTransformer(0.0, 1.0);

    // transform the features using the normalizer.
    minMaxTransformer.Transform(observations, observations);

    // read the targets.
    var targets = parser.EnumerateRows(targetName)
        .ToF64Vector();

    #endregion

    // split the data into training and test sets, stratified so that
    // both sets have roughly the same class distribution.
    var splitter = new StratifiedTrainingTestIndexSplitter<double>(trainingPercentage: 0.7, seed: 24);

    var trainingTestSplit = splitter.SplitSet(observations, targets);
    var trainingSet = trainingTestSplit.TrainingSet;
    var testSet = trainingTestSplit.TestSet;

    // create a list of all classification learners (with default parameters).
    var learners = new List<ILearner<double>>
    {
        new ClassificationDecisionTreeLearner(),
        new ClassificationRandomForestLearner(),
        new ClassificationExtremelyRandomizedTreesLearner(),
        new ClassificationAdaBoostLearner(),
        new ClassificationBinomialGradientBoostLearner(),
    };

    // metric for measuring the error.
    var metric = new TotalErrorClassificationMetric<double>();

    // try all learners.
    var testPredictions = new double[testSet.Targets.Length];
    var testObservation = new double[trainingSet.Observations.ColumnCount];
    foreach (var learner in learners)
    {
        // train the model.
        var model = learner.Learn(trainingSet.Observations, trainingSet.Targets);

        // iterate over the test set and predict each observation.
        for (int i = 0; i < testSet.Targets.Length; i++)
        {
            testSet.Observations.Row(i, testObservation);
            testPredictions[i] = model.Predict(testObservation);
        }

        // measure the error on the test set.
        var error = metric.Error(testSet.Targets, testPredictions);

        // trace the learner type and error to the output window.
        Trace.WriteLine(string.Format("{0}: {1:0.0000}", learner.GetType().Name, error));
    }
}
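// The loop above only traces each learner's error. To actually select
// the best model, the same loop can track the minimum error; an
// illustrative extension (bestError and bestLearner are hypothetical
// names, not part of the original test):
var bestError = double.MaxValue;
ILearner<double> bestLearner = null;
foreach (var learner in learners)
{
    var model = learner.Learn(trainingSet.Observations, trainingSet.Targets);
    for (int i = 0; i < testSet.Targets.Length; i++)
    {
        testSet.Observations.Row(i, testObservation);
        testPredictions[i] = model.Predict(testObservation);
    }

    // keep the learner with the lowest test-set error seen so far.
    var error = metric.Error(testSet.Targets, testPredictions);
    if (error < bestError)
    {
        bestError = error;
        bestLearner = learner;
    }
}
Trace.WriteLine($"Best learner: {bestLearner.GetType().Name} ({bestError:0.0000})");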
public void ClassificationGradientBoostModel_PredictProbability_Single()
{
    var (observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

    var learner = new ClassificationGradientBoostLearner(100, 0.1, 3, 1, 1e-6, 1, 0, new GradientBoostBinomialLoss(), false);
    var sut = learner.Learn(observations, targets);

    var rows = targets.Length;
    var actual = new ProbabilityPrediction[rows];
    for (int i = 0; i < rows; i++)
    {
        actual[i] = sut.PredictProbability(observations.Row(i));
    }

    var evaluator = new TotalErrorClassificationMetric<double>();
    var error = evaluator.Error(targets, actual.Select(p => p.Prediction).ToArray());

    Assert.AreEqual(0.038461538461538464, error, 0.0000001);

    var expected = new ProbabilityPrediction[]
    {
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 1, 0.00153419685769873 }, { 0, 0.998465803142301 }, }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 1, 0.497135615200052 }, { 0, 0.502864384799948 }, }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 1, 0.00674291737944022 }, { 0, 0.99325708262056 }, }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 1, 0.00153419685769873 }, { 0, 0.998465803142301 }, }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 1, 0.497135615200052 }, { 0, 0.502864384799948 }, }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 1, 0.00428497228545111 }, { 0, 0.995715027714549 }, }),
        new ProbabilityPrediction(1, new Dictionary<double, double> { { 1, 0.987907185249206 }, { 0, 0.0120928147507945 }, }),
        new ProbabilityPrediction(1, new Dictionary<double, double> { { 1, 0.982783250692275 }, { 0, 0.0172167493077254 }, }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 1, 0.00262490179961228 }, { 0, 0.997375098200388 }, }),
        new ProbabilityPrediction(1, new Dictionary<double, double> { { 1, 0.996417847055106 }, { 0, 0.00358215294489364 }, }),
        new ProbabilityPrediction(1, new Dictionary<double, double> { { 1, 0.995341658753364 }, { 0, 0.00465834124663571 }, }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 1, 0.00674291737944022 }, { 0, 0.99325708262056 }, }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 1, 0.0118633115475969 }, { 0, 0.988136688452403 }, }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 1, 0.00048646805791186 }, { 0, 0.999513531942088 }, }),
        new ProbabilityPrediction(1, new Dictionary<double, double> { { 1, 0.999891769651047 }, { 0, 0.000108230348952856 }, }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 1, 0.00334655581934884 }, { 0, 0.996653444180651 }, }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 1, 0.00428497228545111 }, { 0, 0.995715027714549 }, }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 1, 0.0118633115475969 }, { 0, 0.988136688452403 }, }),
        new ProbabilityPrediction(1, new Dictionary<double, double> { { 1, 0.996417847055106 }, { 0, 0.00358215294489362 }, }),
        new ProbabilityPrediction(1, new Dictionary<double, double> { { 1, 0.993419876193791 }, { 0, 0.00658012380620933 }, }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 1, 0.00262490179961228 }, { 0, 0.997375098200388 }, }),
        new ProbabilityPrediction(1, new Dictionary<double, double> { { 1, 0.996417847055106 }, { 0, 0.00358215294489362 }, }),
        new ProbabilityPrediction(1, new Dictionary<double, double> { { 1, 0.988568859753437 }, { 0, 0.0114311402465632 }, }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 1, 0.00334655581934884 }, { 0, 0.996653444180651 }, }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 1, 0.00428497228545111 }, { 0, 0.995715027714549 }, }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 1, 0.00262490179961228 }, { 0, 0.997375098200388 }, }),
    };
    CollectionAssert.AreEqual(expected, actual);
}
public void ClassificationForestModel_PredictProbability_Single()
{
    var parser = new CsvParser(() => new StringReader(Resources.AptitudeData));
    var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
    var targets = parser.EnumerateRows("Pass").ToF64Vector();
    var rows = targets.Length;

    var learner = new ClassificationRandomForestLearner(100, 1, 100, 1, 0.0001, 1.0, 42, false);
    var sut = learner.Learn(observations, targets);

    var actual = new ProbabilityPrediction[rows];
    for (int i = 0; i < rows; i++)
    {
        actual[i] = sut.PredictProbability(observations.Row(i));
    }

    var evaluator = new TotalErrorClassificationMetric<double>();
    var error = evaluator.Error(targets, actual.Select(p => p.Prediction).ToArray());

    Assert.AreEqual(0.076923076923076927, error, m_delta);

    var expected = new ProbabilityPrediction[]
    {
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 0, 0.650149027443145 }, { 1, 0.349850972556855 }, }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 0, 0.566943847818848 }, { 1, 0.433056152181152 }, }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 0, 0.726936489980608 }, { 1, 0.273063510019392 }, }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 0, 0.752781908451026 }, { 1, 0.247218091548974 }, }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 0, 0.566943847818848 }, { 1, 0.433056152181152 }, }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 0, 0.792506836300954 }, { 1, 0.207493163699046 }, }),
        new ProbabilityPrediction(1, new Dictionary<double, double> { { 0, 0.491736055611056 }, { 1, 0.508263944388944 }, }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 0, 0.574583315377433 }, { 1, 0.425416684622567 }, }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 0, 0.838724674018791 }, { 1, 0.161275325981208 }, }),
        new ProbabilityPrediction(1, new Dictionary<double, double> { { 0, 0.241480824730825 }, { 1, 0.758519175269175 }, }),
        new ProbabilityPrediction(1, new Dictionary<double, double> { { 0, 0.385258186258186 }, { 1, 0.614741813741813 }, }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 0, 0.726936489980608 }, { 1, 0.273063510019392 }, }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 0, 0.706733044733045 }, { 1, 0.293266955266955 }, }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 0, 0.801266011766012 }, { 1, 0.198733988233988 }, }),
        new ProbabilityPrediction(1, new Dictionary<double, double> { { 0, 0.294952297702298 }, { 1, 0.705047702297702 }, }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 0, 0.821706914001031 }, { 1, 0.178293085998968 }, }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 0, 0.780062391856509 }, { 1, 0.21993760814349 }, }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 0, 0.554444388944389 }, { 1, 0.445555611055611 }, }),
        new ProbabilityPrediction(1, new Dictionary<double, double> { { 0, 0.261349872349872 }, { 1, 0.738650127650127 }, }),
        new ProbabilityPrediction(1, new Dictionary<double, double> { { 0, 0.419758186258186 }, { 1, 0.580241813741813 }, }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 0, 0.71382231249143 }, { 1, 0.28617768750857 }, }),
        new ProbabilityPrediction(1, new Dictionary<double, double> { { 0, 0.241480824730825 }, { 1, 0.758519175269175 }, }),
        new ProbabilityPrediction(1, new Dictionary<double, double> { { 0, 0.47562148962149 }, { 1, 0.52437851037851 }, }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 0, 0.821706914001031 }, { 1, 0.178293085998968 }, }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 0, 0.792506836300954 }, { 1, 0.207493163699046 }, }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 0, 0.666244987039105 }, { 1, 0.333755012960895 }, }),
    };
    CollectionAssert.AreEqual(expected, actual);
}