/// <summary>
/// Learns a <c>ClassificationModelSelectingEnsembleLearner</c> over twelve decision-tree
/// learners on the glass data set, using a forward-search ensemble selection, and compares
/// the log loss of the predictions on the same observations with a recorded value.
/// </summary>
public void ClassificationModelSelectingEnsembleLearner_Learn_Start_With_3_Models()
{
    // Arrange
    var (features, labels) = DataSetUtilities.LoadGlassDataSet();

    var candidates = new IIndexedLearner<ProbabilityPrediction>[]
    {
        new ClassificationDecisionTreeLearner(2),
        new ClassificationDecisionTreeLearner(5),
        new ClassificationDecisionTreeLearner(7),
        new ClassificationDecisionTreeLearner(9),
        new ClassificationDecisionTreeLearner(11),
        new ClassificationDecisionTreeLearner(21),
        new ClassificationDecisionTreeLearner(23),
        new ClassificationDecisionTreeLearner(1),
        new ClassificationDecisionTreeLearner(14),
        new ClassificationDecisionTreeLearner(17),
        new ClassificationDecisionTreeLearner(19),
        new ClassificationDecisionTreeLearner(33),
    };

    var logLoss = new LogLossClassificationProbabilityMetric();
    var averaging = new MeanProbabilityClassificationEnsembleStrategy();

    // NOTE(review): the `3` presumably is the number of models the search starts with
    // (per the test name) - confirm against the selection's constructor.
    var selection = new ForwardSearchClassificationEnsembleSelection(logLoss, averaging, 5, 3, true);
    var crossValidation = new RandomCrossValidation<ProbabilityPrediction>(5, 23);

    var sut = new ClassificationModelSelectingEnsembleLearner(
        candidates, crossValidation, averaging, selection);

    // Act
    var ensemble = sut.Learn(features, labels);
    var probabilities = ensemble.PredictProbability(features);
    var actual = logLoss.Error(labels, probabilities);

    // Assert
    const double expected = 0.55183985816428427;
    const double tolerance = 0.0001;
    Assert.AreEqual(expected, actual, tolerance);
}
/// <summary>
/// Learns a stacking ensemble (four decision-tree learners plus a decision-tree learner
/// passed as the second constructor argument) on the aptitude CSV resource, predicts each
/// row individually, and compares the log loss with a recorded value.
/// </summary>
public void ClassificationStackingEnsembleModel_PredictProbability_single()
{
    // Arrange: every column except "Pass" is a feature; "Pass" is the target.
    const string targetColumn = "Pass";
    var aptitudeParser = new CsvParser(() => new StringReader(Resources.AptitudeData));
    var features = aptitudeParser.EnumerateRows(column => column != targetColumn).ToF64Matrix();
    var labels = aptitudeParser.EnumerateRows(targetColumn).ToF64Vector();
    var rowCount = labels.Length;

    var baseLearners = new IIndexedLearner<ProbabilityPrediction>[]
    {
        new ClassificationDecisionTreeLearner(2),
        new ClassificationDecisionTreeLearner(5),
        new ClassificationDecisionTreeLearner(7),
        new ClassificationDecisionTreeLearner(9),
    };
    var secondStageLearner = new ClassificationDecisionTreeLearner(9);
    var crossValidation = new RandomCrossValidation<ProbabilityPrediction>(5, 23);
    var stackingLearner = new ClassificationStackingEnsembleLearner(
        baseLearners, secondStageLearner, crossValidation, false);

    var sut = stackingLearner.Learn(features, labels);

    // Act: exercise the single-observation overload, one row at a time.
    var probabilities = new ProbabilityPrediction[rowCount];
    var row = 0;
    while (row < rowCount)
    {
        probabilities[row] = sut.PredictProbability(features.Row(row));
        row++;
    }

    var logLoss = new LogLossClassificationProbabilityMetric();
    var actual = logLoss.Error(labels, probabilities);

    // Assert
    const double expected = 0.6696598716465223;
    const double tolerance = 0.0000001;
    Assert.AreEqual(expected, actual, tolerance);
}
/// <summary>
/// Runs the two-step API of the backward-elimination model-selecting ensemble learner
/// (<c>LearnMetaFeatures</c> followed by <c>SelectModels</c>) on the glass CSV resource and
/// compares the log loss of the resulting model's predictions with a recorded value.
/// </summary>
public void ClassificationBackwardEliminationModelSelectingEnsembleLearner_CreateMetaFeatures_Then_Learn()
{
    // Arrange: every column except "Target" is a feature; "Target" is the target.
    const string targetColumn = "Target";
    var glassParser = new CsvParser(() => new StringReader(Resources.Glass));
    var features = glassParser.EnumerateRows(column => column != targetColumn).ToF64Matrix();
    var labels = glassParser.EnumerateRows(targetColumn).ToF64Vector();

    // Single constructor argument handed to each decision-tree learner, in order.
    int[] learnerArguments = { 2, 5, 7, 9, 11, 21, 23, 1, 14, 17, 19, 33 };
    var candidates = new IIndexedLearner<ProbabilityPrediction>[learnerArguments.Length];
    for (var index = 0; index < learnerArguments.Length; index++)
    {
        candidates[index] = new ClassificationDecisionTreeLearner(learnerArguments[index]);
    }

    var sut = new ClassificationBackwardEliminationModelSelectingEnsembleLearner(candidates, 5);

    // Act
    var metaFeatures = sut.LearnMetaFeatures(features, labels);
    var selectedModel = sut.SelectModels(features, metaFeatures, labels);
    var probabilities = selectedModel.PredictProbability(features);

    var logLoss = new LogLossClassificationProbabilityMetric();
    var actual = logLoss.Error(labels, probabilities);

    // Assert
    const double expected = 0.52351727716455632;
    const double tolerance = 0.0001;
    Assert.AreEqual(expected, actual, tolerance);
}
/// <summary>
/// Learns a <c>ClassificationEnsembleLearner</c> with a mean-probability strategy over four
/// decision-tree learners on the aptitude data set, predicts each row individually, and
/// compares the log loss with a recorded value.
/// </summary>
public void ClassificationEnsembleModel_PredictProbability_single()
{
    // Arrange
    var (features, labels) = DataSetUtilities.LoadAptitudeDataSet();

    var members = new IIndexedLearner<ProbabilityPrediction>[]
    {
        new ClassificationDecisionTreeLearner(2),
        new ClassificationDecisionTreeLearner(5),
        new ClassificationDecisionTreeLearner(7),
        new ClassificationDecisionTreeLearner(9),
    };
    var averaging = new MeanProbabilityClassificationEnsembleStrategy();
    var ensembleLearner = new ClassificationEnsembleLearner(members, averaging);

    var sut = ensembleLearner.Learn(features, labels);

    // Act: one call to the single-observation overload per target entry.
    var probabilities = new ProbabilityPrediction[labels.Length];
    for (var row = 0; row < probabilities.Length; row++)
    {
        var observation = features.Row(row);
        probabilities[row] = sut.PredictProbability(observation);
    }

    var logLoss = new LogLossClassificationProbabilityMetric();
    var actual = logLoss.Error(labels, probabilities);

    // Assert
    const double expected = 0.32562112824941963;
    const double tolerance = 0.0000001;
    Assert.AreEqual(expected, actual, tolerance);
}
/// <summary>
/// Calls <c>LearnMetaFeatures</c> and then <c>SelectModels</c> on the backward-elimination
/// model-selecting ensemble learner using the glass data set, and compares the log loss of
/// the selected model's predictions on the same observations with a recorded value.
/// </summary>
public void ClassificationBackwardEliminationModelSelectingEnsembleLearner_CreateMetaFeatures_Then_Learn()
{
    // Arrange
    var (features, labels) = DataSetUtilities.LoadGlassDataSet();

    var candidates = new IIndexedLearner<ProbabilityPrediction>[]
    {
        new ClassificationDecisionTreeLearner(2),
        new ClassificationDecisionTreeLearner(5),
        new ClassificationDecisionTreeLearner(7),
        new ClassificationDecisionTreeLearner(9),
        new ClassificationDecisionTreeLearner(11),
        new ClassificationDecisionTreeLearner(21),
        new ClassificationDecisionTreeLearner(23),
        new ClassificationDecisionTreeLearner(1),
        new ClassificationDecisionTreeLearner(14),
        new ClassificationDecisionTreeLearner(17),
        new ClassificationDecisionTreeLearner(19),
        new ClassificationDecisionTreeLearner(33),
    };
    var sut = new ClassificationBackwardEliminationModelSelectingEnsembleLearner(candidates, 5);

    // Act: meta features are produced first, then handed to model selection.
    var metaFeatures = sut.LearnMetaFeatures(features, labels);
    var selectedModel = sut.SelectModels(features, metaFeatures, labels);
    var probabilities = selectedModel.PredictProbability(features);

    var logLoss = new LogLossClassificationProbabilityMetric();
    var actual = logLoss.Error(labels, probabilities);

    // Assert
    const double expected = 0.52351727716455632;
    const double tolerance = 0.0001;
    Assert.AreEqual(expected, actual, tolerance);
}
/// <summary>
/// Learns the backward-elimination model-selecting ensemble on the glass data set through
/// the <c>Learn</c> overload that takes indices (here 0 through 24), predicts all
/// observations, and compares the log loss with a recorded value.
/// </summary>
public void ClassificationBackwardEliminationModelSelectingEnsembleLearner_Learn_Indexed()
{
    // Arrange
    var (features, labels) = DataSetUtilities.LoadGlassDataSet();
    var trainingIndices = Enumerable.Range(0, 25).ToArray();

    // Single constructor argument handed to each decision-tree learner, in order.
    int[] learnerArguments = { 2, 5, 7, 9, 11, 21, 23, 1, 14, 17, 19, 33 };
    var candidates = new IIndexedLearner<ProbabilityPrediction>[learnerArguments.Length];
    for (var index = 0; index < learnerArguments.Length; index++)
    {
        candidates[index] = new ClassificationDecisionTreeLearner(learnerArguments[index]);
    }

    var logLoss = new LogLossClassificationProbabilityMetric();
    var averaging = new MeanProbabilityClassificationEnsembleStrategy();
    var crossValidation = new RandomCrossValidation<ProbabilityPrediction>(5, 23);

    var sut = new ClassificationBackwardEliminationModelSelectingEnsembleLearner(
        candidates, 5, crossValidation, averaging, logLoss);

    // Act: learn with the index array, then score every observation.
    var ensemble = sut.Learn(features, labels, trainingIndices);
    var probabilities = ensemble.PredictProbability(features);
    var actual = logLoss.Error(labels, probabilities);

    // Assert
    const double expected = 2.3682546920482164;
    const double tolerance = 0.0001;
    Assert.AreEqual(expected, actual, tolerance);
}
/// <summary>
/// Learns the forward-search model-selecting ensemble over twelve decision-tree learners
/// on the glass CSV resource and compares the log loss of the predictions on the same
/// observations with a recorded value.
/// </summary>
public void ClassificationForwardSearchModelSelectingEnsembleLearner_Learn()
{
    // Arrange: every column except "Target" is a feature; "Target" is the target.
    const string targetColumn = "Target";
    var glassParser = new CsvParser(() => new StringReader(Resources.Glass));
    var features = glassParser.EnumerateRows(column => column != targetColumn).ToF64Matrix();
    var labels = glassParser.EnumerateRows(targetColumn).ToF64Vector();

    var candidates = new IIndexedLearner<ProbabilityPrediction>[]
    {
        new ClassificationDecisionTreeLearner(2),
        new ClassificationDecisionTreeLearner(5),
        new ClassificationDecisionTreeLearner(7),
        new ClassificationDecisionTreeLearner(9),
        new ClassificationDecisionTreeLearner(11),
        new ClassificationDecisionTreeLearner(21),
        new ClassificationDecisionTreeLearner(23),
        new ClassificationDecisionTreeLearner(1),
        new ClassificationDecisionTreeLearner(14),
        new ClassificationDecisionTreeLearner(17),
        new ClassificationDecisionTreeLearner(19),
        new ClassificationDecisionTreeLearner(33),
    };
    var sut = new ClassificationForwardSearchModelSelectingEnsembleLearner(candidates, 5);

    // Act
    var ensemble = sut.Learn(features, labels);
    var probabilities = ensemble.PredictProbability(features);

    var logLoss = new LogLossClassificationProbabilityMetric();
    var actual = logLoss.Error(labels, probabilities);

    // Assert
    const double expected = 0.51787562976713208;
    const double tolerance = 0.0001;
    Assert.AreEqual(expected, actual, tolerance);
}
/// <summary>
/// Learns the forward-search model-selecting ensemble on the glass CSV resource with an
/// explicit stratified cross-validation, ensemble strategy and metric, and compares the
/// log loss of the predictions on the same observations with a recorded value.
/// </summary>
public void ClassificationForwardSearchModelSelectingEnsembleLearner_Learn_Start_With_3_Models()
{
    // Arrange: every column except "Target" is a feature; "Target" is the target.
    const string targetColumn = "Target";
    var glassParser = new CsvParser(() => new StringReader(Resources.Glass));
    var features = glassParser.EnumerateRows(column => column != targetColumn).ToF64Matrix();
    var labels = glassParser.EnumerateRows(targetColumn).ToF64Vector();

    // Single constructor argument handed to each decision-tree learner, in order.
    int[] learnerArguments = { 2, 5, 7, 9, 11, 21, 23, 1, 14, 17, 19, 33 };
    var candidates = new IIndexedLearner<ProbabilityPrediction>[learnerArguments.Length];
    for (var index = 0; index < learnerArguments.Length; index++)
    {
        candidates[index] = new ClassificationDecisionTreeLearner(learnerArguments[index]);
    }

    var logLoss = new LogLossClassificationProbabilityMetric();
    var averaging = new MeanProbabilityClassificationEnsembleStrategy();
    var crossValidation = new StratifiedCrossValidation<ProbabilityPrediction>(5, 23);

    // NOTE(review): the `3` presumably is the number of models the search starts with
    // (per the test name) - confirm against the learner's constructor.
    var sut = new ClassificationForwardSearchModelSelectingEnsembleLearner(
        candidates, 5, crossValidation, averaging, logLoss, 3, true);

    // Act
    var ensemble = sut.Learn(features, labels);
    var probabilities = ensemble.PredictProbability(features);
    var actual = logLoss.Error(labels, probabilities);

    // Assert
    const double expected = 0.54434276244488244;
    const double tolerance = 0.0001;
    Assert.AreEqual(expected, actual, tolerance);
}
/// <summary>
/// Learns the random model-selecting ensemble over twelve decision-tree learners on the
/// glass data set and compares the log loss of the predictions on the same observations
/// with a recorded value.
/// </summary>
public void ClassificationRandomModelSelectingEnsembleLearner_Learn()
{
    // Arrange
    var (features, labels) = DataSetUtilities.LoadGlassDataSet();

    var candidates = new IIndexedLearner<ProbabilityPrediction>[]
    {
        new ClassificationDecisionTreeLearner(2),
        new ClassificationDecisionTreeLearner(5),
        new ClassificationDecisionTreeLearner(7),
        new ClassificationDecisionTreeLearner(9),
        new ClassificationDecisionTreeLearner(11),
        new ClassificationDecisionTreeLearner(21),
        new ClassificationDecisionTreeLearner(23),
        new ClassificationDecisionTreeLearner(1),
        new ClassificationDecisionTreeLearner(14),
        new ClassificationDecisionTreeLearner(17),
        new ClassificationDecisionTreeLearner(19),
        new ClassificationDecisionTreeLearner(33),
    };
    var sut = new ClassificationRandomModelSelectingEnsembleLearner(candidates, 5);

    // Act
    var ensemble = sut.Learn(features, labels);
    var probabilities = ensemble.PredictProbability(features);

    var logLoss = new LogLossClassificationProbabilityMetric();
    var actual = logLoss.Error(labels, probabilities);

    // Assert
    const double expected = 0.60969181130388794;
    const double tolerance = 0.0001;
    Assert.AreEqual(expected, actual, tolerance);
}
/// <summary>
/// Learns the random model-selecting ensemble on the glass data set with an explicit
/// stratified cross-validation, ensemble strategy and metric, and compares the log loss
/// of the predictions on the same observations with a recorded value.
/// </summary>
public void ClassificationRandomModelSelectingEnsembleLearner_Learn_Without_Replacement()
{
    // Arrange
    var (features, labels) = DataSetUtilities.LoadGlassDataSet();

    // Single constructor argument handed to each decision-tree learner, in order.
    int[] learnerArguments = { 2, 5, 7, 9, 11, 21, 23, 1, 14, 17, 19, 33 };
    var candidates = new IIndexedLearner<ProbabilityPrediction>[learnerArguments.Length];
    for (var index = 0; index < learnerArguments.Length; index++)
    {
        candidates[index] = new ClassificationDecisionTreeLearner(learnerArguments[index]);
    }

    var logLoss = new LogLossClassificationProbabilityMetric();
    var averaging = new MeanProbabilityClassificationEnsembleStrategy();
    var crossValidation = new StratifiedCrossValidation<ProbabilityPrediction>(5, 23);

    // NOTE(review): the trailing `false` presumably disables selection with replacement
    // (per the test name) - confirm against the learner's constructor.
    var sut = new ClassificationRandomModelSelectingEnsembleLearner(
        candidates, 5, crossValidation, averaging, logLoss, 1, false);

    // Act
    var ensemble = sut.Learn(features, labels);
    var probabilities = ensemble.PredictProbability(features);
    var actual = logLoss.Error(labels, probabilities);

    // Assert
    const double expected = 0.5805783545646459;
    const double tolerance = 0.0001;
    Assert.AreEqual(expected, actual, tolerance);
}
/// <summary>
/// Learns a stacking ensemble (four decision-tree learners plus a decision-tree learner
/// passed as the second constructor argument) on the aptitude data set, predicts each row
/// individually, and compares the log loss with a recorded value.
/// </summary>
public void ClassificationStackingEnsembleModel_PredictProbability_single()
{
    // Arrange
    var (features, labels) = DataSetUtilities.LoadAptitudeDataSet();

    var baseLearners = new IIndexedLearner<ProbabilityPrediction>[]
    {
        new ClassificationDecisionTreeLearner(2),
        new ClassificationDecisionTreeLearner(5),
        new ClassificationDecisionTreeLearner(7),
        new ClassificationDecisionTreeLearner(9),
    };
    var secondStageLearner = new ClassificationDecisionTreeLearner(9);
    var crossValidation = new RandomCrossValidation<ProbabilityPrediction>(5, 23);
    var stackingLearner = new ClassificationStackingEnsembleLearner(
        baseLearners, secondStageLearner, crossValidation, false);

    var sut = stackingLearner.Learn(features, labels);

    // Act: exercise the single-observation overload, one row at a time.
    var rowCount = labels.Length;
    var probabilities = new ProbabilityPrediction[rowCount];
    for (var rowIndex = 0; rowIndex != rowCount; ++rowIndex)
    {
        var observation = features.Row(rowIndex);
        probabilities[rowIndex] = sut.PredictProbability(observation);
    }

    var logLoss = new LogLossClassificationProbabilityMetric();
    var actual = logLoss.Error(labels, probabilities);

    // Assert
    const double expected = 0.6696598716465223;
    const double tolerance = 0.0000001;
    Assert.AreEqual(expected, actual, tolerance);
}
/// <summary>
/// Learns a <c>ClassificationEnsembleLearner</c> with a mean-probability strategy over four
/// decision-tree learners on the aptitude CSV resource, predicts all observations with a
/// single <c>PredictProbability</c> call, and compares the log loss with a recorded value.
/// </summary>
public void ClassificationEnsembleModel_PredictProbability_Multiple()
{
    // Arrange: every column except "Pass" is a feature; "Pass" is the target.
    var parser = new CsvParser(() => new StringReader(Resources.AptitudeData));
    var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
    var targets = parser.EnumerateRows("Pass").ToF64Vector();

    var learners = new IIndexedLearner<ProbabilityPrediction>[]
    {
        new ClassificationDecisionTreeLearner(2),
        new ClassificationDecisionTreeLearner(5),
        new ClassificationDecisionTreeLearner(7),
        new ClassificationDecisionTreeLearner(9),
    };

    var learner = new ClassificationEnsembleLearner(
        learners, new MeanProbabilityClassificationEnsembleStrategy());
    var sut = learner.Learn(observations, targets);

    // Act: the whole observation matrix is predicted in one call, so no row count is needed.
    var predictions = sut.PredictProbability(observations);

    var metric = new LogLossClassificationProbabilityMetric();
    var actual = metric.Error(targets, predictions);

    // Assert
    Assert.AreEqual(0.32562112824941963, actual, 0.0000001);
}
/// <summary>
/// Example: cross-validates a <c>ClassificationDecisionTreeLearner</c> on the white wine
/// quality CSV resource, traces the log loss of the cross-validated predictions, then
/// traces the log loss of a model learned and evaluated on the same data for comparison.
/// Nothing is asserted; the results are only written via <c>Trace.WriteLine</c>.
/// </summary>
public void CrossValidation_CrossValidate_ProbabilityPredictions()
{
    #region Read data

    // Use StreamReader(filepath) when running from filesystem
    var parser = new CsvParser(() => new StringReader(Resources.winequality_white));
    var targetName = "quality";

    // read feature matrix (all columns different from the targetName)
    var observations = parser.EnumerateRows(c => c != targetName)
        .ToF64Matrix();

    // read targets
    var targets = parser.EnumerateRows(targetName)
        .ToF64Vector();

    #endregion

    // creates cross validator, observations are shuffled randomly
    var cv = new RandomCrossValidation<ProbabilityPrediction>(crossValidationFolds: 5, seed: 42);

    // create learner
    var learner = new ClassificationDecisionTreeLearner();

    // cross-validated predictions
    var cvPredictions = cv.CrossValidate(learner, observations, targets);

    // metric for measuring model error
    var metric = new LogLossClassificationProbabilityMetric();

    // cross-validation provides an estimate on how the model will perform on unseen data
    Trace.WriteLine("Cross-validation error: " + metric.Error(targets, cvPredictions));

    // train and predict training set for comparison
    var predictions = learner.Learn(observations, targets).PredictProbability(observations);

    // The training set is NOT a good estimate of how well the model will perform on unseen data.
    Trace.WriteLine("Training error: " + metric.Error(targets, predictions));
}
/// <summary>
/// Evaluates <c>LogLossClassificationProbabilityMetric</c> (constructed with 1e-15) on three
/// predictions for targets 0, 1 and 2, where the first prediction assigns 1.0 to every
/// class, and compares the error with a recorded value.
/// </summary>
public void LogLossClassificationMetric_Error_2()
{
    // Arrange
    double[] targets = { 0, 1, 2 };

    // The probabilities of the first prediction deliberately do not sum to one.
    var allOnes = new Dictionary<double, double>
    {
        { 0, 1.0 },
        { 1, 1.0 },
        { 2, 1.0 },
    };
    var onlyClassOne = new Dictionary<double, double>
    {
        { 0, 0.0 },
        { 1, 1.0 },
        { 2, 0.0 },
    };
    var onlyClassTwo = new Dictionary<double, double>
    {
        { 0, 0.0 },
        { 1, 0.0 },
        { 2, 1.0 },
    };

    ProbabilityPrediction[] predictions =
    {
        new ProbabilityPrediction(0, allOnes),
        new ProbabilityPrediction(1, onlyClassOne),
        new ProbabilityPrediction(2, onlyClassTwo),
    };

    var sut = new LogLossClassificationProbabilityMetric(1e-15);

    // Act
    var actual = sut.Error(targets, predictions);

    // Assert
    const double expected = 0.36620409622270467;
    const double tolerance = 0.0001;
    Assert.AreEqual(expected, actual, tolerance);
}
/// <summary>
/// Evaluates <c>LogLossClassificationProbabilityMetric</c> (constructed with 1e-15) on three
/// predictions that each assign probability 1.0 to the matching target class and 0.0 to
/// the others, and compares the error with a recorded value.
/// </summary>
public void LogLossClassificationMetric_Error_1()
{
    // Arrange
    double[] targets = { 0, 1, 2 };

    var onlyClassZero = new Dictionary<double, double>
    {
        { 0, 1.0 },
        { 1, 0.0 },
        { 2, 0.0 },
    };
    var onlyClassOne = new Dictionary<double, double>
    {
        { 0, 0.0 },
        { 1, 1.0 },
        { 2, 0.0 },
    };
    var onlyClassTwo = new Dictionary<double, double>
    {
        { 0, 0.0 },
        { 1, 0.0 },
        { 2, 1.0 },
    };

    ProbabilityPrediction[] predictions =
    {
        new ProbabilityPrediction(0, onlyClassZero),
        new ProbabilityPrediction(1, onlyClassOne),
        new ProbabilityPrediction(2, onlyClassTwo),
    };

    var sut = new LogLossClassificationProbabilityMetric(1e-15);

    // Act
    var actual = sut.Error(targets, predictions);

    // Assert: the recorded value is tiny but non-zero, hence the very tight tolerance.
    const double expected = 9.9920072216264128e-16;
    const double tolerance = 1e-17;
    Assert.AreEqual(expected, actual, tolerance);
}
/// <summary>
/// Learns a <c>ClassificationModelSelectingEnsembleLearner</c> with forward-search ensemble
/// selection on the glass CSV resource through the <c>Learn</c> overload that takes
/// indices (here 0 through 24), predicts all observations, and compares the log loss with
/// a recorded value.
/// </summary>
public void ClassificationModelSelectingEnsembleLearner_Learn_Indexed()
{
    // Arrange: every column except "Target" is a feature; "Target" is the target.
    const string targetColumn = "Target";
    var glassParser = new CsvParser(() => new StringReader(Resources.Glass));
    var features = glassParser.EnumerateRows(column => column != targetColumn).ToF64Matrix();
    var labels = glassParser.EnumerateRows(targetColumn).ToF64Vector();
    var trainingIndices = Enumerable.Range(0, 25).ToArray();

    var candidates = new IIndexedLearner<ProbabilityPrediction>[]
    {
        new ClassificationDecisionTreeLearner(2),
        new ClassificationDecisionTreeLearner(5),
        new ClassificationDecisionTreeLearner(7),
        new ClassificationDecisionTreeLearner(9),
        new ClassificationDecisionTreeLearner(11),
        new ClassificationDecisionTreeLearner(21),
        new ClassificationDecisionTreeLearner(23),
        new ClassificationDecisionTreeLearner(1),
        new ClassificationDecisionTreeLearner(14),
        new ClassificationDecisionTreeLearner(17),
        new ClassificationDecisionTreeLearner(19),
        new ClassificationDecisionTreeLearner(33),
    };

    var logLoss = new LogLossClassificationProbabilityMetric();
    var averaging = new MeanProbabilityClassificationEnsembleStrategy();
    var selection = new ForwardSearchClassificationEnsembleSelection(logLoss, averaging, 5, 1, true);
    var crossValidation = new RandomCrossValidation<ProbabilityPrediction>(5, 23);

    var sut = new ClassificationModelSelectingEnsembleLearner(
        candidates, crossValidation, averaging, selection);

    // Act: learn with the index array, then score every observation.
    var ensemble = sut.Learn(features, labels, trainingIndices);
    var probabilities = ensemble.PredictProbability(features);
    var actual = logLoss.Error(labels, probabilities);

    // Assert
    const double expected = 2.3682546920482164;
    const double tolerance = 0.0001;
    Assert.AreEqual(expected, actual, tolerance);
}