// Verifies that a regression ensemble model reports the expected raw
// (unnormalized) variable importance for each input feature.
public void RegressionEnsembleModel_GetRawVariableImportance()
{
    // Arrange: load the aptitude data set.
    var parser = new CsvParser(() => new StringReader(Resources.AptitudeData));
    var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
    var targets = parser.EnumerateRows("Pass").ToF64Vector();

    var baseLearners = new IIndexedLearner<double>[]
    {
        new RegressionDecisionTreeLearner(2),
        new RegressionDecisionTreeLearner(5),
        new RegressionDecisionTreeLearner(7),
        new RegressionDecisionTreeLearner(9),
    };

    var learner = new RegressionEnsembleLearner(baseLearners,
        new MeanRegressionEnsembleStrategy());
    var sut = learner.Learn(observations, targets);

    // Act
    var actual = sut.GetRawVariableImportance();

    // Assert: importances match the known baseline within tolerance.
    var expected = new double[] { 100.0, 3.46067371526717 };
    Assert.AreEqual(expected.Length, actual.Length);
    for (int index = 0; index < expected.Length; index++)
    {
        Assert.AreEqual(expected[index], actual[index], 0.000001);
    }
}
// Verifies single-observation probability prediction of a classification
// stacking ensemble by checking the aggregate log-loss against a baseline.
public void ClassificationStackingEnsembleModel_PredictProbability_single()
{
    // Arrange: load the aptitude data set.
    var parser = new CsvParser(() => new StringReader(Resources.AptitudeData));
    var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
    var targets = parser.EnumerateRows("Pass").ToF64Vector();
    var rowCount = targets.Length;

    var baseLearners = new IIndexedLearner<ProbabilityPrediction>[]
    {
        new ClassificationDecisionTreeLearner(2),
        new ClassificationDecisionTreeLearner(5),
        new ClassificationDecisionTreeLearner(7),
        new ClassificationDecisionTreeLearner(9),
    };

    var learner = new ClassificationStackingEnsembleLearner(baseLearners,
        new ClassificationDecisionTreeLearner(9),
        new RandomCrossValidation<ProbabilityPrediction>(5, 23),
        false);
    var sut = learner.Learn(observations, targets);

    // Act: predict each row individually via the single-observation API.
    var predictions = new ProbabilityPrediction[rowCount];
    for (int row = 0; row < rowCount; row++)
    {
        predictions[row] = sut.PredictProbability(observations.Row(row));
    }

    // Assert
    var metric = new LogLossClassificationProbabilityMetric();
    var actual = metric.Error(targets, predictions);
    Assert.AreEqual(0.6696598716465223, actual, 0.0000001);
}
// Verifies that an AdaBoost classification model maps feature names to the
// expected normalized variable-importance scores.
public void ClassificationAdaBoostModel_GetVariableImportance()
{
    // Arrange: load the aptitude data set and name its two features.
    var parser = new CsvParser(() => new StringReader(Resources.AptitudeData));
    var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
    var targets = parser.EnumerateRows("Pass").ToF64Vector();

    var featureNameToIndex = new Dictionary<string, int>
    {
        { "AptitudeTestScore", 0 },
        { "PreviousExperience_month", 1 },
    };

    var learner = new ClassificationAdaBoostLearner(10, 1, 3);
    var sut = learner.Learn(observations, targets);

    // Act
    var actual = sut.GetVariableImportance(featureNameToIndex);

    // Assert: compare entries pairwise against the known baseline.
    var expected = new Dictionary<string, double>
    {
        { "PreviousExperience_month", 100.0 },
        { "AptitudeTestScore", 24.0268096428771 },
    };

    Assert.AreEqual(expected.Count, actual.Count);
    var pairs = expected.Zip(actual, (e, a) => new { Expected = e, Actual = a });
    foreach (var pair in pairs)
    {
        Assert.AreEqual(pair.Expected.Key, pair.Actual.Key);
        Assert.AreEqual(pair.Expected.Value, pair.Actual.Value, 0.000001);
    }
}
// Verifies indexed learning on the Glass data set with a 100-tree regression
// random forest: trains on a shuffled 70% subset of row indices and checks
// the mean squared error over the full data set against a baseline.
// Fix: removed the unused local `rows` (targets.Length was used directly).
public void RegressionRandomForestLearnerTest_Learn_Glass_100_Indices()
{
    // Arrange: load the Glass data set.
    var parser = new CsvParser(() => new StringReader(Resources.Glass));
    var observations = parser.EnumerateRows(v => v != "Target").ToF64Matrix();
    var targets = parser.EnumerateRows("Target").ToF64Vector();

    var sut = new RegressionRandomForestLearner(100, 1, 100, 1, 0.0001, 1.0, 42, false);

    // Shuffle deterministically (seed 42) and keep 70% of the indices.
    var indices = Enumerable.Range(0, targets.Length).ToArray();
    indices.Shuffle(new Random(42));
    indices = indices.Take((int)(targets.Length * 0.7))
        .ToArray();

    // Act: learn on the index subset and wrap the trees in a forest model.
    var decisionTreeModels = sut.Learn(observations, targets, indices,
        out var rawVariableImportance).ToArray();
    var model = new RegressionForestModel(decisionTreeModels, rawVariableImportance);

    var predictions = model.Predict(observations);

    // Assert
    var evaluator = new MeanSquaredErrorRegressionMetric();
    var error = evaluator.Error(targets, predictions);

    Assert.AreEqual(0.49709813080602938, error, m_delta);
}
// Verifies that a classification stacking ensemble model reports the expected
// raw variable importance over the meta-feature columns.
public void ClassificationStackingEnsembleModel_GetRawVariableImportance()
{
    // Arrange: load the aptitude data set.
    var parser = new CsvParser(() => new StringReader(Resources.AptitudeData));
    var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
    var targets = parser.EnumerateRows("Pass").ToF64Vector();

    var baseLearners = new IIndexedLearner<ProbabilityPrediction>[]
    {
        new ClassificationDecisionTreeLearner(2),
        new ClassificationDecisionTreeLearner(5),
        new ClassificationDecisionTreeLearner(7),
        new ClassificationDecisionTreeLearner(9),
    };

    var learner = new ClassificationStackingEnsembleLearner(baseLearners,
        new ClassificationDecisionTreeLearner(9),
        new RandomCrossValidation<ProbabilityPrediction>(5, 23),
        false);
    var sut = learner.Learn(observations, targets);

    // Act
    var actual = sut.GetRawVariableImportance();

    // Assert: one importance value per meta-feature (two per base learner).
    var expected = new double[]
    {
        0.12545787545787546, 0, 0.16300453932032882, 0.0345479082321188,
        0.15036245805476572, 0, 0, 0,
    };

    Assert.AreEqual(expected.Length, actual.Length);
    for (int index = 0; index < expected.Length; index++)
    {
        Assert.AreEqual(expected[index], actual[index], 0.000001);
    }
}
// Verifies indexed learning for the backward-elimination model-selecting
// regression ensemble: trains on the first 25 rows only and checks the MSE
// over the full data set against a baseline.
public void RegressionBackwardEliminationModelSelectingEnsembleLearner_Learn_Indexed()
{
    // Arrange: candidate trees of varying depth for the selector to prune.
    var candidates = new IIndexedLearner<double>[]
    {
        new RegressionDecisionTreeLearner(2),
        new RegressionDecisionTreeLearner(5),
        new RegressionDecisionTreeLearner(7),
        new RegressionDecisionTreeLearner(9),
        new RegressionDecisionTreeLearner(11),
        new RegressionDecisionTreeLearner(21),
        new RegressionDecisionTreeLearner(23),
        new RegressionDecisionTreeLearner(1),
        new RegressionDecisionTreeLearner(14),
        new RegressionDecisionTreeLearner(17),
        new RegressionDecisionTreeLearner(19),
        new RegressionDecisionTreeLearner(33),
    };

    var sut = new RegressionBackwardEliminationModelSelectingEnsembleLearner(candidates, 5);

    var parser = new CsvParser(() => new StringReader(Resources.DecisionTreeData));
    var observations = parser.EnumerateRows("F1", "F2").ToF64Matrix();
    var targets = parser.EnumerateRows("T").ToF64Vector();

    // Act: learn on the first 25 row indices only.
    var indices = Enumerable.Range(0, 25).ToArray();
    var model = sut.Learn(observations, targets, indices);
    var predictions = model.Predict(observations);

    // Assert
    var metric = new MeanSquaredErrorRegressionMetric();
    var actual = metric.Error(targets, predictions);
    Assert.AreEqual(0.13601421174394385, actual, 0.0001);
}
// Verifies forward-search model selection for a classification ensemble on
// the Glass data set by checking the log-loss against a baseline.
public void ClassificationForwardSearchModelSelectingEnsembleLearner_Learn()
{
    // Arrange: candidate trees of varying depth for the selector to choose from.
    var candidates = new IIndexedLearner<ProbabilityPrediction>[]
    {
        new ClassificationDecisionTreeLearner(2),
        new ClassificationDecisionTreeLearner(5),
        new ClassificationDecisionTreeLearner(7),
        new ClassificationDecisionTreeLearner(9),
        new ClassificationDecisionTreeLearner(11),
        new ClassificationDecisionTreeLearner(21),
        new ClassificationDecisionTreeLearner(23),
        new ClassificationDecisionTreeLearner(1),
        new ClassificationDecisionTreeLearner(14),
        new ClassificationDecisionTreeLearner(17),
        new ClassificationDecisionTreeLearner(19),
        new ClassificationDecisionTreeLearner(33),
    };

    var sut = new ClassificationForwardSearchModelSelectingEnsembleLearner(candidates, 5);

    var parser = new CsvParser(() => new StringReader(Resources.Glass));
    var observations = parser.EnumerateRows(v => v != "Target").ToF64Matrix();
    var targets = parser.EnumerateRows("Target").ToF64Vector();

    // Act
    var model = sut.Learn(observations, targets);
    var predictions = model.PredictProbability(observations);

    // Assert
    var metric = new LogLossClassificationProbabilityMetric();
    var actual = metric.Error(targets, predictions);
    Assert.AreEqual(0.51787562976713208, actual, 0.0001);
}
// Verifies a regression stacking ensemble configured to append the original
// features to the meta-features (last constructor argument true).
public void RegressionStackingEnsembleLearner_Learn_Keep_Original_Features()
{
    // Arrange
    var baseLearners = new IIndexedLearner<double>[]
    {
        new RegressionDecisionTreeLearner(2),
        new RegressionDecisionTreeLearner(5),
        new RegressionDecisionTreeLearner(7),
        new RegressionDecisionTreeLearner(9),
    };

    // true => original features are kept alongside the meta-features.
    var sut = new RegressionStackingEnsembleLearner(baseLearners,
        new RegressionDecisionTreeLearner(9),
        new RandomCrossValidation<double>(5, 23),
        true);

    var parser = new CsvParser(() => new StringReader(Resources.DecisionTreeData));
    var observations = parser.EnumerateRows("F1", "F2").ToF64Matrix();
    var targets = parser.EnumerateRows("T").ToF64Vector();

    // Act
    var model = sut.Learn(observations, targets);
    var predictions = model.Predict(observations);

    // Assert
    var evaluator = new MeanSquaredErrorRegressionMetric();
    var actual = evaluator.Error(targets, predictions);
    Assert.AreEqual(0.066184865331534531, actual, 0.0001);
}
// Verifies forward-search model selection for a regression ensemble by
// checking the MSE on the decision-tree data set against a baseline.
public void RegressionForwardSearchModelSelectingEnsembleLearner_Learn()
{
    // Arrange: candidate trees of varying depth for the selector to choose from.
    var candidates = new IIndexedLearner<double>[]
    {
        new RegressionDecisionTreeLearner(2),
        new RegressionDecisionTreeLearner(5),
        new RegressionDecisionTreeLearner(7),
        new RegressionDecisionTreeLearner(9),
        new RegressionDecisionTreeLearner(11),
        new RegressionDecisionTreeLearner(21),
        new RegressionDecisionTreeLearner(23),
        new RegressionDecisionTreeLearner(1),
        new RegressionDecisionTreeLearner(14),
        new RegressionDecisionTreeLearner(17),
        new RegressionDecisionTreeLearner(19),
        new RegressionDecisionTreeLearner(33),
    };

    var sut = new RegressionForwardSearchModelSelectingEnsembleLearner(candidates, 5);

    var parser = new CsvParser(() => new StringReader(Resources.DecisionTreeData));
    var observations = parser.EnumerateRows("F1", "F2").ToF64Matrix();
    var targets = parser.EnumerateRows("T").ToF64Vector();

    // Act
    var model = sut.Learn(observations, targets);
    var predictions = model.Predict(observations);

    // Assert
    var metric = new MeanSquaredErrorRegressionMetric();
    var actual = metric.Error(targets, predictions);
    Assert.AreEqual(0.016150842834795006, actual, 0.0001);
}
// Verifies indexed learning for a regression stacking ensemble: trains on
// the first 25 rows and checks the MSE over the full data set.
public void RegressionStackingEnsembleLearner_Learn_Indexed()
{
    // Arrange
    var baseLearners = new IIndexedLearner<double>[]
    {
        new RegressionDecisionTreeLearner(2),
        new RegressionDecisionTreeLearner(5),
        new RegressionDecisionTreeLearner(7),
        new RegressionDecisionTreeLearner(9),
    };

    var sut = new RegressionStackingEnsembleLearner(baseLearners,
        new RegressionDecisionTreeLearner(9),
        new RandomCrossValidation<double>(5, 23),
        false);

    var parser = new CsvParser(() => new StringReader(Resources.DecisionTreeData));
    var observations = parser.EnumerateRows("F1", "F2").ToF64Matrix();
    var targets = parser.EnumerateRows("T").ToF64Vector();

    // Act: learn on the first 25 row indices only.
    var indices = Enumerable.Range(0, 25).ToArray();
    var model = sut.Learn(observations, targets, indices);
    var predictions = model.Predict(observations);

    // Assert
    var evaluator = new MeanSquaredErrorRegressionMetric();
    var actual = evaluator.Error(targets, predictions);
    Assert.AreEqual(0.133930222950635, actual, 0.0001);
}
// Verifies the two-step stacking API: create meta-features first, then learn
// the stacking model from them, and check the resulting MSE.
public void RegressionStackingEnsembleLearner_CreateMetaFeatures_Then_Learn()
{
    // Arrange
    var baseLearners = new IIndexedLearner<double>[]
    {
        new RegressionDecisionTreeLearner(2),
        new RegressionDecisionTreeLearner(5),
        new RegressionDecisionTreeLearner(7),
        new RegressionDecisionTreeLearner(9),
    };

    var sut = new RegressionStackingEnsembleLearner(baseLearners,
        new RegressionDecisionTreeLearner(9),
        new RandomCrossValidation<double>(5, 23),
        false);

    var parser = new CsvParser(() => new StringReader(Resources.DecisionTreeData));
    var observations = parser.EnumerateRows("F1", "F2").ToF64Matrix();
    var targets = parser.EnumerateRows("T").ToF64Vector();

    // Act: split the stacking pipeline into its two explicit steps.
    var metaObservations = sut.LearnMetaFeatures(observations, targets);
    var model = sut.LearnStackingModel(observations, metaObservations, targets);
    var predictions = model.Predict(observations);

    // Assert
    var evaluator = new MeanSquaredErrorRegressionMetric();
    var actual = evaluator.Error(targets, predictions);
    Assert.AreEqual(0.06951934687172627, actual, 0.0001);
}
// Verifies that a regression random forest model maps feature names to the
// expected normalized variable-importance scores.
public void RegressionForestModel_GetVariableImportance()
{
    // Arrange: load the aptitude data set and name its two features.
    var parser = new CsvParser(() => new StringReader(Resources.AptitudeData));
    var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
    var targets = parser.EnumerateRows("Pass").ToF64Vector();

    var featureNameToIndex = new Dictionary<string, int>
    {
        { "AptitudeTestScore", 0 },
        { "PreviousExperience_month", 1 },
    };

    var learner = new RegressionRandomForestLearner(100, 5, 100, 1, 0.0001, 1.0, 42, false);
    var sut = learner.Learn(observations, targets);

    // Act
    var actual = sut.GetVariableImportance(featureNameToIndex);

    // Assert: compare entries pairwise against the known baseline.
    var expected = new Dictionary<string, double>
    {
        { "PreviousExperience_month", 100 },
        { "AptitudeTestScore", 42.3879919692465 },
    };

    Assert.AreEqual(expected.Count, actual.Count);
    var pairs = expected.Zip(actual, (e, a) => new { Expected = e, Actual = a });
    foreach (var pair in pairs)
    {
        Assert.AreEqual(pair.Expected.Key, pair.Actual.Key);
        Assert.AreEqual(pair.Expected.Value, pair.Actual.Value, m_delta);
    }
}
// Verifies that a GBM decision tree learned from presorted feature indices
// reproduces the expected mean squared error on the training data.
// Fix: the original asserted exact equality on a double; use a tolerance
// like every sibling test to avoid brittle floating-point comparison.
public void GBMDecisionTreeLearner_Learn()
{
    // Arrange: load the decision-tree data set.
    var parser = new CsvParser(() => new StringReader(Resources.DecisionTreeData));
    var observations = parser.EnumerateRows("F1", "F2").ToF64Matrix();
    var targets = parser.EnumerateRows("T").ToF64Vector();

    // All rows participate in the fit.
    var inSample = targets.Select(t => true).ToArray();

    // Per-feature row orderings: indices sorted by the feature values,
    // as required by the GBM tree learner.
    var orderedElements = new int[observations.ColumnCount][];
    var rows = observations.RowCount;
    for (int i = 0; i < observations.ColumnCount; i++)
    {
        var feature = observations.Column(i);
        var indices = Enumerable.Range(0, rows).ToArray();
        feature.SortWith(indices);
        orderedElements[i] = indices;
    }

    var sut = new GBMDecisionTreeLearner(10);

    // Act: targets serve as both residuals and predictions for this test.
    var tree = sut.Learn(observations, targets, targets, targets,
        orderedElements, inSample);
    var predictions = tree.Predict(observations);

    // Assert
    var evaluator = new MeanSquaredErrorRegressionMetric();
    var actual = evaluator.Error(targets, predictions);
    Assert.AreEqual(0.0046122425037232661, actual, 0.000001);
}
// Verifies batch prediction of a regression ensemble model by checking the
// MSE over the whole aptitude data set against a baseline.
// Fix: removed the unused local `rows` (the batch Predict needs no row count).
public void RegressionEnsembleModel_Predict_Multiple()
{
    // Arrange: load the aptitude data set.
    var parser = new CsvParser(() => new StringReader(Resources.AptitudeData));
    var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
    var targets = parser.EnumerateRows("Pass").ToF64Vector();

    var learners = new IIndexedLearner<double>[]
    {
        new RegressionDecisionTreeLearner(2),
        new RegressionDecisionTreeLearner(5),
        new RegressionDecisionTreeLearner(7),
        new RegressionDecisionTreeLearner(9),
    };

    var learner = new RegressionEnsembleLearner(learners,
        new MeanRegressionEnsembleStrategy());
    var sut = learner.Learn(observations, targets);

    // Act
    var predictions = sut.Predict(observations);

    // Assert
    var metric = new MeanSquaredErrorRegressionMetric();
    var actual = metric.Error(targets, predictions);
    Assert.AreEqual(0.033195970695970689, actual, 0.0000001);
}
// Verifies single-observation certainty prediction of a regression forest:
// checks both the MSE of the point predictions and the exact sequence of
// (prediction, variance) pairs against a recorded baseline.
public void RegressionForestModel_PredictCertainty_Single()
{
    // Arrange: load the aptitude data set.
    var parser = new CsvParser(() => new StringReader(Resources.AptitudeData));
    var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
    var targets = parser.EnumerateRows("Pass").ToF64Vector();
    var rowCount = targets.Length;

    var learner = new RegressionRandomForestLearner(100, 5, 100, 1, 0.0001, 1.0, 42, false);
    var sut = learner.Learn(observations, targets);

    // Act: predict certainty row by row via the single-observation API.
    var actual = new CertaintyPrediction[rowCount];
    for (int row = 0; row < rowCount; row++)
    {
        actual[row] = sut.PredictCertainty(observations.Row(row));
    }

    // Assert: point predictions reproduce the baseline error.
    var evaluator = new MeanSquaredErrorRegressionMetric();
    var error = evaluator.Error(targets, actual.Select(p => p.Prediction).ToArray());
    Assert.AreEqual(0.15381141277554411, error, 0.0000001);

    // Assert: every (prediction, variance) pair matches the recorded baseline.
    var expected = new CertaintyPrediction[]
    {
        new CertaintyPrediction(0.379151515151515, 0.0608255007215007),
        new CertaintyPrediction(0.411071351850763, 0.0831655436577049),
        new CertaintyPrediction(0.243420918950331, 0.0452827034233046),
        new CertaintyPrediction(0.302332251082251, 0.0699917594408057),
        new CertaintyPrediction(0.411071351850763, 0.0831655436577049),
        new CertaintyPrediction(0.175743762773174, 0.0354069437824887),
        new CertaintyPrediction(0.574083361083361, 0.0765858693929188),
        new CertaintyPrediction(0.259063776093188, 0.0491198812971218),
        new CertaintyPrediction(0.163878898878899, 0.0331543420321184),
        new CertaintyPrediction(0.671753996003996, 0.0624466591504497),
        new CertaintyPrediction(0.418472943722944, 0.0607014359023913),
        new CertaintyPrediction(0.243420918950331, 0.0452827034233046),
        new CertaintyPrediction(0.443779942279942, 0.0941961872991865),
        new CertaintyPrediction(0.156999361749362, 0.0435804333960299),
        new CertaintyPrediction(0.591222034501446, 0.0873624628347336),
        new CertaintyPrediction(0.123822406351818, 0.0283119805431255),
        new CertaintyPrediction(0.162873993653405, 0.0333697457759022),
        new CertaintyPrediction(0.596261932511932, 0.0695341060210394),
        new CertaintyPrediction(0.671753996003996, 0.0624466591504497),
        new CertaintyPrediction(0.418472943722944, 0.0607014359023913),
        new CertaintyPrediction(0.329000027750028, 0.0788869852405852),
        new CertaintyPrediction(0.671753996003996, 0.0624466591504497),
        new CertaintyPrediction(0.499770375049787, 0.0913884936411888),
        new CertaintyPrediction(0.140025508804921, 0.0309875116490099),
        new CertaintyPrediction(0.161207326986739, 0.0336321035325246),
        new CertaintyPrediction(0.389553418803419, 0.0744433596104835),
    };
    CollectionAssert.AreEqual(expected, actual);
}
// Verifies the forward-search regression ensemble selector when configured
// to seed the search with the 3 best single models (numberOfModelsFromStart = 3).
public void RegressionForwardSearchModelSelectingEnsembleLearner_Learn_Start_With_3_Models()
{
    // Arrange: candidate trees of varying depth.
    var candidates = new IIndexedLearner<double>[]
    {
        new RegressionDecisionTreeLearner(2),
        new RegressionDecisionTreeLearner(5),
        new RegressionDecisionTreeLearner(7),
        new RegressionDecisionTreeLearner(9),
        new RegressionDecisionTreeLearner(11),
        new RegressionDecisionTreeLearner(21),
        new RegressionDecisionTreeLearner(23),
        new RegressionDecisionTreeLearner(1),
        new RegressionDecisionTreeLearner(14),
        new RegressionDecisionTreeLearner(17),
        new RegressionDecisionTreeLearner(19),
        new RegressionDecisionTreeLearner(33),
    };

    var metric = new MeanSquaredErrorRegressionMetric();

    var sut = new RegressionForwardSearchModelSelectingEnsembleLearner(candidates, 5,
        new RandomCrossValidation<double>(5, 42),
        new MeanRegressionEnsembleStrategy(),
        metric, 3, false);

    var parser = new CsvParser(() => new StringReader(Resources.DecisionTreeData));
    var observations = parser.EnumerateRows("F1", "F2").ToF64Matrix();
    var targets = parser.EnumerateRows("T").ToF64Vector();

    // Act
    var model = sut.Learn(observations, targets);
    var predictions = model.Predict(observations);

    // Assert
    var actual = metric.Error(targets, predictions);
    Assert.AreEqual(0.010316259438112848, actual, 0.0001);
}
// Verifies the two-step API of the backward-elimination regression ensemble:
// create meta-features first, then select models from them.
public void RegressionBackwardEliminationModelSelectingEnsembleLearner_CreateMetaFeatures_Then_Learn()
{
    // Arrange: candidate trees of varying depth for the selector to prune.
    var candidates = new IIndexedLearner<double>[]
    {
        new RegressionDecisionTreeLearner(2),
        new RegressionDecisionTreeLearner(5),
        new RegressionDecisionTreeLearner(7),
        new RegressionDecisionTreeLearner(9),
        new RegressionDecisionTreeLearner(11),
        new RegressionDecisionTreeLearner(21),
        new RegressionDecisionTreeLearner(23),
        new RegressionDecisionTreeLearner(1),
        new RegressionDecisionTreeLearner(14),
        new RegressionDecisionTreeLearner(17),
        new RegressionDecisionTreeLearner(19),
        new RegressionDecisionTreeLearner(33),
    };

    var sut = new RegressionBackwardEliminationModelSelectingEnsembleLearner(candidates, 5);

    var parser = new CsvParser(() => new StringReader(Resources.DecisionTreeData));
    var observations = parser.EnumerateRows("F1", "F2").ToF64Matrix();
    var targets = parser.EnumerateRows("T").ToF64Vector();

    // Act: explicit meta-feature step followed by model selection.
    var metaObservations = sut.LearnMetaFeatures(observations, targets);
    var model = sut.SelectModels(observations, metaObservations, targets);
    var predictions = model.Predict(observations);

    // Assert
    var metric = new MeanSquaredErrorRegressionMetric();
    var actual = metric.Error(targets, predictions);
    Assert.AreEqual(0.010316259438112841, actual, 0.0001);
}
// Example: tune a regression decision tree's maximumTreeDepth and
// minimumSplitSize with random search over cross-validated MSE, then train
// a final model with the best parameters found.
// Fix: renamed the misspelled local `paramers` to `parameterRanges`.
public void Hyper_Parameter_Tuning()
{
    #region Read data
    // Use StreamReader(filepath) when running from filesystem
    var parser = new CsvParser(() => new StringReader(Resources.winequality_white));
    var targetName = "quality";

    // read feature matrix
    var observations = parser.EnumerateRows(c => c != targetName)
        .ToF64Matrix();

    // read classification targets
    var targets = parser.EnumerateRows(targetName)
        .ToF64Vector();
    #endregion

    // metric to minimize
    var metric = new MeanSquaredErrorRegressionMetric();

    // Parameter ranges for the optimizer
    var parameterRanges = new ParameterBounds[]
    {
        new ParameterBounds(min: 1, max: 100, transform: Transform.Linear), // maximumTreeDepth
        new ParameterBounds(min: 1, max: 16, transform: Transform.Linear), // minimumSplitSize
    };

    // create random search optimizer
    var optimizer = new RandomSearchOptimizer(parameterRanges, iterations: 30, runParallel: true);

    // other availible optimizers
    // GridSearchOptimizer
    // GlobalizedBoundedNelderMeadOptimizer
    // ParticleSwarmOptimizer
    // BayesianOptimizer

    // function to minimize: cross-validated MSE for a candidate parameter set
    Func<double[], OptimizerResult> minimize = p =>
    {
        var cv = new RandomCrossValidation<double>(crossValidationFolds: 5, seed: 42);
        var optlearner = new RegressionDecisionTreeLearner(maximumTreeDepth: (int)p[0],
            minimumSplitSize: (int)p[1]);
        var predictions = cv.CrossValidate(optlearner, observations, targets);
        var error = metric.Error(targets, predictions);
        Trace.WriteLine("Error: " + error);

        return new OptimizerResult(p, error);
    };

    // run optimizer
    var result = optimizer.OptimizeBest(minimize);
    var bestParameters = result.ParameterSet;
    Trace.WriteLine("Result: " + result.Error);

    // create learner with found parameters
    var learner = new RegressionDecisionTreeLearner(maximumTreeDepth: (int)bestParameters[0],
        minimumSplitSize: (int)bestParameters[1]);

    // learn model with found parameters
    var model = learner.Learn(observations, targets);
}
// Verifies that a RegressionDecisionTreeLearner instance can be reused:
// after an initial fit, relearning on data with a single unique target and
// no valid split must yield a model that always predicts that target value.
// Fix: removed the unused local `rows`.
public void RegressionDecisionTreeLearner_Learn_Reuse_No_Valid_Split()
{
    // Arrange: load the decision-tree data set.
    var parser = new CsvParser(() => new StringReader(Resources.DecisionTreeData));
    var observations = parser.EnumerateRows("F1", "F2").ToF64Matrix();
    var targets = parser.EnumerateRows("T").ToF64Vector();

    var sut = new RegressionDecisionTreeLearner();

    // train initial model.
    sut.Learn(observations, targets);

    // reuse learner, with smaller data that provides no valid split.
    var onlyUniqueTargetValue = 1.0;
    var onlyOneUniqueObservations = (F64Matrix)observations.Rows(0, 1, 2, 3, 4);
    var onlyOneUniquetargets = Enumerable.Range(0, onlyOneUniqueObservations.RowCount)
        .Select(v => onlyUniqueTargetValue).ToArray();
    var model = sut.Learn(onlyOneUniqueObservations, onlyOneUniquetargets);
    var predictions = model.Predict(onlyOneUniqueObservations);

    // no valid split, so should result in the model always returning the onlyUniqueTargetValue.
    for (int i = 0; i < predictions.Length; i++)
    {
        Assert.AreEqual(onlyUniqueTargetValue, predictions[i], 0.0001);
    }
}
// Verifies indexed learning for a classification stacking ensemble: trains on
// the first 25 rows of the Glass data set and checks the total error.
public void ClassificationStackingEnsembleLearner_Learn_Indexed()
{
    // Arrange
    var baseLearners = new IIndexedLearner<ProbabilityPrediction>[]
    {
        new ClassificationDecisionTreeLearner(2),
        new ClassificationDecisionTreeLearner(5),
        new ClassificationDecisionTreeLearner(7),
        new ClassificationDecisionTreeLearner(9),
    };

    var sut = new ClassificationStackingEnsembleLearner(baseLearners,
        new ClassificationDecisionTreeLearner(9),
        new RandomCrossValidation<ProbabilityPrediction>(5, 23),
        false);

    var parser = new CsvParser(() => new StringReader(Resources.Glass));
    var observations = parser.EnumerateRows(v => v != "Target").ToF64Matrix();
    var targets = parser.EnumerateRows("Target").ToF64Vector();

    // Act: learn on the first 25 row indices only.
    var indices = Enumerable.Range(0, 25).ToArray();
    var model = sut.Learn(observations, targets, indices);
    var predictions = model.Predict(observations);

    // Assert
    var metric = new TotalErrorClassificationMetric<double>();
    var actual = metric.Error(targets, predictions);
    Assert.AreEqual(0.67289719626168221, actual, 0.0001);
}
// Verifies the forward-search classification ensemble selector when seeded
// with the 3 best single models, using stratified cross-validation and
// mean-probability ensembling on the Glass data set.
public void ClassificationForwardSearchModelSelectingEnsembleLearner_Learn_Start_With_3_Models()
{
    // Arrange: candidate trees of varying depth.
    var candidates = new IIndexedLearner<ProbabilityPrediction>[]
    {
        new ClassificationDecisionTreeLearner(2),
        new ClassificationDecisionTreeLearner(5),
        new ClassificationDecisionTreeLearner(7),
        new ClassificationDecisionTreeLearner(9),
        new ClassificationDecisionTreeLearner(11),
        new ClassificationDecisionTreeLearner(21),
        new ClassificationDecisionTreeLearner(23),
        new ClassificationDecisionTreeLearner(1),
        new ClassificationDecisionTreeLearner(14),
        new ClassificationDecisionTreeLearner(17),
        new ClassificationDecisionTreeLearner(19),
        new ClassificationDecisionTreeLearner(33),
    };

    var metric = new LogLossClassificationProbabilityMetric();
    var ensembleStrategy = new MeanProbabilityClassificationEnsembleStrategy();

    var sut = new ClassificationForwardSearchModelSelectingEnsembleLearner(candidates, 5,
        new StratifiedCrossValidation<ProbabilityPrediction>(5, 23),
        ensembleStrategy, metric, 3, true);

    var parser = new CsvParser(() => new StringReader(Resources.Glass));
    var observations = parser.EnumerateRows(v => v != "Target").ToF64Matrix();
    var targets = parser.EnumerateRows("Target").ToF64Vector();

    // Act
    var model = sut.Learn(observations, targets);
    var predictions = model.PredictProbability(observations);

    // Assert
    var actual = metric.Error(targets, predictions);
    Assert.AreEqual(0.54434276244488244, actual, 0.0001);
}
// Verifies the two-step stacking API for classification: create meta-features
// first, then learn the stacking model, checking the total error.
public void ClassificationStackingEnsembleLearner_CreateMetaFeatures_Then_Learn()
{
    // Arrange
    var baseLearners = new IIndexedLearner<ProbabilityPrediction>[]
    {
        new ClassificationDecisionTreeLearner(2),
        new ClassificationDecisionTreeLearner(5),
        new ClassificationDecisionTreeLearner(7),
        new ClassificationDecisionTreeLearner(9),
    };

    var sut = new ClassificationStackingEnsembleLearner(baseLearners,
        new ClassificationDecisionTreeLearner(9),
        new RandomCrossValidation<ProbabilityPrediction>(5, 23),
        false);

    var parser = new CsvParser(() => new StringReader(Resources.Glass));
    var observations = parser.EnumerateRows(v => v != "Target").ToF64Matrix();
    var targets = parser.EnumerateRows("Target").ToF64Vector();

    // Act: explicit meta-feature step followed by stacking-model learning.
    var metaObservations = sut.LearnMetaFeatures(observations, targets);
    var model = sut.LearnStackingModel(observations, metaObservations, targets);
    var predictions = model.Predict(observations);

    // Assert
    var metric = new TotalErrorClassificationMetric<double>();
    var actual = metric.Error(targets, predictions);
    Assert.AreEqual(0.63551401869158874, actual, 0.0001);
}
// Example: train a classification decision tree on the white wine quality
// data set and demonstrate both batch and single-observation prediction.
public void ClassificationModel_Predict()
{
    #region learner creation
    // Use StreamReader(filepath) when running from filesystem
    var parser = new CsvParser(() => new StringReader(Resources.winequality_white));
    var targetName = "quality";

    // read feature matrix
    var observations = parser.EnumerateRows(c => c != targetName)
        .ToF64Matrix();

    // read classification targets
    var targets = parser.EnumerateRows(targetName)
        .ToF64Vector();

    // create learner
    var learner = new ClassificationDecisionTreeLearner();
    #endregion

    // learns a ClassificationDecisionTreeModel
    var model = learner.Learn(observations, targets);

    // predict all observations
    var predictions = model.Predict(observations);

    // predict single observation
    var prediction = model.Predict(observations.Row(0));
}
// Verifies a classification stacking ensemble configured to append the
// original features to the meta-features (last constructor argument true).
public void ClassificationStackingEnsembleLearner_Learn_Include_Original_Features()
{
    // Arrange
    var baseLearners = new IIndexedLearner<ProbabilityPrediction>[]
    {
        new ClassificationDecisionTreeLearner(2),
        new ClassificationDecisionTreeLearner(5),
        new ClassificationDecisionTreeLearner(7),
        new ClassificationDecisionTreeLearner(9),
    };

    // true => original features are kept alongside the meta-features.
    var sut = new ClassificationStackingEnsembleLearner(baseLearners,
        new ClassificationDecisionTreeLearner(9),
        new RandomCrossValidation<ProbabilityPrediction>(5, 23),
        true);

    var parser = new CsvParser(() => new StringReader(Resources.Glass));
    var observations = parser.EnumerateRows(v => v != "Target").ToF64Matrix();
    var targets = parser.EnumerateRows("Target").ToF64Vector();

    // Act
    var model = sut.Learn(observations, targets);
    var predictions = model.Predict(observations);

    // Assert
    var metric = new TotalErrorClassificationMetric<double>();
    var actual = metric.Error(targets, predictions);
    Assert.AreEqual(0.26168224299065418, actual, 0.0001);
}
// Verifies batch prediction of a classification stacking ensemble model by
// checking the total classification error against a baseline.
// Fix: removed the unused local `rows` (the batch Predict needs no row count).
public void ClassificationStackingEnsembleModel_Predict_Multiple()
{
    // Arrange: load the aptitude data set.
    var parser = new CsvParser(() => new StringReader(Resources.AptitudeData));
    var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
    var targets = parser.EnumerateRows("Pass").ToF64Vector();

    var learners = new IIndexedLearner<ProbabilityPrediction>[]
    {
        new ClassificationDecisionTreeLearner(2),
        new ClassificationDecisionTreeLearner(5),
        new ClassificationDecisionTreeLearner(7),
        new ClassificationDecisionTreeLearner(9),
    };

    var learner = new ClassificationStackingEnsembleLearner(learners,
        new ClassificationDecisionTreeLearner(9),
        new RandomCrossValidation<ProbabilityPrediction>(5, 23),
        false);
    var sut = learner.Learn(observations, targets);

    // Act
    var predictions = sut.Predict(observations);

    // Assert
    var metric = new TotalErrorClassificationMetric<double>();
    var actual = metric.Error(targets, predictions);
    Assert.AreEqual(0.34615384615384615, actual, 0.0000001);
}
// Verifies that a regression stacking ensemble model reports the expected
// raw variable importance over its meta-features.
public void RegressionStackingEnsembleModel_GetRawVariableImportance()
{
    // Arrange: load the aptitude data set.
    var parser = new CsvParser(() => new StringReader(Resources.AptitudeData));
    var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
    var targets = parser.EnumerateRows("Pass").ToF64Vector();

    var baseLearners = new IIndexedLearner<double>[]
    {
        new RegressionDecisionTreeLearner(2),
        new RegressionDecisionTreeLearner(5),
        new RegressionDecisionTreeLearner(7),
        new RegressionDecisionTreeLearner(9),
    };

    var learner = new RegressionStackingEnsembleLearner(baseLearners,
        new RegressionDecisionTreeLearner(9),
        new RandomCrossValidation<double>(5, 23),
        false);
    var sut = learner.Learn(observations, targets);

    // Act
    var actual = sut.GetRawVariableImportance();

    // Assert: one importance value per base-learner meta-feature.
    var expected = new double[]
    {
        0.255311355311355, 0.525592463092463, 0.753846153846154, 0.0128205128205128,
    };

    Assert.AreEqual(expected.Length, actual.Length);
    for (int index = 0; index < expected.Length; index++)
    {
        Assert.AreEqual(expected[index], actual[index], 0.000001);
    }
}
// Verifies the two-step API of the backward-elimination classification
// ensemble: create meta-features first, then select models from them.
public void ClassificationBackwardEliminationModelSelectingEnsembleLearner_CreateMetaFeatures_Then_Learn()
{
    // Arrange: candidate trees of varying depth for the selector to prune.
    var candidates = new IIndexedLearner<ProbabilityPrediction>[]
    {
        new ClassificationDecisionTreeLearner(2),
        new ClassificationDecisionTreeLearner(5),
        new ClassificationDecisionTreeLearner(7),
        new ClassificationDecisionTreeLearner(9),
        new ClassificationDecisionTreeLearner(11),
        new ClassificationDecisionTreeLearner(21),
        new ClassificationDecisionTreeLearner(23),
        new ClassificationDecisionTreeLearner(1),
        new ClassificationDecisionTreeLearner(14),
        new ClassificationDecisionTreeLearner(17),
        new ClassificationDecisionTreeLearner(19),
        new ClassificationDecisionTreeLearner(33),
    };

    var sut = new ClassificationBackwardEliminationModelSelectingEnsembleLearner(candidates, 5);

    var parser = new CsvParser(() => new StringReader(Resources.Glass));
    var observations = parser.EnumerateRows(v => v != "Target").ToF64Matrix();
    var targets = parser.EnumerateRows("Target").ToF64Vector();

    // Act: explicit meta-feature step followed by model selection.
    var metaObservations = sut.LearnMetaFeatures(observations, targets);
    var model = sut.SelectModels(observations, metaObservations, targets);
    var predictions = model.PredictProbability(observations);

    // Assert
    var metric = new LogLossClassificationProbabilityMetric();
    var actual = metric.Error(targets, predictions);
    Assert.AreEqual(0.52351727716455632, actual, 0.0001);
}
// Verifies single-observation prediction of a regression stacking ensemble
// model by checking the aggregate MSE against a baseline.
public void RegressionStackingEnsembleModel_Predict_single()
{
    // Arrange: load the aptitude data set.
    var parser = new CsvParser(() => new StringReader(Resources.AptitudeData));
    var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
    var targets = parser.EnumerateRows("Pass").ToF64Vector();
    var rowCount = targets.Length;

    var baseLearners = new IIndexedLearner<double>[]
    {
        new RegressionDecisionTreeLearner(2),
        new RegressionDecisionTreeLearner(5),
        new RegressionDecisionTreeLearner(7),
        new RegressionDecisionTreeLearner(9),
    };

    var learner = new RegressionStackingEnsembleLearner(baseLearners,
        new RegressionDecisionTreeLearner(9),
        new RandomCrossValidation<double>(5, 23),
        false);
    var sut = learner.Learn(observations, targets);

    // Act: predict each row individually via the single-observation API.
    var predictions = new double[rowCount];
    for (int row = 0; row < rowCount; row++)
    {
        predictions[row] = sut.Predict(observations.Row(row));
    }

    // Assert
    var metric = new MeanSquaredErrorRegressionMetric();
    var actual = metric.Error(targets, predictions);
    Assert.AreEqual(0.26175213675213671, actual, 0.0000001);
}
// Example: min-max normalize the wine-quality features in place, then train
// a softmax-only neural net (equivalent to logistic regression) on them.
public void FeatureNormalization_Normalize()
{
    // Use StreamReader(filepath) when running from filesystem
    var parser = new CsvParser(() => new StringReader(Resources.winequality_white));
    var targetName = "quality";

    // read feature matrix (all columns different from the targetName)
    var observations = parser.EnumerateRows(c => c != targetName)
        .ToF64Matrix();

    // create minmax normalizer (normalizes each feature from 0.0 to 1.0)
    var minMaxTransformer = new MinMaxTransformer(0.0, 1.0);

    // transforms features using the feature normalization transform
    minMaxTransformer.Transform(observations, observations);

    // read targets
    var targets = parser.EnumerateRows(targetName)
        .ToF64Vector();

    // create learner
    // neural net requires features to be normalize.
    // This makes convergens much faster.
    var net = new NeuralNet();
    net.Add(new InputLayer(observations.ColumnCount));
    // no hidden layer and softmax output correpsonds to logistic regression
    net.Add(new SoftMaxLayer(targets.Distinct().Count()));

    var learner = new ClassificationNeuralNetLearner(net, new LogLoss());

    // learns a logistic regression classifier
    var model = learner.Learn(observations, targets);
}
// Verifies single-observation prediction of a classification ensemble model
// by checking the total classification error against a baseline.
public void ClassificationEnsembleModel_Predict_single()
{
    // Arrange: load the aptitude data set.
    var parser = new CsvParser(() => new StringReader(Resources.AptitudeData));
    var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
    var targets = parser.EnumerateRows("Pass").ToF64Vector();
    var rowCount = targets.Length;

    var baseLearners = new IIndexedLearner<ProbabilityPrediction>[]
    {
        new ClassificationDecisionTreeLearner(2),
        new ClassificationDecisionTreeLearner(5),
        new ClassificationDecisionTreeLearner(7),
        new ClassificationDecisionTreeLearner(9),
    };

    var learner = new ClassificationEnsembleLearner(baseLearners,
        new MeanProbabilityClassificationEnsembleStrategy());
    var sut = learner.Learn(observations, targets);

    // Act: predict each row individually via the single-observation API.
    var predictions = new double[rowCount];
    for (int row = 0; row < rowCount; row++)
    {
        predictions[row] = sut.Predict(observations.Row(row));
    }

    // Assert
    var metric = new TotalErrorClassificationMetric<double>();
    var actual = metric.Error(targets, predictions);
    Assert.AreEqual(0.076923076923076927, actual, 0.0000001);
}