public void RegressionEnsembleModel_Predict_single()
{
    // Learns a mean-combined ensemble of regression trees and verifies
    // the error of row-by-row (single observation) predictions.
    var (observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

    var learners = new IIndexedLearner<double>[]
    {
        new RegressionDecisionTreeLearner(2),
        new RegressionDecisionTreeLearner(5),
        new RegressionDecisionTreeLearner(7),
        new RegressionDecisionTreeLearner(9),
    };

    var learner = new RegressionEnsembleLearner(learners, new MeanRegressionEnsembleStrategy());
    var sut = learner.Learn(observations, targets);

    // Predict each row individually through the single-observation overload.
    var rowCount = targets.Length;
    var predictions = new double[rowCount];
    for (var row = 0; row < rowCount; row++)
    {
        predictions[row] = sut.Predict(observations.Row(row));
    }

    var metric = new MeanSquaredErrorRegressionMetric();
    var actual = metric.Error(targets, predictions);

    Assert.AreEqual(0.033195970695970689, actual, 0.0000001);
}
public void RegressionEnsembleModel_GetVariableImportance()
{
    // Verifies the named (scaled) variable importances of a learned ensemble model.
    var (observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

    var featureNameToIndex = new Dictionary<string, int>
    {
        { "AptitudeTestScore", 0 },
        { "PreviousExperience_month", 1 },
    };

    var learners = new IIndexedLearner<double>[]
    {
        new RegressionDecisionTreeLearner(2),
        new RegressionDecisionTreeLearner(5),
        new RegressionDecisionTreeLearner(7),
        new RegressionDecisionTreeLearner(9),
    };

    var learner = new RegressionEnsembleLearner(learners, new MeanRegressionEnsembleStrategy());
    var sut = learner.Learn(observations, targets);

    var actual = sut.GetVariableImportance(featureNameToIndex);

    // Importances are expected sorted descending, scaled to 100 for the top feature.
    var expected = new Dictionary<string, double>
    {
        { "PreviousExperience_month", 100.0 },
        { "AptitudeTestScore", 3.46067371526717 },
    };

    Assert.AreEqual(expected.Count, actual.Count);

    // Compare pairwise in order: both key and value must match.
    var pairs = expected.Zip(actual, (e, a) => new { Expected = e, Actual = a });
    foreach (var pair in pairs)
    {
        Assert.AreEqual(pair.Expected.Key, pair.Actual.Key);
        Assert.AreEqual(pair.Expected.Value, pair.Actual.Value, 0.000001);
    }
}
public void RegressionEnsembleModel_Predict_Multiple()
{
    // Learns an ensemble and verifies the error of the batch (matrix) Predict overload.
    var parser = new CsvParser(() => new StringReader(Resources.AptitudeData));
    var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
    var targets = parser.EnumerateRows("Pass").ToF64Vector();
    var rows = targets.Length;

    var learners = new IIndexedLearner<double>[]
    {
        new RegressionDecisionTreeLearner(2),
        new RegressionDecisionTreeLearner(5),
        new RegressionDecisionTreeLearner(7),
        new RegressionDecisionTreeLearner(9),
    };

    var learner = new RegressionEnsembleLearner(learners, new MeanRegressionEnsembleStrategy());
    var sut = learner.Learn(observations, targets);

    // Predict all rows at once.
    var predictions = sut.Predict(observations);

    var metric = new MeanSquaredErrorRegressionMetric();
    var actual = metric.Error(targets, predictions);

    Assert.AreEqual(0.033195970695970689, actual, 0.0000001);
}
public void RegressionEnsembleModel_GetRawVariableImportance()
{
    // Verifies the raw (index-based, unnamed) variable importances of the ensemble model.
    var (observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

    var learners = new IIndexedLearner<double>[]
    {
        new RegressionDecisionTreeLearner(2),
        new RegressionDecisionTreeLearner(5),
        new RegressionDecisionTreeLearner(7),
        new RegressionDecisionTreeLearner(9),
    };

    var learner = new RegressionEnsembleLearner(learners, new MeanRegressionEnsembleStrategy());
    var sut = learner.Learn(observations, targets);

    var actual = sut.GetRawVariableImportance();
    var expected = new double[] { 100.0, 3.46067371526717 };

    Assert.AreEqual(expected.Length, actual.Length);
    for (var i = 0; i < expected.Length; i++)
    {
        Assert.AreEqual(expected[i], actual[i], 0.000001);
    }
}
public void RegressionEnsembleModel_GetRawVariableImportance()
{
    // Verifies raw variable importances when data is loaded via the CSV parser.
    var parser = new CsvParser(() => new StringReader(Resources.AptitudeData));
    var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
    var targets = parser.EnumerateRows("Pass").ToF64Vector();

    var learners = new IIndexedLearner<double>[]
    {
        new RegressionDecisionTreeLearner(2),
        new RegressionDecisionTreeLearner(5),
        new RegressionDecisionTreeLearner(7),
        new RegressionDecisionTreeLearner(9),
    };

    var learner = new RegressionEnsembleLearner(learners, new MeanRegressionEnsembleStrategy());
    var sut = learner.Learn(observations, targets);

    var actual = sut.GetRawVariableImportance();
    var expected = new double[] { 100.0, 3.46067371526717 };

    Assert.AreEqual(expected.Length, actual.Length);
    for (var index = 0; index < expected.Length; index++)
    {
        Assert.AreEqual(expected[index], actual[index], 0.000001);
    }
}
public void RegressionEnsembleLearner_Learn_Indexed()
{
    // Learns only on a subset of rows (indices 0..24) and checks the resulting error.
    var learners = new IIndexedLearner<double>[]
    {
        new RegressionDecisionTreeLearner(2),
        new RegressionDecisionTreeLearner(5),
        new RegressionDecisionTreeLearner(7),
        new RegressionDecisionTreeLearner(9),
    };

    var sut = new RegressionEnsembleLearner(learners, new MeanRegressionEnsembleStrategy());

    var parser = new CsvParser(() => new StringReader(Resources.DecisionTreeData));
    var observations = parser.EnumerateRows("F1", "F2").ToF64Matrix();
    var targets = parser.EnumerateRows("T").ToF64Vector();

    // Restrict training to the first 25 rows.
    var indices = Enumerable.Range(0, 25).ToArray();
    var model = sut.Learn(observations, targets, indices);

    // Error is measured over the full data set, including rows not trained on.
    var predictions = model.Predict(observations);

    var evaluator = new MeanSquaredErrorRegressionMetric();
    var actual = evaluator.Error(targets, predictions);

    Assert.AreEqual(0.13909386278812202, actual, 0.0001);
}
public void RegressionEnsembleLearner_Learn_Bagging()
{
    // Learns with bagging enabled (70% sub-sampling per learner) and checks the error.
    var learners = new IIndexedLearner<double>[]
    {
        new RegressionDecisionTreeLearner(2),
        new RegressionDecisionTreeLearner(5),
        new RegressionDecisionTreeLearner(7),
        new RegressionDecisionTreeLearner(9),
    };

    // 0.7 = fraction of observations sampled for each base learner.
    var sut = new RegressionEnsembleLearner(learners, new MeanRegressionEnsembleStrategy(), 0.7);

    var (observations, targets) = DataSetUtilities.LoadDecisionTreeDataSet();

    var model = sut.Learn(observations, targets);
    var predictions = model.Predict(observations);

    var evaluator = new MeanSquaredErrorRegressionMetric();
    var actual = evaluator.Error(targets, predictions);

    Assert.AreEqual(0.0318163057544871, actual, 0.0001);
}
public void RegressionEnsembleLearner_Learn_Indexed()
{
    // Learns on a 25-row index subset of the decision-tree data set
    // and checks the error over all rows.
    var learners = new IIndexedLearner<double>[]
    {
        new RegressionDecisionTreeLearner(2),
        new RegressionDecisionTreeLearner(5),
        new RegressionDecisionTreeLearner(7),
        new RegressionDecisionTreeLearner(9),
    };

    var sut = new RegressionEnsembleLearner(learners, new MeanRegressionEnsembleStrategy());

    var (observations, targets) = DataSetUtilities.LoadDecisionTreeDataSet();

    // Only the first 25 rows participate in training.
    var indices = Enumerable.Range(0, 25).ToArray();
    var model = sut.Learn(observations, targets, indices);

    var predictions = model.Predict(observations);

    var evaluator = new MeanSquaredErrorRegressionMetric();
    var actual = evaluator.Error(targets, predictions);

    Assert.AreEqual(0.13909386278812202, actual, 0.0001);
}
public void RegressionEnsembleModel_GetVariableImportance()
{
    // Verifies named variable importances when data is loaded via the CSV parser.
    var parser = new CsvParser(() => new StringReader(Resources.AptitudeData));
    var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
    var targets = parser.EnumerateRows("Pass").ToF64Vector();

    var featureNameToIndex = new Dictionary<string, int>
    {
        { "AptitudeTestScore", 0 },
        { "PreviousExperience_month", 1 },
    };

    var learners = new IIndexedLearner<double>[]
    {
        new RegressionDecisionTreeLearner(2),
        new RegressionDecisionTreeLearner(5),
        new RegressionDecisionTreeLearner(7),
        new RegressionDecisionTreeLearner(9),
    };

    var learner = new RegressionEnsembleLearner(learners, new MeanRegressionEnsembleStrategy());
    var sut = learner.Learn(observations, targets);

    var actual = sut.GetVariableImportance(featureNameToIndex);

    // Expected sorted descending, top feature scaled to 100.
    var expected = new Dictionary<string, double>
    {
        { "PreviousExperience_month", 100.0 },
        { "AptitudeTestScore", 3.46067371526717 },
    };

    Assert.AreEqual(expected.Count, actual.Count);

    var pairs = expected.Zip(actual, (e, a) => new { Expected = e, Actual = a });
    foreach (var pair in pairs)
    {
        Assert.AreEqual(pair.Expected.Key, pair.Actual.Key);
        Assert.AreEqual(pair.Expected.Value, pair.Actual.Value, 0.000001);
    }
}
public void RegressionEnsembleLearner()
{
    #region read and split data

    // Use StreamReader(filepath) when running from the filesystem.
    var parser = new CsvParser(() => new StringReader(Resources.winequality_white));
    var targetName = "quality";

    // Read the feature matrix (everything except the target column).
    var observations = parser.EnumerateRows(c => c != targetName)
        .ToF64Matrix();

    // Read the regression targets.
    var targets = parser.EnumerateRows(targetName)
        .ToF64Vector();

    // Since this is a regression problem, use a random training/test splitter.
    // 30% of the data goes to the test set.
    var splitter = new RandomTrainingTestIndexSplitter<double>(trainingPercentage: 0.7, seed: 24);

    var trainingTestSplit = splitter.SplitSet(observations, targets);
    var trainSet = trainingTestSplit.TrainingSet;
    var testSet = trainingTestSplit.TestSet;

    #endregion

    // The base learners to include in the ensemble.
    var ensembleLearners = new IIndexedLearner<double>[]
    {
        new RegressionAdaBoostLearner(maximumTreeDepth: 15),
        new RegressionRandomForestLearner(runParallel: false),
        new RegressionSquareLossGradientBoostLearner(iterations: 198, learningRate: 0.028,
            maximumTreeDepth: 12, subSampleRatio: 0.559, featuresPrSplit: 10, runParallel: false),
    };

    // Create the ensemble learner; it combines all the provided learners
    // into a single ensemble model.
    var learner = new RegressionEnsembleLearner(learners: ensembleLearners);
    var model = learner.Learn(trainSet.Observations, trainSet.Targets);

    // Predict both training and test sets.
    var trainPredictions = model.Predict(trainSet.Observations);
    var testPredictions = model.Predict(testSet.Observations);

    // Use mean squared error as the evaluation metric for this regression problem.
    var metric = new MeanSquaredErrorRegressionMetric();

    // Measure the error on the training and test sets.
    var trainError = metric.Error(trainSet.Targets, trainPredictions);
    var testError = metric.Error(testSet.Targets, testPredictions);

    // The ensemble model achieves a lower test error
    // than any of the individual models:
    // RegressionAdaBoostLearner: 0.4005
    // RegressionRandomForestLearner: 0.4037
    // RegressionSquareLossGradientBoostLearner: 0.3936
    TraceTrainingAndTestError(trainError, testError);
}