public void RegressionGradientBoostModel_GetVariableImportance()
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var featureNameToIndex = new Dictionary <string, int> {
                { "AptitudeTestScore", 0 },
                { "PreviousExperience_month", 1 }
            };

            var learner = new RegressionGradientBoostLearner(100, 0.1, 3, 1, 1e-6, 1.0, 0, new GradientBoostSquaredLoss(), false);
            var sut     = learner.Learn(observations, targets);

            var actual   = sut.GetVariableImportance(featureNameToIndex);
            var expected = new Dictionary <string, double> {
                { "PreviousExperience_month", 100.0 },
                { "AptitudeTestScore", 72.1682473281495 }
            };

            Assert.AreEqual(expected.Count, actual.Count);
            var zip = expected.Zip(actual, (e, a) => new { Expected = e, Actual = a });

            foreach (var item in zip)
            {
                Assert.AreEqual(item.Expected.Key, item.Actual.Key);
                Assert.AreEqual(item.Expected.Value, item.Actual.Value, 0.000001);
            }
        }
        public void ClassificationEnsembleModel_GetVariableImportance()
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var featureNameToIndex = new Dictionary <string, int> {
                { "AptitudeTestScore", 0 },
                { "PreviousExperience_month", 1 }
            };

            var learners = new IIndexedLearner <ProbabilityPrediction>[]
            {
                new ClassificationDecisionTreeLearner(2),
                new ClassificationDecisionTreeLearner(5),
                new ClassificationDecisionTreeLearner(7),
                new ClassificationDecisionTreeLearner(9)
            };

            var learner = new ClassificationEnsembleLearner(learners, new MeanProbabilityClassificationEnsembleStrategy());
            var sut     = learner.Learn(observations, targets);

            var actual   = sut.GetVariableImportance(featureNameToIndex);
            var expected = new Dictionary <string, double> {
                { "PreviousExperience_month", 100.0 },
                { "AptitudeTestScore", 15.6771501925546 }
            };

            Assert.AreEqual(expected.Count, actual.Count);
            var zip = expected.Zip(actual, (e, a) => new { Expected = e, Actual = a });

            foreach (var item in zip)
            {
                Assert.AreEqual(item.Expected.Key, item.Actual.Key);
                Assert.AreEqual(item.Expected.Value, item.Actual.Value, 0.000001);
            }
        }
Beispiel #3
0
        public void RegressionEnsembleModel_Predict_single()
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var learners = new IIndexedLearner <double>[]
            {
                new RegressionDecisionTreeLearner(2),
                new RegressionDecisionTreeLearner(5),
                new RegressionDecisionTreeLearner(7),
                new RegressionDecisionTreeLearner(9)
            };

            var learner = new RegressionEnsembleLearner(learners, new MeanRegressionEnsembleStrategy());
            var sut     = learner.Learn(observations, targets);

            var rows        = targets.Length;
            var predictions = new double[rows];

            for (int i = 0; i < rows; i++)
            {
                predictions[i] = sut.Predict(observations.Row(i));
            }

            var metric = new MeanSquaredErrorRegressionMetric();
            var actual = metric.Error(targets, predictions);

            Assert.AreEqual(0.033195970695970689, actual, 0.0000001);
        }
        public void RegressionForestModel_GetVariableImportance()
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var featureNameToIndex = new Dictionary <string, int> {
                { "AptitudeTestScore", 0 },
                { "PreviousExperience_month", 1 }
            };

            var learner = new RegressionRandomForestLearner(100, 5, 100, 1, 0.0001, 1.0, 42, false);
            var sut     = learner.Learn(observations, targets);

            var actual   = sut.GetVariableImportance(featureNameToIndex);
            var expected = new Dictionary <string, double> {
                { "PreviousExperience_month", 100 },
                { "AptitudeTestScore", 42.3879919692465 }
            };

            Assert.AreEqual(expected.Count, actual.Count);
            var zip = expected.Zip(actual, (e, a) => new { Expected = e, Actual = a });

            foreach (var item in zip)
            {
                Assert.AreEqual(item.Expected.Key, item.Actual.Key);
                Assert.AreEqual(item.Expected.Value, item.Actual.Value, m_delta);
            }
        }
        public void RegressionStackingEnsembleModel_Predict_Multiple()
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var learners = new IIndexedLearner <double>[]
            {
                new RegressionDecisionTreeLearner(2),
                new RegressionDecisionTreeLearner(5),
                new RegressionDecisionTreeLearner(7),
                new RegressionDecisionTreeLearner(9)
            };

            var learner = new RegressionStackingEnsembleLearner(learners,
                                                                new RegressionDecisionTreeLearner(9),
                                                                new RandomCrossValidation <double>(5, 23), false);

            var sut = learner.Learn(observations, targets);

            var predictions = sut.Predict(observations);

            var metric = new MeanSquaredErrorRegressionMetric();
            var actual = metric.Error(targets, predictions);

            Assert.AreEqual(0.26175213675213671, actual, 0.0000001);
        }
        public void ClassificationEnsembleModel_PredictProbability_single()
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var learners = new IIndexedLearner <ProbabilityPrediction>[]
            {
                new ClassificationDecisionTreeLearner(2),
                new ClassificationDecisionTreeLearner(5),
                new ClassificationDecisionTreeLearner(7),
                new ClassificationDecisionTreeLearner(9)
            };

            var learner = new ClassificationEnsembleLearner(learners, new MeanProbabilityClassificationEnsembleStrategy());
            var sut     = learner.Learn(observations, targets);

            var rows        = targets.Length;
            var predictions = new ProbabilityPrediction[rows];

            for (int i = 0; i < rows; i++)
            {
                predictions[i] = sut.PredictProbability(observations.Row(i));
            }

            var metric = new LogLossClassificationProbabilityMetric();
            var actual = metric.Error(targets, predictions);

            Assert.AreEqual(0.32562112824941963, actual, 0.0000001);
        }
        public void RegressionStackingEnsembleModel_GetRawVariableImportance()
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var learners = new IIndexedLearner <double>[]
            {
                new RegressionDecisionTreeLearner(2),
                new RegressionDecisionTreeLearner(5),
                new RegressionDecisionTreeLearner(7),
                new RegressionDecisionTreeLearner(9)
            };

            var learner = new RegressionStackingEnsembleLearner(learners,
                                                                new RegressionDecisionTreeLearner(9),
                                                                new RandomCrossValidation <double>(5, 23), false);

            var sut = learner.Learn(observations, targets);

            var actual   = sut.GetRawVariableImportance();
            var expected = new double[] { 0.255311355311355, 0.525592463092463, 0.753846153846154, 0.0128205128205128 };

            Assert.AreEqual(expected.Length, actual.Length);

            for (int i = 0; i < expected.Length; i++)
            {
                Assert.AreEqual(expected[i], actual[i], 0.000001);
            }
        }
Beispiel #8
0
        public void ClassificationStackingEnsembleModel_PredictProbability_single()
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var learners = new IIndexedLearner <ProbabilityPrediction>[]
            {
                new ClassificationDecisionTreeLearner(2),
                new ClassificationDecisionTreeLearner(5),
                new ClassificationDecisionTreeLearner(7),
                new ClassificationDecisionTreeLearner(9)
            };

            var learner = new ClassificationStackingEnsembleLearner(learners,
                                                                    new ClassificationDecisionTreeLearner(9),
                                                                    new RandomCrossValidation <ProbabilityPrediction>(5, 23), false);

            var sut = learner.Learn(observations, targets);

            var rows        = targets.Length;
            var predictions = new ProbabilityPrediction[rows];

            for (int i = 0; i < rows; i++)
            {
                predictions[i] = sut.PredictProbability(observations.Row(i));
            }

            var metric = new LogLossClassificationProbabilityMetric();
            var actual = metric.Error(targets, predictions);

            Assert.AreEqual(0.6696598716465223, actual, 0.0000001);
        }
        public void ClassificationEnsembleModel_GetRawVariableImportance()
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var learners = new IIndexedLearner <ProbabilityPrediction>[]
            {
                new ClassificationDecisionTreeLearner(2),
                new ClassificationDecisionTreeLearner(5),
                new ClassificationDecisionTreeLearner(7),
                new ClassificationDecisionTreeLearner(9)
            };

            var learner = new ClassificationEnsembleLearner(learners, new MeanProbabilityClassificationEnsembleStrategy());
            var sut     = learner.Learn(observations, targets);

            var actual   = sut.GetRawVariableImportance();
            var expected = new double[] { 100.0, 15.6771501925546 };

            Assert.AreEqual(expected.Length, actual.Length);

            for (int i = 0; i < expected.Length; i++)
            {
                Assert.AreEqual(expected[i], actual[i], 0.000001);
            }
        }
Beispiel #10
0
        public void ClassificationStackingEnsembleModel_GetRawVariableImportance()
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var learners = new IIndexedLearner <ProbabilityPrediction>[]
            {
                new ClassificationDecisionTreeLearner(2),
                new ClassificationDecisionTreeLearner(5),
                new ClassificationDecisionTreeLearner(7),
                new ClassificationDecisionTreeLearner(9)
            };

            var learner = new ClassificationStackingEnsembleLearner(learners,
                                                                    new ClassificationDecisionTreeLearner(9),
                                                                    new RandomCrossValidation <ProbabilityPrediction>(5, 23), false);

            var sut = learner.Learn(observations, targets);

            var actual   = sut.GetRawVariableImportance();
            var expected = new double[] { 0.12545787545787546, 0, 0.16300453932032882, 0.0345479082321188, 0.15036245805476572, 0, 0, 0 };

            Assert.AreEqual(expected.Length, actual.Length);

            for (int i = 0; i < expected.Length; i++)
            {
                Assert.AreEqual(expected[i], actual[i], 0.000001);
            }
        }
Beispiel #11
0
        public void RegressionEnsembleModel_GetRawVariableImportance()
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var learners = new IIndexedLearner <double>[]
            {
                new RegressionDecisionTreeLearner(2),
                new RegressionDecisionTreeLearner(5),
                new RegressionDecisionTreeLearner(7),
                new RegressionDecisionTreeLearner(9)
            };

            var learner = new RegressionEnsembleLearner(learners, new MeanRegressionEnsembleStrategy());
            var sut     = learner.Learn(observations, targets);

            var actual   = sut.GetRawVariableImportance();
            var expected = new double[] { 100.0, 3.46067371526717 };

            Assert.AreEqual(expected.Length, actual.Length);

            for (int i = 0; i < expected.Length; i++)
            {
                Assert.AreEqual(expected[i], actual[i], 0.000001);
            }
        }
Beispiel #12
0
        public void ClassificationStackingEnsembleModel_Predict_Multiple()
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var learners = new IIndexedLearner <ProbabilityPrediction>[]
            {
                new ClassificationDecisionTreeLearner(2),
                new ClassificationDecisionTreeLearner(5),
                new ClassificationDecisionTreeLearner(7),
                new ClassificationDecisionTreeLearner(9)
            };

            var learner = new ClassificationStackingEnsembleLearner(learners,
                                                                    new ClassificationDecisionTreeLearner(9),
                                                                    new RandomCrossValidation <ProbabilityPrediction>(5, 23), false);

            var sut = learner.Learn(observations, targets);

            var predictions = sut.Predict(observations);

            var metric = new TotalErrorClassificationMetric <double>();
            var actual = metric.Error(targets, predictions);

            Assert.AreEqual(0.34615384615384615, actual, 0.0000001);
        }
        public void ClassificationAdaBoostModel_GetVariableImportance()
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var featureNameToIndex = new Dictionary <string, int> {
                { "AptitudeTestScore", 0 },
                { "PreviousExperience_month", 1 }
            };

            var learner = new ClassificationAdaBoostLearner(10, 1, 3);
            var sut     = learner.Learn(observations, targets);

            var actual   = sut.GetVariableImportance(featureNameToIndex);
            var expected = new Dictionary <string, double> {
                { "PreviousExperience_month", 100.0 },
                { "AptitudeTestScore", 24.0268096428771 }
            };

            Assert.AreEqual(expected.Count, actual.Count);
            var zip = expected.Zip(actual, (e, a) => new { Expected = e, Actual = a });

            foreach (var item in zip)
            {
                Assert.AreEqual(item.Expected.Key, item.Actual.Key);
                Assert.AreEqual(item.Expected.Value, item.Actual.Value, 0.000001);
            }
        }
Beispiel #14
0
        double CrossValidate(int folds)
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var sut         = new StratifiedCrossValidation <double>(folds, 42);
            var predictions = sut.CrossValidate(new ClassificationDecisionTreeLearner(), observations, targets);
            var metric      = new TotalErrorClassificationMetric <double>();

            return(metric.Error(targets, predictions));
        }
        public void ClassificationAdaBoostModel_Save()
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var learner = new ClassificationAdaBoostLearner(2);
            var sut     = learner.Learn(observations, targets);

            var writer = new StringWriter();

            sut.Save(() => writer);

            Assert.AreEqual(m_classificationAdaBoostModelString, writer.ToString());
        }
        public void RegressionForestModel_Predict_Multiple()
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var learner = new RegressionRandomForestLearner(100, 5, 100, 1, 0.0001, 1.0, 42, false);
            var sut     = learner.Learn(observations, targets);

            var predictions = sut.Predict(observations);

            var evaluator = new MeanSquaredErrorRegressionMetric();
            var error     = evaluator.Error(targets, predictions);

            Assert.AreEqual(0.15381141277554411, error, m_delta);
        }
Beispiel #17
0
        double ClassificationDecisionTreeLearner_Learn_Aptitude(int treeDepth)
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var sut   = new ClassificationDecisionTreeLearner(treeDepth, 1, 2, 0.001, 42);
            var model = sut.Learn(observations, targets);

            var predictions = model.Predict(observations);

            var evaluator = new TotalErrorClassificationMetric <double>();
            var error     = evaluator.Error(targets, predictions);

            return(error);
        }
Beispiel #18
0
        public void RegressionAdaBoostModel_Precit_Multiple()
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var learner = new RegressionAdaBoostLearner(10);
            var sut     = learner.Learn(observations, targets);

            var predictions = sut.Predict(observations);

            var evaluator = new MeanAbsolutErrorRegressionMetric();
            var error     = evaluator.Error(targets, predictions);

            Assert.AreEqual(0.14185814185814186, error, 0.0000001);
        }
Beispiel #19
0
        public void RegressionAdaBoostModel_Load()
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var reader = new StringReader(RegressionDecisionTreeModelString);
            var sut    = RegressionAdaBoostModel.Load(() => reader);

            var predictions = sut.Predict(observations);

            var evaluator = new MeanAbsolutErrorRegressionMetric();
            var error     = evaluator.Error(targets, predictions);

            Assert.AreEqual(0.22527472527472531, error, 0.0000001);
        }
        public void ClassificationGradientBoostModel_Predict_Multiple()
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var learner = new ClassificationGradientBoostLearner(100, 0.1, 3, 1, 1e-6, 1, 0, new GradientBoostBinomialLoss(), false);
            var sut     = learner.Learn(observations, targets);

            var predictions = sut.Predict(observations);

            var evaluator = new TotalErrorClassificationMetric <double>();
            var error     = evaluator.Error(targets, predictions);

            Assert.AreEqual(0.038461538461538464, error, 0.0000001);
        }
        public void ClassificationAdaBoostModel_Precit_Multiple()
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var learner = new ClassificationAdaBoostLearner(10);
            var sut     = learner.Learn(observations, targets);

            var predictions = sut.Predict(observations);

            var evaluator = new TotalErrorClassificationMetric <double>();
            var error     = evaluator.Error(targets, predictions);

            Assert.AreEqual(0.038461538461538464, error, 0.0000001);
        }
        public void RegressionGradientBoostModel_Precit_Multiple()
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var learner = new RegressionGradientBoostLearner(100, 0.1, 3, 1, 1e-6, 1.0, 0, new GradientBoostSquaredLoss(), false);
            var sut     = learner.Learn(observations, targets);

            var predictions = sut.Predict(observations);

            var evaluator = new MeanAbsolutErrorRegressionMetric();
            var error     = evaluator.Error(targets, predictions);

            Assert.AreEqual(0.045093177702025665, error, 0.0000001);
        }
        public void ClassificationAdaBoostModel_Load()
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var reader = new StringReader(m_classificationAdaBoostModelString);
            var sut    = ClassificationAdaBoostModel.Load(() => reader);

            var predictions = sut.Predict(observations);

            var evaluator = new TotalErrorClassificationMetric <double>();
            var error     = evaluator.Error(targets, predictions);

            Assert.AreEqual(0.19230769230769232, error, 0.0000001);
        }
Beispiel #24
0
        public void ClassificationAdaBoostLearner_Learn_AptitudeData()
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var sut = new ClassificationAdaBoostLearner(10);

            var model       = sut.Learn(observations, targets);
            var predictions = model.Predict(observations);

            var evaluator = new TotalErrorClassificationMetric <double>();
            var actual    = evaluator.Error(targets, predictions);

            Assert.AreEqual(0.038461538461538464, actual);
        }
        public void RegressionGradientBoostModel_Load()
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var reader = new StringReader(RegressionGradientBoostModelString);
            var sut    = RegressionGradientBoostModel.Load(() => reader);

            var predictions = sut.Predict(observations);

            var evaluator = new MeanAbsolutErrorRegressionMetric();
            var error     = evaluator.Error(targets, predictions);

            Assert.AreEqual(0.42445562130177528, error, 0.0000001);
        }
Beispiel #26
0
        public void ClassificationBinomialGradientBoostLearner_Stochastic_Learn()
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var sut   = new ClassificationBinomialGradientBoostLearner(50, 0.1, 3, 1, 1e-6, .3, 0, false);
            var model = sut.Learn(observations, targets);

            var predictions = model.Predict(observations);

            var evaluator = new TotalErrorClassificationMetric <double>();
            var actual    = evaluator.Error(targets, predictions);

            Assert.AreEqual(0.076923076923076927, actual);
        }
Beispiel #27
0
        public void ClassificationForestModel_Predict_Multiple()
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var learner = new ClassificationRandomForestLearner(100, 5, 100, 1, 0.0001, 1.0, 42, false);
            var sut     = learner.Learn(observations, targets);

            var predictions = sut.Predict(observations);

            var evaluator = new TotalErrorClassificationMetric <double>();
            var error     = evaluator.Error(targets, predictions);

            Assert.AreEqual(0.23076923076923078, error, m_delta);
        }
        public void RegressionForestModel_Load()
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var reader = new StringReader(m_regressionForestModelString);
            var sut    = RegressionForestModel.Load(() => reader);

            var predictions = sut.Predict(observations);

            var evaluator = new MeanSquaredErrorRegressionMetric();
            var error     = evaluator.Error(targets, predictions);

            Assert.AreEqual(0.14547628738104926, error, m_delta);
        }
        double RegressionRandomForestLearner_Learn_Aptitude(int trees, double subSampleRatio = 1.0)
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var sut   = new RegressionRandomForestLearner(trees, 5, 100, 1, 0.0001, subSampleRatio, 42, false);
            var model = sut.Learn(observations, targets);

            var predictions = model.Predict(observations);

            var evaluator = new MeanSquaredErrorRegressionMetric();
            var error     = evaluator.Error(targets, predictions);

            return(error);
        }
Beispiel #30
0
        double ClassificationExtremelyRandomizedTreesLearner_Learn_Aptitude(int trees, double subSampleRatio = 1.0)
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var sut   = new ClassificationExtremelyRandomizedTreesLearner(trees, 1, 100, 1, 0.0001, subSampleRatio, 42, false);
            var model = sut.Learn(observations, targets);

            var predictions = model.Predict(observations);

            var evaluator = new TotalErrorClassificationMetric <double>();
            var error     = evaluator.Error(targets, predictions);

            return(error);
        }