public void ClassificationDecisionTreeModel_GetVariableImportance()
        {
            // Trains a tree on the aptitude data set and verifies the
            // normalized, named variable importances against known values.
            var (observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var featureNameToIndex = new Dictionary<string, int>
            {
                { "AptitudeTestScore", 0 },
                { "PreviousExperience_month", 1 },
            };

            var learner = new ClassificationDecisionTreeLearner(100, 1, 2, 0.001, 42);
            var sut = learner.Learn(observations, targets);

            var actual = sut.GetVariableImportance(featureNameToIndex);
            var expected = new Dictionary<string, double>
            {
                { "PreviousExperience_month", 100.0 },
                { "AptitudeTestScore", 19.5121951219512 },
            };

            Assert.AreEqual(expected.Count, actual.Count);

            // compare entries pairwise, preserving enumeration order.
            foreach (var pair in expected.Zip(actual, (e, a) => new { Expected = e, Actual = a }))
            {
                Assert.AreEqual(pair.Expected.Key, pair.Actual.Key);
                Assert.AreEqual(pair.Expected.Value, pair.Actual.Value, 0.000001);
            }
        }
        public void ClassificationModel_Predict()
        {
            #region learner creation

            // Use StreamReader(filepath) when running from filesystem
            const string targetName = "quality";
            var parser = new CsvParser(() => new StringReader(Resources.winequality_white));

            // read feature matrix
            var observations = parser.EnumerateRows(c => c != targetName).ToF64Matrix();

            // read classification targets
            var targets = parser.EnumerateRows(targetName).ToF64Vector();

            // create learner
            var learner = new ClassificationDecisionTreeLearner();
            #endregion

            // learns a ClassificationDecisionTreeModel
            var model = learner.Learn(observations, targets);

            // predict all observations at once
            var predictions = model.Predict(observations);

            // predict a single observation
            var prediction = model.Predict(observations.Row(0));
        }
        public void ClassificationDecisionTreeLearner_Learn_Reuse_No_Valid_Split()
        {
            // Verifies that a learner instance can be reused on new data,
            // including degenerate data that offers no valid split.
            var parser       = new CsvParser(() => new StringReader(Resources.Glass));
            var observations = parser.EnumerateRows(v => v != "Target").ToF64Matrix();
            var targets      = parser.EnumerateRows("Target").ToF64Vector();

            var sut = new ClassificationDecisionTreeLearner();

            // train initial model.
            sut.Learn(observations, targets);

            // reuse learner, with smaller data that provides no valid split.
            var onlyUniqueTargetValue     = 1.0;
            var onlyOneUniqueObservations = (F64Matrix)observations.Rows(0, 1, 2, 3, 4);
            var onlyOneUniquetargets      = Enumerable.Range(0, onlyOneUniqueObservations.RowCount).Select(v => onlyUniqueTargetValue).ToArray();
            var model = sut.Learn(onlyOneUniqueObservations, onlyOneUniquetargets);

            var predictions = model.Predict(onlyOneUniqueObservations);

            // no valid split, so should result in the model always returning the onlyUniqueTargetValue.
            for (int i = 0; i < predictions.Length; i++)
            {
                Assert.AreEqual(onlyUniqueTargetValue, predictions[i], 0.0001);
            }
        }
Beispiel #4
0
        public void ClassificationDecisionTreeModel_GetVariableImportance()
        {
            // Trains a tree on the aptitude CSV data and checks the
            // normalized, named variable importances against known values.
            var parser       = new CsvParser(() => new StringReader(Resources.AptitudeData));
            var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
            var targets      = parser.EnumerateRows("Pass").ToF64Vector();

            var featureNameToIndex = new Dictionary<string, int>
            {
                { "AptitudeTestScore", 0 },
                { "PreviousExperience_month", 1 },
            };

            var learner = new ClassificationDecisionTreeLearner(100, 1, 2, 0.001, 42);
            var sut = learner.Learn(observations, targets);

            var actual = sut.GetVariableImportance(featureNameToIndex);
            var expected = new Dictionary<string, double>
            {
                { "PreviousExperience_month", 100.0 },
                { "AptitudeTestScore", 19.5121951219512 },
            };

            Assert.AreEqual(expected.Count, actual.Count);

            // compare entries pairwise, preserving enumeration order.
            foreach (var pair in expected.Zip(actual, (e, a) => new { Expected = e, Actual = a }))
            {
                Assert.AreEqual(pair.Expected.Key, pair.Actual.Key);
                Assert.AreEqual(pair.Expected.Value, pair.Actual.Value, 0.000001);
            }
        }
        double CrossValidate(int folds)
        {
            // Runs stratified cross-validation on the aptitude data set and
            // returns the resulting total classification error.
            var (observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var learner = new ClassificationDecisionTreeLearner();
            var sut = new StratifiedCrossValidation<double>(folds, 42);

            var predictions = sut.CrossValidate(learner, observations, targets);

            return new TotalErrorClassificationMetric<double>().Error(targets, predictions);
        }
        public void ClassificationModel_PredictProbability_Threshold_On_Probability()
        {
            // Demonstrates thresholding predictions on the probability of the
            // predicted class to discard observations the model is uncertain about.
            #region learner creation

            // Use StreamReader(filepath) when running from filesystem
            var parser     = new CsvParser(() => new StringReader(Resources.winequality_white));
            var targetName = "quality";

            // read feature matrix
            var observations = parser.EnumerateRows(c => c != targetName)
                               .ToF64Matrix();

            // read classification targets and convert to binary problem (low quality/high quality).
            var targets = parser.EnumerateRows(targetName)
                          .ToF64Vector().Select(t => t < 5 ? 0.0 : 1.0).ToArray();

            var translation = new Dictionary <double, string> {
                { 0.0, "Low quality" }, { 1.0, "High quality" }
            };

            // create learner
            var learner = new ClassificationDecisionTreeLearner(maximumTreeDepth: 5);
            #endregion

            // learns a ClassificationDecisionTreeModel
            var model = learner.Learn(observations, targets);

            // predict probabilities for all observations
            var probabilityPredictions = model.PredictProbability(observations);

            // zip target and probabilities to keep order
            var zip = targets.Zip(probabilityPredictions, (t, p) => new { Target = t, Prediction = p });

            // threshold on the probability of the predicted class.
            // This will remove the observations that the model is uncertain about.
            // Materialize once: the deferred query would otherwise be re-evaluated
            // by each of the two Select calls below.
            var probabilityThreshold = 0.90;
            var thresholdedResult    = zip
                .Where(kvp => kvp.Prediction.Probabilities[kvp.Prediction.Prediction] > probabilityThreshold)
                .ToArray();

            // evaluate the resulting observations
            var thresholdedPredictions = thresholdedResult.Select(p => p.Prediction).ToArray();
            var thresholdedTargets     = thresholdedResult.Select(p => p.Target).ToArray();

            // evaluate only on probability thresholded data
            var metric = new LogLossClassificationProbabilityMetric();
            Trace.WriteLine("ProbabilityThresholded Result:");
            Trace.WriteLine(metric.ErrorString(thresholdedTargets, thresholdedPredictions, translation));
            Trace.WriteLine("");

            // evaluate on all data for comparison
            Trace.WriteLine("All data result:");
            Trace.WriteLine(metric.ErrorString(targets, probabilityPredictions, translation));
        }
        public void ClassificationDecisionTreeModel_Save()
        {
            // Serializes a trained model and compares the text
            // against the expected serialized representation.
            var (observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var sut = new ClassificationDecisionTreeLearner(2)
                .Learn(observations, targets);

            var writer = new StringWriter();
            sut.Save(() => writer);

            Assert.AreEqual(ClassificationDecisionTreeModelString, writer.ToString());
        }
Beispiel #8
0
        double ClassificationDecisionTreeLearner_Learn_Aptitude(int treeDepth)
        {
            // Trains a tree of the given depth on the aptitude data set and
            // returns the total classification error on the training data.
            var (observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var model = new ClassificationDecisionTreeLearner(treeDepth, 1, 2, 0.001, 42)
                .Learn(observations, targets);

            var predictions = model.Predict(observations);

            var evaluator = new TotalErrorClassificationMetric<double>();
            return evaluator.Error(targets, predictions);
        }
        public void ClassificationDecisionTreeModel_Precit_Multiple()
        {
            // Predicts all observations at once and checks the
            // training error against a known value.
            var (observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var sut = new ClassificationDecisionTreeLearner(100, 1, 2, 0.001, 42)
                .Learn(observations, targets);

            var predictions = sut.Predict(observations);

            var evaluator = new TotalErrorClassificationMetric<double>();
            var error = evaluator.Error(targets, predictions);

            Assert.AreEqual(0.038461538461538464, error, 0.0000001);
        }
Beispiel #10
0
        public void ClassificationDecisionTreeModel_Save()
        {
            // Serializes a model trained on the aptitude CSV data and compares
            // the text against the expected serialized representation.
            var parser       = new CsvParser(() => new StringReader(Resources.AptitudeData));
            var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
            var targets      = parser.EnumerateRows("Pass").ToF64Vector();

            var sut = new ClassificationDecisionTreeLearner(2)
                .Learn(observations, targets);

            var writer = new StringWriter();
            sut.Save(() => writer);

            Assert.AreEqual(ClassificationDecisionTreeModelString, writer.ToString());
        }
        double CrossValidate_Provide_Indices(int folds)
        {
            // Cross-validates only the first half of the observations (via explicit
            // indices) and returns the total error over that subset.
            var (observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var sut = new StratifiedCrossValidation<double>(folds, 42);

            var rowsToCrossvalidate = targets.Length / 2;
            var indices = Enumerable.Range(0, rowsToCrossvalidate).ToArray();
            var predictions = new double[rowsToCrossvalidate];

            var learner = new ClassificationDecisionTreeLearner();
            sut.CrossValidate(learner, observations, targets, indices, predictions);

            var metric = new TotalErrorClassificationMetric<double>();
            return metric.Error(targets.Take(rowsToCrossvalidate).ToArray(), predictions);
        }
Beispiel #12
0
        double ClassificationDecisionTreeLearner_Learn_Glass_Weighted(int treeDepth, double weight)
        {
            // Trains on the glass data set with per-observation weights
            // (class 1 weighted by 'weight') and returns the training error.
            var (observations, targets) = DataSetUtilities.LoadGlassDataSet();

            var weights = targets.Select(v => Weight(v, 1, weight)).ToArray();
            var model = new ClassificationDecisionTreeLearner(treeDepth, 1, observations.ColumnCount, 0.001, 42)
                .Learn(observations, targets, weights);

            var predictions = model.Predict(observations);

            var evaluator = new TotalErrorClassificationMetric<double>();
            Trace.WriteLine(evaluator.ErrorString(targets, predictions));

            return evaluator.Error(targets, predictions);
        }
        public void ClassificationDecisionTreeModel_Predict_Multiple_Indexed()
        {
            // Predicts only a selected subset of observations (by index)
            // and checks the error on the matching targets.
            var (observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var sut = new ClassificationDecisionTreeLearner(100, 5, 2, 0.001, 42)
                .Learn(observations, targets);

            var indices = new int[] { 0, 3, 4, 5, 6, 7, 8, 9, 20, 21 };
            var predictions = sut.Predict(observations, indices);

            var indexedTargets = targets.GetIndices(indices);
            var evaluator = new TotalErrorClassificationMetric<double>();
            var error = evaluator.Error(indexedTargets, predictions);

            Assert.AreEqual(0.1, error, 0.0000001);
        }
        double ClassificationDecisionTreeLearner_Learn_Aptitude(int treeDepth)
        {
            // Trains a tree of the given depth on the aptitude CSV data and
            // returns the total classification error on the training data.
            var parser       = new CsvParser(() => new StringReader(Resources.AptitudeData));
            var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
            var targets      = parser.EnumerateRows("Pass").ToF64Vector();

            var sut   = new ClassificationDecisionTreeLearner(treeDepth, 1, 2, 0.001, 42);
            var model = sut.Learn(observations, targets);

            var predictions = model.Predict(observations);

            var evaluator = new TotalErrorClassificationMetric <double>();
            var error     = evaluator.Error(targets, predictions);

            return(error);
        }
Beispiel #15
0
        public void ClassificationDecisionTreeModel_Precit_Multiple()
        {
            // Predicts all observations at once (CSV-loaded data) and
            // checks the training error against a known value.
            var parser       = new CsvParser(() => new StringReader(Resources.AptitudeData));
            var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
            var targets      = parser.EnumerateRows("Pass").ToF64Vector();

            var learner = new ClassificationDecisionTreeLearner(100, 1, 2, 0.001, 42);
            var sut     = learner.Learn(observations, targets);

            var predictions = sut.Predict(observations);

            var evaluator = new TotalErrorClassificationMetric <double>();
            var error     = evaluator.Error(targets, predictions);

            Assert.AreEqual(0.038461538461538464, error, 0.0000001);
        }
        ClassificationDecisionTreeModel CreateTree(F64Matrix observations, double[] targets, int[] indices, Random random)
        {
            // Builds a single tree on a sample drawn with replacement from
            // the provided indices (sample size scaled by m_subSampleRatio).
            var learner = new ClassificationDecisionTreeLearner(m_maximumTreeDepth, m_minimumSplitSize, m_featuresPrSplit,
                                                                m_minimumInformationGain, random.Next());

            var sampleSize = (int)Math.Round(m_subSampleRatio * (double)indices.Length);
            var treeIndices = new int[sampleSize];

            // sample indices with replacement.
            for (int i = 0; i < treeIndices.Length; i++)
            {
                treeIndices[i] = indices[random.Next(indices.Length)];
            }

            return learner.Learn(observations, targets, treeIndices);
        }
        public void ClassificationDecisionTreeModel_GetRawVariableImportance()
        {
            // Checks the raw (unnormalized) variable importances
            // of a tree trained on the aptitude data set.
            var (observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var sut = new ClassificationDecisionTreeLearner(100, 1, 2, 0.001, 42)
                .Learn(observations, targets);

            var actual = sut.GetRawVariableImportance();
            var expected = new double[] { 0.071005917159763288, 0.36390532544378695 };

            Assert.AreEqual(expected.Length, actual.Length);
            for (int i = 0; i < expected.Length; i++)
            {
                Assert.AreEqual(expected[i], actual[i], 0.000001);
            }
        }
Beispiel #18
0
        public void LearningCurves_Calculate_ProbabilityPrediction()
        {
            // Calculates learning curves (log-loss vs. training sample size)
            // for a depth-limited tree on the binarized wine-quality problem.
            #region Read data

            // Use StreamReader(filepath) when running from filesystem
            const string targetName = "quality";
            var parser = new CsvParser(() => new StringReader(Resources.winequality_white));

            // read feature matrix
            var observations = parser.EnumerateRows(c => c != targetName).ToF64Matrix();

            // read classification targets and convert to binary problem (low quality/high quality).
            var targets = parser.EnumerateRows(targetName)
                .ToF64Vector()
                .Select(t => t < 5 ? 0.0 : 1.0)
                .ToArray();

            #endregion

            // metric for measuring model error
            var metric = new LogLossClassificationProbabilityMetric();

            // creates cross validator, observations are shuffled randomly
            var learningCurveCalculator = new RandomShuffleLearningCurvesCalculator<ProbabilityPrediction>(
                metric,
                samplePercentages: new double[] { 0.05, 0.1, 0.2, 0.4, 0.8, 1.0 },
                trainingPercentage: 0.7,
                numberOfShufflesPrSample: 5);

            // create learner
            var learner = new ClassificationDecisionTreeLearner(maximumTreeDepth: 5);

            // calculate learning curve
            var learningCurve = learningCurveCalculator.Calculate(learner, observations, targets);

            // write to csv
            var writer = new StringWriter();
            learningCurve.Write(() => writer);

            // trace result
            // Plotting the learning curves will help determine if the model has high bias or high variance.
            // This information can be used to determine what to try next in order to improve the model.
            Trace.WriteLine(writer.ToString());

            // alternatively, write to file
            //learningCurve.Write(() => new StreamWriter(filePath));
        }
Beispiel #19
0
        public void ClassificationDecisionTreeModel_GetRawVariableImportance()
        {
            // Checks the raw (unnormalized) variable importances
            // of a tree trained on the aptitude CSV data.
            var parser       = new CsvParser(() => new StringReader(Resources.AptitudeData));
            var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
            var targets      = parser.EnumerateRows("Pass").ToF64Vector();

            var sut = new ClassificationDecisionTreeLearner(100, 1, 2, 0.001, 42)
                .Learn(observations, targets);

            var actual = sut.GetRawVariableImportance();
            var expected = new double[] { 0.071005917159763288, 0.36390532544378695 };

            Assert.AreEqual(expected.Length, actual.Length);
            for (int i = 0; i < expected.Length; i++)
            {
                Assert.AreEqual(expected[i], actual[i], 0.000001);
            }
        }
        double ClassificationDecisionTreeLearner_Learn_Glass_Weighted(int treeDepth, double weight)
        {
            // Trains on the glass CSV data with per-observation weights
            // (class 1 weighted by 'weight') and returns the training error.
            var parser       = new CsvParser(() => new StringReader(Resources.Glass));
            var observations = parser.EnumerateRows(v => v != "Target").ToF64Matrix();
            var targets      = parser.EnumerateRows("Target").ToF64Vector();

            var weights = targets.Select(v => Weight(v, 1, weight)).ToArray();
            var sut     = new ClassificationDecisionTreeLearner(treeDepth, 1, observations.ColumnCount, 0.001, 42);
            var model   = sut.Learn(observations, targets, weights);

            var predictions = model.Predict(observations);
            var evaluator   = new TotalErrorClassificationMetric <double>();

            Trace.WriteLine(evaluator.ErrorString(targets, predictions));
            var error = evaluator.Error(targets, predictions);

            return(error);
        }
Beispiel #21
0
        public void ClassificationDecisionTreeModel_Predict_Multiple_Indexed()
        {
            // Predicts only a selected subset of observations (by index)
            // and checks the error on the matching targets.
            var parser       = new CsvParser(() => new StringReader(Resources.AptitudeData));
            var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
            var targets      = parser.EnumerateRows("Pass").ToF64Vector();

            var learner = new ClassificationDecisionTreeLearner(100, 5, 2, 0.001, 42);
            var sut     = learner.Learn(observations, targets);

            var indices     = new int[] { 0, 3, 4, 5, 6, 7, 8, 9, 20, 21 };
            var predictions = sut.Predict(observations, indices);

            var evaluator      = new TotalErrorClassificationMetric <double>();
            var indexedTargets = targets.GetIndices(indices);
            var error          = evaluator.Error(indexedTargets, predictions);

            Assert.AreEqual(0.1, error, 0.0000001);
        }
Beispiel #22
0
        public void ClassificationDecisionTreeModel_PredictProbability_Multiple_Indexed()
        {
            // Predicts class probabilities for a selected subset of observations
            // (by index) and verifies both the error and the exact probabilities.
            var parser       = new CsvParser(() => new StringReader(Resources.AptitudeData));
            var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
            var targets      = parser.EnumerateRows("Pass").ToF64Vector();

            var learner = new ClassificationDecisionTreeLearner(100, 5, 2, 0.001, 42);
            var sut     = learner.Learn(observations, targets);

            var indices = new int[] { 0, 3, 4, 5, 6, 7, 8, 9, 20, 21 };
            var actual  = sut.PredictProbability(observations, indices);

            var indexedTargets = targets.GetIndices(indices);
            var evaluator      = new TotalErrorClassificationMetric <double>();
            var error          = evaluator.Error(indexedTargets, actual.Select(p => p.Prediction).ToArray());

            Assert.AreEqual(0.1, error, 0.0000001);

            // helper to build an expected prediction with its binary class probabilities.
            ProbabilityPrediction Prob(double prediction, double p0, double p1) =>
                new ProbabilityPrediction(prediction, new Dictionary <double, double> { { 0, p0 }, { 1, p1 } });

            var expected = new ProbabilityPrediction[]
            {
                Prob(0, 0.571428571428571, 0.428571428571429),
                Prob(0, 0.571428571428571, 0.428571428571429),
                Prob(1, 0.428571428571429, 0.571428571428571),
                Prob(0, 0.75, 0.25),
                Prob(1, 0.285714285714286, 0.714285714285714),
                Prob(0, 0.75, 0.25),
                Prob(0, 0.857142857142857, 0.142857142857143),
                Prob(1, 0.285714285714286, 0.714285714285714),
                Prob(0, 0.857142857142857, 0.142857142857143),
                Prob(1, 0.285714285714286, 0.714285714285714),
            };

            CollectionAssert.AreEqual(expected, actual);
        }
        public void ClassificationModel_FeatureImportance()
        {
            // Demonstrates extracting raw and normalized, named feature
            // importances from a trained decision tree model.
            #region learner creation

            // Use StreamReader(filepath) when running from filesystem
            const string targetName = "quality";
            var parser = new CsvParser(() => new StringReader(Resources.winequality_white));

            // read feature matrix
            var observations = parser.EnumerateRows(c => c != targetName).ToF64Matrix();

            // read classification targets
            var targets = parser.EnumerateRows(targetName).ToF64Vector();

            // create learner
            var learner = new ClassificationDecisionTreeLearner();

            #endregion

            // learns a ClassificationDecisionTreeModel
            var model = learner.Learn(observations, targets);

            // raw feature importance
            var rawImportance = model.GetRawVariableImportance();

            // Normalized and named feature importance.
            // This gives information about which features/variables the learner found important (higher is more important).
            var featureNameToIndex = parser.EnumerateRows(c => c != targetName).First().ColumnNameToIndex;
            var importance = model.GetVariableImportance(featureNameToIndex);

            // trace normalized importances as csv
            var importanceCsv = new StringBuilder("FeatureName;Importance");
            foreach (var feature in importance)
            {
                importanceCsv.AppendLine();
                importanceCsv.Append(feature.Key + ";" + feature.Value);
            }

            Trace.WriteLine(importanceCsv);
        }
        public void ClassificationDecisionTreeModel_Predict_Single()
        {
            // Predicts each observation individually (single-row overload)
            // and checks the training error against a known value.
            var (observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var sut = new ClassificationDecisionTreeLearner(100, 1, 2, 0.001, 42)
                .Learn(observations, targets);

            var predictions = new double[targets.Length];
            for (int row = 0; row < predictions.Length; row++)
            {
                predictions[row] = sut.Predict(observations.Row(row));
            }

            var evaluator = new TotalErrorClassificationMetric<double>();
            var error = evaluator.Error(targets, predictions);

            Assert.AreEqual(0.038461538461538464, error, 0.0000001);
        }
        public void ClassificationLearner_Learn()
        {
            // Minimal example: load the wine-quality data and train
            // a ClassificationDecisionTreeModel with default settings.

            // Use StreamReader(filepath) when running from filesystem
            const string targetName = "quality";
            var parser = new CsvParser(() => new StringReader(Resources.winequality_white));

            // read feature matrix
            var observations = parser.EnumerateRows(c => c != targetName).ToF64Matrix();

            // read classification targets
            var targets = parser.EnumerateRows(targetName).ToF64Vector();

            // create learner and learn the model
            var learner = new ClassificationDecisionTreeLearner();
            var model = learner.Learn(observations, targets);
        }
        public void ClassificationDecisionTreeModel_PredictProbability_Multiple_Indexed()
        {
            // Predicts class probabilities for a selected subset of observations
            // (by index) and verifies both the error and the exact probabilities.
            var (observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var learner = new ClassificationDecisionTreeLearner(100, 5, 2, 0.001, 42);
            var sut = learner.Learn(observations, targets);

            var indices = new int[] { 0, 3, 4, 5, 6, 7, 8, 9, 20, 21 };
            var actual = sut.PredictProbability(observations, indices);

            var indexedTargets = targets.GetIndices(indices);
            var evaluator = new TotalErrorClassificationMetric<double>();
            var error = evaluator.Error(indexedTargets, actual.Select(p => p.Prediction).ToArray());

            Assert.AreEqual(0.1, error, 0.0000001);

            // helper to build an expected prediction with its binary class probabilities.
            ProbabilityPrediction Prob(double prediction, double p0, double p1) =>
                new ProbabilityPrediction(prediction, new Dictionary<double, double> { { 0, p0 }, { 1, p1 } });

            var expected = new ProbabilityPrediction[]
            {
                Prob(0, 0.571428571428571, 0.428571428571429),
                Prob(0, 0.571428571428571, 0.428571428571429),
                Prob(1, 0.428571428571429, 0.571428571428571),
                Prob(0, 0.75, 0.25),
                Prob(1, 0.285714285714286, 0.714285714285714),
                Prob(0, 0.75, 0.25),
                Prob(0, 0.857142857142857, 0.142857142857143),
                Prob(1, 0.285714285714286, 0.714285714285714),
                Prob(0, 0.857142857142857, 0.142857142857143),
                Prob(1, 0.285714285714286, 0.714285714285714),
            };

            CollectionAssert.AreEqual(expected, actual);
        }
        public void CrossValidation_CrossValidate_ProbabilityPredictions()
        {
            // Compares the cross-validation log-loss estimate with the
            // (overly optimistic) training-set log-loss.
            #region Read data

            // Use StreamReader(filepath) when running from filesystem
            const string targetName = "quality";
            var parser = new CsvParser(() => new StringReader(Resources.winequality_white));

            // read feature matrix (all columns different from the targetName)
            var observations = parser.EnumerateRows(c => c != targetName).ToF64Matrix();

            // read targets
            var targets = parser.EnumerateRows(targetName).ToF64Vector();

            #endregion

            // creates cross validator, observations are shuffled randomly
            var cv = new RandomCrossValidation<ProbabilityPrediction>(crossValidationFolds: 5, seed: 42);

            // create learner
            var learner = new ClassificationDecisionTreeLearner();

            // cross-validated predictions
            var cvPredictions = cv.CrossValidate(learner, observations, targets);

            // metric for measuring model error
            var metric = new LogLossClassificationProbabilityMetric();

            // cross-validation provides an estimate on how the model will perform on unseen data
            Trace.WriteLine("Cross-validation error: " + metric.Error(targets, cvPredictions));

            // train and predict training set for comparison
            var predictions = learner.Learn(observations, targets).PredictProbability(observations);

            // The training set is NOT a good estimate of how well the model will perform on unseen data.
            Trace.WriteLine("Training error: " + metric.Error(targets, predictions));
        }
Beispiel #28
0
        public void ClassificationDecisionTreeLearner_Learn_Reuse_No_Valid_Split()
        {
            // Verifies that a learner instance can be reused on new data,
            // including degenerate data that offers no valid split.
            var (observations, targets) = DataSetUtilities.LoadGlassDataSet();

            var sut = new ClassificationDecisionTreeLearner();

            // train initial model.
            sut.Learn(observations, targets);

            // reuse learner, with smaller data that provides no valid split.
            var onlyUniqueTargetValue = 1.0;
            var onlyOneUniqueObservations = (F64Matrix)observations.Rows(0, 1, 2, 3, 4);
            var onlyOneUniquetargets = Enumerable
                .Range(0, onlyOneUniqueObservations.RowCount)
                .Select(v => onlyUniqueTargetValue)
                .ToArray();

            var model = sut.Learn(onlyOneUniqueObservations, onlyOneUniquetargets);
            var predictions = model.Predict(onlyOneUniqueObservations);

            // no valid split, so the model should always return onlyUniqueTargetValue.
            foreach (var prediction in predictions)
            {
                Assert.AreEqual(onlyUniqueTargetValue, prediction, 0.0001);
            }
        }
        public void ClassificationModel_PredictProbability()
        {
            // Demonstrates probability predictions for all observations,
            // for a single observation, and tracing the class probabilities.
            #region learner creation

            // Use StreamReader(filepath) when running from filesystem
            const string targetName = "quality";
            var parser = new CsvParser(() => new StringReader(Resources.winequality_white));

            // read feature matrix
            var observations = parser.EnumerateRows(c => c != targetName).ToF64Matrix();

            // read classification targets
            var targets = parser.EnumerateRows(targetName).ToF64Vector();

            // create learner
            var learner = new ClassificationDecisionTreeLearner(maximumTreeDepth: 5);
            #endregion

            // learns a ClassificationDecisionTreeModel
            var model = learner.Learn(observations, targets);

            // predict probabilities for all observations
            var probabilityPredictions = model.PredictProbability(observations);

            // predict probabilities for single observation
            var probabilityPrediction = model.PredictProbability(observations.Row(0));

            // the predicted class
            var predictedClass = probabilityPrediction.Prediction;

            // trace class probabilities
            foreach (var p in probabilityPrediction.Probabilities)
            {
                Trace.WriteLine(p.Key + ": " + p.Value);
            }
        }
        public void ClassificationModel_Save_Load()
        {
            // Round-trips a trained model through its text serialization:
            // save to a StringWriter, then load back from the produced text.
            #region learner creation

            // Use StreamReader(filepath) when running from filesystem
            const string targetName = "quality";
            var parser = new CsvParser(() => new StringReader(Resources.winequality_white));

            // read feature matrix
            var observations = parser.EnumerateRows(c => c != targetName).ToF64Matrix();

            // read classification targets
            var targets = parser.EnumerateRows(targetName).ToF64Vector();

            // create learner
            var learner = new ClassificationDecisionTreeLearner();

            #endregion

            // learns a ClassificationDecisionTreeModel
            var model = learner.Learn(observations, targets);

            var writer = new StringWriter();
            model.Save(() => writer);

            // save to file
            //model.Save(() => new StreamWriter(filePath));

            var text = writer.ToString();
            var loadedModel = ClassificationDecisionTreeModel.Load(() => new StringReader(text));

            // load from file
            //ClassificationDecisionTreeModel.Load(() => new StreamReader(filePath));
        }