public void ClassificationDecisionTreeModel_GetVariableImportance()
{
    var (observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

    var featureNameToIndex = new Dictionary<string, int>
    {
        { "AptitudeTestScore", 0 },
        { "PreviousExperience_month", 1 }
    };

    var learner = new ClassificationDecisionTreeLearner(100, 1, 2, 0.001, 42);
    var sut = learner.Learn(observations, targets);

    var actual = sut.GetVariableImportance(featureNameToIndex);
    var expected = new Dictionary<string, double>
    {
        { "PreviousExperience_month", 100.0 },
        { "AptitudeTestScore", 19.5121951219512 }
    };

    Assert.AreEqual(expected.Count, actual.Count);
    var zip = expected.Zip(actual, (e, a) => new { Expected = e, Actual = a });

    foreach (var item in zip)
    {
        Assert.AreEqual(item.Expected.Key, item.Actual.Key);
        Assert.AreEqual(item.Expected.Value, item.Actual.Value, 0.000001);
    }
}
public void ClassificationModel_Predict()
{
    #region learner creation

    // Use StreamReader(filepath) when running from filesystem.
    var parser = new CsvParser(() => new StringReader(Resources.winequality_white));
    var targetName = "quality";

    // read feature matrix
    var observations = parser.EnumerateRows(c => c != targetName)
        .ToF64Matrix();

    // read classification targets
    var targets = parser.EnumerateRows(targetName)
        .ToF64Vector();

    // create learner
    var learner = new ClassificationDecisionTreeLearner();

    #endregion

    // learns a ClassificationDecisionTreeModel
    var model = learner.Learn(observations, targets);

    // predict all observations
    var predictions = model.Predict(observations);

    // predict a single observation
    var prediction = model.Predict(observations.Row(0));
}
public void ClassificationDecisionTreeLearner_Learn_Reuse_No_Valid_Split()
{
    var parser = new CsvParser(() => new StringReader(Resources.Glass));
    var observations = parser.EnumerateRows(v => v != "Target").ToF64Matrix();
    var targets = parser.EnumerateRows("Target").ToF64Vector();

    var sut = new ClassificationDecisionTreeLearner();

    // train initial model.
    sut.Learn(observations, targets);

    // reuse learner, with smaller data that provides no valid split.
    var onlyUniqueTargetValue = 1.0;
    var onlyOneUniqueObservations = (F64Matrix)observations.Rows(0, 1, 2, 3, 4);
    var onlyOneUniqueTargets = Enumerable.Range(0, onlyOneUniqueObservations.RowCount)
        .Select(v => onlyUniqueTargetValue).ToArray();

    var model = sut.Learn(onlyOneUniqueObservations, onlyOneUniqueTargets);
    var predictions = model.Predict(onlyOneUniqueObservations);

    // no valid split, so the model should always return onlyUniqueTargetValue.
    for (int i = 0; i < predictions.Length; i++)
    {
        Assert.AreEqual(onlyUniqueTargetValue, predictions[i], 0.0001);
    }
}
public void ClassificationDecisionTreeModel_GetVariableImportance()
{
    var parser = new CsvParser(() => new StringReader(Resources.AptitudeData));
    var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
    var targets = parser.EnumerateRows("Pass").ToF64Vector();

    var featureNameToIndex = new Dictionary<string, int>
    {
        { "AptitudeTestScore", 0 },
        { "PreviousExperience_month", 1 }
    };

    var learner = new ClassificationDecisionTreeLearner(100, 1, 2, 0.001, 42);
    var sut = learner.Learn(observations, targets);

    var actual = sut.GetVariableImportance(featureNameToIndex);
    var expected = new Dictionary<string, double>
    {
        { "PreviousExperience_month", 100.0 },
        { "AptitudeTestScore", 19.5121951219512 }
    };

    Assert.AreEqual(expected.Count, actual.Count);
    var zip = expected.Zip(actual, (e, a) => new { Expected = e, Actual = a });

    foreach (var item in zip)
    {
        Assert.AreEqual(item.Expected.Key, item.Actual.Key);
        Assert.AreEqual(item.Expected.Value, item.Actual.Value, 0.000001);
    }
}
double CrossValidate(int folds)
{
    var (observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

    var sut = new StratifiedCrossValidation<double>(folds, 42);
    var learner = new ClassificationDecisionTreeLearner();
    var predictions = sut.CrossValidate(learner, observations, targets);

    var metric = new TotalErrorClassificationMetric<double>();
    return metric.Error(targets, predictions);
}
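// A hypothetical usage sketch (an addition, not part of the original source):
// the helper above takes the fold count as a parameter, so it can be used to
// compare the stratified cross-validation error across several fold counts.
public void CrossValidate_Compare_Folds()
{
    foreach (var folds in new[] { 2, 5, 10 })
    {
        // more folds means more training data per fold, usually at higher cost
        Trace.WriteLine(folds + "-fold error: " + CrossValidate(folds));
    }
}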
public void ClassificationModel_PredictProbability_Threshold_On_Probability()
{
    #region learner creation

    // Use StreamReader(filepath) when running from filesystem.
    var parser = new CsvParser(() => new StringReader(Resources.winequality_white));
    var targetName = "quality";

    // read feature matrix
    var observations = parser.EnumerateRows(c => c != targetName)
        .ToF64Matrix();

    // read classification targets and convert to a binary problem (low quality/high quality).
    var targets = parser.EnumerateRows(targetName)
        .ToF64Vector().Select(t => t < 5 ? 0.0 : 1.0).ToArray();

    var translation = new Dictionary<double, string>
    {
        { 0.0, "Low quality" },
        { 1.0, "High quality" }
    };

    // create learner
    var learner = new ClassificationDecisionTreeLearner(maximumTreeDepth: 5);

    #endregion

    // learns a ClassificationDecisionTreeModel
    var model = learner.Learn(observations, targets);

    // predict probabilities for all observations
    var probabilityPredictions = model.PredictProbability(observations);

    // zip targets and probabilities to keep the order
    var zip = targets.Zip(probabilityPredictions, (t, p) => new { Target = t, Prediction = p });

    // Threshold on the probability of the predicted class.
    // This removes the observations that the model is uncertain about.
    var probabilityThreshold = 0.90;
    var thresholdedResult = zip.Where(kvp =>
        kvp.Prediction.Probabilities[kvp.Prediction.Prediction] > probabilityThreshold);

    var thresholdedPredictions = thresholdedResult.Select(p => p.Prediction).ToArray();
    var thresholdedTargets = thresholdedResult.Select(p => p.Target).ToArray();

    // evaluate only on the probability-thresholded data
    var metric = new LogLossClassificationProbabilityMetric();
    Trace.WriteLine("ProbabilityThresholded Result:");
    Trace.WriteLine(metric.ErrorString(thresholdedTargets, thresholdedPredictions, translation));
    Trace.WriteLine("");

    // evaluate on all data for comparison
    Trace.WriteLine("All data result:");
    Trace.WriteLine(metric.ErrorString(targets, probabilityPredictions, translation));
}
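// A small follow-up sketch (an addition, not from the original test): thresholding
// trades coverage for confidence, so it is often useful to also report the fraction
// of observations that survives the threshold. The fragment below would go at the
// end of the method above and reuses its locals (thresholdedTargets, targets).
//
// var coverage = (double)thresholdedTargets.Length / targets.Length;
// Trace.WriteLine("Fraction above threshold: " + coverage);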
public void ClassificationDecisionTreeModel_Save()
{
    var (observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

    var learner = new ClassificationDecisionTreeLearner(2);
    var sut = learner.Learn(observations, targets);

    var writer = new StringWriter();
    sut.Save(() => writer);

    Assert.AreEqual(ClassificationDecisionTreeModelString, writer.ToString());
}
double ClassificationDecisionTreeLearner_Learn_Aptitude(int treeDepth)
{
    var (observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

    var sut = new ClassificationDecisionTreeLearner(treeDepth, 1, 2, 0.001, 42);
    var model = sut.Learn(observations, targets);

    var predictions = model.Predict(observations);

    var evaluator = new TotalErrorClassificationMetric<double>();
    var error = evaluator.Error(targets, predictions);

    return error;
}
public void ClassificationDecisionTreeModel_Predict_Multiple()
{
    var (observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

    var learner = new ClassificationDecisionTreeLearner(100, 1, 2, 0.001, 42);
    var sut = learner.Learn(observations, targets);

    var predictions = sut.Predict(observations);

    var evaluator = new TotalErrorClassificationMetric<double>();
    var error = evaluator.Error(targets, predictions);

    Assert.AreEqual(0.038461538461538464, error, 0.0000001);
}
public void ClassificationDecisionTreeModel_Save()
{
    var parser = new CsvParser(() => new StringReader(Resources.AptitudeData));
    var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
    var targets = parser.EnumerateRows("Pass").ToF64Vector();

    var learner = new ClassificationDecisionTreeLearner(2);
    var sut = learner.Learn(observations, targets);

    var writer = new StringWriter();
    sut.Save(() => writer);

    Assert.AreEqual(ClassificationDecisionTreeModelString, writer.ToString());
}
double CrossValidate_Provide_Indices(int folds)
{
    var (observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

    var sut = new StratifiedCrossValidation<double>(folds, 42);

    var rowsToCrossvalidate = targets.Length / 2;
    var indices = Enumerable.Range(0, rowsToCrossvalidate).ToArray();
    var predictions = new double[rowsToCrossvalidate];

    var learner = new ClassificationDecisionTreeLearner();
    sut.CrossValidate(learner, observations, targets, indices, predictions);

    var metric = new TotalErrorClassificationMetric<double>();
    return metric.Error(targets.Take(rowsToCrossvalidate).ToArray(), predictions);
}
double ClassificationDecisionTreeLearner_Learn_Glass_Weighted(int treeDepth, double weight)
{
    var (observations, targets) = DataSetUtilities.LoadGlassDataSet();
    var weights = targets.Select(v => Weight(v, 1, weight)).ToArray();

    var sut = new ClassificationDecisionTreeLearner(treeDepth, 1, observations.ColumnCount, 0.001, 42);
    var model = sut.Learn(observations, targets, weights);

    var predictions = model.Predict(observations);

    var evaluator = new TotalErrorClassificationMetric<double>();
    Trace.WriteLine(evaluator.ErrorString(targets, predictions));

    var error = evaluator.Error(targets, predictions);
    return error;
}
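// The Weight helper used above is not shown in this section. Below is a minimal
// sketch of what it could look like, assuming it assigns the extra weight to a
// single target class and 1.0 to all others (an assumption for illustration,
// not the original definition).
static double Weight(double target, double targetToWeigh, double weight)
{
    // up-weight observations of the selected class; leave the rest unchanged
    return target == targetToWeigh ? weight : 1.0;
}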
public void ClassificationDecisionTreeModel_Predict_Multiple_Indexed()
{
    var (observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

    var learner = new ClassificationDecisionTreeLearner(100, 5, 2, 0.001, 42);
    var sut = learner.Learn(observations, targets);

    var indices = new int[] { 0, 3, 4, 5, 6, 7, 8, 9, 20, 21 };
    var predictions = sut.Predict(observations, indices);

    var evaluator = new TotalErrorClassificationMetric<double>();
    var indexedTargets = targets.GetIndices(indices);
    var error = evaluator.Error(indexedTargets, predictions);

    Assert.AreEqual(0.1, error, 0.0000001);
}
double ClassificationDecisionTreeLearner_Learn_Aptitude(int treeDepth)
{
    var parser = new CsvParser(() => new StringReader(Resources.AptitudeData));
    var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
    var targets = parser.EnumerateRows("Pass").ToF64Vector();

    var sut = new ClassificationDecisionTreeLearner(treeDepth, 1, 2, 0.001, 42);
    var model = sut.Learn(observations, targets);

    var predictions = model.Predict(observations);

    var evaluator = new TotalErrorClassificationMetric<double>();
    var error = evaluator.Error(targets, predictions);

    return error;
}
public void ClassificationDecisionTreeModel_Predict_Multiple()
{
    var parser = new CsvParser(() => new StringReader(Resources.AptitudeData));
    var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
    var targets = parser.EnumerateRows("Pass").ToF64Vector();

    var learner = new ClassificationDecisionTreeLearner(100, 1, 2, 0.001, 42);
    var sut = learner.Learn(observations, targets);

    var predictions = sut.Predict(observations);

    var evaluator = new TotalErrorClassificationMetric<double>();
    var error = evaluator.Error(targets, predictions);

    Assert.AreEqual(0.038461538461538464, error, 0.0000001);
}
ClassificationDecisionTreeModel CreateTree(F64Matrix observations, double[] targets,
    int[] indices, Random random)
{
    var learner = new ClassificationDecisionTreeLearner(
        m_maximumTreeDepth,
        m_minimumSplitSize,
        m_featuresPrSplit,
        m_minimumInformationGain,
        random.Next());

    // draw a bootstrap sample (sampling with replacement) of the requested sub-sample size
    var treeIndicesLength = (int)Math.Round(m_subSampleRatio * (double)indices.Length);
    var treeIndices = new int[treeIndicesLength];

    for (int j = 0; j < treeIndicesLength; j++)
    {
        treeIndices[j] = indices[random.Next(indices.Length)];
    }

    // train the tree on the sampled indices only
    var model = learner.Learn(observations, targets, treeIndices);
    return model;
}
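// A minimal usage sketch (an addition, not from the original source): how CreateTree
// might be called when building a bagged ensemble. The field m_numberOfTrees and the
// surrounding class are assumptions for illustration.
ClassificationDecisionTreeModel[] CreateEnsemble(F64Matrix observations, double[] targets, Random random)
{
    var indices = Enumerable.Range(0, targets.Length).ToArray();
    var models = new ClassificationDecisionTreeModel[m_numberOfTrees];

    for (int i = 0; i < m_numberOfTrees; i++)
    {
        // each tree is trained on its own bootstrap sample, drawn inside CreateTree
        models[i] = CreateTree(observations, targets, indices, random);
    }

    return models;
}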
public void ClassificationDecisionTreeModel_GetRawVariableImportance()
{
    var (observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

    var learner = new ClassificationDecisionTreeLearner(100, 1, 2, 0.001, 42);
    var sut = learner.Learn(observations, targets);

    var actual = sut.GetRawVariableImportance();
    var expected = new double[] { 0.071005917159763288, 0.36390532544378695 };

    Assert.AreEqual(expected.Length, actual.Length);
    for (int i = 0; i < expected.Length; i++)
    {
        Assert.AreEqual(expected[i], actual[i], 0.000001);
    }
}
public void LearningCurves_Calculate_ProbabilityPrediction()
{
    #region Read data

    // Use StreamReader(filepath) when running from filesystem.
    var parser = new CsvParser(() => new StringReader(Resources.winequality_white));
    var targetName = "quality";

    // read feature matrix
    var observations = parser.EnumerateRows(c => c != targetName)
        .ToF64Matrix();

    // read classification targets and convert to a binary problem (low quality/high quality).
    var targets = parser.EnumerateRows(targetName)
        .ToF64Vector().Select(t => t < 5 ? 0.0 : 1.0).ToArray();

    #endregion

    // metric for measuring model error
    var metric = new LogLossClassificationProbabilityMetric();

    // creates the learning-curves calculator, observations are shuffled randomly
    var learningCurveCalculator = new RandomShuffleLearningCurvesCalculator<ProbabilityPrediction>(
        metric,
        samplePercentages: new double[] { 0.05, 0.1, 0.2, 0.4, 0.8, 1.0 },
        trainingPercentage: 0.7,
        numberOfShufflesPrSample: 5);

    // create learner
    var learner = new ClassificationDecisionTreeLearner(maximumTreeDepth: 5);

    // calculate learning curve
    var learningCurve = learningCurveCalculator.Calculate(learner, observations, targets);

    // write to csv
    var writer = new StringWriter();
    learningCurve.Write(() => writer);

    // Trace the result.
    // Plotting the learning curves helps determine whether the model suffers from
    // high bias or high variance, which suggests what to try next to improve it.
    Trace.WriteLine(writer.ToString());

    // alternatively, write to file
    //learningCurve.Write(() => new StreamWriter(filePath));
}
public void ClassificationDecisionTreeModel_GetRawVariableImportance()
{
    var parser = new CsvParser(() => new StringReader(Resources.AptitudeData));
    var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
    var targets = parser.EnumerateRows("Pass").ToF64Vector();

    var learner = new ClassificationDecisionTreeLearner(100, 1, 2, 0.001, 42);
    var sut = learner.Learn(observations, targets);

    var actual = sut.GetRawVariableImportance();
    var expected = new double[] { 0.071005917159763288, 0.36390532544378695 };

    Assert.AreEqual(expected.Length, actual.Length);
    for (int i = 0; i < expected.Length; i++)
    {
        Assert.AreEqual(expected[i], actual[i], 0.000001);
    }
}
double ClassificationDecisionTreeLearner_Learn_Glass_Weighted(int treeDepth, double weight)
{
    var parser = new CsvParser(() => new StringReader(Resources.Glass));
    var observations = parser.EnumerateRows(v => v != "Target").ToF64Matrix();
    var targets = parser.EnumerateRows("Target").ToF64Vector();

    var weights = targets.Select(v => Weight(v, 1, weight)).ToArray();

    var sut = new ClassificationDecisionTreeLearner(treeDepth, 1, observations.ColumnCount, 0.001, 42);
    var model = sut.Learn(observations, targets, weights);

    var predictions = model.Predict(observations);

    var evaluator = new TotalErrorClassificationMetric<double>();
    Trace.WriteLine(evaluator.ErrorString(targets, predictions));

    var error = evaluator.Error(targets, predictions);
    return error;
}
public void ClassificationDecisionTreeModel_Predict_Multiple_Indexed()
{
    var parser = new CsvParser(() => new StringReader(Resources.AptitudeData));
    var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
    var targets = parser.EnumerateRows("Pass").ToF64Vector();

    var learner = new ClassificationDecisionTreeLearner(100, 5, 2, 0.001, 42);
    var sut = learner.Learn(observations, targets);

    var indices = new int[] { 0, 3, 4, 5, 6, 7, 8, 9, 20, 21 };
    var predictions = sut.Predict(observations, indices);

    var evaluator = new TotalErrorClassificationMetric<double>();
    var indexedTargets = targets.GetIndices(indices);
    var error = evaluator.Error(indexedTargets, predictions);

    Assert.AreEqual(0.1, error, 0.0000001);
}
public void ClassificationDecisionTreeModel_PredictProbability_Multiple_Indexed()
{
    var parser = new CsvParser(() => new StringReader(Resources.AptitudeData));
    var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
    var targets = parser.EnumerateRows("Pass").ToF64Vector();

    var learner = new ClassificationDecisionTreeLearner(100, 5, 2, 0.001, 42);
    var sut = learner.Learn(observations, targets);

    var indices = new int[] { 0, 3, 4, 5, 6, 7, 8, 9, 20, 21 };
    var actual = sut.PredictProbability(observations, indices);

    var indexedTargets = targets.GetIndices(indices);
    var evaluator = new TotalErrorClassificationMetric<double>();
    var error = evaluator.Error(indexedTargets, actual.Select(p => p.Prediction).ToArray());

    Assert.AreEqual(0.1, error, 0.0000001);

    var expected = new ProbabilityPrediction[]
    {
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 0, 0.571428571428571 }, { 1, 0.428571428571429 } }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 0, 0.571428571428571 }, { 1, 0.428571428571429 } }),
        new ProbabilityPrediction(1, new Dictionary<double, double> { { 0, 0.428571428571429 }, { 1, 0.571428571428571 } }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 0, 0.75 }, { 1, 0.25 } }),
        new ProbabilityPrediction(1, new Dictionary<double, double> { { 0, 0.285714285714286 }, { 1, 0.714285714285714 } }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 0, 0.75 }, { 1, 0.25 } }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 0, 0.857142857142857 }, { 1, 0.142857142857143 } }),
        new ProbabilityPrediction(1, new Dictionary<double, double> { { 0, 0.285714285714286 }, { 1, 0.714285714285714 } }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 0, 0.857142857142857 }, { 1, 0.142857142857143 } }),
        new ProbabilityPrediction(1, new Dictionary<double, double> { { 0, 0.285714285714286 }, { 1, 0.714285714285714 } }),
    };

    CollectionAssert.AreEqual(expected, actual);
}
public void ClassificationModel_FeatureImportance()
{
    #region learner creation

    // Use StreamReader(filepath) when running from filesystem.
    var parser = new CsvParser(() => new StringReader(Resources.winequality_white));
    var targetName = "quality";

    // read feature matrix
    var observations = parser.EnumerateRows(c => c != targetName)
        .ToF64Matrix();

    // read classification targets
    var targets = parser.EnumerateRows(targetName)
        .ToF64Vector();

    // create learner
    var learner = new ClassificationDecisionTreeLearner();

    #endregion

    // learns a ClassificationDecisionTreeModel
    var model = learner.Learn(observations, targets);

    // raw feature importance
    var rawImportance = model.GetRawVariableImportance();

    // Normalized and named feature importance.
    // This shows which features/variables the learner found important (higher is more important).
    var featureNameToIndex = parser.EnumerateRows(c => c != targetName)
        .First().ColumnNameToIndex;
    var importance = model.GetVariableImportance(featureNameToIndex);

    // trace normalized importances
    var importanceCsv = new StringBuilder();
    importanceCsv.Append("FeatureName;Importance");
    foreach (var feature in importance)
    {
        importanceCsv.AppendLine();
        importanceCsv.Append(feature.Key + ";" + feature.Value);
    }

    Trace.WriteLine(importanceCsv);
}
public void ClassificationDecisionTreeModel_Predict_Single()
{
    var (observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

    var learner = new ClassificationDecisionTreeLearner(100, 1, 2, 0.001, 42);
    var sut = learner.Learn(observations, targets);

    var rows = targets.Length;
    var predictions = new double[rows];
    for (int i = 0; i < rows; i++)
    {
        predictions[i] = sut.Predict(observations.Row(i));
    }

    var evaluator = new TotalErrorClassificationMetric<double>();
    var error = evaluator.Error(targets, predictions);

    Assert.AreEqual(0.038461538461538464, error, 0.0000001);
}
public void ClassificationLearner_Learn()
{
    // Use StreamReader(filepath) when running from filesystem.
    var parser = new CsvParser(() => new StringReader(Resources.winequality_white));
    var targetName = "quality";

    // read feature matrix
    var observations = parser.EnumerateRows(c => c != targetName)
        .ToF64Matrix();

    // read classification targets
    var targets = parser.EnumerateRows(targetName)
        .ToF64Vector();

    // create learner
    var learner = new ClassificationDecisionTreeLearner();

    // learns a ClassificationDecisionTreeModel
    var model = learner.Learn(observations, targets);
}
public void ClassificationDecisionTreeModel_PredictProbability_Multiple_Indexed()
{
    var (observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

    var learner = new ClassificationDecisionTreeLearner(100, 5, 2, 0.001, 42);
    var sut = learner.Learn(observations, targets);

    var indices = new int[] { 0, 3, 4, 5, 6, 7, 8, 9, 20, 21 };
    var actual = sut.PredictProbability(observations, indices);

    var indexedTargets = targets.GetIndices(indices);
    var evaluator = new TotalErrorClassificationMetric<double>();
    var error = evaluator.Error(indexedTargets, actual.Select(p => p.Prediction).ToArray());

    Assert.AreEqual(0.1, error, 0.0000001);

    var expected = new ProbabilityPrediction[]
    {
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 0, 0.571428571428571 }, { 1, 0.428571428571429 } }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 0, 0.571428571428571 }, { 1, 0.428571428571429 } }),
        new ProbabilityPrediction(1, new Dictionary<double, double> { { 0, 0.428571428571429 }, { 1, 0.571428571428571 } }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 0, 0.75 }, { 1, 0.25 } }),
        new ProbabilityPrediction(1, new Dictionary<double, double> { { 0, 0.285714285714286 }, { 1, 0.714285714285714 } }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 0, 0.75 }, { 1, 0.25 } }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 0, 0.857142857142857 }, { 1, 0.142857142857143 } }),
        new ProbabilityPrediction(1, new Dictionary<double, double> { { 0, 0.285714285714286 }, { 1, 0.714285714285714 } }),
        new ProbabilityPrediction(0, new Dictionary<double, double> { { 0, 0.857142857142857 }, { 1, 0.142857142857143 } }),
        new ProbabilityPrediction(1, new Dictionary<double, double> { { 0, 0.285714285714286 }, { 1, 0.714285714285714 } }),
    };

    CollectionAssert.AreEqual(expected, actual);
}
public void CrossValidation_CrossValidate_ProbabilityPredictions()
{
    #region Read data

    // Use StreamReader(filepath) when running from filesystem.
    var parser = new CsvParser(() => new StringReader(Resources.winequality_white));
    var targetName = "quality";

    // read feature matrix (all columns different from the targetName)
    var observations = parser.EnumerateRows(c => c != targetName)
        .ToF64Matrix();

    // read targets
    var targets = parser.EnumerateRows(targetName)
        .ToF64Vector();

    #endregion

    // creates cross validator, observations are shuffled randomly
    var cv = new RandomCrossValidation<ProbabilityPrediction>(crossValidationFolds: 5, seed: 42);

    // create learner
    var learner = new ClassificationDecisionTreeLearner();

    // cross-validated predictions
    var cvPredictions = cv.CrossValidate(learner, observations, targets);

    // metric for measuring model error
    var metric = new LogLossClassificationProbabilityMetric();

    // cross-validation provides an estimate of how the model will perform on unseen data
    Trace.WriteLine("Cross-validation error: " + metric.Error(targets, cvPredictions));

    // train and predict the training set for comparison
    var predictions = learner.Learn(observations, targets).PredictProbability(observations);

    // The training error is NOT a good estimate of how well the model will perform on unseen data.
    Trace.WriteLine("Training error: " + metric.Error(targets, predictions));
}
public void ClassificationDecisionTreeLearner_Learn_Reuse_No_Valid_Split()
{
    var (observations, targets) = DataSetUtilities.LoadGlassDataSet();

    var sut = new ClassificationDecisionTreeLearner();

    // train initial model.
    sut.Learn(observations, targets);

    // reuse learner, with smaller data that provides no valid split.
    var onlyUniqueTargetValue = 1.0;
    var onlyOneUniqueObservations = (F64Matrix)observations.Rows(0, 1, 2, 3, 4);
    var onlyOneUniqueTargets = Enumerable.Range(0, onlyOneUniqueObservations.RowCount)
        .Select(v => onlyUniqueTargetValue).ToArray();

    var model = sut.Learn(onlyOneUniqueObservations, onlyOneUniqueTargets);
    var predictions = model.Predict(onlyOneUniqueObservations);

    // no valid split, so the model should always return onlyUniqueTargetValue.
    for (int i = 0; i < predictions.Length; i++)
    {
        Assert.AreEqual(onlyUniqueTargetValue, predictions[i], 0.0001);
    }
}
public void ClassificationModel_PredictProbability()
{
    #region learner creation

    // Use StreamReader(filepath) when running from filesystem.
    var parser = new CsvParser(() => new StringReader(Resources.winequality_white));
    var targetName = "quality";

    // read feature matrix
    var observations = parser.EnumerateRows(c => c != targetName)
        .ToF64Matrix();

    // read classification targets
    var targets = parser.EnumerateRows(targetName)
        .ToF64Vector();

    // create learner
    var learner = new ClassificationDecisionTreeLearner(maximumTreeDepth: 5);

    #endregion

    // learns a ClassificationDecisionTreeModel
    var model = learner.Learn(observations, targets);

    // predict probabilities for all observations
    var probabilityPredictions = model.PredictProbability(observations);

    // predict probabilities for a single observation
    var probabilityPrediction = model.PredictProbability(observations.Row(0));

    // the predicted class
    var predictedClass = probabilityPrediction.Prediction;

    // trace class probabilities
    probabilityPrediction.Probabilities.ToList()
        .ForEach(p => Trace.WriteLine(p.Key + ": " + p.Value));
}
public void ClassificationModel_Save_Load()
{
    #region learner creation

    // Use StreamReader(filepath) when running from filesystem.
    var parser = new CsvParser(() => new StringReader(Resources.winequality_white));
    var targetName = "quality";

    // read feature matrix
    var observations = parser.EnumerateRows(c => c != targetName)
        .ToF64Matrix();

    // read classification targets
    var targets = parser.EnumerateRows(targetName)
        .ToF64Vector();

    // create learner
    var learner = new ClassificationDecisionTreeLearner();

    #endregion

    // learns a ClassificationDecisionTreeModel
    var model = learner.Learn(observations, targets);

    var writer = new StringWriter();
    model.Save(() => writer);

    // save to file
    //model.Save(() => new StreamWriter(filePath));

    var text = writer.ToString();
    var loadedModel = ClassificationDecisionTreeModel.Load(() => new StringReader(text));

    // load from file
    //ClassificationDecisionTreeModel.Load(() => new StreamReader(filePath));
}
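// A hypothetical follow-up check (an addition, not part of the original test):
// verify the save/load round trip by comparing predictions from the original and
// the loaded model. The fragment below would go at the end of the method above
// and reuses its locals (model, loadedModel, observations).
//
// var originalPredictions = model.Predict(observations);
// var loadedPredictions = loadedModel.Predict(observations);
// CollectionAssert.AreEqual(originalPredictions, loadedPredictions);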