// NOTE(review): "Validtion" in the method name is a typo for "Validation".
// The name is kept unchanged because test names are discovered by the test
// framework and may be referenced by CI filters.
public void Classification_Neural_Net_Using_ValidtionSet_For_Selecting_The_best_Model()
{
    #region Read Data
    // Use StreamReader(filepath) when running from filesystem
    var trainingParser = new CsvParser(() => new StringReader(Resources.mnist_small_train));
    var testParser = new CsvParser(() => new StringReader(Resources.mnist_small_test));

    var targetName = "Class";
    // feature names = all columns except the target column.
    var featureNames = trainingParser.EnumerateRows(c => c != targetName)
        .First().ColumnNameToIndex.Keys.ToArray();

    // read feature matrix (training)
    var trainingObservations = trainingParser
        .EnumerateRows(featureNames)
        .ToF64Matrix();
    // read classification targets (training)
    var trainingTargets = trainingParser.EnumerateRows(targetName)
        .ToF64Vector();

    // read feature matrix (test)
    var testObservations = testParser
        .EnumerateRows(featureNames)
        .ToF64Matrix();
    // read classification targets (test)
    var testTargets = testParser.EnumerateRows(targetName)
        .ToF64Vector();
    #endregion

    // transform pixel values to be between 0 and 1.
    trainingObservations.Map(p => p / 255);
    testObservations.Map(p => p / 255);

    // create training/validation split from the training data.
    var splitter = new StratifiedTrainingTestIndexSplitter<double>(
        trainingPercentage: 0.7, seed: 24);
    var split = splitter.SplitSet(trainingObservations, trainingTargets);

    // the output layer must know the number of classes.
    var numberOfClasses = trainingTargets.Distinct().Count();

    var net = new NeuralNet();
    net.Add(new InputLayer(width: 28, height: 28, depth: 1)); // MNIST data is 28x28x1.
    net.Add(new DenseLayer(800, Activation.Relu));
    net.Add(new SoftMaxLayer(numberOfClasses));

    // using classification accuracy as error metric.
    // When using a validation set, the error metric
    // is used for selecting the best iteration based on the model's error
    // on the validation set.
    var learner = new ClassificationNeuralNetLearner(net, iterations: 10,
        loss: new AccuracyLoss());

    // the validation set is used for estimating how well the network
    // generalises to new data.
    // (removed dead commented-out code artifact `//);` that sat inside the
    // argument list of the original call.)
    var model = learner.Learn(split.TrainingSet.Observations, split.TrainingSet.Targets,
        split.TestSet.Observations, split.TestSet.Targets);

    var metric = new TotalErrorClassificationMetric<double>();
    var predictions = model.Predict(testObservations);

    Trace.WriteLine("Test Error: " + metric.Error(testTargets, predictions));
}
public void ClassificationGradientBoostLearner_LearnWithEarlyStopping_ToFewIterations()
{
    // Load the glass data set and create a stratified 60/40 train/test partition.
    var (observations, targets) = DataSetUtilities.LoadGlassDataSet();

    var indexSplitter = new StratifiedTrainingTestIndexSplitter<double>(0.6, 1234);
    var trainTestSplit = indexSplitter.SplitSet(observations, targets);

    // Learner configured with only 10 boosting iterations — fewer than the
    // early-stopping rounds requested in the call below.
    var sut = new ClassificationBinomialGradientBoostLearner(10, 0.01, 9, 1, 1e-6, .5, 1);
    var metric = new TotalErrorClassificationMetric<double>();

    // Train with the test partition acting as the early-stopping validation set.
    var model = sut.LearnWithEarlyStopping(
        trainTestSplit.TrainingSet.Observations,
        trainTestSplit.TrainingSet.Targets,
        trainTestSplit.TestSet.Observations,
        trainTestSplit.TestSet.Targets,
        metric,
        10);
}
public void ClassificationGradientBoostLearner_LearnWithEarlyStopping_ToFewIterations()
{
    // Parse the embedded glass data set: every column except "Target" is a feature.
    var csvParser = new CsvParser(() => new StringReader(Resources.Glass));
    var features = csvParser.EnumerateRows(r => r != "Target").ToF64Matrix();
    var labels = csvParser.EnumerateRows("Target").ToF64Vector();

    // Stratified 60/40 train/test partition with a fixed seed.
    var indexSplitter = new StratifiedTrainingTestIndexSplitter<double>(0.6, 1234);
    var trainTestSplit = indexSplitter.SplitSet(features, labels);

    // Learner configured with only 10 boosting iterations — fewer than the
    // early-stopping rounds requested in the call below.
    var sut = new ClassificationBinomialGradientBoostLearner(10, 0.01, 9, 1, 1e-6, .5, 1);
    var metric = new TotalErrorClassificationMetric<double>();

    // Train with the test partition acting as the early-stopping validation set.
    var model = sut.LearnWithEarlyStopping(
        trainTestSplit.TrainingSet.Observations,
        trainTestSplit.TrainingSet.Targets,
        trainTestSplit.TestSet.Observations,
        trainTestSplit.TestSet.Targets,
        metric,
        10);
}
public void ClassificationGradientBoostLearner_LearnWithEarlyStopping()
{
    // Load the glass data set and create a stratified 60/40 train/test partition.
    var (observations, targets) = DataSetUtilities.LoadGlassDataSet();

    var indexSplitter = new StratifiedTrainingTestIndexSplitter<double>(0.6, 1234);
    var trainTestSplit = indexSplitter.SplitSet(observations, targets);
    var trainingSet = trainTestSplit.TrainingSet;
    var testSet = trainTestSplit.TestSet;

    var sut = new ClassificationBinomialGradientBoostLearner(100, 0.01, 9, 1, 1e-6, .5, 0, false);
    var metric = new TotalErrorClassificationMetric<double>();

    // Train with the test partition acting as the early-stopping validation
    // set, checking every 10 iterations.
    var model = sut.LearnWithEarlyStopping(
        trainingSet.Observations, trainingSet.Targets,
        testSet.Observations, testSet.Targets,
        metric, 10);

    var predictions = model.Predict(testSet.Observations);
    var actual = metric.Error(testSet.Targets, predictions);

    // Expected error and tree count pin down the early-stopping behavior.
    Assert.AreEqual(0.16279069767441862, actual, 0.000001);
    Assert.AreEqual(90, model.Trees.First().ToArray().Length);
}
public void ClassificationGradientBoostLearner_LearnWithEarlyStopping()
{
    // Parse the embedded glass data set: every column except "Target" is a feature.
    var csvParser = new CsvParser(() => new StringReader(Resources.Glass));
    var features = csvParser.EnumerateRows(r => r != "Target").ToF64Matrix();
    var labels = csvParser.EnumerateRows("Target").ToF64Vector();

    // Stratified 60/40 train/test partition with a fixed seed.
    var indexSplitter = new StratifiedTrainingTestIndexSplitter<double>(0.6, 1234);
    var trainTestSplit = indexSplitter.SplitSet(features, labels);
    var trainingSet = trainTestSplit.TrainingSet;
    var testSet = trainTestSplit.TestSet;

    var sut = new ClassificationBinomialGradientBoostLearner(100, 0.01, 9, 1, 1e-6, .5, 0, false);
    var metric = new TotalErrorClassificationMetric<double>();

    // Train with the test partition acting as the early-stopping validation
    // set, checking every 10 iterations.
    var model = sut.LearnWithEarlyStopping(
        trainingSet.Observations, trainingSet.Targets,
        testSet.Observations, testSet.Targets,
        metric, 10);

    var predictions = model.Predict(testSet.Observations);
    var actual = metric.Error(testSet.Targets, predictions);

    // Expected error and tree count pin down the early-stopping behavior.
    Assert.AreEqual(0.16279069767441862, actual, 0.000001);
    Assert.AreEqual(90, model.Trees.First().ToArray().Length);
}
public void Classification_Find_Best_Model_With_Default_Parameters()
{
    #region Read and Transform Data
    var parser = new CsvParser(() => new StringReader(Resources.winequality_white));
    var targetName = "quality";

    // read feature matrix (all columns different from the targetName)
    var observations = parser.EnumerateRows(c => c != targetName)
        .ToF64Matrix();

    // create minmax normalizer (normalizes each feature from 0.0 to 1.0)
    // and apply it in place to the feature matrix.
    var minMaxTransformer = new MinMaxTransformer(0.0, 1.0);
    minMaxTransformer.Transform(observations, observations);

    // read targets
    var targets = parser.EnumerateRows(targetName)
        .ToF64Vector();
    #endregion

    // split data: training and test set keep an equal distribution of
    // classes in both partitions (stratified split).
    var splitter = new StratifiedTrainingTestIndexSplitter<double>(
        trainingPercentage: 0.7, seed: 24);
    var trainingTestSplit = splitter.SplitSet(observations, targets);
    var trainingSet = trainingTestSplit.TrainingSet;
    var testSet = trainingTestSplit.TestSet;

    // Candidate classification learners, each with default parameters.
    var learners = new List<ILearner<double>>
    {
        new ClassificationDecisionTreeLearner(),
        new ClassificationRandomForestLearner(),
        new ClassificationExtremelyRandomizedTreesLearner(),
        new ClassificationAdaBoostLearner(),
        new ClassificationBinomialGradientBoostLearner(),
    };

    // metric for measuring the error
    var metric = new TotalErrorClassificationMetric<double>();

    // Reusable buffers: one prediction per test row, plus a scratch row
    // for copying a single observation out of the test matrix.
    var testPredictions = new double[testSet.Targets.Length];
    var rowBuffer = new double[trainingSet.Observations.ColumnCount];

    foreach (var learner in learners)
    {
        // train model
        var model = learner.Learn(trainingSet.Observations, trainingSet.Targets);

        // predict each test observation one row at a time, demonstrating
        // single-observation prediction.
        for (var row = 0; row < testSet.Targets.Length; row++)
        {
            testSet.Observations.Row(row, rowBuffer);
            testPredictions[row] = model.Predict(rowBuffer);
        }

        // measure error on test set
        var error = metric.Error(testSet.Targets, testPredictions);

        // Trace learner type and error to output window
        Trace.WriteLine(string.Format("{0}: {1:0.0000}", learner.GetType().Name, error));
    }
}