public void Classification_Neural_Net_Using_ValidtionSet_For_Selecting_The_best_Model()
        {
            #region Read Data
            // Substitute StreamReader(filepath) for StringReader when loading from disk.
            var trainParser = new CsvParser(() => new StringReader(Resources.mnist_small_train));
            var testParser  = new CsvParser(() => new StringReader(Resources.mnist_small_test));

            var targetName = "Class";

            // every column except the target is a feature
            var featureNames = trainParser.EnumerateRows(c => c != targetName)
                               .First().ColumnNameToIndex.Keys.ToArray();

            // training features and classification targets
            var trainInputs  = trainParser.EnumerateRows(featureNames).ToF64Matrix();
            var trainTargets = trainParser.EnumerateRows(targetName).ToF64Vector();

            // test features and classification targets
            var testInputs       = testParser.EnumerateRows(featureNames).ToF64Matrix();
            var testTargetValues = testParser.EnumerateRows(targetName).ToF64Vector();
            #endregion

            // scale pixel intensities into [0, 1]
            trainInputs.Map(v => v / 255);
            testInputs.Map(v => v / 255);

            // carve a validation set out of the training data, stratified by class
            var indexSplitter = new StratifiedTrainingTestIndexSplitter<double>(trainingPercentage: 0.7, seed: 24);
            var trainValidationSplit = indexSplitter.SplitSet(trainInputs, trainTargets);

            // the output layer needs to know how many distinct classes exist
            var classCount = trainTargets.Distinct().Count();

            var net = new NeuralNet();
            net.Add(new InputLayer(width: 28, height: 28, depth: 1)); // MNIST images are 28x28, one channel
            net.Add(new DenseLayer(800, Activation.Relu));
            net.Add(new SoftMaxLayer(classCount));

            // Classification accuracy is the error metric; when a validation set is
            // supplied, the learner keeps the iteration that scored best on it.
            var learner = new ClassificationNeuralNetLearner(net, iterations: 10, loss: new AccuracyLoss());

            // second observations/targets pair = the validation set used to estimate
            // how well the network generalises to new data
            var model = learner.Learn(
                trainValidationSplit.TrainingSet.Observations, trainValidationSplit.TrainingSet.Targets,
                trainValidationSplit.TestSet.Observations, trainValidationSplit.TestSet.Targets);

            var metric      = new TotalErrorClassificationMetric<double>();
            var predictions = model.Predict(testInputs);

            Trace.WriteLine("Test Error: " + metric.Error(testTargetValues, predictions));
        }
        public void StratifiedTrainingTestIndexSplitter_Split()
        {
            // 80% of the indices go to training, stratified over the two classes.
            var sut = new StratifiedTrainingTestIndexSplitter<double>(0.8);

            var classLabels = new double[] { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };

            var actual = sut.Split(classLabels);

            // The exact index order is fixed by the splitter's default seed.
            var expected = new TrainingTestIndexSplit(
                new int[] { 9, 0, 4, 2, 5, 7, 3, 8 },
                new int[] { 1, 6 });

            Assert.AreEqual(expected, actual);
        }
        // Example #3 (0)
        public void ClassificationGradientBoostLearner_LearnWithEarlyStopping_ToFewIterations()
        {
            var (observations, targets) = DataSetUtilities.LoadGlassDataSet();

            var split = new StratifiedTrainingTestIndexSplitter<double>(0.6, 1234)
                        .SplitSet(observations, targets);

            // Only 10 boosting iterations with the early-stopping check every 10 rounds;
            // the test name suggests this is the too-few-iterations case — presumably
            // expected to fail/throw, confirm via the test's attributes (not visible here).
            var sut    = new ClassificationBinomialGradientBoostLearner(10, 0.01, 9, 1, 1e-6, .5, 1);
            var metric = new TotalErrorClassificationMetric<double>();

            var model = sut.LearnWithEarlyStopping(
                split.TrainingSet.Observations, split.TrainingSet.Targets,
                split.TestSet.Observations, split.TestSet.Targets, metric, 10);
        }
        // Example #4 (0)
        public void ClassificationGradientBoostLearner_LearnWithEarlyStopping_ToFewIterations()
        {
            // Load the glass data set: "Target" column is the label, the rest are features.
            var parser = new CsvParser(() => new StringReader(Resources.Glass));

            var observations = parser.EnumerateRows(r => r != "Target").ToF64Matrix();
            var targets      = parser.EnumerateRows("Target").ToF64Vector();

            var split = new StratifiedTrainingTestIndexSplitter<double>(0.6, 1234)
                        .SplitSet(observations, targets);

            // Only 10 boosting iterations with the early-stopping check every 10 rounds;
            // the test name suggests this is the too-few-iterations case — presumably
            // expected to fail/throw, confirm via the test's attributes (not visible here).
            var sut    = new ClassificationBinomialGradientBoostLearner(10, 0.01, 9, 1, 1e-6, .5, 1);
            var metric = new TotalErrorClassificationMetric<double>();

            var model = sut.LearnWithEarlyStopping(
                split.TrainingSet.Observations, split.TrainingSet.Targets,
                split.TestSet.Observations, split.TestSet.Targets, metric, 10);
        }
        // Example #5 (0)
        public void ClassificationGradientBoostLearner_LearnWithEarlyStopping()
        {
            var (observations, targets) = DataSetUtilities.LoadGlassDataSet();

            var split = new StratifiedTrainingTestIndexSplitter<double>(0.6, 1234)
                        .SplitSet(observations, targets);

            // Up to 100 boosting iterations; early stopping evaluates on the held-out
            // set every 10 rounds and truncates the model at the best round.
            var sut    = new ClassificationBinomialGradientBoostLearner(100, 0.01, 9, 1, 1e-6, .5, 0, false);
            var metric = new TotalErrorClassificationMetric<double>();

            var model = sut.LearnWithEarlyStopping(
                split.TrainingSet.Observations, split.TrainingSet.Targets,
                split.TestSet.Observations, split.TestSet.Targets, metric, 10);

            var predictions = model.Predict(split.TestSet.Observations);
            var actual      = metric.Error(split.TestSet.Targets, predictions);

            // Both the held-out error and the stopped tree count are pinned.
            Assert.AreEqual(0.16279069767441862, actual, 0.000001);
            Assert.AreEqual(90, model.Trees.First().ToArray().Length);
        }
        // Example #6 (0)
        public void ClassificationGradientBoostLearner_LearnWithEarlyStopping()
        {
            // Load the glass data set: "Target" column is the label, the rest are features.
            var parser = new CsvParser(() => new StringReader(Resources.Glass));

            var observations = parser.EnumerateRows(r => r != "Target").ToF64Matrix();
            var targets      = parser.EnumerateRows("Target").ToF64Vector();

            var split = new StratifiedTrainingTestIndexSplitter<double>(0.6, 1234)
                        .SplitSet(observations, targets);

            // Up to 100 boosting iterations; early stopping evaluates on the held-out
            // set every 10 rounds and truncates the model at the best round.
            var sut    = new ClassificationBinomialGradientBoostLearner(100, 0.01, 9, 1, 1e-6, .5, 0, false);
            var metric = new TotalErrorClassificationMetric<double>();

            var model = sut.LearnWithEarlyStopping(
                split.TrainingSet.Observations, split.TrainingSet.Targets,
                split.TestSet.Observations, split.TestSet.Targets, metric, 10);

            var predictions = model.Predict(split.TestSet.Observations);
            var actual      = metric.Error(split.TestSet.Targets, predictions);

            // Both the held-out error and the stopped tree count are pinned.
            Assert.AreEqual(0.16279069767441862, actual, 0.000001);
            Assert.AreEqual(90, model.Trees.First().ToArray().Length);
        }
        // Example #7 (0)
        public void Classification_Find_Best_Model_With_Default_Parameters()
        {
            #region Read and Transform Data
            var parser     = new CsvParser(() => new StringReader(Resources.winequality_white));
            var targetName = "quality";

            // every column except the target forms the feature matrix
            var observations = parser.EnumerateRows(c => c != targetName).ToF64Matrix();

            // min-max normalize each feature into [0.0, 1.0], writing back in place
            var minMaxTransformer = new MinMaxTransformer(0.0, 1.0);
            minMaxTransformer.Transform(observations, observations);

            var targets = parser.EnumerateRows(targetName).ToF64Vector();
            #endregion

            // Stratified split: training and test sets keep an equal class distribution.
            var splitter = new StratifiedTrainingTestIndexSplitter<double>(trainingPercentage: 0.7, seed: 24);
            var split    = splitter.SplitSet(observations, targets);

            var trainingSet = split.TrainingSet;
            var testSet     = split.TestSet;

            // Candidate classification learners, all with default parameters.
            var learners = new List<ILearner<double>>
            {
                new ClassificationDecisionTreeLearner(),
                new ClassificationRandomForestLearner(),
                new ClassificationExtremelyRandomizedTreesLearner(),
                new ClassificationAdaBoostLearner(),
                new ClassificationBinomialGradientBoostLearner(),
            };

            // error metric shared by all learners
            var metric = new TotalErrorClassificationMetric<double>();

            // buffers reused across learners: one prediction per test row,
            // and one scratch row for the current observation
            var testPredictions = new double[testSet.Targets.Length];
            var testObservation = new double[trainingSet.Observations.ColumnCount];

            foreach (var learner in learners)
            {
                // fit on the training set
                var model = learner.Learn(trainingSet.Observations, trainingSet.Targets);

                // predict the test set one observation at a time
                for (int i = 0; i < testSet.Targets.Length; i++)
                {
                    testSet.Observations.Row(i, testObservation);
                    testPredictions[i] = model.Predict(testObservation);
                }

                // report each learner's test-set error to the output window
                var error = metric.Error(testSet.Targets, testPredictions);
                Trace.WriteLine(string.Format("{0}: {1:0.0000}", learner.GetType().Name, error));
            }
        }