public void learn_pendigits_normalization()
        {
            // Documentation sample / regression test: fits a hidden Markov classifier on the
            // normalized PENDIGITS sequences, wraps it in a MarkovMultivariateFunction to seed
            // a HiddenQuasiNewtonLearning run, and compares the resulting training and testing
            // accuracies against fixed reference values.
            Console.WriteLine("Starting NormalQuasiNewtonHiddenLearningTest.learn_pendigits_normalization");

            using (var keepAlive = new KeepTravisAlive())
            {
                #region doc_learn_pendigits
                // Fix the random seed so that every run produces the same numbers
                Accord.Math.Random.Generator.Seed = 0;

                // Load the PENDIGITS dataset (UCI ML repository), using the temp folder as its path
                var dataset = new Pendigits(path: Path.GetTempPath());

                // Training sequences and their class labels
                double[][][] rawTrainSequences = dataset.Training.Item1;
                int[]        trainLabels       = dataset.Training.Item2;

                // Normalize each digit: first z-scores, then subtract the minimum of each
                // sequence so that no value remains negative
                double[][][] standardizedTrain = rawTrainSequences.Apply(Accord.Statistics.Tools.ZScores);
                double[][][] trainSequences    = standardizedTrain.Apply(sequence => sequence.Subtract(sequence.Min()));

                // Priors from which the initial emission parameters are sampled
                var covariancePrior = new WishartDistribution(dimension: 2, degreesOfFreedom: 5);
                var meanPrior       = new MultivariateNormalDistribution(dimension: 2);

                // Learning algorithm for the continuous-density hidden Markov classifier
                var hmmTeacher = new HiddenMarkovClassifierLearning<MultivariateNormalDistribution, double[]>();

                // Factory that builds the Baum-Welch learner used for each per-class model. All generic
                // arguments (including the fitting options type) are spelled out explicitly.
                hmmTeacher.Learner = classIndex =>
                {
                    var baumWelch = new BaumWelchLearning<MultivariateNormalDistribution, double[], NormalOptions>();

                    // Forward topology with 5 states for every model
                    baumWelch.Topology = new Forward(5);

                    // Each state's emission density starts from a mean and a covariance sampled
                    // from the priors (the mean is drawn before the covariance)
                    baumWelch.Emissions = stateIndex =>
                    {
                        var mean       = meanPrior.Generate();
                        var covariance = covariancePrior.Generate();
                        return new MultivariateNormalDistribution(mean: mean, covariance: covariance);
                    };

                    // Stopping criteria: convergence tolerance of 1e-6, or at most 1000 iterations
                    baumWelch.Tolerance     = 1e-6;
                    baumWelch.MaxIterations = 1000;

                    // Small regularization constant handed to the Normal fitting options; it is
                    // meant to keep the estimated covariance matrices from becoming degenerate
                    var fitting = new NormalOptions();
                    fitting.Regularization = 1e-6;
                    baumWelch.FittingOptions = fitting;

                    return baumWelch;
                };

                // Single-threaded execution keeps the results reproducible; drop or change
                // this line to let the learner run fully in parallel
                hmmTeacher.ParallelOptions.MaxDegreeOfParallelism = 1;

                // Fit the hidden Markov classifier
                var markovClassifier = hmmTeacher.Learn(trainSequences, trainLabels);


                // Learning algorithm for the HCRF, initialized from the classifier above
                var crfTeacher = new HiddenQuasiNewtonLearning<double[]>();
                crfTeacher.Function      = new MarkovMultivariateFunction(markovClassifier);
                crfTeacher.MaxIterations = 10;

                // Single-threaded execution keeps the results reproducible; drop or change
                // this line to let the learner run fully in parallel
                crfTeacher.ParallelOptions.MaxDegreeOfParallelism = 1;

                // Fit the final classifier
                var conditionalField = crfTeacher.Learn(trainSequences, trainLabels);

                // Classify the training sequences
                int[] trainGuesses = conditionalField.Decide(trainSequences);

                // Score the training predictions against the known labels
                var    trainMatrix = new GeneralConfusionMatrix(expected: trainLabels, predicted: trainGuesses);
                double trainAcc    = trainMatrix.Accuracy; // should be 0.66523727844482561


                // Testing sequences and their class labels
                double[][][] rawTestSequences = dataset.Testing.Item1;
                int[]        testLabels       = dataset.Testing.Item2;

                // Normalize them exactly as the training sequences were normalized
                double[][][] standardizedTest = rawTestSequences.Apply(Accord.Statistics.Tools.ZScores);
                double[][][] testSequences    = standardizedTest.Apply(sequence => sequence.Subtract(sequence.Min()));

                // Classify the testing sequences
                int[] testGuesses = conditionalField.Decide(testSequences);

                // Score the testing predictions against the known labels
                var    testMatrix = new GeneralConfusionMatrix(expected: testLabels, predicted: testGuesses);
                double testAcc    = testMatrix.Accuracy; // should be 0.66506538564184681
                #endregion

                Assert.AreEqual(0.66523727844482561, trainAcc, 1e-10);
                Assert.AreEqual(0.66506538564184681, testAcc, 1e-10);
            }
        }
Beispiel #2
0
        public void learn_pendigits_normalization()
        {
            // Documentation sample / regression test: quantizes the z-scored PENDIGITS sequences
            // with a KMeans-based Bag-of-Words model, trains a multiclass SVM with a ChiSquare
            // kernel on the resulting vectors, and compares the training and testing accuracies
            // against fixed reference values.
            Console.WriteLine("Starting BagOfWordsTest.learn_pendigits_normalization");

            using (var keepAlive = new KeepTravisAlive())
            {
                #region doc_learn_pendigits
                // Bag-of-Words turns sequences of arbitrary length (such as handwritten
                // digit strokes) into feature vectors of a fixed, finite length

                // Fix the random seed so that every run produces the same numbers
                Accord.Math.Random.Generator.Seed = 0;

                // Load the PENDIGITS dataset (UCI ML repository), using the temp folder as its path
                var dataset = new Pendigits(path: Path.GetTempPath());

                // Training sequences and their class labels
                double[][][] rawTrainSequences = dataset.Training.Item1;
                int[]        trainLabels       = dataset.Training.Item2;

                // Normalize each digit by converting it to z-scores
                double[][][] trainSequences = rawTrainSequences.Apply(Accord.Statistics.Tools.ZScores);

                // Bag-of-Words learner that clusters with KMeans (5 clusters)
                var bagOfWords = new BagOfWords<double[], KMeans>();
                bagOfWords.Clustering = new KMeans(5);

                // Learn the quantizer from the training sequences
                var codebook = bagOfWords.Learn(trainSequences);

                // Map every pen sequence to its vector representation
                double[][] trainFeatures = codebook.Transform(trainSequences);

                // Multiclass SVM learner; each sub-problem is solved by SMO with a ChiSquare kernel
                var svmTeacher = new MulticlassSupportVectorLearning<ChiSquare, double[]>();
                svmTeacher.Learner = parameters =>
                {
                    var smo = new SequentialMinimalOptimization<ChiSquare, double[]>();
                    smo.Complexity = 1;
                    return smo;
                };

                // Fit the classifier
                var machine = svmTeacher.Learn(trainFeatures, trainLabels);

                // Classify the training vectors
                int[] trainGuesses = machine.Decide(trainFeatures);

                // Score the training predictions against the known labels
                var    trainMatrix = new GeneralConfusionMatrix(expected: trainLabels, predicted: trainGuesses);
                double trainAcc    = trainMatrix.Accuracy; // should be 0.690


                // Testing sequences and their class labels
                double[][][] rawTestSequences = dataset.Testing.Item1;
                int[]        testLabels       = dataset.Testing.Item2;

                // Normalize them exactly as the training sequences were normalized
                double[][][] testSequences = rawTestSequences.Apply(Accord.Statistics.Tools.ZScores);

                // Reuse the quantizer learned on the training data
                double[][] testFeatures = codebook.Transform(testSequences);

                // Classify the testing vectors
                int[] testGuesses = machine.Decide(testFeatures);

                // Score the testing predictions against the known labels
                var    testMatrix = new GeneralConfusionMatrix(expected: testLabels, predicted: testGuesses);
                double testAcc    = testMatrix.Accuracy; // should be 0.600
                #endregion

                // The expected reference values differ when compiled for NET35
#if NET35
                Assert.AreEqual(0.89594053744997137d, trainAcc, 1e-10);
                Assert.AreEqual(0.89605017347211102d, testAcc, 1e-10);
#else
                Assert.AreEqual(0.69039451114922812, trainAcc, 1e-10);
                Assert.AreEqual(0.600880704563651, testAcc, 1e-10);
#endif
            }
        }