Пример #1
0
        public void GenerateTest4()
        {
            // Scale parameter of the distribution (must be symmetric and positive definite)
            double[,] scaleMatrix =
            {
                { 4, 1, 1 },
                { 1, 2, 2 },
                { 1, 2, 6 },
            };

            // Wishart distribution with 7 degrees of freedom and the scale matrix above
            var distribution = new WishartDistribution(degreesOfFreedom: 7, scale: scaleMatrix);

            // A single draw is expected to be a 3x3 positive definite matrix
            double[,] single = distribution.Generate();
            Assert.AreEqual(3, single.Rows());
            Assert.AreEqual(3, single.Columns());
            Assert.IsTrue(single.IsPositiveDefinite());

            // The same expectations must hold for each matrix of a batch of 100 draws
            double[][,] batch = distribution.Generate(100);
            foreach (double[,] sample in batch)
            {
                Assert.AreEqual(3, sample.Rows());
                Assert.AreEqual(3, sample.Columns());
                Assert.IsTrue(sample.IsPositiveDefinite());
            }
        }
        public void sequence_parsing_test()
        {
            #region doc_learn_fraud_analysis

            // Fix the seed of the random generator so the results are reproducible
            Accord.Math.Random.Generator.Seed = 0;

            // The matrix below holds data about credit card transactions, sorted by
            // order of transaction and grouped by credit card holder. Every time the
            // "Time" column goes back to zero, a new sequence starts: the rows that
            // follow are the transactions of one and the same person.

            double[,] transactions =
            {
                // "Time", "V1",   "V2",  "V3", "V4", "V5", "Amount",  "Fraud"
                { 0, 0.521, 0.124, 0.622, 15.2, 25.6,  2.70, 0 },              // first person, ok
                { 1, 0.121, 0.124, 0.822, 12.2, 25.6,  42.0, 0 },              // first person, ok

                { 0, 0.551, 0.124, 0.422, 17.5, 25.6,  20.0, 0 },              // second person, ok
                { 1, 0.136, 0.154, 0.322, 15.3, 25.6,  50.0, 0 },              // second person, ok
                { 2, 0.721, 0.240, 0.422, 12.2, 25.6, 100.0, 1 },              // second person, fraud!
                { 3, 0.222, 0.126, 0.722, 18.1, 25.8,  10.0, 0 },              // second person, ok
            };

            // Convert the matrix above into jagged arrays (the transform
            // helper is declared outside of this method)
            double[][][] sequences;
            int[][]      labels;
            transform(transactions, out sequences, out labels);

            // Number of dimensions of each observation: the 6 columns
            // "V1", "V2", "V3", "V4", "V5" and "Amount"
            const int featureCount = 6;

            // Prior distributions used to draw the initial emission parameters. The 10
            // degrees of freedom were picked arbitrarily; the value may have to be
            // tuned as if it was a hyperparameter.
            var covariancePrior = new WishartDistribution(dimension: featureCount, degreesOfFreedom: 10);
            var meanPrior = new MultivariateNormalDistribution(dimension: featureCount);

            // Set up the learning algorithm that will train the sequence classifier
            var learner = new MaximumLikelihoodLearning<MultivariateNormalDistribution, double[]>()
            {
                // Emissions are multivariate Normal distributions whose mean and
                // covariance are drawn (in this order) from the priors above
                Emissions = (index) => new MultivariateNormalDistribution(
                    mean: meanPrior.Generate(),
                    covariance: covariancePrior.Generate()),

                // A small regularization value is added to the diagonal of the covariance
                // matrices to keep them from becoming degenerate (non positive-definite)
                FittingOptions = new NormalOptions()
                {
                    Regularization = 1e-6
                },
            };

            // Learn a new HMM from the labeled sequences
            var model = learner.Learn(sequences, labels);

            // Ask the HMM whether each of the transactions was fraudulent or not:
            int[] firstPerson = model.Decide(sequences[0]);  // first person, output should be: 0, 0

            int[] secondPerson = model.Decide(sequences[1]); // second person, output should be: 0, 0, 1, 0
            #endregion


            int[] expectedFirst  = { 0, 0 };
            int[] expectedSecond = { 0, 0, 1, 0 };

            Assert.AreEqual(expectedFirst, firstPerson);
            Assert.AreEqual(expectedSecond, secondPerson);
        }
        public void learn_pendigits_normalization()
        {
            Console.WriteLine("Starting NormalQuasiNewtonHiddenLearningTest.learn_pendigits_normalization");

            // The KeepTravisAlive instance is held (and disposed) for the duration of the test
            using (var keepAlive = new KeepTravisAlive())
            {
                #region doc_learn_pendigits
                // Fix the seed of the random generator so the results are reproducible
                Accord.Math.Random.Generator.Seed = 0;

                // Obtain the PENDIGITS dataset (from the UCI ML repository), using the temp folder as path
                var dataset = new Pendigits(path: Path.GetTempPath());

                // Training sequences and their class labels
                double[][][] trainX = dataset.Training.Item1;
                int[]        trainY = dataset.Training.Item2;

                // Normalize each digit: center and scale it, then shift it so all values are positive
                trainX = trainX.Apply(Accord.Statistics.Tools.ZScores);
                trainX = trainX.Apply((seq) => seq.Subtract(seq.Min()));

                // Prior distributions used to draw the initial emission parameters
                var covariancePrior = new WishartDistribution(dimension: 2, degreesOfFreedom: 5);
                var meanPrior = new MultivariateNormalDistribution(dimension: 2);

                // Learning algorithm for continuous hidden Markov model classifiers
                var hmmLearner = new HiddenMarkovClassifierLearning<MultivariateNormalDistribution, double[]>()
                {
                    // Defines how each of the component models is trained. Note: the learning algorithm
                    // is more efficient when all generic parameters, including the fitting options, are given
                    Learner = (classIndex) => new BaumWelchLearning<MultivariateNormalDistribution, double[], NormalOptions>()
                    {
                        // Every model uses a forward topology with 5 states
                        Topology = new Forward(5),

                        // Emissions are multivariate Normal distributions whose mean and
                        // covariance are drawn (in this order) from the priors above
                        Emissions = (index) => new MultivariateNormalDistribution(
                            mean: meanPrior.Generate(),
                            covariance: covariancePrior.Generate()),

                        // Stop once the relative change in the average log-likelihood between iterations drops
                        // below 1e-6, or after 1000 iterations (which is unlikely to happen for this dataset)
                        Tolerance     = 1e-6,
                        MaxIterations = 1000,

                        // A small regularization value is added to the diagonal of the covariance
                        // matrices to keep them from becoming degenerate (non positive-definite)
                        FittingOptions = new NormalOptions()
                        {
                            Regularization = 1e-6
                        }
                    }
                };

                // Only needed for reproducible results: remove, comment or change this line to enable full parallelism
                hmmLearner.ParallelOptions.MaxDegreeOfParallelism = 1;

                // Train the hidden Markov classifier
                var markovClassifier = hmmLearner.Learn(trainX, trainY);


                // Learning algorithm for HCRFs, whose potential function is built from the trained classifier
                var hcrfLearner = new HiddenQuasiNewtonLearning<double[]>()
                {
                    Function = new MarkovMultivariateFunction(markovClassifier),

                    MaxIterations = 10
                };

                // Only needed for reproducible results: remove, comment or change this line to enable full parallelism
                hcrfLearner.ParallelOptions.MaxDegreeOfParallelism = 1;

                // Train the HCRF classifier
                var hcrf = hcrfLearner.Learn(trainX, trainY);

                // Predictions for the training set
                int[] trainPredicted = hcrf.Decide(trainX);

                // Compare the predictions against the ground-truth to measure the performance:
                var    trainMatrix = new GeneralConfusionMatrix(predicted: trainPredicted, expected: trainY);
                double trainAcc    = trainMatrix.Accuracy; // should be 0.66523727844482561


                // Testing sequences and their class labels
                double[][][] testX = dataset.Testing.Item1;
                int[]        testY = dataset.Testing.Item2;

                // The testing set goes through the same normalizations as the training set
                testX = testX.Apply(Accord.Statistics.Tools.ZScores);
                testX = testX.Apply((seq) => seq.Subtract(seq.Min()));

                // Predictions for the testing set
                int[] testPredicted = hcrf.Decide(testX);

                // Compare the predictions against the ground-truth to measure the performance:
                var    testMatrix = new GeneralConfusionMatrix(predicted: testPredicted, expected: testY);
                double testAcc    = testMatrix.Accuracy; // should be 0.66506538564184681
                #endregion

                const double expectedTrainAcc = 0.66523727844482561;
                const double expectedTestAcc  = 0.66506538564184681;
                const double tolerance        = 1e-10;

                Assert.AreEqual(expectedTrainAcc, trainAcc, tolerance);
                Assert.AreEqual(expectedTestAcc, testAcc, tolerance);
            }
        }
        public void learn_pendigits_normalization()
        {
            #region doc_learn_pendigits
            // Fix the seed of the random generator so the results are reproducible
            Accord.Math.Random.Generator.Seed = 0;

            // Obtain the PENDIGITS dataset (from the UCI ML repository), using the temp folder as path
            var dataset = new Pendigits(path: Path.GetTempPath());

            // Training sequences and their class labels
            double[][][] trainX = dataset.Training.Item1;
            int[]        trainY = dataset.Training.Item2;

            // Normalize each digit: center and scale it, then shift it so all values are positive
            trainX = trainX.Apply(Accord.Statistics.Tools.ZScores);
            trainX = trainX.Apply((seq) => seq.Subtract(seq.Min()));

            // Prior distributions used to draw the initial emission parameters
            var covariancePrior = new WishartDistribution(dimension: 2, degreesOfFreedom: 5);
            var meanPrior = new MultivariateNormalDistribution(dimension: 2);

            // Template Markov classifier that serves as the base for the HCRF: one forward model
            // with 5 states per class, with emissions drawn (mean first, then covariance) from the priors
            var template = new HiddenMarkovClassifier<MultivariateNormalDistribution, double[]>(
                classes: dataset.NumberOfClasses,
                topology: new Forward(5),
                initial: (classIndex, index) => new MultivariateNormalDistribution(
                    mean: meanPrior.Generate(),
                    covariance: covariancePrior.Generate()));

            // Learning algorithm for HCRFs, whose potential function is built from the template classifier
            var hcrfLearner = new HiddenQuasiNewtonLearning<double[]>()
            {
                Function = new MarkovMultivariateFunction(template),

                MaxIterations = 10
            };

            // Only needed for reproducible results: remove, comment or change this line to enable full parallelism
            hcrfLearner.ParallelOptions.MaxDegreeOfParallelism = 1;

            // Train the HCRF classifier
            var hcrf = hcrfLearner.Learn(trainX, trainY);

            // Predictions for the training set
            int[] trainPredicted = hcrf.Decide(trainX);

            // Compare the predictions against the ground-truth to measure the performance:
            var    trainMatrix = new ConfusionMatrix(predicted: trainPredicted, expected: trainY);
            double trainAcc    = trainMatrix.Accuracy; // should be 0.89594053744997137


            // Testing sequences and their class labels
            double[][][] testX = dataset.Testing.Item1;
            int[]        testY = dataset.Testing.Item2;

            // The testing set goes through the same normalizations as the training set
            testX = testX.Apply(Accord.Statistics.Tools.ZScores);
            testX = testX.Apply((seq) => seq.Subtract(seq.Min()));

            // Predictions for the testing set
            int[] testPredicted = hcrf.Decide(testX);

            // Compare the predictions against the ground-truth to measure the performance:
            var    testMatrix = new ConfusionMatrix(predicted: testPredicted, expected: testY);
            double testAcc    = testMatrix.Accuracy; // should be 0.896050173472111
            #endregion

            const double expectedTrainAcc = 0.89594053744997137;
            const double expectedTestAcc  = 0.896050173472111;
            const double tolerance        = 1e-10;

            Assert.AreEqual(expectedTrainAcc, trainAcc, tolerance);
            Assert.AreEqual(expectedTestAcc, testAcc, tolerance);
        }