public void GenerateTest4()
{
    // Scale parameter of the distribution (must be symmetric and positive definite).
    double[,] scaleMatrix =
    {
        { 4, 1, 1 },
        { 1, 2, 2 },
        { 1, 2, 6 },
    };

    // Wishart distribution with 7 degrees of freedom and the scale matrix above.
    var distribution = new WishartDistribution(degreesOfFreedom: 7, scale: scaleMatrix);

    // A single draw must be a 3x3 positive-definite matrix.
    double[,] single = distribution.Generate();

    Assert.AreEqual(3, single.Rows());
    Assert.AreEqual(3, single.Columns());
    Assert.IsTrue(single.IsPositiveDefinite());

    // The same must hold for every matrix in a batch of 100 draws.
    double[][,] batch = distribution.Generate(100);

    foreach (double[,] sample in batch)
    {
        Assert.AreEqual(3, sample.Rows());
        Assert.AreEqual(3, sample.Columns());
        Assert.IsTrue(sample.IsPositiveDefinite());
    }
}
public void ConstructorTest4()
{
    // Create a Wishart distribution with the parameters:
    WishartDistribution wishart = new WishartDistribution(

        // Degrees of freedom
        degreesOfFreedom: 7,

        // Scale parameter
        scale: new double[,]
        {
            { 4, 1, 1 },
            { 1, 2, 2 }, // (must be symmetric and positive definite)
            { 1, 2, 6 },
        }
    );

    // Common measures
    double[] variance = wishart.Variance;   // { 224, 56, 504 }
    double[,] cov = wishart.Covariance;     // see below
    double[,] meanm = wishart.MeanMatrix;   // see below

    //         224  63 175             28  7  7
    //   cov =  63  56 112     mean =   7 14 14
    //         175 112 504              7 14 42

    // (the above matrix representations have been transcribed to text using)
    // string scov = cov.ToString(DefaultMatrixFormatProvider.InvariantCulture);
    // string smean = meanm.ToString(DefaultMatrixFormatProvider.InvariantCulture);

    // For compatibility reasons, .Mean stores a flattened mean matrix
    double[] mean = wishart.Mean; // { 28, 7, 7, 7, 14, 14, 7, 14, 42 }

    // Point at which the density functions are evaluated.
    double[,] x =
    {
        { 8, 3, 1 },
        { 3, 7, 1 },
        { 1, 1, 8 },
    };

    // Probability density functions
    double pdf = wishart.ProbabilityDensityFunction(x);      // 0.000000011082455043473361
    double lpdf = wishart.LogProbabilityDensityFunction(x);  // -18.317902605850534

    // The moments below are small whole numbers, so they are compared exactly.
    Assert.AreEqual(28.0, mean[0]);
    Assert.AreEqual(7.0, mean[1]);
    Assert.AreEqual(7.0, mean[3]);
    Assert.AreEqual(14.0, mean[4]);

    Assert.AreEqual(224.0, variance[0]);
    Assert.AreEqual(56.0, variance[1]);
    Assert.AreEqual(504.0, variance[2]);

    Assert.AreEqual(224.0, cov[0, 0]);
    Assert.AreEqual(63.0, cov[0, 1]);
    Assert.AreEqual(63.0, cov[1, 0]);
    Assert.AreEqual(56.0, cov[1, 1]);

    // The densities are general floating-point results, so they are compared
    // with a tolerance rather than bit-for-bit. pdf is on the order of 1e-8,
    // so an absolute delta of 1e-20 still pins about twelve significant digits.
    Assert.AreEqual(0.00000001108245504347336, pdf, 1e-20);
    Assert.AreEqual(-18.317902605850534, lpdf, 1e-10);
}
public void sequence_parsing_test()
{
    #region doc_learn_fraud_analysis
    // Fix the random seed so the results are reproducible
    Accord.Math.Random.Generator.Seed = 0;

    // Suppose we have the following credit card transaction records, listed
    // in order of transaction and grouped by card holder. Every time the
    // "Time" column goes back to zero, the observations that follow belong
    // to the transactions of a new person:
    double[,] data =
    {
        // "Time", "V1",  "V2",  "V3", "V4", "V5", "Amount", "Fraud"
        { 0, 0.521, 0.124, 0.622, 15.2, 25.6, 2.70, 0 },  // first person, ok
        { 1, 0.121, 0.124, 0.822, 12.2, 25.6, 42.0, 0 },  // first person, ok

        { 0, 0.551, 0.124, 0.422, 17.5, 25.6, 20.0, 0 },  // second person, ok
        { 1, 0.136, 0.154, 0.322, 15.3, 25.6, 50.0, 0 },  // second person, ok
        { 2, 0.721, 0.240, 0.422, 12.2, 25.6, 100.0, 1 }, // second person, fraud!
        { 3, 0.222, 0.126, 0.722, 18.1, 25.8, 10.0, 0 },  // second person, ok
    };

    // Convert the table above into jagged per-person sequences
    double[][][] input;
    int[][] states;
    transform(data, out input, out states);

    // Number of dimensions in each observation (in this case, 6):
    // the 6 columns "V1", "V2", "V3", "V4", "V5" and "Amount"
    int dimensions = 6;

    // Prior distributions used to initialize the emission parameters. The 10
    // degrees of freedom is an arbitrary choice that may need to be tuned
    // like any other hyperparameter.
    var covariancePrior = new WishartDistribution(dimension: dimensions, degreesOfFreedom: 10);
    var meanPrior = new MultivariateNormalDistribution(dimension: dimensions);

    // Set up the learning algorithm that will train the sequence classifier
    var teacher = new MaximumLikelihoodLearning<MultivariateNormalDistribution, double[]>()
    {
        // Emissions are multivariate Normal distributions drawn from the priors
        Emissions = j => new MultivariateNormalDistribution(
            mean: meanPrior.Generate(),
            covariance: covariancePrior.Generate()),

        // Keep the covariance matrices from becoming degenerate by adding a
        // small regularization value to their diagonal until they become
        // positive-definite again:
        FittingOptions = new NormalOptions()
        {
            Regularization = 1e-6
        },
    };

    // Learn a new HMM from the labelled sequences
    var hmm = teacher.Learn(input, states);

    // Ask the HMM whether each transaction was fraudulent or not:
    int[] firstPerson = hmm.Decide(input[0]);  // first person, output should be: 0, 0
    int[] secondPerson = hmm.Decide(input[1]); // second person, output should be: 0, 0, 1, 0
    #endregion

    int[] expectedFirst = { 0, 0 };
    int[] expectedSecond = { 0, 0, 1, 0 };

    Assert.AreEqual(expectedFirst, firstPerson);
    Assert.AreEqual(expectedSecond, secondPerson);
}
public void learn_pendigits_normalization()
{
    Console.WriteLine("Starting NormalQuasiNewtonHiddenLearningTest.learn_pendigits_normalization");

    using (var keepAlive = new KeepTravisAlive())
    {
        #region doc_learn_pendigits
        // Fix the random seed so the results are reproducible
        Accord.Math.Random.Generator.Seed = 0;

        // Download the PENDIGITS dataset from the UCI ML repository
        var pendigits = new Pendigits(path: Path.GetTempPath());

        // Fetch the training sequences and their labels
        double[][][] trainInputs = pendigits.Training.Item1;
        int[] trainOutputs = pendigits.Training.Item2;

        // Center and scale every digit, then shift it so all values are positive
        trainInputs = trainInputs.Apply(Accord.Statistics.Tools.ZScores);
        trainInputs = trainInputs.Apply(x => x.Subtract(x.Min()));

        // Prior distributions used to initialize the emission parameters
        var priorC = new WishartDistribution(dimension: 2, degreesOfFreedom: 5);
        var priorM = new MultivariateNormalDistribution(dimension: 2);

        // Learning algorithm that builds a continuous hidden Markov model classifier
        var hmmTeacher = new HiddenMarkovClassifierLearning<MultivariateNormalDistribution, double[]>()
        {
            // Describes how each per-class model is trained. Note: the learning
            // algorithm is more efficient when all generic parameters are given,
            // including the fitting options type.
            Learner = i => new BaumWelchLearning<MultivariateNormalDistribution, double[], NormalOptions>()
            {
                // Every model uses a forward topology with 5 states
                Topology = new Forward(5),

                // Emissions are multivariate Normal distributions drawn from the priors
                Emissions = j => new MultivariateNormalDistribution(
                    mean: priorM.Generate(),
                    covariance: priorC.Generate()),

                // Stop once the relative change in the average log-likelihood
                // between iterations drops below 1e-6 ...
                Tolerance = 1e-6,

                // ... or after 1000 iterations (which is unlikely for this dataset)
                MaxIterations = 1000,

                // Keep the covariance matrices from becoming degenerate by adding
                // a small regularization value to their diagonal until they become
                // positive-definite again:
                FittingOptions = new NormalOptions()
                {
                    Regularization = 1e-6
                }
            }
        };

        // Only needed for reproducible results: remove, comment out or change
        // this line to enable full parallelism.
        hmmTeacher.ParallelOptions.MaxDegreeOfParallelism = 1;

        // Train the Markov classifier
        var hmmc = hmmTeacher.Learn(trainInputs, trainOutputs);

        // Learning algorithm that builds the HCRF on top of the Markov classifier
        var hcrfTeacher = new HiddenQuasiNewtonLearning<double[]>()
        {
            Function = new MarkovMultivariateFunction(hmmc),

            MaxIterations = 10
        };

        // Only needed for reproducible results: remove, comment out or change
        // this line to enable full parallelism.
        hcrfTeacher.ParallelOptions.MaxDegreeOfParallelism = 1;

        // Train the HCRF classifier
        var hcrf = hcrfTeacher.Learn(trainInputs, trainOutputs);

        // Predict the training set and compare with the ground-truth
        int[] trainPredicted = hcrf.Decide(trainInputs);

        double trainAcc = new GeneralConfusionMatrix(
            predicted: trainPredicted,
            expected: trainOutputs).Accuracy; // should be 0.66523727844482561

        // Fetch the testing sequences and their labels
        double[][][] testInputs = pendigits.Testing.Item1;
        int[] testOutputs = pendigits.Testing.Item2;

        // Apply the same normalizations used for the training set
        testInputs = testInputs.Apply(Accord.Statistics.Tools.ZScores);
        testInputs = testInputs.Apply(x => x.Subtract(x.Min()));

        // Predict the testing set and compare with the ground-truth
        int[] testPredicted = hcrf.Decide(testInputs);

        double testAcc = new GeneralConfusionMatrix(
            predicted: testPredicted,
            expected: testOutputs).Accuracy; // should be 0.66506538564184681
        #endregion

        Assert.AreEqual(0.66523727844482561, trainAcc, 1e-10);
        Assert.AreEqual(0.66506538564184681, testAcc, 1e-10);
    }
}
public void learn_pendigits_normalization()
{
    #region doc_learn_pendigits
    // Fix the random seed so the results are reproducible
    Accord.Math.Random.Generator.Seed = 0;

    // Download the PENDIGITS dataset from the UCI ML repository
    var pendigits = new Pendigits(path: Path.GetTempPath());

    // Fetch the training sequences and their labels
    double[][][] trainInputs = pendigits.Training.Item1;
    int[] trainOutputs = pendigits.Training.Item2;

    // Center and scale every digit, then shift it so all values are positive
    trainInputs = trainInputs.Apply(Accord.Statistics.Tools.ZScores);
    trainInputs = trainInputs.Apply(x => x.Subtract(x.Min()));

    // Prior distributions used to initialize the emission parameters
    var priorC = new WishartDistribution(dimension: 2, degreesOfFreedom: 5);
    var priorM = new MultivariateNormalDistribution(dimension: 2);

    // Template Markov classifier that serves as the base for the HCRF:
    // one forward model with 5 states per class, with emissions drawn
    // from the prior distributions.
    var hmmc = new HiddenMarkovClassifier<MultivariateNormalDistribution, double[]>(
        classes: pendigits.NumberOfClasses,
        topology: new Forward(5),
        initial: (i, j) => new MultivariateNormalDistribution(
            mean: priorM.Generate(),
            covariance: priorC.Generate()));

    // Learning algorithm that builds the HCRF from the template classifier
    var teacher = new HiddenQuasiNewtonLearning<double[]>()
    {
        Function = new MarkovMultivariateFunction(hmmc),

        MaxIterations = 10
    };

    // Only needed for reproducible results: remove, comment out or change
    // this line to enable full parallelism.
    teacher.ParallelOptions.MaxDegreeOfParallelism = 1;

    // Train the HCRF classifier
    var hcrf = teacher.Learn(trainInputs, trainOutputs);

    // Predict the training set and compare with the ground-truth.
    // NOTE(review): presumably this dataset has more than two classes (digits);
    // confirm that ConfusionMatrix, rather than a multi-class confusion matrix
    // type, is what is intended here. The expected accuracies asserted below
    // were recorded with ConfusionMatrix.
    int[] trainPredicted = hcrf.Decide(trainInputs);

    double trainAcc = new ConfusionMatrix(
        predicted: trainPredicted,
        expected: trainOutputs).Accuracy; // should be 0.89594053744997137

    // Fetch the testing sequences and their labels
    double[][][] testInputs = pendigits.Testing.Item1;
    int[] testOutputs = pendigits.Testing.Item2;

    // Apply the same normalizations used for the training set
    testInputs = testInputs.Apply(Accord.Statistics.Tools.ZScores);
    testInputs = testInputs.Apply(x => x.Subtract(x.Min()));

    // Predict the testing set and compare with the ground-truth
    int[] testPredicted = hcrf.Decide(testInputs);

    double testAcc = new ConfusionMatrix(
        predicted: testPredicted,
        expected: testOutputs).Accuracy; // should be 0.896050173472111
    #endregion

    Assert.AreEqual(0.89594053744997137, trainAcc, 1e-10);
    Assert.AreEqual(0.896050173472111, testAcc, 1e-10);
}