[Ignore("Intensive")] // reproducible parallelization of this test requires #870
        // Trains a hidden Markov classifier on the z-scored PENDIGITS training set, uses it to
        // initialize a hidden conditional random field that is refined with resilient gradient
        // learning, and then checks the training and testing accuracies against recorded values.
        public void learn_pendigits_normalization()
        {
            Console.WriteLine("Starting ResilientGradientHiddenLearningTest.learn_pendigits_normalization");
            string localDownloadPath = Path.Combine(NUnit.Framework.TestContext.CurrentContext.TestDirectory, "pendigits3");

            using (var travis = new KeepTravisAlive())
            {
                #region doc_learn_pendigits
                // Ensure we get reproducible results
                Accord.Math.Random.Generator.Seed = 0;

                // Download the PENDIGITS dataset from UCI ML repository
                var pendigits = new Pendigits(path: localDownloadPath);

                // Get and pre-process the training set
                double[][][] trainInputs  = pendigits.Training.Item1;
                int[]        trainOutputs = pendigits.Training.Item2;

                // Pre-process the digits so each of them is centered and scaled
                trainInputs = trainInputs.Apply(Accord.Statistics.Tools.ZScores);
                trainInputs = trainInputs.Apply((x) => x.Subtract(x.Min())); // make them positive

                // Create some prior distributions to help initialize our parameters
                var priorC = new WishartDistribution(dimension: 2, degreesOfFreedom: 5);
                var priorM = new MultivariateNormalDistribution(dimension: 2);

                // Create a new learning algorithm for creating continuous hidden Markov model classifiers
                var teacher1 = new HiddenMarkovClassifierLearning <MultivariateNormalDistribution, double[]>()
                {
                    // This tells the generative algorithm how to train each of the component models. Note: The learning
                    // algorithm is more efficient if all generic parameters are specified, including the fitting options
                    Learner = (i) => new BaumWelchLearning <MultivariateNormalDistribution, double[], NormalOptions>()
                    {
                        Topology = new Forward(5), // Each model will have a forward topology with 5 states

                        // Their emissions will be multivariate Normal distributions initialized using the prior distributions
                        Emissions = (j) => new MultivariateNormalDistribution(mean: priorM.Generate(), covariance: priorC.Generate()),

                        // We will train until the relative change in the average log-likelihood is less than 1e-6 between iterations
                        Tolerance     = 1e-6,
                        MaxIterations = 1000, // or until we perform 1000 iterations (which is unlikely for this dataset)

                        // We will prevent our covariance matrices from becoming degenerate by adding a small
                        // regularization value to their diagonal until they become positive-definite again:
                        FittingOptions = new NormalOptions()
                        {
                            Regularization = 1e-6
                        }
                    }
                };

                //// The following line is only needed to ensure reproducible results. Please remove it to enable full parallelization
                //teacher1.ParallelOptions.MaxDegreeOfParallelism = 1; // (Remove, comment, or change this line to enable full parallelism)

                // Use the learning algorithm to create a classifier
                var hmmc = teacher1.Learn(trainInputs, trainOutputs);

                // Create a new learning algorithm for creating HCRFs
                var teacher2 = new HiddenResilientGradientLearning <double[]>()
                {
                    Function = new MarkovMultivariateFunction(hmmc),

                    MaxIterations = 10
                };

                //// The following line is only needed to ensure reproducible results. Please remove it to enable full parallelization
                //teacher2.ParallelOptions.MaxDegreeOfParallelism = 1; // (Remove, comment, or change this line to enable full parallelism)

                // Use the learning algorithm to create a classifier
                var hcrf = teacher2.Learn(trainInputs, trainOutputs);

                // Compute predictions for the training set
                int[] trainPredicted = hcrf.Decide(trainInputs);

                // Check the performance of the classifier by comparing with the ground-truth:
                var    m1       = new GeneralConfusionMatrix(predicted: trainPredicted, expected: trainOutputs);
                double trainAcc = m1.Accuracy; // should be 0.81532304173813608


                // Prepare the testing set
                double[][][] testInputs  = pendigits.Testing.Item1;
                int[]        testOutputs = pendigits.Testing.Item2;

                // Apply the same normalizations
                testInputs = testInputs.Apply(Accord.Statistics.Tools.ZScores);
                testInputs = testInputs.Apply((x) => x.Subtract(x.Min())); // make them positive

                // Compute predictions for the test set
                int[] testPredicted = hcrf.Decide(testInputs);

                // Check the performance of the classifier by comparing with the ground-truth:
                var    m2      = new GeneralConfusionMatrix(predicted: testPredicted, expected: testOutputs);
                double testAcc = m2.Accuracy; // should be 0.77061649319455561
                #endregion

                // The accuracy reported by the confusion matrix must agree with one minus the zero-one loss
                var loss = new Accord.Math.Optimization.Losses.ZeroOneLoss(testOutputs).Loss(testPredicted);
                Assert.AreEqual(1.0 - loss, m2.Accuracy);

                // Both confusion matrices must report 10 classes
                Assert.AreEqual(10, m1.Classes);
                Assert.AreEqual(10, m2.Classes);

                // Expected accuracies differ between the NET35 build and the other targets.
                // Both branches use Assert.AreEqual with an absolute tolerance so that a
                // failure reports the actual value that was obtained.
#if NET35
                Assert.AreEqual(0.89594053744997137d, trainAcc, 1e-5);
                Assert.AreEqual(0.89605017347211102d, testAcc, 1e-5);
#else
                Assert.AreEqual(0.81532304173813608, trainAcc, 1e-5);
                Assert.AreEqual(0.77061649319455561, testAcc, 1e-5);
#endif
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Entry point of the logistic regression training utility. Reads a training matrix and a
        /// label matrix from two CSV files, trains a probabilistic coordinate descent model and a
        /// multinomial logistic regression (L-BFGS) model, and prints the elapsed time and the
        /// training accuracy of each to the console.
        /// </summary>
        /// <param name="args">
        /// Command line arguments: <c>args[0]</c> is the training file and <c>args[1]</c> is the
        /// label file. A third argument is currently rejected with the usage message.
        /// </param>
        static void Main(string[] args)
        {
            /*
             * some declarations
             */
            string    trainingfile = null;
            string    labelfile    = null;
            const int minargs      = 2;
            const int maxargs      = 3;

            int numArgs = Utility.parseCommandLine(args, maxargs, minargs);

            if (numArgs == 0)
            {
                Console.WriteLine(strings.usage);
                System.Environment.Exit(1);
            }

            if (numArgs == 2)
            {
                trainingfile = args[0];
                labelfile    = args[1];
            }
            if (numArgs == 3)  // no use for third parameter yet!
            {
                Console.WriteLine(strings.usage);
                System.Environment.Exit(1);
            }

            if (!Utility.checkFile(trainingfile))
            {
                Console.WriteLine("Error opening file {0}", trainingfile);
                System.Environment.Exit(1);
            }
            if (!Utility.checkFile(labelfile))
            {
                Console.WriteLine("Error opening file {0}", labelfile);
                System.Environment.Exit(1);
            }

            //
            // Setup a timer
            //
            Stopwatch stopWatch = new Stopwatch();
            string    elapsedTime;



            Console.WriteLine(" Logistic Regression (Accord.net) Training Utility Starting...\n");
            //Console.WriteLine("Learning 3 differernt Models: Profbablistic Coordinate Descent, Iterative Reweighted Least Squares, Conjugate Gradient Descent (BFGS)\n");

            //
            // Read in the training file and convert to a Matrix.
            // The reader is disposed as soon as the matrix has been materialized.
            //
            double[,] MatrixIn;
            using (CsvReader training_samples = new CsvReader(trainingfile, false))
            {
                MatrixIn = training_samples.ToMatrix <double>();
            }

            int rows = MatrixIn.Rows();
            int cols = MatrixIn.Columns();

            //
            // Read in the label file and convert to a Matrix
            //
            double[,] labels;
            using (CsvReader labelscsv = new CsvReader(labelfile, false))
            {
                labels = labelscsv.ToMatrix <double>();
            }

            if (rows != labels.Rows()) // number of samples must match
            {
                // NOTE(review): the check compares row counts, yet the message receives the column
                // count and a literal 4 -- confirm against the strings.SampleMisMatch format.
                Console.WriteLine(strings.SampleMisMatch, cols, 4);
                System.Environment.Exit(1);
            }

            // For Accord.net Logistic Regression the input data needs to be in Jagged Arrays
            double[][] input1 = MatrixIn.ToJagged <double>();

            // Labels can either be int (1,0) or bools
            int [] output1 = Utility.convetToJaggedArray(labels);

            // Learn a Probabilistic Coordinate Descent model using a large margin solver (SVM) with L1 Regularization
            //
            // NOTE(review): an earlier comment said this routine was commented out because it is a
            // linear solver, but it is active here.

            Console.WriteLine("Starting Probabilistic Coordinate Descent(Support Vector Machine)");
            stopWatch.Start();
            int[]  svmpredicts = MLAlgorithms.ProbabilisticCoordinateDescent(input1, output1, trainingfile);
            double svmaccuracy = Funcs.Utility.CalculateAccuraccy(svmpredicts, output1);

            stopWatch.Stop();
            var duration = stopWatch.Elapsed;

            elapsedTime = $"{duration.Hours}:{duration.Minutes}:{duration.Seconds}.{duration.Milliseconds / 10}";
            Console.WriteLine("Elapsed Time for training: " + elapsedTime);

            Console.Write(" Probablistic Coordinate Descent training Accuracy: ");
            Funcs.Utility.Printcolor(Math.Round(svmaccuracy * 100, 2), ConsoleColor.Red);



            Console.WriteLine("\nStarting Multinomial Logistic Regression using L-BFGS");
            // Reset and start the timer so that the elapsed time reported below covers only this
            // training run instead of repeating the previous measurement.
            stopWatch.Restart();
            int[]  BFGSPredicts = MLAlgorithms.MultiNomialLogisticRegressionBFGS(input1, output1, trainingfile.Replace(".csv", ".BFGS.save"));
            double BFGSAccuracy = Utility.CalculateAccuraccy(BFGSPredicts, output1);

            stopWatch.Stop();
            duration    = stopWatch.Elapsed;
            elapsedTime = $"{duration.Hours}:{duration.Minutes}:{duration.Seconds}.{duration.Milliseconds / 10}";
            Console.WriteLine("Elapsed Time for training: " + elapsedTime);
            Console.Write(" Multinomial LR Training Accuracy => ");

            Funcs.Utility.Printcolor(Math.Round(BFGSAccuracy * 100, 2), ConsoleColor.Red);
            Console.WriteLine();

            // Commenting this algorithm out, after running for a few hours on a 25 sample resume file it got an out of memory error

            /*
             * Console.WriteLine("Starting Iterative Reweighted Least Squares");
             * int[] IRLSPredicts = MLAlgorithms.IterativeLeastSquares(input1, output1, trainingfile);
             * double IRLSAccuracy = Utility.CalculateAccuraccy (IRLSPredicts, output1);
             * Console.WriteLine(" Iterative Least Squares (IRLS)\nAccuracy => {0}\n", Math.Round(IRLSAccuracy * 100, 2));
             */

            // Commenting out this method, it is too long running on the resume data set.

            /*Console.WriteLine("starting Multinomial Log Regression w/ Lowerbound Newton Raphson");
             * int[] MNLRPredicts = MLAlgorithms.MultiNomialLogRegressionLowerBoundNewtonRaphson(input1, output1, trainingfile);
             * double MNLRAccuracy = Funcs.Utility.CalculateAccuraccy(MNLRPredicts, output1);
             * Console.Write ("Multinomial Logistic Regression using LB Newton Raphson Training Accuracy => ");
             * Funcs.Utility.Printcolor (Math.Round(MNLRAccuracy * 100, 2), ConsoleColor.Red);
             */
        }
Esempio n. 3
0
        /// <summary>
        /// Learns an ID3 decision tree from the codified contents of <c>table</c>, shows the
        /// tree's zero-one training error in <c>errorLabel</c>, and predicts the "DEPOSIT"
        /// outcome for one set of already-codified attribute values. The prediction is written
        /// to <c>outputLabel</c> (green for "yes", red for "no") and <c>subjectLabel</c>.
        /// </summary>
        /// <remarks>
        /// The eight integers are passed to the tree in the order AGE, JOB, MARITAL, EDUCATION,
        /// DEBT, BALANCE, HOUSING, LOAN. Example values: 4, 10, 2, 1, 0, 0, 0, 0.
        /// If any exception is thrown, a warning message box is shown instead.
        /// </remarks>
        /// <param name="a">Codified value placed in the AGE position.</param>
        /// <param name="b">Codified value placed in the JOB position.</param>
        /// <param name="c">Codified value placed in the MARITAL position.</param>
        /// <param name="d">Codified value placed in the EDUCATION position.</param>
        /// <param name="e">Codified value placed in the DEBT position.</param>
        /// <param name="f">Codified value placed in the BALANCE position.</param>
        /// <param name="g">Codified value placed in the HOUSING position.</param>
        /// <param name="h">Codified value placed in the LOAN position.</param>
        /// <param name="name">Name inserted into the text shown in <c>subjectLabel</c>.</param>
        /// <returns>"yes" or "no" on success; "Vacio" when an exception was caught.</returns>
        public string BankPrediction(int a, int b, int c, int d, int e, int f, int g, int h, string name)
        {
            string result = "Vacio";

            try
            {
                // Translate the categorical table into integer symbols
                var codification = new Accord.Statistics.Filters.Codification(table);
                System.Data.DataTable encoded = codification.Apply(table);

                int[][] featureRows = DataTableToMatrix(encoded, new string[] { "AGE", "JOB", "MARITAL", "EDUCATION", "DEBT", "BALANCE", "HOUSING", "LOAN" });
                int[][] labelRows   = DataTableToMatrix(encoded, new string[] { "DEPOSIT" });

                // Each label row holds a single column; flatten it into a plain vector
                int[] labels = Array.ConvertAll(labelRows, row => row[0]);

                ID3Learning id3 = new ID3Learning()
                {
                    new DecisionVariable("AGE", 5),
                    new DecisionVariable("JOB", 12),
                    new DecisionVariable("MARITAL", 3),
                    new DecisionVariable("EDUCATION", 4),
                    new DecisionVariable("DEBT", 2),
                    new DecisionVariable("BALANCE", 6),
                    new DecisionVariable("HOUSING", 2),
                    new DecisionVariable("LOAN", 2)
                };

                DecisionTree decisionTree = id3.Learn(featureRows, labels);

                // Send the error value to a label that shows the error of the tree
                int[]  trainingPredictions = decisionTree.Decide(featureRows);
                double trainingError       = new Accord.Math.Optimization.Losses.ZeroOneLoss(labels).Loss(trainingPredictions);
                double errorPercent        = Math.Floor(trainingError * 100);
                errorLabel.Text = errorPercent + "%" + " " + "-" + " " + "(" + trainingError + ")";

                int[] query         = new int[] { a, b, c, d, e, f, g, h };
                int   predictedCode = decisionTree.Decide(query);

                // NOTE(review): assumes the codebook assigned symbol 1 to "yes" -- confirm against the data
                bool   isYes  = predictedCode == 1;
                string answer = isYes ? "yes" : "no";

                // Green for a positive prediction, red for a negative one
                outputLabel.ForeColor = isYes
                    ? Color.FromArgb(0, 255, 0)
                    : Color.FromArgb(255, 0, 0);
                outputLabel.Text = answer;
                result           = answer;

                subjectLabel.Text = "for" + " " + name + " " + "the prediction is";
            }
            catch (Exception)
            {
                MessageBox.Show("Yo cannot make predictions without\nloading the data", "Warning");
            }

            return result;
        }