Пример #1
0
        /// <summary>
        /// Runs a 3-fold cross-validation of a Naive Bayes classifier over the
        /// <c>inputsInt</c>/<c>outputs</c> data and appends the mean training error,
        /// the mean validation error and the aggregate accuracy to <c>message</c>.
        /// </summary>
        public void nativeBayesValidation()
        {
            // The learner factory creates a fresh teacher for every fold, so no
            // model has to be trained on the complete data set beforehand.
            var cv = CrossValidation.Create(
                k: 3,
                learner: (p) => new NaiveBayesLearning(),
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                x: inputsInt, y: outputs
                );

            var result = cv.Learn(inputsInt, outputs);

            double trainingError   = result.Training.Mean;
            double validationError = result.Validation.Mean;

            // Aggregate confusion matrix built from the cross-validation result.
            GeneralConfusionMatrix gcm = result.ToConfusionMatrix(inputsInt, outputs);
            double accuracy            = gcm.Accuracy;

            message += "Native Bayes Validacja\n";
            message += "trainingError " + trainingError.ToString() + "\n";
            message += "validationError " + validationError.ToString() + "\n";
            message += "accuracy " + accuracy.ToString() + "\n\n";
        }
Пример #2
0
        /// <summary>
        /// Cross-validates a k-nearest-neighbours classifier (4 neighbours, 3 folds) on
        /// <c>inputs</c>/<c>outputs</c> and appends the resulting mean errors and the
        /// aggregate accuracy to <c>message</c>.
        /// </summary>
        public void knnValidation()
        {
            var validator = CrossValidation.Create(
                k: 3,
                learner: p => new KNearestNeighbors(k: 4),
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                x: inputs,
                y: outputs);

            var outcome = validator.Learn(inputs, outputs);

            // Problem dimensions as reported by the cross-validation result.
            int sampleCount = outcome.NumberOfSamples;
            int inputCount  = outcome.NumberOfInputs;
            int outputCount = outcome.NumberOfOutputs;

            // Mean error over the training and the validation partitions.
            double meanTrainingError   = outcome.Training.Mean;
            double meanValidationError = outcome.Validation.Mean;

            // Accuracy taken from the aggregate confusion matrix.
            GeneralConfusionMatrix confusion = outcome.ToConfusionMatrix(inputs, outputs);
            double overallAccuracy           = confusion.Accuracy;

            message += "Knn Validacja\n"
                       + "trainingError " + meanTrainingError.ToString() + "\n"
                       + "validationError " + meanValidationError.ToString() + "\n"
                       + "accuracy " + overallAccuracy.ToString() + "\n\n";
        }
        /// <summary>
        /// Cross-validates a kernel discriminant analysis classifier with a quadratic
        /// kernel and stores the resulting confusion matrix in <c>ConfusionMatrix</c>.
        /// </summary>
        /// <param name="folds">Number of cross-validation folds to use.</param>
        /// <param name="inputs">Feature vectors, one row per sample.</param>
        /// <param name="outputs">Class label for each row of <paramref name="inputs"/>.</param>
        public override void ApplyCrossValidation(int folds, double[][] inputs, int[] outputs)
        {
            crossValidation = CrossValidation.Create(

                // Use the fold count requested by the caller instead of a fixed value.
                k: folds,

                // A fresh learning algorithm is created for every fold.
                learner: (p) => new KernelDiscriminantAnalysis()
                {
                    Kernel = new Quadratic()
                },

                // Performance is measured as the zero-one (misclassification) loss.
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),

                // No special preparation is needed before learning.
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

                // Data that will be used in cross-validation.
                x: inputs, y: outputs
                );
            var result = crossValidation.Learn(inputs, outputs);

            ConfusionMatrix = result.ToConfusionMatrix(inputs, outputs).Matrix;
        }
        /// <summary>
        /// Cross-validates a multiclass support vector machine with a Gaussian kernel
        /// and stores the resulting confusion matrix in <c>ConfusionMatrix</c>.
        /// </summary>
        /// <param name="folds">Number of cross-validation folds to use.</param>
        /// <param name="inputs">Feature vectors, one row per sample.</param>
        /// <param name="outputs">Class label for each row of <paramref name="inputs"/>.</param>
        public override void ApplyCrossValidation(int folds, double[][] inputs, int[] outputs)
        {
            crossValidation = CrossValidation.Create(

                // Use the fold count requested by the caller instead of a fixed value.
                k: folds,

                learner: (p) => new MulticlassSupportVectorLearning <Gaussian>()
                {
                    // Use SMO to train the underlying SVMs in each of the
                    // binary class subproblems.
                    Learner = (param) => new SequentialMinimalOptimization <Gaussian>()
                    {
                        // Estimate a starting guess for the Gaussian kernel's parameters.
                        UseKernelEstimation = true
                    }
                },

                // Performance is measured as the zero-one (misclassification) loss.
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),

                // No special preparation is needed before learning.
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

                // Data that will be used in cross-validation.
                x: inputs, y: outputs
                );
            var result = crossValidation.Learn(inputs, outputs);

            ConfusionMatrix = result.ToConfusionMatrix(inputs, outputs).Matrix;
        }
        /// <summary>
        /// Cross-validates a Naive Bayes classifier on the inputs converted to integers
        /// and stores the resulting confusion matrix in <c>ConfusionMatrix</c>.
        /// </summary>
        /// <param name="folds">Number of cross-validation folds to use.</param>
        /// <param name="inputs">Feature vectors; every value is converted with <c>Convert.ToInt32</c>.</param>
        /// <param name="outputs">Class label for each row of <paramref name="inputs"/>.</param>
        public void ApplyCrossValidation(int folds, double[][] inputs, int[] outputs)
        {
            // Convert the features once and reuse the same array everywhere below,
            // rather than repeating the conversion for every call.
            int[][] discreteInputs = inputs
                                     .Select(row => row.Select(value => Convert.ToInt32(value)).ToArray())
                                     .ToArray();

            crossValidation = CrossValidation.Create(

                // Use the fold count requested by the caller instead of a fixed value.
                k: folds,

                // A fresh learning algorithm is created for every fold.
                learner: (p) => new B.NaiveBayesLearning(),

                // Performance is measured as the zero-one (misclassification) loss.
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),

                // No special preparation is needed before learning.
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

                // Data that will be used in cross-validation.
                x: discreteInputs, y: outputs
                );
            var result = crossValidation.Learn(discreteInputs, outputs);

            ConfusionMatrix = result.ToConfusionMatrix(discreteInputs, outputs).Matrix;
        }
Пример #6
0
        /// <summary>
        /// Measures the 10-fold cross-validation performance of a C4.5 decision tree
        /// (Join = 2, MaxHeight = 5) on the Wisconsin Diagnostic Breast Cancer data set
        /// and prints the aggregate accuracy to the console.
        /// </summary>
        private static void breastCancerExample()
        {
            // Fix the random seed so that repeated runs give the same results.
            Accord.Math.Random.Generator.Seed = 0;

            // Load the data set: the task is to tell malignant from benign cases
            // based on characteristics extracted from a breast cancer exam.
            var dataset = new WisconsinDiagnosticBreastCancer();

            double[][] features = dataset.Features;    // per the original notes: 569 samples, 30 features
            int[]      classes  = dataset.ClassLabels; // per the original notes: 2 distinct labels

            var validator = CrossValidation.Create(
                k: 10,

                // Every fold gets its own, identically configured C4.5 learner.
                learner: p => new C45Learning()
                {
                    Join      = 2,
                    MaxHeight = 5
                },

                // Performance measure: zero-one (misclassification) loss.
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),

                // Plain fitting, no extra work before learning.
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

                // The data to be used in cross-validation.
                x: features,
                y: classes);

            // Run the cross-validation over the data.
            var outcome = validator.Learn(features, classes);

            // Problem dimensions (values noted in the original sample: 569 / 30 / 2).
            int sampleCount = outcome.NumberOfSamples;
            int inputCount  = outcome.NumberOfInputs;
            int outputCount = outcome.NumberOfOutputs;

            // Mean errors (original sample notes ~0.0178 for training, ~0.0756 for validation).
            double meanTrainingError   = outcome.Training.Mean;
            double meanValidationError = outcome.Validation.Mean;

            // Aggregate confusion matrix (original sample notes an accuracy of ~0.9244).
            GeneralConfusionMatrix confusion = outcome.ToConfusionMatrix(features, classes);
            double overallAccuracy           = confusion.Accuracy;

            string formattedPercent = (overallAccuracy * 100).ToString("N2");
            Console.WriteLine("C45Learning learning algorithm accuracy is %" + formattedPercent);
        }
        /// <summary>
        /// Builds the feature/label arrays from <c>trainingSets</c>, trains the library
        /// C4.5 tree on all of them (stored in <c>decisionTreeLib</c>) and stores the
        /// 5-fold cross-validation accuracy, rounded to three decimals, in <c>accuracyC45lib</c>.
        /// </summary>
        private void trainingC45lib()
        {
            Accord.Math.Random.Generator.Seed = 0;
            c45Learning = new C45Learning() { Join = 2, MaxHeight = 5 };

            int sampleCount = trainingSets.Count;

            double[][] features = new double[sampleCount][];
            int[]      labels   = new int[sampleCount];

            int row = 0;
            foreach (Patient patient in trainingSets)
            {
                double[] vector = new double[9];

                // Attribute 1 is bucketed into three bands: below 30, below 60, and the rest.
                vector[0] = patient.get(1) < 30 ? 0 : patient.get(1) < 60 ? 1 : 2;

                // Attributes 2..9 are copied unchanged.
                for (int attribute = 2; attribute <= 9; attribute++)
                {
                    vector[attribute - 1] = patient.get(attribute);
                }

                features[row] = vector;
                labels[row]   = patient.get(10); // attribute 10 is used as the class label
                row++;
            }

            var validator = CrossValidation.Create(
                k: 5,
                learner: p => new C45Learning() { Join = 2, MaxHeight = 5 },
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                x: features,
                y: labels);

            // Train the tree on the full data first, then run the cross-validation.
            decisionTreeLib = c45Learning.Learn(features, labels);
            var outcome = validator.Learn(features, labels);

            GeneralConfusionMatrix confusion = outcome.ToConfusionMatrix(features, labels);
            accuracyC45lib = Math.Round(confusion.Accuracy, 3);
        }
Пример #8
0
        /// <summary>
        /// Fits a multinomial logistic regression with the lower-bound Newton-Raphson
        /// estimator, reports 10-fold cross-validation statistics, saves the fitted model
        /// and returns its decisions for the given inputs.
        /// </summary>
        /// <param name="input1">Feature vectors, one row per sample.</param>
        /// <param name="labels">Class label for each row of <paramref name="input1"/>.</param>
        /// <param name="SaveFile">
        /// Path used to derive the model file name: every ".csv" occurrence is replaced by ".MLR.save".
        /// </param>
        /// <returns>The class decided by the fitted model for every row of <paramref name="input1"/>.</returns>
        public static int[] MultiNomialLogRegressionLowerBoundNewtonRaphson(double[][] input1, int[] labels, string SaveFile)
        {
            // http://accord-framework.net/docs/html/T_Accord_Statistics_Models_Regression_MultinomialLogisticRegression.htm

            // Settings shared by the final estimator and the per-fold estimators, so that
            // the cross-validation statistics describe the same configuration that is
            // used for the model that is reported and saved.
            const int    maxIterations = 10;
            const double tolerance     = 1e-6;

            // Estimation algorithm for the final regression model.
            LowerBoundNewtonRaphson lbnr = new LowerBoundNewtonRaphson()
            {
                MaxIterations = maxIterations,
                Tolerance     = tolerance
            };

            var cv = CrossValidation.Create(

                k: 10, // 10-fold cross validation

                // Each fold gets its own estimator with the shared settings.
                learner: (p) => new LowerBoundNewtonRaphson()
                {
                    MaxIterations = maxIterations,
                    Tolerance     = tolerance
                },

                // Performance is measured as the zero-one (misclassification) loss.
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),

                // No special preparation is needed before learning.
                fit: (teach, x, y, w) => teach.Learn(x, y, w),

                // Data that will be used in cross-validation.
                x: input1, y: labels
                );

            // Generate a cross validation of the data.
            var cvresult = cv.Learn(input1, labels);

            // Iteratively estimate the model on the complete data.
            MultinomialLogisticRegression mlr = lbnr.Learn(input1, labels);

            // Generate statistics from confusion matrices.
            ConfusionMatrix        cm  = ConfusionMatrix.Estimate(mlr, input1, labels);
            GeneralConfusionMatrix gcm = cvresult.ToConfusionMatrix(input1, labels);

            Funcs.Utility.OutPutStats(cvresult.NumberOfSamples, cvresult.NumberOfInputs,
                                      cvresult.Training.Mean, gcm.Accuracy, cm.FalsePositives, cm.FalseNegatives, cm.FScore);

            // Compute the model answers.
            int[]  answers       = mlr.Decide(input1);
            string modelsavefile = SaveFile.Replace(".csv", ".MLR.save");

            mlr.Save(modelsavefile, compression: SerializerCompression.None);

            return(answers);
        }
Пример #9
0
        /// <summary>
        /// Fits a multinomial logistic regression with the BFGS-based learner, reports
        /// 4-fold cross-validation statistics, saves the fitted model to
        /// <paramref name="fName"/> and returns its decisions for the given inputs.
        /// </summary>
        /// <param name="input">Feature vectors, one row per sample.</param>
        /// <param name="labels">Class label for each row of <paramref name="input"/>.</param>
        /// <param name="fName">Path of the file the fitted model is saved to (uncompressed).</param>
        /// <returns>The class decided by the fitted model for every row of <paramref name="input"/>.</returns>
        public static int[] MultiNomialLogisticRegressionBFGS(double[][] input, int[] labels, string fName)
        {
            /* The L-BFGS algorithm is a member of the broad family of quasi-Newton optimization methods.
             * L-BFGS stands for 'Limited memory BFGS'. Indeed, L-BFGS uses a limited memory variation of
             * the Broyden–Fletcher–Goldfarb–Shanno (BFGS) update to approximate the inverse Hessian matrix
             * (denoted by Hk). Unlike the original BFGS method which stores a dense approximation, L-BFGS
             * stores only a few vectors that represent the approximation implicitly. Due to its moderate
             * memory requirement, L-BFGS method is particularly well suited for optimization problems with
             * a large number of variables.
             */

            // Create the learner and estimate the regression on the complete data.
            var mlbfgs = new MultinomialLogisticLearning <BroydenFletcherGoldfarbShanno>();
            MultinomialLogisticRegression mlr = mlbfgs.Learn(input, labels);

            // Cross-validate the same kind of learner on partitions of the data to
            // estimate how well the model is expected to generalize.
            int folds = 4; // could play around with this later
            var cv    = CrossValidation.Create(k: folds, learner: (p) => new MultinomialLogisticLearning <BroydenFletcherGoldfarbShanno>(),
                                               loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                                               fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                                               x: input, y: labels);
            var result = cv.Learn(input, labels);
            GeneralConfusionMatrix gcm = result.ToConfusionMatrix(input, labels);
            ConfusionMatrix        cm  = ConfusionMatrix.Estimate(mlr, input, labels);

            // Output relevant statistics.
            Funcs.Utility.OutPutStats(result.NumberOfSamples, result.NumberOfInputs,
                                      result.Training.Mean, gcm.Accuracy, cm.FalsePositives, cm.FalseNegatives, cm.FScore);

            // Compute the model predictions; these are the return value.
            int[] answers = mlr.Decide(input);

            mlr.Save(fName, compression: SerializerCompression.None);

            return(answers);
        }
Пример #10
0
        /// <summary>
        /// Prepares the input/output variables and cross-validates a Bernoulli
        /// Naive Bayes classifier on them, storing the outcome in <c>Result</c>.
        /// </summary>
        /// <param name="numOfFolds">Number of cross-validation folds.</param>
        /// <param name="minOccurences">Value forwarded to <c>CalcInputAndOutputVariables</c>.</param>
        private void ClassifyDataByNaiveBayes(int numOfFolds = 3, int minOccurences = 1)
        {
            CalcInputAndOutputVariables(minOccurences);

            var validator = CrossValidation.Create(
                k: numOfFolds,
                learner: (p) => new NaiveBayesLearning <BernoulliDistribution>(),
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                x: InputVariables, y: OutputVariables);

            // Run the cross-validation; the 'as' conversion yields null when the
            // outcome is not of the expected result type.
            var outcome = validator.Learn(InputVariables, OutputVariables);
            Result = outcome as CrossValidationResult <TModel, double[], int>;
        }
        /// <summary>
        /// Measures the 5-fold cross-validation performance of an ID3 decision tree
        /// (Join = 1, MaxHeight = 0) on <c>inputs</c>/<c>outputs</c>.
        /// </summary>
        /// <returns>The aggregate accuracy multiplied by 100.</returns>
        public double Accuracy()
        {
            var validator = CrossValidation.Create(
                k: 5,

                // Every fold gets its own, identically configured ID3 learner.
                learner: p => new ID3Learning()
                {
                    Join      = 1,
                    MaxHeight = 0
                },

                // Plain fitting, no extra work before learning.
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

                // Performance measure: zero-one (misclassification) loss.
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),

                // The data to be used in cross-validation.
                x: inputs,
                y: outputs);

            // Run the cross-validation over the data.
            var outcome = validator.Learn(inputs, outputs);

            // Problem dimensions (values noted in the original sample: 1000 / 4 / 6).
            int sampleCount = outcome.NumberOfSamples;
            int inputCount  = outcome.NumberOfInputs;
            int outputCount = outcome.NumberOfOutputs;

            double meanTrainingError   = outcome.Training.Mean;
            double meanValidationError = outcome.Validation.Mean;

            // Aggregate confusion matrix.
            GeneralConfusionMatrix confusion = outcome.ToConfusionMatrix(inputs, outputs);

            return confusion.Accuracy * 100;
        }
Пример #12
0
        /// <summary>
        /// Prepares the input/output variables and cross-validates a logistic regression
        /// trained with iteratively reweighted least squares, storing the outcome in <c>Result</c>.
        /// </summary>
        /// <param name="numOfFolds">Number of cross-validation folds.</param>
        /// <param name="minOccurences">Value forwarded to <c>CalcInputAndOutputVariables</c>.</param>
        /// <param name="maxIterations">Maximum number of iterations for each fold's learner.</param>
        private void ClassifyDataByLogisticRegression(int numOfFolds = 3, int minOccurences = 1, int maxIterations = 100)
        {
            CalcInputAndOutputVariables(minOccurences);

            var cvLogisticRegressionClassifier = CrossValidation.Create(
                k: numOfFolds,
                learner: (p) => new IterativeReweightedLeastSquares <LogisticRegression>()
                {
                    // Honour the caller's limit; the default (100) matches the
                    // value that used to be hard-coded here.
                    MaxIterations  = maxIterations,
                    Regularization = 1e-6
                },
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                x: InputVariables,
                y: OutputVariables
                );

            // Run the cross-validation; the 'as' conversion yields null when the
            // outcome is not of the expected result type.
            Result = cvLogisticRegressionClassifier.Learn(InputVariables, OutputVariables) as CrossValidationResult <TModel, double[], int>;
        }
Пример #13
0
        /// <summary>
        /// Cross-validates a logistic regression trained with iteratively reweighted
        /// least squares (100 iterations at most, regularization 1e-6).
        /// </summary>
        /// <param name="inputs">Feature vectors, one row per sample.</param>
        /// <param name="outputs">Class label for each row of <paramref name="inputs"/>.</param>
        /// <returns>The cross-validation result.</returns>
        public CrossValidationResult <LogisticRegression, double[], int> BuildModel(double[][] inputs, int[] outputs)
        {
            // The fold count comes from the application settings.
            var validator = CrossValidation.Create <LogisticRegression, IterativeReweightedLeastSquares <LogisticRegression>, double[], int>(
                k: _appSettings.ModelNumFolds,
                learner: p => new IterativeReweightedLeastSquares <LogisticRegression>
                {
                    MaxIterations  = 100,
                    Regularization = 1e-6
                },
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                x: inputs, y: outputs);

            return validator.Learn(inputs, outputs);
        }
Пример #14
0
        /// <summary>
        /// "Generate" button handler. Writes generated samples for every class to a text
        /// file chosen by the user and, on request, cross-validates three classifiers
        /// (Naive Bayes, k-nearest neighbours, multiclass SVM) on the generated data,
        /// shows their accuracies and dumps them to "TestDataDump.txt".
        /// </summary>
        /// <param name="sender">Event source (not used by the handler).</param>
        /// <param name="e">Event data (not used by the handler).</param>
        private void Button5_Click(object sender, RoutedEventArgs e) //Generate
        {
            // Generation needs at least two classes ...
            if (classes.Count < 2)
            {
                var dialogResult = System.Windows.MessageBox.Show(
                    "Please have at least two classes created to generate",
                    "Data generating error", System.Windows.MessageBoxButton.OK,
                    System.Windows.MessageBoxImage.Warning);
                return;
            }
            // ... and at least one attribute.
            if (attrs.Count < 1)
            {
                var dialogResult = System.Windows.MessageBox.Show(
                    "Please have at least one attribute created to generate",
                    "Data generating error", System.Windows.MessageBoxButton.OK,
                    System.Windows.MessageBoxImage.Warning);
                return;
            }
            // Ask the user where to store the generated data.
            using (var dirB = new System.Windows.Forms.SaveFileDialog()) {
                dirB.Filter     = "Text Files | *.txt";
                dirB.DefaultExt = "txt";
                var res = dirB.ShowDialog();
                if (res == System.Windows.Forms.DialogResult.OK)
                {
                    // Generated feature rows and their class indices, kept in memory
                    // for the optional cross-validation below.
                    List <float[]> attrValues  = new List <float[]>();
                    List <int>     classValues = new List <int>();

                    using (var file = new System.IO.StreamWriter(dirB.FileName)) {
                        string line;// = "Class";
                        //foreach (var v in attrs)
                        //    line += "," + v.Key;
                        //file.WriteLine(line);
                        for (int v = 0; v < classes.Count; v++)
                        {
                            //foreach (var v in classes)
                            // classes[v].Value is the number of rows written for class v.
                            for (int n = 0; n < classes[v].Value; n++)
                            {
                                // Each row starts with the class key; the class index is the label.
                                line = classes[v].Key;
                                classValues.Add(v);
                                List <float> aVals = new List <float>();

                                // One generated value per entry of classAttrs[v].
                                // NOTE(review): assumes attrs has at least classAttrs[v].Count entries - confirm.
                                for (int t = 0; t < classAttrs[v].Count; t++)
                                {
                                    float aVal = attrs[t].Value.genetare(classAttrs[v][t]);
                                    aVals.Add(aVal);
                                    // Invariant culture keeps the file format independent of the UI locale.
                                    line += "," + aVal.ToString(System.Globalization.CultureInfo.InvariantCulture);
                                }

                                attrValues.Add(aVals.ToArray <float>());
                                file.WriteLine(line);
                            }
                        }
                    }

                    var dialogResult = System.Windows.MessageBox.Show("Do you want to test the generated data?", "Data testing - crossvalidation", System.Windows.MessageBoxButton.YesNo);
                    if (dialogResult == MessageBoxResult.Yes)
                    {
                        // Two views of the same data: doubles for KNN/SVM and integers
                        // (value * 100, rounded) for Naive Bayes.
                        float[][]  inputs   = attrValues.ToArray();
                        double[][] inputs_d = inputs.Select(xa => xa.Select(ya => (double)ya).ToArray()).ToArray();
                        int[][]    inputs_i = inputs.Select(xa => xa.Select(ya => (int)Math.Round(ya * 100)).ToArray()).ToArray();
                        int[]      outputs  = classValues.ToArray();


                        //var learn = new NaiveBayesLearning();
                        //NaiveBayes nb = learn.Learn(inputs, outputs);

                        // --- Naive Bayes, 4-fold cross-validation on the integer data ---
                        var cv = CrossValidation.Create(
                            k: 4,
                            learner: (p) => new NaiveBayesLearning(),
                            loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                            fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                            x: inputs_i, y: outputs
                            );

                        var result = cv.Learn(inputs_i, outputs);

                        int numberOfSamples = result.NumberOfSamples;
                        int numberOfInputs  = result.NumberOfInputs;
                        int numberOfOutputs = result.NumberOfOutputs;

                        double trainingError       = result.Training.Mean;
                        double validationError     = result.Validation.Mean;
                        GeneralConfusionMatrix gcm = result.ToConfusionMatrix(inputs_i, outputs);
                        double nb_accuracy         = gcm.Accuracy;

                        //..................
                        // --- k-nearest neighbours, 4-fold cross-validation on the double data ---
                        // Despite its name this is the rounded square root of the number of
                        // generated samples; it is used as the neighbour count k.
                        int classesSqrt = (int)Math.Round(Math.Sqrt(outputs.Length));

                        var crossvalidation = CrossValidation.Create(
                            k: 4,
                            learner: (p) => new KNearestNeighbors(k: classesSqrt),
                            loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                            fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                            x: inputs_d, y: outputs
                            );
                        var result2 = crossvalidation.Learn(inputs_d, outputs);
                        // We can grab some information about the problem:
                        numberOfSamples = result2.NumberOfSamples;
                        numberOfInputs  = result2.NumberOfInputs;
                        numberOfOutputs = result2.NumberOfOutputs;

                        trainingError   = result2.Training.Mean;
                        validationError = result2.Validation.Mean;

                        // If desired, compute an aggregate confusion matrix for the validation sets:
                        gcm = result2.ToConfusionMatrix(inputs_d, outputs);
                        double knn_accuracy = gcm.Accuracy;

                        //............................

                        // --- Multiclass SVM (Gaussian kernel, SMO), 4-fold cross-validation on the double data ---
                        var crossvalidationsvm = CrossValidation.Create(
                            k: 4,
                            learner: (p) => new MulticlassSupportVectorLearning <Gaussian>()
                        {
                            Learner = (param) => new SequentialMinimalOptimization <Gaussian>()
                            {
                                UseKernelEstimation = true
                            }
                        },
                            loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                            fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                            x: inputs_d, y: outputs
                            );
                        //crossvalidationReadsvm.ParallelOptions.MaxDegreeOfParallelism = 1;
                        var resultsvm = crossvalidationsvm.Learn(inputs_d, outputs);
                        // We can grab some information about the problem:
                        var numberOfSamplessvm = resultsvm.NumberOfSamples;
                        var numberOfInputssvm  = resultsvm.NumberOfInputs;
                        var numberOfOutputssvm = resultsvm.NumberOfOutputs;

                        var trainingErrorsvm   = resultsvm.Training.Mean;
                        var validationErrorsvm = resultsvm.Validation.Mean;

                        var    CMsvm        = resultsvm.ToConfusionMatrix(inputs_d, outputs);
                        double svm_accuracy = CMsvm.Accuracy;


                        // Show the three accuracies as percentages (order: Naive Bayes, KNN, SVM).
                        System.Windows.MessageBox.Show("Naive Bayes Accuracy: " + (nb_accuracy * 100)
                                                       .ToString("0.00", System.Globalization.CultureInfo.InvariantCulture)
                                                       + "%\n" +
                                                       "\nk Nearest Neighbors Accuracy: " + (knn_accuracy * 100)
                                                       .ToString("0.00", System.Globalization.CultureInfo.InvariantCulture)
                                                       + "%\n" +
                                                       "\nSupport Vector Machine Accuracy: " + (svm_accuracy * 100)
                                                       .ToString("0.00", System.Globalization.CultureInfo.InvariantCulture)
                                                       + "%\n", "Data testing - crossvalidation", System.Windows.MessageBoxButton.OK);
                        // Dump the sample count and the accuracies to a file with a relative path.
                        // Note the column order here is KNN, Naive Bayes, SVM, which differs
                        // from the order shown in the message box above.
                        using (var write = new System.IO.StreamWriter("TestDataDump.txt"))
                        {
                            write.WriteLine("GeneratedDataAmt," + outputs.Length);
                            write.WriteLine("Accuracy," +
                                            (100.0 * knn_accuracy).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture) + "," +
                                            (100.0 * nb_accuracy).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture) + "," +
                                            (100.0 * svm_accuracy).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture));
                        }
                        //System.Diagnostics.Process.Start("TestDataDump.txt");
                        // Optionally open the generated data file.
                        dialogResult = System.Windows.MessageBox.Show("Do you want to open the file with generated data?", "Data testing - extended data", System.Windows.MessageBoxButton.YesNo);
                        if (dialogResult == MessageBoxResult.Yes)
                        {
                            System.Diagnostics.Process.Start(dirB.FileName);
                        }
                    }
                }
            }
        }
Пример #15
0
        /// <summary>
        /// Cross-validates and trains an Accord.NET <c>ProbabilisticCoordinateDescent</c> model
        /// (L1-regularized logistic regression), reports statistics through
        /// <c>Utility.OutPutStats</c>, saves the trained model to disk and returns the model's
        /// decisions for the training inputs.
        /// </summary>
        /// <param name="input1">Feature vectors, one inner array per sample.</param>
        /// <param name="labels">Class label for each sample in <paramref name="input1"/>.</param>
        /// <param name="SaveFile">
        /// Path used to derive the model file name: every ".csv" in the path is replaced by
        /// ".PCD.save". If the path contains no ".csv", ".PCD.save" is appended instead so the
        /// model is never written over the file named by <paramref name="SaveFile"/> itself.
        /// </param>
        /// <returns>
        /// The model's boolean decisions for <paramref name="input1"/>, converted by
        /// <c>Funcs.Utility.BoolToInt</c> (presumably to 0/1 - confirm against that helper).
        /// </returns>
        /// <remarks>
        /// Side effects: resets the global Accord random generator seed to 0, calls
        /// <c>Utility.OutPutStats</c>, and writes the uncompressed model file.
        /// </remarks>
        public static int[] ProbabilisticCoordinateDescent(double[][] input1, int[] labels, string SaveFile)
        {
            // http://accord-framework.net/docs/html/T_Accord_MachineLearning_VectorMachines_Learning_ProbabilisticCoordinateDescent.htm

            /* This class implements a SupportVectorMachine learning algorithm specifically crafted for
             * probabilistic linear machines only. It provides a L1-regularized coordinate descent learning
             * algorithm for optimizing the learning problem. The code has been based on liblinear's
             * solve_l1r_lr method, whose original description is provided below.
             *
             * Liblinear's solver -s 6: L1R_LR. A coordinate descent algorithm for L1-regularized logistic
             * regression (probabilistic svm) problems.
             */

            int folds = 5;

            // Fixed seed so the cross-validation fold assignment is reproducible between runs.
            Accord.Math.Random.Generator.Seed = 0;
            var cv = CrossValidation.Create(

                k: folds,     // Number of cross-validation folds

                // First we define the learning algorithm. Note that the folds are trained
                // with default settings, not with the Tolerance/Complexity used for the
                // final model below.
                learner: (p) => new ProbabilisticCoordinateDescent(),

                // Now we have to specify how the model's performance should be measured:
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),

                // This function can be used to perform any special
                // operations before the actual learning is done, but
                // here we will just leave it as simple as it can be:
                fit: (teach, x, y, w) => teach.Learn(x, y, w),

                // Finally, we have to pass the input and output data
                // that will be used in cross-validation.
                x: input1, y: labels
                );
            var cvresult = cv.Learn(input1, labels);
            GeneralConfusionMatrix gcm = cvresult.ToConfusionMatrix(input1, labels);

            // Train the final model on the complete data set.
            var teacher = new ProbabilisticCoordinateDescent()
            {
                Tolerance  = 1e-10,
                Complexity = 1e+10,
                // learn a hard-margin model

                /* Complexity (cost) parameter C. Increasing the value of C forces the creation of a more
                 * accurate model that may not generalize well. If this value is not set and UseComplexityHeuristic
                 * is set to true, the framework will automatically guess a value for C. If this value is manually
                 * set to something else, then UseComplexityHeuristic will be automatically disabled and the given
                 * value will be used instead.
                 */
            };
            var             svm           = teacher.Learn(input1, labels);
            var             svmregression = (LogisticRegression)svm;

            // Confusion matrix of the final model evaluated on its own training data.
            ConfusionMatrix cm            = ConfusionMatrix.Estimate(svm, input1, labels);

            // Reported values: sample count, input count and mean training loss from the
            // cross-validation, accuracy of the cross-validation confusion matrix, and
            // false positives, false negatives and F-score of the final model.
            Utility.OutPutStats(cvresult.NumberOfSamples, cvresult.NumberOfInputs, cvresult.Training.Mean,
                                gcm.Accuracy, cm.FalsePositives, cm.FalseNegatives, cm.FScore);

            // Write the model out to a save file
            string modelsavefilename = SaveFile.Replace(".csv", ".PCD.save");
            if (modelsavefilename == SaveFile)
            {
                // String.Replace found no ".csv" and returned the path unchanged (for example
                // "DATA.CSV" or "data.txt"). Saving there would overwrite that file with the
                // serialized model, so append the suffix instead.
                modelsavefilename = SaveFile + ".PCD.save";
            }

            svmregression.Save(modelsavefilename, compression: SerializerCompression.None);

            bool[] answers = svmregression.Decide(input1);
            return Funcs.Utility.BoolToInt(answers);
        }
        private void GenerateBasedOnData()
        {
            List <string[]> generating = new List <string[]>(); // do ewentualnego sprawdzania

            var attrType = RemoveAt(this.attrType, 0);

            //tutaj dorzucam tworzenie wykresu ciągłego prawdopodobieństwa
            Spline3Deg[,] probabilities = new Spline3Deg[classes, attribs];
            for (int i = 0; i < attribs; i++)
            {
                if (attrType[i].Equals("double") || attrType[i].Equals("integer"))
                {
                    for (int j = 0; j < classes; j++)
                    {
                        int      c    = values.ElementAt(j).Value.Item2.ElementAt(i).Value.Count;
                        double[] y, x = new double[c];
                        SortedList <double, int> temp = new SortedList <double, int>();
                        foreach (var v in values.ElementAt(j).Value.Item2.ElementAt(i).Value)
                        {
                            int tI = v.Value; double tD = Double.Parse(v.Key.Replace(" ", string.Empty),
                                                                       System.Globalization.NumberStyles.AllowDecimalPoint,
                                                                       System.Globalization.NumberFormatInfo.InvariantInfo);
                            temp.Add(tD, tI);
                        }
                        y    = temp.Keys.ToArray();
                        x[0] = 0;
                        for (int k = 1; k < temp.Count; k++)
                        {
                            x[k] = x[k - 1] + temp.ElementAt(k - 1).Value + temp.ElementAt(k).Value;
                        }
                        probabilities[j, i] = new Spline3Deg(x, y);
                    }
                }
            }


            //do sprawdzania punktacji później
            //podzielić dane wejściowe i wygenerowane na klasy i artybuty
            var readClass  = new int[reading.Count];
            var readAttr_d = new double[reading.Count, reading.ElementAt(0).Length - 1].ToJagged();

            var stringIntCheatSheet = new Dictionary <string, int> [reading.ElementAt(0).Length];

            for (int i = 0; i < stringIntCheatSheet.Length; i++)
            {
                stringIntCheatSheet[i] = new Dictionary <string, int>();
            }

            for (int x = 0; x < reading.Count; x++)
            {
                for (int y = 0; y < reading.ElementAt(0).Length; y++)
                {
                    double rr = 0;
                    string ss = reading.ElementAt(x)[y];
                    if (!double.TryParse(ss, System.Globalization.NumberStyles.AllowDecimalPoint,
                                         System.Globalization.NumberFormatInfo.InvariantInfo, out rr) ||
                        y == 0)
                    {
                        if (!stringIntCheatSheet[y].ContainsKey(ss))
                        {
                            stringIntCheatSheet[y].Add(ss, stringIntCheatSheet[y].Count);
                        }
                        rr = stringIntCheatSheet[y][ss];
                    }
                    if (y == 0)
                    {
                        readClass[x] = (int)rr;
                    }
                    else
                    {
                        readAttr_d[x][y - 1] = rr;
                    }
                }
            }
            int readClassesSqrt = (int)Math.Round(Math.Sqrt(reading.Count)),
                genClassesSqrt, mixClassesSqrt;
            var learnKnn = new KNearestNeighbors(readClassesSqrt);

            var knn = learnKnn.Learn(readAttr_d, readClass);

            double[] attrcr = new double[attribs];


            string[] bestattr = new string[attribs];
            double   bestscore;

            //czas generować ten szajs
            var newStuff = new string[newData, attribs + 1];

            for (int it = 0; it < newStuff.GetLength(0); it++)
            {
                bestscore = 0;

                int cl = rnd.Next(classes); //rnd to zadelkarowany wcześniej Random //losowanie klasy
                newStuff[it, 0] = values.ElementAt(cl).Key;
                int safety = 0;
                do
                {
                    for (int v = 1; v <= attribs; v++)
                    {     //losowanie wartości atrybutu
                        if (attrType[v - 1].Equals("string"))
                        { //funkcja dyskretna
                            int val = rnd.Next(values.ElementAt(cl).Value.Item1);
                            int b   = 0;
                            foreach (var a in values.ElementAt(cl).Value.Item2[v])
                            {
                                if (val < (b += a.Value))
                                {
                                    newStuff[it, v] = a.Key; //na Monte Carlo
                                    break;
                                }
                            }
                        }
                        else
                        {  //funkcja ciągła
                            Tuple <double, double> extr = probabilities[cl, v - 1].Limits();
                            double val = rnd.Next((int)extr.Item1, (int)extr.Item2) + rnd.NextDouble();
                            double r   = probabilities[cl, v - 1].y(val);
                            if (attrType[v - 1].Equals("double"))
                            {
                                newStuff[it, v] = r.ToString(fltPrec, System.Globalization.CultureInfo.InvariantCulture);
                            }
                            else //if (attrType[v - 1].Equals("integer"))
                            {
                                newStuff[it, v] = Math.Round(r).ToString();
                            }
                        }//koniec losowania wartości atrybutu
                        ///ekstra warunek bezpieczeństwa, bo czasami trafiają się NULLe
                        if (string.IsNullOrEmpty(newStuff[it, v]))
                        {
                            v--;
                            continue; //jeśli atrybut ma nulla, powtórz pętlę
                        }
                        ///koniec ekstra warunku bespieczeństwa
                    }//koniec generowania obiektu


                    //do tabliczki do sprawdzenia punktacji
                    for (int v = 1; v <= attribs; v++)
                    {
                        double rr = 0;
                        string ss = newStuff[it, v];
                        if (!double.TryParse(ss, System.Globalization.NumberStyles.AllowDecimalPoint,
                                             System.Globalization.NumberFormatInfo.InvariantInfo, out rr))
                        {
                            if (!stringIntCheatSheet[v].ContainsKey(ss))
                            {
                                stringIntCheatSheet[v].Add(ss, stringIntCheatSheet[v].Count);
                            }
                            rr = stringIntCheatSheet[v][ss];
                        }
                        attrcr[v - 1] = rr;
                    }
                    if (knn.Score(attrcr, cl) > bestscore)
                    {
                        for (int iter = 0; iter < attribs; iter++)
                        {
                            bestattr[iter] = newStuff[it, iter + 1];
                        }
                    }
                } while (knn.Score(attrcr, cl) < scoreH / 100 && ++safety < 1000);

                for (int iter = 0; iter < attribs; iter++)
                {
                    newStuff[it, iter + 1] = bestattr[iter];
                }
            }//koniec całego generowania

            //tu dać zapis do pliku
            string savefiledir = "";

            using (var dirB = new System.Windows.Forms.SaveFileDialog())
            {
                dirB.Filter     = "Text Files | *.txt";
                dirB.DefaultExt = "txt";
                var res = dirB.ShowDialog();
                if (res == System.Windows.Forms.DialogResult.OK)
                {
                    using (var write = new System.IO.StreamWriter(savefiledir = dirB.FileName))
                    {
                        for (int x = 0; x < newStuff.GetLength(0); x++)
                        {
                            string line = "";
                            for (int y = 0; y < newStuff.GetLength(1); y++)
                            {
                                line += newStuff[x, y] + ',';
                            }
                            line = line.Remove(line.Length - 1);
                            string[] temp = line.Split(',');
                            generating.Add(line.Split(','));
                            swap(ref temp[0], ref temp[clsCol]);
                            line = "";
                            for (int y = 0; y < temp.Length; y++)
                            {
                                line += temp[y] + ',';
                            }
                            line = line.Remove(line.Length - 1);
                            write.WriteLine(line);
                        }
                    }
                }
                else
                {
                    return;
                }
            }
            //tu dać walidację wygenerowanych danych

            var dialogResult = System.Windows.MessageBox.Show("Do you want to test the generated data?", "Data testing - extended data", System.Windows.MessageBoxButton.YesNo);

            if (dialogResult == MessageBoxResult.Yes)
            {
                var genClass = new int[generating.Count];
                //var genAttr = new int[generating.Count, generating.ElementAt(0).Length - 1].ToJagged();
                var genAttr_d = new double[generating.Count, generating.ElementAt(0).Length - 1].ToJagged();


                for (int x = 0; x < generating.Count; x++)
                {
                    for (int y = 0; y < generating.ElementAt(0).Length; y++)
                    {
                        double rr = 0;
                        string ss = generating.ElementAt(x)[y];
                        if (!double.TryParse(ss, System.Globalization.NumberStyles.AllowDecimalPoint,
                                             System.Globalization.NumberFormatInfo.InvariantInfo, out rr) || y == 0)
                        {
                            if (!stringIntCheatSheet[y].ContainsKey(ss))
                            {
                                stringIntCheatSheet[y].Add(ss, stringIntCheatSheet[y].Count);
                            }
                            rr = stringIntCheatSheet[y][ss];
                        }
                        if (y == 0)
                        {
                            genClass[x] = (int)rr;
                        }
                        else
                        {
                            genAttr_d[x][y - 1] = rr;
                        }
                    }
                }

                //przerobienie na tablicę intów, z przesunięciem dobli o precyzję
                var genAttr_i  = new int[generating.Count, generating.ElementAt(0).Length - 1].ToJagged();
                var readAttr_i = new int[reading.Count, reading.ElementAt(0).Length - 1].ToJagged();

                int shift = (int)Math.Pow(10, FltPrecBox.SelectedIndex + 1);
                for (int x = 0; x < generating.Count; x++)
                {
                    for (int y = 0; y < generating.ElementAt(0).Length - 1; y++)
                    {
                        if (attrType[y].Equals("double"))
                        {
                            genAttr_i[x][y] = (int)(genAttr_d[x][y] * shift);
                        }
                        else
                        {
                            genAttr_i[x][y] = (int)genAttr_d[x][y];
                        }
                    }
                }
                for (int x = 0; x < reading.Count; x++)
                {
                    for (int y = 0; y < reading.ElementAt(0).Length - 1; y++)
                    {
                        if (attrType[y].Equals("double"))
                        {
                            readAttr_i[x][y] = (int)(readAttr_d[x][y] * shift);
                        }
                        else
                        {
                            readAttr_i[x][y] = (int)readAttr_d[x][y];
                        }
                    }
                }


                int correctnb = 0, incorrectnb = 0, correctknn = 0, incorrectknn = 0, correctsvm = 0, incorrectsvm = 0;

                var        learn = new NaiveBayesLearning();
                NaiveBayes nb    = learn.Learn(readAttr_i, readClass);
                var        test  = nb.Decide(genAttr_i);
                foreach (var v in test)
                {
                    if (v.Equals(genClass[test.IndexOf(v)]))
                    {
                        correctnb++;
                    }
                    else
                    {
                        incorrectnb++;
                    }
                }

                /////////////////////////////////////////////////////////////////////////

                var testknn = knn.Decide(genAttr_d);
                for (int i = 0; i < testknn.Length; i++)
                //foreach (var v in testknn)
                {
                    if (testknn[i].Equals(genClass[i]))
                    {
                        correctknn++;
                    }
                    else
                    {
                        incorrectknn++;
                    }
                }
                /////////////////////////////////////////////////////////////////////////

                try
                {
                    var teach = new MulticlassSupportVectorLearning <Gaussian>()
                    {
                        // Configure the learning algorithm to use SMO to train the
                        //  underlying SVMs in each of the binary class subproblems.
                        Learner = (param) => new SequentialMinimalOptimization <Gaussian>()
                        {
                            // Estimate a suitable guess for the Gaussian kernel's parameters.
                            // This estimate can serve as a starting point for a grid search.
                            UseKernelEstimation = true
                        }
                    };
                    var svm = teach.Learn(readAttr_d, readClass);

                    var testsvm = svm.Decide(genAttr_d);
                    for (int i = 0; i < testsvm.Length; i++)
                    //foreach (var v in testknn)
                    {
                        if (testsvm[i].Equals(genClass[i]))
                        {
                            correctsvm++;
                        }
                        else
                        {
                            incorrectsvm++;
                        }
                    }
                }
                catch (AggregateException) { }
                ////////////////////////////////////////////////////////////

                double[][] mixAttr_d = new double[genAttr_d.GetLength(0) + readAttr_d.GetLength(0),
                                                  genAttr_d[0].Length].ToJagged();
                int[] mixClass = new int[genClass.Length + readClass.Length];

                Array.Copy(readClass, mixClass, readClass.Length);
                Array.Copy(genClass, 0, mixClass, readClass.Length, genClass.Length);

                Array.Copy(readAttr_d, mixAttr_d, readAttr_d.Length);
                Array.Copy(genAttr_d, 0, mixAttr_d, readAttr_d.Length, genAttr_d.Length);

                int[][] mixAttr_i = new int[genAttr_i.GetLength(0) + readAttr_i.GetLength(0),
                                            genAttr_i[0].Length].ToJagged();

                Array.Copy(readAttr_i, mixAttr_i, readAttr_i.Length);
                Array.Copy(genAttr_i, 0, mixAttr_i, readAttr_i.Length, genAttr_i.Length);

                //KROSWALIDACJAAAAAAAAAAAAAAAAAA
                genClassesSqrt = (int)Math.Round(Math.Sqrt(genClass.Length));
                mixClassesSqrt = (int)Math.Round(Math.Sqrt(mixClass.Length));

                //KNN

                var crossvalidationRead = CrossValidation.Create(
                    k: 4,
                    learner: (p) => new KNearestNeighbors(k: readClassesSqrt),
                    loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                    fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                    x: readAttr_d, y: readClass
                    );
                var resultRead = crossvalidationRead.Learn(readAttr_d, readClass);
                // We can grab some information about the problem:
                var numberOfSamplesRead = resultRead.NumberOfSamples;
                var numberOfInputsRead  = resultRead.NumberOfInputs;
                var numberOfOutputsRead = resultRead.NumberOfOutputs;

                var trainingErrorRead   = resultRead.Training.Mean;
                var validationErrorRead = resultRead.Validation.Mean;

                var    readCM       = resultRead.ToConfusionMatrix(readAttr_d, readClass);
                double readAccuracy = readCM.Accuracy;
                //////////////////////////////////////////////////////////
                var crossvalidationGen = CrossValidation.Create(
                    k: 4,
                    learner: (p) => new KNearestNeighbors(k: genClassesSqrt),
                    loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                    fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                    x: genAttr_d, y: genClass
                    );
                var resultGen = crossvalidationGen.Learn(genAttr_d, genClass);
                // We can grab some information about the problem:
                var numberOfSamplesGen = resultGen.NumberOfSamples;
                var numberOfInputsGen  = resultGen.NumberOfInputs;
                var numberOfOutputsGen = resultGen.NumberOfOutputs;

                var    trainingErrorGen   = resultGen.Training.Mean;
                var    validationErrorGen = resultGen.Validation.Mean;
                var    genCM       = resultGen.ToConfusionMatrix(genAttr_d, genClass);
                double genAccuracy = genCM.Accuracy;
                //////////////////////////////////////////////////////////

                var crossvalidationMix = CrossValidation.Create(
                    k: 4,
                    learner: (p) => new KNearestNeighbors(k: mixClassesSqrt),
                    loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                    fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                    x: mixAttr_d, y: mixClass
                    );
                var resultMix = crossvalidationMix.Learn(readAttr_d, readClass);
                // We can grab some information about the problem:
                var numberOfSamplesMix = resultMix.NumberOfSamples;
                var numberOfInputsMix  = resultMix.NumberOfInputs;
                var numberOfOutputsMix = resultMix.NumberOfOutputs;

                var trainingErrorMix   = resultMix.Training.Mean;
                var validationErrorMix = resultMix.Validation.Mean;

                var    mixCM       = resultMix.ToConfusionMatrix(mixAttr_d, mixClass);
                double mixAccuracy = mixCM.Accuracy;

                //NB
                var crossvalidationReadnb = CrossValidation.Create(
                    k: 4,
                    learner: (p) => new NaiveBayesLearning(),
                    loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                    fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                    x: readAttr_i, y: readClass
                    );
                var resultReadnb = crossvalidationReadnb.Learn(readAttr_i, readClass);
                // We can grab some information about the problem:
                var numberOfSamplesReadnb = resultReadnb.NumberOfSamples;
                var numberOfInputsReadnb  = resultReadnb.NumberOfInputs;
                var numberOfOutputsReadnb = resultReadnb.NumberOfOutputs;

                var trainingErrorReadnb   = resultReadnb.Training.Mean;
                var validationErrorReadnb = resultReadnb.Validation.Mean;

                var    readCMnb       = resultReadnb.ToConfusionMatrix(readAttr_i, readClass);
                double readAccuracynb = readCMnb.Accuracy;
                //////////////////////////////////////////////////////////
                var crossvalidationGennb = CrossValidation.Create(
                    k: 4,
                    learner: (p) => new NaiveBayesLearning(),
                    loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                    fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                    x: genAttr_i, y: genClass
                    );
                var resultGennb = crossvalidationGennb.Learn(genAttr_i, genClass);
                // We can grab some information about the problem:
                var numberOfSamplesGennb = resultGennb.NumberOfSamples;
                var numberOfInputsGennb  = resultGennb.NumberOfInputs;
                var numberOfOutputsGennb = resultGennb.NumberOfOutputs;

                var    trainingErrorGennb   = resultGennb.Training.Mean;
                var    validationErrorGennb = resultGennb.Validation.Mean;
                var    genCMnb       = resultGennb.ToConfusionMatrix(genAttr_i, genClass);
                double genAccuracynb = genCMnb.Accuracy;
                //////////////////////////////////////////////////////////

                var crossvalidationMixnb = CrossValidation.Create(
                    k: 4,
                    learner: (p) => new NaiveBayesLearning(),
                    loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                    fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                    x: mixAttr_i, y: mixClass
                    );
                var resultMixnb = crossvalidationMixnb.Learn(mixAttr_i, mixClass);
                // We can grab some information about the problem:
                var numberOfSamplesMixnb = resultMixnb.NumberOfSamples;
                var numberOfInputsMixnb  = resultMixnb.NumberOfInputs;
                var numberOfOutputsMixnb = resultMixnb.NumberOfOutputs;

                var trainingErrorMixnb   = resultMixnb.Training.Mean;
                var validationErrorMixnb = resultMixnb.Validation.Mean;

                var    mixCMnb       = resultMixnb.ToConfusionMatrix(mixAttr_i, mixClass);
                double mixAccuracynb = mixCMnb.Accuracy;

                //SVM
                double readAccuracysvm = 0, genAccuracysvm = 0, mixAccuracysvm = 0;
                try
                {
                    var crossvalidationReadsvm = CrossValidation.Create(
                        k: 4,
                        learner: (p) => new MulticlassSupportVectorLearning <Gaussian>()
                    {
                        Learner = (param) => new SequentialMinimalOptimization <Gaussian>()
                        {
                            UseKernelEstimation = true
                        }
                    },
                        loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                        fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                        x: readAttr_d, y: readClass
                        );
                    //crossvalidationReadsvm.ParallelOptions.MaxDegreeOfParallelism = 1;
                    var resultReadsvm = crossvalidationReadsvm.Learn(readAttr_d, readClass);
                    // We can grab some information about the problem:
                    var numberOfSamplesReadsvm = resultReadsvm.NumberOfSamples;
                    var numberOfInputsReadsvm  = resultReadsvm.NumberOfInputs;
                    var numberOfOutputsReadsvm = resultReadsvm.NumberOfOutputs;

                    var trainingErrorReadsvm   = resultReadsvm.Training.Mean;
                    var validationErrorReadsvm = resultReadsvm.Validation.Mean;

                    var readCMsvm = resultReadsvm.ToConfusionMatrix(readAttr_d, readClass);
                    readAccuracysvm = readCMsvm.Accuracy;
                }
                catch (AggregateException) { }
                //////////////////////////////////////////////////////////
                try
                {
                    var crossvalidationGensvm = CrossValidation.Create(
                        k: 4,
                        learner: (p) => new MulticlassSupportVectorLearning <Gaussian>()
                    {
                        Learner = (param) => new SequentialMinimalOptimization <Gaussian>()
                        {
                            UseKernelEstimation = true
                        }
                    },
                        loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                        fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                        x: genAttr_d, y: genClass
                        );
                    var resultGensvm = crossvalidationGensvm.Learn(genAttr_d, genClass);
                    // We can grab some information about the problem:
                    var numberOfSamplesGensvm = resultGensvm.NumberOfSamples;
                    var numberOfInputsGensvm  = resultGensvm.NumberOfInputs;
                    var numberOfOutputsGensvm = resultGensvm.NumberOfOutputs;

                    var trainingErrorGensvm   = resultGensvm.Training.Mean;
                    var validationErrorGensvm = resultGensvm.Validation.Mean;
                    var genCMsvm = resultGensvm.ToConfusionMatrix(genAttr_d, genClass);
                    genAccuracysvm = genCMsvm.Accuracy;
                }
                catch (AggregateException) { }
                //////////////////////////////////////////////////////////
                try
                {
                    var crossvalidationMixsvm = CrossValidation.Create(
                        k: 4,
                        learner: (p) => new MulticlassSupportVectorLearning <Gaussian>()
                    {
                        Learner = (param) => new SequentialMinimalOptimization <Gaussian>()
                        {
                            UseKernelEstimation = true
                        }
                    },
                        loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                        fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                        x: mixAttr_d, y: mixClass
                        );
                    var resultMixsvm = crossvalidationMixsvm.Learn(mixAttr_d, mixClass);
                    // We can grab some information about the problem:
                    var numberOfSamplesMixsvm = resultMixsvm.NumberOfSamples;
                    var numberOfInputsMixsvm  = resultMixsvm.NumberOfInputs;
                    var numberOfOutputsMixsvm = resultMixsvm.NumberOfOutputs;

                    var trainingErrorMixsvm   = resultMixsvm.Training.Mean;
                    var validationErrorMixsvm = resultMixsvm.Validation.Mean;

                    var mixCMsvm = resultMixsvm.ToConfusionMatrix(mixAttr_d, mixClass);
                    mixAccuracysvm = mixCMsvm.Accuracy;
                }
                catch (AggregateException) { }
                /////////////////////////////////////////////////
                if (correctsvm == 0 && incorrectsvm == 0)
                {
                    incorrectsvm = 1;
                }
                double knnRatio = 100.0 * correctknn / (correctknn + incorrectknn),
                       nbRatio  = 100.0 * correctnb / (correctnb + incorrectnb),
                       svmRatio = 100.0 * correctsvm / (correctsvm + incorrectsvm);
                System.Windows.MessageBox.Show(
                    "K Nearest Neighbours Classification:\nGenerated Data Correct Ratio: " +
                    knnRatio.ToString("0.00", System.Globalization.CultureInfo.InvariantCulture) + "%\n" +
                    "Original Data X-Validation Accuracy: "
                    + (100.0 * readAccuracy).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture)
                    + "%\n" + "Generated Data X-Validation Accuracy: "
                    + (100.0 * genAccuracy).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture)
                    + "%\n" + "Mixed Data X-Validation Accuracy: "
                    + (100.0 * mixAccuracy).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture)
                    + "%\n"
                    + "\n\n" + "Naive Bayes Classification:\nGenerated Data Correct Ratio: " +
                    nbRatio.ToString("0.00", System.Globalization.CultureInfo.InvariantCulture) + "%\n" +
                    "Original Data X-Validation Accuracy: "
                    + (100.0 * readAccuracynb).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture)
                    + "%\n" + "Generated Data X-Validation Accuracy: "
                    + (100.0 * genAccuracynb).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture)
                    + "%\n" + "Mixed Data X-Validation Accuracy: "
                    + (100.0 * mixAccuracynb).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture)
                    + "%\n" +
                    "\n\n" + "Support Vector Machine Classification:\nGenerated Data Correct Ratio: " +
                    svmRatio.ToString("0.00", System.Globalization.CultureInfo.InvariantCulture) + "%\n" +
                    "Original Data X-Validation Accuracy: "
                    + (100.0 * readAccuracysvm).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture)
                    + "%\n" + "Generated Data X-Validation Accuracy: "
                    + (100.0 * genAccuracysvm).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture)
                    + "%\n" + "Mixed Data X-Validation Accuracy: "
                    + (100.0 * mixAccuracysvm).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture)
                    + "%\n",
                    "Data Testing - extending dataset",
                    System.Windows.MessageBoxButton.OK);

                /*
                 * ///TEMP - do eksportowania danych do arkusza
                 *
                 *  using (var write = new System.IO.StreamWriter("TestDataDump.txt")){
                 *      write.WriteLine("ScoreTreshold," + scoreH.ToString());
                 *      write.WriteLine("NewDataAmt," + newData.ToString());
                 *      write.WriteLine("Generated Data Correct Ratio," +
                 *          knnRatio.ToString("0.00", System.Globalization.CultureInfo.InvariantCulture) + "," +
                 *          nbRatio.ToString("0.00", System.Globalization.CultureInfo.InvariantCulture) +"," +
                 *          svmRatio.ToString("0.00", System.Globalization.CultureInfo.InvariantCulture));
                 *      write.WriteLine("Original Data X-Validation Accuracy," +
                 *          (100.0 * readAccuracy).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture) + "," +
                 *          (100.0 * readAccuracynb).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture) + "," +
                 *          (100.0 * readAccuracysvm).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture));
                 *      write.WriteLine("Generated Data X-Validation Accuracy," +
                 *          (100.0 * genAccuracy).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture) + "," +
                 *          (100.0 * genAccuracynb).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture) + "," +
                 *          (100.0 * genAccuracysvm).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture));
                 *  write.WriteLine("Mixed Data X-Validation Accuracy," +
                 *          (100.0 * mixAccuracy).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture) + "," +
                 *          (100.0 * mixAccuracynb).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture) + "," +
                 *          (100.0 * mixAccuracysvm).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture));
                 *
                 * }
                 *  System.Diagnostics.Process.Start("TestDataDump.txt");
                 */
            }
            dialogResult = System.Windows.MessageBox.Show("Do you want to open the file with generated data?", "Data testing - extended data", System.Windows.MessageBoxButton.YesNo);
            if (dialogResult == MessageBoxResult.Yes)
            {
                System.Diagnostics.Process.Start(savefiledir);
            }
        }
Пример #17
0
        public void CrossValidationTest()
        {
            #region doc_cross_validation
            // Seed the shared random generator so that the results are repeatable
            Accord.Math.Random.Generator.Seed = 0;

            // The data to be learned is the Wisconsin (Diagnostic) Breast Cancer
            // dataset, where each sample describes a breast cancer exam and the
            // label says whether it corresponds to a malignant or benign cancer:
            WisconsinDiagnosticBreastCancer dataset = new WisconsinDiagnosticBreastCancer();
            double[][] features = dataset.Features;    // 569 samples, 30-dimensional features
            int[]      labels   = dataset.ClassLabels; // 569 samples, 2 different class labels

            // We measure the cross-validation performance of a decision tree
            // limited to a height of 5, in which a variable may join the
            // decision path at most 2 times during evaluation:
            var validator = CrossValidation.Create(

                // Number of folds (10-fold cross-validation)
                k: 10,

                // How the learning algorithm for each fold is created
                learner: (p) => new C45Learning() { Join = 2, MaxHeight = 5 },

                // How the performance of each learned tree is measured
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),

                // Hook for any special processing before learning;
                // here it simply delegates to the teacher
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

                // The input and output data used in cross-validation
                x: features,
                y: labels);

            // Run the cross-validation; the data passed to .Learn
            // is what gets partitioned into the folds:
            var outcome = validator.Learn(features, labels);

            // General information about the problem:
            int numberOfSamples = outcome.NumberOfSamples;    // should be 569
            int numberOfInputs  = outcome.NumberOfInputs;     // should be 30
            int numberOfOutputs = outcome.NumberOfOutputs;    // should be 2

            double trainingError   = outcome.Training.Mean;   // should be 0.017770391691033137
            double validationError = outcome.Validation.Mean; // should be 0.077318295739348369
            #endregion

            // Problem dimensions
            Assert.AreEqual(569, numberOfSamples);
            Assert.AreEqual(30, numberOfInputs);
            Assert.AreEqual(2, numberOfOutputs);

            // Error statistics of the first run
            Assert.AreEqual(10, validator.K);
            Assert.AreEqual(0.017770391691033137, outcome.Training.Mean, 1e-10);
            Assert.AreEqual(0.077318295739348369, outcome.Validation.Mean, 1e-10);

            Assert.AreEqual(3.0913682243756776E-05, outcome.Training.Variance, 1e-10);
            Assert.AreEqual(0.00090104473101439207, outcome.Validation.Variance, 1e-10);

            // One fold and one model per partition
            Assert.AreEqual(10, validator.Folds.Length);
            Assert.AreEqual(10, outcome.Models.Length);

            // The first learned tree must have the configured height of 5
            int height = outcome.Models[0].Model.GetHeight();
            Assert.AreEqual(5, height);

            // Second run: a much more constrained tree (height 1, one variable)
            validator = CrossValidation.Create(
                k: 10,
                learner: (p) => new C45Learning() { Join = 1, MaxHeight = 1, MaxVariables = 1 },
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                x: features,
                y: labels);

            outcome = validator.Learn(features, labels);

            height = outcome.Models[0].Model.GetHeight();
            Assert.AreEqual(1, height);

            // Note the looser tolerance (5e-3) on the training mean of this run
            Assert.AreEqual(0.10896305433723197, outcome.Training.Mean, 5e-3);
            Assert.AreEqual(0.1125, outcome.Validation.Mean, 1e-10);

            Assert.AreEqual(2.1009258672955873E-05, outcome.Training.Variance, 1e-10);
            Assert.AreEqual(0.0017292179645018977, outcome.Validation.Variance, 1e-10);
        }
Пример #18
0
        /// <summary>
        /// Runs a 10-fold cross-validation of a C4.5 decision tree (Join = 2, MaxHeight = 5)
        /// on the given data and writes accuracy, error and the per-class precision,
        /// recall and F-score of the aggregate confusion matrix to the console.
        /// </summary>
        /// <param name="inputs">Feature vectors, one per sample.</param>
        /// <param name="outputs">
        /// Class label of each sample. Class 0 is reported as "Not Anomaly" and class 1
        /// as "Anomaly", so both classes are expected to be present.
        /// </param>
        public static void DecisionTree_crossValidation(double[][] inputs, int[] outputs)
        {
            // Ensure we have reproducible results
            Accord.Math.Random.Generator.Seed = 0;

            // Measure the cross-validation performance of a decision tree with a
            // maximum tree height of 5 and where variables are able to join the
            // decision path at most 2 times during evaluation:
            var cv = CrossValidation.Create(

                // We will be using 10-fold cross validation
                k: 10,

                // Here we create the learning algorithm
                learner: (p) => new C45Learning() { Join = 2, MaxHeight = 5 },

                // How the tree performance should be measured
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),

                // Hook for any special operations before the actual learning
                // is done; here it is left as simple as it can be
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

                // The input and output data that will be used in cross-validation
                x: inputs, y: outputs
                );

            // Partition the data into the folds and learn one model per fold
            var result = cv.Learn(inputs, outputs);

            // Aggregate confusion matrix for the validation sets
            GeneralConfusionMatrix gcm = result.ToConfusionMatrix(inputs, outputs);

            Console.WriteLine("Accuracy:" + gcm.Accuracy);
            Console.WriteLine("Error:" + gcm.Error);

            double[] precision = gcm.Precision;
            double[] recall    = gcm.Recall;

            Console.WriteLine("Not Anomaly Precision:" + precision[0]);
            Console.WriteLine("Not Anomaly Recall:" + recall[0]);
            Console.WriteLine("Anomaly Precision:" + precision[1]);
            Console.WriteLine("Anomaly Recall:" + recall[1]);

            // Harmonic mean of precision and recall. When their sum is zero or
            // undefined (NaN) the F-score is reported as 0 instead of NaN.
            Func<double, double, double> fScore = (prec, rec) =>
                (prec + rec) > 0 ? 2 * (prec * rec) / (prec + rec) : 0.0;

            double anomalyFScore    = fScore(precision[1], recall[1]);
            double NotAnomalyFScore = fScore(precision[0], recall[0]);

            Console.WriteLine("Not ANomaly F-score:" + NotAnomalyFScore);
            Console.WriteLine("Anomaly F-score:" + anomalyFScore);
        }
Пример #19
0
        /// <summary>
        /// Command-line entry point: trains a Naive Bayes classifier from CSV files and
        /// reports cross-validation statistics, or (in test mode) loads a previously
        /// saved model and reports its accuracy on the given data.
        /// </summary>
        /// <param name="args">
        /// args[0] = training (or test) CSV file, args[1] = label CSV file,
        /// args[2] = optional test flag (-s or -S), args[3] = optional model file name.
        /// </param>
        static void Main(string[] args)
        {
            /*
             * Takes CSV files as input and trains a Naive Bayes classifier. If the test flag is
             * set, a new classifier is not trained; instead the previously saved model is loaded
             * and its accuracy against the input data is calculated.
             *
             * arg 1 = training file or test file
             * arg 2 = label file
             * arg 3 = test flag (-s or -S)
             * arg 4 = file name of the model file
             */

            const int minargs = 2;
            const int maxargs = 4;
            const int Folds   = 4;

            Accord.Math.Random.Generator.Seed = 0;
            string trainingFname = null;
            string labelFname    = null;
            string modelFname    = "NBmodel.sav"; // Default model file name
            bool   NoTrain       = false;

            Functions.Welcome();
            int numArgs = Functions.parseCommandLine(args, maxargs, minargs);

            if (numArgs == 0)
            {
                Console.WriteLine(Strings.resources.usage);
                System.Environment.Exit(1);
            }

            if (numArgs == 2)
            {
                trainingFname = args[0];
                labelFname    = args[1];
            }
            if (numArgs == 3) // the third parameter must be the test flag
            {
                if (args[2] == "-s" || args[2] == "-S")
                {
                    NoTrain       = true;
                    trainingFname = args[0];
                    labelFname    = args[1];
                }
                else
                {
                    Console.WriteLine(Strings.resources.usage);
                    System.Environment.Exit(1);
                }
            }

            if (numArgs == 4)
            {
                // NOTE(review): with four arguments the flag in args[2] is not validated;
                // test mode is assumed whenever a model file name is given.
                NoTrain       = true;
                trainingFname = args[0];
                labelFname    = args[1];
                modelFname    = args[3];
            }

            //
            // Check if the training and label files exist and are not locked by another process
            //
            if (!Utility.Functions.checkFile(trainingFname))
            {
                Console.WriteLine("Error opening file {0}", trainingFname);
                System.Environment.Exit(1);
            }
            if (!Functions.checkFile(labelFname))
            {
                Console.WriteLine("Error opening file {0}", labelFname);
                System.Environment.Exit(1);
            }

            //
            // Read in the training and label files, CSV format. The readers are
            // disposed as soon as their contents have been copied into arrays.
            //
            int[][] trainingset;
            using (CsvReader training_samples = new CsvReader(trainingFname, false))
            {
                int[,] MatrixIn = training_samples.ToMatrix<int>();
                trainingset = Functions.convertToJaggedArray(MatrixIn);
            }

            //
            // Naive Bayes gets trained on integer arrays or arrays of "strings"
            //
            int[] output;
            using (CsvReader label_samples = new CsvReader(labelFname, false))
            {
                // Convert the labels to a matrix, then to a jagged array, then to a flat array
                int[,]  labelsIn = label_samples.ToMatrix<int>();
                int[][] LabelSet = Functions.convertToJaggedArray(labelsIn);
                output = Functions.convertTointArray(LabelSet);
            }

            if (!NoTrain)
            {
                // Create a new Naive Bayes learning instance
                var learner = new NaiveBayesLearning();

                // Create a Naive Bayes classifier and train with the input datasets
                NaiveBayes classifier = learner.Learn(trainingset, output);

                /* Cross-validation is a technique for estimating the performance of a predictive model.
                 * It can be used to measure how the results of a statistical analysis will generalize to
                 * an independent data set. It is mainly used in settings where the goal is prediction, and
                 * one wants to estimate how accurately a predictive model will perform in practice.
                 *
                 * One round of cross-validation involves partitioning a sample of data into complementary
                 * subsets, performing the analysis on one subset (called the training set), and validating
                 * the analysis on the other subset (called the validation set or testing set). To reduce
                 * variability, multiple rounds of cross-validation are performed using different partitions,
                 * and the validation results are averaged over the rounds.
                 */

                // Create a k-fold cross-validation instance over the input training set
                var cv = CrossValidation.Create(k: Folds, learner: (p) => new NaiveBayesLearning(),
                                                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                                                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                                                x: trainingset, y: output);

                var result = cv.Learn(trainingset, output);

                Console.WriteLine("Performing n-fold cross validation where n = {0}", cv.K);

                // We can grab some information about the problem:
                Console.WriteLine("Cross Validation Results");
                Console.WriteLine("     number of samples {0}", result.NumberOfSamples);
                Console.WriteLine("     number of features: {0}", result.NumberOfInputs);
                Console.WriteLine("     number of outputs {0}", result.NumberOfOutputs);
                Console.WriteLine("     Training Error: {0:n2}", result.Training.Mean);
                Console.WriteLine("     Validation Mean: {0}\n", result.Validation.Mean);

                Console.WriteLine("Creating General Confusion Matrix from Cross Validation");
                GeneralConfusionMatrix gcm = result.ToConfusionMatrix(trainingset, output);
                double accuracy            = gcm.Accuracy;
                Console.WriteLine(" GCM Accuracy {0}%\n", accuracy * 100);

                // Confusion matrix of the classifier trained on the full training set
                ConfusionMatrix cm = ConfusionMatrix.Estimate(classifier, trainingset, output);
                Console.WriteLine("Confusion Error {0}", cm.Error);
                Console.WriteLine("Confusion accuracy {0}", cm.Accuracy);
                double tp     = cm.TruePositives;
                double tn     = cm.TrueNegatives;
                double fscore = cm.FScore;
                double fp     = cm.FalsePositives;
                double fn     = cm.FalseNegatives;
                Console.WriteLine("TP = {0},TN = {1}, FP = {2}, FN = {3}, Fscore = {4} ", tp, tn, fp, fn, fscore);

                // Save the model created from the training set. modelFname still holds
                // its default here, because a custom name is only accepted in test mode.
                classifier.Save(modelFname, compression: SerializerCompression.None);
                Console.WriteLine("Successfully saved the model");
            }
            else
            {
                // Load a previously saved model and make predictions from the input
                NaiveBayes loaded_nb = Serializer.Load<NaiveBayes>(modelFname);
                int[]      results   = loaded_nb.Decide(trainingset);

                // Compare the predictions to the labels
                double accuracy = Functions.CalculateAccuraccy(output, results);
                Console.WriteLine("Accuracy of predictions = {0}%", Math.Round(accuracy * 100, 2));
            }
        }
Пример #20
0
        public void learn_test_simple()
        {
            #region doc_learn_simple
            // Seed the shared random generator so that results are reproducible
            Accord.Math.Random.Generator.Seed = 0;

            // This sample shows how Cross-Validation can be used to assess
            // the performance of Support Vector Machines. The example data is
            // binary: we try to learn a XOR problem and see how well a linear
            // SVM performs on it.

            // Four XOR input combinations, repeated four times (16 samples)
            double[][] samples =
            {
                new[] { -1.0, -1.0 }, new[] { 1.0, -1.0 }, new[] { -1.0, 1.0 }, new[] { 1.0, 1.0 },
                new[] { -1.0, -1.0 }, new[] { 1.0, -1.0 }, new[] { -1.0, 1.0 }, new[] { 1.0, 1.0 },
                new[] { -1.0, -1.0 }, new[] { 1.0, -1.0 }, new[] { -1.0, 1.0 }, new[] { 1.0, 1.0 },
                new[] { -1.0, -1.0 }, new[] { 1.0, -1.0 }, new[] { -1.0, 1.0 }, new[] { 1.0, 1.0 },
            };

            // Result of xor for each of the sample inputs above
            int[] labels =
            {
                -1, 1, 1, -1,
                -1, 1, 1, -1,
                -1, 1, 1, -1,
                -1, 1, 1, -1,
            };

            // Create the cross-validation algorithm for the data set
            var validator = CrossValidation.Create(

                // Use 3 folds in cross-validation
                k: 3,

                // How the learning algorithm for each model is created
                learner: (s) => new SequentialMinimalOptimization<Linear>() { Complexity = 100 },

                // How the performance of those models is measured
                loss: (expected, actual, p) => new ZeroOneLoss(expected).Loss(actual),

                // How a model is fitted to one fold's data
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

                x: samples,
                y: labels);

            // If needed, control the parallelization degree
            validator.ParallelOptions.MaxDegreeOfParallelism = 1;

            var outcome = validator.Learn(samples, labels);

            // Finally, access the measured performance.
            double trainingErrors   = outcome.Training.Mean;
            double validationErrors = outcome.Validation.Mean;
            #endregion

            // Mean errors
            Assert.AreEqual(3, validator.K);
            Assert.AreEqual(0.37575757575757579, outcome.Training.Mean, 1e-10);
            Assert.AreEqual(0.75555555555555554, outcome.Validation.Mean, 1e-10);

            // Spread of the errors across folds
            Assert.AreEqual(0.00044077134986225924, outcome.Training.Variance, 1e-10);
            Assert.AreEqual(0.0059259259259259334, outcome.Validation.Variance, 1e-10);

            Assert.AreEqual(0.020994555243259126, outcome.Training.StandardDeviation, 1e-10);
            Assert.AreEqual(0.076980035891950155, outcome.Validation.StandardDeviation, 1e-10);

            Assert.AreEqual(0, outcome.Training.PooledStandardDeviation);
            Assert.AreEqual(0, outcome.Validation.PooledStandardDeviation);

            // One fold and one model per partition
            Assert.AreEqual(3, validator.Folds.Length);
            Assert.AreEqual(3, outcome.Models.Length);
        }
Пример #21
0
        /// <summary>
        /// Evaluates a Bernoulli Naive Bayes spam classifier with k-fold cross-validation
        /// on the word-vector data produced by the data preparation step, then prints the
        /// confusion matrix, error rates, accuracy, precision and recall to the console.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Read in the files we created in the Data Preparation step
            // TODO: change the path to point to your data directory
            string dataDirPath = "\\\\Mac\\Home\\Documents\\c-sharp-machine-learning\\ch.2\\output";

            // Load the word-vector data into a data frame
            // NOTE(review): unlike rawDF below, this frame is not indexed or sorted by
            // "emailNum"; the rows of both files are presumably in the same order - confirm.
            var wordVecDF = Frame.ReadCsv(
                Path.Combine(dataDirPath, "data-preparation-step\\subjectWordVec-alphaonly.csv"),
                hasHeaders: true,
                inferTypes: true
                );

            // Load the transformed data from data preparation step to get "is_ham" column,
            // indexed and sorted by the "emailNum" column
            var rawDF = Frame.ReadCsv(
                Path.Combine(dataDirPath, "data-preparation-step\\transformed.csv"),
                hasHeaders: true,
                inferTypes: false,
                schema: "int,string,string,int"
                ).IndexRows<int>("emailNum").SortRowsByKey();

            // Load Term Frequency Data
            var spamTermFrequencyDF = Frame.ReadCsv(
                Path.Combine(dataDirPath, "data-analysis-step\\frequency-alphaonly\\subject-line\\spam-frequencies-after-stopwords.csv"),
                hasHeaders: false,
                inferTypes: false,
                schema: "string,int"
                );

            spamTermFrequencyDF.RenameColumns(new string[] { "word", "num_occurences" });
            var indexedSpamTermFrequencyDF = spamTermFrequencyDF.IndexRows<string>("word");

            // Change number of features to reduce overfitting
            int minNumOccurences = 1;

            // Keep only the words that occur at least minNumOccurences times in spam subjects
            string[] wordFeatures = indexedSpamTermFrequencyDF.Where(
                x => x.Value.GetAs<int>("num_occurences") >= minNumOccurences
                ).RowKeys.ToArray();
            Console.WriteLine("Num Features Selected: {0}", wordFeatures.Length);

            // Subtracting "is_ham" values from 1 to encode this target variable with 1 for spam emails
            var targetVariables = 1 - rawDF.GetColumn<int>("is_ham");

            Console.WriteLine("{0} spams vs. {1} hams", targetVariables.NumSum(), (targetVariables.KeyCount - targetVariables.NumSum()));

            // Create input and output variables from data frames, so that we can use them for Accord.NET MachineLearning models
            double[][] input = wordVecDF.Columns[wordFeatures].Rows.Select(
                x => Array.ConvertAll<object, double>(x.Value.ValuesAll.ToArray(), o => Convert.ToDouble(o))
                ).ValuesAll.ToArray();
            int[] output = targetVariables.Values.ToArray();

            // Number of folds
            int numFolds = 3;

            var cvNaiveBayesClassifier = CrossValidation.Create<NaiveBayes<BernoulliDistribution>, NaiveBayesLearning<BernoulliDistribution>, double[], int>(
                // Number of folds
                k: numFolds,
                // Naive Bayes classifier with Bernoulli distribution
                learner: (p) => new NaiveBayesLearning<BernoulliDistribution>(),
                // Using Zero-One Loss Function as a Cost Function
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                // Fitting a classifier
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                // Input with Features
                x: input,
                // Output
                y: output
                );

            // Run Cross-Validation
            var result = cvNaiveBayesClassifier.Learn(input, output);

            // Sample Size
            int numberOfSamples = result.NumberOfSamples;
            int numberOfInputs  = result.NumberOfInputs;
            int numberOfOutputs = result.NumberOfOutputs;

            // Training & Validation Errors
            double trainingError   = result.Training.Mean;
            double validationError = result.Validation.Mean;

            // Confusion Matrix
            Console.WriteLine("\n---- Confusion Matrix ----");
            GeneralConfusionMatrix gcm = result.ToConfusionMatrix(input, output);

            // Per the Accord.NET documentation, Matrix[i, j] is the number of samples of
            // class i that were classified as class j: rows are the actual classes and
            // columns are the predicted classes.
            Console.WriteLine("");
            Console.Write("\t\tPred 0\t\tPred 1\n");
            for (int i = 0; i < gcm.Matrix.GetLength(0); i++)
            {
                Console.Write("Actual {0} :\t", i);
                for (int j = 0; j < gcm.Matrix.GetLength(1); j++)
                {
                    Console.Write(gcm.Matrix[i, j] + "\t\t\t");
                }
                Console.WriteLine();
            }

            Console.WriteLine("\n---- Sample Size ----");
            Console.WriteLine("# samples: {0}, # inputs: {1}, # outputs: {2}", numberOfSamples, numberOfInputs, numberOfOutputs);
            Console.WriteLine("training error: {0}", trainingError);
            Console.WriteLine("validation error: {0}\n", validationError);

            Console.WriteLine("\n---- Calculating Accuracy, Precision, Recall ----");

            // Class 1 (spam) is the positive class. Indexing is [actual, predicted].
            float truePositive  = (float)gcm.Matrix[1, 1];
            float trueNegative  = (float)gcm.Matrix[0, 0];
            float falsePositive = (float)gcm.Matrix[0, 1]; // actual ham, predicted spam
            float falseNegative = (float)gcm.Matrix[1, 0]; // actual spam, predicted ham

            // Accuracy
            Console.WriteLine(
                "Accuracy: {0}",
                (truePositive + trueNegative) / numberOfSamples
                );
            // True-Positive / (True-Positive + False-Positive)
            Console.WriteLine("Precision: {0}", (truePositive / (truePositive + falsePositive)));
            // True-Positive / (True-Positive + False-Negative)
            Console.WriteLine("Recall: {0}", (truePositive / (truePositive + falseNegative)));

            Console.ReadKey();
        }
Пример #22
0
        public void CrossValidationTest()
        {
            #region doc_cross_validation
            // Ensure we have reproducible results
            Accord.Math.Random.Generator.Seed = 0;

            // Get some data to be learned. We will be using the Wiconsin's
            // (Diagnostic) Breast Cancer dataset, where the goal is to determine
            // whether the characteristics extracted from a breast cancer exam
            // correspond to a malignant or benign type of cancer:
            var        data   = new WisconsinDiagnosticBreastCancer();
            double[][] input  = data.Features;    // 569 samples, 30-dimensional features
            int[]      output = data.ClassLabels; // 569 samples, 2 different class labels

            // Let's say we want to measure the cross-validation performance of
            // a decision tree with a maximum tree height of 5 and where variables
            // are able to join the decision path at most 2 times during evaluation:
            var cv = CrossValidation.Create(

                k: 10,                            // We will be using 10-fold cross validation

                learner: (p) => new C45Learning() // here we create the learning algorithm
            {
                Join      = 2,
                MaxHeight = 5
            },

                // Now we have to specify how the tree performance should be measured:
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),

                // This function can be used to perform any special
                // operations before the actual learning is done, but
                // here we will just leave it as simple as it can be:
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

                // Finally, we have to pass the input and output data
                // that will be used in cross-validation.
                x: input, y: output
                );

            // After the cross-validation object has been created,
            // we can call its .Learn method with the input and
            // output data that will be partitioned into the folds:
            var result = cv.Learn(input, output);

            // We can grab some information about the problem:
            int numberOfSamples = result.NumberOfSamples;    // should be 569
            int numberOfInputs  = result.NumberOfInputs;     // should be 30
            int numberOfOutputs = result.NumberOfOutputs;    // should be 2

            double trainingError   = result.Training.Mean;   // should be 0.017771153143274855
            double validationError = result.Validation.Mean; // should be 0.0755952380952381

            // If desired, compute an aggregate confusion matrix for the validation sets:
            GeneralConfusionMatrix gcm = result.ToConfusionMatrix(input, output);
            double accuracy            = gcm.Accuracy; // result should be 0.92442882249560632
            #endregion

            Assert.AreEqual(569, gcm.Samples);
            Assert.AreEqual(0.92442882249560632, gcm.Accuracy);
            Assert.AreEqual(0.075571177504393683, gcm.Error);
            Assert.AreEqual(2, gcm.Classes);

            Assert.AreEqual(569, numberOfSamples);
            Assert.AreEqual(30, numberOfInputs);
            Assert.AreEqual(2, numberOfOutputs);

            Assert.AreEqual(10, cv.K);
            Assert.AreEqual(0.017771153143274855, result.Training.Mean, 1e-10);
            Assert.AreEqual(0.0755952380952381, result.Validation.Mean, 1e-10);

            Assert.AreEqual(3.0929835736884063E-05, result.Training.Variance, 1e-10);
            Assert.AreEqual(0.00096549963219103182, result.Validation.Variance, 1e-10);

            Assert.AreEqual(10, cv.Folds.Length);
            Assert.AreEqual(10, result.Models.Length);

            var tree   = result.Models[0].Model;
            int height = tree.GetHeight();
            Assert.AreEqual(5, height);

            Accord.Math.Random.Generator.Seed = 0;

            cv = CrossValidation.Create(
                k: 10,
                learner: (p) => new C45Learning()
            {
                Join         = 1,
                MaxHeight    = 1,
                MaxVariables = 1
            },
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                x: input, y: output
                );

            cv.ParallelOptions.MaxDegreeOfParallelism = 1;

            result = cv.Learn(input, output);

            tree   = result.Models[0].Model;
            height = tree.GetHeight();

            Assert.AreEqual(1, height);

            Assert.AreEqual(0.24842341313352828, result.Training.Mean, 1e-10);
            Assert.AreEqual(0.25112781954887214, result.Validation.Mean, 1e-10);

            Assert.AreEqual(0.017727583138285874, result.Training.Variance, 1e-10);
            Assert.AreEqual(0.018956888182583998, result.Validation.Variance, 1e-10);
        }
        /// <summary>
        /// Reads the meal records from the CSV file referenced by <c>path</c>, codifies
        /// them into integer symbols, trains an ID3 decision tree on the whole data set
        /// and then runs a 5-fold cross-validation of the same learner configuration.
        /// The mean validation and training errors of the cross-validation are written
        /// to the debug output.
        /// </summary>
        public void validation()
        {
            DataTable dt = new DataTable("FoodDBSample");
            dt.Columns.Add("Category", "Carb", "Protein", "Fat", "Calorie", "Fiber", "Decision");

            // CsvHelper's GetRecords yields the records while it is being enumerated, so
            // the reader must stay open until the loop is done. The using blocks release
            // the file handle afterwards, even when parsing throws half-way through.
            using (var reader = File.OpenText(path))
            using (var csv = new CsvReader(reader))
            {
                foreach (var record in csv.GetRecords<MealData>())
                {
                    DataRow row = dt.NewRow();

                    row["Category"] = record.Category;
                    row["Carb"]     = record.Carb;
                    row["Protein"]  = record.Protein;
                    row["Fat"]      = record.Fat;
                    row["Calorie"]  = record.Calorie;
                    row["Fiber"]    = record.Fiber;
                    row["Decision"] = record.Outcome;

                    dt.Rows.Add(row);
                }
            }

            // Translate every column value into an integer symbol
            var codebook = new Codification(dt);
            DataTable symbols = codebook.Apply(dt);

            int[][] inputs  = symbols.ToJagged<int>("Category", "Carb", "Protein", "Fat", "Calorie", "Fiber");
            int[]   outputs = symbols.ToArray<int>("Decision");

            // Fit one tree on the complete data set and compute its error on that same
            // data. The value is not reported by this method.
            DecisionTree tree = createId3Learning().Learn(inputs, outputs);
            double resubstitutionError = new ZeroOneLoss(outputs).Loss(tree.Decide(inputs));

            // Measure the cross-validation performance of the ID3 tree
            var cv = CrossValidation.Create(

                k: 5, // 5-fold cross-validation

                // every fold gets its own, identically configured learner
                learner: (p) => createId3Learning(),

                // fraction of misclassified samples
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),

                // no special preparation before learning; just train on the fold
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

                // the input and output data that will be used in cross-validation
                x: inputs, y: outputs
                );

            // Partition the data into the folds and train/validate on each of them
            var result = cv.Learn(inputs, outputs);

            double trainingError   = result.Training.Mean;
            double validationError = result.Validation.Mean;

            System.Diagnostics.Debug.WriteLine("ID3 Mean: " + validationError);
            System.Diagnostics.Debug.WriteLine("ID3 Error: " + trainingError);
        }

        /// <summary>
        /// Creates an ID3 learner over the six decision columns used by
        /// <c>validation</c>; the second argument of each <c>DecisionVariable</c>
        /// is the number of symbols declared for that column.
        /// </summary>
        /// <returns>A new, untrained <c>ID3Learning</c> instance.</returns>
        private static ID3Learning createId3Learning()
        {
            return new ID3Learning()
            {
                new DecisionVariable("Category", 4),
                new DecisionVariable("Carb", 2),
                new DecisionVariable("Protein", 2),
                new DecisionVariable("Fat", 2),
                new DecisionVariable("Calorie", 2),
                new DecisionVariable("Fiber", 2)
            };
        }