C# (CSharp) CrossValidation.Create примеры использования

Язык программирования: C# (CSharp)

Класс/Тип: CrossValidation

Метод/Функция: Create

Примеров на hotexamples.com: 23

C# (CSharp) CrossValidation.Create — найдено 23 примера. Это лучшие примеры кода на C# (CSharp) для CrossValidation.Create, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество подборки.

Основные методы

Показать Скрыть

Create(23)

Compute(17)

Learn(6)

CV(3)

Splittings(3)

CrossValidate(2)

GetTestFold(2)

GetTrainFold(2)

run(2)

Prepare(1)

Start(1)

TrainTestSplit(1)

addEvaluator(1)

getCPtr(1)

getEvaluator(1)

Пример #1

Показать файл

        /// <summary>
        /// Runs 3-fold cross-validation of a Naive Bayes learner over the
        /// <c>inputsInt</c> / <c>outputs</c> data and appends the mean training error,
        /// the mean validation error and the aggregate accuracy to <c>message</c>.
        /// </summary>
        public void nativeBayesValidation()
        {
            // Note: the original also trained a stand-alone NaiveBayes model here, but the
            // resulting model was never read, so that redundant training pass was dropped.
            var cv = CrossValidation.Create(
                k: 3,

                // A fresh learner is created for every fold.
                learner: (p) => new NaiveBayesLearning(),

                // Fraction of misclassified samples.
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),

                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

                x: inputsInt, y: outputs
                );

            var result = cv.Learn(inputsInt, outputs);

            double trainingError   = result.Training.Mean;
            double validationError = result.Validation.Mean;

            // Confusion matrix computed by the cross-validation result for the supplied data.
            GeneralConfusionMatrix gcm = result.ToConfusionMatrix(inputsInt, outputs);
            double accuracy            = gcm.Accuracy;

            message += "Native Bayes Validacja\n";
            message += "trainingError " + trainingError.ToString() + "\n";
            message += "validationError " + validationError.ToString() + "\n";
            message += "accuracy " + accuracy.ToString() + "\n\n";
        }

Пример #2

Показать файл

        /// <summary>
        /// Runs 3-fold cross-validation of a k-nearest-neighbours classifier (k = 4)
        /// over the <c>inputs</c> / <c>outputs</c> data and appends the mean training error,
        /// the mean validation error and the aggregate accuracy to <c>message</c>.
        /// </summary>
        public void knnValidation()
        {
            var crossvalidation = CrossValidation.Create(
                k: 3,
                learner: (p) => new KNearestNeighbors(k: 4),
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                x: inputs, y: outputs
                );
            var result = crossvalidation.Learn(inputs, outputs);

            double trainingError   = result.Training.Mean;
            double validationError = result.Validation.Mean;

            // Confusion matrix computed by the cross-validation result for the supplied data.
            GeneralConfusionMatrix gcm = result.ToConfusionMatrix(inputs, outputs);
            double accuracy            = gcm.Accuracy;

            message += "Knn Validacja\n";
            message += "trainingError " + trainingError.ToString() + "\n";
            message += "validationError " + validationError.ToString() + "\n";
            message += "accuracy " + accuracy.ToString() + "\n\n";
        }

Пример #3

Показать файл

Файл: AccordSupervisedClassifiers.cs Проект: jrenewhite/dotnet-vic

        /// <summary>
        /// Cross-validates a kernel discriminant analysis classifier (quadratic kernel)
        /// and stores the resulting confusion matrix in <c>ConfusionMatrix</c>.
        /// </summary>
        /// <param name="folds">Number of cross-validation folds (k).</param>
        /// <param name="inputs">Input vectors passed to the learner as <c>x</c>.</param>
        /// <param name="outputs">Class labels passed to the learner as <c>y</c>.</param>
        public override void ApplyCrossValidation(int folds, double[][] inputs, int[] outputs)
        {
            crossValidation = CrossValidation.Create(

                // Honour the caller-supplied fold count (it was previously ignored
                // in favour of a hard-coded 10).
                k: folds,

                // A fresh learning algorithm is created for every fold.
                learner: (p) => new KernelDiscriminantAnalysis()
                {
                    Kernel = new Quadratic()
                },

                // Classifier performance is measured as the zero-one (misclassification) loss.
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),

                // Hook for special operations before learning; here it simply
                // delegates to the learner.
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

                // Data that will be partitioned into the folds.
                x: inputs, y: outputs
                );
            var result = crossValidation.Learn(inputs, outputs);

            ConfusionMatrix = result.ToConfusionMatrix(inputs, outputs).Matrix;
        }

Пример #4

Показать файл

Файл: AccordSupervisedClassifiers.cs Проект: jrenewhite/dotnet-vic

        /// <summary>
        /// Cross-validates a multi-class support vector machine (Gaussian kernel, trained
        /// with SMO) and stores the resulting confusion matrix in <c>ConfusionMatrix</c>.
        /// </summary>
        /// <param name="folds">Number of cross-validation folds (k).</param>
        /// <param name="inputs">Input vectors passed to the learner as <c>x</c>.</param>
        /// <param name="outputs">Class labels passed to the learner as <c>y</c>.</param>
        public override void ApplyCrossValidation(int folds, double[][] inputs, int[] outputs)
        {
            crossValidation = CrossValidation.Create(

                // Honour the caller-supplied fold count (it was previously ignored
                // in favour of a hard-coded 10).
                k: folds,

                learner: (p) => new MulticlassSupportVectorLearning <Gaussian>()
                {
                    // Configure the learning algorithm to use SMO to train the
                    // underlying SVMs in each of the binary class subproblems.
                    Learner = (param) => new SequentialMinimalOptimization <Gaussian>()
                    {
                        // Estimate a suitable guess for the Gaussian kernel's parameters.
                        // This estimate can serve as a starting point for a grid search.
                        UseKernelEstimation = true
                    }
                },

                // Classifier performance is measured as the zero-one (misclassification) loss.
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),

                // Hook for special operations before learning; here it simply
                // delegates to the learner.
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

                // Data that will be partitioned into the folds.
                x: inputs, y: outputs
                );
            var result = crossValidation.Learn(inputs, outputs);

            ConfusionMatrix = result.ToConfusionMatrix(inputs, outputs).Matrix;
        }

Пример #5

Показать файл

Файл: AccordSupervisedClassifiers.cs Проект: jrenewhite/dotnet-vic

        /// <summary>
        /// Cross-validates a Naive Bayes classifier and stores the resulting
        /// confusion matrix in <c>ConfusionMatrix</c>.
        /// </summary>
        /// <param name="folds">Number of cross-validation folds (k).</param>
        /// <param name="inputs">
        /// Input vectors; every value is converted with <see cref="Convert.ToInt32(double)"/>
        /// before being handed to the learner.
        /// </param>
        /// <param name="outputs">Class labels passed to the learner as <c>y</c>.</param>
        public void ApplyCrossValidation(int folds, double[][] inputs, int[] outputs)
        {
            // Convert the inputs once instead of repeating the same projection for
            // Create, Learn and ToConfusionMatrix.
            int[][] discreteInputs = inputs
                .Select(row => row.Select(value => Convert.ToInt32(value)).ToArray())
                .ToArray();

            crossValidation = CrossValidation.Create(

                // Honour the caller-supplied fold count (it was previously ignored
                // in favour of a hard-coded 10).
                k: folds,

                // A fresh learning algorithm is created for every fold.
                learner: (p) => new B.NaiveBayesLearning(),

                // Classifier performance is measured as the zero-one (misclassification) loss.
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),

                // Hook for special operations before learning; here it simply
                // delegates to the learner.
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

                // Data that will be partitioned into the folds.
                x: discreteInputs, y: outputs
                );
            var result = crossValidation.Learn(discreteInputs, outputs);

            ConfusionMatrix = result.ToConfusionMatrix(discreteInputs, outputs).Matrix;
        }

Пример #6

Показать файл

        /// <summary>
        /// Demonstrates 10-fold cross-validation of a C4.5 decision tree on the
        /// Wisconsin (Diagnostic) Breast Cancer dataset and prints the aggregate
        /// accuracy to the console.
        /// </summary>
        private static void breastCancerExample()
        {
            // Fix the random seed so that repeated runs give the same results.
            Accord.Math.Random.Generator.Seed = 0;

            // Wisconsin (Diagnostic) Breast Cancer dataset: the task is to tell malignant
            // from benign tumours based on characteristics extracted from an exam.
            var dataset = new WisconsinDiagnosticBreastCancer();

            // Per the original author's notes: 569 samples, 30 features, 2 class labels.
            double[][] features = dataset.Features;
            int[]      labels   = dataset.ClassLabels;

            // Cross-validate a decision tree limited to height 5 in which each
            // variable may join the decision path at most twice.
            var validator = CrossValidation.Create(
                k: 10,
                learner: (p) => new C45Learning() { Join = 2, MaxHeight = 5 },

                // Tree performance is measured as the zero-one (misclassification) loss.
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),

                // Hook for special operations before learning; kept as a plain delegation.
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

                // Data to be partitioned into the folds.
                x: features, y: labels
                );

            // Run the cross-validation over the full data set.
            var outcome = validator.Learn(features, labels);

            // Basic facts about the problem (expected values per the original author's notes).
            int sampleCount = outcome.NumberOfSamples;    // expected: 569
            int inputCount  = outcome.NumberOfInputs;     // expected: 30
            int outputCount = outcome.NumberOfOutputs;    // expected: 2

            double meanTrainingError   = outcome.Training.Mean;   // expected: 0.017771153143274855
            double meanValidationError = outcome.Validation.Mean; // expected: 0.0755952380952381

            // Aggregate confusion matrix for the validation sets.
            GeneralConfusionMatrix confusion = outcome.ToConfusionMatrix(features, labels);
            double accuracy                  = confusion.Accuracy; // expected: 0.92442882249560632

            string formattedPercent = (accuracy * 100).ToString("N2");
            Console.WriteLine("C45Learning learning algorithm accuracy is %" + formattedPercent);
        }

Пример #7

Показать файл

Файл: Hospital.cs Проект: JuanSebastianMoralesVilla/ProjectX

        /// <summary>
        /// Builds the training matrices from <c>trainingSets</c>, trains the library C4.5
        /// tree into <c>decisionTreeLib</c>, and stores the 5-fold cross-validated accuracy
        /// (rounded to three decimals) in <c>accuracyC45lib</c>.
        /// </summary>
        private void trainingC45lib()
        {
            Accord.Math.Random.Generator.Seed = 0;

            c45Learning = new C45Learning() { Join = 2, MaxHeight = 5 };

            int sampleCount = trainingSets.Count;
            var featureRows = new double[sampleCount][];
            var labels      = new int[sampleCount];

            int row = 0;
            foreach (Patient patient in trainingSets)
            {
                var features = new double[9];

                // Attribute 1 is bucketed into three ranges: below 30, below 60, and the rest.
                features[0] = patient.get(1) < 30 ? 0 : patient.get(1) < 60 ? 1 : 2;

                // Attributes 2..9 are copied through unchanged.
                for (int attribute = 2; attribute <= 9; attribute++)
                {
                    features[attribute - 1] = patient.get(attribute);
                }

                featureRows[row] = features;
                labels[row]      = patient.get(10); // attribute 10 is used as the class label
                row++;
            }

            var validator = CrossValidation.Create(
                k: 5,
                learner: (p) => new C45Learning() { Join = 2, MaxHeight = 5 },
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                x: featureRows, y: labels
                );

            // Train the final tree first, then cross-validate (same order as before).
            decisionTreeLib = c45Learning.Learn(featureRows, labels);
            var outcome = validator.Learn(featureRows, labels);

            GeneralConfusionMatrix confusion = outcome.ToConfusionMatrix(featureRows, labels);

            accuracyC45lib = Math.Round(confusion.Accuracy, 3);
        }

Пример #8

Показать файл

        /// <summary>
        /// Fits a multinomial logistic regression with the lower-bound Newton-Raphson
        /// algorithm, reports cross-validation and confusion-matrix statistics through
        /// <c>Funcs.Utility.OutPutStats</c>, saves the model and returns its decisions.
        /// </summary>
        /// <param name="input1">Input vectors used for both training and evaluation.</param>
        /// <param name="labels">Class labels for <paramref name="input1"/>.</param>
        /// <param name="SaveFile">
        /// Path from which the model file name is derived by replacing ".csv" with ".MLR.save".
        /// </param>
        /// <returns>The model's decisions for <paramref name="input1"/>.</returns>
        public static int [] MultiNomialLogRegressionLowerBoundNewtonRaphson(double [][] input1, int[] labels, string SaveFile)
        {
            // http://accord-framework.net/docs/html/T_Accord_Statistics_Models_Regression_MultinomialLogisticRegression.htm

            // Shared hyper-parameters: the cross-validated learner must be configured the
            // same way as the learner that produces the saved model, otherwise the
            // cross-validation statistics describe a different configuration.
            const int    maxIterations = 10;
            const double tolerance     = 1e-6;

            // Estimation algorithm used for the final model.
            LowerBoundNewtonRaphson lbnr = new LowerBoundNewtonRaphson()
            {
                MaxIterations = maxIterations,
                Tolerance     = tolerance
            };

            var cv = CrossValidation.Create(

                k: 10,     // 10-fold cross validation

                // A fresh, identically configured learner is created for every fold.
                learner: (p) => new LowerBoundNewtonRaphson()
                {
                    MaxIterations = maxIterations,
                    Tolerance     = tolerance
                },

                // Performance is measured as the zero-one (misclassification) loss.
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),

                // Hook for special operations before learning; kept as a plain delegation.
                fit: (teach, x, y, w) => teach.Learn(x, y, w),

                // Data to be partitioned into the folds.
                x: input1, y: labels
                );

            // Generate a cross validation of the data.
            var cvresult = cv.Learn(input1, labels);

            // Iteratively estimate the final model on the full data set.
            MultinomialLogisticRegression mlr = lbnr.Learn(input1, labels);

            // Generate statistics from confusion matrices.
            ConfusionMatrix        cm  = ConfusionMatrix.Estimate(mlr, input1, labels);
            GeneralConfusionMatrix gcm = cvresult.ToConfusionMatrix(input1, labels);

            Funcs.Utility.OutPutStats(cvresult.NumberOfSamples, cvresult.NumberOfInputs,
                                      cvresult.Training.Mean, gcm.Accuracy, cm.FalsePositives, cm.FalseNegatives, cm.FScore);

            // Compute the model answers.
            int[]  answers       = mlr.Decide(input1);
            string modelsavefile = SaveFile.Replace(".csv", ".MLR.save");

            mlr.Save(modelsavefile, compression: SerializerCompression.None);

            return(answers);
        }

Пример #9

Показать файл

        /// <summary>
        /// Fits a multinomial logistic regression with BFGS, reports cross-validation and
        /// confusion-matrix statistics through <c>Funcs.Utility.OutPutStats</c>, saves the
        /// model to <paramref name="fName"/> and returns its decisions.
        /// </summary>
        /// <param name="input">Input vectors used for both training and evaluation.</param>
        /// <param name="labels">Class labels for <paramref name="input"/>.</param>
        /// <param name="fName">Path the trained model is saved to (uncompressed).</param>
        /// <returns>The model's decisions for <paramref name="input"/>.</returns>
        public static int[] MultiNomialLogisticRegressionBFGS(double [][] input, int [] labels, string fName)
        {
            /* Background, as stated by the original author: L-BFGS ("limited-memory BFGS")
             * is a quasi-Newton method that approximates the inverse Hessian with a few
             * vectors instead of a dense matrix, which keeps memory requirements moderate
             * for problems with many variables. The learner created below is parameterized
             * with BroydenFletcherGoldfarbShanno.
             */

            // Create the learner and estimate the final model on the full data set.
            var mlbfgs = new MultinomialLogisticLearning <BroydenFletcherGoldfarbShanno>();
            MultinomialLogisticRegression mlr = mlbfgs.Learn(input, labels);

            // Cross-validate the same kind of learner to estimate how well the model is
            // expected to generalize.
            int folds = 4; // could play around with this later
            var cv    = CrossValidation.Create(k: folds, learner: (p) => new MultinomialLogisticLearning <BroydenFletcherGoldfarbShanno>(),
                                               loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                                               fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                                               x: input, y: labels);
            var result = cv.Learn(input, labels);
            GeneralConfusionMatrix gcm = result.ToConfusionMatrix(input, labels);
            ConfusionMatrix        cm  = ConfusionMatrix.Estimate(mlr, input, labels);

            // Output relevant statistics.
            Funcs.Utility.OutPutStats(result.NumberOfSamples, result.NumberOfInputs,
                                      result.Training.Mean, gcm.Accuracy, cm.FalsePositives, cm.FalseNegatives, cm.FScore);

            // Compute the model predictions. The per-class probabilities and the zero-one
            // error that used to be computed here were never read, so they were removed.
            int[] answers = mlr.Decide(input);

            mlr.Save(fName, compression: SerializerCompression.None);

            return(answers);
        }

Пример #10

Показать файл

        /// <summary>
        /// Prepares the input/output variables and cross-validates a Bernoulli
        /// Naive Bayes classifier, storing the outcome in <c>Result</c>.
        /// </summary>
        /// <param name="numOfFolds">Number of cross-validation folds (k).</param>
        /// <param name="minOccurences">Forwarded to <c>CalcInputAndOutputVariables</c>.</param>
        private void ClassifyDataByNaiveBayes(int numOfFolds = 3, int minOccurences = 1)
        {
            CalcInputAndOutputVariables(minOccurences);

            var validator = CrossValidation.Create(
                k: numOfFolds,
                learner: p => new NaiveBayesLearning <BernoulliDistribution>(),
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                x: InputVariables,
                y: OutputVariables
                );

            // Run the cross-validation, then expose it through the generic result type.
            // NOTE(review): the 'as' cast yields null if the types do not match — confirm
            // that callers handle a null Result.
            var outcome = validator.Learn(InputVariables, OutputVariables);
            Result = outcome as CrossValidationResult <TModel, double[], int>;
        }

Пример #11

Показать файл

Файл: DecisionTreeLibrary.cs Проект: Gevorah/product-prediction

        /// <summary>
        /// Estimates the accuracy of an ID3 decision tree on the <c>inputs</c> /
        /// <c>outputs</c> data using 5-fold cross-validation.
        /// </summary>
        /// <returns>The aggregate accuracy expressed as a percentage (accuracy * 100).</returns>
        public double Accuracy()
        {
            var cv = CrossValidation.Create(

                k: 5, // 5-fold cross validation

                // A fresh learner per fold, with Join = 1 and MaxHeight = 0.
                // NOTE(review): MaxHeight = 0 is expected to mean "no height limit" in
                // Accord.NET — confirm against the library documentation.
                learner: (p) => new ID3Learning()
                {
                    Join      = 1,
                    MaxHeight = 0
                },

                // Hook for special operations before learning; kept as a plain delegation.
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

                // Tree performance is measured as the zero-one (misclassification) loss.
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),

                // Data to be partitioned into the folds.
                x: inputs, y: outputs
                );

            // Run the cross-validation over the full data set.
            var result = cv.Learn(inputs, outputs);

            // Aggregate confusion matrix for the validation sets.
            GeneralConfusionMatrix gcm = result.ToConfusionMatrix(inputs, outputs);

            return(gcm.Accuracy * 100);
        }

Пример #12

Показать файл

        /// <summary>
        /// Prepares the input/output variables and cross-validates a logistic regression
        /// trained with iteratively reweighted least squares, storing the outcome in
        /// <c>Result</c>.
        /// </summary>
        /// <param name="numOfFolds">Number of cross-validation folds (k).</param>
        /// <param name="minOccurences">Forwarded to <c>CalcInputAndOutputVariables</c>.</param>
        /// <param name="maxIterations">Maximum number of iterations allowed for the learner.</param>
        private void ClassifyDataByLogisticRegression(int numOfFolds = 3, int minOccurences = 1, int maxIterations = 100)
        {
            CalcInputAndOutputVariables(minOccurences);

            var cvLogisticRegressionClassifier = CrossValidation.Create(
                k: numOfFolds,
                learner: (p) => new IterativeReweightedLeastSquares <LogisticRegression>()
                {
                    // Honour the caller-supplied limit (it was previously ignored in favour
                    // of a hard-coded 100, which is also the parameter's default).
                    MaxIterations  = maxIterations,
                    Regularization = 1e-6
                },
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                x: InputVariables,
                y: OutputVariables
                );

            // Run Cross-Validation
            Result = cvLogisticRegressionClassifier.Learn(InputVariables, OutputVariables) as CrossValidationResult <TModel, double[], int>;
        }

Пример #13

Показать файл

Файл: ModelBuilder.cs Проект: davemanton/SpamFilter

        /// <summary>
        /// Cross-validates a logistic regression trained with iteratively reweighted
        /// least squares, using the fold count from <c>_appSettings.ModelNumFolds</c>.
        /// </summary>
        /// <param name="inputs">Input vectors passed to the learner as <c>x</c>.</param>
        /// <param name="outputs">Class labels passed to the learner as <c>y</c>.</param>
        /// <returns>The cross-validation result produced by <c>Learn</c>.</returns>
        public CrossValidationResult <LogisticRegression, double[], int> BuildModel(double[][] inputs, int[] outputs)
        {
            var validator = CrossValidation.Create <LogisticRegression,
                                                    IterativeReweightedLeastSquares <LogisticRegression>,
                                                    double[],
                                                    int>(
                k: _appSettings.ModelNumFolds,
                learner: (p) => new IterativeReweightedLeastSquares <LogisticRegression>
                {
                    MaxIterations  = 100,
                    Regularization = 1e-6
                },
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                x: inputs,
                y: outputs
                );

            return validator.Learn(inputs, outputs);
        }

Пример #14

Показать файл

        /// <summary>
        /// Click handler for the "Generate" button. Validates that at least two classes and
        /// one attribute exist, asks for a target text file, writes one generated sample per
        /// line ("class,value,value,..."), and optionally cross-validates three classifiers
        /// (Naive Bayes, k-nearest neighbours, multi-class SVM) on the generated data,
        /// showing their accuracies and dumping them to "TestDataDump.txt".
        /// </summary>
        /// <param name="sender">Event source (not used in the body).</param>
        /// <param name="e">Event arguments (not used in the body).</param>
        private void Button5_Click(object sender, RoutedEventArgs e) //Generate
        {
            // Guard: generation needs at least two classes.
            if (classes.Count < 2)
            {
                var dialogResult = System.Windows.MessageBox.Show(
                    "Please have at least two classes created to generate",
                    "Data generating error", System.Windows.MessageBoxButton.OK,
                    System.Windows.MessageBoxImage.Warning);
                return;
            }
            // Guard: generation needs at least one attribute.
            if (attrs.Count < 1)
            {
                var dialogResult = System.Windows.MessageBox.Show(
                    "Please have at least one attribute created to generate",
                    "Data generating error", System.Windows.MessageBoxButton.OK,
                    System.Windows.MessageBoxImage.Warning);
                return;
            }
            using (var dirB = new System.Windows.Forms.SaveFileDialog()) {
                dirB.Filter     = "Text Files | *.txt";
                dirB.DefaultExt = "txt";
                var res = dirB.ShowDialog();
                if (res == System.Windows.Forms.DialogResult.OK)
                {
                    // Generated samples are kept in memory as well, so they can be
                    // cross-validated after the file has been written.
                    List <float[]> attrValues  = new List <float[]>();
                    List <int>     classValues = new List <int>();

                    using (var file = new System.IO.StreamWriter(dirB.FileName)) {
                        string line;// = "Class";
                        //foreach (var v in attrs)
                        //    line += "," + v.Key;
                        //file.WriteLine(line);
                        // Outer loop: one pass per class; the class index v is used as its label.
                        for (int v = 0; v < classes.Count; v++)
                        {
                            //foreach (var v in classes)
                            // classes[v].Value is the number of samples to generate for class v.
                            for (int n = 0; n < classes[v].Value; n++)
                            {
                                line = classes[v].Key;
                                classValues.Add(v);
                                List <float> aVals = new List <float>();

                                // One generated value per entry of classAttrs[v].
                                // NOTE(review): presumably classAttrs[v][t] holds the per-class
                                // settings for attribute t — confirm against genetare().
                                for (int t = 0; t < classAttrs[v].Count; t++)
                                {
                                    float aVal = attrs[t].Value.genetare(classAttrs[v][t]);
                                    aVals.Add(aVal);
                                    // Invariant culture keeps the decimal separator stable in the file.
                                    line += "," + aVal.ToString(System.Globalization.CultureInfo.InvariantCulture);
                                }

                                attrValues.Add(aVals.ToArray <float>());
                                file.WriteLine(line);
                            }
                        }
                    }

                    var dialogResult = System.Windows.MessageBox.Show("Do you want to test the generated data?", "Data testing - crossvalidation", System.Windows.MessageBoxButton.YesNo);
                    if (dialogResult == MessageBoxResult.Yes)
                    {
                        // Two views of the same data: doubles for kNN and SVM, and integers
                        // (value * 100, rounded) for the Naive Bayes learner.
                        float[][]  inputs   = attrValues.ToArray();
                        double[][] inputs_d = inputs.Select(xa => xa.Select(ya => (double)ya).ToArray()).ToArray();
                        int[][]    inputs_i = inputs.Select(xa => xa.Select(ya => (int)Math.Round(ya * 100)).ToArray()).ToArray();
                        int[]      outputs  = classValues.ToArray();


                        //var learn = new NaiveBayesLearning();
                        //NaiveBayes nb = learn.Learn(inputs, outputs);

                        // 1) Naive Bayes, 4-fold cross-validation on the integer inputs.
                        var cv = CrossValidation.Create(
                            k: 4,
                            learner: (p) => new NaiveBayesLearning(),
                            loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                            fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                            x: inputs_i, y: outputs
                            );

                        var result = cv.Learn(inputs_i, outputs);

                        int numberOfSamples = result.NumberOfSamples;
                        int numberOfInputs  = result.NumberOfInputs;
                        int numberOfOutputs = result.NumberOfOutputs;

                        double trainingError       = result.Training.Mean;
                        double validationError     = result.Validation.Mean;
                        GeneralConfusionMatrix gcm = result.ToConfusionMatrix(inputs_i, outputs);
                        double nb_accuracy         = gcm.Accuracy;

                        //..................
                        // 2) k-nearest neighbours. Despite its name, classesSqrt is the rounded
                        // square root of the number of samples (outputs.Length), used as k.
                        int classesSqrt = (int)Math.Round(Math.Sqrt(outputs.Length));

                        var crossvalidation = CrossValidation.Create(
                            k: 4,
                            learner: (p) => new KNearestNeighbors(k: classesSqrt),
                            loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                            fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                            x: inputs_d, y: outputs
                            );
                        var result2 = crossvalidation.Learn(inputs_d, outputs);
                        // We can grab some information about the problem:
                        numberOfSamples = result2.NumberOfSamples;
                        numberOfInputs  = result2.NumberOfInputs;
                        numberOfOutputs = result2.NumberOfOutputs;

                        trainingError   = result2.Training.Mean;
                        validationError = result2.Validation.Mean;

                        // If desired, compute an aggregate confusion matrix for the validation sets:
                        gcm = result2.ToConfusionMatrix(inputs_d, outputs);
                        double knn_accuracy = gcm.Accuracy;

                        //............................

                        // 3) Multi-class SVM with a Gaussian kernel, trained with SMO.
                        var crossvalidationsvm = CrossValidation.Create(
                            k: 4,
                            learner: (p) => new MulticlassSupportVectorLearning <Gaussian>()
                        {
                            Learner = (param) => new SequentialMinimalOptimization <Gaussian>()
                            {
                                UseKernelEstimation = true
                            }
                        },
                            loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                            fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                            x: inputs_d, y: outputs
                            );
                        //crossvalidationReadsvm.ParallelOptions.MaxDegreeOfParallelism = 1;
                        var resultsvm = crossvalidationsvm.Learn(inputs_d, outputs);
                        // We can grab some information about the problem:
                        var numberOfSamplessvm = resultsvm.NumberOfSamples;
                        var numberOfInputssvm  = resultsvm.NumberOfInputs;
                        var numberOfOutputssvm = resultsvm.NumberOfOutputs;

                        var trainingErrorsvm   = resultsvm.Training.Mean;
                        var validationErrorsvm = resultsvm.Validation.Mean;

                        var    CMsvm        = resultsvm.ToConfusionMatrix(inputs_d, outputs);
                        double svm_accuracy = CMsvm.Accuracy;


                        // Show the three accuracies as percentages with two decimals.
                        System.Windows.MessageBox.Show("Naive Bayes Accuracy: " + (nb_accuracy * 100)
                                                       .ToString("0.00", System.Globalization.CultureInfo.InvariantCulture)
                                                       + "%\n" +
                                                       "\nk Nearest Neighbors Accuracy: " + (knn_accuracy * 100)
                                                       .ToString("0.00", System.Globalization.CultureInfo.InvariantCulture)
                                                       + "%\n" +
                                                       "\nSupport Vector Machine Accuracy: " + (svm_accuracy * 100)
                                                       .ToString("0.00", System.Globalization.CultureInfo.InvariantCulture)
                                                       + "%\n", "Data testing - crossvalidation", System.Windows.MessageBoxButton.OK);
                        // Dump the results to a file with a relative path. Note that the column
                        // order here is kNN, Naive Bayes, SVM, which differs from the message box.
                        using (var write = new System.IO.StreamWriter("TestDataDump.txt"))
                        {
                            write.WriteLine("GeneratedDataAmt," + outputs.Length);
                            write.WriteLine("Accuracy," +
                                            (100.0 * knn_accuracy).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture) + "," +
                                            (100.0 * nb_accuracy).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture) + "," +
                                            (100.0 * svm_accuracy).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture));
                        }
                        //System.Diagnostics.Process.Start("TestDataDump.txt");
                        dialogResult = System.Windows.MessageBox.Show("Do you want to open the file with generated data?", "Data testing - extended data", System.Windows.MessageBoxButton.YesNo);
                        if (dialogResult == MessageBoxResult.Yes)
                        {
                            // Passes the generated data file (not the dump) to Process.Start.
                            System.Diagnostics.Process.Start(dirB.FileName);
                        }
                    }
                }
            }
        }

Пример #15

Показать файл

        /// <summary>
        /// Cross-validates a <c>ProbabilisticCoordinateDescent</c> learner on the given data, trains a
        /// final model on the full data set, reports statistics through <c>Utility.OutPutStats</c>,
        /// saves the resulting logistic regression to disk and returns its decisions for
        /// <paramref name="input1"/>.
        /// </summary>
        /// <param name="input1">Input vectors used both for cross-validation and for training the final model.</param>
        /// <param name="labels">Expected outputs corresponding to <paramref name="input1"/>.</param>
        /// <param name="SaveFile">
        /// Path from which the model file name is derived: every ".csv" is replaced with ".PCD.save".
        /// When the path contains no ".csv", ".PCD.save" is appended instead so that the model is
        /// never written over <paramref name="SaveFile"/> itself.
        /// </param>
        /// <returns>
        /// The trained model's decisions for <paramref name="input1"/>, converted from <c>bool[]</c>
        /// by <c>Funcs.Utility.BoolToInt</c>.
        /// </returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="SaveFile"/> is null.</exception>
        public static int[] ProbabilisticCoordinateDescent(double[][] input1, int[] labels, string SaveFile)
        {
            // http://accord-framework.net/docs/html/T_Accord_MachineLearning_VectorMachines_Learning_ProbabilisticCoordinateDescent.htm

            /* This class implements a SupportVectorMachine learning algorithm specifically crafted for
             * probabilistic linear machines only. It provides a L1- regularized coordinate descent learning
             * algorithm for optimizing the learning problem. The code has been based on liblinear's method
             * solve_l1r_lr method, whose original description is provided below.
             *
             * Liblinear's solver -s 6: L1R_LR. A coordinate descent algorithm for L1-regularized logistic
             * regression (probabilistic svm) problems.
             */

            // Fail fast: the save path is only needed at the very end, and discovering a
            // missing path after cross-validation and training would waste all of that work.
            if (SaveFile == null)
            {
                throw new System.ArgumentNullException("SaveFile");
            }

            int folds = 5;

            // Fixed seed so that the fold assignment is reproducible between runs.
            Accord.Math.Random.Generator.Seed = 0;
            var cv = CrossValidation.Create(

                k: folds,     // Number of cross-validation folds

                // First we define the learning algorithm:
                learner: (p) => new ProbabilisticCoordinateDescent(),

                // Now we have to specify how the model's performance should be measured:
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),

                // This function can be used to perform any special
                // operations before the actual learning is done, but
                // here we will just leave it as simple as it can be:
                fit: (teach, x, y, w) => teach.Learn(x, y, w),

                // Finally, we have to pass the input and output data
                // that will be used in cross-validation.
                x: input1, y: labels
                );
            var cvresult = cv.Learn(input1, labels);
            GeneralConfusionMatrix gcm = cvresult.ToConfusionMatrix(input1, labels);

            // Train the final model on the complete data set.
            var teacher = new ProbabilisticCoordinateDescent()
            {
                Tolerance  = 1e-10,
                Complexity = 1e+10,
                // learn a hard-margin model

                /* Complexity (cost) parameter C. Increasing the value of C forces the creation of a more
                 * accurate model that may not generalize well. If this value is not set and UseComplexityHeuristic
                 * is set to true, the framework will automatically guess a value for C. If this value is manually
                 * set to something else, then UseComplexityHeuristic will be automatically disabled and the given
                 * value will be used instead.
                 */
            };
            var             svm           = teacher.Learn(input1, labels);
            var             svmregression = (LogisticRegression)svm;
            ConfusionMatrix cm            = ConfusionMatrix.Estimate(svm, input1, labels);

            // Report: sample count, input count and mean training error from cross-validation,
            // cross-validated accuracy, then false positives, false negatives and F-score of
            // the final model evaluated on the training data.
            Utility.OutPutStats(cvresult.NumberOfSamples, cvresult.NumberOfInputs, cvresult.Training.Mean,
                                gcm.Accuracy, cm.FalsePositives, cm.FalseNegatives, cm.FScore);

            // Write the model out to a save file
            string modelsavefilename = SaveFile.Replace(".csv", ".PCD.save");

            // Replace() returns the path unchanged when it contains no ".csv" (for example
            // "DATA.CSV" or "data.txt"); saving there would overwrite the input file itself.
            if (modelsavefilename == SaveFile)
            {
                modelsavefilename = SaveFile + ".PCD.save";
            }

            svmregression.Save(modelsavefilename, compression: SerializerCompression.None);

            bool[] answers = svmregression.Decide(input1);
            return Funcs.Utility.BoolToInt(answers);
        }

Пример #16

Показать файл

Файл: DataExtendWindow.xaml.cs Проект: MihnikHangover/Inzynierka2

        private void GenerateBasedOnData()
        {
            List <string[]> generating = new List <string[]>(); // do ewentualnego sprawdzania

            var attrType = RemoveAt(this.attrType, 0);

            //tutaj dorzucam tworzenie wykresu ciągłego prawdopodobieństwa
            Spline3Deg[,] probabilities = new Spline3Deg[classes, attribs];
            for (int i = 0; i < attribs; i++)
            {
                if (attrType[i].Equals("double") || attrType[i].Equals("integer"))
                {
                    for (int j = 0; j < classes; j++)
                    {
                        int      c    = values.ElementAt(j).Value.Item2.ElementAt(i).Value.Count;
                        double[] y, x = new double[c];
                        SortedList <double, int> temp = new SortedList <double, int>();
                        foreach (var v in values.ElementAt(j).Value.Item2.ElementAt(i).Value)
                        {
                            int tI = v.Value; double tD = Double.Parse(v.Key.Replace(" ", string.Empty),
                                                                       System.Globalization.NumberStyles.AllowDecimalPoint,
                                                                       System.Globalization.NumberFormatInfo.InvariantInfo);
                            temp.Add(tD, tI);
                        }
                        y    = temp.Keys.ToArray();
                        x[0] = 0;
                        for (int k = 1; k < temp.Count; k++)
                        {
                            x[k] = x[k - 1] + temp.ElementAt(k - 1).Value + temp.ElementAt(k).Value;
                        }
                        probabilities[j, i] = new Spline3Deg(x, y);
                    }
                }
            }


            //do sprawdzania punktacji później
            //podzielić dane wejściowe i wygenerowane na klasy i artybuty
            var readClass  = new int[reading.Count];
            var readAttr_d = new double[reading.Count, reading.ElementAt(0).Length - 1].ToJagged();

            var stringIntCheatSheet = new Dictionary <string, int> [reading.ElementAt(0).Length];

            for (int i = 0; i < stringIntCheatSheet.Length; i++)
            {
                stringIntCheatSheet[i] = new Dictionary <string, int>();
            }

            for (int x = 0; x < reading.Count; x++)
            {
                for (int y = 0; y < reading.ElementAt(0).Length; y++)
                {
                    double rr = 0;
                    string ss = reading.ElementAt(x)[y];
                    if (!double.TryParse(ss, System.Globalization.NumberStyles.AllowDecimalPoint,
                                         System.Globalization.NumberFormatInfo.InvariantInfo, out rr) ||
                        y == 0)
                    {
                        if (!stringIntCheatSheet[y].ContainsKey(ss))
                        {
                            stringIntCheatSheet[y].Add(ss, stringIntCheatSheet[y].Count);
                        }
                        rr = stringIntCheatSheet[y][ss];
                    }
                    if (y == 0)
                    {
                        readClass[x] = (int)rr;
                    }
                    else
                    {
                        readAttr_d[x][y - 1] = rr;
                    }
                }
            }
            int readClassesSqrt = (int)Math.Round(Math.Sqrt(reading.Count)),
                genClassesSqrt, mixClassesSqrt;
            var learnKnn = new KNearestNeighbors(readClassesSqrt);

            var knn = learnKnn.Learn(readAttr_d, readClass);

            double[] attrcr = new double[attribs];


            string[] bestattr = new string[attribs];
            double   bestscore;

            //czas generować ten szajs
            var newStuff = new string[newData, attribs + 1];

            for (int it = 0; it < newStuff.GetLength(0); it++)
            {
                bestscore = 0;

                int cl = rnd.Next(classes); //rnd to zadelkarowany wcześniej Random //losowanie klasy
                newStuff[it, 0] = values.ElementAt(cl).Key;
                int safety = 0;
                do
                {
                    for (int v = 1; v <= attribs; v++)
                    {     //losowanie wartości atrybutu
                        if (attrType[v - 1].Equals("string"))
                        { //funkcja dyskretna
                            int val = rnd.Next(values.ElementAt(cl).Value.Item1);
                            int b   = 0;
                            foreach (var a in values.ElementAt(cl).Value.Item2[v])
                            {
                                if (val < (b += a.Value))
                                {
                                    newStuff[it, v] = a.Key; //na Monte Carlo
                                    break;
                                }
                            }
                        }
                        else
                        {  //funkcja ciągła
                            Tuple <double, double> extr = probabilities[cl, v - 1].Limits();
                            double val = rnd.Next((int)extr.Item1, (int)extr.Item2) + rnd.NextDouble();
                            double r   = probabilities[cl, v - 1].y(val);
                            if (attrType[v - 1].Equals("double"))
                            {
                                newStuff[it, v] = r.ToString(fltPrec, System.Globalization.CultureInfo.InvariantCulture);
                            }
                            else //if (attrType[v - 1].Equals("integer"))
                            {
                                newStuff[it, v] = Math.Round(r).ToString();
                            }
                        }//koniec losowania wartości atrybutu
                        ///ekstra warunek bezpieczeństwa, bo czasami trafiają się NULLe
                        if (string.IsNullOrEmpty(newStuff[it, v]))
                        {
                            v--;
                            continue; //jeśli atrybut ma nulla, powtórz pętlę
                        }
                        ///koniec ekstra warunku bespieczeństwa
                    }//koniec generowania obiektu


                    //do tabliczki do sprawdzenia punktacji
                    for (int v = 1; v <= attribs; v++)
                    {
                        double rr = 0;
                        string ss = newStuff[it, v];
                        if (!double.TryParse(ss, System.Globalization.NumberStyles.AllowDecimalPoint,
                                             System.Globalization.NumberFormatInfo.InvariantInfo, out rr))
                        {
                            if (!stringIntCheatSheet[v].ContainsKey(ss))
                            {
                                stringIntCheatSheet[v].Add(ss, stringIntCheatSheet[v].Count);
                            }
                            rr = stringIntCheatSheet[v][ss];
                        }
                        attrcr[v - 1] = rr;
                    }
                    if (knn.Score(attrcr, cl) > bestscore)
                    {
                        for (int iter = 0; iter < attribs; iter++)
                        {
                            bestattr[iter] = newStuff[it, iter + 1];
                        }
                    }
                } while (knn.Score(attrcr, cl) < scoreH / 100 && ++safety < 1000);

                for (int iter = 0; iter < attribs; iter++)
                {
                    newStuff[it, iter + 1] = bestattr[iter];
                }
            }//koniec całego generowania

            //tu dać zapis do pliku
            string savefiledir = "";

            using (var dirB = new System.Windows.Forms.SaveFileDialog())
            {
                dirB.Filter     = "Text Files | *.txt";
                dirB.DefaultExt = "txt";
                var res = dirB.ShowDialog();
                if (res == System.Windows.Forms.DialogResult.OK)
                {
                    using (var write = new System.IO.StreamWriter(savefiledir = dirB.FileName))
                    {
                        for (int x = 0; x < newStuff.GetLength(0); x++)
                        {
                            string line = "";
                            for (int y = 0; y < newStuff.GetLength(1); y++)
                            {
                                line += newStuff[x, y] + ',';
                            }
                            line = line.Remove(line.Length - 1);
                            string[] temp = line.Split(',');
                            generating.Add(line.Split(','));
                            swap(ref temp[0], ref temp[clsCol]);
                            line = "";
                            for (int y = 0; y < temp.Length; y++)
                            {
                                line += temp[y] + ',';
                            }
                            line = line.Remove(line.Length - 1);
                            write.WriteLine(line);
                        }
                    }
                }
                else
                {
                    return;
                }
            }
            //tu dać walidację wygenerowanych danych

            var dialogResult = System.Windows.MessageBox.Show("Do you want to test the generated data?", "Data testing - extended data", System.Windows.MessageBoxButton.YesNo);

            if (dialogResult == MessageBoxResult.Yes)
            {
                var genClass = new int[generating.Count];
                //var genAttr = new int[generating.Count, generating.ElementAt(0).Length - 1].ToJagged();
                var genAttr_d = new double[generating.Count, generating.ElementAt(0).Length - 1].ToJagged();


                for (int x = 0; x < generating.Count; x++)
                {
                    for (int y = 0; y < generating.ElementAt(0).Length; y++)
                    {
                        double rr = 0;
                        string ss = generating.ElementAt(x)[y];
                        if (!double.TryParse(ss, System.Globalization.NumberStyles.AllowDecimalPoint,
                                             System.Globalization.NumberFormatInfo.InvariantInfo, out rr) || y == 0)
                        {
                            if (!stringIntCheatSheet[y].ContainsKey(ss))
                            {
                                stringIntCheatSheet[y].Add(ss, stringIntCheatSheet[y].Count);
                            }
                            rr = stringIntCheatSheet[y][ss];
                        }
                        if (y == 0)
                        {
                            genClass[x] = (int)rr;
                        }
                        else
                        {
                            genAttr_d[x][y - 1] = rr;
                        }
                    }
                }

                //przerobienie na tablicę intów, z przesunięciem dobli o precyzję
                var genAttr_i  = new int[generating.Count, generating.ElementAt(0).Length - 1].ToJagged();
                var readAttr_i = new int[reading.Count, reading.ElementAt(0).Length - 1].ToJagged();

                int shift = (int)Math.Pow(10, FltPrecBox.SelectedIndex + 1);
                for (int x = 0; x < generating.Count; x++)
                {
                    for (int y = 0; y < generating.ElementAt(0).Length - 1; y++)
                    {
                        if (attrType[y].Equals("double"))
                        {
                            genAttr_i[x][y] = (int)(genAttr_d[x][y] * shift);
                        }
                        else
                        {
                            genAttr_i[x][y] = (int)genAttr_d[x][y];
                        }
                    }
                }
                for (int x = 0; x < reading.Count; x++)
                {
                    for (int y = 0; y < reading.ElementAt(0).Length - 1; y++)
                    {
                        if (attrType[y].Equals("double"))
                        {
                            readAttr_i[x][y] = (int)(readAttr_d[x][y] * shift);
                        }
                        else
                        {
                            readAttr_i[x][y] = (int)readAttr_d[x][y];
                        }
                    }
                }


                int correctnb = 0, incorrectnb = 0, correctknn = 0, incorrectknn = 0, correctsvm = 0, incorrectsvm = 0;

                var        learn = new NaiveBayesLearning();
                NaiveBayes nb    = learn.Learn(readAttr_i, readClass);
                var        test  = nb.Decide(genAttr_i);
                foreach (var v in test)
                {
                    if (v.Equals(genClass[test.IndexOf(v)]))
                    {
                        correctnb++;
                    }
                    else
                    {
                        incorrectnb++;
                    }
                }

                /////////////////////////////////////////////////////////////////////////

                var testknn = knn.Decide(genAttr_d);
                for (int i = 0; i < testknn.Length; i++)
                //foreach (var v in testknn)
                {
                    if (testknn[i].Equals(genClass[i]))
                    {
                        correctknn++;
                    }
                    else
                    {
                        incorrectknn++;
                    }
                }
                /////////////////////////////////////////////////////////////////////////

                try
                {
                    var teach = new MulticlassSupportVectorLearning <Gaussian>()
                    {
                        // Configure the learning algorithm to use SMO to train the
                        //  underlying SVMs in each of the binary class subproblems.
                        Learner = (param) => new SequentialMinimalOptimization <Gaussian>()
                        {
                            // Estimate a suitable guess for the Gaussian kernel's parameters.
                            // This estimate can serve as a starting point for a grid search.
                            UseKernelEstimation = true
                        }
                    };
                    var svm = teach.Learn(readAttr_d, readClass);

                    var testsvm = svm.Decide(genAttr_d);
                    for (int i = 0; i < testsvm.Length; i++)
                    //foreach (var v in testknn)
                    {
                        if (testsvm[i].Equals(genClass[i]))
                        {
                            correctsvm++;
                        }
                        else
                        {
                            incorrectsvm++;
                        }
                    }
                }
                catch (AggregateException) { }
                ////////////////////////////////////////////////////////////

                double[][] mixAttr_d = new double[genAttr_d.GetLength(0) + readAttr_d.GetLength(0),
                                                  genAttr_d[0].Length].ToJagged();
                int[] mixClass = new int[genClass.Length + readClass.Length];

                Array.Copy(readClass, mixClass, readClass.Length);
                Array.Copy(genClass, 0, mixClass, readClass.Length, genClass.Length);

                Array.Copy(readAttr_d, mixAttr_d, readAttr_d.Length);
                Array.Copy(genAttr_d, 0, mixAttr_d, readAttr_d.Length, genAttr_d.Length);

                int[][] mixAttr_i = new int[genAttr_i.GetLength(0) + readAttr_i.GetLength(0),
                                            genAttr_i[0].Length].ToJagged();

                Array.Copy(readAttr_i, mixAttr_i, readAttr_i.Length);
                Array.Copy(genAttr_i, 0, mixAttr_i, readAttr_i.Length, genAttr_i.Length);

                //KROSWALIDACJAAAAAAAAAAAAAAAAAA
                genClassesSqrt = (int)Math.Round(Math.Sqrt(genClass.Length));
                mixClassesSqrt = (int)Math.Round(Math.Sqrt(mixClass.Length));

                //KNN

                var crossvalidationRead = CrossValidation.Create(
                    k: 4,
                    learner: (p) => new KNearestNeighbors(k: readClassesSqrt),
                    loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                    fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                    x: readAttr_d, y: readClass
                    );
                var resultRead = crossvalidationRead.Learn(readAttr_d, readClass);
                // We can grab some information about the problem:
                var numberOfSamplesRead = resultRead.NumberOfSamples;
                var numberOfInputsRead  = resultRead.NumberOfInputs;
                var numberOfOutputsRead = resultRead.NumberOfOutputs;

                var trainingErrorRead   = resultRead.Training.Mean;
                var validationErrorRead = resultRead.Validation.Mean;

                var    readCM       = resultRead.ToConfusionMatrix(readAttr_d, readClass);
                double readAccuracy = readCM.Accuracy;
                //////////////////////////////////////////////////////////
                var crossvalidationGen = CrossValidation.Create(
                    k: 4,
                    learner: (p) => new KNearestNeighbors(k: genClassesSqrt),
                    loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                    fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                    x: genAttr_d, y: genClass
                    );
                var resultGen = crossvalidationGen.Learn(genAttr_d, genClass);
                // We can grab some information about the problem:
                var numberOfSamplesGen = resultGen.NumberOfSamples;
                var numberOfInputsGen  = resultGen.NumberOfInputs;
                var numberOfOutputsGen = resultGen.NumberOfOutputs;

                var    trainingErrorGen   = resultGen.Training.Mean;
                var    validationErrorGen = resultGen.Validation.Mean;
                var    genCM       = resultGen.ToConfusionMatrix(genAttr_d, genClass);
                double genAccuracy = genCM.Accuracy;
                //////////////////////////////////////////////////////////

                var crossvalidationMix = CrossValidation.Create(
                    k: 4,
                    learner: (p) => new KNearestNeighbors(k: mixClassesSqrt),
                    loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                    fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                    x: mixAttr_d, y: mixClass
                    );
                var resultMix = crossvalidationMix.Learn(readAttr_d, readClass);
                // We can grab some information about the problem:
                var numberOfSamplesMix = resultMix.NumberOfSamples;
                var numberOfInputsMix  = resultMix.NumberOfInputs;
                var numberOfOutputsMix = resultMix.NumberOfOutputs;

                var trainingErrorMix   = resultMix.Training.Mean;
                var validationErrorMix = resultMix.Validation.Mean;

                var    mixCM       = resultMix.ToConfusionMatrix(mixAttr_d, mixClass);
                double mixAccuracy = mixCM.Accuracy;

                //NB
                var crossvalidationReadnb = CrossValidation.Create(
                    k: 4,
                    learner: (p) => new NaiveBayesLearning(),
                    loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                    fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                    x: readAttr_i, y: readClass
                    );
                var resultReadnb = crossvalidationReadnb.Learn(readAttr_i, readClass);
                // We can grab some information about the problem:
                var numberOfSamplesReadnb = resultReadnb.NumberOfSamples;
                var numberOfInputsReadnb  = resultReadnb.NumberOfInputs;
                var numberOfOutputsReadnb = resultReadnb.NumberOfOutputs;

                var trainingErrorReadnb   = resultReadnb.Training.Mean;
                var validationErrorReadnb = resultReadnb.Validation.Mean;

                var    readCMnb       = resultReadnb.ToConfusionMatrix(readAttr_i, readClass);
                double readAccuracynb = readCMnb.Accuracy;
                //////////////////////////////////////////////////////////
                var crossvalidationGennb = CrossValidation.Create(
                    k: 4,
                    learner: (p) => new NaiveBayesLearning(),
                    loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                    fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                    x: genAttr_i, y: genClass
                    );
                var resultGennb = crossvalidationGennb.Learn(genAttr_i, genClass);
                // We can grab some information about the problem:
                var numberOfSamplesGennb = resultGennb.NumberOfSamples;
                var numberOfInputsGennb  = resultGennb.NumberOfInputs;
                var numberOfOutputsGennb = resultGennb.NumberOfOutputs;

                var    trainingErrorGennb   = resultGennb.Training.Mean;
                var    validationErrorGennb = resultGennb.Validation.Mean;
                var    genCMnb       = resultGennb.ToConfusionMatrix(genAttr_i, genClass);
                double genAccuracynb = genCMnb.Accuracy;
                //////////////////////////////////////////////////////////

                var crossvalidationMixnb = CrossValidation.Create(
                    k: 4,
                    learner: (p) => new NaiveBayesLearning(),
                    loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                    fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                    x: mixAttr_i, y: mixClass
                    );
                var resultMixnb = crossvalidationMixnb.Learn(mixAttr_i, mixClass);
                // We can grab some information about the problem:
                var numberOfSamplesMixnb = resultMixnb.NumberOfSamples;
                var numberOfInputsMixnb  = resultMixnb.NumberOfInputs;
                var numberOfOutputsMixnb = resultMixnb.NumberOfOutputs;

                var trainingErrorMixnb   = resultMixnb.Training.Mean;
                var validationErrorMixnb = resultMixnb.Validation.Mean;

                var    mixCMnb       = resultMixnb.ToConfusionMatrix(mixAttr_i, mixClass);
                double mixAccuracynb = mixCMnb.Accuracy;

                //SVM
                double readAccuracysvm = 0, genAccuracysvm = 0, mixAccuracysvm = 0;
                try
                {
                    var crossvalidationReadsvm = CrossValidation.Create(
                        k: 4,
                        learner: (p) => new MulticlassSupportVectorLearning <Gaussian>()
                    {
                        Learner = (param) => new SequentialMinimalOptimization <Gaussian>()
                        {
                            UseKernelEstimation = true
                        }
                    },
                        loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                        fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                        x: readAttr_d, y: readClass
                        );
                    //crossvalidationReadsvm.ParallelOptions.MaxDegreeOfParallelism = 1;
                    var resultReadsvm = crossvalidationReadsvm.Learn(readAttr_d, readClass);
                    // We can grab some information about the problem:
                    var numberOfSamplesReadsvm = resultReadsvm.NumberOfSamples;
                    var numberOfInputsReadsvm  = resultReadsvm.NumberOfInputs;
                    var numberOfOutputsReadsvm = resultReadsvm.NumberOfOutputs;

                    var trainingErrorReadsvm   = resultReadsvm.Training.Mean;
                    var validationErrorReadsvm = resultReadsvm.Validation.Mean;

                    var readCMsvm = resultReadsvm.ToConfusionMatrix(readAttr_d, readClass);
                    readAccuracysvm = readCMsvm.Accuracy;
                }
                catch (AggregateException) { }
                //////////////////////////////////////////////////////////
                try
                {
                    var crossvalidationGensvm = CrossValidation.Create(
                        k: 4,
                        learner: (p) => new MulticlassSupportVectorLearning <Gaussian>()
                    {
                        Learner = (param) => new SequentialMinimalOptimization <Gaussian>()
                        {
                            UseKernelEstimation = true
                        }
                    },
                        loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                        fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                        x: genAttr_d, y: genClass
                        );
                    var resultGensvm = crossvalidationGensvm.Learn(genAttr_d, genClass);
                    // We can grab some information about the problem:
                    var numberOfSamplesGensvm = resultGensvm.NumberOfSamples;
                    var numberOfInputsGensvm  = resultGensvm.NumberOfInputs;
                    var numberOfOutputsGensvm = resultGensvm.NumberOfOutputs;

                    var trainingErrorGensvm   = resultGensvm.Training.Mean;
                    var validationErrorGensvm = resultGensvm.Validation.Mean;
                    var genCMsvm = resultGensvm.ToConfusionMatrix(genAttr_d, genClass);
                    genAccuracysvm = genCMsvm.Accuracy;
                }
                catch (AggregateException) { }
                //////////////////////////////////////////////////////////
                try
                {
                    var crossvalidationMixsvm = CrossValidation.Create(
                        k: 4,
                        learner: (p) => new MulticlassSupportVectorLearning <Gaussian>()
                    {
                        Learner = (param) => new SequentialMinimalOptimization <Gaussian>()
                        {
                            UseKernelEstimation = true
                        }
                    },
                        loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                        fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                        x: mixAttr_d, y: mixClass
                        );
                    var resultMixsvm = crossvalidationMixsvm.Learn(mixAttr_d, mixClass);
                    // We can grab some information about the problem:
                    var numberOfSamplesMixsvm = resultMixsvm.NumberOfSamples;
                    var numberOfInputsMixsvm  = resultMixsvm.NumberOfInputs;
                    var numberOfOutputsMixsvm = resultMixsvm.NumberOfOutputs;

                    var trainingErrorMixsvm   = resultMixsvm.Training.Mean;
                    var validationErrorMixsvm = resultMixsvm.Validation.Mean;

                    var mixCMsvm = resultMixsvm.ToConfusionMatrix(mixAttr_d, mixClass);
                    mixAccuracysvm = mixCMsvm.Accuracy;
                }
                catch (AggregateException) { }
                /////////////////////////////////////////////////
                if (correctsvm == 0 && incorrectsvm == 0)
                {
                    incorrectsvm = 1;
                }
                double knnRatio = 100.0 * correctknn / (correctknn + incorrectknn),
                       nbRatio  = 100.0 * correctnb / (correctnb + incorrectnb),
                       svmRatio = 100.0 * correctsvm / (correctsvm + incorrectsvm);
                System.Windows.MessageBox.Show(
                    "K Nearest Neighbours Classification:\nGenerated Data Correct Ratio: " +
                    knnRatio.ToString("0.00", System.Globalization.CultureInfo.InvariantCulture) + "%\n" +
                    "Original Data X-Validation Accuracy: "
                    + (100.0 * readAccuracy).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture)
                    + "%\n" + "Generated Data X-Validation Accuracy: "
                    + (100.0 * genAccuracy).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture)
                    + "%\n" + "Mixed Data X-Validation Accuracy: "
                    + (100.0 * mixAccuracy).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture)
                    + "%\n"
                    + "\n\n" + "Naive Bayes Classification:\nGenerated Data Correct Ratio: " +
                    nbRatio.ToString("0.00", System.Globalization.CultureInfo.InvariantCulture) + "%\n" +
                    "Original Data X-Validation Accuracy: "
                    + (100.0 * readAccuracynb).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture)
                    + "%\n" + "Generated Data X-Validation Accuracy: "
                    + (100.0 * genAccuracynb).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture)
                    + "%\n" + "Mixed Data X-Validation Accuracy: "
                    + (100.0 * mixAccuracynb).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture)
                    + "%\n" +
                    "\n\n" + "Support Vector Machine Classification:\nGenerated Data Correct Ratio: " +
                    svmRatio.ToString("0.00", System.Globalization.CultureInfo.InvariantCulture) + "%\n" +
                    "Original Data X-Validation Accuracy: "
                    + (100.0 * readAccuracysvm).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture)
                    + "%\n" + "Generated Data X-Validation Accuracy: "
                    + (100.0 * genAccuracysvm).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture)
                    + "%\n" + "Mixed Data X-Validation Accuracy: "
                    + (100.0 * mixAccuracysvm).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture)
                    + "%\n",
                    "Data Testing - extending dataset",
                    System.Windows.MessageBoxButton.OK);

                /*
                 * ///TEMP - do eksportowania danych do arkusza
                 *
                 *  using (var write = new System.IO.StreamWriter("TestDataDump.txt")){
                 *      write.WriteLine("ScoreTreshold," + scoreH.ToString());
                 *      write.WriteLine("NewDataAmt," + newData.ToString());
                 *      write.WriteLine("Generated Data Correct Ratio," +
                 *          knnRatio.ToString("0.00", System.Globalization.CultureInfo.InvariantCulture) + "," +
                 *          nbRatio.ToString("0.00", System.Globalization.CultureInfo.InvariantCulture) +"," +
                 *          svmRatio.ToString("0.00", System.Globalization.CultureInfo.InvariantCulture));
                 *      write.WriteLine("Original Data X-Validation Accuracy," +
                 *          (100.0 * readAccuracy).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture) + "," +
                 *          (100.0 * readAccuracynb).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture) + "," +
                 *          (100.0 * readAccuracysvm).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture));
                 *      write.WriteLine("Generated Data X-Validation Accuracy," +
                 *          (100.0 * genAccuracy).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture) + "," +
                 *          (100.0 * genAccuracynb).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture) + "," +
                 *          (100.0 * genAccuracysvm).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture));
                 *  write.WriteLine("Mixed Data X-Validation Accuracy," +
                 *          (100.0 * mixAccuracy).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture) + "," +
                 *          (100.0 * mixAccuracynb).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture) + "," +
                 *          (100.0 * mixAccuracysvm).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture));
                 *
                 * }
                 *  System.Diagnostics.Process.Start("TestDataDump.txt");
                 */
            }
            dialogResult = System.Windows.MessageBox.Show("Do you want to open the file with generated data?", "Data testing - extended data", System.Windows.MessageBoxButton.YesNo);
            if (dialogResult == MessageBoxResult.Yes)
            {
                System.Diagnostics.Process.Start(savefiledir);
            }
        }

Пример #17

Показать файл

        // Cross-validates C4.5 decision trees on the Wisconsin (Diagnostic) Breast Cancer
        // data using 10 folds: first with trees limited to height 5, then with trees limited
        // to height 1, asserting the error statistics and the tree height of each run.
        public void CrossValidationTest()
        {
            #region doc_cross_validation
            // Seed the shared random generator so that every run is repeatable
            Accord.Math.Random.Generator.Seed = 0;

            // The data used here is the Wisconsin (Diagnostic) Breast Cancer set, in
            // which measurements extracted from a breast cancer exam have to be
            // classified as belonging to a malignant or to a benign type of cancer:
            var dataset = new WisconsinDiagnosticBreastCancer();
            double[][] features = dataset.Features;    // 569 rows with 30 values each
            int[]      labels   = dataset.ClassLabels; // 569 labels, 2 distinct classes

            // We are interested in the cross-validation performance of a decision
            // tree whose height is capped at 5 and in which a variable may join
            // the decision path no more than 2 times:
            var cv = CrossValidation.Create(

                // number of folds (10-fold cross-validation)
                k: 10,

                // factory for the learning algorithm used on each fold
                learner: (p) => new C45Learning() { Join = 2, MaxHeight = 5 },

                // how the performance of each tree is measured
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),

                // hook that runs the actual learning; nothing special is
                // needed here, so it just forwards to the teacher
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

                // the data that cross-validation will operate on
                x: features,
                y: labels
                );

            // With the cross-validation object ready, .Learn is called with the
            // same data, which is then partitioned into the folds:
            var result = cv.Learn(features, labels);

            // General information about the problem:
            int numberOfSamples = result.NumberOfSamples;    // asserted below to be 569
            int numberOfInputs  = result.NumberOfInputs;     // asserted below to be 30
            int numberOfOutputs = result.NumberOfOutputs;    // asserted below to be 2

            double trainingError   = result.Training.Mean;   // asserted below to be 0.017770391691033137
            double validationError = result.Validation.Mean; // asserted below to be 0.077318295739348369
            #endregion

            Assert.AreEqual(569, numberOfSamples);
            Assert.AreEqual(30, numberOfInputs);
            Assert.AreEqual(2, numberOfOutputs);

            Assert.AreEqual(10, cv.K);
            Assert.AreEqual(0.017770391691033137, result.Training.Mean, 1e-10);
            Assert.AreEqual(0.077318295739348369, result.Validation.Mean, 1e-10);

            Assert.AreEqual(3.0913682243756776E-05, result.Training.Variance, 1e-10);
            Assert.AreEqual(0.00090104473101439207, result.Validation.Variance, 1e-10);

            Assert.AreEqual(10, cv.Folds.Length);
            Assert.AreEqual(10, result.Models.Length);

            // The model learned on the first fold must respect the height limit
            var deepTree = result.Models[0].Model;
            Assert.AreEqual(5, deepTree.GetHeight());

            // Second run: same data and folds count, but the trees are restricted
            // to a single level and a single variable
            var shallowCv = CrossValidation.Create(
                k: 10,
                learner: (p) => new C45Learning() { Join = 1, MaxHeight = 1, MaxVariables = 1 },
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                x: features,
                y: labels
                );

            var shallowResult = shallowCv.Learn(features, labels);

            var shallowTree = shallowResult.Models[0].Model;
            Assert.AreEqual(1, shallowTree.GetHeight());

            // Note the looser tolerance (5e-3) on the training mean of this run
            Assert.AreEqual(0.10896305433723197, shallowResult.Training.Mean, 5e-3);
            Assert.AreEqual(0.1125, shallowResult.Validation.Mean, 1e-10);

            Assert.AreEqual(2.1009258672955873E-05, shallowResult.Training.Variance, 1e-10);
            Assert.AreEqual(0.0017292179645018977, shallowResult.Validation.Variance, 1e-10);
        }

Пример #18

Показать файл

Файл: Program.cs Проект: abdalmoneema/RoadScanner_BE

        // Runs a 10-fold cross-validation of a C4.5 decision tree over the given data and
        // prints the accuracy, error, per-class precision/recall and per-class F-score of
        // the aggregated confusion matrix to the console.
        //
        //   inputs  - one feature vector per sample
        //   outputs - one class label per sample; the printed captions treat class 0 as
        //             "Not Anomaly" and class 1 as "Anomaly", so at least two classes
        //             are expected (indices 0 and 1 of Precision/Recall are read)
        public static void DecisionTree_crossValidation(double[][] inputs, int[] outputs)
        {
            // Ensure we have reproducible results
            Accord.Math.Random.Generator.Seed = 0;

            // Measure the cross-validation performance of a decision tree with a
            // maximum height of 5 and where variables are able to join the decision
            // path at most 2 times during evaluation:
            var cv = CrossValidation.Create(

                k: 10,                            // 10-fold cross validation

                learner: (p) => new C45Learning() // creates the learning algorithm for a fold
                {
                    Join      = 2,
                    MaxHeight = 5
                },

                // How the tree performance should be measured:
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),

                // Hook for special operations before the actual learning is done;
                // here it simply forwards to the teacher:
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

                // The input and output data used in cross-validation:
                x: inputs, y: outputs
                );

            // Partition the data into folds and learn/validate on each of them
            var result = cv.Learn(inputs, outputs);

            // Aggregate confusion matrix for the validation sets
            GeneralConfusionMatrix gcm = result.ToConfusionMatrix(inputs, outputs);

            Console.WriteLine("Accuracy:" + gcm.Accuracy);
            Console.WriteLine("Error:" + gcm.Error);

            Console.WriteLine("Not Anomaly Precision:" + gcm.Precision[0]);
            Console.WriteLine("Not Anomaly Recall:" + gcm.Recall[0]);
            Console.WriteLine("Anomaly Precision:" + gcm.Precision[1]);
            Console.WriteLine("Anomaly Recall:" + gcm.Recall[1]);

            // F-score = 2 * P * R / (P + R). When both precision and recall are zero the
            // quotient would be 0 / 0 (NaN), so the score is reported as 0 in that case.
            double anomalyDenominator    = gcm.Precision[1] + gcm.Recall[1];
            double notAnomalyDenominator = gcm.Precision[0] + gcm.Recall[0];

            double anomalyFScore = anomalyDenominator == 0
                ? 0
                : 2 * (gcm.Precision[1] * gcm.Recall[1]) / anomalyDenominator;
            double NotAnomalyFScore = notAnomalyDenominator == 0
                ? 0
                : 2 * (gcm.Precision[0] * gcm.Recall[0]) / notAnomalyDenominator;

            Console.WriteLine("Not ANomaly F-score:" + NotAnomalyFScore);
            Console.WriteLine("Anomaly F-score:" + anomalyFScore);
        }

Пример #19

Показать файл

Файл: Program.cs Проект: msbobh/NBayes

        // Entry point: trains a Naive Bayes classifier from CSV files and reports k-fold
        // cross-validation results, or, when the test flag is given, loads a previously
        // saved model and reports its accuracy on the supplied data.
        static void Main(string[] args)
        {
            /*
             * Takes CSV files as input and trains a naive Bayes classifier. If the test flag is
             * set, no new classifier is trained; instead the previously saved model is loaded
             * and its accuracy against the input data is calculated.
             *
             * arg 1 = training file or test file
             * arg 2 = label file
             * arg 3 = test flag (-s or -S)
             * arg 4 = file name of the model file
             */

            const int minargs = 2;
            const int maxargs = 4;
            const int Folds   = 4;

            Accord.Math.Random.Generator.Seed = 0;
            string trainingFname = null;
            string labelFname    = null;
            string modelFname    = "NBmodel.sav"; // Default model file name
            bool   NoTrain       = false;

            Functions.Welcome();
            int numArgs = Functions.parseCommandLine(args, maxargs, minargs);

            if (numArgs == 0)
            {
                Console.WriteLine(Strings.resources.usage);
                System.Environment.Exit(1);
            }

            if (numArgs == 2)
            {
                trainingFname = args[0];
                labelFname    = args[1];
            }
            if (numArgs == 3) // the third argument must be the test flag
            {
                if (args[2] == "-s" || args[2] == "-S")
                {
                    NoTrain       = true;
                    trainingFname = args[0];
                    labelFname    = args[1];
                }
                else
                {
                    Console.WriteLine(Strings.resources.usage);
                    System.Environment.Exit(1);
                }
            }

            if (numArgs == 4)
            {
                // With four arguments test mode is selected and the fourth argument names
                // the model file; the third argument is not inspected in this case.
                NoTrain       = true;
                trainingFname = args[0];
                labelFname    = args[1];
                modelFname    = args[3];
            }

            //
            // Check if the training and label files exist and are not locked by another process
            //
            if (!Utility.Functions.checkFile(trainingFname))
            {
                Console.WriteLine("Error opening file {0}", trainingFname);
                System.Environment.Exit(1);
            }
            if (!Functions.checkFile(labelFname))
            {
                Console.WriteLine("Error opening file {0}", labelFname);
                System.Environment.Exit(1);
            }

            //
            // Read in the training and label files, CSV format. Each reader is disposed
            // as soon as its contents have been copied into memory.
            //
            int[][] trainingset;
            using (CsvReader training_samples = new CsvReader(trainingFname, false))
            {
                int[,] MatrixIn = training_samples.ToMatrix <int>();
                trainingset = Functions.convertToJaggedArray(MatrixIn);
            }

            //
            // Naive Bayes gets trained on integer arrays or arrays of "strings"
            //
            int[] output;
            using (CsvReader label_samples = new CsvReader(labelFname, false))
            {
                // Convert the labels to a matrix, then to a jagged array, then to a flat array
                int[,]  labelsIn = label_samples.ToMatrix <int>();
                int[][] LabelSet = Functions.convertToJaggedArray(labelsIn);
                output = Functions.convertTointArray(LabelSet);
            }

            if (!NoTrain)
            {
                // Create a new Naive Bayes learning instance
                var learner = new NaiveBayesLearning();

                // Create a Naive Bayes classifier and train with the input datasets
                NaiveBayes classifier = learner.Learn(trainingset, output);

                /* Cross-validation is a technique for estimating the performance of a predictive model.
                 * It can be used to measure how the results of a statistical analysis will generalize to
                 * an independent data set. It is mainly used in settings where the goal is prediction, and
                 * one wants to estimate how accurately a predictive model will perform in practice.
                 *
                 * One round of cross-validation involves partitioning a sample of data into complementary
                 * subsets, performing the analysis on one subset (called the training set), and validating
                 * the analysis on the other subset (called the validation set or testing set). To reduce
                 * variability, multiple rounds of cross-validation are performed using different partitions,
                 * and the validation results are averaged over the rounds
                 */

                // Create a k-fold cross validation instance over the input training set
                var cv = CrossValidation.Create(k: Folds, learner: (p) => new NaiveBayesLearning(),
                                                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                                                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                                                x: trainingset, y: output);

                var result = cv.Learn(trainingset, output);

                Console.WriteLine("Performing n-fold cross validation where n = {0}", cv.K);

                // Report some information about the problem:
                Console.WriteLine("Cross Validation Results");
                Console.WriteLine("     number of samples {0}", result.NumberOfSamples);
                Console.WriteLine("     number of features: {0}", result.NumberOfInputs);
                Console.WriteLine("     number of outputs {0}", result.NumberOfOutputs);
                Console.WriteLine("     Training Error: {0:n2}", result.Training.Mean);
                Console.WriteLine("     Validation Mean: {0}\n", result.Validation.Mean);

                Console.WriteLine("Creating General Confusion Matrix from Cross Validation");
                GeneralConfusionMatrix gcm = result.ToConfusionMatrix(trainingset, output);
                double accuracy            = gcm.Accuracy;
                Console.WriteLine(" GCM Accuracy {0}%\n", accuracy * 100);

                // Confusion matrix of the classifier trained on the full set, evaluated on that same set
                ConfusionMatrix cm = ConfusionMatrix.Estimate(classifier, trainingset, output);
                Console.WriteLine("Confusion Error {0}", cm.Error);
                Console.WriteLine("Confusion accuracy {0}", cm.Accuracy);
                double tp     = cm.TruePositives;
                double tn     = cm.TrueNegatives;
                double fscore = cm.FScore;
                double fp     = cm.FalsePositives;
                double fn     = cm.FalseNegatives;
                Console.WriteLine("TP = {0},TN = {1}, FP = {2}, FN = {3}, Fscore = {4} ", tp, tn, fp, fn, fscore);

                // Save the model created from the training set. modelFname still holds the
                // default name here, because a custom name is only accepted in test mode.
                classifier.Save(modelFname, compression: SerializerCompression.None);
                Console.WriteLine("Successfully saved the model");
            }
            else
            {
                // Load a previous model and compare its predictions to the labels
                NaiveBayes loaded_nb = Serializer.Load <NaiveBayes>(modelFname);
                int[]      results   = loaded_nb.Decide(trainingset);
                double     accuracy  = Functions.CalculateAccuraccy(output, results);
                Console.WriteLine("Accuracy of predictions = {0}%", Math.Round(accuracy * 100, 2));
            }
        }

Пример #20

Показать файл

        // Uses 3-fold cross-validation to assess a linear Support Vector Machine on a
        // repeated XOR data set and asserts the resulting error statistics.
        public void learn_test_simple()
        {
            #region doc_learn_simple
            // Fix the seed so that the results can be reproduced
            Accord.Math.Random.Generator.Seed = 0;

            // This sample shows how Cross-Validation can be used to
            // assess the performance of Support Vector Machines.

            // The binary data below encodes a XOR problem (the same
            // four points repeated four times); we want to see how
            // well SVMs perform on it.

            double[][] samples =
            {
                new double[] { -1, -1 }, new double[] { 1, -1 }, new double[] { -1, 1 }, new double[] { 1, 1 },
                new double[] { -1, -1 }, new double[] { 1, -1 }, new double[] { -1, 1 }, new double[] { 1, 1 },
                new double[] { -1, -1 }, new double[] { 1, -1 }, new double[] { -1, 1 }, new double[] { 1, 1 },
                new double[] { -1, -1 }, new double[] { 1, -1 }, new double[] { -1, 1 }, new double[] { 1, 1 },
            };

            // XOR outcome for each of the samples above, in the same order
            int[] labels =
            {
                -1, 1, 1, -1,
                -1, 1, 1, -1,
                -1, 1, 1, -1,
                -1, 1, 1, -1,
            };


            // Build the Cross-validation algorithm from the number of folds,
            // the learner factory, the loss, the fitting hook and the data
            var cv = CrossValidation.Create(

                // 3 folds are used in cross-validation
                k: 3,

                // How the learning algorithm for each model is created
                learner: (s) => new SequentialMinimalOptimization <Linear>() { Complexity = 100 },

                // How the performance of each model is measured
                loss: (expected, actual, p) => new ZeroOneLoss(expected).Loss(actual),

                // How each model is fitted to its share of the data
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

                x: samples,
                y: labels
                );

            // The degree of parallelization can be controlled if needed
            cv.ParallelOptions.MaxDegreeOfParallelism = 1;

            var result = cv.Learn(samples, labels);

            // The measured performance is available from the result
            double trainingErrors   = result.Training.Mean;
            double validationErrors = result.Validation.Mean;
            #endregion

            Assert.AreEqual(3, cv.K);
            Assert.AreEqual(0.37575757575757579, result.Training.Mean, 1e-10);
            Assert.AreEqual(0.75555555555555554, result.Validation.Mean, 1e-10);

            Assert.AreEqual(0.00044077134986225924, result.Training.Variance, 1e-10);
            Assert.AreEqual(0.0059259259259259334, result.Validation.Variance, 1e-10);

            Assert.AreEqual(0.020994555243259126, result.Training.StandardDeviation, 1e-10);
            Assert.AreEqual(0.076980035891950155, result.Validation.StandardDeviation, 1e-10);

            Assert.AreEqual(0, result.Training.PooledStandardDeviation);
            Assert.AreEqual(0, result.Validation.PooledStandardDeviation);

            Assert.AreEqual(3, cv.Folds.Length);
            Assert.AreEqual(3, result.Models.Length);
        }

Пример #21

Показать файл

Файл: Modeling.cs Проект: tshepo-me/c-sharp-machine-learning

        // Entry point: loads the word-vector features and spam/ham labels produced by the
        // data preparation step, cross-validates a Bernoulli Naive Bayes classifier on them
        // and prints the confusion matrix together with accuracy, precision and recall.
        static void Main(string[] args)
        {
            // Read in the file we created in the Data Preparation step
            // TODO: change the path to point to your data directory
            string dataDirPath = "\\\\Mac\\Home\\Documents\\c-sharp-machine-learning\\ch.2\\output";
            // Load the word-vector data into a data frame
            var wordVecDF = Frame.ReadCsv(
                Path.Combine(dataDirPath, "data-preparation-step\\subjectWordVec-alphaonly.csv"),
                hasHeaders: true,
                inferTypes: true
                );
            // Load the transformed data from data preparation step to get "is_ham" column,
            // indexed and sorted by the "emailNum" column
            var rawDF = Frame.ReadCsv(
                Path.Combine(dataDirPath, "data-preparation-step\\transformed.csv"),
                hasHeaders: true,
                inferTypes: false,
                schema: "int,string,string,int"
                ).IndexRows <int>("emailNum").SortRowsByKey();
            // Load Term Frequency Data
            var spamTermFrequencyDF = Frame.ReadCsv(
                Path.Combine(dataDirPath, "data-analysis-step\\frequency-alphaonly\\subject-line\\spam-frequencies-after-stopwords.csv"),
                hasHeaders: false,
                inferTypes: false,
                schema: "string,int"
                );

            spamTermFrequencyDF.RenameColumns(new string[] { "word", "num_occurences" });
            var indexedSpamTermFrequencyDF = spamTermFrequencyDF.IndexRows <string>("word");

            // Change number of features to reduce overfitting
            int minNumOccurences = 1;

            // Keep only the words that occur at least minNumOccurences times in spam subjects
            string[] wordFeatures = indexedSpamTermFrequencyDF.Where(
                x => x.Value.GetAs <int>("num_occurences") >= minNumOccurences
                ).RowKeys.ToArray();
            Console.WriteLine("Num Features Selected: {0}", wordFeatures.Length);

            // subtracting "is_ham" values from 1 to encode this target variable with 1 for spam emails
            var targetVariables = 1 - rawDF.GetColumn <int>("is_ham");

            Console.WriteLine("{0} spams vs. {1} hams", targetVariables.NumSum(), (targetVariables.KeyCount - targetVariables.NumSum()));

            // Create input and output variables from data frames, so that we can use them for Accord.NET MachineLearning models
            double[][] input = wordVecDF.Columns[wordFeatures].Rows.Select(
                x => Array.ConvertAll <object, double>(x.Value.ValuesAll.ToArray(), o => Convert.ToDouble(o))
                ).ValuesAll.ToArray();
            int[] output = targetVariables.Values.ToArray();

            // Number of folds
            int numFolds = 3;

            var cvNaiveBayesClassifier = CrossValidation.Create <NaiveBayes <BernoulliDistribution>, NaiveBayesLearning <BernoulliDistribution>, double[], int>(
                // number of folds
                k: numFolds,
                // Naive Bayes Classifier with Bernoulli Distribution
                learner: (p) => new NaiveBayesLearning <BernoulliDistribution>(),
                // Using Zero-One Loss Function as a Cost Function
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                // Fitting a classifier
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                // Input with Features
                x: input,
                // Output
                y: output
                );

            // Run Cross-Validation
            var result = cvNaiveBayesClassifier.Learn(input, output);

            // Sample Size
            int numberOfSamples = result.NumberOfSamples;
            int numberOfInputs  = result.NumberOfInputs;
            int numberOfOutputs = result.NumberOfOutputs;

            // Training & Validation Errors
            double trainingError   = result.Training.Mean;
            double validationError = result.Validation.Mean;

            // Confusion Matrix
            Console.WriteLine("\n---- Confusion Matrix ----");
            GeneralConfusionMatrix gcm = result.ToConfusionMatrix(input, output);

            // Accord.NET documents GeneralConfusionMatrix.Matrix[i, j] as the number of
            // samples from class i that were classified as class j, i.e. rows are the
            // actual classes and columns are the predicted ones. The captions and the
            // false positive / false negative cells below follow that orientation.
            Console.WriteLine("");
            Console.Write("\t\tPred 0\t\tPred 1\n");
            for (int i = 0; i < gcm.Matrix.GetLength(0); i++)
            {
                Console.Write("Actual {0} :\t", i);
                for (int j = 0; j < gcm.Matrix.GetLength(1); j++)
                {
                    Console.Write(gcm.Matrix[i, j] + "\t\t\t");
                }
                Console.WriteLine();
            }

            Console.WriteLine("\n---- Sample Size ----");
            Console.WriteLine("# samples: {0}, # inputs: {1}, # outputs: {2}", numberOfSamples, numberOfInputs, numberOfOutputs);
            Console.WriteLine("training error: {0}", trainingError);
            Console.WriteLine("validation error: {0}\n", validationError);

            Console.WriteLine("\n---- Calculating Accuracy, Precision, Recall ----");

            // Class 1 (spam) is the positive class
            float truePositive  = (float)gcm.Matrix[1, 1];
            float trueNegative  = (float)gcm.Matrix[0, 0];
            float falsePositive = (float)gcm.Matrix[0, 1]; // actual ham, predicted spam
            float falseNegative = (float)gcm.Matrix[1, 0]; // actual spam, predicted ham

            // Accuracy
            Console.WriteLine(
                "Accuracy: {0}",
                (truePositive + trueNegative) / numberOfSamples
                );
            // True-Positive / (True-Positive + False-Positive)
            Console.WriteLine("Precision: {0}", (truePositive / (truePositive + falsePositive)));
            // True-Positive / (True-Positive + False-Negative)
            Console.WriteLine("Recall: {0}", (truePositive / (truePositive + falseNegative)));

            Console.ReadKey();
        }

Пример #22

Показать файл

Файл: DecisionTreeTest.cs Проект: jthornca/accord-framework

        /// <summary>
        ///   Cross-validates C4.5 decision trees on the Wisconsin (Diagnostic)
        ///   Breast Cancer dataset with 10 folds and compares the reported
        ///   statistics against hard-coded expected values.
        /// </summary>
        public void CrossValidationTest()
        {
            #region doc_cross_validation
            // Ensure we have reproducible results
            Accord.Math.Random.Generator.Seed = 0;

            // Get some data to be learned. We will be using the Wisconsin
            // (Diagnostic) Breast Cancer dataset, where the goal is to determine
            // whether the characteristics extracted from a breast cancer exam
            // correspond to a malignant or benign type of cancer:
            var        data   = new WisconsinDiagnosticBreastCancer();
            double[][] input  = data.Features;    // 569 samples, 30-dimensional features
            int[]      output = data.ClassLabels; // 569 samples, 2 different class labels

            // Let's say we want to measure the cross-validation performance of
            // a decision tree with a maximum tree height of 5 and where variables
            // are able to join the decision path at most 2 times during evaluation:
            var cv = CrossValidation.Create(

                k: 10, // We will be using 10-fold cross validation

                // Here we create the learning algorithm:
                learner: (p) => new C45Learning()
                {
                    Join      = 2,
                    MaxHeight = 5
                },

                // Now we have to specify how the tree performance should be measured:
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),

                // This function can be used to perform any special
                // operations before the actual learning is done, but
                // here we will just leave it as simple as it can be:
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

                // Finally, we have to pass the input and output data
                // that will be used in cross-validation.
                x: input, y: output
                );

            // After the cross-validation object has been created,
            // we can call its .Learn method with the input and
            // output data that will be partitioned into the folds:
            var result = cv.Learn(input, output);

            // We can grab some information about the problem:
            int numberOfSamples = result.NumberOfSamples;    // should be 569
            int numberOfInputs  = result.NumberOfInputs;     // should be 30
            int numberOfOutputs = result.NumberOfOutputs;    // should be 2

            double trainingError   = result.Training.Mean;   // should be 0.017771153143274855
            double validationError = result.Validation.Mean; // should be 0.0755952380952381

            // If desired, compute an aggregate confusion matrix for the validation sets:
            GeneralConfusionMatrix gcm = result.ToConfusionMatrix(input, output);
            double accuracy            = gcm.Accuracy; // result should be 0.92442882249560632
            #endregion

            // Floating-point results are compared with the same 1e-10 tolerance
            // used by the other double assertions in this method, rather than
            // with exact equality.
            Assert.AreEqual(569, gcm.Samples);
            Assert.AreEqual(0.92442882249560632, gcm.Accuracy, 1e-10);
            Assert.AreEqual(0.075571177504393683, gcm.Error, 1e-10);
            Assert.AreEqual(2, gcm.Classes);

            Assert.AreEqual(569, numberOfSamples);
            Assert.AreEqual(30, numberOfInputs);
            Assert.AreEqual(2, numberOfOutputs);

            Assert.AreEqual(10, cv.K);
            Assert.AreEqual(0.017771153143274855, result.Training.Mean, 1e-10);
            Assert.AreEqual(0.0755952380952381, result.Validation.Mean, 1e-10);

            Assert.AreEqual(3.0929835736884063E-05, result.Training.Variance, 1e-10);
            Assert.AreEqual(0.00096549963219103182, result.Validation.Variance, 1e-10);

            Assert.AreEqual(10, cv.Folds.Length);
            Assert.AreEqual(10, result.Models.Length);

            var tree   = result.Models[0].Model;
            int height = tree.GetHeight();
            Assert.AreEqual(5, height);

            // Second scenario: reseed the generator and repeat the experiment with
            // Join, MaxHeight and MaxVariables all set to 1.
            Accord.Math.Random.Generator.Seed = 0;

            cv = CrossValidation.Create(
                k: 10,
                learner: (p) => new C45Learning()
                {
                    Join         = 1,
                    MaxHeight    = 1,
                    MaxVariables = 1
                },
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                x: input, y: output
                );

            // Limit this run to a degree of parallelism of 1.
            cv.ParallelOptions.MaxDegreeOfParallelism = 1;

            result = cv.Learn(input, output);

            tree   = result.Models[0].Model;
            height = tree.GetHeight();

            Assert.AreEqual(1, height);

            Assert.AreEqual(0.24842341313352828, result.Training.Mean, 1e-10);
            Assert.AreEqual(0.25112781954887214, result.Validation.Mean, 1e-10);

            Assert.AreEqual(0.017727583138285874, result.Training.Variance, 1e-10);
            Assert.AreEqual(0.018956888182583998, result.Validation.Variance, 1e-10);
        }

Пример #23

Показать файл

Файл: ID3Tree.cs Проект: XvXAdrienXvX/SmartDietPlanner-EatHealthy-

        /// <summary>
        ///   Reads meal records from the CSV file at <c>path</c>, codifies them
        ///   into integer symbols, trains an ID3 decision tree on the full data,
        ///   and then runs 5-fold cross-validation, writing the mean validation
        ///   and training errors to the debug output.
        /// </summary>
        public void validation()
        {
            DataTable dt = new DataTable("FoodDBSample");
            dt.Columns.Add("Category", "Carb", "Protein", "Fat", "Calorie", "Fiber", "Decision");

            // Both the file reader and the CSV reader are disposed once the rows
            // have been copied. The records are enumerated inside the using
            // block so the underlying file is still open while they are read.
            using (var reader = File.OpenText(path))
            using (var csv = new CsvReader(reader))
            {
                foreach (var record in csv.GetRecords <MealData>())
                {
                    DataRow row = dt.NewRow();

                    row["Category"] = record.Category;
                    row["Carb"]     = record.Carb;
                    row["Protein"]  = record.Protein;
                    row["Fat"]      = record.Fat;
                    row["Calorie"]  = record.Calorie;
                    row["Fiber"]    = record.Fiber;
                    row["Decision"] = record.Outcome;

                    dt.Rows.Add(row);
                }
            }

            // Translate the table's values into integer symbols.
            var codebook = new Codification(dt);

            DataTable symbols = codebook.Apply(dt);

            int[][] inputs  = symbols.ToJagged <int>("Category", "Carb", "Protein", "Fat", "Calorie", "Fiber");
            int[]   outputs = symbols.ToArray <int>("Decision");

            // Single definition of the learner configuration: which columns are
            // used for making decisions and the number passed for each. Every
            // call builds a fresh ID3Learning with fresh DecisionVariable objects,
            // exactly as the two separate initializers did before.
            System.Func <ID3Learning> createLearner = () => new ID3Learning()
            {
                new DecisionVariable("Category", 4),
                new DecisionVariable("Carb", 2),
                new DecisionVariable("Protein", 2),
                new DecisionVariable("Fat", 2),
                new DecisionVariable("Calorie", 2),
                new DecisionVariable("Fiber", 2)
            };

            var id3learning = createLearner();

            DecisionTree tree = id3learning.Learn(inputs, outputs);

            // Compute the training error on the full data set
            // (not used further in this method).
            double error = new ZeroOneLoss(outputs).Loss(tree.Decide(inputs));

            // Measure the cross-validation performance of an ID3 decision
            // tree built over the same decision variables as above:
            var cv = CrossValidation.Create(

                k: 5, // 5-fold cross-validation

                // create the learning algorithm for each fold
                learner: (p) => createLearner(),

                // zero-one loss between the expected and the actual outputs
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),

                // function can be used to perform any special
                // operations before the actual learning is done, but
                // here we will just leave it as simple as it can be:
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

                // pass the input and output data
                // that will be used in cross-validation.
                x: inputs, y: outputs
                );

            // After the cross-validation object has been created,
            // we can call its .Learn method with the input and
            // output data that will be partitioned into the folds:
            var result = cv.Learn(inputs, outputs);

            // Gather info (the three counts are not used further in this method)
            int numberOfSamples = result.NumberOfSamples;
            int numberOfInputs  = result.NumberOfInputs;
            int numberOfOutputs = result.NumberOfOutputs;

            double trainingError   = result.Training.Mean;
            double validationError = result.Validation.Mean;

            // "ID3 Mean" reports the mean validation error and
            // "ID3 Error" reports the mean training error.
            System.Diagnostics.Debug.WriteLine("ID3 Mean: " + validationError);
            System.Diagnostics.Debug.WriteLine("ID3 Error: " + trainingError);
        }