Example #1
        /// <summary>
        /// Classify our data using a support vector machine classifier and save the model.
        /// </summary>
        /// <param name="train_data">Frame objects that we will use to train classifiers.</param>
        /// <param name="test_data">Frame objects that we will use to test classifiers.</param>
        /// <param name="train_label">Labels of the training data.</param>
        /// <param name="test_label">Labels of the test data.</param>
        /// <param name="Classifier_Path">Path where we want to save the classifier on disk.</param>
        /// <param name="Classifier_Name">Name of the classifier we want to save.</param>
        public void SVM(double[][] train_data, double[][] test_data, int[] train_label, int[] test_label, String Classifier_Path, String Classifier_Name)
        {
            var learn = new SequentialMinimalOptimization <Gaussian>()
            {
                UseComplexityHeuristic = true,
                UseKernelEstimation    = true
            };

            try
            {
                SupportVectorMachine <Gaussian> svm = learn.Learn(train_data, train_label);

                bool[] prediction = svm.Decide(test_data);

                var cm = GeneralConfusionMatrix.Estimate(svm, test_data, test_label);


                double error = cm.Error;

                Console.WriteLine(error);

                svm.Save(Path.Combine(Classifier_Path, Classifier_Name));
            }
            catch (Exception e)
            { Console.WriteLine(e.StackTrace); }
        }
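Once saved, the machine can be restored with Accord.IO's Serializer, mirroring the Save call above. A minimal sketch, reusing the same Classifier_Path/Classifier_Name arguments and test_data array from the SVM method:

            // Load the machine back from disk (Serializer lives in the Accord.IO namespace):
            var loadedSvm = Serializer.Load<SupportVectorMachine<Gaussian>>(
                Path.Combine(Classifier_Path, Classifier_Name));

            // The restored machine can classify new instances immediately:
            bool[] restoredPredictions = loadedSvm.Decide(test_data);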
Example #2
        /// <summary>
        /// Classify our data using a logistic regression classifier and save the model.
        /// </summary>
        /// <param name="train_data">Frame objects that we will use to train classifiers.</param>
        /// <param name="test_data">Frame objects that we will use to test classifiers.</param>
        /// <param name="train_label">Labels of the training data.</param>
        /// <param name="test_label">Labels of the test data.</param>
        /// <param name="Classifier_Path">Path where we want to save the classifier on disk.</param>
        /// <param name="Classifier_Name">Name of the classifier we want to save.</param>
        public void LogisticRegression(double[][] train_data, double[][] test_data, int[] train_label, int[] test_label, String Classifier_Path, String Classifier_Name)
        {
            var learner = new IterativeReweightedLeastSquares <LogisticRegression>()
            {
                Tolerance      = 1e-4,
                MaxIterations  = 100,
                Regularization = 0
            };

            LogisticRegression regression = learner.Learn(train_data, train_label);

            // Odds ratios of the fitted coefficients. The variable names follow the
            // Accord.NET documentation example (age/smoking) and may not match this data:
            double ageOdds   = regression.GetOddsRatio(0);
            double smokeOdds = regression.GetOddsRatio(1);

            double[] scores = regression.Probability(test_data);

            //bool[] pre = regression.Decide(test_data);

            var cm = GeneralConfusionMatrix.Estimate(regression, test_data, test_label);

            double error = cm.Error;

            Console.WriteLine(error);

            regression.Save(Path.Combine(Classifier_Path, Classifier_Name));
        }
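As a side note, the odds ratios computed inside the method above are just the exponentials of the fitted coefficients, so they can be cross-checked directly once the model is learned. A minimal sketch, assuming the Accord.NET convention that GetOddsRatio index 0 corresponds to the intercept and higher indices map to Weights:

            // These should approximately reproduce the GetOddsRatio calls above:
            double interceptOdds = Math.Exp(regression.Intercept);  // ~ regression.GetOddsRatio(0)
            double firstOdds     = Math.Exp(regression.Weights[0]); // ~ regression.GetOddsRatio(1)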
Example #3
        public double EvaluateAccuracy()
        {
            KnnModel.K = 1;
            var cm = GeneralConfusionMatrix.Estimate(KnnModel, TrainingModelInputs, TrainingModelOutputs);

            return(cm.Accuracy);
        }
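Note that the matrix above is estimated on the same inputs the model was trained on, so the returned accuracy is optimistic. A held-out variant is a one-line change; a minimal sketch, assuming hypothetical TestModelInputs/TestModelOutputs fields holding data the model never saw:

        public double EvaluateHeldOutAccuracy()
        {
            // TestModelInputs/TestModelOutputs are hypothetical held-out arrays;
            // evaluating on unseen data gives a less biased accuracy estimate.
            var cm = GeneralConfusionMatrix.Estimate(KnnModel, TestModelInputs, TestModelOutputs);

            return cm.Accuracy;
        }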
Example #4
        public void learn_test()
        {
            #region doc_learn_distance
            // Create some sample learning data. In this data,
            // the first two instances belong to a class, the
            // four next belong to another class and the last
            // three to yet another.

            double[][] inputs =
            {
                // The first two are from class 0
                new double[] { -5, -2, -1 },
                new double[] { -5, -5, -6 },

                // The next four are from class 1
                new double[] {  2,  1,  1 },
                new double[] {  1,  1,  2 },
                new double[] {  1,  2,  2 },
                new double[] {  3,  1,  2 },

                // The last three are from class 2
                new double[] { 11,  5,  4 },
                new double[] { 15,  5,  6 },
                new double[] { 10,  5,  6 },
            };

            int[] outputs =
            {
                0, 0,       // First two from class 0
                1, 1, 1, 1, // Next four from class 1
                2, 2, 2     // Last three from class 2
            };


            // Now we will create the K-Nearest Neighbors algorithm. For this
            // example, we will be choosing k = 4. This means that, for a given
            // instance, its nearest 4 neighbors will be used to cast a decision.
            var knn = new KNearestNeighbors <double[]>(k: 4, distance: new SquareEuclidean());

            // We learn the algorithm:
            knn.Learn(inputs, outputs);

            // After the algorithm has been created, we can classify a new instance:
            int answer = knn.Decide(new double[] { 11, 5, 4 }); // answer will be 2.

            // Let's say we would like to compute the error matrix for the classifier:
            var cm = GeneralConfusionMatrix.Estimate(knn, inputs, outputs);

            // We can use it to estimate measures such as
            double error = cm.Error;    // should be 0
            double acc   = cm.Accuracy; // should be 1
            double kappa = cm.Kappa;    // should be 1
            #endregion

            Assert.AreEqual(2, answer);
            Assert.AreEqual(0, error);
            Assert.AreEqual(1, acc);
            Assert.AreEqual(1, kappa);
        }
Example #5
 /// <summary>
 /// Calculates error after training the model.
 /// </summary>
 /// <param name="testData">The test data that would be used to calculate error.</param>
 /// <param name="testOutput">The test labels that would be used to calculate error.</param>
 public override void CalculateTrainingError(List <double[]> testData, List <int> testOutput)
 {
     TrainingError = new ZeroOneLoss(testOutput.ToArray()).Loss(Model.Decide(testData.ToArray()));
     GeneralConfusionMatrix cm = GeneralConfusionMatrix.Estimate(Model, testData.ToArray(), testOutput.ToArray());
     double error     = cm.Error;     // should be 0.066666666666666652
     double accuracy  = cm.Accuracy;  // should be 0.93333333333333335
     double kappa     = cm.Kappa;     // should be 0.9
     double chiSquare = cm.ChiSquare; // should be 248.52216748768473
 }
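For reference, the ZeroOneLoss stored in TrainingError and the cm.Error computed right after it measure the same quantity, the fraction of misclassified samples, so the two values should agree. A minimal check using the same Model and test arrays as above:

     // Both are misclassification rates over the same test set, so they should match:
     double zeroOne = new ZeroOneLoss(testOutput.ToArray()).Loss(Model.Decide(testData.ToArray()));
     double cmError = GeneralConfusionMatrix.Estimate(Model, testData.ToArray(), testOutput.ToArray()).Error;
     System.Diagnostics.Debug.Assert(Math.Abs(zeroOne - cmError) < 1e-10);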
Example #6
        public static Dictionary <int, string> KnnCreate(Dictionary <List <string>, double[][]> trainingSet)
        {
            // Create some sample learning data.
            int        labelCounter           = -1;
            List <int> classesList            = new List <int>();
            Dictionary <int, string> labelMap = new Dictionary <int, string>();

            foreach (string label in trainingSet.First().Key.ToArray())
            {
                if (!labelMap.ContainsValue(label))
                {
                    labelCounter++;
                    classesList.Add(labelCounter);
                    labelMap.Add(labelCounter, label);
                    Console.WriteLine(labelCounter + ": " + label);
                }
                else
                {
                    classesList.Add(labelCounter);
                }
            }

            int[]      classes = classesList.ToArray();
            double[][] inputs  = trainingSet.First().Value;


            // Now we will create the K-Nearest Neighbors algorithm.
            // It's possible to switch the k: 5 below in search of better accuracy.
            var knn = new KNearestNeighbors(k: 5);

            // We train the algorithm:
            knn.Learn(inputs, classes);

            // Let's say we would like to compute the error matrix for the classifier:
            var cm = GeneralConfusionMatrix.Estimate(knn, inputs, classes);

            // We can use it to estimate measures such as:
            double error = cm.Error;
            double acc   = cm.Accuracy;
            double kappa = cm.Kappa;

            Console.WriteLine("error: " + error);
            Console.WriteLine("accuracy: " + acc);
            Console.WriteLine("kappa: " + kappa);
            Console.WriteLine("pearson: " + cm.Pearson);
            for (int i = 0; i < cm.ColumnErrors.Length; i++)
            {
                if (cm.ColumnErrors[i] != 0)
                {
                    double columnerror = (double)cm.ColumnErrors[i] / cm.ColumnTotals[i];
                    Console.WriteLine("Error of " + labelMap[i] + ": " + columnerror);
                }
            }
            SaveKnn(knn);
            Fingerprinting.WriteLabelMap(labelMap);
            return(labelMap);
        }
Example #7
    private void calculateConfusionMatrix()
    {
        GeneralConfusionMatrix cm = GeneralConfusionMatrix.Estimate(classifier, testInputs.ToArray(), testOutputs.ToArray());

        double error    = cm.Error;
        double accuracy = cm.Accuracy;

        Debug.Log("Error - " + error);
        Debug.Log("Accuracy - " + accuracy);

        testInputs.Clear();
    }
Example #8
        /// <summary>
        /// Classify our data using a k-nearest neighbors classifier and save the model.
        /// </summary>
        /// <param name="train_data">Frame objects that we will use to train classifiers.</param>
        /// <param name="test_data">Frame objects that we will use to test classifiers.</param>
        /// <param name="train_label">Labels of the training data.</param>
        /// <param name="test_label">Labels of the test data.</param>
        /// <param name="Classifier_Path">Path where we want to save the classifier on disk.</param>
        /// <param name="Classifier_Name">Name of the classifier we want to save.</param>
        public void Knn(double[][] train_data, double[][] test_data, int[] train_label, int[] test_label, String Classifier_Path, String Classifier_Name)
        {
            KNearestNeighbors knn = new KNearestNeighbors(k: 5);

            knn.Learn(train_data, train_label);

            // Classify a single hard-coded sample point (two features):
            int    answer = knn.Decide(new double[] { 117.07004523277283, 119.9104585647583 });
            var    cm     = GeneralConfusionMatrix.Estimate(knn, test_data, test_label);
            double error  = cm.Error;

            Console.WriteLine(error);

            knn.Save(Path.Combine(Classifier_Path, Classifier_Name));
        }
Example #9
        public void SaveAccuracy(string path = @"H:\Documents\Visual Studio 2015\Projects\ML\ML\SaveResults\")
        {
            string timeAfter = InitialTime();

            var cm = GeneralConfusionMatrix.Estimate(Сlassifier, TestInputs, TestOutputs);

            using (FileStream fs = new FileStream(path + timeAfter + "_Accuracy" + Сlassifier + ".txt", FileMode.CreateNew))
            {
                using (StreamWriter writer = new StreamWriter(fs))
                {
                    writer.WriteLine("Accuracy for {0}: {1} %", Сlassifier, Math.Round(cm.Accuracy, 3) * 100);
                }
            }
        }
Example #10
        public static void ClassifierStatistics()
        {
            var cmLR = GeneralConfusionMatrix.Estimate(Predictor.MultinomialLogisticRegression, Validation.PredictorPoints,
                                                       FrequencyLabelsInt);

            Console.WriteLine($"LR CM: {cmLR} \n LR Error: {cmLR.Error} LR ACcuracy: {cmLR.Accuracy}");

            var cmTree = GeneralConfusionMatrix.Estimate(Predictor.RandomForest, PredictorPoints, FrequencyLabelsInt);

            Console.WriteLine($"RF CM: {cmTree} \n RF Error: {cmTree.Error} RF ACcuracy: {cmTree.Accuracy}");

            var cmMMD = GeneralConfusionMatrix.Estimate(Predictor.MinimumMeanDistance, PredictorPoints,
                                                        FrequencyLabelsInt);

            Console.WriteLine($"MMD CM: {cmMMD} \n MMD Error: {cmMMD.Error} MMD ACcuracy: {cmMMD.Accuracy}");
        }
Example #11
        public static void CalulateTrainStatisticsClassification()
        {
            Console.WriteLine("----TRAIN Statistics----");
            var cmLR = GeneralConfusionMatrix.Estimate(Predictor.MultinomialLogisticRegression, PredictorPointsTrain,
                                                       FrequencyLabelsInt);

            Console.WriteLine($"LR CM: {cmLR} \n LR Error: {cmLR.Error} LR ACcuracy: {cmLR.Accuracy}");

            var cmMMD = GeneralConfusionMatrix.Estimate(Predictor.MinimumMeanDistance, PredictorPointsTrain,
                                                        FrequencyLabelsInt);

            Console.WriteLine($"MMD CM: {cmMMD} \n MMD Error: {cmMMD.Error} MMD ACcuracy: {cmMMD.Accuracy}");

            var cmTree = GeneralConfusionMatrix.Estimate(Predictor.RandomForest, PredictorPointsTrain, FrequencyLabelsInt);

            Console.WriteLine($"RF CM: {cmTree} \n RF Error: {cmTree.Error} RF ACcuracy: {cmTree.Accuracy}");

            var electrodeString = String.Join(",", ClassificationElectrodes);

            Console.WriteLine($"Above Results for {electrodeString}");
        }
Example #12
        public void learn_test_4()
        {
            #region doc_learn_2
            // This example shows how to learn a multinomial logistic regression
            // analysis in the famous Fisher's Iris dataset. It should serve to
            // demonstrate that this class does not really need to be used with
            // DataTables, Codification codebooks and other supplementary features.

            Iris iris = new Iris();

            // Load Fisher's Iris dataset:
            double[][] x = iris.Instances;
            int[]      y = iris.ClassLabels;

            // Create a new Multinomial Logistic Regression Analysis:
            var analysis = new MultinomialLogisticRegressionAnalysis();

            // Note: we could have passed the class names from iris.ClassNames and
            // variable names from iris.VariableNames during MLR instantiation as:
            //
            // var analysis = new MultinomialLogisticRegressionAnalysis()
            // {
            //     InputNames = iris.VariableNames,
            //     OutputNames = iris.ClassNames
            // };

            // However, this example is also intended to demonstrate that
            // those are not required when learning a regression analysis.

            // Learn the regression from the input and output pairs:
            MultinomialLogisticRegression regression = analysis.Learn(x, y);

            // Let's retrieve some information about what we just learned:
            int coefficients    = analysis.Coefficients.Count; // should be 11
            int numberOfInputs  = analysis.NumberOfInputs;     // should be 4
            int numberOfOutputs = analysis.NumberOfOutputs;    // should be 3

            string[] inputNames  = analysis.InputNames;        // should be "Input 0", "Input 1", "Input 2", "Input 3"
            string[] outputNames = analysis.OutputNames;       // should be "Class 0", "Class 1", "Class 2"

            // The regression is best visualized when it is data-bound to a
            // Windows.Forms DataGridView or WPF DataGrid. You can get the
            // values for all different coefficients and discrete values:

            // DataGridBox.Show(regression.Coefficients); // uncomment this line

            // You can get the matrix of coefficients:
            double[][] coef = analysis.CoefficientValues;

            // Should be equal to:
            double[][] expectedCoef = new double[][]
            {
                new double[] { 2.85217775752471, -0.0579282723520426, -0.533293368378012, -1.16283850605289 },
                new double[] { 5.21813357698422, -0.113601186660817, 0.291387041358367, -0.9826369387481 }
            };

            // And their associated standard errors:
            double[][] stdErr = analysis.StandardErrors;

            // Should be equal to:
            double[][] expectedErr = new double[][]
            {
                new double[] { -2.02458003380033, -0.339533576505471, -1.164084923948, -0.520961533343425, 0.0556314901718 },
                new double[] { -3.73971589217449, -1.47672790071382, -1.76795568348094, -0.495032307980058, 0.113563519656386 }
            };

            // We can also get statistics and hypothesis tests:
            WaldTest[][]  wald          = analysis.WaldTests;     // should all have p < 0.05
            ChiSquareTest chiSquare     = analysis.ChiSquare;     // p should be ~7.83e-54 (effectively zero)
            double        logLikelihood = analysis.LogLikelihood; // should be -29.558338705646587

            // You can use the regression to predict the values:
            int[] pred = regression.Transform(x);

            // And get the accuracy of the prediction if needed:
            var cm = GeneralConfusionMatrix.Estimate(regression, x, y);

            double acc   = cm.Accuracy; // should be 0.94666666666666666
            double kappa = cm.Kappa;    // should be 0.91999999999999982
            #endregion

            Assert.AreEqual(11, coefficients);
            Assert.AreEqual(4, numberOfInputs);
            Assert.AreEqual(3, numberOfOutputs);

            Assert.AreEqual(new[] { "Input 0", "Input 1", "Input 2", "Input 3" }, inputNames);
            Assert.AreEqual(new[] { "Class 0", "Class 1", "Class 2" }, outputNames);

            Assert.AreEqual(0.94666666666666666, acc, 1e-10);
            Assert.AreEqual(0.91999999999999982, kappa, 1e-10);
            Assert.AreEqual(7.8271969268290043E-54, chiSquare.PValue, 1e-8);
            Assert.AreEqual(-29.558338705646587, logLikelihood, 1e-8);
        }
Example #13
        public void learn_test()
        {
            // http://www.ats.ucla.edu/stat/stata/dae/mlogit.htm
            #region doc_learn_1
            // This example downloads an example dataset from the web and learns a multinomial logistic
            // regression on it. However, please keep in mind that the Multinomial Logistic Regression
            // can also work without many of the elements that will be shown below, like the codebook,
            // DataTables, and a CsvReader.

            // Let's download an example dataset from the web to learn a multinomial logistic regression:
            CsvReader reader = CsvReader.FromUrl("https://raw.githubusercontent.com/rlowrance/re/master/hsbdemo.csv", hasHeaders: true);

            // Let's read the CSV into a DataTable. As mentioned above, this step
            // can help, but is not necessarily required for learning the model:
            DataTable table = reader.ToTable();

            // We will learn a MLR regression between the following input and output fields of this table:
            string[] inputNames  = new[] { "write", "ses" };
            string[] outputNames = new[] { "prog" };

            // Now let's create a codification codebook to convert the string fields in the data
            // into integer symbols. This is required because the MLR model can only learn from
            // numeric data, so strings have to be transformed first. We can force a particular
            // interpretation for those columns if needed, as shown in the initializer below:
            var codification = new Codification()
            {
                { "write", CodificationVariable.Continuous },
                { "ses", CodificationVariable.CategoricalWithBaseline, new[] { "low", "middle", "high" } },
                { "prog", CodificationVariable.Categorical, new[] { "academic", "general" } },
            };

            // Learn the codification
            codification.Learn(table);

            // Now, transform symbols into a vector representation, growing the number of inputs:
            double[][] x = codification.Transform(table, inputNames, out inputNames).ToDouble();
            double[][] y = codification.Transform(table, outputNames, out outputNames).ToDouble();

            // Create a new Multinomial Logistic Regression Analysis:
            var analysis = new MultinomialLogisticRegressionAnalysis()
            {
                InputNames  = inputNames,
                OutputNames = outputNames,
            };

            // Learn the regression from the input and output pairs:
            MultinomialLogisticRegression regression = analysis.Learn(x, y);

            // Let's retrieve some information about what we just learned:
            int coefficients    = analysis.Coefficients.Count; // should be 9
            int numberOfInputs  = analysis.NumberOfInputs;     // should be 3
            int numberOfOutputs = analysis.NumberOfOutputs;    // should be 3

            inputNames  = analysis.InputNames;                 // should be "write", "ses: middle", "ses: high"
            outputNames = analysis.OutputNames;                // should be "prog: academic", "prog: general", "prog: vocation"

            // The regression is best visualized when it is data-bound to a
            // Windows.Forms DataGridView or WPF DataGrid. You can get the
            // values for all different coefficients and discrete values:

            // DataGridBox.Show(regression.Coefficients); // uncomment this line

            // You can get the matrix of coefficients:
            double[][] coef = analysis.CoefficientValues;

            // Should be equal to:
            double[][] expectedCoef = new double[][]
            {
                new double[] { 2.85217775752471, -0.0579282723520426, -0.533293368378012, -1.16283850605289 },
                new double[] { 5.21813357698422, -0.113601186660817, 0.291387041358367, -0.9826369387481 }
            };

            // And their associated standard errors:
            double[][] stdErr = analysis.StandardErrors;

            // Should be equal to:
            double[][] expectedErr = new double[][]
            {
                new double[] { -2.02458003380033, -0.339533576505471, -1.164084923948, -0.520961533343425, 0.0556314901718 },
                new double[] { -3.73971589217449, -1.47672790071382, -1.76795568348094, -0.495032307980058, 0.113563519656386 }
            };

            // We can also get statistics and hypothesis tests:
            WaldTest[][]  wald          = analysis.WaldTests;     // should all have p < 0.05
            ChiSquareTest chiSquare     = analysis.ChiSquare;     // should be p=1.06300120956871E-08
            double        logLikelihood = analysis.LogLikelihood; // should be -179.98173272217591

            // You can use the regression to predict the values:
            int[] pred = regression.Transform(x);

            // And get the accuracy of the prediction if needed:
            var cm = GeneralConfusionMatrix.Estimate(regression, x, y.ArgMax(dimension: 1));

            double acc   = cm.Accuracy; // should be 0.61
            double kappa = cm.Kappa;    // should be 0.2993487536492252
            #endregion


            Assert.AreEqual(9, coefficients);
            Assert.AreEqual(3, numberOfInputs);
            Assert.AreEqual(3, numberOfOutputs);

            Assert.AreEqual(new[] { "write", "ses: middle", "ses: high" }, inputNames);
            Assert.AreEqual(new[] { "prog: academic", "prog: general", "prog: vocation" }, outputNames);

            Assert.AreEqual(0.61, acc, 1e-10);
            Assert.AreEqual(0.2993487536492252, kappa, 1e-10);
            Assert.AreEqual(1.06300120956871E-08, chiSquare.PValue, 1e-8);
            Assert.AreEqual(-179.98172637136295, logLikelihood, 1e-8);

            testmlr(analysis);
        }
Example #14
        public void learn_test1()
        {
            string basePath = NUnit.Framework.TestContext.CurrentContext.TestDirectory;

            #region doc_learn
            // Create some sample learning data. In this data,
            // the first two instances belong to a class, the
            // four next belong to another class and the last
            // three to yet another.

            double[][] inputs =
            {
                // The first two are from class 0
                new double[] { -5, -2, -1 },
                new double[] { -5, -5, -6 },

                // The next four are from class 1
                new double[] {  2,  1,  1 },
                new double[] {  1,  1,  2 },
                new double[] {  1,  2,  2 },
                new double[] {  3,  1,  2 },

                // The last three are from class 2
                new double[] { 11,  5,  4 },
                new double[] { 15,  5,  6 },
                new double[] { 10,  5,  6 },
            };

            int[] outputs =
            {
                0, 0,       // First two from class 0
                1, 1, 1, 1, // Next four from class 1
                2, 2, 2     // Last three from class 2
            };


            // Now we will create the K-Nearest Neighbors algorithm. For this
            // example, we will be choosing k = 4. This means that, for a given
            // instance, its nearest 4 neighbors will be used to cast a decision.
            var knn = new KNearestNeighbors(k: 4);

            // We learn the algorithm:
            knn.Learn(inputs, outputs);

            // After the algorithm has been created, we can classify a new instance:
            int answer = knn.Decide(new double[] { 11, 5, 4 }); // answer will be 2.

            // Let's say we would like to compute the error matrix for the classifier:
            var cm = GeneralConfusionMatrix.Estimate(knn, inputs, outputs);

            // We can use it to estimate measures such as
            double error = cm.Error;    // should be 0
            double acc   = cm.Accuracy; // should be 1
            double kappa = cm.Kappa;    // should be 1
            #endregion

            Assert.AreEqual(2, answer);
            Assert.AreEqual(0, error);
            Assert.AreEqual(1, acc);
            Assert.AreEqual(1, kappa);

#if !NO_BINARY_SERIALIZATION
            #region doc_serialization
            // After we have created and learned our model, let's say we would
            // like to save it to disk. For this, we can import the Accord.IO
            // namespace at the top of our source file namespace, and then use
            // Serializer's extension method Save:

            // Save to a file called "knn.bin" in the basePath directory:
            knn.Save(Path.Combine(basePath, "knn.bin"));

            // To load it back from the disk, we might need to use the Serializer class directly:
            var loaded_knn = Serializer.Load <KNearestNeighbors>(Path.Combine(basePath, "knn.bin"));

            // At this point, knn and loaded_knn should be
            // two different instances of identical objects.
            #endregion

            // Make sure the loaded classifier is still working
            Assert.AreEqual(2, loaded_knn.Decide(new double[] { 11, 5, 4 }));
            cm = GeneralConfusionMatrix.Estimate(loaded_knn, inputs, outputs);
            Assert.AreEqual(0, cm.Error);
            Assert.AreEqual(1, cm.Accuracy);
            Assert.AreEqual(1, cm.Kappa);

            Assert.AreEqual(knn.ClassCount, loaded_knn.ClassCount);
            Assert.AreEqual(knn.Distance, loaded_knn.Distance);
            Assert.AreEqual(knn.K, loaded_knn.K);
            Assert.AreEqual(knn.NumberOfClasses, loaded_knn.NumberOfClasses);
            Assert.AreEqual(knn.NumberOfInputs, loaded_knn.NumberOfInputs);
            Assert.AreEqual(knn.NumberOfOutputs, loaded_knn.NumberOfOutputs);
            Assert.AreEqual(knn.Outputs, loaded_knn.Outputs);
            Assert.AreEqual(knn.Token, loaded_knn.Token);
#endif
        }
Example #15
        /// <summary>
        /// Prints the accuracy as a percentage.
        /// The calculation requires the testOutputs parameter - the expected values.
        /// </summary>
        public void PrintAccuracy()
        {
            var cm = GeneralConfusionMatrix.Estimate(Сlassifier, TestInputs, TestOutputs);

            Console.WriteLine("Accuracy for {0}: {1} %", Сlassifier, Math.Round(cm.Accuracy, 3) * 100);
        }
Example #16
        private void generateButton_Click(object sender, RoutedEventArgs e)
        {
            Dictionary <string, int> classesDict = new Dictionary <string, int>();

            double[][] inputs;
            inputs = paramPassList.Select(list => list.ToArray()).ToArray();
            var        knn          = new KNearestNeighbors(k: 4);
            List <int> groupClasses = new List <int>();
            int        clasessCount = 0;
            string     currGroup    = groupList[0].groupName;

            classesDict.Add(currGroup, clasessCount);
            foreach (var group in groupList)
            {
                if (!currGroup.Equals(group.groupName))
                {
                    clasessCount++;
                    currGroup = group.groupName;
                    classesDict.Add(currGroup, clasessCount);
                }
                for (int i = 0; i < group.groupSize; i++)
                {
                    groupClasses.Add(clasessCount);
                    objNumber++;
                }
            }
            objNumber++;
            int[] outputs;
            outputs = groupClasses.ToArray();
            // We learn the algorithm:
            knn.Learn(inputs, outputs);
            var cm = GeneralConfusionMatrix.Estimate(knn, inputs, outputs);

            // We can use it to estimate measures such as:
            double error = cm.Error;
            double acc   = cm.Accuracy;
            double kappa = cm.Kappa;


            List <int>      testOutputsList = new List <int>();
            List <double[]> testInputsList  = new List <double[]>();


            using (var dialog = new System.Windows.Forms.FolderBrowserDialog())
            {
                System.Windows.Forms.DialogResult result = dialog.ShowDialog();
                if (result == System.Windows.Forms.DialogResult.OK)
                {
                    string resultsToWrite = "";
                    foreach (var item in groupScoreList)
                    {
                        int counter = 0;
                        for (int i = 0; i < item.groupSize; i++)
                        {
                            while (counter != 100)
                            {
                                counter++; // cap the number of random attempts per object at 100
                                List <double> next = new List <double>();
                                for (int j = 0; j < paramSize; j++)
                                {
                                    next.Add(rnd.Next(2));
                                }
                                double[] paramValues = next.ToArray();

                                double scoreValue = knn.Score(paramValues, classesDict[item.groupName]);
                                if ((scoreValue * 100) > item.groupScore)
                                {
                                    resultsToWrite += "Obiekt" + objNumber + " - " + item.groupName + ",";
                                    objNumber++;
                                    foreach (var para in paramValues)
                                    {
                                        resultsToWrite += para.ToString() + ",";
                                    }
                                    resultsToWrite  = resultsToWrite.TrimEnd(',');
                                    resultsToWrite += Environment.NewLine;

                                    testInputsList.Add(paramValues);                  //////add to test inputs
                                    testOutputsList.Add(classesDict[item.groupName]); ///////and outputs

                                    break;
                                }
                            }
                            counter = 0;
                        }
                    }
                    var knntest = new KNearestNeighbors(k: 4);
                    knntest.Learn(testInputsList.ToArray(), testOutputsList.ToArray());
                    var cmtest = GeneralConfusionMatrix.Estimate(knntest, testInputsList.ToArray(), testOutputsList.ToArray());
                    // We can use it to estimate measures such as:
                    double errortest = cmtest.Error;
                    double acctest   = cmtest.Accuracy;
                    double kappatest = cmtest.Kappa;


                    int percent70 = (int)(outputs.Length * 0.7);
                    int percent30 = outputs.Length - percent70;

                    int[] randompicks70 = new int[percent70];
                    int[] randompicks30 = new int[percent30];

                    int random;
                    for (int i = 0; i < percent70; i++)
                    {
                        do
                        {
                            random = rnd.Next(outputs.Length);
                        } while (randompicks70.Contains(random));
                        randompicks70[i] = random;
                    }
                    int random30counter = 0;
                    for (int i = 0; i < outputs.Length; i++)
                    {
                        if (!randompicks70.Contains(i))
                        {
                            randompicks30[random30counter] = i;
                            random30counter++;
                        }
                    }

                    int[]      outputs70 = new int[percent70];
                    int[]      outputs30 = new int[percent30];
                    double[][] inputs70  = new double[percent70][];
                    double[][] inputs30  = new double[percent30][];

                    for (int i = 0; i < percent70; i++)
                    {
                        inputs70[i]  = inputs[randompicks70[i]];
                        outputs70[i] = outputs[randompicks70[i]];
                    }
                    for (int i = 0; i < percent30; i++)
                    {
                        inputs30[i]  = inputs[randompicks30[i]];
                        outputs30[i] = outputs[randompicks30[i]];
                    }
                    var knn70percent = new KNearestNeighbors(k: 4);
                    knn70percent.Learn(inputs70, outputs70);
                    var cm70percent = GeneralConfusionMatrix.Estimate(knn70percent, inputs70, outputs70);
                    // We can use it to estimate measures such as:
                    double error70percent = cm70percent.Error;
                    double acc70percent   = cm70percent.Accuracy;
                    double kappa70percent = cm70percent.Kappa;



                    double score70   = 0;
                    double scoretest = 0;
                    for (int i = 0; i < inputs30.Length; i++)
                    {
                        var testvalue1 = knn70percent.Score(inputs30[i], outputs30[i]);
                        var testvalue2 = knntest.Score(inputs30[i], outputs30[i]);
                        score70   += testvalue1;
                        scoretest += testvalue2;
                    }
                    score70   = score70 / inputs30.Length;
                    scoretest = scoretest / inputs30.Length;
                    ReadFromFileValidation validateWindow = new ReadFromFileValidation(cm, cmtest, cm70percent, score70, scoretest);
                    validateWindow.Show();
                    try
                    {
                        string path = Path.Combine(dialog.SelectedPath, "ExtendedExamples.txt");
                        System.IO.File.WriteAllText(path, resultsToWrite);
                    }
                    catch (Exception)
                    {
                        MessageBox.Show("Coś Poszło nie tak", "Wynik Generacji", MessageBoxButton.OK, MessageBoxImage.Warning);
                        throw;
                    }
                    MessageBox.Show("Wygenerowano Plik", "Wynik Generacji", MessageBoxButton.OK);
                }
            }
        }
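The 70/30 split above draws training indices by rejection sampling (redrawing until an unused index appears), which slows down as the picked set fills up. A shuffle-based split does the same job in linear time; a minimal sketch over the same inputs/outputs arrays, assuming System.Linq is imported and rnd is the existing random generator:

            // Shuffle an index array once with Fisher-Yates, then slice it:
            int[] indices = Enumerable.Range(0, outputs.Length).ToArray();
            for (int i = indices.Length - 1; i > 0; i--)
            {
                int j = rnd.Next(i + 1);
                int tmp = indices[i]; indices[i] = indices[j]; indices[j] = tmp;
            }

            int split = (int)(outputs.Length * 0.7);
            double[][] inputs70  = indices.Take(split).Select(idx => inputs[idx]).ToArray();
            int[]      outputs70 = indices.Take(split).Select(idx => outputs[idx]).ToArray();
            double[][] inputs30  = indices.Skip(split).Select(idx => inputs[idx]).ToArray();
            int[]      outputs30 = indices.Skip(split).Select(idx => outputs[idx]).ToArray();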
Example #17
        public void learn()
        {
            string basePath = Path.Combine(NUnit.Framework.TestContext.CurrentContext.TestDirectory, "learn");

            #region doc_learn
            // Ensure results are reproducible
            Accord.Math.Random.Generator.Seed = 0;

            // The Bag-of-Audio-Words model converts audio signals of arbitrary
            // size into fixed-length feature vectors. In this example, we
            // will be setting the codebook size to 32. This means all feature
            // vectors that will be generated will have the same length of 32.

            // By default, the BoW object will use the MFCC extractor as the
            // feature extractor and K-means as the clustering algorithm.

            // Create a new Bag-of-Audio-Words (BoW) model
            var bow = BagOfAudioWords.Create(numberOfWords: 32);
            // Note: a simple BoW model can also be created using
            // var bow = new BagOfAudioWords(numberOfWords: 32);

            // Get some training audio files
            FreeSpokenDigitsDataset fsdd = new FreeSpokenDigitsDataset(basePath);
            string[] trainFileNames      = fsdd.Training.LocalPaths;
            int[]    trainOutputs        = fsdd.Training.Digits;

            // Compute the model
            bow.Learn(trainFileNames);

            // After this point, we will be able to translate
            // the signals into double[] feature vectors using
            double[][] trainInputs = bow.Transform(trainFileNames);

            // We can also check some statistics about the dataset:
            int numberOfSignals = bow.Statistics.TotalNumberOfInstances; // 1350

            // Statistics about all the descriptors that have been extracted:
            int      totalDescriptors = bow.Statistics.TotalNumberOfDescriptors;                     // 29106
            double   totalMean        = bow.Statistics.TotalNumberOfDescriptorsPerInstance.Mean;     // 21.56
            double   totalVar         = bow.Statistics.TotalNumberOfDescriptorsPerInstance.Variance; // 52.764002965159314
            IntRange totalRange       = bow.Statistics.TotalNumberOfDescriptorsPerInstanceRange;     // [8, 115]

            // Statistics only about the descriptors that have been actually used:
            int      takenDescriptors = bow.Statistics.NumberOfDescriptorsTaken;                     // 29106
            double   takenMean        = bow.Statistics.NumberOfDescriptorsTakenPerInstance.Mean;     // 21.56
            double   takenVar         = bow.Statistics.NumberOfDescriptorsTakenPerInstance.Variance; // 52.764002965159314
            IntRange takenRange       = bow.Statistics.NumberOfDescriptorsTakenPerInstanceRange;     // [8, 115]
            #endregion

            Assert.AreEqual(1350, numberOfSignals);

            Assert.AreEqual(29106, totalDescriptors);
            Assert.AreEqual(21.56, totalMean);
            Assert.AreEqual(52.764002965159314, totalVar, 1e-8);
            Assert.AreEqual(new IntRange(8, 115), totalRange);

            Assert.AreEqual(29106, takenDescriptors);
            Assert.AreEqual(21.56, takenMean);
            Assert.AreEqual(52.764002965159314, takenVar, 1e-8);
            Assert.AreEqual(new IntRange(8, 115), takenRange);


            var kmeans = bow.Clustering as KMeans;
            Assert.AreEqual(13, kmeans.Clusters.NumberOfInputs);
            Assert.AreEqual(32, kmeans.Clusters.NumberOfOutputs);
            Assert.AreEqual(32, kmeans.Clusters.NumberOfClasses);

            #region doc_classification

            // Now, the features can be used to train any classification
            // algorithm as if they were the signals themselves. For example,
            // we can use them to train a Chi-Square SVM as shown below:

            // Create the SMO algorithm to learn a Chi-Square kernel SVM
            var teacher = new MulticlassSupportVectorLearning <ChiSquare>()
            {
                Learner = (p) => new SequentialMinimalOptimization <ChiSquare>()
            };

            // Obtain a learned machine
            var svm = teacher.Learn(trainInputs, trainOutputs);

            // Use the machine to classify the features
            int[] output = svm.Decide(trainInputs);

            // Compute the error between the expected and predicted labels for the training set:
            var    trainMetrics = GeneralConfusionMatrix.Estimate(svm, trainInputs, trainOutputs);
            double trainAcc     = trainMetrics.Accuracy; // should be around 0.97259259259259256

            // Now, we can evaluate the performance of the model on the testing set:
            string[] testFileNames = fsdd.Testing.LocalPaths;
            int[]    testOutputs   = fsdd.Testing.Digits;

            // First we transform the testing set to double[]:
            double[][] testInputs = bow.Transform(testFileNames);

            // Then we compute the error between expected and predicted for the testing set:
            var    testMetrics = GeneralConfusionMatrix.Estimate(svm, testInputs, testOutputs);
            double testAcc     = testMetrics.Accuracy; // should be around 0.8666666666666667
            #endregion

            Assert.AreEqual(0.97259259259259256, trainAcc, 1e-8);
            Assert.AreEqual(0.8666666666666667, testAcc, 1e-8);
        }
Example #18
        static KNearestNeighbors kNearestNeighbours(List <int[]> trainingData, List <int[]> testingData, out double precision)
        {
            KNearestNeighbors temp   = null;
            int    testingCount      = testingData.Count / 10;
            int    trainingCount     = testingData.Count - testingCount;
            double errorAverage      = 0;
            double prec              = 0;
            int    indexTestingStart = testingData.Count - testingCount;
            int    indexTestingEnd   = testingData.Count;

            Console.WriteLine("k nearest neighbours Classification");
            for (int i = 0; i < 10; i++)
            {
                var     watch = System.Diagnostics.Stopwatch.StartNew();
                int[][] inputData, testinputData;
                int[]   outputData, testoutputData;
                PrepareInputOutput(out inputData, out outputData, out testinputData, out testoutputData, trainingData, testingData, indexTestingStart, indexTestingEnd);
                double[][] input = new double[inputData.GetLength(0)][];
                double     a     = 0;
                for (int j = 0; j < inputData.GetLength(0); j++)
                {
                    input[j] = new double[10];
                    for (int k = 0; k < 10; k++)
                    {
                        a           = Convert.ToDouble(inputData[j][k]);
                        input[j][k] = a;
                    }
                }
                double[][] testin = new double[testinputData.Length / 1000][];
                for (int j = 0; j < testinputData.Length / 1000; j++)
                {
                    testin[j] = new double[10];
                    for (int k = 0; k < 10; k++)
                    {
                        testin[j][k] = testinputData[j][k];
                    }
                }
                int[] testout = new int[testinputData.Length / 1000];
                for (int j = 0; j < testinputData.Length / 1000; j++)
                {
                    testout[j] = testoutputData[j];
                }
                var knn = new KNearestNeighbors(k: 4);
                knn.Learn(input, outputData);
                var    cm    = GeneralConfusionMatrix.Estimate(knn, testin, testout);
                double error = cm.Error;
                double acc   = cm.Accuracy;
                double kappa = cm.Kappa;
                errorAverage += error; // accumulate the per-fold error for the final precision estimate
                watch.Stop();
                var elapsedMs = watch.ElapsedMilliseconds;
                Console.WriteLine("Iteracija baigta per: {0}ms", elapsedMs);
                Console.WriteLine("Iteracijos tikslumas: {0}", acc);
                if (acc > prec)
                {
                    prec = acc;
                    temp = knn;
                }
                indexTestingEnd    = indexTestingStart;
                indexTestingStart -= testingCount;
            }
            precision = 1 - (errorAverage / iterations); // iterations is an external field; it is expected to equal the 10 folds above
            return(temp);
        }
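For completeness, the manual 10-fold loop above can also be expressed with Accord's CrossValidation helper. A hedged sketch based on the CrossValidation.Create pattern from the Accord.NET documentation; parameter names and result properties may differ between versions, and input/outputData stand for the prepared data from the loop above, so treat this as an assumption rather than the exact API:

            // 10-fold cross-validation of a k = 4 KNN over the prepared data:
            var crossValidation = CrossValidation.Create(
                k: 10,                                       // number of folds
                learner: (p) => new KNearestNeighbors(k: 4), // model created for each fold
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                x: input, y: outputData);

            var result = crossValidation.Learn(input, outputData);
            double meanError = result.Validation.Mean; // average held-out error across the folds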