/// <summary>
/// Trains a Gaussian-kernel support vector machine with Sequential Minimal Optimization,
/// prints its error rate on the test set and saves the trained model to disk.
/// </summary>
/// <param name="train_data">Samples used to train the classifier.</param>
/// <param name="test_data">Samples used to evaluate the classifier.</param>
/// <param name="train_label">Labels of the training samples.</param>
/// <param name="test_label">Labels of the test samples.</param>
/// <param name="Classifier_Path">Directory in which the classifier is saved.</param>
/// <param name="Classifier_Name">File name under which the classifier is saved.</param>
/// <remarks>
/// Any exception raised while training, evaluating or saving is written to the
/// console and is not rethrown.
/// </remarks>
public void SVM(double[][] train_data, double[][] test_data, int[] train_label, int[] test_label, String Classifier_Path, String Classifier_Name)
{
    var learn = new SequentialMinimalOptimization<Gaussian>()
    {
        UseComplexityHeuristic = true,
        UseKernelEstimation = true
    };

    try
    {
        SupportVectorMachine<Gaussian> svm = learn.Learn(train_data, train_label);

        // Evaluate on the held-out data and report the error rate.
        var cm = GeneralConfusionMatrix.Estimate(svm, test_data, test_label);
        Console.WriteLine(cm.Error);

        svm.Save(Path.Combine(Classifier_Path, Classifier_Name));
    }
    catch (Exception e)
    {
        // Print the whole exception (type, message and stack trace):
        // the stack trace alone does not say what actually went wrong.
        Console.WriteLine(e);
    }
}
/// <summary>
/// Trains a logistic regression with Iteratively Reweighted Least Squares,
/// prints its error rate on the test set and saves the trained model to disk.
/// </summary>
/// <param name="train_data">Samples used to train the classifier.</param>
/// <param name="test_data">Samples used to evaluate the classifier.</param>
/// <param name="train_label">Labels of the training samples.</param>
/// <param name="test_label">Labels of the test samples.</param>
/// <param name="Classifier_Path">Directory in which the classifier is saved.</param>
/// <param name="Classifier_Name">File name under which the classifier is saved.</param>
public void LogisticRegression(double[][] train_data, double[][] test_data, int[] train_label, int[] test_label, String Classifier_Path, String Classifier_Name)
{
    var learner = new IterativeReweightedLeastSquares<LogisticRegression>()
    {
        Tolerance = 1e-4,
        MaxIterations = 100,
        Regularization = 0
    };

    LogisticRegression regression = learner.Learn(train_data, train_label);

    // Evaluate on the held-out data and report the error rate.
    var cm = GeneralConfusionMatrix.Estimate(regression, test_data, test_label);
    Console.WriteLine(cm.Error);

    regression.Save(Path.Combine(Classifier_Path, Classifier_Name));
}
/// <summary>
/// Sets <c>KnnModel.K</c> to 1 and returns the accuracy of the model measured on
/// <c>TrainingModelInputs</c> against <c>TrainingModelOutputs</c>.
/// </summary>
/// <returns>The accuracy reported by the estimated confusion matrix.</returns>
public double EvaluateAccuracy()
{
    // NOTE(review): K stays at 1 after this call returns - confirm that this
    // lasting change to the model is intended.
    KnnModel.K = 1;

    return GeneralConfusionMatrix
        .Estimate(KnnModel, TrainingModelInputs, TrainingModelOutputs)
        .Accuracy;
}
public void learn_test()
{
    #region doc_learn_distance
    // Sample learning data made of nine 3-dimensional points in three classes:
    // two points of class 0, four of class 1 and three of class 2.
    double[][] samples =
    {
        // Class 0
        new double[] { -5, -2, -1 },
        new double[] { -5, -5, -6 },

        // Class 1
        new double[] { 2, 1, 1 },
        new double[] { 1, 1, 2 },
        new double[] { 1, 2, 2 },
        new double[] { 3, 1, 2 },

        // Class 2
        new double[] { 11, 5, 4 },
        new double[] { 15, 5, 6 },
        new double[] { 10, 5, 6 },
    };

    int[] labels =
    {
        0, 0,       // class 0
        1, 1, 1, 1, // class 1
        2, 2, 2     // class 2
    };

    // Create a K-Nearest Neighbors classifier with k = 4 (the 4 closest training
    // points vote on each decision) that uses the squared Euclidean distance.
    var classifier = new KNearestNeighbors<double[]>(k: 4, distance: new SquareEuclidean());

    // Learn from the samples:
    classifier.Learn(samples, labels);

    // Classify a new instance:
    int predicted = classifier.Decide(new double[] { 11, 5, 4 }); // should be 2

    // Compute the confusion matrix of the classifier on its own training data:
    var matrix = GeneralConfusionMatrix.Estimate(classifier, samples, labels);

    // And read some measures from it:
    double error = matrix.Error;  // should be 0
    double acc = matrix.Accuracy; // should be 1
    double kappa = matrix.Kappa;  // should be 1
    #endregion

    Assert.AreEqual(2, predicted);
    Assert.AreEqual(0, error);
    Assert.AreEqual(1, acc);
    Assert.AreEqual(1, kappa);
}
/// <summary>
/// Computes the zero-one loss of <c>Model</c> on the given data and stores it in
/// <c>TrainingError</c>.
/// </summary>
/// <param name="testData">The samples on which the model is evaluated.</param>
/// <param name="testOutput">The expected labels of <paramref name="testData"/>.</param>
public override void CalculateTrainingError(List<double[]> testData, List<int> testOutput)
{
    // Materialize each list once instead of copying it for every consumer.
    int[] expected = testOutput.ToArray();
    double[][] inputs = testData.ToArray();

    TrainingError = new ZeroOneLoss(expected).Loss(Model.Decide(inputs));
}
/// <summary>
/// Trains a 5-nearest-neighbours classifier on the first entry of
/// <paramref name="trainingSet"/>, prints its statistics on the training data,
/// saves the classifier and writes out the label map.
/// </summary>
/// <param name="trainingSet">
/// Only the first entry is used: its key holds one label per sample and its value
/// holds the corresponding feature vectors.
/// </param>
/// <returns>The mapping from integer class id to label text.</returns>
public static Dictionary<int, string> KnnCreate(Dictionary<List<string>, double[][]> trainingSet)
{
    KeyValuePair<List<string>, double[][]> entry = trainingSet.First();
    List<string> labels = entry.Key;
    double[][] inputs = entry.Value;

    // Give every distinct label a consecutive class id in order of first appearance.
    // The reverse lookup makes a repeated label resolve to ITS OWN id, even when
    // other labels were seen in between (e.g. a, b, a -> 0, 1, 0).
    Dictionary<int, string> labelMap = new Dictionary<int, string>();
    Dictionary<string, int> labelIds = new Dictionary<string, int>();
    int[] classes = new int[labels.Count];
    for (int i = 0; i < labels.Count; i++)
    {
        string label = labels[i];
        int id;
        if (!labelIds.TryGetValue(label, out id))
        {
            id = labelMap.Count;
            labelIds.Add(label, id);
            labelMap.Add(id, label);
            Console.WriteLine(id + ": " + label);
        }

        classes[i] = id;
    }

    // Create and train the K-Nearest Neighbors classifier.
    // Changing k may give better accuracy.
    var knn = new KNearestNeighbors(k: 5);
    knn.Learn(inputs, classes);

    // Confusion matrix of the classifier on its own training data:
    var cm = GeneralConfusionMatrix.Estimate(knn, inputs, classes);

    Console.WriteLine("error: " + cm.Error);
    Console.WriteLine("accuracy: " + cm.Accuracy);
    Console.WriteLine("kappa: " + cm.Kappa);
    Console.WriteLine("pearson: " + cm.Pearson);

    // Report the error ratio of every column that has at least one error.
    var columnErrors = cm.ColumnErrors;
    var columnTotals = cm.ColumnTotals;
    for (int i = 0; i < columnErrors.Length; i++)
    {
        if (columnErrors[i] != 0)
        {
            double columnError = (double)columnErrors[i] / columnTotals[i];
            Console.WriteLine("Error of " + labelMap[i] + ": " + columnError);
        }
    }

    SaveKnn(knn);
    Fingerprinting.WriteLabelMap(labelMap);
    return labelMap;
}
/// <summary>
/// Estimates the confusion matrix of <c>classifier</c> on the collected test data,
/// logs the error and accuracy, then clears <c>testInputs</c>.
/// </summary>
private void calculateConfusionMatrix()
{
    var matrix = GeneralConfusionMatrix.Estimate(classifier, testInputs.ToArray(), testOutputs.ToArray());

    double errorRate = matrix.Error;
    double accuracyRate = matrix.Accuracy;

    Debug.Log("Error - " + errorRate);
    Debug.Log("Accuracy - " + accuracyRate);

    // NOTE(review): only the inputs are cleared here; testOutputs keeps its
    // entries - confirm that it is reset elsewhere.
    testInputs.Clear();
}
/// <summary>
/// Trains a 5-nearest-neighbours classifier, prints its error rate on the test set
/// and saves the trained model to disk.
/// </summary>
/// <param name="train_data">Samples used to train the classifier.</param>
/// <param name="test_data">Samples used to evaluate the classifier.</param>
/// <param name="train_label">Labels of the training samples.</param>
/// <param name="test_label">Labels of the test samples.</param>
/// <param name="Classifier_Path">Directory in which the classifier is saved.</param>
/// <param name="Classifier_Name">File name under which the classifier is saved.</param>
public void Knn(double[][] train_data, double[][] test_data, int[] train_label, int[] test_label, String Classifier_Path, String Classifier_Name)
{
    KNearestNeighbors knn = new KNearestNeighbors(k: 5);
    knn.Learn(train_data, train_label);

    // Evaluate on the held-out data and report the error rate.
    var cm = GeneralConfusionMatrix.Estimate(knn, test_data, test_label);
    Console.WriteLine(cm.Error);

    knn.Save(Path.Combine(Classifier_Path, Classifier_Name));
}
/// <summary>
/// Estimates the accuracy of the classifier on the test data and writes it, as a
/// percentage, to a new text file named
/// "&lt;time&gt;_Accuracy&lt;classifier&gt;.txt" inside <paramref name="path"/>.
/// </summary>
/// <param name="path">Directory in which the result file is created.</param>
/// <remarks>
/// The file is opened with <see cref="FileMode.CreateNew"/>, so an already
/// existing file with the same name causes an <see cref="IOException"/>.
/// </remarks>
public void SaveAccuracy(string path = @"H:\Documents\Visual Studio 2015\Projects\ML\ML\SaveResults\")
{
    string timeAfter = InitialTime();
    var cm = GeneralConfusionMatrix.Estimate(Сlassifier, TestInputs, TestOutputs);

    // Path.Combine inserts the directory separator only when it is missing, so the
    // file lands inside the directory whether or not 'path' ends with a separator.
    string fileName = timeAfter + "_Accuracy" + Сlassifier + ".txt";
    string fullPath = Path.Combine(path ?? string.Empty, fileName);

    using (FileStream fs = new FileStream(fullPath, FileMode.CreateNew))
    using (StreamWriter writer = new StreamWriter(fs))
    {
        // Scale to percent BEFORE rounding: multiplying an already rounded fraction
        // by 100 can surface binary floating-point noise (e.g. 94.69999999999999).
        writer.WriteLine("Accuracy for {0}: {1} %", Сlassifier, Math.Round(cm.Accuracy * 100, 1));
    }
}
/// <summary>
/// Prints the confusion matrix, error and accuracy of the multinomial logistic
/// regression, random forest and minimum-mean-distance predictors, each estimated
/// against <c>FrequencyLabelsInt</c>.
/// </summary>
public static void ClassifierStatistics()
{
    // NOTE(review): only this first estimate qualifies its inputs as
    // Validation.PredictorPoints; the other two use the unqualified
    // PredictorPoints - confirm both names refer to the same data.
    var logisticMatrix = GeneralConfusionMatrix.Estimate(
        Predictor.MultinomialLogisticRegression,
        Validation.PredictorPoints,
        FrequencyLabelsInt);
    Console.WriteLine($"LR CM: {logisticMatrix} \n LR Error: {logisticMatrix.Error} LR ACcuracy: {logisticMatrix.Accuracy}");

    var forestMatrix = GeneralConfusionMatrix.Estimate(
        Predictor.RandomForest,
        PredictorPoints,
        FrequencyLabelsInt);
    Console.WriteLine($"RF CM: {forestMatrix} \n RF Error: {forestMatrix.Error} RF ACcuracy: {forestMatrix.Accuracy}");

    var distanceMatrix = GeneralConfusionMatrix.Estimate(
        Predictor.MinimumMeanDistance,
        PredictorPoints,
        FrequencyLabelsInt);
    Console.WriteLine($"MMD CM: {distanceMatrix} \n MMD Error: {distanceMatrix.Error} MMD ACcuracy: {distanceMatrix.Accuracy}");
}
/// <summary>
/// Prints the confusion matrix, error and accuracy of the multinomial logistic
/// regression, minimum-mean-distance and random forest predictors, each estimated
/// on <c>PredictorPointsTrain</c> against <c>FrequencyLabelsInt</c>, followed by
/// the comma-separated list of <c>ClassificationElectrodes</c>.
/// </summary>
public static void CalulateTrainStatisticsClassification()
{
    Console.WriteLine("----TRAIN Statistics----");

    var logisticMatrix = GeneralConfusionMatrix.Estimate(
        Predictor.MultinomialLogisticRegression,
        PredictorPointsTrain,
        FrequencyLabelsInt);
    Console.WriteLine($"LR CM: {logisticMatrix} \n LR Error: {logisticMatrix.Error} LR ACcuracy: {logisticMatrix.Accuracy}");

    var distanceMatrix = GeneralConfusionMatrix.Estimate(
        Predictor.MinimumMeanDistance,
        PredictorPointsTrain,
        FrequencyLabelsInt);
    Console.WriteLine($"MMD CM: {distanceMatrix} \n MMD Error: {distanceMatrix.Error} MMD ACcuracy: {distanceMatrix.Accuracy}");

    var forestMatrix = GeneralConfusionMatrix.Estimate(
        Predictor.RandomForest,
        PredictorPointsTrain,
        FrequencyLabelsInt);
    Console.WriteLine($"RF CM: {forestMatrix} \n RF Error: {forestMatrix.Error} RF ACcuracy: {forestMatrix.Accuracy}");

    var electrodeList = String.Join(",", ClassificationElectrodes);
    Console.WriteLine($"Above Results for {electrodeList}");
}
public void learn_test_4()
{
    #region doc_learn_2
    // This example learns a multinomial logistic regression analysis on Fisher's
    // Iris dataset using plain arrays only. It shows that the analysis does not
    // need DataTables, Codification codebooks or other supplementary features.

    // Load Fisher's Iris dataset:
    Iris iris = new Iris();
    double[][] x = iris.Instances;
    int[] y = iris.ClassLabels;

    // Create a new Multinomial Logistic Regression Analysis:
    var analysis = new MultinomialLogisticRegressionAnalysis();

    // Note: the class and variable names of the dataset could have been given
    // to the analysis when creating it:
    //
    //   var analysis = new MultinomialLogisticRegressionAnalysis()
    //   {
    //       InputNames = iris.VariableNames,
    //       OutputNames = iris.ClassNames
    //   };
    //
    // They are left out here to show that they are optional.

    // Learn the regression from the input and output pairs:
    MultinomialLogisticRegression regression = analysis.Learn(x, y);

    // Retrieve some information about what has just been learned:
    int coefficients = analysis.Coefficients.Count;    // should be 11
    int numberOfInputs = analysis.NumberOfInputs;      // should be 4
    int numberOfOutputs = analysis.NumberOfOutputs;    // should be 3
    string[] inputNames = analysis.InputNames;         // should be "Input 0", "Input 1", "Input 2", "Input 3"
    string[] outputNames = analysis.OutputNames;       // should be "Class 0", "Class 1", "Class 2"

    // The regression is best visualized when it is data-bound to a
    // Windows.Forms DataGridView or WPF DataGrid. The values of all
    // coefficients can be shown with:
    // DataGridBox.Show(regression.Coefficients); // uncomment this line

    // The matrix of coefficients:
    double[][] coef = analysis.CoefficientValues;

    // NOTE(review): the two reference tables below are never compared against
    // the learned values in this test - verify them before relying on them.
    double[][] expectedCoef = new double[][]
    {
        new double[] { 2.85217775752471, -0.0579282723520426, -0.533293368378012, -1.16283850605289 },
        new double[] { 5.21813357698422, -0.113601186660817, 0.291387041358367, -0.9826369387481 }
    };

    // The standard errors associated with the coefficients:
    double[][] stdErr = analysis.StandardErrors;

    double[][] expectedErr = new double[][]
    {
        new double[] { -2.02458003380033, -0.339533576505471, -1.164084923948, -0.520961533343425, 0.0556314901718 },
        new double[] { -3.73971589217449, -1.47672790071382, -1.76795568348094, -0.495032307980058, 0.113563519656386 }
    };

    // Statistics and hypothesis tests are available too:
    WaldTest[][] wald = analysis.WaldTests;
    ChiSquareTest chiSquare = analysis.ChiSquare;    // p-value should be practically 0
    double logLikelihood = analysis.LogLikelihood;   // should be -29.558338705646587

    // The regression can be used to predict the classes:
    int[] pred = regression.Transform(x);

    // And the accuracy of the prediction can be measured if needed:
    var cm = GeneralConfusionMatrix.Estimate(regression, x, y);

    double acc = cm.Accuracy;   // should be 0.94666666666666666
    double kappa = cm.Kappa;    // should be 0.91999999999999982
    #endregion

    Assert.AreEqual(11, coefficients);
    Assert.AreEqual(4, numberOfInputs);
    Assert.AreEqual(3, numberOfOutputs);
    Assert.AreEqual(new[] { "Input 0", "Input 1", "Input 2", "Input 3" }, inputNames);
    Assert.AreEqual(new[] { "Class 0", "Class 1", "Class 2" }, outputNames);

    Assert.AreEqual(0.94666666666666666, acc, 1e-10);
    Assert.AreEqual(0.91999999999999982, kappa, 1e-10);
    Assert.AreEqual(7.8271969268290043E-54, chiSquare.PValue, 1e-8);
    Assert.AreEqual(-29.558338705646587, logLikelihood, 1e-8);
}
public void learn_test()
{
    // http://www.ats.ucla.edu/stat/stata/dae/mlogit.htm

    #region doc_learn_1
    // This example downloads a dataset from the web and learns a multinomial
    // logistic regression on it. Keep in mind that the Multinomial Logistic
    // Regression also works without many of the elements shown below, such as
    // the codebook, the DataTable and the CsvReader.

    // Download the example dataset:
    CsvReader reader = CsvReader.FromUrl("https://raw.githubusercontent.com/rlowrance/re/master/hsbdemo.csv", hasHeaders: true);

    // Read the CSV into a DataTable. This step is convenient,
    // but not strictly required for learning the model:
    DataTable table = reader.ToTable();

    // The regression is learned between these input and output fields of the table:
    string[] inputNames = new[] { "write", "ses" };
    string[] outputNames = new[] { "prog" };

    // Create a codification codebook that converts the string fields of the data
    // into integer symbols, because the model can only learn from numeric data.
    // The initializer below forces a particular interpretation for each column:
    var codification = new Codification()
    {
        { "write", CodificationVariable.Continuous },
        { "ses", CodificationVariable.CategoricalWithBaseline, new[] { "low", "middle", "high" } },
        { "prog", CodificationVariable.Categorical, new[] { "academic", "general" } },
    };

    // Learn the codification:
    codification.Learn(table);

    // Transform the symbols into a vector representation, which grows the number of inputs:
    double[][] x = codification.Transform(table, inputNames, out inputNames).ToDouble();
    double[][] y = codification.Transform(table, outputNames, out outputNames).ToDouble();

    // Create a new Multinomial Logistic Regression Analysis:
    var analysis = new MultinomialLogisticRegressionAnalysis()
    {
        InputNames = inputNames,
        OutputNames = outputNames,
    };

    // Learn the regression from the input and output pairs:
    MultinomialLogisticRegression regression = analysis.Learn(x, y);

    // Retrieve some information about what has just been learned:
    int coefficients = analysis.Coefficients.Count;   // should be 9
    int numberOfInputs = analysis.NumberOfInputs;     // should be 3
    int numberOfOutputs = analysis.NumberOfOutputs;   // should be 3

    inputNames = analysis.InputNames;     // should be "write", "ses: middle", "ses: high"
    outputNames = analysis.OutputNames;   // should be "prog: academic", "prog: general", "prog: vocation"

    // The regression is best visualized when it is data-bound to a
    // Windows.Forms DataGridView or WPF DataGrid. The values of all
    // coefficients can be shown with:
    // DataGridBox.Show(regression.Coefficients); // uncomment this line

    // The matrix of coefficients:
    double[][] coef = analysis.CoefficientValues;

    // NOTE(review): the two reference tables below are not compared against the
    // learned values inside this method - verify them before relying on them.
    double[][] expectedCoef = new double[][]
    {
        new double[] { 2.85217775752471, -0.0579282723520426, -0.533293368378012, -1.16283850605289 },
        new double[] { 5.21813357698422, -0.113601186660817, 0.291387041358367, -0.9826369387481 }
    };

    // The standard errors associated with the coefficients:
    double[][] stdErr = analysis.StandardErrors;

    double[][] expectedErr = new double[][]
    {
        new double[] { -2.02458003380033, -0.339533576505471, -1.164084923948, -0.520961533343425, 0.0556314901718 },
        new double[] { -3.73971589217449, -1.47672790071382, -1.76795568348094, -0.495032307980058, 0.113563519656386 }
    };

    // Statistics and hypothesis tests are available too:
    WaldTest[][] wald = analysis.WaldTests;
    ChiSquareTest chiSquare = analysis.ChiSquare;    // p-value should be 1.06300120956871E-08
    double logLikelihood = analysis.LogLikelihood;   // should be -179.98172637136295

    // The regression can be used to predict the classes:
    int[] pred = regression.Transform(x);

    // And the accuracy of the prediction can be measured if needed. The output
    // rows are turned back into class indices by taking the position of their
    // largest element:
    var cm = GeneralConfusionMatrix.Estimate(regression, x, y.ArgMax(dimension: 1));

    double acc = cm.Accuracy;   // should be 0.61
    double kappa = cm.Kappa;    // should be 0.2993487536492252
    #endregion

    Assert.AreEqual(9, coefficients);
    Assert.AreEqual(3, numberOfInputs);
    Assert.AreEqual(3, numberOfOutputs);
    Assert.AreEqual(new[] { "write", "ses: middle", "ses: high" }, inputNames);
    Assert.AreEqual(new[] { "prog: academic", "prog: general", "prog: vocation" }, outputNames);

    Assert.AreEqual(0.61, acc, 1e-10);
    Assert.AreEqual(0.2993487536492252, kappa, 1e-10);
    Assert.AreEqual(1.06300120956871E-08, chiSquare.PValue, 1e-8);
    Assert.AreEqual(-179.98172637136295, logLikelihood, 1e-8);

    testmlr(analysis);
}
public void learn_test1()
{
    string basePath = NUnit.Framework.TestContext.CurrentContext.TestDirectory;

    #region doc_learn
    // Sample learning data made of nine 3-dimensional points in three classes:
    // two points of class 0, four of class 1 and three of class 2.
    double[][] inputs =
    {
        // Class 0
        new double[] { -5, -2, -1 },
        new double[] { -5, -5, -6 },

        // Class 1
        new double[] { 2, 1, 1 },
        new double[] { 1, 1, 2 },
        new double[] { 1, 2, 2 },
        new double[] { 3, 1, 2 },

        // Class 2
        new double[] { 11, 5, 4 },
        new double[] { 15, 5, 6 },
        new double[] { 10, 5, 6 },
    };

    int[] outputs =
    {
        0, 0,       // class 0
        1, 1, 1, 1, // class 1
        2, 2, 2     // class 2
    };

    // Create a K-Nearest Neighbors classifier with k = 4, so that
    // the 4 closest training points vote on each decision.
    var knn = new KNearestNeighbors(k: 4);

    // Learn from the samples:
    knn.Learn(inputs, outputs);

    // Classify a new instance:
    int answer = knn.Decide(new double[] { 11, 5, 4 }); // should be 2

    // Compute the confusion matrix of the classifier on its own training data:
    var cm = GeneralConfusionMatrix.Estimate(knn, inputs, outputs);

    // And read some measures from it:
    double error = cm.Error;   // should be 0
    double acc = cm.Accuracy;  // should be 1
    double kappa = cm.Kappa;   // should be 1
    #endregion

    Assert.AreEqual(2, answer);
    Assert.AreEqual(0, error);
    Assert.AreEqual(1, acc);
    Assert.AreEqual(1, kappa);

#if !NO_BINARY_SERIALIZATION
    #region doc_serialization
    // Once the model has been created and learned it can be saved to disk.
    // Import the Accord.IO namespace at the top of the source file and use
    // the Save extension method of the Serializer:

    // Save to a file called "knn.bin" in the basePath directory:
    knn.Save(Path.Combine(basePath, "knn.bin"));

    // Loading it back from disk goes through the Serializer class directly:
    var loaded_knn = Serializer.Load<KNearestNeighbors>(Path.Combine(basePath, "knn.bin"));

    // At this point, knn and loaded_knn should be
    // two different instances of identical objects.
    #endregion

    // Make sure the loaded classifier still works:
    Assert.AreEqual(2, loaded_knn.Decide(new double[] { 11, 5, 4 }));

    cm = GeneralConfusionMatrix.Estimate(loaded_knn, inputs, outputs);
    Assert.AreEqual(0, cm.Error);
    Assert.AreEqual(1, cm.Accuracy);
    Assert.AreEqual(1, cm.Kappa);

    // And that it carries the same configuration as the original:
    Assert.AreEqual(knn.ClassCount, loaded_knn.ClassCount);
    Assert.AreEqual(knn.Distance, loaded_knn.Distance);
    Assert.AreEqual(knn.K, loaded_knn.K);
    Assert.AreEqual(knn.NumberOfClasses, loaded_knn.NumberOfClasses);
    Assert.AreEqual(knn.NumberOfInputs, loaded_knn.NumberOfInputs);
    Assert.AreEqual(knn.NumberOfOutputs, loaded_knn.NumberOfOutputs);
    Assert.AreEqual(knn.Outputs, loaded_knn.Outputs);
    Assert.AreEqual(knn.Token, loaded_knn.Token);
#endif
}
/// <summary>
/// Prints the accuracy of the classifier as a percentage.
/// The expected values (<c>TestOutputs</c>) must be available for the computation.
/// </summary>
public void PrintAccuracy()
{
    var cm = GeneralConfusionMatrix.Estimate(Сlassifier, TestInputs, TestOutputs);

    // Scale to percent BEFORE rounding: multiplying an already rounded fraction
    // by 100 can surface binary floating-point noise (e.g. 94.69999999999999).
    Console.WriteLine("Accuracy for {0}: {1} %", Сlassifier, Math.Round(cm.Accuracy * 100, 1));
}
/// <summary>
/// Handles a click on the generate button. Trains a 4-nearest-neighbours model on
/// the loaded objects, generates random objects whose score for their group exceeds
/// the group's threshold, compares models trained on the original data, on the
/// generated data and on a random 70% split, shows the validation window and
/// writes the generated objects to "ExtendedExamples.txt" in a folder chosen by
/// the user.
/// </summary>
/// <param name="sender">The control that raised the event.</param>
/// <param name="e">The event data.</param>
private void generateButton_Click(object sender, RoutedEventArgs e)
{
    // Upper bound on random draws per generated object. Without a bound, a score
    // threshold that no random object reaches would spin forever on the UI thread.
    const int maxAttemptsPerObject = 100;

    Dictionary<string, int> classesDict = new Dictionary<string, int>();
    double[][] inputs = paramPassList.Select(list => list.ToArray()).ToArray();
    var knn = new KNearestNeighbors(k: 4);

    // Give every run of equally named groups a consecutive class id and
    // emit that id once per object of the group.
    List<int> groupClasses = new List<int>();
    int clasessCount = 0;
    string currGroup = groupList[0].groupName;
    classesDict.Add(currGroup, clasessCount);
    foreach (var group in groupList)
    {
        if (!currGroup.Equals(group.groupName))
        {
            clasessCount++;
            currGroup = group.groupName;
            classesDict.Add(currGroup, clasessCount);
        }

        for (int i = 0; i < group.groupSize; i++)
        {
            groupClasses.Add(clasessCount);
            objNumber++;
        }
    }
    objNumber++;

    int[] outputs = groupClasses.ToArray();

    // Learn the model on the original objects and measure it on the same data:
    knn.Learn(inputs, outputs);
    var cm = GeneralConfusionMatrix.Estimate(knn, inputs, outputs);

    List<int> testOutputsList = new List<int>();
    List<double[]> testInputsList = new List<double[]>();

    using (var dialog = new System.Windows.Forms.FolderBrowserDialog())
    {
        System.Windows.Forms.DialogResult result = dialog.ShowDialog();
        if (result != System.Windows.Forms.DialogResult.OK)
        {
            return;
        }

        // Generate the new objects: for every requested object draw random 0/1
        // parameter vectors until one scores above the threshold of its group.
        var resultsToWrite = new System.Text.StringBuilder();
        foreach (var item in groupScoreList)
        {
            for (int i = 0; i < item.groupSize; i++)
            {
                int classId = classesDict[item.groupName];

                for (int attempt = 0; attempt < maxAttemptsPerObject; attempt++)
                {
                    List<double> next = new List<double>();
                    for (int j = 0; j < paramSize; j++)
                    {
                        next.Add(rnd.Next(2));
                    }

                    double[] paramValues = next.ToArray();
                    double scoreValue = knn.Score(paramValues, classId);
                    if ((scoreValue * 100) > item.groupScore)
                    {
                        // One line per object: "Obiekt<n> - <group>,<p1>,<p2>,..."
                        string line = "Obiekt" + objNumber + " - " + item.groupName + ","
                                      + string.Join(",", paramValues);
                        resultsToWrite.AppendLine(line.TrimEnd(','));
                        objNumber++;

                        testInputsList.Add(paramValues);
                        testOutputsList.Add(classId);
                        break;
                    }
                }
            }
        }

        if (testInputsList.Count == 0)
        {
            // No object passed its threshold, so there is nothing to train
            // the comparison model on and nothing to write.
            MessageBox.Show("Coś Poszło nie tak", "Wynik Generacji", MessageBoxButton.OK, MessageBoxImage.Warning);
            return;
        }

        // Model learned from the generated objects only:
        double[][] testInputs = testInputsList.ToArray();
        int[] testOutputs = testOutputsList.ToArray();
        var knntest = new KNearestNeighbors(k: 4);
        knntest.Learn(testInputs, testOutputs);
        var cmtest = GeneralConfusionMatrix.Estimate(knntest, testInputs, testOutputs);

        // Random 70% / 30% split of the original objects. The set guarantees
        // distinct picks and lets EVERY index, including 0, be selected.
        int percent70 = (int)(outputs.Length * 0.7);
        int percent30 = outputs.Length - percent70;

        List<int> randompicks70 = new List<int>(percent70);
        HashSet<int> picked = new HashSet<int>();
        while (randompicks70.Count < percent70)
        {
            int candidate = rnd.Next(outputs.Length);
            if (picked.Add(candidate))
            {
                randompicks70.Add(candidate);
            }
        }

        List<int> randompicks30 = new List<int>(percent30);
        for (int i = 0; i < outputs.Length; i++)
        {
            if (!picked.Contains(i))
            {
                randompicks30.Add(i);
            }
        }

        int[] outputs70 = new int[percent70];
        double[][] inputs70 = new double[percent70][];
        for (int i = 0; i < percent70; i++)
        {
            inputs70[i] = inputs[randompicks70[i]];
            outputs70[i] = outputs[randompicks70[i]];
        }

        int[] outputs30 = new int[percent30];
        double[][] inputs30 = new double[percent30][];
        for (int i = 0; i < percent30; i++)
        {
            inputs30[i] = inputs[randompicks30[i]];
            outputs30[i] = outputs[randompicks30[i]];
        }

        // Model learned from the 70% part.
        // NOTE(review): its confusion matrix is estimated on the same 70% it was
        // trained on, not on the held-out 30% - confirm this is intended.
        var knn70percent = new KNearestNeighbors(k: 4);
        knn70percent.Learn(inputs70, outputs70);
        var cm70percent = GeneralConfusionMatrix.Estimate(knn70percent, inputs70, outputs70);

        // Average score both models give to the true class of the held-out 30%:
        double score70 = 0;
        double scoretest = 0;
        for (int i = 0; i < inputs30.Length; i++)
        {
            score70 += knn70percent.Score(inputs30[i], outputs30[i]);
            scoretest += knntest.Score(inputs30[i], outputs30[i]);
        }
        score70 = score70 / inputs30.Length;
        scoretest = scoretest / inputs30.Length;

        ReadFromFileValidation validateWindow = new ReadFromFileValidation(cm, cmtest, cm70percent, score70, scoretest);
        validateWindow.Show();

        try
        {
            string path = System.IO.Path.Combine(dialog.SelectedPath, "ExtendedExamples.txt");
            System.IO.File.WriteAllText(path, resultsToWrite.ToString());
        }
        catch (Exception)
        {
            MessageBox.Show("Coś Poszło nie tak", "Wynik Generacji", MessageBoxButton.OK, MessageBoxImage.Warning);
            throw;
        }

        MessageBox.Show("Wygenerowano Plik", "Wynik Generacji", MessageBoxButton.OK);
    }
}
public void learn()
{
    string basePath = Path.Combine(NUnit.Framework.TestContext.CurrentContext.TestDirectory, "learn");

    #region doc_learn
    // Ensure results are reproducible
    Accord.Math.Random.Generator.Seed = 0;

    // The Bag-of-Audio-Words model converts audio signals of arbitrary size
    // into fixed-length feature vectors. In this example the codebook size
    // is set to 32, so every generated feature vector has a length of 32.

    // By default, the BoW object will use the MFCC extractor as the
    // feature extractor and K-means as the clustering algorithm.

    // Create a new Bag-of-Audio-Words (BoW) model
    var bow = BagOfAudioWords.Create(numberOfWords: 32);
    // Note: a simple BoW model can also be created using
    // var bow = new BagOfAudioWords(numberOfWords: 32);

    // Get the training signals and their labels
    FreeSpokenDigitsDataset fsdd = new FreeSpokenDigitsDataset(basePath);
    string[] trainFileNames = fsdd.Training.LocalPaths;
    int[] trainOutputs = fsdd.Training.Digits;

    // Compute the model
    bow.Learn(trainFileNames);

    // From this point on, signals can be translated
    // into double[] feature vectors using
    double[][] trainInputs = bow.Transform(trainFileNames);

    // Some statistics about the dataset are available as well:
    int numberOfSignals = bow.Statistics.TotalNumberOfInstances; // 1350

    // Statistics about all the descriptors that have been extracted:
    int totalDescriptors = bow.Statistics.TotalNumberOfDescriptors; // 29106
    double totalMean = bow.Statistics.TotalNumberOfDescriptorsPerInstance.Mean; // 21.56
    double totalVar = bow.Statistics.TotalNumberOfDescriptorsPerInstance.Variance; // 52.764002965159314
    IntRange totalRange = bow.Statistics.TotalNumberOfDescriptorsPerInstanceRange; // [8, 115]

    // Statistics only about the descriptors that have actually been used:
    int takenDescriptors = bow.Statistics.NumberOfDescriptorsTaken; // 29106
    double takenMean = bow.Statistics.NumberOfDescriptorsTakenPerInstance.Mean; // 21.56
    double takenVar = bow.Statistics.NumberOfDescriptorsTakenPerInstance.Variance; // 52.764002965159314
    IntRange takenRange = bow.Statistics.NumberOfDescriptorsTakenPerInstanceRange; // [8, 115]
    #endregion

    Assert.AreEqual(1350, numberOfSignals);

    Assert.AreEqual(29106, totalDescriptors);
    Assert.AreEqual(21.56, totalMean);
    Assert.AreEqual(52.764002965159314, totalVar, 1e-8);
    Assert.AreEqual(new IntRange(8, 115), totalRange);

    Assert.AreEqual(29106, takenDescriptors);
    Assert.AreEqual(21.56, takenMean);
    Assert.AreEqual(52.764002965159314, takenVar, 1e-8);
    Assert.AreEqual(new IntRange(8, 115), takenRange);

    // The clustering is expected to be K-means over 13-dimensional descriptors with 32 clusters
    var kmeans = bow.Clustering as KMeans;
    Assert.AreEqual(13, kmeans.Clusters.NumberOfInputs);
    Assert.AreEqual(32, kmeans.Clusters.NumberOfOutputs);
    Assert.AreEqual(32, kmeans.Clusters.NumberOfClasses);

    #region doc_classification
    // The features can now be used to train any classification algorithm
    // as if they were the signals themselves. For example, they can be
    // used to train a Chi-square SVM as shown below:

    // Create the SMO algorithm to learn a Chi-Square kernel SVM
    var teacher = new MulticlassSupportVectorLearning<ChiSquare>()
    {
        Learner = (p) => new SequentialMinimalOptimization<ChiSquare>()
    };

    // Obtain a learned machine
    var svm = teacher.Learn(trainInputs, trainOutputs);

    // Use the machine to classify the features
    int[] output = svm.Decide(trainInputs);

    // Compute the confusion matrix between expected and predicted labels of the training set:
    var trainMetrics = GeneralConfusionMatrix.Estimate(svm, trainInputs, trainOutputs);
    double trainAcc = trainMetrics.Accuracy; // should be around 0.97259259259259256

    // Now evaluate the performance of the model on the testing set:
    string[] testFileNames = fsdd.Testing.LocalPaths;
    int[] testOutputs = fsdd.Testing.Digits;

    // First transform the testing set into double[] feature vectors:
    double[][] testInputs = bow.Transform(testFileNames);

    // Then compute the confusion matrix between expected and predicted labels of the testing set:
    var testMetrics = GeneralConfusionMatrix.Estimate(svm, testInputs, testOutputs);
    double testAcc = testMetrics.Accuracy; // should be around 0.8666666666666667
    #endregion

    Assert.AreEqual(0.97259259259259256, trainAcc, 1e-8);
    Assert.AreEqual(0.8666666666666667, testAcc, 1e-8);
}
/// <summary>
/// Trains and evaluates a 4-nearest-neighbours classifier over ten successive
/// test windows and returns the classifier with the best accuracy.
/// </summary>
/// <param name="trainingData">Training rows, passed on to <c>PrepareInputOutput</c>.</param>
/// <param name="testingData">Testing rows, passed on to <c>PrepareInputOutput</c>; their count defines the window size.</param>
/// <param name="precision">Receives one minus the error averaged over all rounds.</param>
/// <returns>
/// The classifier of the round with the highest accuracy, or <c>null</c> when no
/// round reached an accuracy above zero.
/// </returns>
static KNearestNeighbors kNearestNeighbours(List<int[]> trainingData, List<int[]> testingData, out double precision)
{
    const int rounds = 10;
    // Number of leading columns of every row that are used as features.
    const int featureCount = 10;
    // Only 1/1000 of the prepared test rows are evaluated in each round.
    // NOTE(review): presumably a speed-up; with fewer than 1000 test rows
    // nothing is evaluated at all - confirm.
    const int testSubsampleDivisor = 1000;

    KNearestNeighbors best = null;
    double bestAccuracy = 0;
    double errorSum = 0;

    int testingCount = testingData.Count / rounds;
    int indexTestingStart = testingData.Count - testingCount;
    int indexTestingEnd = testingData.Count;

    Console.WriteLine("k nearest neighbours Classification");
    for (int round = 0; round < rounds; round++)
    {
        var watch = System.Diagnostics.Stopwatch.StartNew();

        int[][] inputData, testinputData;
        int[] outputData, testoutputData;
        PrepareInputOutput(out inputData, out outputData, out testinputData, out testoutputData,
                           trainingData, testingData, indexTestingStart, indexTestingEnd);

        // Convert the integer training rows into double feature vectors.
        double[][] input = new double[inputData.Length][];
        for (int j = 0; j < input.Length; j++)
        {
            input[j] = new double[featureCount];
            for (int k = 0; k < featureCount; k++)
            {
                input[j][k] = inputData[j][k];
            }
        }

        // Take the leading part of the test rows together with their labels.
        int testRows = testinputData.Length / testSubsampleDivisor;
        double[][] testin = new double[testRows][];
        int[] testout = new int[testRows];
        for (int j = 0; j < testRows; j++)
        {
            testin[j] = new double[featureCount];
            for (int k = 0; k < featureCount; k++)
            {
                testin[j][k] = testinputData[j][k];
            }

            testout[j] = testoutputData[j];
        }

        var knn = new KNearestNeighbors(k: 4);
        knn.Learn(input, outputData);

        var cm = GeneralConfusionMatrix.Estimate(knn, testin, testout);
        double acc = cm.Accuracy;

        // Accumulate the error of every round so the reported precision is a
        // real average instead of a constant.
        errorSum += cm.Error;

        watch.Stop();
        Console.WriteLine("Iteracija baigta per: {0}ms", watch.ElapsedMilliseconds);
        Console.WriteLine("Iteracijos tikslumas: {0}", acc);

        if (acc > bestAccuracy)
        {
            bestAccuracy = acc;
            best = knn;
        }

        // Move the test window one step towards the start of the data.
        indexTestingEnd = indexTestingStart;
        indexTestingStart -= testingCount;
    }

    precision = 1 - (errorSum / rounds);
    return best;
}