/// <summary>
/// Runs 3-fold cross-validation of a Naive Bayes learner over
/// <c>inputsInt</c>/<c>outputs</c> and appends the mean training error, mean
/// validation error and aggregate accuracy to <c>message</c>.
/// </summary>
public void nativeBayesValidation()
{
    // Only the cross-validated figures are reported, so no stand-alone model
    // is fitted on the full data set here.
    var cv = CrossValidation.Create(
        k: 3,
        learner: (p) => new NaiveBayesLearning(),
        loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
        fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
        x: inputsInt,
        y: outputs
    );

    var result = cv.Learn(inputsInt, outputs);

    double trainingError = result.Training.Mean;
    double validationError = result.Validation.Mean;

    // Confusion matrix aggregated from the result; only its accuracy is reported.
    double accuracy = result.ToConfusionMatrix(inputsInt, outputs).Accuracy;

    message += "Native Bayes Validacja\n"
        + "trainingError " + trainingError.ToString() + "\n"
        + "validationError " + validationError.ToString() + "\n"
        + "accuracy " + accuracy.ToString() + "\n\n";
}
/// <summary>
/// Runs 3-fold cross-validation of a k-nearest-neighbours classifier (k = 4)
/// over <c>inputs</c>/<c>outputs</c> and appends the mean training error, mean
/// validation error and aggregate accuracy to <c>message</c>.
/// </summary>
public void knnValidation()
{
    var crossvalidation = CrossValidation.Create(
        k: 3,
        learner: (p) => new KNearestNeighbors(k: 4),
        loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
        fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
        x: inputs,
        y: outputs
    );

    var result = crossvalidation.Learn(inputs, outputs);

    double trainingError = result.Training.Mean;
    double validationError = result.Validation.Mean;

    // Confusion matrix aggregated from the result; only its accuracy is reported.
    double accuracy = result.ToConfusionMatrix(inputs, outputs).Accuracy;

    message += "Knn Validacja\n"
        + "trainingError " + trainingError.ToString() + "\n"
        + "validationError " + validationError.ToString() + "\n"
        + "accuracy " + accuracy.ToString() + "\n\n";
}
/// <summary>
/// Cross-validates a kernel discriminant analysis classifier (quadratic kernel)
/// and stores the resulting aggregate confusion matrix in <c>ConfusionMatrix</c>.
/// </summary>
/// <param name="folds">Number of folds to use; values below 2 fall back to 10.</param>
/// <param name="inputs">Feature vectors, one per sample.</param>
/// <param name="outputs">Class label of each sample.</param>
public override void ApplyCrossValidation(int folds, double[][] inputs, int[] outputs)
{
    // Honour the caller's fold count. Fewer than two folds cannot hold out a
    // validation set, so such values fall back to the conventional 10.
    const int fallbackFolds = 10;
    int foldCount = folds >= 2 ? folds : fallbackFolds;

    crossValidation = CrossValidation.Create(
        k: foldCount,

        // Learning algorithm created afresh for every fold.
        learner: (p) => new KernelDiscriminantAnalysis()
        {
            Kernel = new Quadratic()
        },

        // Classifier performance is measured as the zero-one (misclassification) loss.
        loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),

        // No special preparation before learning: just fit the fold data.
        fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

        x: inputs,
        y: outputs
    );

    var result = crossValidation.Learn(inputs, outputs);
    ConfusionMatrix = result.ToConfusionMatrix(inputs, outputs).Matrix;
}
/// <summary>
/// Cross-validates a multiclass support vector machine (Gaussian kernel, SMO
/// training) and stores the resulting aggregate confusion matrix in
/// <c>ConfusionMatrix</c>.
/// </summary>
/// <param name="folds">Number of folds to use; values below 2 fall back to 10.</param>
/// <param name="inputs">Feature vectors, one per sample.</param>
/// <param name="outputs">Class label of each sample.</param>
public override void ApplyCrossValidation(int folds, double[][] inputs, int[] outputs)
{
    // Honour the caller's fold count. Fewer than two folds cannot hold out a
    // validation set, so such values fall back to the conventional 10.
    const int fallbackFolds = 10;
    int foldCount = folds >= 2 ? folds : fallbackFolds;

    crossValidation = CrossValidation.Create(
        k: foldCount,

        learner: (p) => new MulticlassSupportVectorLearning<Gaussian>()
        {
            // Use SMO to train the underlying SVMs in each of the
            // binary class subproblems.
            Learner = (param) => new SequentialMinimalOptimization<Gaussian>()
            {
                // Estimate a starting guess for the Gaussian kernel's parameters.
                UseKernelEstimation = true
            }
        },

        // Classifier performance is measured as the zero-one (misclassification) loss.
        loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),

        // No special preparation before learning: just fit the fold data.
        fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

        x: inputs,
        y: outputs
    );

    var result = crossValidation.Learn(inputs, outputs);
    ConfusionMatrix = result.ToConfusionMatrix(inputs, outputs).Matrix;
}
/// <summary>
/// Cross-validates a Naive Bayes learner on the inputs converted to integers
/// and stores the resulting aggregate confusion matrix in <c>ConfusionMatrix</c>.
/// </summary>
/// <param name="folds">Number of folds to use; values below 2 fall back to 10.</param>
/// <param name="inputs">Feature vectors; every value is converted with <c>Convert.ToInt32</c>.</param>
/// <param name="outputs">Class label of each sample.</param>
public void ApplyCrossValidation(int folds, double[][] inputs, int[] outputs)
{
    // Honour the caller's fold count. Fewer than two folds cannot hold out a
    // validation set, so such values fall back to the conventional 10.
    const int fallbackFolds = 10;
    int foldCount = folds >= 2 ? folds : fallbackFolds;

    // Convert the inputs once and reuse the same matrix for creating the
    // validator, learning, and building the confusion matrix.
    int[][] discreteInputs = inputs
        .Select(row => row.Select(value => Convert.ToInt32(value)).ToArray())
        .ToArray();

    crossValidation = CrossValidation.Create(
        k: foldCount,

        // Learning algorithm created afresh for every fold.
        learner: (p) => new B.NaiveBayesLearning(),

        // Classifier performance is measured as the zero-one (misclassification) loss.
        loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),

        // No special preparation before learning: just fit the fold data.
        fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

        x: discreteInputs,
        y: outputs
    );

    var result = crossValidation.Learn(discreteInputs, outputs);
    ConfusionMatrix = result.ToConfusionMatrix(discreteInputs, outputs).Matrix;
}
/// <summary>
/// Example: 10-fold cross-validation of a C4.5 decision tree on the Wisconsin
/// (Diagnostic) Breast Cancer data set, printing the aggregate accuracy to the
/// console.
/// </summary>
private static void breastCancerExample()
{
    // Fix the seed so that repeated runs give the same results.
    Accord.Math.Random.Generator.Seed = 0;

    // The task is to tell malignant from benign cases based on the
    // characteristics extracted from a breast cancer exam.
    var dataset = new WisconsinDiagnosticBreastCancer();
    double[][] features = dataset.Features;   // 569 samples, 30-dimensional features
    int[] classLabels = dataset.ClassLabels;  // 569 samples, 2 different class labels

    // Evaluate a decision tree limited to a height of 5 in which a variable
    // may join the decision path at most twice.
    var validator = CrossValidation.Create(
        k: 10,

        // A fresh learner is built for each fold.
        learner: (p) => new C45Learning()
        {
            Join = 2,
            MaxHeight = 5
        },

        // Tree performance is measured with the zero-one loss.
        loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),

        // Hook for any preparation before learning; nothing special is needed here.
        fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

        // Data that will be partitioned into the folds.
        x: features,
        y: classLabels
    );

    // Run the cross-validation over the same data.
    var outcome = validator.Learn(features, classLabels);

    // Some information about the problem:
    int numberOfSamples = outcome.NumberOfSamples;   // should be 569
    int numberOfInputs = outcome.NumberOfInputs;     // should be 30
    int numberOfOutputs = outcome.NumberOfOutputs;   // should be 2

    double trainingError = outcome.Training.Mean;     // should be 0.017771153143274855
    double validationError = outcome.Validation.Mean; // should be 0.0755952380952381

    // Aggregate confusion matrix for the validation sets.
    GeneralConfusionMatrix confusion = outcome.ToConfusionMatrix(features, classLabels);
    double accuracy = confusion.Accuracy; // should be 0.92442882249560632

    string percentText = (accuracy * 100).ToString("N2");
    Console.WriteLine("C45Learning learning algorithm accuracy is %" + percentText);
}
/// <summary>
/// Builds a nine-feature matrix from <c>trainingSets</c>, trains the library
/// C4.5 tree on all of it (stored in <c>decisionTreeLib</c>) and records the
/// 5-fold cross-validated accuracy, rounded to three decimals, in
/// <c>accuracyC45lib</c>.
/// </summary>
private void trainingC45lib()
{
    Accord.Math.Random.Generator.Seed = 0;

    c45Learning = new C45Learning()
    {
        Join = 2,
        MaxHeight = 5
    };

    int sampleCount = trainingSets.Count;
    double[][] features = new double[sampleCount][];
    int[] labels = new int[sampleCount];

    int row = 0;
    foreach (Patient patient in trainingSets)
    {
        double[] vector = new double[9];

        // Attribute 1 is bucketed into three bands: below 30, below 60, otherwise.
        if (patient.get(1) < 30)
        {
            vector[0] = 0;
        }
        else if (patient.get(1) < 60)
        {
            vector[0] = 1;
        }
        else
        {
            vector[0] = 2;
        }

        // Attributes 2..9 are copied as they are.
        for (int attribute = 2; attribute <= 9; attribute++)
        {
            vector[attribute - 1] = patient.get(attribute);
        }

        features[row] = vector;
        labels[row] = patient.get(10); // attribute 10 is used as the class label
        row++;
    }

    var validator = CrossValidation.Create(
        k: 5,
        learner: (p) => new C45Learning()
        {
            Join = 2,
            MaxHeight = 5
        },
        loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
        fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
        x: features,
        y: labels
    );

    // Train the tree kept by this instance first, then cross-validate.
    decisionTreeLib = c45Learning.Learn(features, labels);

    var outcome = validator.Learn(features, labels);
    GeneralConfusionMatrix confusion = outcome.ToConfusionMatrix(features, labels);
    accuracyC45lib = Math.Round(confusion.Accuracy, 3);
}
/// <summary>
/// Fits a multinomial logistic regression with the lower-bound Newton-Raphson
/// learner, reports cross-validation and confusion-matrix statistics through
/// <c>Funcs.Utility.OutPutStats</c>, saves the model and returns its decisions
/// for the given inputs.
/// </summary>
/// <param name="input1">Feature vectors, one per sample.</param>
/// <param name="labels">Class label of each sample.</param>
/// <param name="SaveFile">Path whose ".csv" is replaced by ".MLR.save" to name the model file.</param>
/// <returns>The class decided by the fitted model for every row of <paramref name="input1"/>.</returns>
public static int[] MultiNomialLogRegressionLowerBoundNewtonRaphson(double[][] input1, int[] labels, string SaveFile)
{
    // http://accord-framework.net/docs/html/T_Accord_Statistics_Models_Regression_MultinomialLogisticRegression.htm

    // Learner used for the model that is saved and returned.
    var finalLearner = new LowerBoundNewtonRaphson()
    {
        MaxIterations = 10,
        Tolerance = 1e-6
    };

    // NOTE(review): the folds below use a default-configured learner, not the
    // MaxIterations/Tolerance settings above — confirm this is intended.
    var validator = CrossValidation.Create(
        k: 10,

        // Learning algorithm created for each fold.
        learner: (p) => new LowerBoundNewtonRaphson(),

        // Performance is measured with the zero-one loss.
        loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),

        // Hook for any preparation before learning; nothing special is needed here.
        fit: (teach, x, y, w) => teach.Learn(x, y, w),

        // Data that will be partitioned into the folds.
        x: input1,
        y: labels
    );

    // Cross-validate first, then estimate the final model.
    var validation = validator.Learn(input1, labels);
    MultinomialLogisticRegression model = finalLearner.Learn(input1, labels);

    // Statistics come from two confusion matrices: one estimated with the
    // final model, one taken from the cross-validation result.
    ConfusionMatrix modelMatrix = ConfusionMatrix.Estimate(model, input1, labels);
    GeneralConfusionMatrix foldMatrix = validation.ToConfusionMatrix(input1, labels);

    Funcs.Utility.OutPutStats(
        validation.NumberOfSamples,
        validation.NumberOfInputs,
        validation.Training.Mean,
        foldMatrix.Accuracy,
        modelMatrix.FalsePositives,
        modelMatrix.FalseNegatives,
        modelMatrix.FScore);

    // Model answers for the inputs it was trained on.
    int[] answers = model.Decide(input1);

    string modelPath = SaveFile.Replace(".csv", ".MLR.save");
    model.Save(modelPath, compression: SerializerCompression.None);

    return answers;
}
/// <summary>
/// Fits a multinomial logistic regression with L-BFGS, reports 4-fold
/// cross-validation and confusion-matrix statistics through
/// <c>Funcs.Utility.OutPutStats</c>, saves the model to
/// <paramref name="fName"/> and returns its decisions for the given inputs.
/// </summary>
/// <param name="input">Feature vectors, one per sample.</param>
/// <param name="labels">Class label of each sample.</param>
/// <param name="fName">Path the fitted model is saved to (uncompressed).</param>
/// <returns>The class decided by the fitted model for every row of <paramref name="input"/>.</returns>
public static int[] MultiNomialLogisticRegressionBFGS(double[][] input, int[] labels, string fName)
{
    /* The L-BFGS algorithm is a member of the broad family of quasi-Newton optimization methods.
     * L-BFGS stands for 'Limited memory BFGS'. It uses a limited memory variation of the
     * Broyden–Fletcher–Goldfarb–Shanno (BFGS) update to approximate the inverse Hessian matrix.
     * Unlike the original BFGS method, which stores a dense approximation, L-BFGS stores only a
     * few vectors that represent the approximation implicitly. Due to its moderate memory
     * requirement, it is particularly well suited for problems with a large number of variables.
     */
    var mlbfgs = new MultinomialLogisticLearning<BroydenFletcherGoldfarbShanno>();

    // Model trained on the full data set; this is the one saved and returned.
    MultinomialLogisticRegression mlr = mlbfgs.Learn(input, labels);

    // Cross-validate the same kind of learner to estimate how well the model
    // generalizes: several rounds on different partitions, averaged.
    int folds = 4; // could play around with this later
    var cv = CrossValidation.Create(
        k: folds,
        learner: (p) => new MultinomialLogisticLearning<BroydenFletcherGoldfarbShanno>(),
        loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
        fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
        x: input,
        y: labels
    );
    var result = cv.Learn(input, labels);

    GeneralConfusionMatrix gcm = result.ToConfusionMatrix(input, labels);
    ConfusionMatrix cm = ConfusionMatrix.Estimate(mlr, input, labels);

    // Output relevant statistics.
    Funcs.Utility.OutPutStats(
        result.NumberOfSamples,
        result.NumberOfInputs,
        result.Training.Mean,
        gcm.Accuracy,
        cm.FalsePositives,
        cm.FalseNegatives,
        cm.FScore);

    // Only the decisions are returned, so the per-class probabilities and the
    // training-set zero-one loss are not computed here.
    int[] answers = mlr.Decide(input);

    mlr.Save(fName, compression: SerializerCompression.None);
    return answers;
}
/// <summary>
/// Prepares the input/output variables and cross-validates a Bernoulli Naive
/// Bayes learner on them, storing the outcome in <c>Result</c>.
/// </summary>
/// <param name="numOfFolds">Number of cross-validation folds.</param>
/// <param name="minOccurences">Passed through to <c>CalcInputAndOutputVariables</c>.</param>
private void ClassifyDataByNaiveBayes(int numOfFolds = 3, int minOccurences = 1)
{
    CalcInputAndOutputVariables(minOccurences);

    var validator = CrossValidation.Create(
        k: numOfFolds,
        learner: (p) => new NaiveBayesLearning<BernoulliDistribution>(),
        loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
        fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
        x: InputVariables,
        y: OutputVariables
    );

    // Run the cross-validation. Result ends up null when the outcome is not
    // of the expected generic result type.
    var outcome = validator.Learn(InputVariables, OutputVariables);
    Result = outcome as CrossValidationResult<TModel, double[], int>;
}
/// <summary>
/// Measures the 5-fold cross-validated accuracy of an ID3 decision tree over
/// <c>inputs</c>/<c>outputs</c>.
/// </summary>
/// <returns>The accuracy of the aggregate confusion matrix, as a percentage.</returns>
public double Accuracy()
{
    var cv = CrossValidation.Create(
        k: 5,

        // Each variable may join the decision path at most once. MaxHeight is
        // set to 0. NOTE(review): Accord documents zero as "no height limit" —
        // confirm that an unlimited tree is what is wanted here.
        learner: (p) => new ID3Learning()
        {
            Join = 1,
            MaxHeight = 0
        },

        // No special preparation before learning: just fit the fold data.
        fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

        // Tree performance is measured with the zero-one loss.
        loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),

        // Data that will be partitioned into the folds.
        x: inputs,
        y: outputs
    );

    var result = cv.Learn(inputs, outputs);

    // Aggregate confusion matrix computed from the result over all samples.
    GeneralConfusionMatrix gcm = result.ToConfusionMatrix(inputs, outputs);

    return gcm.Accuracy * 100;
}
/// <summary>
/// Prepares the input/output variables and cross-validates a logistic
/// regression trained by iteratively reweighted least squares, storing the
/// outcome in <c>Result</c>.
/// </summary>
/// <param name="numOfFolds">Number of cross-validation folds.</param>
/// <param name="minOccurences">Passed through to <c>CalcInputAndOutputVariables</c>.</param>
/// <param name="maxIterations">Iteration cap given to the learner of every fold.</param>
private void ClassifyDataByLogisticRegression(int numOfFolds = 3, int minOccurences = 1, int maxIterations = 100)
{
    CalcInputAndOutputVariables(minOccurences);

    var cvLogisticRegressionClassifier = CrossValidation.Create(
        k: numOfFolds,
        learner: (p) => new IterativeReweightedLeastSquares<LogisticRegression>()
        {
            // Use the caller's limit rather than a fixed 100; the parameter
            // default keeps existing call sites unchanged.
            MaxIterations = maxIterations,
            Regularization = 1e-6
        },
        loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
        fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
        x: InputVariables,
        y: OutputVariables
    );

    // Run the cross-validation. Result ends up null when the outcome is not
    // of the expected generic result type.
    Result = cvLogisticRegressionClassifier.Learn(InputVariables, OutputVariables)
        as CrossValidationResult<TModel, double[], int>;
}
/// <summary>
/// Cross-validates a logistic regression trained by iteratively reweighted
/// least squares, using the fold count from the application settings.
/// </summary>
/// <param name="inputs">Feature vectors, one per sample.</param>
/// <param name="outputs">Class label of each sample.</param>
/// <returns>The cross-validation result for the given data.</returns>
public CrossValidationResult<LogisticRegression, double[], int> BuildModel(double[][] inputs, int[] outputs)
{
    var validator = CrossValidation.Create<LogisticRegression, IterativeReweightedLeastSquares<LogisticRegression>, double[], int>(
        k: _appSettings.ModelNumFolds,

        // Learner built for each fold.
        learner: (p) => new IterativeReweightedLeastSquares<LogisticRegression>
        {
            MaxIterations = 100,
            Regularization = 1e-6
        },

        // Performance is measured with the zero-one loss.
        loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),

        fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
        x: inputs,
        y: outputs
    );

    return validator.Learn(inputs, outputs);
}
/// <summary>
/// "Generate" button handler. Writes one comma-separated line per generated
/// sample (class key followed by its attribute values) to a file chosen by the
/// user. On request it then cross-validates Naive Bayes, k-nearest-neighbours
/// and SVM classifiers on the generated data, shows their accuracies, dumps
/// them to "TestDataDump.txt" and offers to open the generated file.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void Button5_Click(object sender, RoutedEventArgs e) //Generate
{
    if (classes.Count < 2)
    {
        System.Windows.MessageBox.Show(
            "Please have at least two classes created to generate",
            "Data generating error",
            System.Windows.MessageBoxButton.OK,
            System.Windows.MessageBoxImage.Warning);
        return;
    }

    if (attrs.Count < 1)
    {
        System.Windows.MessageBox.Show(
            "Please have at least one attribute created to generate",
            "Data generating error",
            System.Windows.MessageBoxButton.OK,
            System.Windows.MessageBoxImage.Warning);
        return;
    }

    using (var dirB = new System.Windows.Forms.SaveFileDialog())
    {
        dirB.Filter = "Text Files | *.txt";
        dirB.DefaultExt = "txt";

        if (dirB.ShowDialog() != System.Windows.Forms.DialogResult.OK)
        {
            return;
        }

        List<float[]> attrValues = new List<float[]>();
        List<int> classValues = new List<int>();

        // Generate the samples and write them out. The writer is closed before
        // anything else touches the file.
        using (var file = new System.IO.StreamWriter(dirB.FileName))
        {
            for (int v = 0; v < classes.Count; v++)
            {
                // classes[v].Value is the number of samples to emit for class v.
                for (int n = 0; n < classes[v].Value; n++)
                {
                    string line = classes[v].Key;
                    classValues.Add(v);

                    List<float> aVals = new List<float>();
                    for (int t = 0; t < classAttrs[v].Count; t++)
                    {
                        // presumably draws a value for attribute t from the
                        // class-specific settings in classAttrs[v][t]
                        float aVal = attrs[t].Value.genetare(classAttrs[v][t]);
                        aVals.Add(aVal);
                        line += "," + aVal.ToString(System.Globalization.CultureInfo.InvariantCulture);
                    }

                    attrValues.Add(aVals.ToArray<float>());
                    file.WriteLine(line);
                }
            }
        }

        var dialogResult = System.Windows.MessageBox.Show(
            "Do you want to test the generated data?",
            "Data testing - crossvalidation",
            System.Windows.MessageBoxButton.YesNo);
        if (dialogResult != MessageBoxResult.Yes)
        {
            return;
        }

        // Naive Bayes gets integer inputs (values scaled by 100 and rounded);
        // k-NN and SVM get the values as doubles.
        double[][] inputs_d = attrValues
            .Select(xa => xa.Select(ya => (double)ya).ToArray())
            .ToArray();
        int[][] inputs_i = attrValues
            .Select(xa => xa.Select(ya => (int)Math.Round(ya * 100)).ToArray())
            .ToArray();
        int[] outputs = classValues.ToArray();

        // Naive Bayes
        var cv = CrossValidation.Create(
            k: 4,
            learner: (p) => new NaiveBayesLearning(),
            loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
            fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
            x: inputs_i,
            y: outputs
        );
        double nb_accuracy = cv.Learn(inputs_i, outputs)
            .ToConfusionMatrix(inputs_i, outputs).Accuracy;

        // k nearest neighbours, with k = round(sqrt(number of samples))
        int neighbours = (int)Math.Round(Math.Sqrt(outputs.Length));
        var crossvalidation = CrossValidation.Create(
            k: 4,
            learner: (p) => new KNearestNeighbors(k: neighbours),
            loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
            fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
            x: inputs_d,
            y: outputs
        );
        double knn_accuracy = crossvalidation.Learn(inputs_d, outputs)
            .ToConfusionMatrix(inputs_d, outputs).Accuracy;

        // Multiclass SVM with a Gaussian kernel, trained by SMO
        var crossvalidationsvm = CrossValidation.Create(
            k: 4,
            learner: (p) => new MulticlassSupportVectorLearning<Gaussian>()
            {
                Learner = (param) => new SequentialMinimalOptimization<Gaussian>()
                {
                    UseKernelEstimation = true
                }
            },
            loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
            fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
            x: inputs_d,
            y: outputs
        );
        double svm_accuracy = crossvalidationsvm.Learn(inputs_d, outputs)
            .ToConfusionMatrix(inputs_d, outputs).Accuracy;

        // One formatter for every percentage shown or written below.
        Func<double, string> asPercent = fraction =>
            (fraction * 100).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture);

        System.Windows.MessageBox.Show(
            "Naive Bayes Accuracy: " + asPercent(nb_accuracy) + "%\n"
            + "\nk Nearest Neighbors Accuracy: " + asPercent(knn_accuracy) + "%\n"
            + "\nSupport Vector Machine Accuracy: " + asPercent(svm_accuracy) + "%\n",
            "Data testing - crossvalidation",
            System.Windows.MessageBoxButton.OK);

        // The dump lists k-NN first, then Naive Bayes, then SVM.
        using (var write = new System.IO.StreamWriter("TestDataDump.txt"))
        {
            write.WriteLine("GeneratedDataAmt," + outputs.Length);
            write.WriteLine("Accuracy," + asPercent(knn_accuracy)
                + "," + asPercent(nb_accuracy)
                + "," + asPercent(svm_accuracy));
        }

        dialogResult = System.Windows.MessageBox.Show(
            "Do you want to open the file with generated data?",
            "Data testing - extended data",
            System.Windows.MessageBoxButton.YesNo);
        if (dialogResult == MessageBoxResult.Yes)
        {
            System.Diagnostics.Process.Start(dirB.FileName);
        }
    }
}
/// <summary>
/// Cross-validates and then trains a probabilistic linear SVM by coordinate
/// descent, reports statistics through <c>Utility.OutPutStats</c>, saves the
/// model as a logistic regression and returns its decisions for the inputs.
/// </summary>
/// <param name="input1">Feature vectors, one per sample.</param>
/// <param name="labels">Class label of each sample.</param>
/// <param name="SaveFile">Path whose ".csv" is replaced by ".PCD.save" to name the model file.</param>
/// <returns>The model's decisions for <paramref name="input1"/>, converted by <c>Funcs.Utility.BoolToInt</c>.</returns>
public static int[] ProbabilisticCoordinateDescent(double[][] input1, int[] labels, string SaveFile)
{
    // http://accord-framework.net/docs/html/T_Accord_MachineLearning_VectorMachines_Learning_ProbabilisticCoordinateDescent.htm
    /* This class implements a SupportVectorMachine learning algorithm specifically crafted for
     * probabilistic linear machines only. It provides an L1-regularized coordinate descent
     * learning algorithm, based on liblinear's solve_l1r_lr method (solver -s 6: L1R_LR), for
     * L1-regularized logistic regression (probabilistic svm) problems.
     */
    int folds = 5;
    Accord.Math.Random.Generator.Seed = 0;

    // NOTE(review): the folds use a default-configured learner, not the
    // Tolerance/Complexity settings of the final model — confirm this is intended.
    var validator = CrossValidation.Create(
        k: folds, // 5-fold cross validation

        // Learning algorithm created for each fold.
        learner: (p) => new ProbabilisticCoordinateDescent(),

        // Performance is measured with the zero-one loss.
        loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),

        // Hook for any preparation before learning; nothing special is needed here.
        fit: (teach, x, y, w) => teach.Learn(x, y, w),

        // Data that will be partitioned into the folds.
        x: input1,
        y: labels
    );

    var validation = validator.Learn(input1, labels);
    GeneralConfusionMatrix foldMatrix = validation.ToConfusionMatrix(input1, labels);

    /* Complexity (cost) parameter C. Increasing the value of C forces the creation of a more
     * accurate model that may not generalize well. If it is not set and UseComplexityHeuristic
     * is true, the framework guesses a value for C; setting it manually disables the heuristic.
     */
    var finalLearner = new ProbabilisticCoordinateDescent()
    {
        Tolerance = 1e-10,
        Complexity = 1e+10 // learn a hard-margin model
    };

    var machine = finalLearner.Learn(input1, labels);
    var regression = (LogisticRegression)machine;

    // Confusion matrix of the final model on the data it was trained on.
    ConfusionMatrix modelMatrix = ConfusionMatrix.Estimate(machine, input1, labels);

    Utility.OutPutStats(
        validation.NumberOfSamples,
        validation.NumberOfInputs,
        validation.Training.Mean,
        foldMatrix.Accuracy,
        modelMatrix.FalsePositives,
        modelMatrix.FalseNegatives,
        modelMatrix.FScore);

    // Write the model out to a save file.
    string modelPath = SaveFile.Replace(".csv", ".PCD.save");
    regression.Save(modelPath, compression: SerializerCompression.None);

    bool[] decisions = regression.Decide(input1);
    return Funcs.Utility.BoolToInt(decisions);
}
/// <summary>
/// Generates <c>newData</c> synthetic records modelled on the loaded data set
/// (<c>reading</c> / <c>values</c>), lets the user save them to a text file, and optionally
/// evaluates the generated data with KNN, Naive Bayes and SVM classifiers (direct
/// classification plus 4-fold cross-validation on original, generated and mixed data).
/// </summary>
/// <remarks>
/// Shows a save-file dialog and up to three message boxes; returns early when the save
/// dialog is not confirmed. May start the application associated with the saved file.
/// </remarks>
private void GenerateBasedOnData()
{
    // Generated rows (class label in column 0), kept for the optional validation step.
    List<string[]> generating = new List<string[]>();

    // Attribute types with entry 0 removed, so that index i describes attribute i.
    var attrType = RemoveAt(this.attrType, 0);

    // Continuous distribution curves: one spline per (class, numeric attribute).
    Spline3Deg[,] probabilities = new Spline3Deg[classes, attribs];
    for (int i = 0; i < attribs; i++)
    {
        if (!attrType[i].Equals("double") && !attrType[i].Equals("integer"))
        {
            continue;
        }

        for (int j = 0; j < classes; j++)
        {
            var counts = values.ElementAt(j).Value.Item2.ElementAt(i).Value;
            double[] x = new double[counts.Count];

            // Sort the observed values numerically together with their occurrence counts.
            SortedList<double, int> temp = new SortedList<double, int>();
            foreach (var v in counts)
            {
                double tD = Double.Parse(v.Key.Replace(" ", string.Empty),
                                         System.Globalization.NumberStyles.AllowDecimalPoint,
                                         System.Globalization.NumberFormatInfo.InvariantInfo);
                temp.Add(tD, v.Value);
            }

            double[] y = temp.Keys.ToArray();

            // Knot positions: running sum of the counts of neighbouring values.
            x[0] = 0;
            for (int k = 1; k < temp.Count; k++)
            {
                x[k] = x[k - 1] + temp.Values[k - 1] + temp.Values[k];
            }

            probabilities[j, i] = new Spline3Deg(x, y);
        }
    }

    // Encode the original data numerically (class in column 0, attributes afterwards);
    // used below for scoring candidates and for validation.
    int readColumns = reading.ElementAt(0).Length;
    var readClass = new int[reading.Count];
    var readAttr_d = new double[reading.Count, readColumns - 1].ToJagged();

    // Per-column dictionary mapping non-numeric text to an integer code.
    var stringIntCheatSheet = new Dictionary<string, int>[readColumns];
    for (int i = 0; i < stringIntCheatSheet.Length; i++)
    {
        stringIntCheatSheet[i] = new Dictionary<string, int>();
    }

    for (int row = 0; row < reading.Count; row++)
    {
        var cells = reading.ElementAt(row);
        for (int col = 0; col < readColumns; col++)
        {
            // The class column is always treated as categorical.
            double rr = EncodeCell(cells[col], stringIntCheatSheet[col], col == 0);
            if (col == 0)
            {
                readClass[row] = (int)rr;
            }
            else
            {
                readAttr_d[row][col - 1] = rr;
            }
        }
    }

    int readClassesSqrt = (int)Math.Round(Math.Sqrt(reading.Count));
    var learnKnn = new KNearestNeighbors(readClassesSqrt);
    var knn = learnKnn.Learn(readAttr_d, readClass);

    double[] attrcr = new double[attribs];
    string[] bestattr = new string[attribs];

    // Generate the new records.
    var newStuff = new string[newData, attribs + 1];
    for (int it = 0; it < newStuff.GetLength(0); it++)
    {
        // Draw the class (rnd is a Random declared elsewhere in the class).
        int cl = rnd.Next(classes);
        newStuff[it, 0] = values.ElementAt(cl).Key;

        double bestscore = 0;
        bool hasBest = false; // the first candidate is always remembered
        int safety = 0;
        double score;

        do
        {
            for (int v = 1; v <= attribs; v++)
            {
                // Draw the attribute value.
                if (attrType[v - 1].Equals("string"))
                {
                    // Discrete distribution: Monte Carlo pick weighted by occurrence counts.
                    int val = rnd.Next(values.ElementAt(cl).Value.Item1);
                    int b = 0;
                    foreach (var a in values.ElementAt(cl).Value.Item2[v])
                    {
                        if (val < (b += a.Value))
                        {
                            newStuff[it, v] = a.Key;
                            break;
                        }
                    }
                }
                else
                {
                    // Continuous distribution: sample the spline at a random position.
                    Tuple<double, double> extr = probabilities[cl, v - 1].Limits();
                    double val = rnd.Next((int)extr.Item1, (int)extr.Item2) + rnd.NextDouble();
                    double r = probabilities[cl, v - 1].y(val);
                    if (attrType[v - 1].Equals("double"))
                    {
                        newStuff[it, v] = r.ToString(fltPrec, System.Globalization.CultureInfo.InvariantCulture);
                    }
                    else // "integer"
                    {
                        newStuff[it, v] = Math.Round(r).ToString();
                    }
                }

                // Extra safety check: occasionally no value is produced; redo this attribute.
                if (string.IsNullOrEmpty(newStuff[it, v]))
                {
                    v--;
                    continue;
                }
            }

            // Encode the candidate so it can be scored by the KNN model.
            for (int v = 1; v <= attribs; v++)
            {
                attrcr[v - 1] = EncodeCell(newStuff[it, v], stringIntCheatSheet[v], false);
            }

            // NOTE(review): cl is the index into `values`, while the KNN class codes come from
            // stringIntCheatSheet[0] (order of first appearance) - confirm these agree.
            score = knn.Score(attrcr, cl);

            // Remember the best-scoring candidate seen so far for this record.
            if (!hasBest || score > bestscore)
            {
                hasBest = true;
                bestscore = score;
                for (int iter = 0; iter < attribs; iter++)
                {
                    bestattr[iter] = newStuff[it, iter + 1];
                }
            }

            // NOTE(review): if scoreH is an integer type, scoreH / 100 is an integer division - confirm.
        } while (score < scoreH / 100 && ++safety < 1000);

        for (int iter = 0; iter < attribs; iter++)
        {
            newStuff[it, iter + 1] = bestattr[iter];
        }
    }

    // Save the generated data to a file chosen by the user.
    string savefiledir = "";
    using (var dirB = new System.Windows.Forms.SaveFileDialog())
    {
        dirB.Filter = "Text Files | *.txt";
        dirB.DefaultExt = "txt";
        if (dirB.ShowDialog() != System.Windows.Forms.DialogResult.OK)
        {
            return;
        }

        savefiledir = dirB.FileName;
        using (var write = new System.IO.StreamWriter(savefiledir))
        {
            for (int row = 0; row < newStuff.GetLength(0); row++)
            {
                var cells = new string[newStuff.GetLength(1)];
                for (int col = 0; col < cells.Length; col++)
                {
                    cells[col] = newStuff[row, col];
                }

                string line = string.Join(",", cells);
                generating.Add(line.Split(','));

                // In the file the class goes back to its original column.
                string[] temp = line.Split(',');
                swap(ref temp[0], ref temp[clsCol]);
                write.WriteLine(string.Join(",", temp));
            }
        }
    }

    // Optional validation of the generated data.
    var dialogResult = System.Windows.MessageBox.Show("Do you want to test the generated data?", "Data testing - extended data", System.Windows.MessageBoxButton.YesNo);
    if (dialogResult == MessageBoxResult.Yes)
    {
        int genColumns = generating.ElementAt(0).Length;
        var genClass = new int[generating.Count];
        var genAttr_d = new double[generating.Count, genColumns - 1].ToJagged();
        for (int row = 0; row < generating.Count; row++)
        {
            for (int col = 0; col < genColumns; col++)
            {
                double rr = EncodeCell(generating[row][col], stringIntCheatSheet[col], col == 0);
                if (col == 0)
                {
                    genClass[row] = (int)rr;
                }
                else
                {
                    genAttr_d[row][col - 1] = rr;
                }
            }
        }

        // Integer view of the attributes for Naive Bayes: doubles are shifted by the
        // selected precision before truncation.
        int shift = (int)Math.Pow(10, FltPrecBox.SelectedIndex + 1);
        Func<double[][], int[][]> toIntegers = rows =>
        {
            var converted = new int[rows.Length][];
            for (int r = 0; r < rows.Length; r++)
            {
                converted[r] = new int[rows[r].Length];
                for (int c = 0; c < rows[r].Length; c++)
                {
                    converted[r][c] = attrType[c].Equals("double")
                        ? (int)(rows[r][c] * shift)
                        : (int)rows[r][c];
                }
            }
            return converted;
        };
        int[][] genAttr_i = toIntegers(genAttr_d);
        int[][] readAttr_i = toIntegers(readAttr_d);

        // Naive Bayes trained on the original data, tested on the generated data.
        var learn = new NaiveBayesLearning();
        NaiveBayes nb = learn.Learn(readAttr_i, readClass);
        var test = nb.Decide(genAttr_i);
        int correctnb = CountMatches(test, genClass);
        int incorrectnb = test.Length - correctnb;

        // KNN trained on the original data, tested on the generated data.
        var testknn = knn.Decide(genAttr_d);
        int correctknn = CountMatches(testknn, genClass);
        int incorrectknn = testknn.Length - correctknn;

        // SVM trained on the original data, tested on the generated data.
        int correctsvm = 0, incorrectsvm = 0;
        try
        {
            var teach = new MulticlassSupportVectorLearning<Gaussian>()
            {
                // Use SMO to train the underlying SVMs of each binary sub-problem.
                Learner = (param) => new SequentialMinimalOptimization<Gaussian>()
                {
                    // Estimate a starting guess for the Gaussian kernel's parameters.
                    UseKernelEstimation = true
                }
            };
            var svm = teach.Learn(readAttr_d, readClass);
            var testsvm = svm.Decide(genAttr_d);
            correctsvm = CountMatches(testsvm, genClass);
            incorrectsvm = testsvm.Length - correctsvm;
        }
        catch (AggregateException)
        {
            // Best effort: when SVM training fails the SVM ratio is reported as 0.
        }

        // Mixed data set: original rows followed by generated rows.
        double[][] mixAttr_d = new double[readAttr_d.Length + genAttr_d.Length][];
        Array.Copy(readAttr_d, mixAttr_d, readAttr_d.Length);
        Array.Copy(genAttr_d, 0, mixAttr_d, readAttr_d.Length, genAttr_d.Length);

        int[][] mixAttr_i = new int[readAttr_i.Length + genAttr_i.Length][];
        Array.Copy(readAttr_i, mixAttr_i, readAttr_i.Length);
        Array.Copy(genAttr_i, 0, mixAttr_i, readAttr_i.Length, genAttr_i.Length);

        int[] mixClass = new int[genClass.Length + readClass.Length];
        Array.Copy(readClass, mixClass, readClass.Length);
        Array.Copy(genClass, 0, mixClass, readClass.Length, genClass.Length);

        // Cross-validation; k for KNN is the rounded square root of the sample count.
        int genClassesSqrt = (int)Math.Round(Math.Sqrt(genClass.Length));
        int mixClassesSqrt = (int)Math.Round(Math.Sqrt(mixClass.Length));

        double readAccuracy = CrossValidatedKnnAccuracy(readAttr_d, readClass, readClassesSqrt);
        double genAccuracy = CrossValidatedKnnAccuracy(genAttr_d, genClass, genClassesSqrt);
        double mixAccuracy = CrossValidatedKnnAccuracy(mixAttr_d, mixClass, mixClassesSqrt);

        double readAccuracynb = CrossValidatedNaiveBayesAccuracy(readAttr_i, readClass);
        double genAccuracynb = CrossValidatedNaiveBayesAccuracy(genAttr_i, genClass);
        double mixAccuracynb = CrossValidatedNaiveBayesAccuracy(mixAttr_i, mixClass);

        double readAccuracysvm = CrossValidatedSvmAccuracy(readAttr_d, readClass);
        double genAccuracysvm = CrossValidatedSvmAccuracy(genAttr_d, genClass);
        double mixAccuracysvm = CrossValidatedSvmAccuracy(mixAttr_d, mixClass);

        // Avoid 0/0 when the SVM produced no predictions at all.
        if (correctsvm == 0 && incorrectsvm == 0)
        {
            incorrectsvm = 1;
        }

        double knnRatio = 100.0 * correctknn / (correctknn + incorrectknn),
               nbRatio = 100.0 * correctnb / (correctnb + incorrectnb),
               svmRatio = 100.0 * correctsvm / (correctsvm + incorrectsvm);

        System.Windows.MessageBox.Show(
            DescribeClassifierResults("K Nearest Neighbours", knnRatio, readAccuracy, genAccuracy, mixAccuracy)
            + "\n\n"
            + DescribeClassifierResults("Naive Bayes", nbRatio, readAccuracynb, genAccuracynb, mixAccuracynb)
            + "\n\n"
            + DescribeClassifierResults("Support Vector Machine", svmRatio, readAccuracysvm, genAccuracysvm, mixAccuracysvm),
            "Data Testing - extending dataset", System.Windows.MessageBoxButton.OK);
    }

    dialogResult = System.Windows.MessageBox.Show("Do you want to open the file with generated data?", "Data testing - extended data", System.Windows.MessageBoxButton.YesNo);
    if (dialogResult == MessageBoxResult.Yes)
    {
        System.Diagnostics.Process.Start(savefiledir);
    }
}

// Converts a text cell to a number: parsed as an invariant-culture decimal when possible,
// otherwise (or always, when forceCategorical is set) mapped to an integer code that is
// added to `codes` on first use.
private static double EncodeCell(string text, Dictionary<string, int> codes, bool forceCategorical)
{
    double number;
    bool isNumeric = double.TryParse(text, System.Globalization.NumberStyles.AllowDecimalPoint,
                                     System.Globalization.NumberFormatInfo.InvariantInfo, out number);
    if (isNumeric && !forceCategorical)
    {
        return number;
    }

    int code;
    if (!codes.TryGetValue(text, out code))
    {
        code = codes.Count;
        codes.Add(text, code);
    }
    return code;
}

// Counts the positions at which a prediction equals the expected class.
private static int CountMatches(int[] predicted, int[] expected)
{
    int matches = 0;
    for (int i = 0; i < predicted.Length; i++)
    {
        if (predicted[i] == expected[i])
        {
            matches++;
        }
    }
    return matches;
}

// Runs 4-fold cross-validation of a KNN classifier and returns the aggregate accuracy.
private static double CrossValidatedKnnAccuracy(double[][] inputs, int[] outputs, int neighbours)
{
    var crossvalidation = CrossValidation.Create(
        k: 4,
        learner: (p) => new KNearestNeighbors(k: neighbours),
        loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
        fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
        x: inputs, y: outputs
    );
    var result = crossvalidation.Learn(inputs, outputs);
    return result.ToConfusionMatrix(inputs, outputs).Accuracy;
}

// Runs 4-fold cross-validation of a Naive Bayes classifier and returns the aggregate accuracy.
private static double CrossValidatedNaiveBayesAccuracy(int[][] inputs, int[] outputs)
{
    var crossvalidation = CrossValidation.Create(
        k: 4,
        learner: (p) => new NaiveBayesLearning(),
        loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
        fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
        x: inputs, y: outputs
    );
    var result = crossvalidation.Learn(inputs, outputs);
    return result.ToConfusionMatrix(inputs, outputs).Accuracy;
}

// Runs 4-fold cross-validation of a Gaussian multiclass SVM and returns the aggregate
// accuracy, or 0 when training fails with an AggregateException.
private static double CrossValidatedSvmAccuracy(double[][] inputs, int[] outputs)
{
    try
    {
        var crossvalidation = CrossValidation.Create(
            k: 4,
            learner: (p) => new MulticlassSupportVectorLearning<Gaussian>()
            {
                Learner = (param) => new SequentialMinimalOptimization<Gaussian>()
                {
                    UseKernelEstimation = true
                }
            },
            loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
            fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
            x: inputs, y: outputs
        );
        var result = crossvalidation.Learn(inputs, outputs);
        return result.ToConfusionMatrix(inputs, outputs).Accuracy;
    }
    catch (AggregateException)
    {
        return 0;
    }
}

// Formats one classifier's section of the validation report shown to the user.
private static string DescribeClassifierResults(string title, double generatedRatio,
    double readAccuracy, double genAccuracy, double mixAccuracy)
{
    var culture = System.Globalization.CultureInfo.InvariantCulture;
    return title + " Classification:\nGenerated Data Correct Ratio: " + generatedRatio.ToString("0.00", culture) + "%\n"
        + "Original Data X-Validation Accuracy: " + (100.0 * readAccuracy).ToString("0.00", culture) + "%\n"
        + "Generated Data X-Validation Accuracy: " + (100.0 * genAccuracy).ToString("0.00", culture) + "%\n"
        + "Mixed Data X-Validation Accuracy: " + (100.0 * mixAccuracy).ToString("0.00", culture) + "%\n";
}
/// <summary>
/// Checks 10-fold cross-validation of C4.5 decision trees on the Wisconsin (Diagnostic)
/// Breast Cancer data set, first with trees of height 5 and then with single-level trees.
/// </summary>
public void CrossValidationTest()
{
    #region doc_cross_validation
    // Fix the random seed so the results are reproducible.
    Accord.Math.Random.Generator.Seed = 0;

    // The Wisconsin (Diagnostic) Breast Cancer data set: the goal is to decide whether
    // the characteristics extracted from an exam correspond to a malignant or a benign
    // type of cancer.
    var dataset = new WisconsinDiagnosticBreastCancer();
    double[][] features = dataset.Features; // 569 samples, 30 features each
    int[] labels = dataset.ClassLabels;     // 569 samples, 2 distinct class labels

    // Measure the cross-validation performance of a decision tree with a maximum
    // height of 5 whose variables may join the decision path at most twice.
    var deepCv = CrossValidation.Create(
        k: 10, // 10-fold cross-validation

        // How the learning algorithm is created:
        learner: (p) => new C45Learning() { Join = 2, MaxHeight = 5 },

        // How the performance of a tree is measured:
        loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),

        // Hook for special operations before learning; kept as simple as possible:
        fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

        // The data used in cross-validation:
        x: features, y: labels
    );

    // Learn partitions the given data into the folds and trains one model per fold.
    var deepResult = deepCv.Learn(features, labels);

    // Information about the problem:
    int numberOfSamples = deepResult.NumberOfSamples; // should be 569
    int numberOfInputs = deepResult.NumberOfInputs;   // should be 30
    int numberOfOutputs = deepResult.NumberOfOutputs; // should be 2

    double trainingError = deepResult.Training.Mean;     // should be 0.017770391691033137
    double validationError = deepResult.Validation.Mean; // should be 0.077318295739348369
    #endregion

    Assert.AreEqual(569, numberOfSamples);
    Assert.AreEqual(30, numberOfInputs);
    Assert.AreEqual(2, numberOfOutputs);

    Assert.AreEqual(10, deepCv.K);
    Assert.AreEqual(0.017770391691033137, trainingError, 1e-10);
    Assert.AreEqual(0.077318295739348369, validationError, 1e-10);

    Assert.AreEqual(3.0913682243756776E-05, deepResult.Training.Variance, 1e-10);
    Assert.AreEqual(0.00090104473101439207, deepResult.Validation.Variance, 1e-10);

    Assert.AreEqual(10, deepCv.Folds.Length);
    Assert.AreEqual(10, deepResult.Models.Length);

    int deepHeight = deepResult.Models[0].Model.GetHeight();
    Assert.AreEqual(5, deepHeight);

    // Same experiment with decision stumps: one level, one variable.
    var shallowCv = CrossValidation.Create(
        k: 10,
        learner: (p) => new C45Learning()
        {
            Join = 1,
            MaxHeight = 1,
            MaxVariables = 1
        },
        loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
        fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
        x: features, y: labels
    );

    var shallowResult = shallowCv.Learn(features, labels);

    int shallowHeight = shallowResult.Models[0].Model.GetHeight();
    Assert.AreEqual(1, shallowHeight);

    Assert.AreEqual(0.10896305433723197, shallowResult.Training.Mean, 5e-3);
    Assert.AreEqual(0.1125, shallowResult.Validation.Mean, 1e-10);

    Assert.AreEqual(2.1009258672955873E-05, shallowResult.Training.Variance, 1e-10);
    Assert.AreEqual(0.0017292179645018977, shallowResult.Validation.Variance, 1e-10);
}
/// <summary>
/// Runs 10-fold cross-validation of a C4.5 decision tree (maximum height 5, each variable
/// joining the decision path at most twice) on the given data and prints accuracy, error,
/// per-class precision/recall and F-scores of the aggregate confusion matrix to the console.
/// Class 0 is reported as "Not Anomaly" and class 1 as "Anomaly".
/// </summary>
/// <param name="inputs">Feature vectors, one row per sample.</param>
/// <param name="outputs">Class label of each row of <paramref name="inputs"/>.</param>
public static void DecisionTree_crossValidation(double[][] inputs, int[] outputs)
{
    // Fix the random seed so the fold assignment is reproducible.
    Accord.Math.Random.Generator.Seed = 0;

    var validator = CrossValidation.Create(
        k: 10, // number of folds
        learner: (p) => new C45Learning() { Join = 2, MaxHeight = 5 },
        // Performance measure: fraction of misclassified samples.
        loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
        // No special preparation before learning.
        fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
        x: inputs, y: outputs
    );

    // Partition the data into folds and train one model per fold.
    var outcome = validator.Learn(inputs, outputs);

    // Aggregate confusion matrix over the validation sets.
    GeneralConfusionMatrix matrix = outcome.ToConfusionMatrix(inputs, outputs);

    Action<string, double> report = (label, value) => Console.WriteLine(label + value);

    report("Accuracy:", matrix.Accuracy);
    report("Error:", matrix.Error);

    double notAnomalyPrecision = matrix.Precision[0];
    report("Not Anomaly Precision:", notAnomalyPrecision);
    double notAnomalyRecall = matrix.Recall[0];
    report("Not Anomaly Recall:", notAnomalyRecall);

    double anomalyPrecision = matrix.Precision[1];
    report("Anomaly Precision:", anomalyPrecision);
    double anomalyRecall = matrix.Recall[1];
    report("Anomaly Recall:", anomalyRecall);

    // F-score: harmonic mean of precision and recall.
    double anomalyFScore = 2 * (anomalyPrecision * anomalyRecall) / (anomalyPrecision + anomalyRecall);
    double notAnomalyFScore = 2 * (notAnomalyPrecision * notAnomalyRecall) / (notAnomalyPrecision + notAnomalyRecall);

    report("Not ANomaly F-score:", notAnomalyFScore);
    report("Anomaly F-score:", anomalyFScore);
}
/// <summary>
/// Entry point. Reads a CSV file of integer samples and a CSV file of labels, then either
/// trains, cross-validates and saves a Naive Bayes classifier, or (test mode) loads a
/// previously saved model and reports its accuracy on the given data.
/// </summary>
/// <param name="args">
/// args[0] = training or test file; args[1] = label file; args[2] = test flag (-s or -S);
/// args[3] = file name of the model file (supplying it also selects test mode).
/// </param>
static void Main(string[] args)
{
    const int minargs = 2;
    const int maxargs = 4;
    const int Folds = 4;

    Accord.Math.Random.Generator.Seed = 0;

    string trainingFname = null;
    string labelFname = null;
    string modelFname = "NBmodel.sav"; // default model file name
    bool NoTrain = false;

    Functions.Welcome();
    int numArgs = Functions.parseCommandLine(args, maxargs, minargs);
    if (numArgs == 0)
    {
        Console.WriteLine(Strings.resources.usage);
        System.Environment.Exit(1);
    }

    if (numArgs == 2)
    {
        trainingFname = args[0];
        labelFname = args[1];
    }

    if (numArgs == 3)
    {
        // The third argument must be the test flag.
        if (args[2] == "-s" || args[2] == "-S")
        {
            NoTrain = true;
            trainingFname = args[0];
            labelFname = args[1];
        }
        else
        {
            Console.WriteLine(Strings.resources.usage);
            System.Environment.Exit(1);
        }
    }

    if (numArgs == 4)
    {
        // NOTE(review): with four arguments args[2] is not checked; test mode is assumed.
        NoTrain = true;
        trainingFname = args[0];
        labelFname = args[1];
        modelFname = args[3];
    }

    //
    // Check that the training and label files pass Functions.checkFile
    //
    if (!Utility.Functions.checkFile(trainingFname))
    {
        Console.WriteLine("Error opening file {0}", trainingFname);
        System.Environment.Exit(1);
    }
    if (!Functions.checkFile(labelFname))
    {
        Console.WriteLine("Error opening file {0}", labelFname);
        System.Environment.Exit(1);
    }

    //
    // Read in the training and label files (CSV, no header row). The readers are
    // disposed as soon as their content has been copied so the files are released.
    // Naive Bayes is trained on integer arrays.
    //
    int[][] trainingset;
    using (CsvReader training_samples = new CsvReader(trainingFname, false))
    {
        int[,] MatrixIn = training_samples.ToMatrix<int>();
        trainingset = Functions.convertToJaggedArray(MatrixIn);
    }

    int[] output;
    using (CsvReader label_samples = new CsvReader(labelFname, false))
    {
        // Convert the labels to a matrix, then to a jagged array, then to a flat array.
        int[,] labelsIn = label_samples.ToMatrix<int>();
        int[][] LabelSet = Functions.convertToJaggedArray(labelsIn);
        output = Functions.convertTointArray(LabelSet);
    }

    if (!NoTrain)
    {
        // Create a Naive Bayes classifier and train it with the input data sets.
        var learner = new NaiveBayesLearning();
        NaiveBayes classifier = learner.Learn(trainingset, output);

        /* Cross-validation is a technique for estimating the performance of a predictive model.
         * One round partitions the data into complementary subsets, trains on one subset (the
         * training set) and validates on the other (the validation set). To reduce variability,
         * multiple rounds are performed using different partitions and the validation results
         * are averaged over the rounds.
         */
        var cv = CrossValidation.Create(k: Folds,
            learner: (p) => new NaiveBayesLearning(),
            loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
            fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
            x: trainingset, y: output);

        var result = cv.Learn(trainingset, output);

        Console.WriteLine("Performing n-fold cross validation where n = {0}", cv.K);
        Console.WriteLine("Cross Validation Results");
        Console.WriteLine(" number of samples {0}", result.NumberOfSamples);
        Console.WriteLine(" number of features: {0}", result.NumberOfInputs);
        Console.WriteLine(" number of outputs {0}", result.NumberOfOutputs);
        Console.WriteLine(" Training Error: {0:n2}", result.Training.Mean);
        Console.WriteLine(" Validation Mean: {0}\n", result.Validation.Mean);

        Console.WriteLine("Creating General Confusion Matrix from Cross Validation");
        GeneralConfusionMatrix gcm = result.ToConfusionMatrix(trainingset, output);
        double accuracy = gcm.Accuracy;
        Console.WriteLine(" GCM Accuracy {0}%\n", accuracy * 100);

        // Confusion matrix of the fully trained classifier on its own training data.
        ConfusionMatrix cm = ConfusionMatrix.Estimate(classifier, trainingset, output);
        Console.WriteLine("Confusion Error {0}", cm.Error);
        Console.WriteLine("Confusion accuracy {0}", cm.Accuracy);
        double tp = cm.TruePositives;
        double tn = cm.TrueNegatives;
        double fscore = cm.FScore;
        double fp = cm.FalsePositives;
        double fn = cm.FalseNegatives;
        Console.WriteLine("TP = {0},TN = {1}, FP = {2}, FN = {3}, Fscore = {4} ", tp, tn, fp, fn, fscore);

        // Save the model created from the training set. modelFname still holds the default
        // name here, because a custom name is only accepted together with test mode.
        classifier.Save(modelFname, compression: SerializerCompression.None);
        Console.WriteLine("Successfully saved the model");
    }
    else
    {
        // Load a previously saved model and compare its predictions with the labels.
        NaiveBayes loaded_nb = Serializer.Load<NaiveBayes>(modelFname);
        int[] results = loaded_nb.Decide(trainingset);
        double accuracy = Functions.CalculateAccuraccy(output, results);
        Console.WriteLine("Accuracy of predictions = {0}%", Math.Round(accuracy * 100, 2));
    }
}
/// <summary>
/// Runs 3-fold cross-validation of a linear SVM (trained with SMO) on a
/// replicated XOR data set and checks the reported error statistics.
/// </summary>
public void learn_test_simple()
{
    #region doc_learn_simple
    // Fix the random seed so that the fold assignment is repeatable.
    Accord.Math.Random.Generator.Seed = 0;

    // Sample showing how Cross-Validation can be used to assess
    // the performance of Support Vector Machines. The data below
    // is the binary XOR problem, with its four points repeated
    // four times.
    double[][] samples =
    {
        new[] { -1.0, -1.0 }, new[] { 1.0, -1.0 },
        new[] { -1.0,  1.0 }, new[] { 1.0,  1.0 },

        new[] { -1.0, -1.0 }, new[] { 1.0, -1.0 },
        new[] { -1.0,  1.0 }, new[] { 1.0,  1.0 },

        new[] { -1.0, -1.0 }, new[] { 1.0, -1.0 },
        new[] { -1.0,  1.0 }, new[] { 1.0,  1.0 },

        new[] { -1.0, -1.0 }, new[] { 1.0, -1.0 },
        new[] { -1.0,  1.0 }, new[] { 1.0,  1.0 },
    };

    // XOR label of each sample above, in the same order.
    int[] labels =
    {
        -1,  1,
         1, -1,

        -1,  1,
         1, -1,

        -1,  1,
         1, -1,

        -1,  1,
         1, -1,
    };

    // Build the cross-validation object.
    var validator = CrossValidation.Create(

        // Number of folds
        k: 3,

        // How a fresh learning algorithm is created for each fold
        learner: (info) => new SequentialMinimalOptimization<Linear>()
        {
            Complexity = 100
        },

        // How the performance of each fitted model is measured
        loss: (expected, actual, info) => new ZeroOneLoss(expected).Loss(actual),

        // How each model is fitted to its share of the data
        fit: (teacher, inputs, outputs, weights) => teacher.Learn(inputs, outputs, weights),

        x: samples,
        y: labels
    );

    // Restrict the run to a single thread.
    validator.ParallelOptions.MaxDegreeOfParallelism = 1;

    var summary = validator.Learn(samples, labels);

    // The measured performance is available as summary statistics.
    double trainingErrors = summary.Training.Mean;
    double validationErrors = summary.Validation.Mean;
    #endregion

    var training = summary.Training;
    var validation = summary.Validation;

    Assert.AreEqual(3, validator.K);

    Assert.AreEqual(0.37575757575757579, training.Mean, 1e-10);
    Assert.AreEqual(0.75555555555555554, validation.Mean, 1e-10);

    Assert.AreEqual(0.00044077134986225924, training.Variance, 1e-10);
    Assert.AreEqual(0.0059259259259259334, validation.Variance, 1e-10);

    Assert.AreEqual(0.020994555243259126, training.StandardDeviation, 1e-10);
    Assert.AreEqual(0.076980035891950155, validation.StandardDeviation, 1e-10);

    Assert.AreEqual(0, training.PooledStandardDeviation);
    Assert.AreEqual(0, validation.PooledStandardDeviation);

    Assert.AreEqual(3, validator.Folds.Length);
    Assert.AreEqual(3, summary.Models.Length);
}
/// <summary>
/// Loads the prepared e-mail subject-line data, cross-validates a Bernoulli
/// Naive Bayes spam classifier on it and prints the confusion matrix together
/// with accuracy, precision and recall to the console.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Location of the files produced by the Data Preparation step.
    // TODO: change the path to point to your data directory
    string dataDirPath = "\\\\Mac\\Home\\Documents\\c-sharp-machine-learning\\ch.2\\output";

    // Word-vector features, one column per word.
    string wordVecPath = Path.Combine(dataDirPath, "data-preparation-step\\subjectWordVec-alphaonly.csv");
    var wordVectors = Frame.ReadCsv(wordVecPath, hasHeaders: true, inferTypes: true);

    // Transformed e-mail records, indexed and sorted by "emailNum";
    // this frame supplies the "is_ham" column.
    string transformedPath = Path.Combine(dataDirPath, "data-preparation-step\\transformed.csv");
    var emails = Frame.ReadCsv(
        transformedPath,
        hasHeaders: true,
        inferTypes: false,
        schema: "int,string,string,int"
    ).IndexRows<int>("emailNum").SortRowsByKey();

    // Term frequencies of words seen in spam subject lines.
    string frequencyPath = Path.Combine(
        dataDirPath,
        "data-analysis-step\\frequency-alphaonly\\subject-line\\spam-frequencies-after-stopwords.csv"
    );
    var spamFrequencies = Frame.ReadCsv(
        frequencyPath,
        hasHeaders: false,
        inferTypes: false,
        schema: "string,int"
    );
    spamFrequencies.RenameColumns(new[] { "word", "num_occurences" });
    var spamFrequenciesByWord = spamFrequencies.IndexRows<string>("word");

    // Keep only the words that occur at least this many times; raise the
    // threshold to use fewer features and reduce overfitting.
    int minNumOccurences = 1;
    string[] wordFeatures = spamFrequenciesByWord
        .Where(row => row.Value.GetAs<int>("num_occurences") >= minNumOccurences)
        .RowKeys
        .ToArray();
    Console.WriteLine("Num Features Selected: {0}", wordFeatures.Length);

    // Target is 1 - is_ham, i.e. 1 marks a spam e-mail and 0 a ham e-mail.
    var targetVariables = 1 - emails.GetColumn<int>("is_ham");
    var spamCount = targetVariables.NumSum();
    Console.WriteLine("{0} spams vs. {1} hams", spamCount, (targetVariables.KeyCount - spamCount));

    // Convert the selected feature columns and the targets into the
    // plain arrays consumed by the Accord.NET learners.
    double[][] input = wordVectors.Columns[wordFeatures].Rows
        .Select(row => Array.ConvertAll<object, double>(
            row.Value.ValuesAll.ToArray(),
            cell => Convert.ToDouble(cell)))
        .ValuesAll
        .ToArray();
    int[] output = targetVariables.Values.ToArray();

    int numFolds = 3;

    var crossValidator = CrossValidation.Create<NaiveBayes<BernoulliDistribution>, NaiveBayesLearning<BernoulliDistribution>, double[], int>(
        k: numFolds,
        // Naive Bayes learner with Bernoulli-distributed features
        learner: (p) => new NaiveBayesLearning<BernoulliDistribution>(),
        // Zero-one loss as the cost function
        loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
        // Plain fit, no extra preparation
        fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
        x: input,
        y: output
    );

    var result = crossValidator.Learn(input, output);

    // Problem size and mean errors over the folds.
    int numberOfSamples = result.NumberOfSamples;
    int numberOfInputs = result.NumberOfInputs;
    int numberOfOutputs = result.NumberOfOutputs;
    double trainingError = result.Training.Mean;
    double validationError = result.Validation.Mean;

    Console.WriteLine("\n---- Confusion Matrix ----");
    GeneralConfusionMatrix gcm = result.ToConfusionMatrix(input, output);
    var counts = gcm.Matrix;

    // NOTE(review): the labels below treat rows as predictions and columns as
    // actual classes -- confirm this matches the orientation of
    // GeneralConfusionMatrix.Matrix.
    Console.WriteLine("");
    Console.Write("\t\tActual 0\t\tActual 1\n");

    int rowCount = counts.GetLength(0);
    int columnCount = counts.GetLength(1);
    for (int row = 0; row < rowCount; row++)
    {
        Console.Write("Pred {0} :\t", row);
        for (int column = 0; column < columnCount; column++)
        {
            Console.Write(counts[row, column] + "\t\t\t");
        }
        Console.WriteLine();
    }

    Console.WriteLine("\n---- Sample Size ----");
    Console.WriteLine("# samples: {0}, # inputs: {1}, # outputs: {2}", numberOfSamples, numberOfInputs, numberOfOutputs);
    Console.WriteLine("training error: {0}", trainingError);
    Console.WriteLine("validation error: {0}\n", validationError);

    Console.WriteLine("\n---- Calculating Accuracy, Precision, Recall ----");

    // NOTE(review): these indices follow the same row/column reading as the
    // printed table above -- verify together with it.
    float truePositive = (float)counts[1, 1];
    float trueNegative = (float)counts[0, 0];
    float falsePositive = (float)counts[1, 0];
    float falseNegative = (float)counts[0, 1];

    // Accuracy = (TP + TN) / number of samples
    Console.WriteLine("Accuracy: {0}", (truePositive + trueNegative) / numberOfSamples);

    // Precision = TP / (TP + FP)
    Console.WriteLine("Precision: {0}", (truePositive / (truePositive + falsePositive)));

    // Recall = TP / (TP + FN)
    Console.WriteLine("Recall: {0}", (truePositive / (truePositive + falseNegative)));

    // Keep the console window open until a key is pressed.
    Console.ReadKey();
}
/// <summary>
/// Cross-validates C4.5 decision trees on the Wisconsin (Diagnostic) Breast
/// Cancer data set with 10 folds and checks the reported statistics, first for
/// trees of height up to 5 and then for trees restricted to height 1.
/// </summary>
public void CrossValidationTest()
{
    #region doc_cross_validation
    // Ensure we have reproducible results
    Accord.Math.Random.Generator.Seed = 0;

    // Get some data to be learned. We will be using the Wisconsin's
    // (Diagnostic) Breast Cancer dataset, where the goal is to determine
    // whether the characteristics extracted from a breast cancer exam
    // correspond to a malignant or benign type of cancer:
    var data = new WisconsinDiagnosticBreastCancer();
    double[][] input = data.Features; // 569 samples, 30-dimensional features
    int[] output = data.ClassLabels;  // 569 samples, 2 different class labels

    // Let's say we want to measure the cross-validation performance of
    // a decision tree with a maximum tree height of 5 and where variables
    // are able to join the decision path at most 2 times during evaluation:
    var cv = CrossValidation.Create(

        k: 10, // We will be using 10-fold cross validation

        learner: (p) => new C45Learning() // here we create the learning algorithm
        {
            Join = 2,
            MaxHeight = 5
        },

        // Now we have to specify how the tree performance should be measured:
        loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),

        // This function can be used to perform any special
        // operations before the actual learning is done, but
        // here we will just leave it as simple as it can be:
        fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

        // Finally, we have to pass the input and output data
        // that will be used in cross-validation.
        x: input, y: output
    );

    // After the cross-validation object has been created,
    // we can call its .Learn method with the input and
    // output data that will be partitioned into the folds:
    var result = cv.Learn(input, output);

    // We can grab some information about the problem:
    int numberOfSamples = result.NumberOfSamples; // should be 569
    int numberOfInputs = result.NumberOfInputs;   // should be 30
    int numberOfOutputs = result.NumberOfOutputs; // should be 2

    double trainingError = result.Training.Mean;     // should be 0.017771153143274855
    double validationError = result.Validation.Mean; // should be 0.0755952380952381

    // If desired, compute an aggregate confusion matrix for the validation sets:
    GeneralConfusionMatrix gcm = result.ToConfusionMatrix(input, output);
    double accuracy = gcm.Accuracy; // result should be 0.92442882249560632
    #endregion

    Assert.AreEqual(569, gcm.Samples);

    // Floating-point results are compared with the same 1e-10 tolerance as the
    // other statistics below instead of exact equality.
    Assert.AreEqual(0.92442882249560632, gcm.Accuracy, 1e-10);
    Assert.AreEqual(0.075571177504393683, gcm.Error, 1e-10);
    Assert.AreEqual(2, gcm.Classes);

    Assert.AreEqual(569, numberOfSamples);
    Assert.AreEqual(30, numberOfInputs);
    Assert.AreEqual(2, numberOfOutputs);

    Assert.AreEqual(10, cv.K);
    Assert.AreEqual(0.017771153143274855, result.Training.Mean, 1e-10);
    Assert.AreEqual(0.0755952380952381, result.Validation.Mean, 1e-10);

    Assert.AreEqual(3.0929835736884063E-05, result.Training.Variance, 1e-10);
    Assert.AreEqual(0.00096549963219103182, result.Validation.Variance, 1e-10);

    Assert.AreEqual(10, cv.Folds.Length);
    Assert.AreEqual(10, result.Models.Length);

    // The first fold's tree is expected to use the full allowed height.
    var tree = result.Models[0].Model;
    int height = tree.GetHeight();
    Assert.AreEqual(5, height);

    // Second scenario: reset the seed and restrict the trees to a single
    // level, a single join and a single variable.
    Accord.Math.Random.Generator.Seed = 0;

    cv = CrossValidation.Create(
        k: 10,
        learner: (p) => new C45Learning()
        {
            Join = 1,
            MaxHeight = 1,
            MaxVariables = 1
        },
        loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
        fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
        x: input, y: output
    );

    // Run the folds on a single thread for this scenario.
    cv.ParallelOptions.MaxDegreeOfParallelism = 1;

    result = cv.Learn(input, output);

    tree = result.Models[0].Model;
    height = tree.GetHeight();

    Assert.AreEqual(1, height);

    Assert.AreEqual(0.24842341313352828, result.Training.Mean, 1e-10);
    Assert.AreEqual(0.25112781954887214, result.Validation.Mean, 1e-10);

    Assert.AreEqual(0.017727583138285874, result.Training.Variance, 1e-10);
    Assert.AreEqual(0.018956888182583998, result.Validation.Variance, 1e-10);
}
/// <summary>
/// Reads the meal records from the CSV file at <c>path</c>, encodes them with a
/// codebook, trains an ID3 decision tree on the whole data set and then runs a
/// 5-fold cross-validation, writing the mean validation and training errors to
/// the debug output.
/// </summary>
public void validation()
{
    DataTable dt = new DataTable("FoodDBSample");
    dt.Columns.Add("Category", "Carb", "Protein", "Fat", "Calorie", "Fiber", "Decision");

    // Both readers are disposed once the table has been filled; the records
    // are copied inside the using block, while the file is still open.
    using (var reader = File.OpenText(path))
    using (var csv = new CsvReader(reader))
    {
        foreach (var record in csv.GetRecords<MealData>())
        {
            DataRow row = dt.NewRow();
            row["Category"] = record.Category;
            row["Carb"] = record.Carb;
            row["Protein"] = record.Protein;
            row["Fat"] = record.Fat;
            row["Calorie"] = record.Calorie;
            row["Fiber"] = record.Fiber;
            row["Decision"] = record.Outcome;
            dt.Rows.Add(row);
        }
    }

    // Translate the table values into integer symbols.
    var codebook = new Codification(dt);
    DataTable symbols = codebook.Apply(dt);

    int[][] inputs = symbols.ToJagged<int>("Category", "Carb", "Protein", "Fat", "Calorie", "Fiber");
    int[] outputs = symbols.ToArray<int>("Decision");

    // Single definition of the learner configuration (which columns are used
    // for making decisions), shared by the direct training run and by every
    // cross-validation fold so the two cannot drift apart.
    System.Func<ID3Learning> createLearner = () => new ID3Learning()
    {
        new DecisionVariable("Category", 4),
        new DecisionVariable("Carb", 2),
        new DecisionVariable("Protein", 2),
        new DecisionVariable("Fat", 2),
        new DecisionVariable("Calorie", 2),
        new DecisionVariable("Fiber", 2)
    };

    DecisionTree tree = createLearner().Learn(inputs, outputs);

    // Training error of the tree learned on the complete data set; it is
    // computed here but not reported by this method.
    double error = new ZeroOneLoss(outputs).Loss(tree.Decide(inputs));

    // Measure the cross-validation performance of the ID3 learner:
    var cv = CrossValidation.Create(

        k: 5, // 5-fold cross-validation

        // create the learning algorithm for each fold
        learner: (p) => createLearner(),

        // zero-one loss between the expected and the predicted decisions
        loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),

        // This function can be used to perform any special
        // operations before the actual learning is done, but
        // here we will just leave it as simple as it can be:
        fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

        // pass the input and output data
        // that will be used in cross-validation.
        x: inputs, y: outputs
    );

    // After the cross-validation object has been created,
    // we can call its .Learn method with the input and
    // output data that will be partitioned into the folds:
    var result = cv.Learn(inputs, outputs);

    // Mean errors over the folds.
    double trainingError = result.Training.Mean;
    double validationError = result.Validation.Mean;

    System.Diagnostics.Debug.WriteLine("ID3 Mean: " + validationError);
    System.Diagnostics.Debug.WriteLine("ID3 Error: " + trainingError);
}