/// <summary>
/// Cross-validates a Naive Bayes learner on the given data and stores the
/// aggregate confusion matrix in <c>ConfusionMatrix</c>.
/// </summary>
/// <param name="folds">Number of cross-validation folds to use.</param>
/// <param name="inputs">Feature vectors; each value is converted with <c>Convert.ToInt32</c> before learning.</param>
/// <param name="outputs">Class label of each sample.</param>
public void ApplyCrossValidation(int folds, double[][] inputs, int[] outputs)
{
    // Convert the features once and reuse the same jagged array for creating,
    // running and scoring the validation (previously converted three times).
    int[][] symbols = inputs
        .Select(row => row.Select(value => Convert.ToInt32(value)).ToArray())
        .ToArray();

    crossValidation = CrossValidation.Create(
        // Honour the caller's fold count instead of a hard-coded 10.
        k: folds,

        // A fresh learner is created for every fold.
        learner: (p) => new B.NaiveBayesLearning(),

        // Performance is measured as the zero-one (misclassification) loss.
        loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),

        // No special preparation before learning: just fit the fold.
        fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

        x: symbols,
        y: outputs
    );

    var result = crossValidation.Learn(symbols, outputs);
    ConfusionMatrix = result.ToConfusionMatrix(symbols, outputs).Matrix;
}
/// <summary>
/// Performance test: runs a 5-fold cross-validation of a support vector
/// regression over 1000 generated rows, waits on <c>trigger</c>, rethrows
/// any exception recorded in <c>ex</c> and writes the execution time to the
/// test output.
/// </summary>
public void SupportVectorMachinePerformanceTest()
{
    ex = null;

    var validation = new CrossValidation();
    validation.Algorithm = new SupportVectorRegression();

    var twister = new HeuristicLab.Random.MersenneTwister();
    double[,] samples = GenerateData(1000, twister);
    var columnNames = new List<string>
    {
        "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "y"
    };
    var dataset = new Dataset(columnNames, samples);

    // The first ten columns are the inputs, the last one is the target.
    validation.Problem.ProblemDataParameter.ActualValue =
        new RegressionProblemData(dataset, columnNames.Take(10), columnNames.Last());

    validation.Folds.Value = 5;
    validation.SamplesStart.Value = 0;
    validation.SamplesEnd.Value = 999;

    validation.ExceptionOccurred += cv_ExceptionOccurred;
    validation.Stopped += cv_Stopped;

    validation.Prepare();
    validation.Start();

    // Block until the handlers signal the wait handle.
    trigger.WaitOne();

    if (ex != null)
    {
        throw ex;
    }

    TestContext.WriteLine("Runtime: {0}", validation.ExecutionTime.ToString());
}
/// <summary>
/// Cross-validates a multiclass support vector machine with a Gaussian kernel
/// on the given data and stores the aggregate confusion matrix in
/// <c>ConfusionMatrix</c>.
/// </summary>
/// <param name="folds">Number of cross-validation folds to use.</param>
/// <param name="inputs">Feature vectors, one row per sample.</param>
/// <param name="outputs">Class label of each sample.</param>
public override void ApplyCrossValidation(int folds, double[][] inputs, int[] outputs)
{
    crossValidation = CrossValidation.Create(
        // Honour the caller's fold count instead of a hard-coded 10.
        k: folds,

        learner: (p) => new MulticlassSupportVectorLearning<Gaussian>()
        {
            // Train the underlying binary SVMs with SMO.
            Learner = (param) => new SequentialMinimalOptimization<Gaussian>()
            {
                // Let the learner estimate the Gaussian kernel parameters.
                UseKernelEstimation = true
            }
        },

        // Performance is measured as the zero-one (misclassification) loss.
        loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),

        // No special preparation before learning: just fit the fold.
        fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

        x: inputs,
        y: outputs
    );

    var result = crossValidation.Learn(inputs, outputs);
    ConfusionMatrix = result.ToConfusionMatrix(inputs, outputs).Matrix;
}
/// <summary>
/// Runs a 3-fold cross-validation of a Naive Bayes learner over
/// <c>inputsInt</c>/<c>outputs</c> and appends the mean training error, mean
/// validation error and confusion-matrix accuracy to <c>message</c>.
/// </summary>
public void nativeBayesValidation()
{
    // The stand-alone model that used to be trained here was never used, so
    // the extra training pass and the unused statistics locals were removed.
    var cv = CrossValidation.Create(
        k: 3,
        learner: (p) => new NaiveBayesLearning(),
        loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
        fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
        x: inputsInt,
        y: outputs
    );

    var result = cv.Learn(inputsInt, outputs);

    double trainingError = result.Training.Mean;
    double validationError = result.Validation.Mean;

    // Aggregate confusion matrix computed from the cross-validation result.
    GeneralConfusionMatrix gcm = result.ToConfusionMatrix(inputsInt, outputs);
    double accuracy = gcm.Accuracy;

    message += "Native Bayes Validacja\n";
    message += "trainingError " + trainingError.ToString() + "\n";
    message += "validationError " + validationError.ToString() + "\n";
    message += "accuracy " + accuracy.ToString() + "\n\n";
}
/// <summary>
/// Performance test: runs a 5-fold cross-validation of a support vector
/// regression over generated data, rethrows any exception recorded in
/// <c>ex</c> and writes the execution time to the test output.
/// </summary>
public void SupportVectorMachinePerformanceTest()
{
    // Row count requested from GenerateData; the sample range is derived from it.
    const int rows = 1000;

    ex = null;
    var cv = new CrossValidation();
    cv.Algorithm = new SupportVectorRegression();

    var rand = new HeuristicLab.Random.MersenneTwister();
    double[,] data = GenerateData(rows, rand);
    List<string> variables = new List<string>() { "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "y" };
    Dataset ds = new Dataset(variables, data);

    // The first ten columns are the inputs, the last one is the target.
    cv.Problem.ProblemDataParameter.ActualValue = new RegressionProblemData(ds, variables.Take(10), variables.Last());
    cv.Folds.Value = 5;
    cv.SamplesStart.Value = 0;
    cv.SamplesEnd.Value = rows - 1;

    cv.ExceptionOccurred += new EventHandler<EventArgs<Exception>>(cv_ExceptionOccurred);
    cv.Prepare();
    cv.Start();

    if (ex != null)
    {
        // "throw ex;" would replace the recorded stack trace with this frame;
        // rethrow the same exception object with its original trace preserved.
        System.Runtime.ExceptionServices.ExceptionDispatchInfo.Capture(ex).Throw();
    }

    TestContext.WriteLine("Runtime: {0}", cv.ExecutionTime.ToString());
}
/// <summary>
/// Builds 40 shuffled labels (10 ones, 30 zeros) and checks that a 10-fold
/// validation created from those labels puts exactly one positive label in
/// every validation fold and nine in every training fold.
/// </summary>
public void NotEnoughSamplesTest2()
{
    Accord.Math.Tools.SetupGenerator(0);

    int[] labels = Matrix.Vector(10, 1).Concatenate(Matrix.Vector(30, 0));
    Vector.Shuffle(labels);

    var validation = new CrossValidation<MulticlassSupportVectorMachine>(labels, 2, folds: 10);
    validation.RunInParallel = false;
    validation.Fitting = (int index, int[] indicesTrain, int[] indicesValidation) =>
    {
        int positivesInValidation = labels
            .Submatrix(indicesValidation)
            .Count(label => label == 1);
        Assert.AreEqual(1, positivesInValidation);

        int positivesInTraining = labels
            .Submatrix(indicesTrain)
            .Count(label => label == 1);
        Assert.AreEqual(9, positivesInTraining);

        // No model is trained; only the fold composition is under test.
        return new CrossValidationValues<MulticlassSupportVectorMachine>(null, 0, 0);
    };

    validation.Compute();
}
/// <summary>
/// Runs a 10-fold cross-validation of the project's <c>SVM</c> wrapper and
/// prints the per-fold training time and the mean training/validation errors.
/// </summary>
/// <param name="inputs">Feature vectors, one row per sample.</param>
/// <param name="outputs">Label of each sample.</param>
public static void TestSVM(double[][] inputs, int[] outputs)
{
    var validation = new CrossValidation(inputs.Length, 10);

    validation.Fitting = (k, indicesTrain, indicesValidation) =>
    {
        // Slice the data for this fold.
        var foldTrainX = inputs.Submatrix(indicesTrain);
        var foldTrainY = outputs.Submatrix(indicesTrain);
        var foldTestX = inputs.Submatrix(indicesValidation);
        var foldTestY = outputs.Submatrix(indicesValidation);

        // Time the training of a fresh machine.
        var timer = new Stopwatch();
        timer.Start();
        var machine = new SVM();
        var trainingError = machine.TrainSVM(new RationalQuadratic(1), 3, foldTrainX, foldTrainY);
        timer.Stop();

        Console.WriteLine("Training for: " + timer.ElapsedMilliseconds + "ms with errors: " + trainingError);

        var validationError = machine.GetSMO().ComputeError(foldTestX, foldTestY);

        // Hand the model and both errors back to the validation.
        return new CrossValidationValues(machine, trainingError, validationError);
    };

    var summary = validation.Compute();

    var meanTraining = summary.Training.Mean;
    var meanValidation = summary.Validation.Mean;
    Console.WriteLine("Finished with " + meanTraining + " training errors and " + meanValidation + " validation errors");
}
/// <summary>
/// Creates a grid-search model selection by calling the SWIG-generated native
/// constructor wrapper with the native pointers of both arguments.
/// </summary>
/// <param name="model_parameters">Model selection parameters whose native pointer is passed on.</param>
/// <param name="cross_validation">Cross-validation object whose native pointer is passed on.</param>
// NOTE(review): the trailing `true` is presumably the SWIG "owns native memory" flag - confirm against the other constructor.
// After the native call, any exception pending on the SWIG side is retrieved and thrown.
public GridSearchModelSelection(ModelSelectionParameters model_parameters, CrossValidation cross_validation) : this(modshogunPINVOKE.new_GridSearchModelSelection__SWIG_1(ModelSelectionParameters.getCPtr(model_parameters), CrossValidation.getCPtr(cross_validation)), true) { if (modshogunPINVOKE.SWIGPendingException.Pending) { throw modshogunPINVOKE.SWIGPendingException.Retrieve(); } }
/// <summary>
/// Runs a 10-fold cross-validation of the project's <c>KNN</c> wrapper and
/// prints the mean training and validation errors.
/// </summary>
/// <param name="inputs">Feature vectors, one row per sample.</param>
/// <param name="outputs">Label of each sample.</param>
/// <param name="kValue">Value passed to <c>TrainKNN</c> as the k of the classifier.</param>
public static void TestKNN(double[][] inputs, int[] outputs, int kValue)
{
    // The validation must be sized by the number of samples. The previous
    // code passed inputs[0].Length (the number of features), so the folds
    // only ever indexed the first few rows of the data set.
    var crossValidation = new CrossValidation(inputs.Length, 10);

    crossValidation.Fitting = delegate(int k, int[] indicesTrain, int[] indicesValidation)
    {
        var trainingInputs = inputs.Submatrix(indicesTrain);
        var trainingOutputs = outputs.Submatrix(indicesTrain);

        // And now the validation data:
        var validationInputs = inputs.Submatrix(indicesValidation);
        var validationOutputs = outputs.Submatrix(indicesValidation);

        var knn = new KNN();
        knn.TrainKNN(trainingInputs, trainingOutputs, kValue);

        var error = knn.ComputeError(validationInputs, validationOutputs);

        // No training error is measured; 0 is reported for it.
        return new CrossValidationValues(knn, 0, error);
    };

    // Compute the cross-validation
    var result = crossValidation.Compute();

    // Finally, access the measured performance.
    var trainingErrors = result.Training.Mean;
    var validationErrors = result.Validation.Mean;
    Console.WriteLine("Finished with " + trainingErrors + " training errors and " + validationErrors + " validation errors");
}
/// <summary>
/// Cross-validates a classifier: for every fold a new instance is created
/// from <paramref name="classifier"/>, trained on the training indices and
/// scored with the zero-one loss on both partitions. One tab-separated line
/// of statistics per fold is written to the console.
/// </summary>
/// <param name="classifier">Prototype used to create the per-fold classifier instances.</param>
/// <param name="trainingData">Inputs, outputs and feature metadata to validate on.</param>
/// <param name="folds">Number of folds.</param>
/// <returns>The result returned by the cross-validation's <c>Compute</c> call.</returns>
public CrossValidationResult<object> Validate(IClassifier classifier, TrainingData trainingData, int folds = 10)
{
    var validation = new CrossValidation(size: trainingData.Inputs.Length, folds: folds);

    validation.Fitting = (k, indicesTrain, indicesValidation) =>
    {
        // Slice the data for this fold.
        var foldTrainX = trainingData.Inputs.Get(indicesTrain);
        var foldTrainY = trainingData.Outputs.Get(indicesTrain);
        var foldTestX = trainingData.Inputs.Get(indicesValidation);
        var foldTestY = trainingData.Outputs.Get(indicesValidation);

        // Train a fresh classifier on the training partition.
        var foldClassifier = classifier.CreateInstance(trainingData.FeatureDefaultsValueTypes, trainingData.FeatureGranularities);
        foldClassifier.Train(foldTrainX, foldTrainY);

        var trainingPredicted = foldClassifier.Decide(foldTrainX);
        var validationPredicted = foldClassifier.Decide(foldTestX);

        double trainingError = new ZeroOneLoss(foldTrainY).Loss(trainingPredicted);
        double validationError = new ZeroOneLoss(foldTestY).Loss(validationPredicted);

        // Binary confusion matrix of the validation partition (1 = positive, 0 = negative).
        var confusionMatrix = new ConfusionMatrix(validationPredicted, foldTestY, positiveValue: 1, negativeValue: 0);
        Console.WriteLine($"{k}\t{trainingError}\t{validationError}\t{confusionMatrix.Accuracy}\t{confusionMatrix.TruePositives}\t{confusionMatrix.TrueNegatives}\t{confusionMatrix.FalsePositives}\t{confusionMatrix.FalseNegatives}\t{confusionMatrix.FalsePositiveRate}");

        return new CrossValidationValues(foldClassifier, trainingError, validationError);
    };

    return validation.Compute();
}
/// <summary>
/// Runs a 3-fold cross-validation of a k-nearest-neighbours learner (k = 4)
/// over <c>inputs</c>/<c>outputs</c> and appends the mean training error,
/// mean validation error and confusion-matrix accuracy to <c>message</c>.
/// </summary>
public void knnValidation()
{
    var validation = CrossValidation.Create(
        k: 3,
        learner: (p) => new KNearestNeighbors(k: 4),
        loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
        fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
        x: inputs,
        y: outputs
    );

    var outcome = validation.Learn(inputs, outputs);

    // Problem dimensions reported by the result (read but not used further).
    int sampleCount = outcome.NumberOfSamples;
    int inputCount = outcome.NumberOfInputs;
    int outputCount = outcome.NumberOfOutputs;

    double meanTrainingError = outcome.Training.Mean;
    double meanValidationError = outcome.Validation.Mean;

    // Aggregate confusion matrix computed from the cross-validation result.
    GeneralConfusionMatrix aggregate = outcome.ToConfusionMatrix(inputs, outputs);
    double accuracy = aggregate.Accuracy;

    message += "Knn Validacja\n";
    message += "trainingError " + meanTrainingError.ToString() + "\n";
    message += "validationError " + meanValidationError.ToString() + "\n";
    message += "accuracy " + accuracy.ToString() + "\n\n";
}
/// <summary>
/// Cross-validates a kernel discriminant analysis with a quadratic kernel on
/// the given data and stores the aggregate confusion matrix in
/// <c>ConfusionMatrix</c>.
/// </summary>
/// <param name="folds">Number of cross-validation folds to use.</param>
/// <param name="inputs">Feature vectors, one row per sample.</param>
/// <param name="outputs">Class label of each sample.</param>
public override void ApplyCrossValidation(int folds, double[][] inputs, int[] outputs)
{
    crossValidation = CrossValidation.Create(
        // Honour the caller's fold count instead of a hard-coded 10.
        k: folds,

        // A fresh learner is created for every fold.
        learner: (p) => new KernelDiscriminantAnalysis()
        {
            Kernel = new Quadratic()
        },

        // Performance is measured as the zero-one (misclassification) loss.
        loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),

        // No special preparation before learning: just fit the fold.
        fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

        x: inputs,
        y: outputs
    );

    var result = crossValidation.Learn(inputs, outputs);
    ConfusionMatrix = result.ToConfusionMatrix(inputs, outputs).Matrix;
}
/// <summary>
/// Example: 10-fold cross-validation of a C4.5 decision tree (Join = 2,
/// MaxHeight = 5) on the Wisconsin Diagnostic Breast Cancer data set,
/// printing the resulting accuracy to the console.
/// </summary>
private static void breastCancerExample()
{
    // Fix the seed so that the run is reproducible.
    Accord.Math.Random.Generator.Seed = 0;

    // Features and class labels of the breast cancer data set.
    var dataset = new WisconsinDiagnosticBreastCancer();
    double[][] features = dataset.Features;   // 569 samples, 30-dimensional features
    int[] labels = dataset.ClassLabels;       // 569 samples, 2 different class labels

    var validation = CrossValidation.Create(
        k: 10,

        // One new tree learner per fold.
        learner: (p) => new C45Learning()
        {
            Join = 2,
            MaxHeight = 5
        },

        // Zero-one loss between expected and actual labels.
        loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),

        // Plain fit, no extra preparation.
        fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

        x: features,
        y: labels
    );

    // Learn partitions the data into the folds and evaluates each of them.
    var outcome = validation.Learn(features, labels);

    // Problem dimensions and mean errors (expected values as noted by the author).
    int sampleCount = outcome.NumberOfSamples;             // should be 569
    int inputCount = outcome.NumberOfInputs;               // should be 30
    int outputCount = outcome.NumberOfOutputs;             // should be 2
    double meanTrainingError = outcome.Training.Mean;      // should be 0.017771153143274855
    double meanValidationError = outcome.Validation.Mean;  // should be 0.0755952380952381

    // Aggregate confusion matrix computed from the cross-validation result.
    GeneralConfusionMatrix aggregate = outcome.ToConfusionMatrix(features, labels);
    double accuracy = aggregate.Accuracy;                  // should be 0.92442882249560632

    Console.WriteLine("C45Learning learning algorithm accuracy is %" + (accuracy * 100).ToString("N2"));
}
/// <summary>
/// Runs the classifier once on the first (index 0) train/test fold pair of
/// the given cross-validation.
/// </summary>
/// <param name="classifier">Classifier to run.</param>
/// <param name="parameter">Parameter passed to the run.</param>
/// <param name="crossValidation">Source of the train and test folds.</param>
/// <returns>The value returned by <c>SingleRun</c>.</returns>
protected Performance.Performance runExperiment(Classifier.Classifier classifier, Parameter.Parameter parameter, CrossValidation<Instance.Instance> crossValidation)
{
    const int foldIndex = 0;

    var training = new InstanceList.InstanceList(crossValidation.GetTrainFold(foldIndex));
    var testing = new InstanceList.InstanceList(crossValidation.GetTestFold(foldIndex));

    var performance = classifier.SingleRun(parameter, training, testing);
    return performance;
}
/// <summary>
/// Checks that a 10-fold validation over 100 samples calls the fitting
/// function once per fold in order, with 90 training and 10 validation
/// indices each time, and that the returned statistics match the values
/// produced by the fitting function (k and 2k).
/// </summary>
public void FittingTest()
{
    int[] foldAssignment = CrossValidation.Splittings(100, 10);
    int[] samples = Matrix.Indices(0, 100);

    var validation = new CrossValidation(foldAssignment, 10);
    validation.RunInParallel = false;

    int expectedFold = 0;

    validation.Fitting = (k, trainingSamples, validationSamples) =>
    {
        Assert.AreEqual(expectedFold, k);
        Assert.AreEqual(90, trainingSamples.Length);
        Assert.AreEqual(10, validationSamples.Length);

        // Selecting by index must return the same elements as direct indexing.
        int[] selectedTraining = samples.Submatrix(trainingSamples);
        int[] selectedValidation = samples.Submatrix(validationSamples);

        int position = 0;
        while (position < selectedTraining.Length)
        {
            Assert.AreEqual(samples[trainingSamples[position]], selectedTraining[position]);
            position++;
        }

        position = 0;
        while (position < selectedValidation.Length)
        {
            Assert.AreEqual(samples[validationSamples[position]], selectedValidation[position]);
            position++;
        }

        expectedFold++;

        // Training value k and validation value 2k for fold k.
        return new CrossValidationValues(k, 2 * k);
    };

    var outcome = validation.Compute();

    Assert.AreEqual(10, expectedFold);
    Assert.AreEqual(4.5, outcome.Training.Mean);
    Assert.AreEqual(9.0, outcome.Validation.Mean);
    Assert.AreEqual(
        2 * outcome.Training.StandardDeviation,
        outcome.Validation.StandardDeviation);

    Assert.AreEqual(validation.Folds.Length, outcome.Training.Sizes.Length);
    Assert.AreEqual(validation.Folds.Length, outcome.Validation.Sizes.Length);

    for (int fold = 0; fold < outcome.Training.Sizes.Length; fold++)
    {
        Assert.AreEqual(90, outcome.Training.Sizes[fold]);
    }

    for (int fold = 0; fold < outcome.Validation.Sizes.Length; fold++)
    {
        Assert.AreEqual(10, outcome.Validation.Sizes[fold]);
    }
}
/// <summary>
/// Encodes <c>trainingSets</c> into feature/label arrays, trains
/// <c>decisionTreeLib</c> with a C4.5 learner (Join = 2, MaxHeight = 5) and
/// stores the 5-fold cross-validated accuracy, rounded to three decimals, in
/// <c>accuracyC45lib</c>.
/// </summary>
private void trainingC45lib()
{
    Accord.Math.Random.Generator.Seed = 0;
    c45Learning = new C45Learning() { Join = 2, MaxHeight = 5 };

    int sampleCount = trainingSets.Count;
    var features = new double[sampleCount][];
    var labels = new int[sampleCount];

    int row = 0;
    foreach (Patient patient in trainingSets)
    {
        var encoded = new double[9];

        // Attribute 1 is bucketed into 0 (< 30), 1 (< 60) or 2 (otherwise).
        if (patient.get(1) < 30)
        {
            encoded[0] = 0;
        }
        else if (patient.get(1) < 60)
        {
            encoded[0] = 1;
        }
        else
        {
            encoded[0] = 2;
        }

        // Attributes 2..9 are copied unchanged.
        for (int attribute = 2; attribute <= 9; attribute++)
        {
            encoded[attribute - 1] = patient.get(attribute);
        }

        features[row] = encoded;

        // Attribute 10 is the class label.
        labels[row] = patient.get(10);
        row++;
    }

    var validation = CrossValidation.Create(
        k: 5,
        learner: (p) => new C45Learning() { Join = 2, MaxHeight = 5 },
        loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
        fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
        x: features,
        y: labels
    );

    // Train the tree on all data, then run the cross-validation.
    decisionTreeLib = c45Learning.Learn(features, labels);
    var outcome = validation.Learn(features, labels);

    GeneralConfusionMatrix aggregate = outcome.ToConfusionMatrix(features, labels);
    accuracyC45lib = Math.Round(aggregate.Accuracy, 3);
}
/// <summary>
/// Cross-validates a regression decision tree learner on the decision tree
/// data set and returns the mean squared error of the predictions.
/// </summary>
/// <param name="folds">Number of folds passed to the cross-validation.</param>
/// <returns>Error reported by <c>MeanSquaredErrorRegressionMetric</c>.</returns>
double CrossValidate(int folds)
{
    var (features, expected) = DataSetUtilities.LoadDecisionTreeDataSet();

    // Random index sampler constructed with the fixed value 42.
    var sampler = new RandomIndexSampler<double>(42);
    var validation = new CrossValidation<double>(sampler, folds);

    var learner = new RegressionDecisionTreeLearner();
    var predicted = validation.CrossValidate(learner, features, expected);

    return new MeanSquaredErrorRegressionMetric().Error(expected, predicted);
}
/// <summary>
/// Cross-validates a C4.5 decision tree on the feature vectors of the model
/// and returns one confusion matrix per fold, computed on that fold's
/// validation partition.
/// </summary>
/// <param name="classificationModel">Model providing the bands and the labelled feature vectors.</param>
/// <param name="folds">Number of cross-validation folds.</param>
/// <returns>A task producing the per-fold confusion matrices, ordered by fold index.</returns>
public override Task<List<GeneralConfusionMatrix>> ComputeFoldedConfusionMatrixAsync(ClassificationModel classificationModel, int folds)
{
    return Task.Factory.StartNew(() =>
    {
        int numFeatures = classificationModel.FeatureVectors.Count;
        DecisionVariable[] decisionVariables = classificationModel.Bands
            .Select(b => DecisionVariable.Continuous(b.ToString()))
            .ToArray();

        double[][] input = new double[numFeatures][];
        int[] responses = new int[numFeatures];

        for (int featureIndex = 0; featureIndex < numFeatures; ++featureIndex)
        {
            var featureVector = classificationModel.FeatureVectors[featureIndex];

            // Scale the band intensities by ushort.MaxValue.
            input[featureIndex] = Array.ConvertAll(featureVector.FeatureVector.BandIntensities, s => (double)s / ushort.MaxValue);
            responses[featureIndex] = featureVector.FeatureClass;
        }

        int landcoverTypeCount = Enum.GetValues(typeof(LandcoverTypeViewModel)).Length;

        // Create a new cross-validation passing the data set size and the number of folds.
        var crossvalidation = new CrossValidation(input.Length, folds);

        // The fitting callback may be invoked for several folds concurrently,
        // so a shared List<T>.Add is not safe. Each fold writes only to its
        // own slot instead, which also makes the result order deterministic.
        var foldMatrices = new GeneralConfusionMatrix[crossvalidation.Folds.Length];

        crossvalidation.Fitting = delegate(int k, int[] indicesTrain, int[] indicesValidation)
        {
            // Training data of this fold:
            var trainingInputs = input.Get(indicesTrain);
            var trainingOutputs = responses.Get(indicesTrain);

            // Validation data of this fold:
            var validationInputs = input.Get(indicesValidation);
            var validationOutputs = responses.Get(indicesValidation);

            var tree = new DecisionTree(decisionVariables, landcoverTypeCount);
            C45Learning c45Learning = new C45Learning(tree);
            c45Learning.Learn(trainingInputs, trainingOutputs);

            var predictedTraining = tree.Decide(trainingInputs);
            var predictedValidation = tree.Decide(validationInputs);

            double trainingError = new ZeroOneLoss(trainingOutputs).Loss(predictedTraining);
            double validationError = new ZeroOneLoss(validationOutputs).Loss(predictedValidation);

            // NOTE(review): the matrix is built with one class fewer than the tree - confirm this is intended.
            foldMatrices[k] = new GeneralConfusionMatrix(landcoverTypeCount - 1, validationOutputs, predictedValidation);

            // Return a new information structure containing the errors achieved.
            return new CrossValidationValues(trainingError, validationError);
        };

        crossvalidation.Compute();

        return new List<GeneralConfusionMatrix>(foldMatrices);
    });
}
/// <summary>
/// Trains the classifier on each of the <c>K</c> training folds in turn and
/// records its performance on the same fixed test set after every training.
/// </summary>
/// <param name="classifier">Classifier to train and test.</param>
/// <param name="parameter">Parameter passed to every training run.</param>
/// <param name="experimentPerformance">Collector receiving one test result per fold.</param>
/// <param name="crossValidation">Source of the training folds.</param>
/// <param name="testSet">Test set used for every fold.</param>
protected void RunExperiment(Classifier.Classifier classifier, Parameter.Parameter parameter, ExperimentPerformance experimentPerformance, CrossValidation<Instance.Instance> crossValidation, InstanceList.InstanceList testSet)
{
    var fold = 0;
    while (fold < K)
    {
        var foldTrainingData = new InstanceList.InstanceList(crossValidation.GetTrainFold(fold));
        classifier.Train(foldTrainingData, parameter);

        var foldPerformance = classifier.Test(testSet);
        experimentPerformance.Add(foldPerformance);

        fold++;
    }
}
/// <summary>
/// Checks that splitting 100 samples into 10 folds assigns exactly 10
/// samples to each fold index 0..9.
/// </summary>
public void SplittingsTest()
{
    int[] assignment = CrossValidation.Splittings(100, 10);

    for (int fold = 0; fold < 10; fold++)
    {
        // Count the samples assigned to this fold.
        int members = 0;
        foreach (int assigned in assignment)
        {
            if (assigned == fold)
            {
                members++;
            }
        }

        Assert.AreEqual(10, members);
    }
}
/// <summary>
/// With 40 shuffled labels (10 ones, 30 zeros): first expects a validation
/// created from size alone to throw from <c>Compute</c> when the fitting
/// function asserts 2 positives per validation fold; then expects a
/// validation created from the labels to put exactly one positive in each
/// validation fold and nine in each training fold.
/// </summary>
public void NotEnoughSamplesTest1()
{
    Accord.Math.Random.Generator.Seed = 0;

    int[] labels = Matrix.Vector(10, 1).Concatenate(Matrix.Vector(30, 0));
    Vector.Shuffle(labels);

    // Part 1: validation created from the sample count only.
    var crossvalidation = new CrossValidation<MulticlassSupportVectorMachine>(size: 40, folds: 10);
    crossvalidation.RunInParallel = false;
    crossvalidation.Fitting = (int index, int[] indicesTrain, int[] indicesValidation) =>
    {
        int positivesInValidation = labels.Submatrix(indicesValidation).Count(label => label == 1);
        Assert.AreEqual(2, positivesInValidation);

        int positivesInTraining = labels.Submatrix(indicesTrain).Count(label => label == 1);
        Assert.AreEqual(9 * 2, positivesInTraining);

        return new CrossValidationValues<MulticlassSupportVectorMachine>(null, 0, 0);
    };

    bool computeFailed;
    try
    {
        crossvalidation.Compute();
        computeFailed = false;
    }
    catch (Exception)
    {
        computeFailed = true;
    }

    Assert.IsTrue(computeFailed);

    // Part 2: validation created from the labels themselves.
    crossvalidation = new CrossValidation<MulticlassSupportVectorMachine>(labels, 2, folds: 10);
    crossvalidation.RunInParallel = false;
    crossvalidation.Fitting = (int index, int[] indicesTrain, int[] indicesValidation) =>
    {
        int positivesInValidation = labels.Submatrix(indicesValidation).Count(label => label == 1);
        Assert.AreEqual(1, positivesInValidation);

        int positivesInTraining = labels.Submatrix(indicesTrain).Count(label => label == 1);
        Assert.AreEqual(9, positivesInTraining);

        return new CrossValidationValues<MulticlassSupportVectorMachine>(null, 0, 0);
    };

    crossvalidation.Compute();
}
/// <summary>
/// Fits a multinomial logistic regression with the lower-bound Newton-Raphson
/// learner, reports cross-validation and confusion-matrix statistics through
/// <c>Funcs.Utility.OutPutStats</c>, saves the model and returns its decisions
/// for the training inputs.
/// </summary>
/// <param name="input1">Feature vectors, one row per sample.</param>
/// <param name="labels">Class label of each sample.</param>
/// <param name="SaveFile">Path whose ".csv" is replaced by ".MLR.save" to form the model file name.</param>
/// <returns>The model's decision for every row of <paramref name="input1"/>.</returns>
public static int[] MultiNomialLogRegressionLowerBoundNewtonRaphson(double[][] input1, int[] labels, string SaveFile)
{
    // http://accord-framework.net/docs/html/T_Accord_Statistics_Models_Regression_MultinomialLogisticRegression.htm

    // Learner settings shared by the final model and the cross-validation.
    const int maxIterations = 10;
    const double tolerance = 1e-6;

    // Estimation algorithm used for the final regression.
    LowerBoundNewtonRaphson lbnr = new LowerBoundNewtonRaphson()
    {
        MaxIterations = maxIterations,
        Tolerance = tolerance
    };

    var cv = CrossValidation.Create(
        k: 10, // 10-fold cross-validation

        // Cross-validate the same configuration as the final model. The
        // previous code used a default-configured learner here, so the
        // reported estimate described a different model.
        learner: (p) => new LowerBoundNewtonRaphson()
        {
            MaxIterations = maxIterations,
            Tolerance = tolerance
        },

        // Performance is measured as the zero-one (misclassification) loss.
        loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),

        // No special preparation before learning: just fit the fold.
        fit: (teach, x, y, w) => teach.Learn(x, y, w),

        x: input1,
        y: labels
    );

    // Generate a cross-validation of the data.
    var cvresult = cv.Learn(input1, labels);

    // Estimate the final model on all data.
    MultinomialLogisticRegression mlr = lbnr.Learn(input1, labels);

    // Generate statistics from the confusion matrices.
    ConfusionMatrix cm = ConfusionMatrix.Estimate(mlr, input1, labels);
    GeneralConfusionMatrix gcm = cvresult.ToConfusionMatrix(input1, labels);

    Funcs.Utility.OutPutStats(cvresult.NumberOfSamples, cvresult.NumberOfInputs,
                              cvresult.Training.Mean, gcm.Accuracy, cm.FalsePositives, cm.FalseNegatives, cm.FScore);

    // Compute the model answers.
    int[] answers = mlr.Decide(input1);

    string modelsavefile = SaveFile.Replace(".csv", ".MLR.save");
    mlr.Save(modelsavefile, compression: SerializerCompression.None);

    return answers;
}
/// <summary>
/// Parses the embedded decision tree data, cross-validates a regression
/// decision tree learner on it and returns the mean squared error of the
/// predictions.
/// </summary>
/// <param name="folds">Number of folds passed to the cross-validation.</param>
/// <returns>Error reported by <c>MeanSquaredErrorRegressionMetric</c>.</returns>
double CrossValidate(int folds)
{
    const string targetColumn = "T";

    var csv = new CsvParser(() => new StringReader(Resources.DecisionTreeData));

    // Every column except the target becomes an observation column.
    var features = csv
        .EnumerateRows(column => !column.Contains(targetColumn))
        .ToF64Matrix();
    var expected = csv
        .EnumerateRows(targetColumn)
        .ToF64Vector();

    // Random index sampler constructed with the fixed value 42.
    var sampler = new RandomIndexSampler<double>(42);
    var validation = new CrossValidation<double>(sampler, folds);
    var predicted = validation.CrossValidate(new RegressionDecisionTreeLearner(), features, expected);

    return new MeanSquaredErrorRegressionMetric().Error(expected, predicted);
}
/// <summary>
/// Cross-validates the classifier trained by <c>TrainAndPredict</c> on the
/// feature vectors of the model and returns one confusion matrix per fold,
/// computed on that fold's validation partition.
/// </summary>
/// <param name="classificationModel">Model providing the labelled feature vectors and land cover types.</param>
/// <param name="folds">Number of cross-validation folds.</param>
/// <returns>A task producing the per-fold confusion matrices, ordered by fold index.</returns>
public override Task<List<GeneralConfusionMatrix>> ComputeFoldedConfusionMatrixAsync(ClassificationModel classificationModel, int folds)
{
    return Task.Factory.StartNew(() =>
    {
        int numFeatures = classificationModel.FeatureVectors.Count;

        double[][] input = new double[numFeatures][];
        int[] responses = new int[numFeatures];

        for (int featureIndex = 0; featureIndex < numFeatures; ++featureIndex)
        {
            var featureVector = classificationModel.FeatureVectors[featureIndex];

            // Scale the band intensities by ushort.MaxValue.
            input[featureIndex] = Array.ConvertAll(featureVector.FeatureVector.BandIntensities, s => (double)s / ushort.MaxValue);
            responses[featureIndex] = featureVector.FeatureClass;
        }

        // Create a new cross-validation passing the data set size and the number of folds.
        var crossvalidation = new CrossValidation(input.Length, folds);

        // The fitting callback may be invoked for several folds concurrently,
        // so a shared List<T>.Add is not safe. Each fold writes only to its
        // own slot instead, which also makes the result order deterministic.
        var foldMatrices = new GeneralConfusionMatrix[crossvalidation.Folds.Length];

        crossvalidation.Fitting = delegate(int k, int[] indicesTrain, int[] indicesValidation)
        {
            // Training data of this fold:
            var trainingInputs = input.Get(indicesTrain);
            var trainingOutputs = responses.Get(indicesTrain);

            // Validation data of this fold:
            var validationInputs = input.Get(indicesValidation);
            var validationOutputs = responses.Get(indicesValidation);

            int[] predictedTraining;
            int[] predictedValidation;
            TrainAndPredict(Complexity, Gamma, Degree, trainingInputs, trainingOutputs, validationInputs, out predictedTraining, out predictedValidation);

            double trainingError = new ZeroOneLoss(trainingOutputs).Loss(predictedTraining);
            double validationError = new ZeroOneLoss(validationOutputs).Loss(predictedValidation);

            foldMatrices[k] = new GeneralConfusionMatrix(classificationModel.LandCoverTypes.Count, validationOutputs, predictedValidation);

            // Return a new information structure containing the errors achieved.
            return new CrossValidationValues(trainingError, validationError);
        };

        crossvalidation.Compute();

        return new List<GeneralConfusionMatrix>(foldMatrices);
    });
}
/// <summary>
/// Checks that a 10-fold validation over 100 samples calls the fitting
/// function once per fold in order, with 90 training and 10 validation
/// indices each time, and that the returned statistics match the values
/// produced by the fitting function (k and 2k).
/// </summary>
public void FittingTest()
{
    int[] foldAssignment = CrossValidation.Splittings(100, 10);
    int[] samples = Matrix.Indices(0, 100);

    var validation = new CrossValidation(foldAssignment, 10);
    validation.RunInParallel = false;

    int expectedFold = 0;

    validation.Fitting = (k, trainingSamples, validationSamples) =>
    {
        Assert.AreEqual(expectedFold, k);
        Assert.AreEqual(90, trainingSamples.Length);
        Assert.AreEqual(10, validationSamples.Length);

        // Selecting by index must return the same elements as direct indexing.
        int[] selectedTraining = samples.Submatrix(trainingSamples);
        int[] selectedValidation = samples.Submatrix(validationSamples);

        int position = 0;
        while (position < selectedTraining.Length)
        {
            Assert.AreEqual(samples[trainingSamples[position]], selectedTraining[position]);
            position++;
        }

        position = 0;
        while (position < selectedValidation.Length)
        {
            Assert.AreEqual(samples[validationSamples[position]], selectedValidation[position]);
            position++;
        }

        expectedFold++;

        // Placeholder model, training value k and validation value 2k for fold k.
        return new CrossValidationValues<object>(new object(), k, 2 * k);
    };

    var outcome = validation.Compute();

    Assert.AreEqual(10, expectedFold);
    Assert.AreEqual(4.5, outcome.Training.Mean);
    Assert.AreEqual(9.0, outcome.Validation.Mean);
    Assert.AreEqual(
        2 * outcome.Training.StandardDeviation,
        outcome.Validation.StandardDeviation);

    Assert.AreEqual(validation.Folds.Length, outcome.Training.Sizes.Length);
    Assert.AreEqual(validation.Folds.Length, outcome.Validation.Sizes.Length);

    for (int fold = 0; fold < outcome.Training.Sizes.Length; fold++)
    {
        Assert.AreEqual(90, outcome.Training.Sizes[fold]);
    }

    for (int fold = 0; fold < outcome.Validation.Sizes.Length; fold++)
    {
        Assert.AreEqual(10, outcome.Validation.Sizes[fold]);
    }
}
/// <summary>
/// Fits a multinomial logistic regression with the BFGS-based learner,
/// reports 4-fold cross-validation and confusion-matrix statistics through
/// <c>Funcs.Utility.OutPutStats</c>, saves the model to
/// <paramref name="fName"/> and returns its decisions for the training inputs.
/// </summary>
/// <param name="input">Feature vectors, one row per sample.</param>
/// <param name="labels">Class label of each sample.</param>
/// <param name="fName">File the trained model is saved to.</param>
/// <returns>The model's decision for every row of <paramref name="input"/>.</returns>
public static int[] MultiNomialLogisticRegressionBFGS(double[][] input, int[] labels, string fName)
{
    // Learner for the final model.
    var mlbfgs = new MultinomialLogisticLearning<BroydenFletcherGoldfarbShanno>();

    // Estimate the logistic regression on all data.
    MultinomialLogisticRegression mlr = mlbfgs.Learn(input, labels);

    // Cross-validate the same kind of learner to estimate how well the
    // model is expected to generalise.
    int folds = 4; // could play around with this later
    var cv = CrossValidation.Create(
        k: folds,
        learner: (p) => new MultinomialLogisticLearning<BroydenFletcherGoldfarbShanno>(),
        loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
        fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
        x: input,
        y: labels);

    var result = cv.Learn(input, labels);

    GeneralConfusionMatrix gcm = result.ToConfusionMatrix(input, labels);
    ConfusionMatrix cm = ConfusionMatrix.Estimate(mlr, input, labels);

    // Output the relevant statistics.
    Funcs.Utility.OutPutStats(result.NumberOfSamples, result.NumberOfInputs,
                              result.Training.Mean, gcm.Accuracy, cm.FalsePositives, cm.FalseNegatives, cm.FScore);

    // Compute the model predictions. The per-class probabilities and the
    // zero-one error that used to be computed here were never used, so those
    // extra passes over the data were removed.
    int[] answers = mlr.Decide(input);

    mlr.Save(fName, compression: SerializerCompression.None);

    return answers;
}
/// <summary>
/// Click handler: runs a 10-fold cross-validation of a kernel support vector
/// machine trained with SMO on the data currently bound to
/// <c>dgvLearningSource</c>. Columns 0-1 are the inputs, column 2 the label.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event arguments (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    // Without a bound DataTable there is nothing to validate. The previous
    // code dereferenced the result of the "as" cast and crashed with a
    // NullReferenceException in that case.
    var table = dgvLearningSource.DataSource as DataTable;
    if (table == null)
    {
        return;
    }

    // Creates a matrix from the source data table
    double[,] sourceMatrix = table.ToMatrix(out sourceColumns);
    int rows = sourceMatrix.GetLength(0);

    // Get only the input vector values
    var inputs = sourceMatrix.Submatrix(0, rows - 1, 0, 1).ToArray();

    // Get only the label outputs
    var outputs = new int[rows];
    for (int i = 0; i < outputs.Length; i++)
    {
        outputs[i] = (int)sourceMatrix[i, 2];
    }

    var cv = new CrossValidation<KernelSupportVectorMachine>(inputs.Length, 10);
    cv.Fitting = (int k, int[] training, int[] testing) =>
    {
        var trainingInputs = inputs.Submatrix(training);
        var trainingOutputs = outputs.Submatrix(training);
        var testingInputs = inputs.Submatrix(testing);
        var testingOutputs = outputs.Submatrix(testing);

        // Create the specified kernel
        IKernel kernel = getKernel();

        // Creates the support vector machine using the selected kernel
        var svm = new KernelSupportVectorMachine(kernel, 2);

        // Creates a new instance of the SMO learning algorithm
        var smo = new SequentialMinimalOptimization(svm, trainingInputs, trainingOutputs);

        // Set learning parameters from the numeric controls
        smo.Complexity = (double)numC.Value;
        smo.Tolerance = (double)numT.Value;

        // Run
        double trainingError = smo.Run();
        double validationError = smo.ComputeError(testingInputs, testingOutputs);

        return new CrossValidationValues<KernelSupportVectorMachine>(svm, trainingError, validationError);
    };

    // NOTE(review): the result is computed but not used or displayed - confirm whether it should be shown.
    cv.Compute();
}
/// <summary>
/// Prepares the input and output variables and cross-validates a Bernoulli
/// Naive Bayes learner on them, storing the outcome in <c>Result</c>.
/// </summary>
/// <param name="numOfFolds">Number of cross-validation folds.</param>
/// <param name="minOccurences">Value forwarded to <c>CalcInputAndOutputVariables</c>.</param>
private void ClassifyDataByNaiveBayes(int numOfFolds = 3, int minOccurences = 1)
{
    CalcInputAndOutputVariables(minOccurences);

    var validation = CrossValidation.Create(
        k: numOfFolds,
        learner: p => new NaiveBayesLearning<BernoulliDistribution>(),
        loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
        fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
        x: InputVariables,
        y: OutputVariables
    );

    // Run the cross-validation; Result becomes null when the outcome is not
    // of the expected result type.
    var outcome = validation.Learn(InputVariables, OutputVariables);
    Result = outcome as CrossValidationResult<TModel, double[], int>;
}
/// <summary>
/// Loads a stored multi-layer perceptron and the normalised iris data set,
/// runs a cross-validation with a multi-class classifier evaluator and
/// prints the result to the console.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
public static void Main(string[] args)
{
    string[] classLabels = { "Virginica", "Setosa", "Versicolor" };

    // Network previously saved to a file
    var network = (MultiLayerPerceptron)NeuralNetwork.createFromFile("irisNet.nnet");

    // Comma-delimited data file; presumably 4 input and 3 output columns - confirm against DataSet.createFromFile
    DataSet irisData = DataSet.createFromFile("data_sets/iris_data_normalised.txt", 4, 3, ",");

    // Third argument is presumably the number of folds - confirm against the constructor
    CrossValidation validation = new CrossValidation(network, irisData, 5);
    validation.addEvaluator(new ClassifierEvaluator.MultiClass(classLabels));
    validation.run();

    CrossValidationResult summary = validation.Result;
    Console.WriteLine(summary);
}
/// <summary>
/// Estimates the accuracy of an ID3 decision tree on the <c>inputs</c> /
/// <c>outputs</c> members using 5-fold cross-validation.
/// </summary>
/// <returns>
/// The accuracy of the aggregate validation confusion matrix multiplied
/// by 100, i.e. expressed as a percentage.
/// </returns>
public double Accuracy()
{
    var cv = CrossValidation.Create(
        k: 5, // We will be using 5-fold cross validation

        // Here we create the learning algorithm: variables may join the
        // decision path at most once. The earlier comment promised a
        // maximum tree height of 6, but the code has always passed 0.
        // NOTE(review): Accord.NET documents MaxHeight = 0 as "no height
        // limit" - confirm that an unbounded tree is what is wanted here.
        learner: (p) => new ID3Learning()
        {
            Join = 1,
            MaxHeight = 0
        },

        // This function can be used to perform any special operations
        // before the actual learning is done; here it simply learns.
        fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

        // How the tree performance is measured: zero-one loss.
        loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),

        // The input and output data that will be used in cross-validation.
        x: inputs,
        y: outputs
    );

    // Partition the data into folds and learn one model per fold.
    var result = cv.Learn(inputs, outputs);

    // Aggregate confusion matrix for the validation sets.
    GeneralConfusionMatrix gcm = result.ToConfusionMatrix(inputs, outputs);

    return gcm.Accuracy * 100;
}
/// <summary>
/// Trains and cross-validates one multi-layer perceptron per generated
/// topology and keeps the network whose first classification-metrics entry
/// has the highest F-measure.
/// </summary>
/// <param name="dataSet"> training set used for error estimation </param>
/// <returns> neural network model with optimized architecture for provided data set </returns>
public virtual NeuralNetwork createOptimalModel(DataSet dataSet)
{
    List<int> seedLayers = new List<int> { minNeuronsPerLayer };
    findArchitectures(1, minNeuronsPerLayer, seedLayers);

    LOG.info("Total [{}] different network topologies found", allArchitectures.Count);

    foreach (List<int> layers in allArchitectures)
    {
        // Surround the generated layer sizes with the data set's input and
        // output sizes. NOTE(review): this mutates the list stored in
        // allArchitectures, so a second call would add the sizes again.
        layers.Insert(0, dataSet.InputSize);
        layers.Add(dataSet.OutputSize);

        LOG.info("Architecture: [{}]", layers);

        MultiLayerPerceptron candidate = new MultiLayerPerceptron(layers);

        // NOTE(review): a new listener is added to the shared learning rule
        // on every iteration and never removed - confirm this is intended.
        learningRule.addListener(new LearningListener(10, learningRule.MaxIterations));
        candidate.LearningRule = learningRule;

        // Third argument is presumably the number of folds - confirm.
        errorEstimationMethod = new CrossValidation(candidate, dataSet, 10);
        errorEstimationMethod.run();

        // FIX (marker carried over from the original code)
        var evaluator = errorEstimationMethod.getEvaluator<ClassifierEvaluator.MultiClass>(typeof(ClassifierEvaluator.MultiClass));
        ClassificationMetrics[] metrics = ClassificationMetrics.createFromMatrix(evaluator.Result);

        // Keep the candidate with the highest F-measure seen so far; only
        // the first metrics entry is compared.
        bool isNewOptimum = optimalResult == null || optimalResult.FMeasure < metrics[0].FMeasure;
        if (isNewOptimum)
        {
            LOG.info("Architecture [{}] became optimal architecture with metrics {}", layers, metrics);
            optimalResult = metrics[0];
            optimalClassifier = candidate;
            optimalArchitecure = layers;
        }

        LOG.info("#################################################################");
    }

    LOG.info("Optimal Architecture: {}", optimalArchitecure);
    return optimalClassifier;
}
/// <summary>
/// Cross-validates a logistic regression trained with iteratively
/// reweighted least squares on <c>InputVariables</c> /
/// <c>OutputVariables</c> and stores the outcome in <c>Result</c>.
/// </summary>
/// <param name="numOfFolds">Number of cross-validation folds (k).</param>
/// <param name="minOccurences">Forwarded unchanged to <c>CalcInputAndOutputVariables</c>.</param>
/// <param name="maxIterations">
/// Iteration limit handed to the learning algorithm. It was previously
/// ignored in favour of a hard-coded 100, which is also the default here,
/// so callers relying on the default see no change.
/// </param>
private void ClassifyDataByLogisticRegression(int numOfFolds = 3, int minOccurences = 1, int maxIterations = 100)
{
    CalcInputAndOutputVariables(minOccurences);

    var cvLogisticRegressionClassifier = CrossValidation.Create(
        k: numOfFolds,
        learner: (p) => new IterativeReweightedLeastSquares<LogisticRegression>()
        {
            // Honour the caller-supplied limit instead of a fixed value.
            MaxIterations = maxIterations,
            Regularization = 1e-6
        },
        // Zero-one loss between the expected labels and the predictions
        loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
        fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
        x: InputVariables,
        y: OutputVariables
    );

    // Run cross-validation; the "as" cast yields null when the runtime
    // type of the outcome does not match the expected result type.
    Result = cvLogisticRegressionClassifier.Learn(InputVariables, OutputVariables) as CrossValidationResult<TModel, double[], int>;
}
/// <summary>
/// Runs a seeded, randomised k-fold cross-validation of the tagger over the
/// given sentences, writes a report to the debug output and asserts that
/// the mean score lies between the required minimum and 97%.
/// </summary>
/// <param name="name">Label used in the debug report.</param>
/// <param name="minAccuracy">
/// Required minimum mean. Values below 1 are treated as fractions and
/// scaled by 100; other values are used as given.
/// </param>
/// <param name="folds">Number of folds.</param>
/// <param name="sentences">One or more sentence sequences to evaluate on.</param>
public void CrossValidation(string name, double minAccuracy, int folds, params IEnumerable<Sentence>[] sentences)
{
    CrossValidation<LuMiiTagger> evaluation = new CrossValidation<LuMiiTagger>();
    evaluation.Folds = folds;
    evaluation.Randomize = true;
    evaluation.RandomSeed = 1;

    // Iterate with the declared element type. The previous loop variable was
    // typed Sentence[], which inserted a hidden cast and threw
    // InvalidCastException for any sequence that is not an array.
    foreach (IEnumerable<Sentence> group in sentences)
    {
        evaluation.Sentences.AddRange(group);
    }

    Assert.Greater(evaluation.Sentences.Count, 0);

    var results = evaluation.Evaluate();

    Debug.WriteLine("{0}-fold cross validation for {1}", evaluation.Folds, name);
    Debug.WriteLine("{0} sentences, {1} tokens", evaluation.Sentences.Count, evaluation.Sentences.SelectMany(t => t).Count());
    Debug.WriteLine("Mean: {0:0.00}% [{1:0.00}..{2:0.00} @ 99%]", results.Mean, results.ConfidenceIntervalAt99.Lower, results.ConfidenceIntervalAt99.Upper);

    foreach (var fold in results.OrderBy(f => f.Fold))
    {
        Debug.WriteLine("Fold {0}: {1:0.00}%", fold.Fold, fold.CorrectPercentage);
    }

    Debug.WriteLine("Duration: {0}", results.Duration);

    // Lower bound: the requested minimum, normalised to a percentage.
    Assert.Greater(results.Mean, minAccuracy < 1 ? minAccuracy * 100 : minAccuracy);

    // Upper bound carried over from the original test.
    Assert.Less(results.Mean, 0.97 * 100);
}
/// <summary>
/// Returns the handle stored in <c>swigCPtr</c> for the given wrapper, or a
/// handle with a null wrapper and <see cref="IntPtr.Zero"/> when
/// <paramref name="obj"/> is null.
/// </summary>
/// <param name="obj">Wrapper object; may be null.</param>
/// <returns>The wrapper's handle, or a zero handle for null.</returns>
internal static HandleRef getCPtr(CrossValidation obj)
{
    if (obj == null)
    {
        return new HandleRef(null, IntPtr.Zero);
    }

    return obj.swigCPtr;
}
/// <summary>
/// Runs a 10-fold cross-validation of a kernel support vector machine,
/// trained with Sequential Minimal Optimization, on the table currently
/// bound to <c>dgvLearningSource</c>.
/// </summary>
/// <param name="sender">Source of the click event (not used).</param>
/// <param name="e">Event data (not used).</param>
private void button1_Click(object sender, EventArgs e)
{
    // Without a bound DataTable there is nothing to validate. The previous
    // code passed the null result of the "as" cast straight to ToMatrix.
    var sourceTable = dgvLearningSource.DataSource as DataTable;
    if (sourceTable == null)
    {
        return;
    }

    // Creates a matrix from the source data table
    double[,] sourceMatrix = sourceTable.ToMatrix(out sourceColumns);
    int rowCount = sourceMatrix.GetLength(0);

    // Columns 0..1 hold the input vector values
    var inputs = sourceMatrix.Submatrix(0, rowCount - 1, 0, 1).ToArray();

    // Column 2 holds the label outputs
    var outputs = new int[rowCount];
    for (int i = 0; i < outputs.Length; i++)
    {
        outputs[i] = (int)sourceMatrix[i, 2];
    }

    // Read the learning parameters once so that every fold is trained with
    // the same values and the fitting callback does not read the numeric
    // controls itself.
    double complexity = (double)numC.Value;
    double tolerance = (double)numT.Value;

    var cv = new CrossValidation<KernelSupportVectorMachine>(inputs.Length, 10);

    cv.Fitting = (int k, int[] training, int[] testing) =>
    {
        var trainingInputs = inputs.Submatrix(training);
        var trainingOutputs = outputs.Submatrix(training);
        var testingInputs = inputs.Submatrix(testing);
        var testingOutputs = outputs.Submatrix(testing);

        // Create the specified kernel.
        // NOTE(review): getKernel() is still called once per fold; if it
        // reads UI controls, consider hoisting it as well - confirm that
        // sharing one kernel instance between folds is safe first.
        IKernel kernel = getKernel();

        // Creates the Support Vector Machine using the selected kernel
        var svm = new KernelSupportVectorMachine(kernel, 2);

        // Creates a new instance of the SMO learning algorithm
        var smo = new SequentialMinimalOptimization(svm, trainingInputs, trainingOutputs);
        smo.Complexity = complexity;
        smo.Tolerance = tolerance;

        // Train on the training fold, then score the held-out fold
        double trainingError = smo.Run();
        double validationError = smo.ComputeError(testingInputs, testingOutputs);

        return new CrossValidationValues<KernelSupportVectorMachine>(svm, trainingError, validationError);
    };

    // The result is currently not consumed by this handler.
    cv.Compute();
}
/// <summary>
/// Checks two ways of splitting 40 labels (ten 1s, thirty 0s) into 10
/// folds: the size-only constructor is expected to make Compute() throw,
/// while the label-aware constructor is expected to put exactly one
/// positive label in every validation fold.
/// </summary>
public void NotEnoughSamplesTest1()
{
    Accord.Math.Random.Generator.Seed = 0;

    // Presumably ten 1s followed by thirty 0s, shuffled afterwards.
    int[] labels = Matrix.Vector(10, 1).Concatenate(Matrix.Vector(30, 0));
    Vector.Shuffle(labels);

    // Part 1: folds created from the sample count alone.
    var sizeBased = new CrossValidation<MulticlassSupportVectorMachine>(size: 40, folds: 10);
    sizeBased.RunInParallel = false;
    sizeBased.Fitting = (int fold, int[] trainIdx, int[] validIdx) =>
    {
        int positivesInValidation = labels.Submatrix(validIdx).Count(x => x == 1);
        Assert.AreEqual(2, positivesInValidation);

        int positivesInTraining = labels.Submatrix(trainIdx).Count(x => x == 1);
        Assert.AreEqual(9 * 2, positivesInTraining);

        return new CrossValidationValues<MulticlassSupportVectorMachine>(null, 0, 0);
    };

    // NOTE(review): the catch-all also captures assertion failures raised
    // inside the fitting callback, so any exception satisfies this check.
    bool thrown;
    try
    {
        sizeBased.Compute();
        thrown = false;
    }
    catch (Exception)
    {
        thrown = true;
    }

    Assert.IsTrue(thrown);

    // Part 2: folds created from the labels themselves (2 classes).
    var labelBased = new CrossValidation<MulticlassSupportVectorMachine>(labels, 2, folds: 10);
    labelBased.RunInParallel = false;
    labelBased.Fitting = (int fold, int[] trainIdx, int[] validIdx) =>
    {
        int positivesInValidation = labels.Submatrix(validIdx).Count(x => x == 1);
        Assert.AreEqual(1, positivesInValidation);

        int positivesInTraining = labels.Submatrix(trainIdx).Count(x => x == 1);
        Assert.AreEqual(9, positivesInTraining);

        return new CrossValidationValues<MulticlassSupportVectorMachine>(null, 0, 0);
    };

    labelBased.Compute();
}
/// <summary>
/// Form load handler: fills the continuous-data table, then runs a sample
/// 3-fold cross-validation of a polynomial-kernel SVM on a repeated XOR
/// data set.
/// </summary>
/// <param name="sender">Source of the load event (not used).</param>
/// <param name="e">Event data (not used).</param>
private void Test_Load(object sender, EventArgs e)
{
    // Loads data into the 'diabetesDataSetB.ContinuousData' table.
    this.continuousDataTableAdapter.Fill(this.diabetesDataSetB.ContinuousData);

    // Sample: using cross-validation to assess how well a support vector
    // machine learns the XOR problem. The four XOR patterns are repeated
    // four times.
    double[][] data =
    {
        new double[] { -1, -1 }, new double[] { 1, -1 }, new double[] { -1, 1 }, new double[] { 1, 1 },
        new double[] { -1, -1 }, new double[] { 1, -1 }, new double[] { -1, 1 }, new double[] { 1, 1 },
        new double[] { -1, -1 }, new double[] { 1, -1 }, new double[] { -1, 1 }, new double[] { 1, 1 },
        new double[] { -1, -1 }, new double[] { 1, -1 }, new double[] { -1, 1 }, new double[] { 1, 1 },
    };

    // XOR result for each sample above
    int[] xor =
    {
        -1, 1, 1, -1,
        -1, 1, 1, -1,
        -1, 1, 1, -1,
        -1, 1, 1, -1,
    };

    // The splitter only needs the data set size and the number of folds.
    var validator = new CrossValidation<KernelSupportVectorMachine>(size: data.Length, folds: 3);

    // For each fold the splitter hands over the indices of the samples to
    // train on and the indices of the samples to validate on.
    validator.Fitting = (k, trainIdx, validIdx) =>
    {
        // Training subset
        var foldInputs = data.Submatrix(trainIdx);
        var foldOutputs = xor.Submatrix(trainIdx);

        // Validation subset
        var heldOutInputs = data.Submatrix(validIdx);
        var heldOutOutputs = xor.Submatrix(validIdx);

        // Machine with a polynomial kernel of degree 2 and 2 inputs
        var machine = new KernelSupportVectorMachine(new Polynomial(2), 2);

        // Learn the training subset with SMO
        var learner = new SequentialMinimalOptimization(machine, foldInputs, foldOutputs);
        double errorOnTraining = learner.Run();

        // Score the held-out subset
        double errorOnValidation = learner.ComputeError(heldOutInputs, heldOutOutputs);

        // Hand back the model together with both errors
        return new CrossValidationValues<KernelSupportVectorMachine>(machine, errorOnTraining, errorOnValidation);
    };

    // Compute the cross-validation
    var summary = validator.Compute();

    // Mean errors over all folds, kept in locals for inspection
    double meanTrainingError = summary.Training.Mean;
    double meanValidationError = summary.Validation.Mean;
}
/// <summary>
/// Cross-validates a hidden Markov sequence classifier on twenty short
/// binary sequences (ten per class) using 3 folds and checks the fold
/// count, model count and the mean training and validation errors.
/// </summary>
public void CrossvalidationConstructorTest2()
{
    Accord.Math.Tools.SetupGenerator(0);

    // Tolerance for comparing computed means; exact equality on doubles is
    // brittle across runtimes and platforms.
    const double tolerance = 1e-10;

    // This is a sample code on how to use Cross-Validation
    // to assess the performance of Hidden Markov Models.

    // Declare some testing data
    int[][] inputs = new int[][]
    {
        new int[] { 0,1,1,0 },   // Class 0
        new int[] { 0,0,1,0 },   // Class 0
        new int[] { 0,1,1,1,0 }, // Class 0
        new int[] { 0,1,1,1,0 }, // Class 0
        new int[] { 0,1,1,0 },   // Class 0
        new int[] { 0,1,1,1,0 }, // Class 0
        new int[] { 0,1,1,1,0 }, // Class 0
        new int[] { 0,1,0,1,0 }, // Class 0
        new int[] { 0,1,0 },     // Class 0
        new int[] { 0,1,1,0 },   // Class 0

        new int[] { 1,0,0,1 },   // Class 1
        new int[] { 1,1,0,1 },   // Class 1
        new int[] { 1,0,0,0,1 }, // Class 1
        new int[] { 1,0,1 },     // Class 1
        new int[] { 1,1,0,1 },   // Class 1
        new int[] { 1,0,1 },     // Class 1
        new int[] { 1,0,0,1 },   // Class 1
        new int[] { 1,0,0,0,1 }, // Class 1
        new int[] { 1,0,1 },     // Class 1
        new int[] { 1,0,0,0,1 }, // Class 1
    };

    int[] outputs = new int[]
    {
        0,0,0,0,0,0,0,0,0,0, // First 10 sequences are of class 0
        1,1,1,1,1,1,1,1,1,1, // Last 10 sequences are of class 1
    };

    // Create a new Cross-validation algorithm passing the data set size and the number of folds
    var crossvalidation = new CrossValidation<HiddenMarkovClassifier>(size: inputs.Length, folds: 3);

    // Define a fitting function that trains a hidden Markov classifier on
    // the subset of the data indicated by cross-validation.
    crossvalidation.Fitting = delegate(int k, int[] indicesTrain, int[] indicesValidation)
    {
        // The fitting function receives the indices of the original set
        // to train on and the indices to validate on.

        // Training data
        var trainingInputs = inputs.Submatrix(indicesTrain);
        var trainingOutputs = outputs.Submatrix(indicesTrain);

        // Validation data
        var validationInputs = inputs.Submatrix(indicesValidation);
        var validationOutputs = outputs.Submatrix(indicesValidation);

        // We are trying to predict two different classes
        int classes = 2;

        // Each sequence may have up to two symbols (0 or 1)
        int symbols = 2;

        // Nested models will have two states each
        int[] states = new int[] { 2, 2 };

        // Creates a new Hidden Markov Model Classifier with the given parameters
        HiddenMarkovClassifier classifier = new HiddenMarkovClassifier(classes, states, symbols);

        // Create a new learning algorithm to train the sequence classifier
        var teacher = new HiddenMarkovClassifierLearning(classifier,

            // Each inner model is trained with Baum-Welch using a tolerance
            // of 0.001. Iterations = 0 presumably means "no iteration
            // limit" - confirm against the library documentation.
            modelIndex => new BaumWelchLearning(classifier.Models[modelIndex])
            {
                Tolerance = 0.001,
                Iterations = 0
            }
        );

        // Train the sequence classifier; the returned likelihood is not needed here
        teacher.Run(trainingInputs, trainingOutputs);

        double trainingError = teacher.ComputeError(trainingInputs, trainingOutputs);

        // Now we can compute the validation error on the validation data:
        double validationError = teacher.ComputeError(validationInputs, validationOutputs);

        // Return a new information structure containing the model and the errors achieved.
        return new CrossValidationValues<HiddenMarkovClassifier>(classifier, trainingError, validationError);
    };

    // Compute the cross-validation
    var result = crossvalidation.Compute();

    // Finally, check the measured performance.
    Assert.AreEqual(3, crossvalidation.K);
    Assert.AreEqual(0.0, result.Training.Mean, tolerance);
    Assert.AreEqual(0.055555555555555552, result.Validation.Mean, tolerance);

    Assert.AreEqual(3, crossvalidation.Folds.Length);
    Assert.AreEqual(3, result.Models.Length);
}
/// <summary>
/// Splits 40 shuffled labels (ten 1s, thirty 0s) into 10 label-aware folds
/// and checks that each validation fold holds exactly one positive label
/// while the matching training set holds the other nine.
/// </summary>
public void NotEnoughSamplesTest2()
{
    Accord.Math.Tools.SetupGenerator(0);

    // Presumably ten 1s followed by thirty 0s, shuffled afterwards.
    int[] labels = Matrix.Vector(10, 1).Concatenate(Matrix.Vector(30, 0));
    Accord.Statistics.Tools.Shuffle(labels);

    // Folds are created from the labels themselves (2 classes).
    var labelBased = new CrossValidation<MulticlassSupportVectorMachine>(labels, 2, folds: 10);
    labelBased.RunInParallel = false;
    labelBased.Fitting = (int fold, int[] trainIdx, int[] validIdx) =>
    {
        int positivesInValidation = labels.Submatrix(validIdx).Count(x => x == 1);
        Assert.AreEqual(1, positivesInValidation);

        int positivesInTraining = labels.Submatrix(trainIdx).Count(x => x == 1);
        Assert.AreEqual(9, positivesInTraining);

        // No model is trained; only the split itself is under test.
        return new CrossValidationValues<MulticlassSupportVectorMachine>(null, 0, 0);
    };

    labelBased.Compute();
}
/// <summary>
/// Runs a 3-fold cross-validation around a k-nearest-neighbours classifier
/// (k = 4, 3 classes) on nine three-dimensional samples and asserts that
/// the mean training and validation accuracies are both 1.
/// </summary>
public void KNearestNeighbor_CrossValidation()
{
    // Sample learning data: two instances of class 0, four of class 1
    // and three of class 2.
    double[][] inputs =
    {
        // Class 0
        new double[] { -5, -2, -1 },
        new double[] { -5, -5, -6 },

        // Class 1
        new double[] { 2, 1, 1 },
        new double[] { 1, 1, 2 },
        new double[] { 1, 2, 2 },
        new double[] { 3, 1, 2 },

        // Class 2
        new double[] { 11, 5, 4 },
        new double[] { 15, 5, 6 },
        new double[] { 10, 5, 6 },
    };

    int[] outputs =
    {
        0, 0,       // class 0
        1, 1, 1, 1, // class 1
        2, 2, 2     // class 2
    };

    // The splitter only needs the data set size and the number of folds.
    var validator = new CrossValidation(size: inputs.Length, folds: 3);

    // For each fold the splitter hands over the indices of the samples to
    // train on and the indices of the samples to validate on.
    validator.Fitting = (k, trainIdx, validIdx) =>
    {
        // Training subset
        var foldInputs = inputs.Submatrix(trainIdx);
        var foldOutputs = outputs.Submatrix(trainIdx);

        // Validation subset
        var heldOutInputs = inputs.Submatrix(validIdx);
        var heldOutOutputs = outputs.Submatrix(validIdx);

        // Decisions are cast from the 4 nearest neighbours of an instance.
        // NOTE(review): the classifier is built from the complete data set
        // (inputs/outputs), not from the training subset of this fold, so
        // the held-out samples are known to the model - confirm whether
        // this is intended before relying on the asserted accuracies.
        KNearestNeighbors classifier = new KNearestNeighbors(k: 4, classes: 3, inputs: inputs, outputs: outputs);

        // Classify both subsets
        int[] predictedOnTraining = foldInputs.Apply(classifier.Compute);
        int[] predictedOnHeldOut = heldOutInputs.Apply(classifier.Compute);

        // Accuracy on the training subset
        var trainingMatrix = new ConfusionMatrix(predictedOnTraining, foldOutputs);
        double accuracyOnTraining = trainingMatrix.Accuracy;

        // Accuracy on the held-out subset
        var heldOutMatrix = new ConfusionMatrix(predictedOnHeldOut, heldOutOutputs);
        double accuracyOnHeldOut = heldOutMatrix.Accuracy;

        // Hand back the model together with both accuracies (note that
        // accuracies, not errors, are reported for this classifier).
        return new CrossValidationValues(classifier, accuracyOnTraining, accuracyOnHeldOut);
    };

    // Compute the cross-validation
    var summary = validator.Compute();

    // Mean accuracies over all folds
    double meanTrainingAccuracy = summary.Training.Mean;
    double meanValidationAccuracy = summary.Validation.Mean;

    Assert.AreEqual(1, meanTrainingAccuracy);
    Assert.AreEqual(1, meanValidationAccuracy);
}
/// <summary>
/// Cross-validates a polynomial-kernel support vector machine on a repeated
/// XOR data set using 3 folds and checks the fold count, model count and
/// that both mean errors are zero.
/// </summary>
public void CrossvalidationConstructorTest()
{
    Accord.Math.Tools.SetupGenerator(0);

    // Sample: using cross-validation to assess how well a support vector
    // machine learns the XOR problem. The four XOR patterns are repeated
    // four times.
    double[][] data =
    {
        new double[] { -1, -1 }, new double[] { 1, -1 }, new double[] { -1, 1 }, new double[] { 1, 1 },
        new double[] { -1, -1 }, new double[] { 1, -1 }, new double[] { -1, 1 }, new double[] { 1, 1 },
        new double[] { -1, -1 }, new double[] { 1, -1 }, new double[] { -1, 1 }, new double[] { 1, 1 },
        new double[] { -1, -1 }, new double[] { 1, -1 }, new double[] { -1, 1 }, new double[] { 1, 1 },
    };

    // XOR result for each sample above
    int[] xor =
    {
        -1, 1, 1, -1,
        -1, 1, 1, -1,
        -1, 1, 1, -1,
        -1, 1, 1, -1,
    };

    // The splitter only needs the data set size and the number of folds.
    var crossvalidation = new CrossValidation<KernelSupportVectorMachine>(size: data.Length, folds: 3);

    // For each fold the splitter hands over the indices of the samples to
    // train on and the indices of the samples to validate on.
    crossvalidation.Fitting = (k, trainIdx, validIdx) =>
    {
        // Training subset
        var foldInputs = data.Submatrix(trainIdx);
        var foldOutputs = xor.Submatrix(trainIdx);

        // Validation subset
        var heldOutInputs = data.Submatrix(validIdx);
        var heldOutOutputs = xor.Submatrix(validIdx);

        // Machine with a polynomial kernel of degree 2 and 2 inputs
        var machine = new KernelSupportVectorMachine(new Polynomial(2), 2);

        // Learn the training subset with SMO
        var learner = new SequentialMinimalOptimization(machine, foldInputs, foldOutputs);
        double errorOnTraining = learner.Run();

        // Score the held-out subset
        double errorOnValidation = learner.ComputeError(heldOutInputs, heldOutOutputs);

        // Hand back the model together with both errors
        return new CrossValidationValues<KernelSupportVectorMachine>(machine, errorOnTraining, errorOnValidation);
    };

    // Compute the cross-validation
    var result = crossvalidation.Compute();

    // Mean errors over all folds, kept in locals for inspection
    double meanTrainingError = result.Training.Mean;
    double meanValidationError = result.Validation.Mean;

    Assert.AreEqual(3, crossvalidation.K);
    Assert.AreEqual(0, result.Training.Mean);
    Assert.AreEqual(0, result.Validation.Mean);

    Assert.AreEqual(3, crossvalidation.Folds.Length);
    Assert.AreEqual(3, result.Models.Length);
}