public static void Train(string trainingFolder)
{
    Console.WriteLine("Training SVM model with Cross-Validation...");

    (double[][] inputs, int[] output) = ReadData(trainingFolder);
    int crossValidateCount = Math.Min(maxCrossValidateCount, inputs.Length);

    // Ensure results are reproducible
    Accord.Math.Random.Generator.Seed = 0;

    Console.WriteLine("Grid-Search...");

    var gscv = GridSearch<double[], int>.CrossValidate(

        // Here we can specify the range of the parameters to be included in the search
        ranges: new
        {
            Sigma = GridSearch.Range(fromInclusive: 0.00000001, toExclusive: 3),
        },

        // Indicate how learning algorithms for the models should be created
        learner: (p, ss) => new MulticlassSupportVectorLearning<Gaussian>
        {
            Kernel = new Gaussian(p.Sigma)
        },

        // Define how the model should be learned, if needed
        fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

        // Define how the performance of the models should be measured
        loss: (actual, expected, r) => new ZeroOneLoss(expected).Loss(actual),

        folds: 10);

    //gscv.ParallelOptions.MaxDegreeOfParallelism = 1;

    var result = gscv.Learn(inputs.Take(crossValidateCount).ToArray(),
                            output.Take(crossValidateCount).ToArray());

    // Get the best cross-validation result and its error estimates:
    var crossValidation = result.BestModel;
    double bestError = result.BestModelError;
    double trainError = result.BestModel.Training.Mean;
    double trainErrorVar = result.BestModel.Training.Variance;
    double valError = result.BestModel.Validation.Mean;
    double valErrorVar = result.BestModel.Validation.Variance;

    // Get the best value found for the kernel parameter:
    double bestSigma = result.BestParameters.Sigma;

    Console.WriteLine("Grid-Search Done.");
    Console.WriteLine("Using Sigma=" + bestSigma);

    // Train the final model using the best parameter
    var bestTeacher = new MulticlassSupportVectorLearning<Gaussian>
    {
        Kernel = new Gaussian(bestSigma)
    };

    MulticlassSupportVectorMachine<Gaussian> svm = bestTeacher.Learn(
        inputs.Take(traingRowsCount).ToArray(),
        output.Take(traingRowsCount).ToArray());

    // Save the trained model
    svm.Save(Path.Combine(trainingFolder, "model"), SerializerCompression.GZip);
}
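// The Train method above relies on a ReadData helper and on the maxCrossValidateCount
// and traingRowsCount fields, none of which are shown. Below is a minimal, hypothetical
// sketch of what they could look like, assuming the training folder contains a
// "data.csv" file with comma-separated feature columns and an integer class label in
// the last column (requires System.IO, System.Linq and System.Globalization). The real
// implementation and the field values may differ.
private const int maxCrossValidateCount = 1000;  // assumed limit for the grid search
private const int traingRowsCount = 5000;        // assumed limit for the final training set

private static (double[][] inputs, int[] output) ReadData(string trainingFolder)
{
    var inputs = new List<double[]>();
    var output = new List<int>();

    // Each line: f1,f2,...,fn,label
    foreach (string line in File.ReadLines(Path.Combine(trainingFolder, "data.csv")))
    {
        string[] parts = line.Split(',');
        inputs.Add(parts.Take(parts.Length - 1)
                        .Select(p => double.Parse(p, CultureInfo.InvariantCulture))
                        .ToArray());
        output.Add(int.Parse(parts[parts.Length - 1]));
    }

    return (inputs.ToArray(), output.ToArray());
}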
private static Tuple<MulticlassSupportVectorMachine<Gaussian>, double, double, double> Training(List<double[]> inputsList, List<int> outputsList)
{
    var gridsearch = GridSearch<double[], int>.CrossValidate(

        // Here we can specify the range of the parameters to be included in the search
        ranges: new
        {
            Complexity = GridSearch.Values(Math.Pow(2, -10), Math.Pow(2, -8), Math.Pow(2, -6),
                Math.Pow(2, -4), Math.Pow(2, -2), Math.Pow(2, 0), Math.Pow(2, 2), Math.Pow(2, 4),
                Math.Pow(2, 6), Math.Pow(2, 8), Math.Pow(2, 10)),
            Gamma = GridSearch.Values(Math.Pow(2, -10), Math.Pow(2, -8), Math.Pow(2, -6),
                Math.Pow(2, -4), Math.Pow(2, -2), Math.Pow(2, 0), Math.Pow(2, 2), Math.Pow(2, 4),
                Math.Pow(2, 6), Math.Pow(2, 8), Math.Pow(2, 10))
        },

        // Indicate how learning algorithms for the models should be created
        learner: (p, ss) => new MulticlassSupportVectorLearning<Gaussian>()
        {
            // Configure the learning algorithm to use SMO to train the
            // underlying SVMs in each of the binary class subproblems.
            Learner = (param) => new SequentialMinimalOptimization<Gaussian>()
            {
                // The kernel parameters could also be estimated automatically
                // (UseComplexityHeuristic / UseKernelEstimation); here they are
                // taken from the current grid-search candidate instead.
                Complexity = p.Complexity,
                Kernel = new Gaussian(p.Gamma)
            }
        },

        // Define how the model should be learned, if needed
        fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

        // Define how the performance of the models should be measured
        loss: (actual, expected, m) => new HammingLoss(expected).Loss(actual),

        folds: 10
    );

    gridsearch.ParallelOptions.MaxDegreeOfParallelism = 1;

    Console.WriteLine("Starting to learn...");

    // Search for the best model parameters
    var result = gridsearch.Learn(inputsList.ToArray(), outputsList.ToArray());

    // Re-train a final model with the best parameters and return it together
    // with the cross-validation error, the best Gamma and the best Complexity
    return new Tuple<MulticlassSupportVectorMachine<Gaussian>, double, double, double>(
        CreateModel(inputsList, outputsList, result.BestParameters.Complexity, result.BestParameters.Gamma),
        result.BestModelError,
        result.BestParameters.Gamma,
        result.BestParameters.Complexity);
}
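// The Training method above delegates to a CreateModel helper that is not shown.
// The following is a minimal sketch of such a helper, assuming it simply re-trains a
// multi-class SVM on the full data set with the best Complexity and Gamma found by
// the grid search; the actual implementation may differ.
private static MulticlassSupportVectorMachine<Gaussian> CreateModel(
    List<double[]> inputsList, List<int> outputsList, double complexity, double gamma)
{
    var teacher = new MulticlassSupportVectorLearning<Gaussian>()
    {
        // Train each binary subproblem with SMO using the selected parameters
        Learner = (param) => new SequentialMinimalOptimization<Gaussian>()
        {
            Complexity = complexity,
            Kernel = Gaussian.FromGamma(gamma)
        }
    };

    return teacher.Learn(inputsList.ToArray(), outputsList.ToArray());
}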
public void cross_validation_decision_tree()
{
    #region doc_learn_tree_cv
    // Ensure results are reproducible
    Accord.Math.Random.Generator.Seed = 0;

    // This is a sample code showing how to use Grid-Search in combination with
    // Cross-Validation to assess the performance of Decision Trees with C4.5.

    var parkinsons = new Parkinsons();
    double[][] input = parkinsons.Features;
    int[] output = parkinsons.ClassLabels;

    // Create a new Grid-Search with Cross-Validation algorithm. Even though the
    // generic, strongly-typed approach used across the framework is most of the
    // time easier to handle, combining both methods in a single call can be
    // difficult. For this reason, the framework offers a specialized method for
    // combining those two algorithms:
    var gscv = GridSearch.CrossValidate(

        // Here we can specify the range of the parameters to be included in the search
        ranges: new
        {
            Join = GridSearch.Range(fromInclusive: 1, toExclusive: 20),
            MaxHeight = GridSearch.Range(fromInclusive: 1, toExclusive: 20),
        },

        // Indicate how learning algorithms for the models should be created
        learner: (p, ss) => new C45Learning
        {
            // Here, we can use the parameters we have specified above:
            Join = p.Join,
            MaxHeight = p.MaxHeight,
        },

        // Define how the model should be learned, if needed
        fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

        // Define how the performance of the models should be measured
        loss: (actual, expected, r) => new ZeroOneLoss(expected).Loss(actual),

        folds: 3, // use k = 3 in k-fold cross validation

        x: input, y: output // so the compiler can infer generic types
    );

    // If needed, control the parallelization degree
    gscv.ParallelOptions.MaxDegreeOfParallelism = 1;

    // Search for the best decision tree
    var result = gscv.Learn(input, output);

    // Get the best cross-validation result:
    var crossValidation = result.BestModel;

    // Get an estimate of its error:
    double bestAverageError = result.BestModelError;

    double trainError = result.BestModel.Training.Mean;
    double trainErrorVar = result.BestModel.Training.Variance;
    double valError = result.BestModel.Validation.Mean;
    double valErrorVar = result.BestModel.Validation.Variance;

    // Get the best values for the parameters:
    int bestJoin = result.BestParameters.Join;
    int bestHeight = result.BestParameters.MaxHeight;

    // Use the best parameter values to create the final
    // model using all the training and validation data:
    var bestTeacher = new C45Learning
    {
        Join = bestJoin,
        MaxHeight = bestHeight,
    };

    // Use the best parameters to create the final tree model:
    DecisionTree finalTree = bestTeacher.Learn(input, output);
    #endregion

    int height = finalTree.GetHeight();
    Assert.AreEqual(5, height);

    Assert.AreEqual(22, result.BestModel.NumberOfInputs);
    Assert.AreEqual(2, result.BestModel.NumberOfOutputs);
    Assert.AreEqual(195, result.BestModel.NumberOfSamples);
    Assert.AreEqual(65, result.BestModel.AverageNumberOfSamples);
    Assert.AreEqual(bestAverageError, valError);
    Assert.AreEqual(5, bestJoin, 1e-10);
    Assert.AreEqual(0.1076923076923077, bestAverageError, 1e-8);
    Assert.AreEqual(5, bestHeight, 1e-8);
}
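// A small, hypothetical helper illustrating how the final tree from the example above
// could be used after training: DecisionTree.Decide classifies samples, and ZeroOneLoss
// turns the predictions for the whole data set into a misclassification rate.
private static double EvaluateTree(DecisionTree tree, double[][] input, int[] output)
{
    // Predict a label for every sample...
    int[] predicted = tree.Decide(input);

    // ...and compute the fraction of samples that were misclassified
    return new ZeroOneLoss(output).Loss(predicted);
}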
public void cross_validation_test()
{
    #region doc_learn_cv
    // Ensure results are reproducible
    Accord.Math.Random.Generator.Seed = 0;

    // This is a sample code showing how to use Grid-Search in combination with
    // Cross-Validation to assess the performance of Support Vector Machines.

    // Consider the example binary data. We will be trying to learn a XOR
    // problem and see how well SVMs perform on this data.
    double[][] inputs =
    {
        new double[] { -1, -1 }, new double[] {  1, -1 },
        new double[] { -1,  1 }, new double[] {  1,  1 },
        new double[] { -1, -1 }, new double[] {  1, -1 },
        new double[] { -1,  1 }, new double[] {  1,  1 },
        new double[] { -1, -1 }, new double[] {  1, -1 },
        new double[] { -1,  1 }, new double[] {  1,  1 },
        new double[] { -1, -1 }, new double[] {  1, -1 },
        new double[] { -1,  1 }, new double[] {  1,  1 },
    };

    int[] xor = // result of xor for the sample input data
    {
        -1,  1,  1, -1,
        -1,  1,  1, -1,
        -1,  1,  1, -1,
        -1,  1,  1, -1,
    };

    // Create a new Grid-Search with Cross-Validation algorithm. Even though the
    // generic, strongly-typed approach used across the framework is most of the
    // time easier to handle, combining both methods in a single call can be
    // difficult. For this reason, the framework offers a specialized method for
    // combining those two algorithms:
    var gscv = GridSearch<double[], int>.CrossValidate(

        // Here we can specify the range of the parameters to be included in the search
        ranges: new
        {
            Complexity = GridSearch.Values(0.00000001, 5.20, 0.30, 0.50),
            Degree = GridSearch.Values(1, 10, 2, 3, 4, 5),
            Constant = GridSearch.Values(0, 1, 2),
        },

        // Indicate how learning algorithms for the models should be created
        learner: (p, ss) => new SequentialMinimalOptimization<Polynomial>
        {
            // Here, we can use the parameters we have specified above:
            Complexity = p.Complexity,
            Kernel = new Polynomial(p.Degree, p.Constant)
        },

        // Define how the model should be learned, if needed
        fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

        // Define how the performance of the models should be measured
        loss: (actual, expected, r) => new ZeroOneLoss(expected).Loss(actual),

        folds: 3 // use k = 3 in k-fold cross validation
    );

    // If needed, control the parallelization degree
    gscv.ParallelOptions.MaxDegreeOfParallelism = 1;

    // Search for the best vector machine
    var result = gscv.Learn(inputs, xor);

    // Get the best cross-validation result:
    var crossValidation = result.BestModel;

    // Estimate its error:
    double bestError = result.BestModelError;

    double trainError = result.BestModel.Training.Mean;
    double trainErrorVar = result.BestModel.Training.Variance;
    double valError = result.BestModel.Validation.Mean;
    double valErrorVar = result.BestModel.Validation.Variance;

    // Get the best values for the parameters:
    double bestC = result.BestParameters.Complexity;
    double bestDegree = result.BestParameters.Degree;
    double bestConstant = result.BestParameters.Constant;
    #endregion

    Assert.AreEqual(2, result.BestModel.NumberOfInputs);
    Assert.AreEqual(1, result.BestModel.NumberOfOutputs);
    Assert.AreEqual(16, result.BestModel.NumberOfSamples);
    Assert.AreEqual(5.333333333333333, result.BestModel.AverageNumberOfSamples);
    Assert.AreEqual(1e-8, bestC, 1e-10);
    Assert.AreEqual(0, bestError, 1e-8);
    Assert.AreEqual(10, bestDegree, 1e-8);
    Assert.AreEqual(0, bestConstant, 1e-8);
}
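// The unit test above stops after inspecting the best parameters. As a hypothetical
// follow-up, the same parameters could be used to train a final polynomial SVM on all
// of the XOR data, mirroring what the decision-tree example does with its bestTeacher:
private static SupportVectorMachine<Polynomial> TrainFinalXorSvm(
    double[][] inputs, int[] xor, double bestC, double bestDegree, double bestConstant)
{
    var teacher = new SequentialMinimalOptimization<Polynomial>
    {
        Complexity = bestC,
        Kernel = new Polynomial((int)bestDegree, bestConstant)
    };

    // Learn a binary SVM from the {-1, +1} labels and return it
    return teacher.Learn(inputs, xor);
}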
private Tuple<MulticlassSupportVectorMachine<Gaussian>, double, double, double> TrainingPaper(List<double[]> inputsList, List<int> outputsList)
{
    var gridsearch = GridSearch<double[], int>.CrossValidate(

        // Here we can specify the range of the parameters to be included in the search
        ranges: new
        {
            Complexity = GridSearch.Values(Math.Pow(2, -12), Math.Pow(2, -11), Math.Pow(2, -10),
                Math.Pow(2, -8), Math.Pow(2, -6), Math.Pow(2, -4), Math.Pow(2, -2), Math.Pow(2, 0),
                Math.Pow(2, 2), Math.Pow(2, 4), Math.Pow(2, 6), Math.Pow(2, 8), Math.Pow(2, 10),
                Math.Pow(2, 11), Math.Pow(2, 12)),
            Gamma = GridSearch.Values(Math.Pow(2, -12), Math.Pow(2, -11), Math.Pow(2, -10),
                Math.Pow(2, -8), Math.Pow(2, -6), Math.Pow(2, -4), Math.Pow(2, -2), Math.Pow(2, 0),
                Math.Pow(2, 2), Math.Pow(2, 4), Math.Pow(2, 6), Math.Pow(2, 8), Math.Pow(2, 10),
                Math.Pow(2, 11), Math.Pow(2, 12))
        },

        // Indicate how learning algorithms for the models should be created
        learner: (p, ss) => new MulticlassSupportVectorLearning<Gaussian>()
        {
            // Configure the learning algorithm to use SMO to train the
            // underlying SVMs in each of the binary class subproblems.
            Learner = (param) => new SequentialMinimalOptimization<Gaussian>()
            {
                // The kernel parameters could also be estimated automatically
                // (UseComplexityHeuristic / UseKernelEstimation); here they are
                // taken from the current grid-search candidate instead.
                Complexity = p.Complexity,
                Kernel = Gaussian.FromGamma(p.Gamma)
            }
        },

        // Define how the model should be learned, if needed
        fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

        // Define how the performance of the models should be measured.
        // An alternative loss that evaluates each candidate on the original
        // (non-resampled) data is kept here for reference:
        /*loss: (actual, expected, m) =>
        {
            double totalError = 0;
            foreach (var input in _originalInputsList)
            {
                if (!m.Decide(input.Item1).Equals(input.Item2))
                {
                    totalError++;
                }
            }
            return totalError / _originalInputsList.Count;
        },*/
        loss: (actual, expected, m) => new HammingLoss(expected).Loss(actual),

        folds: 10
    );

    gridsearch.ParallelOptions.MaxDegreeOfParallelism = _paralelism;

    Console.WriteLine("Starting to learn...");

    // Search for the best model parameters
    var result = gridsearch.Learn(inputsList.ToArray(), outputsList.ToArray());
    Console.WriteLine("Model error: " + result.BestModelError);

    // Re-train a final model on all data using the best parameters found
    var model = CreateModel(inputsList, outputsList, result.BestParameters.Complexity, result.BestParameters.Gamma);

    // Measure the error of the final model on the original data set
    double error = 0;
    Console.WriteLine("Sample count: " + _originalInputsList.Count);
    foreach (var input in _originalInputsList)
    {
        if (!model.Decide(input.Item1).Equals(input.Item2))
        {
            error++;
        }
    }
    error = error / _originalInputsList.Count;
    Console.WriteLine("Actual error: " + error);

    return new Tuple<MulticlassSupportVectorMachine<Gaussian>, double, double, double>(
        model, error, result.BestParameters.Gamma.Value, result.BestParameters.Complexity.Value);
}
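// A hypothetical caller for TrainingPaper, showing how the returned tuple could be
// unpacked and the model persisted with Accord's serializer (the modelPath argument
// is only an example). Item1 is the trained machine, Item2 the measured error,
// Item3 the selected Gamma and Item4 the selected Complexity.
private void TrainAndSave(List<double[]> inputsList, List<int> outputsList, string modelPath)
{
    Tuple<MulticlassSupportVectorMachine<Gaussian>, double, double, double> result =
        TrainingPaper(inputsList, outputsList);

    Console.WriteLine("Error: " + result.Item2
        + ", Gamma: " + result.Item3
        + ", Complexity: " + result.Item4);

    // Persist the trained multi-class SVM, as done in the Train method above
    result.Item1.Save(modelPath, SerializerCompression.GZip);
}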