예제 #1
0
        /// <summary>
        /// Trains a multiclass Gaussian-kernel SVM on the data read from
        /// <paramref name="trainingFolder"/> and saves the resulting machine,
        /// GZip-compressed, to a file named "model" inside that same folder.
        /// </summary>
        /// <remarks>
        /// The kernel's Sigma is chosen by a 10-fold cross-validated grid search run on
        /// at most <c>maxCrossValidateCount</c> leading samples. The final machine is then
        /// fit on the first <c>traingRowsCount</c> samples using the selected Sigma.
        /// </remarks>
        /// <param name="trainingFolder">
        /// Folder handed to <c>ReadData</c> and used as the output directory for the model file.
        /// </param>
        public static void Train(string trainingFolder)
        {
            Console.WriteLine("Training SVM model with Cross-Validation...");

            (double[][] inputs, int[] output) = ReadData(trainingFolder);

            // Cap the number of samples fed to the grid search. inputs is an array,
            // so read Length directly instead of going through LINQ's Count().
            int crossValidateCount = Math.Min(maxCrossValidateCount, inputs.Length);

            // Seed Accord's random generator with a constant before searching.
            Accord.Math.Random.Generator.Seed = 0;

            Console.WriteLine("Grid-Search...");
            var gscv = GridSearch<double[], int>.CrossValidate(
                // Only the Gaussian kernel's Sigma is searched.
                ranges: new
                {
                    Sigma = GridSearch.Range(fromInclusive: 0.00000001, toExclusive: 3),
                },

                // One multiclass learner per candidate Sigma.
                learner: (p, ss) => new MulticlassSupportVectorLearning<Gaussian>
                {
                    Kernel = new Gaussian(p.Sigma)
                },

                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

                // Candidates are scored with zero-one loss against the expected labels.
                loss: (actual, expected, r) => new ZeroOneLoss(expected).Loss(actual),

                folds: 10);

            // Parallelism is left at the framework default here; it can be limited
            // through gscv.ParallelOptions.MaxDegreeOfParallelism if needed.

            var result = gscv.Learn(
                inputs.Take(crossValidateCount).ToArray(),
                output.Take(crossValidateCount).ToArray());

            double bestSigma = result.BestParameters.Sigma;

            Console.WriteLine("Grid-Search Done.");

            Console.WriteLine("Using Sigma=" + bestSigma);

            // Train the final model with the best parameter found by the search.
            var bestTeacher = new MulticlassSupportVectorLearning<Gaussian>
            {
                Kernel = new Gaussian(bestSigma)
            };

            // NOTE(review): the final fit uses traingRowsCount rows, while the search above
            // used crossValidateCount rows — confirm the two sizes are meant to differ.
            MulticlassSupportVectorMachine<Gaussian> svm = bestTeacher.Learn(
                inputs.Take(traingRowsCount).ToArray(),
                output.Take(traingRowsCount).ToArray());

            // Persist the trained machine next to the training data.
            svm.Save(Path.Combine(trainingFolder, "model"), SerializerCompression.GZip);
        }
예제 #2
0
        /// <summary>
        /// Runs a 10-fold cross-validated grid search over Complexity and Gamma for a
        /// multiclass Gaussian SVM, then builds the final model through <c>CreateModel</c>
        /// using the best pair of values found.
        /// </summary>
        /// <param name="inputsList">Feature vectors passed to the search as inputs.</param>
        /// <param name="outputsList">Class labels passed to the search as expected outputs.</param>
        /// <returns>
        /// A tuple holding, in order: the model returned by <c>CreateModel</c>, the best
        /// cross-validation error, the best Gamma and the best Complexity.
        /// </returns>
        private static Tuple<MulticlassSupportVectorMachine<Gaussian>, double, double, double> Training(List<double[]> inputsList, List<int> outputsList)
        {
            // Both parameters are searched over the same grid: 2^-10, 2^-8, ..., 2^8, 2^10.
            // Each range gets its own array instance.
            int[] exponents = { -10, -8, -6, -4, -2, 0, 2, 4, 6, 8, 10 };
            double[] complexityGrid = Array.ConvertAll(exponents, e => Math.Pow(2, e));
            double[] gammaGrid = Array.ConvertAll(exponents, e => Math.Pow(2, e));

            var gridsearch = GridSearch<double[], int>.CrossValidate(
                // Parameter ranges included in the search.
                ranges: new
                {
                    Complexity = GridSearch.Values(complexityGrid),
                    Gamma = GridSearch.Values(gammaGrid)
                },

                // How a learning algorithm is created for each candidate.
                learner: (p, ss) => new MulticlassSupportVectorLearning<Gaussian>()
                {
                    // SMO trains the underlying SVM of every binary class subproblem.
                    // The heuristic estimators (UseComplexityHeuristic / UseKernelEstimation)
                    // are not enabled; both values come from the grid.
                    Learner = (param) => new SequentialMinimalOptimization<Gaussian>()
                    {
                        Complexity = p.Complexity,
                        // NOTE(review): the Gamma value is handed to the Gaussian constructor as-is,
                        // whereas TrainingPaper builds its kernel with Gaussian.FromGamma — confirm
                        // which one matches what CreateModel does with the same value.
                        Kernel = new Gaussian(p.Gamma)
                    }
                },

                // How each candidate is fitted.
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

                // Candidates are scored with Hamming loss against the expected labels.
                loss: (actual, expected, m) => new HammingLoss(expected).Loss(actual),

                folds: 10);

            // The search runs on a single thread.
            gridsearch.ParallelOptions.MaxDegreeOfParallelism = 1;

            Console.WriteLine("y nos ponemos a aprender");

            // Search for the best model parameters.
            double[][] trainingInputs = inputsList.ToArray();
            int[] trainingOutputs = outputsList.ToArray();
            var result = gridsearch.Learn(trainingInputs, trainingOutputs);

            // Rebuild the final model from the winning parameters.
            var finalModel = CreateModel(
                inputsList,
                outputsList,
                result.BestParameters.Complexity,
                result.BestParameters.Gamma);

            return new Tuple<MulticlassSupportVectorMachine<Gaussian>, double, double, double>(
                finalModel,
                result.BestModelError,
                result.BestParameters.Gamma,
                result.BestParameters.Complexity);
        }
예제 #3
0
        public void cross_validation_decision_tree()
        {
            // Documentation sample (inside the region) followed by regression assertions:
            // grid search with 3-fold cross-validation over the Join and MaxHeight settings
            // of C4.5 decision-tree learning on the Parkinsons dataset.

            #region doc_learn_tree_cv
            // Ensure results are reproducible
            Accord.Math.Random.Generator.Seed = 0;

            // This sample shows how to use Grid-Search in combination with
            // Cross-Validation to assess the performance of Decision Trees with C4.5.

            var dataset = new Parkinsons();
            double[][] features = dataset.Features;
            int[] labels = dataset.ClassLabels;

            // Create a new Grid-Search with Cross-Validation algorithm. Even though the
            // generic, strongly-typed approach used across the framework is most of the
            // time easier to handle, combining both methods in a single call can be
            // difficult. For this reason, the framework offers a specialized method for
            // combining those two algorithms:
            var search = GridSearch.CrossValidate(

                // The ranges of the parameters to be included in the search
                ranges: new
                {
                    Join = GridSearch.Range(fromInclusive: 1, toExclusive: 20),
                    MaxHeight = GridSearch.Range(fromInclusive: 1, toExclusive: 20),
                },

                // How learning algorithms for the candidate models are created
                learner: (p, ss) => new C45Learning
                {
                    // The parameters specified above are used here:
                    Join = p.Join,
                    MaxHeight = p.MaxHeight,
                },

                // How each model is learned
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

                // How the performance of the models is measured
                loss: (actual, expected, r) => new ZeroOneLoss(expected).Loss(actual),

                // Use k = 3 in k-fold cross validation
                folds: 3,

                // Passed so the compiler can infer the generic types
                x: features, y: labels
                );

            // If needed, control the parallelization degree
            search.ParallelOptions.MaxDegreeOfParallelism = 1;

            // Search for the best decision tree
            var result = search.Learn(features, labels);

            // The best cross-validation result:
            var crossValidation = result.BestModel;

            // An estimate of its error:
            double bestAverageError = result.BestModelError;

            double trainError = crossValidation.Training.Mean;
            double trainErrorVar = crossValidation.Training.Variance;
            double valError = crossValidation.Validation.Mean;
            double valErrorVar = crossValidation.Validation.Variance;

            // The best values for the parameters:
            int bestJoin = result.BestParameters.Join;
            int bestHeight = result.BestParameters.MaxHeight;

            // Use the best parameter values to create the final
            // model using all the training and validation data:
            var bestTeacher = new C45Learning
            {
                Join = bestJoin,
                MaxHeight = bestHeight,
            };

            // Learn the final tree model with the best parameters:
            DecisionTree finalTree = bestTeacher.Learn(features, labels);
            #endregion

            // Regression checks on the final tree and on the selected cross-validation result.
            Assert.AreEqual(5, finalTree.GetHeight());
            Assert.AreEqual(22, crossValidation.NumberOfInputs);
            Assert.AreEqual(2, crossValidation.NumberOfOutputs);
            Assert.AreEqual(195, crossValidation.NumberOfSamples);
            Assert.AreEqual(65, crossValidation.AverageNumberOfSamples);
            Assert.AreEqual(bestAverageError, valError);
            Assert.AreEqual(5, bestJoin, 1e-10);
            Assert.AreEqual(0.1076923076923077, bestAverageError, 1e-8);
            Assert.AreEqual(5, bestHeight, 1e-8);
        }
예제 #4
0
        public void cross_validation_test()
        {
            // Documentation sample (inside the region) followed by regression assertions:
            // grid search with 3-fold cross-validation over Complexity, Degree and Constant
            // of a polynomial-kernel SVM trained with SMO on repeated XOR data.

            #region doc_learn_cv
            // Ensure results are reproducible
            Accord.Math.Random.Generator.Seed = 0;

            // This sample shows how to use Grid-Search in combination with
            // Cross-Validation to assess the performance of Support Vector Machines.

            // Consider the example binary data below. We will try to learn a XOR
            // problem and see how well SVMs perform on this data.

            // The four XOR input combinations, repeated four times (16 samples).
            double[][] samples =
            {
                new double[] { -1, -1 }, new double[] { 1, -1 }, new double[] { -1, 1 }, new double[] { 1, 1 },
                new double[] { -1, -1 }, new double[] { 1, -1 }, new double[] { -1, 1 }, new double[] { 1, 1 },
                new double[] { -1, -1 }, new double[] { 1, -1 }, new double[] { -1, 1 }, new double[] { 1, 1 },
                new double[] { -1, -1 }, new double[] { 1, -1 }, new double[] { -1, 1 }, new double[] { 1, 1 },
            };

            // Result of xor for each sample above, in the same order.
            int[] labels =
            {
                -1, 1, 1, -1,
                -1, 1, 1, -1,
                -1, 1, 1, -1,
                -1, 1, 1, -1,
            };

            // Create a new Grid-Search with Cross-Validation algorithm. Even though the
            // generic, strongly-typed approach used across the framework is most of the
            // time easier to handle, combining both methods in a single call can be
            // difficult. For this reason, the framework offers a specialized method for
            // combining those two algorithms:
            var search = GridSearch<double[], int>.CrossValidate(

                // The values of the parameters to be included in the search
                ranges: new
                {
                    Complexity = GridSearch.Values(0.00000001, 5.20, 0.30, 0.50),
                    Degree = GridSearch.Values(1, 10, 2, 3, 4, 5),
                    Constant = GridSearch.Values(0, 1, 2),
                },

                // How learning algorithms for the candidate models are created
                learner: (p, ss) => new SequentialMinimalOptimization<Polynomial>
                {
                    // The parameters specified above are used here:
                    Complexity = p.Complexity,
                    Kernel = new Polynomial(p.Degree, p.Constant)
                },

                // How each model is learned
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

                // How the performance of the models is measured
                loss: (actual, expected, r) => new ZeroOneLoss(expected).Loss(actual),

                // Use k = 3 in k-fold cross validation
                folds: 3
                );

            // If needed, control the parallelization degree
            search.ParallelOptions.MaxDegreeOfParallelism = 1;

            // Search for the best vector machine
            var result = search.Learn(samples, labels);

            // The best cross-validation result:
            var crossValidation = result.BestModel;

            // Estimates of its error:
            double bestError = result.BestModelError;
            double trainError = crossValidation.Training.Mean;
            double trainErrorVar = crossValidation.Training.Variance;
            double valError = crossValidation.Validation.Mean;
            double valErrorVar = crossValidation.Validation.Variance;

            // The best values for the parameters:
            double bestC = result.BestParameters.Complexity;
            double bestDegree = result.BestParameters.Degree;
            double bestConstant = result.BestParameters.Constant;
            #endregion

            // Regression checks on the selected cross-validation result and parameters.
            Assert.AreEqual(2, crossValidation.NumberOfInputs);
            Assert.AreEqual(1, crossValidation.NumberOfOutputs);
            Assert.AreEqual(16, crossValidation.NumberOfSamples);
            Assert.AreEqual(5.333333333333333, crossValidation.AverageNumberOfSamples);
            Assert.AreEqual(1e-8, bestC, 1e-10);
            Assert.AreEqual(0, bestError, 1e-8);
            Assert.AreEqual(10, bestDegree, 1e-8);
            Assert.AreEqual(0, bestConstant, 1e-8);
        }
예제 #5
0
        /// <summary>
        /// Runs a 10-fold cross-validated grid search over Complexity and Gamma for a
        /// multiclass Gaussian SVM, builds the final model through <c>CreateModel</c> with
        /// the best pair, and measures that model's misclassification rate on
        /// <c>_originalInputsList</c>.
        /// </summary>
        /// <param name="inputsList">Feature vectors passed to the search as inputs.</param>
        /// <param name="outputsList">Class labels passed to the search as expected outputs.</param>
        /// <returns>
        /// A tuple holding, in order: the model returned by <c>CreateModel</c>, the fraction of
        /// entries in <c>_originalInputsList</c> the model misclassifies, the best Gamma and
        /// the best Complexity.
        /// </returns>
        private Tuple<MulticlassSupportVectorMachine<Gaussian>, double, double, double> TrainingPaper(List<double[]> inputsList, List<int> outputsList)
        {
            // Both parameters are searched over the same grid of powers of two.
            // Each range gets its own array instance.
            int[] exponents = { -12, -11, -10, -8, -6, -4, -2, 0, 2, 4, 6, 8, 10, 11, 12 };
            double[] complexityGrid = Array.ConvertAll(exponents, e => Math.Pow(2, e));
            double[] gammaGrid = Array.ConvertAll(exponents, e => Math.Pow(2, e));

            var gridsearch = GridSearch<double[], int>.CrossValidate(
                // Parameter ranges included in the search.
                ranges: new
                {
                    Complexity = GridSearch.Values(complexityGrid),
                    Gamma = GridSearch.Values(gammaGrid)
                },

                // How a learning algorithm is created for each candidate.
                learner: (p, ss) => new MulticlassSupportVectorLearning<Gaussian>()
                {
                    // SMO trains the underlying SVM of every binary class subproblem.
                    // The heuristic estimators (UseComplexityHeuristic / UseKernelEstimation)
                    // are not enabled; both values come from the grid.
                    Learner = (param) => new SequentialMinimalOptimization<Gaussian>()
                    {
                        Complexity = p.Complexity,
                        Kernel = Gaussian.FromGamma(p.Gamma)
                    }
                },

                // How each candidate is fitted.
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

                // Candidates are scored with Hamming loss against the expected labels.
                // (A disabled earlier variant scored each candidate model by its
                // misclassification rate over _originalInputsList instead.)
                loss: (actual, expected, m) => new HammingLoss(expected).Loss(actual),

                folds: 10);

            gridsearch.ParallelOptions.MaxDegreeOfParallelism = _paralelism;

            Console.WriteLine("y nos ponemos a aprender");

            // Search for the best model parameters.
            double[][] trainingInputs = inputsList.ToArray();
            int[] trainingOutputs = outputsList.ToArray();
            var result = gridsearch.Learn(trainingInputs, trainingOutputs);

            Console.WriteLine("Error modelo: " + result.BestModelError);

            // Rebuild the final model from the winning parameters.
            var model = CreateModel(
                inputsList,
                outputsList,
                result.BestParameters.Complexity,
                result.BestParameters.Gamma);

            // Count how many of the original samples the final model gets wrong.
            int misclassified = 0;

            Console.WriteLine("Largo: " + _originalInputsList.Count);
            foreach (var sample in _originalInputsList)
            {
                bool isCorrect = model.Decide(sample.Item1).Equals(sample.Item2);
                if (!isCorrect)
                {
                    misclassified++;
                }
            }

            // Floating-point division: an empty list yields NaN rather than throwing.
            double error = (double)misclassified / (_originalInputsList.Count);
            Console.WriteLine("Error real: " + error);

            return new Tuple<MulticlassSupportVectorMachine<Gaussian>, double, double, double>(
                model,
                error,
                result.BestParameters.Gamma.Value,
                result.BestParameters.Complexity.Value);
        }