예제 #1
0
        /// <summary>
        /// Runs a grid-search over the tolerance of a <c>LinearDualCoordinateDescent</c> learner,
        /// stores the best model found in <c>machine</c> and scores that model on the same data.
        /// </summary>
        /// <param name="observations">Input vectors used both for the search and for the final scoring.</param>
        /// <param name="labels">Expected label for each observation.</param>
        /// <returns>
        /// One minus the value reported by <c>AccuracyLoss</c> when the zero-one decisions of the
        /// best model are compared against <paramref name="labels"/>.
        /// </returns>
        public double Learn(double[][] observations, int[] labels)
        {
            // Tolerance is the only parameter being searched; the complexity stays fixed.
            var searchSpace = new
            {
                Tolerance = GridSearch.Range(1e-10, 1.0, stepSize: 0.05)
            };

            var search = GridSearch<double[], int>.Create(
                ranges: searchSpace,
                learner: (p) => new LinearDualCoordinateDescent
                {
                    Complexity = 1e+10,
                    Tolerance = p.Tolerance
                },
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                loss: (actual, expected, m) => new ZeroOneLoss(expected).Loss(actual));

            // Evaluate at most two candidates at the same time.
            search.ParallelOptions.MaxDegreeOfParallelism = 2;

            var searchResult = search.Learn(observations, labels);

            // Keep the winning model around, then check how it does on the input data.
            machine = searchResult.BestModel;
            bool[] decisions = machine.Decide(observations);
            int[] predicted = decisions.ToZeroOne();

            var scorer = new AccuracyLoss(labels);
            return 1 - scorer.Loss(predicted);
        }
예제 #2
0
        /// <summary>
        /// Trains a multi-class SVM with a Gaussian kernel on the data read from
        /// <paramref name="trainingFolder"/> and saves the resulting model next to that data.
        /// </summary>
        /// <remarks>
        /// The kernel's sigma is chosen by a 10-fold cross-validated grid-search over at most
        /// <c>maxCrossValidateCount</c> leading rows. The final model is then trained on the first
        /// <c>traingRowsCount</c> rows and written, GZip-compressed, to a file named "model"
        /// inside <paramref name="trainingFolder"/>. Both counts are members defined outside
        /// this method.
        /// </remarks>
        /// <param name="trainingFolder">Folder passed to <c>ReadData</c> and used as the output location.</param>
        public static void Train(string trainingFolder)
        {
            Console.WriteLine("Training SVM model with Cross-Validation...");

            (double[][] inputs, int[] output) = ReadData(trainingFolder);

            // inputs is an array: read Length directly instead of going through LINQ's Count().
            int crossValidateCount = Math.Min(maxCrossValidateCount, inputs.Length);

            // Fixed seed so that repeated runs give the same results.
            Accord.Math.Random.Generator.Seed = 0;

            Console.WriteLine("Grid-Search...");
            var gscv = GridSearch<double[], int>.CrossValidate(
                ranges: new
                {
                    Sigma = GridSearch.Range(fromInclusive: 0.00000001, toExclusive: 3),
                },

                learner: (p, ss) => new MulticlassSupportVectorLearning<Gaussian>
                {
                    Kernel = new Gaussian(p.Sigma)
                },

                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

                loss: (actual, expected, r) => new ZeroOneLoss(expected).Loss(actual),

                folds: 10);

            // Parallelism is left at the library default; assign
            // gscv.ParallelOptions.MaxDegreeOfParallelism here to limit it.

            var result = gscv.Learn(
                inputs.Take(crossValidateCount).ToArray(),
                output.Take(crossValidateCount).ToArray());

            // Only the winning sigma is needed from the search result.
            double bestSigma = result.BestParameters.Sigma;

            Console.WriteLine("Grid-Search Done.");

            Console.WriteLine($"Using Sigma={bestSigma}");

            // Train the final model with the best parameter.
            var bestTeacher = new MulticlassSupportVectorLearning<Gaussian>
            {
                Kernel = new Gaussian(bestSigma)
            };

            MulticlassSupportVectorMachine<Gaussian> svm = bestTeacher.Learn(
                inputs.Take(traingRowsCount).ToArray(),
                output.Take(traingRowsCount).ToArray());

            // Persist the trained model.
            svm.Save(Path.Combine(trainingFolder, "model"), SerializerCompression.GZip);
        }
예제 #3
0
        /// <summary>
        /// Documentation sample and regression test: grid-search combined with 3-fold
        /// cross-validation over the Join and MaxHeight settings of C4.5 decision tree
        /// learning, using the Parkinsons data set.
        /// </summary>
        public void cross_validation_decision_tree()
        {
            #region doc_learn_tree_cv
            // Seed the framework's random generator so that repeated runs give the same results
            Accord.Math.Random.Generator.Seed = 0;

            // The sample below shows how Grid-Search can be combined with Cross-Validation
            // in order to evaluate how decision trees learned with C4.5 perform.

            var dataset = new Parkinsons();
            double[][] features = dataset.Features;
            int[] classes = dataset.ClassLabels;

            // Parameter values that the search is going to try
            var searchSpace = new
            {
                Join = GridSearch.Range(fromInclusive: 1, toExclusive: 20),
                MaxHeight = GridSearch.Range(fromInclusive: 1, toExclusive: 20),
            };

            // The generic, strongly-typed style used across the framework is usually the
            // easier one to work with, but wiring grid-search and cross-validation together
            // by hand can be awkward. For this reason, the framework offers a specialized
            // method that builds the combination of those two algorithms in a single call:
            var search = GridSearch.CrossValidate(
                ranges: searchSpace,

                // How the learning algorithm is created for one combination of parameter values
                learner: (p, ss) => new C45Learning
                {
                    Join = p.Join,
                    MaxHeight = p.MaxHeight,
                },

                // How a model is learned by that algorithm
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

                // How the performance of a model is measured
                loss: (actual, expected, r) => new ZeroOneLoss(expected).Loss(actual),

                // Use k = 3 in k-fold cross-validation
                folds: 3,

                // Passing the data lets the compiler infer the generic types
                x: features,
                y: classes);

            // Optionally restrict the degree of parallelization
            search.ParallelOptions.MaxDegreeOfParallelism = 1;

            // Run the search for the best decision tree
            var outcome = search.Learn(features, classes);

            // Cross-validation result obtained for the best parameter combination
            var bestCrossValidation = outcome.BestModel;

            // Estimate of its error
            double bestAverageError = outcome.BestModelError;

            double trainError = bestCrossValidation.Training.Mean;
            double trainErrorVar = bestCrossValidation.Training.Variance;
            double valError = bestCrossValidation.Validation.Mean;
            double valErrorVar = bestCrossValidation.Validation.Variance;

            // Parameter values that gave the best result
            int bestJoin = outcome.BestParameters.Join;
            int bestHeight = outcome.BestParameters.MaxHeight;

            // Build the final learning algorithm from those values; it is then
            // trained on all of the data (training and validation together)
            var finalTeacher = new C45Learning
            {
                Join = bestJoin,
                MaxHeight = bestHeight,
            };

            DecisionTree finalTree = finalTeacher.Learn(features, classes);
            #endregion

            int treeHeight = finalTree.GetHeight();
            Assert.AreEqual(5, treeHeight);
            Assert.AreEqual(22, bestCrossValidation.NumberOfInputs);
            Assert.AreEqual(2, bestCrossValidation.NumberOfOutputs);
            Assert.AreEqual(195, bestCrossValidation.NumberOfSamples);
            Assert.AreEqual(65, bestCrossValidation.AverageNumberOfSamples);
            Assert.AreEqual(bestAverageError, valError);
            Assert.AreEqual(5, bestJoin, 1e-10);
            Assert.AreEqual(0.1076923076923077, bestAverageError, 1e-8);
            Assert.AreEqual(5, bestHeight, 1e-8);
        }
예제 #4
0
        /// <summary>
        /// Documentation sample and regression test: strongly-typed grid-search over the kernel,
        /// complexity and tolerance of SMO-trained support vector machines on XOR data.
        /// </summary>
        public void learn_test_strongly_typed()
        {
            #region doc_learn_strongly_typed
            // Seed the framework's random generator so that repeated runs give the same results
            Accord.Math.Random.Generator.Seed = 0;

            // The sample below shows how Grid-Search can be used to
            // tune the parameters of Support Vector Machines.

            // The data is a binary XOR problem: the same four
            // points repeated four times, one repetition per row.
            double[][] inputs =
            {
                new double[] { -1, -1 }, new double[] { 1, -1 }, new double[] { -1, 1 }, new double[] { 1, 1 },
                new double[] { -1, -1 }, new double[] { 1, -1 }, new double[] { -1, 1 }, new double[] { 1, 1 },
                new double[] { -1, -1 }, new double[] { 1, -1 }, new double[] { -1, 1 }, new double[] { 1, 1 },
                new double[] { -1, -1 }, new double[] { 1, -1 }, new double[] { -1, 1 }, new double[] { 1, 1 },
            };

            // Expected XOR output for each of the inputs above
            int[] xor =
            {
                -1, 1, 1, -1,
                -1, 1, 1, -1,
                -1, 1, 1, -1,
                -1, 1, 1, -1,
            };

            // Parameter values that the search is going to try
            var searchSpace = new
            {
                Kernel = GridSearch.Values<IKernel>(new Linear(), new ChiSquare(), new Gaussian(), new Sigmoid()),
                Complexity = GridSearch.Values(0.00000001, 5.20, 0.30, 0.50),
                Tolerance = GridSearch.Range(1e-10, 1.0, stepSize: 0.05)
            };

            // The generic, strongly-typed style used across the framework is usually the
            // easier one to work with, but meta-algorithms such as grid-search can be a bit
            // hard to set up. For this reason, the framework offers a specialized method for it:
            var search = GridSearch<double[], int>.Create(
                ranges: searchSpace,

                // How the learning algorithm is created for one combination of parameter values
                learner: (p) => new SequentialMinimalOptimization<IKernel>
                {
                    Complexity = p.Complexity,
                    Kernel = p.Kernel.Value,
                    Tolerance = p.Tolerance
                },

                // How a model is learned by that algorithm
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

                // How the performance of a model is measured
                loss: (actual, expected, m) => new ZeroOneLoss(expected).Loss(actual));

            // Optionally restrict the degree of CPU parallelization
            search.ParallelOptions.MaxDegreeOfParallelism = 1;

            // Run the search for the best model parameters
            var outcome = search.Learn(inputs, xor);

            // The best machine found
            SupportVectorMachine<IKernel> svm = outcome.BestModel;

            // Its estimated error
            double bestError = outcome.BestModelError;

            // The parameter values that produced it
            var winner = outcome.BestParameters;
            double bestC = winner.Complexity;
            double bestTolerance = winner.Tolerance;
            IKernel bestKernel = winner.Kernel.Value;
            #endregion

            Assert.IsNotNull(svm);
            Assert.AreEqual(1e-8, bestC, 1e-10);
            Assert.AreEqual(0, bestError, 1e-8);
            Assert.AreEqual(0, bestTolerance, 1e-8);
            Assert.AreEqual(typeof(Gaussian), bestKernel.GetType());
        }
예제 #5
0
        /// <summary>
        /// Regression test for https://github.com/accord-net/framework/issues/1052: when the fit
        /// function throws for every candidate, the search still returns a result whose best
        /// model is null and whose exceptions are all recorded.
        /// </summary>
        public void learn_test_exception()
        {
            Accord.Math.Random.Generator.Seed = 0;

            // XOR data: the same four points repeated four times, one repetition per row
            double[][] inputs =
            {
                new double[] { -1, -1 }, new double[] { 1, -1 }, new double[] { -1, 1 }, new double[] { 1, 1 },
                new double[] { -1, -1 }, new double[] { 1, -1 }, new double[] { -1, 1 }, new double[] { 1, 1 },
                new double[] { -1, -1 }, new double[] { 1, -1 }, new double[] { -1, 1 }, new double[] { 1, 1 },
                new double[] { -1, -1 }, new double[] { 1, -1 }, new double[] { -1, 1 }, new double[] { 1, 1 },
            };

            // Expected XOR output for each of the inputs above
            int[] xor =
            {
                -1, 1, 1, -1,
                -1, 1, 1, -1,
                -1, 1, 1, -1,
                -1, 1, 1, -1,
            };

            var searchSpace = new
            {
                Kernel = GridSearch.Values<IKernel>(new Linear()),
                Complexity = GridSearch.Values(100000000000),
                Tolerance = GridSearch.Range(1e-10, 1.0, stepSize: 0.05)
            };

            var search = GridSearch<double[], int>.Create(
                ranges: searchSpace,

                learner: (p) => new SequentialMinimalOptimization<IKernel>
                {
                    Complexity = p.Complexity,
                    Kernel = p.Kernel.Value,
                    Tolerance = p.Tolerance
                },

                // Always fails: the throw inside the finally block replaces both the
                // return value and any exception raised by Learn itself.
                fit: (teacher, x, y, w) =>
                {
                    try
                    {
                        return teacher.Learn(x, y, w);
                    }
                    finally
                    {
                        throw new Exception("abacaxi");
                    }
                },

                loss: (actual, expected, m) => new ZeroOneLoss(expected).Loss(actual));

            var outcome = search.Learn(inputs, xor);

            SupportVectorMachine<IKernel> svm = outcome.BestModel;

            double bestError = outcome.BestModelError;

            double bestC = outcome.BestParameters.Complexity;
            double bestTolerance = outcome.BestParameters.Tolerance;
            IKernel bestKernel = outcome.BestParameters.Kernel.Value;

            // No model could be fitted, and every candidate reported the same failure
            Assert.IsNull(svm);
            Assert.AreEqual(20, outcome.Exceptions.Length);

            Exception[] failures = outcome.Exceptions;
            for (int i = 0; i < failures.Length; i++)
            {
                Assert.AreEqual("abacaxi", failures[i].Message);
            }

            Assert.AreEqual(100000000000, bestC, 1e-10);
            Assert.AreEqual(Double.PositiveInfinity, bestError, 1e-8);
            Assert.AreEqual(1E-10, bestTolerance, 1e-8);
            Assert.AreEqual(typeof(Linear), bestKernel.GetType());
        }
예제 #6
0
        /// <summary>
        /// Checks the bookkeeping of a grid-search result (models, errors, exceptions, parameters
        /// and best-model selection) using the <c>Mapper</c> / <c>MapperLearning</c> test doubles
        /// over a 2 x 4 x 1 grid of string parameters.
        /// </summary>
        /// <remarks>
        /// The assertions rely on two special grid positions: index 4, where the loss function
        /// returns -42 so that it becomes the best model, and index 5, where an exception with the
        /// message "Exception test" is expected (presumably thrown by <c>MapperLearning</c>, which
        /// is not defined in this method).
        /// </remarks>
        public void internals_test()
        {
            Accord.Math.Random.Generator.Seed = 0;

            string[] inputs =
            {
                "input 1",
                "input 2",
                "input 3",
                "input 4",
            };

            string[] outputs =
            {
                "output 1",
                "output 2",
                "output 3",
            };

            double[] weights =
            {
                1.0,
                2.0,
                3.0
            };

            // Every model that reaches the end of the loss function is recorded here.
            var lossModels = new List<Mapper>();

            var ranges = new
            {
                Parameter1 = GridSearch.Range("parameter 11", "parameter 12"),
                Parameter2 = GridSearch.Range("parameter 21", "parameter 22", "parameter 23", "parameter 24"),
                Parameter3 = GridSearch.Range("parameter 31")
            };

            var result = GridSearch.Create(

                ranges: ranges,

                learner: (p) => new MapperLearning
                {
                    Parameter1 = p.Parameter1,
                    Parameter2 = p.Parameter2,
                    Parameter3 = p.Parameter3,
                },

                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                loss: (actual, expected, m) =>
                {
                    // This single combination gets the lowest loss and is never recorded.
                    if (m.Parameter1 == "parameter 12" && m.Parameter2 == "parameter 21" && m.Parameter3 == "parameter 31")
                    {
                        return -42;
                    }

                    // NOTE(review): the lock suggests this callback may run on several threads - confirm.
                    lock (lossModels)
                    {
                        lossModels.Add(m);
                    }

                    // Encode the parameter numbers in the loss, e.g. 11 / 21 / 31 gives 312111.
                    return Math.Abs(int.Parse(m.Parameter1.Replace("parameter ", ""))
                                    + 100 * int.Parse(m.Parameter2.Replace("parameter ", ""))
                                    + 10000 * int.Parse(m.Parameter3.Replace("parameter ", "")));
                },

                x: inputs,
                y: outputs,
                weights: weights
                );


            Mapper bestModel = result.BestModel;

            Assert.AreEqual("parameter 12", bestModel.Parameter1);
            Assert.AreEqual("parameter 21", bestModel.Parameter2);
            Assert.AreEqual("parameter 31", bestModel.Parameter3);
            Assert.AreEqual(inputs, bestModel.Inputs);
            Assert.AreEqual(outputs, bestModel.Outputs);
            Assert.AreEqual(weights, bestModel.Weights);

            Assert.AreEqual(-42, result.BestModelError);
            Assert.AreEqual(4, result.BestModelIndex);

            var bestParameters = result.BestParameters;

            Assert.AreNotSame(ranges, bestParameters);
            Assert.AreEqual(1, bestParameters.Parameter1.Index);
            Assert.AreEqual(0, bestParameters.Parameter2.Index);
            Assert.AreEqual(0, bestParameters.Parameter3.Index);

            Assert.AreEqual("parameter 12", bestParameters.Parameter1.Value);
            Assert.AreEqual("parameter 21", bestParameters.Parameter2.Value);
            Assert.AreEqual("parameter 31", bestParameters.Parameter3.Value);

            Assert.AreEqual(8, result.Count);

            // Expected value goes first and actual second, so a failure message reads correctly.
            Assert.AreEqual(new double[] {
                312111, 312211, 312311, 312411,
                -42, Double.PositiveInfinity, 312312, 312412
            }, result.Errors);

            // Only grid position 5 is expected to have failed.
            Exception[] exceptions = result.Exceptions;
            for (int i = 0; i < exceptions.Length; i++)
            {
                if (i != 5)
                {
                    Assert.IsNull(exceptions[i]);
                }
                else
                {
                    Assert.AreEqual("Exception test", exceptions[i].Message);
                }
            }


            // 8 grid positions, of which index 4 returned early and index 5 failed: 6 recorded.
            Mapper[] models = result.Models;
            Assert.AreEqual(8, models.Length);
            Assert.AreEqual(6, lossModels.Count);

            int a = ranges.Parameter1.Length;
            int b = ranges.Parameter2.Length;
            int c = ranges.Parameter3.Length;

            Assert.AreEqual(2, a);
            Assert.AreEqual(4, b);
            Assert.AreEqual(1, c);

            for (int i = 0; i < models.Length; i++)
            {
                if (i == 5)
                {
                    Assert.IsNull(models[i]);
                }
                else
                {
                    Assert.AreEqual(inputs, models[i].Inputs);
                    Assert.AreEqual(outputs, models[i].Outputs);
                    Assert.AreEqual(weights, models[i].Weights);


                    Assert.AreEqual(4, models[i].NumberOfInputs);
                    Assert.AreEqual(2, models[i].NumberOfOutputs);

                    // The flat model index is decomposed with Parameter3 varying fastest
                    // and Parameter1 varying slowest.
                    Assert.AreEqual(ranges.Parameter1.Values[((i / c) / b) % a], models[i].Parameter1);
                    Assert.AreEqual(ranges.Parameter2.Values[(i / c) % b], models[i].Parameter2);
                    Assert.AreEqual(ranges.Parameter3.Values[i % c], models[i].Parameter3);

                    // Index 4 took the early return in the loss function, so it was never recorded.
                    if (i != 4)
                    {
                        Assert.IsTrue(lossModels.Contains(models[i]));
                    }
                }
            }

            Assert.AreEqual(4, result.NumberOfInputs);
            Assert.AreEqual(2, result.NumberOfOutputs);

            var parameters = result.Parameters;

            // Each grid position has its own parameter objects, all built over the same value lists.
            for (int i = 0; i < parameters.Length; i++)
            {
                for (int j = 0; j < parameters.Length; j++)
                {
                    if (i != j)
                    {
                        Assert.AreNotSame(parameters[i], parameters[j]);
                        Assert.AreNotEqual(parameters[i], parameters[j]);

                        Assert.AreNotEqual(parameters[i].Parameter1, parameters[j].Parameter1);
                        Assert.AreNotEqual(parameters[i].Parameter2, parameters[j].Parameter2);
                        Assert.AreNotEqual(parameters[i].Parameter3, parameters[j].Parameter3);
                    }

                    Assert.AreEqual(parameters[i].Parameter1.Values, parameters[j].Parameter1.Values);
                    Assert.AreEqual(parameters[i].Parameter2.Values, parameters[j].Parameter2.Values);
                    Assert.AreEqual(parameters[i].Parameter3.Values, parameters[j].Parameter3.Values);
                }
            }
        }
예제 #7
0
        /// <summary>
        /// Documentation sample and regression test: grid-search combined with 3-fold
        /// cross-validation over the complexity and the polynomial kernel settings of
        /// SMO-trained support vector machines on XOR data.
        /// </summary>
        public void cross_validation_test()
        {
            #region doc_learn_cv
            // Seed the framework's random generator so that repeated runs give the same results
            Accord.Math.Random.Generator.Seed = 0;

            // The sample below shows how Grid-Search can be combined with Cross-Validation
            // in order to evaluate how Support Vector Machines perform.

            // The data is a binary XOR problem: the same four
            // points repeated four times, one repetition per row.
            double[][] inputs =
            {
                new double[] { -1, -1 }, new double[] { 1, -1 }, new double[] { -1, 1 }, new double[] { 1, 1 },
                new double[] { -1, -1 }, new double[] { 1, -1 }, new double[] { -1, 1 }, new double[] { 1, 1 },
                new double[] { -1, -1 }, new double[] { 1, -1 }, new double[] { -1, 1 }, new double[] { 1, 1 },
                new double[] { -1, -1 }, new double[] { 1, -1 }, new double[] { -1, 1 }, new double[] { 1, 1 },
            };

            // Expected XOR output for each of the inputs above
            int[] xor =
            {
                -1, 1, 1, -1,
                -1, 1, 1, -1,
                -1, 1, 1, -1,
                -1, 1, 1, -1,
            };

            // Parameter values that the search is going to try
            var searchSpace = new
            {
                Complexity = GridSearch.Range(new double[] { 0.00000001, 5.20, 0.30, 0.50 }),
                Degree = GridSearch.Range(new int[] { 1, 10, 2, 3, 4, 5 }),
                Constant = GridSearch.Range(new double[] { 0, 1, 2 }),
            };

            // The generic, strongly-typed style used across the framework is usually the
            // easier one to work with, but wiring grid-search and cross-validation together
            // by hand can be awkward. For this reason, the framework offers a specialized
            // method that builds the combination of those two algorithms in a single call:
            var search = GridSearch<double[], int>.CrossValidate(
                ranges: searchSpace,

                // How the learning algorithm is created for one combination of parameter values
                learner: (p, ss) => new SequentialMinimalOptimization<Polynomial>
                {
                    Complexity = p.Complexity,
                    Kernel = new Polynomial(p.Degree, p.Constant)
                },

                // How a model is learned by that algorithm
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

                // How the performance of a model is measured
                loss: (actual, expected, r) => new ZeroOneLoss(expected).Loss(actual),

                // Use k = 3 in k-fold cross-validation
                folds: 3);

            // Optionally restrict the degree of parallelization
            search.ParallelOptions.MaxDegreeOfParallelism = 1;

            // Run the search for the best vector machine
            var outcome = search.Learn(inputs, xor);

            // Cross-validation result obtained for the best parameter combination
            var bestCrossValidation = outcome.BestModel;

            // Estimates of its error
            double bestError = outcome.BestModelError;
            double trainError = bestCrossValidation.Training.Mean;
            double trainErrorVar = bestCrossValidation.Training.Variance;
            double valError = bestCrossValidation.Validation.Mean;
            double valErrorVar = bestCrossValidation.Validation.Variance;

            // Parameter values that gave the best result
            double bestC = outcome.BestParameters.Complexity;
            double bestDegree = outcome.BestParameters.Degree;
            double bestConstant = outcome.BestParameters.Constant;
            #endregion

            Assert.AreEqual(2, bestCrossValidation.NumberOfInputs);
            Assert.AreEqual(1, bestCrossValidation.NumberOfOutputs);
            Assert.AreEqual(16, bestCrossValidation.NumberOfSamples);
            Assert.AreEqual(5.333333333333333, bestCrossValidation.AverageNumberOfSamples);
            Assert.AreEqual(1e-8, bestC, 1e-10);
            Assert.AreEqual(0, bestError, 1e-8);
            Assert.AreEqual(10, bestDegree, 1e-8);
            Assert.AreEqual(0, bestConstant, 1e-8);
        }