Example #1
0
        /// <summary>
        /// Trains a linear SVM, grid-searching the "C" parameter, and stores
        /// the best model in <c>Model</c>.
        /// </summary>
        /// <param name="y">Class labels, one per sample.</param>
        /// <param name="x">Feature vectors, one per sample.</param>
        /// <param name="token">Cancels the grid search.</param>
        public void Train(int[] y, double[][] x, CancellationToken token)
        {
            log.LogInformation("Training SVM...");

            // Each grid-search candidate is a linear SVM trained with dual
            // coordinate descent under the L2 loss.
            var gridsearch = new GridSearch <SupportVectorMachine, double[], int>
            {
                ParameterRanges = new GridSearchRangeCollection
                {
                    // NOTE(review): "C" is passed to the Linear kernel's
                    // constant below, not to Complexity — confirm intended.
                    new GridSearchRange("C", new[] { 0.001, 0.01, 0.1, 1, 10 }),
                },
                Learner = p => new LinearDualCoordinateDescent
                {
                    Loss   = Loss.L2,
                    Kernel = new Linear(p["C"])
                },
                // Model quality = fraction of misclassified samples.
                Loss = (actual, expected, m) => new ZeroOneLoss(expected).Loss(actual)
            };

            gridsearch.Token = token;

            // Honor any globally configured degree of parallelism.
            var parallelOptions = ParallelHelper.Options;
            if (parallelOptions != null)
            {
                gridsearch.ParallelOptions = parallelOptions;
            }

            GridSearchResult <SupportVectorMachine, double[], int> searchResult = gridsearch.Learn(x, y);

            Model = searchResult.BestModel;
            GridSearchParameterCollection bestParams = searchResult.BestParameters;
            var bestError = searchResult.BestModelError;

            log.LogInformation("SVM Trained. Threshold: {0} Constant: {1} Error: {2} ...", Model.Threshold, bestParams[0].Value, bestError);
        }
Example #2
0
        /// <summary>
        /// Verifies that grid search over a polynomial-kernel SVM solves the
        /// XOR problem and selects the expected hyper-parameters.
        /// </summary>
        public void learn_test()
        {
            #region doc_learn
            // Ensure results are reproducible
            Accord.Math.Random.Generator.Seed = 0;

            // Example binary data: the four corners of the XOR problem.
            double[][] inputs =
            {
                new double[] { -1, -1 },
                new double[] { -1,  1 },
                new double[] {  1, -1 },
                new double[] {  1,  1 }
            };

            // xor labels
            int[] xor = { -1, 1, 1, -1 };

            // Instantiate a new Grid Search algorithm for Kernel Support Vector Machines
            var gridsearch = new GridSearch <SupportVectorMachine <Polynomial>, double[], int>()
            {
                // Here we can specify the range of the parameters to be included in the search
                ParameterRanges = new GridSearchRangeCollection()
                {
                    new GridSearchRange("complexity", new double[] { 0.00000001, 5.20, 0.30, 0.50 }),
                    new GridSearchRange("degree", new double[] { 1, 10, 2, 3, 4, 5 }),
                    new GridSearchRange("constant", new double[] { 0, 1, 2 })
                },

                // Indicate how learning algorithms for the models should be created
                Learner = (p) => new SequentialMinimalOptimization <Polynomial>
                {
                    Complexity = p["complexity"],
                    Kernel     = new Polynomial((int)p["degree"], p["constant"])
                },

                // Define how the performance of the models should be measured
                Loss = (actual, expected, m) => new ZeroOneLoss(expected).Loss(actual)
            };

            // If needed, control the degree of CPU parallelization
            gridsearch.ParallelOptions.MaxDegreeOfParallelism = 1;

            // Search for the best model parameters
            var result = gridsearch.Learn(inputs, xor);

            // Get the best SVM found during the parameter search
            SupportVectorMachine <Polynomial> svm = result.BestModel;

            // Get an estimate for its error:
            double bestError = result.BestModelError;

            // Get the best values found for the model parameters:
            double bestC        = result.BestParameters["complexity"].Value;
            double bestDegree   = result.BestParameters["degree"].Value;
            double bestConstant = result.BestParameters["constant"].Value;
            #endregion

            // The search should find a perfect separator with the smallest
            // complexity and a degree-1 (linear-ish) polynomial kernel.
            Assert.IsNotNull(svm);
            Assert.AreEqual(1e-8, bestC, 1e-10);
            Assert.AreEqual(0, bestError, 1e-8);
            Assert.AreEqual(1, bestDegree, 1e-8);
            Assert.AreEqual(1, bestConstant, 1e-8);

            // The winning kernel's parameters must match the reported bests.
            Assert.AreEqual(1, svm.Kernel.Degree);
            Assert.AreEqual(1, svm.Kernel.Constant);
        }
Example #3
0
        /// <summary>
        /// Trains a Gaussian-kernel SVM on <paramref name="data"/> via grid
        /// search over complexity and gamma, Platt-calibrates the best model,
        /// and logs F1 scores on the training and hold-out slices.
        /// </summary>
        /// <param name="data">Training samples (X) and labels (Y).</param>
        /// <param name="token">Cancels the grid search.</param>
        /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
        public void Train(DataPackage data, CancellationToken token)
        {
            if (data is null)
            {
                throw new ArgumentNullException(nameof(data));
            }

            log.Debug("Training with {0} records", data.Y.Length);

            // NOTE(review): the standardizer is fitted on ALL of data.X,
            // including rows that become the test slice below — possible
            // data leakage into the reported test performance; confirm.
            standardizer = Standardizer.GetNumericStandardizer(data.X);
            var xTraining = data.X;
            var yTraining = data.Y;

            // By default evaluate on the training data itself; a separate
            // hold-out slice is only carved off when there is enough data.
            var xTesting = xTraining;
            var yTesting = yTraining;

            int testSize = 100;

            // Hold out the last 100 records when the data set exceeds 400 rows.
            if (xTraining.Length > testSize * 4)
            {
                var training = xTraining.Length - testSize;
                xTesting  = xTraining.Skip(training).ToArray();
                yTesting  = yTraining.Skip(training).ToArray();
                xTraining = xTraining.Take(training).ToArray();
                yTraining = yTraining.Take(training).ToArray();
            }

            // NOTE(review): only the training slice is standardized here;
            // xTesting goes to Classify() unscaled — verify that Classify
            // applies the standardizer internally.
            xTraining = standardizer.StandardizeAll(xTraining);
            // Instantiate a new Grid Search algorithm for Kernel Support Vector Machines
            var gridsearch = new GridSearch <SupportVectorMachine <Gaussian>, double[], int>()
            {
                // Here we can specify the range of the parameters to be included in the search
                ParameterRanges = new GridSearchRangeCollection
                {
                    new GridSearchRange("complexity", new [] { 0.001, 0.01, 0.1, 1, 10 }),
                    new GridSearchRange("gamma", new [] { 0.001, 0.01, 0.1, 1 })
                },

                // Indicate how learning algorithms for the models should be created
                Learner = p => new SequentialMinimalOptimization <Gaussian>
                {
                    Complexity = p["complexity"],
                    Kernel     = new Gaussian
                    {
                        Gamma = p["gamma"]
                    }
                },

                // Define how the performance of the models should be measured
                Loss = (actual, expected, m) => new ZeroOneLoss(expected).Loss(actual)
            };

            gridsearch.Token = token;

            // Shuffle samples and labels together so pairing is preserved:
            // Shuffle(xTraining, yTraining) yields [0] = x arrays, [1] = y labels.
            var randomized = new Random().Shuffle(xTraining, yTraining).ToArray();

            yTraining = randomized[1].Cast <int>().ToArray();
            xTraining = randomized[0].Cast <double[]>().ToArray();

            var result = gridsearch.Learn(xTraining, yTraining);

            // Get the best SVM found during the parameter search
            SupportVectorMachine <Gaussian> svm = result.BestModel;

            // Instantiate the probabilistic calibration (using Platt's scaling)
            var calibration = new ProbabilisticOutputCalibration <Gaussian>(svm);

            // Run the calibration algorithm
            calibration.Learn(xTraining, yTraining); // returns the same machine
            model = calibration.Model;

            // Report F1 per class on the (standardized, shuffled) training data.
            var predicted       = ClassifyInternal(xTraining);
            var confusionMatrix = new GeneralConfusionMatrix(classes: 2, expected: yTraining, predicted: predicted);

            log.Debug("Performance on training dataset . F1(0):{0} F1(1):{1}", confusionMatrix.PerClassMatrices[0].FScore, confusionMatrix.PerClassMatrices[1].FScore);

            // Report F1 per class on the hold-out slice and publish the matrix.
            predicted          = Classify(xTesting);
            confusionMatrix    = new GeneralConfusionMatrix(classes: 2, expected: yTesting, predicted: predicted);
            TestSetPerformance = confusionMatrix;
            log.Debug("Performance on testing dataset . F1(0):{0} F1(1):{1}", confusionMatrix.PerClassMatrices[0].FScore, confusionMatrix.PerClassMatrices[1].FScore);
        }
        /// <summary>
        /// Trains a linear SVM to separate the positive and negative text
        /// blocks of <paramref name="dataset"/>, grid-searching the complexity
        /// parameter, and stores the best model in <c>Model</c>.
        /// </summary>
        /// <param name="dataset">Labeled positive/negative text blocks.</param>
        /// <param name="token">Cancels the grid search and the background task.</param>
        /// <exception cref="ArgumentNullException"><paramref name="dataset"/> is null.</exception>
        public async Task Train(DataSet dataset, CancellationToken token)
        {
            // Guard clause, consistent with the other Train overloads.
            if (dataset is null)
            {
                throw new ArgumentNullException(nameof(dataset));
            }

            logger.LogDebug("Train");

            // Label positives +1 and negatives -1, then vectorize the blocks.
            IProcessingTextBlock[] data = dataset.Positive.Concat(dataset.Negative).ToArray();
            int[]      yData            = dataset.Positive.Select(item => 1).Concat(dataset.Negative.Select(item => -1)).ToArray();
            double[][] xData            = vectorSource.GetVectors(data, NormalizationType.None);

            // Shuffle labels and samples together so their pairing survives:
            // Shuffle(yData, xData) yields [0] = labels, [1] = sample vectors.
            Array[] randomized = GlobalSettings.Random.Shuffle(yData, xData).ToArray();

            // Grid search over the SVM complexity (C) with a linear dual
            // coordinate descent learner under the L2 loss.
            GridSearch <SupportVectorMachine <Linear>, double[], int> gridsearch = new GridSearch <SupportVectorMachine <Linear>, double[], int>
            {
                ParameterRanges =
                    new GridSearchRangeCollection
                {
                    new GridSearchRange("complexity", new[] { 0.001, 0.01, 0.1, 1, 10 }),
                },
                Learner = p => new LinearDualCoordinateDescent {
                    Complexity = p["complexity"], Loss = Loss.L2
                },
                // Model quality = fraction of misclassified samples.
                Loss = (actual, expected, m) => new ZeroOneLoss(expected).Loss(actual)
            };

            gridsearch.Token = token;

            // Learning is CPU-bound; run it off the caller's thread.
            GridSearchResult <SupportVectorMachine <Linear>, double[], int> result = await Task.Run(
                () => gridsearch.Learn(randomized[1].Cast <double[]>().ToArray(), randomized[0].Cast <int>().ToArray()),
                token).ConfigureAwait(false);

            Model = result.BestModel;
        }