/// <summary>
/// Trains the linear SVM by grid-searching the regularization parameter C
/// with <see cref="LinearDualCoordinateDescent"/>, honoring <paramref name="token"/>
/// for cancellation. The best model found is stored in <c>Model</c>.
/// </summary>
/// <param name="y">Class labels, one per sample.</param>
/// <param name="x">Feature vectors, one per sample.</param>
/// <param name="token">Token used to cancel the grid search.</param>
public void Train(int[] y, double[][] x, CancellationToken token)
{
    log.LogInformation("Training SVM...");
    var gridsearch = new GridSearch<SupportVectorMachine, double[], int>
    {
        ParameterRanges = new GridSearchRangeCollection
        {
            // "C" is the SVM regularization (complexity) parameter.
            new GridSearchRange("C", new[] { 0.001, 0.01, 0.1, 1, 10 }),
        },
        // FIX: the searched value must drive the learner's Complexity (the SVM
        // C parameter), as the commented-out original and the sibling linear
        // trainer do — not the Linear kernel's additive constant, which does
        // not regularize anything.
        Learner = p => new LinearDualCoordinateDescent
        {
            Complexity = p["C"],
            Loss = Loss.L2
        },
        // Model selection criterion: fraction of misclassified samples.
        Loss = (actual, expected, m) => new ZeroOneLoss(expected).Loss(actual)
    };
    gridsearch.Token = token;
    if (ParallelHelper.Options != null)
    {
        gridsearch.ParallelOptions = ParallelHelper.Options;
    }

    GridSearchResult<SupportVectorMachine, double[], int> result = gridsearch.Learn(x, y);
    Model = result.BestModel;
    GridSearchParameterCollection parameters = result.BestParameters;
    var error = result.BestModelError;
    // Label updated to match the fixed parameter: parameters[0] is now C/complexity.
    log.LogInformation("SVM Trained. Threshold: {0} Complexity: {1} Error: {2} ...", Model.Threshold, parameters[0].Value, error);
}
// Regression test for GridSearch over a polynomial-kernel SVM on XOR data.
// NOTE: the body between #region doc_learn / #endregion is extracted verbatim
// into the published documentation — keep code changes out of this method.
public void learn_test()
{
    #region doc_learn
    // Ensure results are reproducible
    Accord.Math.Random.Generator.Seed = 0;

    // Example binary data
    double[][] inputs =
    {
        new double[] { -1, -1 },
        new double[] { -1,  1 },
        new double[] {  1, -1 },
        new double[] {  1,  1 }
    };

    int[] xor = // xor labels
    {
        -1, 1, 1, -1
    };

    // Instantiate a new Grid Search algorithm for Kernel Support Vector Machines
    var gridsearch = new GridSearch<SupportVectorMachine<Polynomial>, double[], int>()
    {
        // Here we can specify the range of the parameters to be included in the search
        ParameterRanges = new GridSearchRangeCollection()
        {
            new GridSearchRange("complexity", new double[] { 0.00000001, 5.20, 0.30, 0.50 }),
            new GridSearchRange("degree", new double[] { 1, 10, 2, 3, 4, 5 }),
            new GridSearchRange("constant", new double[] { 0, 1, 2 })
        },

        // Indicate how learning algorithms for the models should be created
        Learner = (p) => new SequentialMinimalOptimization<Polynomial>
        {
            Complexity = p["complexity"],
            Kernel = new Polynomial((int)p["degree"], p["constant"])
        },

        // Define how the performance of the models should be measured
        Loss = (actual, expected, m) => new ZeroOneLoss(expected).Loss(actual)
    };

    // If needed, control the degree of CPU parallelization
    gridsearch.ParallelOptions.MaxDegreeOfParallelism = 1;

    // Search for the best model parameters
    var result = gridsearch.Learn(inputs, xor);

    // Get the best SVM found during the parameter search
    SupportVectorMachine<Polynomial> svm = result.BestModel;

    // Get an estimate for its error:
    double bestError = result.BestModelError;

    // Get the best values found for the model parameters:
    double bestC = result.BestParameters["complexity"].Value;
    double bestDegree = result.BestParameters["degree"].Value;
    double bestConstant = result.BestParameters["constant"].Value;
    #endregion

    // With Seed = 0 the search is deterministic, so exact values can be pinned.
    Assert.IsNotNull(svm);
    Assert.AreEqual(1e-8, bestC, 1e-10);
    Assert.AreEqual(0, bestError, 1e-8);
    Assert.AreEqual(1, bestDegree, 1e-8);
    Assert.AreEqual(1, bestConstant, 1e-8);
    // The winning model should carry the winning kernel parameters.
    Assert.AreEqual(1, svm.Kernel.Degree);
    Assert.AreEqual(1, svm.Kernel.Constant);
}
/// <summary>
/// Trains a Gaussian-kernel SVM on <paramref name="data"/> via grid search over
/// (complexity, gamma), Platt-calibrates the winner into <c>model</c>, and logs
/// per-class F1 on both the training split and a held-out tail split.
/// </summary>
/// <param name="data">Training samples; X are feature vectors, Y are labels.</param>
/// <param name="token">Token used to cancel the grid search.</param>
public void Train(DataPackage data, CancellationToken token)
{
    if (data is null)
    {
        throw new ArgumentNullException(nameof(data));
    }

    log.Debug("Training with {0} records", data.Y.Length);
    // Standardizer is fit on the FULL dataset (including the future test tail).
    standardizer = Standardizer.GetNumericStandardizer(data.X);
    var xTraining = data.X;
    var yTraining = data.Y;
    // By default test on the training data itself; only split off a real
    // hold-out when there is enough data (> 4 * testSize samples).
    var xTesting = xTraining;
    var yTesting = yTraining;
    int testSize = 100;
    if (xTraining.Length > testSize * 4)
    {
        // NOTE(review): the hold-out is the unshuffled TAIL of the dataset —
        // if samples are ordered (e.g. by time or class) this split is biased.
        var training = xTraining.Length - testSize;
        xTesting = xTraining.Skip(training).ToArray();
        yTesting = yTraining.Skip(training).ToArray();
        xTraining = xTraining.Take(training).ToArray();
        yTraining = yTraining.Take(training).ToArray();
    }

    // Only the training features are standardized here.
    // NOTE(review): xTesting stays raw — presumably Classify() standardizes
    // internally while ClassifyInternal() expects pre-standardized input;
    // confirm, otherwise the test-set metrics below are computed on
    // un-normalized features.
    xTraining = standardizer.StandardizeAll(xTraining);

    // Instantiate a new Grid Search algorithm for Kernel Support Vector Machines
    var gridsearch = new GridSearch<SupportVectorMachine<Gaussian>, double[], int>()
    {
        // Here we can specify the range of the parameters to be included in the search
        ParameterRanges = new GridSearchRangeCollection
        {
            new GridSearchRange("complexity", new [] { 0.001, 0.01, 0.1, 1, 10 }),
            new GridSearchRange("gamma", new [] { 0.001, 0.01, 0.1, 1 })
        },

        // Indicate how learning algorithms for the models should be created
        Learner = p => new SequentialMinimalOptimization<Gaussian>
        {
            Complexity = p["complexity"],
            Kernel = new Gaussian
            {
                Gamma = p["gamma"]
            }
        },

        // Define how the performance of the models should be measured
        Loss = (actual, expected, m) => new ZeroOneLoss(expected).Loss(actual)
    };
    gridsearch.Token = token;

    // Shuffle features and labels with the same permutation so pairs stay aligned.
    // NOTE(review): `new Random()` per call is time-seeded — runs are not
    // reproducible; consider an injected/shared RNG.
    var randomized = new Random().Shuffle(xTraining, yTraining).ToArray();
    yTraining = randomized[1].Cast<int>().ToArray();
    xTraining = randomized[0].Cast<double[]>().ToArray();

    var result = gridsearch.Learn(xTraining, yTraining);

    // Get the best SVM found during the parameter search
    SupportVectorMachine<Gaussian> svm = result.BestModel;

    // Instantiate the probabilistic calibration (using Platt's scaling)
    var calibration = new ProbabilisticOutputCalibration<Gaussian>(svm);

    // Run the calibration algorithm; returns the same machine, now calibrated.
    calibration.Learn(xTraining, yTraining);
    model = calibration.Model;

    // Training-set performance (optimistic — same data the model was fit on).
    var predicted = ClassifyInternal(xTraining);
    var confusionMatrix = new GeneralConfusionMatrix(classes: 2, expected: yTraining, predicted: predicted);
    log.Debug("Performance on training dataset . F1(0):{0} F1(1):{1}", confusionMatrix.PerClassMatrices[0].FScore, confusionMatrix.PerClassMatrices[1].FScore);

    // Hold-out performance (equals training performance when no split was made).
    predicted = Classify(xTesting);
    confusionMatrix = new GeneralConfusionMatrix(classes: 2, expected: yTesting, predicted: predicted);
    TestSetPerformance = confusionMatrix;
    log.Debug("Performance on testing dataset . F1(0):{0} F1(1):{1}", confusionMatrix.PerClassMatrices[0].FScore, confusionMatrix.PerClassMatrices[1].FScore);
}
/// <summary>
/// Trains a linear SVM classifier from the dataset's positive (+1) and
/// negative (-1) text blocks, grid-searching the complexity parameter on a
/// background thread. The best model is stored in <c>Model</c>.
/// </summary>
/// <param name="dataset">Labelled text blocks to train on.</param>
/// <param name="token">Token used to cancel both the grid search and the task.</param>
public async Task Train(DataSet dataset, CancellationToken token)
{
    logger.LogDebug("Train");

    // Positives first, negatives second; labels are built in the same order.
    IProcessingTextBlock[] textBlocks = dataset.Positive.Concat(dataset.Negative).ToArray();
    int[] labels = dataset.Positive.Select(_ => 1).Concat(dataset.Negative.Select(_ => -1)).ToArray();
    double[][] vectors = vectorSource.GetVectors(textBlocks, NormalizationType.None);

    // Shuffle labels and vectors together so their pairing survives the permutation.
    Array[] shuffled = GlobalSettings.Random.Shuffle(labels, vectors).ToArray();
    int[] shuffledLabels = shuffled[0].Cast<int>().ToArray();
    double[][] shuffledVectors = shuffled[1].Cast<double[]>().ToArray();

    var gridsearch = new GridSearch<SupportVectorMachine<Linear>, double[], int>
    {
        // Candidate values for the SVM regularization parameter.
        ParameterRanges = new GridSearchRangeCollection
        {
            new GridSearchRange("complexity", new[] { 0.001, 0.01, 0.1, 1, 10 }),
        },
        Learner = p => new LinearDualCoordinateDescent
        {
            Complexity = p["complexity"],
            Loss = Loss.L2
        },
        // Model selection criterion: fraction of misclassified samples.
        Loss = (actual, expected, m) => new ZeroOneLoss(expected).Loss(actual),
        Token = token
    };

    // The search is CPU bound, so run it off the caller's context.
    GridSearchResult<SupportVectorMachine<Linear>, double[], int> result =
        await Task.Run(() => gridsearch.Learn(shuffledVectors, shuffledLabels), token).ConfigureAwait(false);
    Model = result.BestModel;
}