private void TrainAndPredict(double complexity, double gamma, int degree, double[][] trainingInputs, int[] trainingOutputs, double[][] validationInputs, out int[] predictedTraining, out int[] predictedValidation)
{
    switch (Kernel)
    {
        case Kernel.Gaussian:
            var gaussianLearningKfold = new MulticlassSupportVectorLearning<Gaussian>
            {
                Learner = p => new SequentialMinimalOptimization<Gaussian>
                {
                    UseKernelEstimation = false,
                    UseComplexityHeuristic = false,
                    Complexity = complexity,
                    Token = CancellationTokenSource.Token,
                    Tolerance = 0.01,
                    Kernel = Gaussian.FromGamma(gamma),
                }
            };
            var svmGaussian = gaussianLearningKfold.Learn(trainingInputs, trainingOutputs);
            predictedTraining = svmGaussian.Decide(trainingInputs);
            predictedValidation = svmGaussian.Decide(validationInputs);
            break;

        case Kernel.Linear:
            var linearLearning = new MulticlassSupportVectorLearning<Linear>
            {
                Learner = p => new LinearDualCoordinateDescent<Linear>
                {
                    Complexity = complexity,
                    UseComplexityHeuristic = false,
                    Token = CancellationTokenSource.Token
                }
            };
            var svmLinear = linearLearning.Learn(trainingInputs, trainingOutputs);
            predictedTraining = svmLinear.Decide(trainingInputs);
            predictedValidation = svmLinear.Decide(validationInputs);
            break;

        case Kernel.Polynomial:
            var polynomialLearning = new MulticlassSupportVectorLearning<Polynomial>
            {
                Learner = p => new SequentialMinimalOptimization<Polynomial>
                {
                    UseKernelEstimation = false,
                    UseComplexityHeuristic = false,
                    Complexity = complexity,
                    Token = CancellationTokenSource.Token,
                    Kernel = new Polynomial(degree, 1)
                }
            };
            var polynomialSvm = polynomialLearning.Learn(trainingInputs, trainingOutputs);
            predictedTraining = polynomialSvm.Decide(trainingInputs);
            predictedValidation = polynomialSvm.Decide(validationInputs);
            break;

        default:
            throw new NotImplementedException();
    }
}
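The method above only fills the two out-parameters; a minimal usage sketch (not part of the original class, and assuming the matching validationOutputs labels are in scope) showing how a caller might turn the predictions into error rates with Accord's ZeroOneLoss, as the calibration tests below do:

// Usage sketch; the complexity/gamma/degree values are placeholders, not tuned results.
TrainAndPredict(1.0, 0.1, 2, trainingInputs, trainingOutputs, validationInputs,
    out int[] predictedTraining, out int[] predictedValidation);

// ZeroOneLoss (Accord.Math.Optimization.Losses) gives the misclassification rate.
double trainingError = new ZeroOneLoss(trainingOutputs).Loss(predictedTraining);
double validationError = new ZeroOneLoss(validationOutputs).Loss(predictedValidation);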
private MulticlassSupportVectorMachine<Gaussian> CreateModel(List<double[]> inputsList, List<int> outputsList, double complexity, double gamma)
{
    var teacher = new MulticlassSupportVectorLearning<Gaussian>()
    {
        // Configure the learning algorithm to use SMO to train the
        // underlying SVMs in each of the binary class subproblems,
        // using the Complexity and Gamma values passed in (for example,
        // the best parameters found by a grid search).
        Learner = (param) => new SequentialMinimalOptimization<Gaussian>()
        {
            Complexity = complexity,
            Kernel = Gaussian.FromGamma(gamma)
        }
    };

    return teacher.Learn(inputsList.ToArray(), outputsList.ToArray());
}
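A hypothetical call site for CreateModel; the complexity and gamma values here are placeholders, not tuned results:

// Usage sketch: build a machine from explicit parameters and classify a new sample.
MulticlassSupportVectorMachine<Gaussian> model = CreateModel(inputsList, outputsList, 1.0, 0.25);
int predictedClass = model.Decide(new double[] { 0, 1, 1, 0 });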
public void multiclass_calibration_generic_kernel()
{
    // Let's say we have the following data to be classified
    // into three possible classes. Those are the samples:
    //
    double[][] inputs =
    {
        //          input              output
        new double[] { 0, 1, 1, 0 }, //   0
        new double[] { 0, 1, 0, 0 }, //   0
        new double[] { 0, 0, 1, 0 }, //   0
        new double[] { 0, 1, 1, 0 }, //   0
        new double[] { 0, 1, 0, 0 }, //   0
        new double[] { 1, 0, 0, 1 }, //   1
        new double[] { 0, 0, 0, 1 }, //   1
        new double[] { 0, 0, 0, 1 }, //   1
        new double[] { 1, 0, 1, 1 }, //   2
        new double[] { 1, 1, 0, 1 }, //   2
        new double[] { 0, 1, 1, 1 }, //   2
        new double[] { 1, 1, 1, 1 }, //   2
    };

    int[] outputs = // those are the class labels
    {
        0, 0, 0, 0, 0,
        1, 1, 1,
        2, 2, 2, 2,
    };

    // Create the multi-class learning algorithm for the machine
    var teacher = new MulticlassSupportVectorLearning<IKernel>()
    {
        // Configure the learning algorithm to use SMO to train the
        // underlying SVMs in each of the binary class subproblems.
        Learner = (param) => new SequentialMinimalOptimization<IKernel>()
        {
            UseKernelEstimation = false,
            Kernel = Gaussian.FromGamma(0.5)
        }
    };

    // Learn a machine
    var machine = teacher.Learn(inputs, outputs);

    // Create the calibration algorithm for the machine
    var calibration = new MulticlassSupportVectorLearning<IKernel>(machine)
    {
        // Configure the learning algorithm to use probabilistic output
        // calibration on each of the already-trained binary SVMs.
        Learner = (param) => new ProbabilisticOutputCalibration<IKernel>(param.Model)
    };

    // Configure parallel execution options
    calibration.ParallelOptions.MaxDegreeOfParallelism = 1;

    // Learn a machine
    calibration.Learn(inputs, outputs);

    // Obtain class predictions for each sample
    int[] predicted = machine.Decide(inputs);

    // Get class scores for each sample
    double[] scores = machine.Score(inputs);

    // Get log-likelihoods (should be same as scores)
    double[][] logl = machine.LogLikelihoods(inputs);

    // Get probability for each sample
    double[][] prob = machine.Probabilities(inputs);

    // Compute classification error
    double error = new ZeroOneLoss(outputs).Loss(predicted);
    double loss = new CategoryCrossEntropyLoss(outputs).Loss(prob);

    //string str = logl.ToCSharp();

    double[] expectedScores =
    {
        1.87436400885238, 1.81168086449304, 1.74038320983522,
        1.87436400885238, 1.81168086449304, 1.55446926953952,
        1.67016543853596, 1.67016543853596, 1.83135194001403,
        1.83135194001403, 1.59836868669125, 2.0618816310294
    };

    double[][] expectedLogL =
    {
        new double[] { 1.87436400885238, -1.87436400885238, -1.7463646841257 },
        new double[] { 1.81168086449304, -1.81168086449304, -1.73142460658826 },
        new double[] { 1.74038320983522, -1.58848669816072, -1.74038320983522 },
        new double[] { 1.87436400885238, -1.87436400885238, -1.7463646841257 },
        new double[] { 1.81168086449304, -1.81168086449304, -1.73142460658826 },
        new double[] { -1.55446926953952, 1.55446926953952, -0.573599079216229 },
        new double[] { -0.368823000428743, 1.67016543853596, -1.67016543853596 },
        new double[] { -0.368823000428743, 1.67016543853596, -1.67016543853596 },
        new double[] { -1.83135194001403, -1.20039293330558, 1.83135194001403 },
        new double[] { -1.83135194001403, -1.20039293330558, 1.83135194001403 },
        new double[] { -0.894598978116595, -1.59836868669125, 1.59836868669125 },
        new double[] { -1.87336852014759, -2.0618816310294, 2.0618816310294 }
    };

    double[][] expectedProbs =
    {
        new double[] { 0.95209908906855, 0.0224197237689656, 0.0254811871624848 },
        new double[] { 0.947314032745205, 0.0252864560196241, 0.0273995112351714 },
        new double[] { 0.937543314993345, 0.0335955309754816, 0.028861154031173 },
        new double[] { 0.95209908906855, 0.0224197237689656, 0.0254811871624848 },
        new double[] { 0.947314032745205, 0.0252864560196241, 0.0273995112351714 },
        new double[] { 0.0383670466237636, 0.859316640577158, 0.102316312799079 },
        new double[] { 0.111669460983068, 0.857937888238824, 0.0303926507781076 },
        new double[] { 0.111669460983068, 0.857937888238824, 0.0303926507781076 },
        new double[] { 0.0238971617859334, 0.0449126146360623, 0.931190223578004 },
        new double[] { 0.0238971617859334, 0.0449126146360623, 0.931190223578004 },
        new double[] { 0.0735735561383806, 0.0363980776342206, 0.890028366227399 },
        new double[] { 0.0188668069460003, 0.0156252941482294, 0.96550789890577 }
    };

    // Must be exactly the same as test above
    Assert.AreEqual(0, error);
    Assert.AreEqual(0.5, ((Gaussian)machine[0].Value.Kernel).Gamma);
    Assert.AreEqual(0.5, ((Gaussian)machine[1].Value.Kernel).Gamma);
    Assert.AreEqual(0.5, ((Gaussian)machine[2].Value.Kernel).Gamma);
    Assert.AreEqual(1.0231652126930515, loss);
    Assert.IsTrue(predicted.IsEqual(outputs));
    Assert.IsTrue(expectedScores.IsEqual(scores, 1e-10));
    Assert.IsTrue(expectedLogL.IsEqual(logl, 1e-10));
    Assert.IsTrue(expectedProbs.IsEqual(prob, 1e-10));
}
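Since the test pins exact probability values, a looser sanity check can also be useful; a sketch (not part of the original test, assumes System.Linq is imported) verifying that each calibrated probability row sums to one:

// Sketch of an additional invariant check: after probabilistic calibration,
// each row of Probabilities() should sum to approximately 1.
foreach (double[] row in prob)
    Assert.AreEqual(1.0, row.Sum(), 1e-6);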
public override Task<GeneralConfusionMatrix> ComputeConfusionMatrixAsync(ClassificationModel classificationModel)
{
    return Task.Factory.StartNew(() =>
    {
        int numFeatures = classificationModel.FeatureVectors.Count;
        double[][] input = new double[numFeatures][];
        int[] responses = new int[numFeatures];

        for (int featureIndex = 0; featureIndex < classificationModel.FeatureVectors.Count; ++featureIndex)
        {
            var featureVector = classificationModel.FeatureVectors[featureIndex];
            input[featureIndex] = Array.ConvertAll(featureVector.FeatureVector.BandIntensities, s => (double)s / ushort.MaxValue);
            responses[featureIndex] = featureVector.FeatureClass;
        }

        // Split the samples into two folds
        var folds = new int[input.Length][];
        var splittings = CrossValidation.Splittings(input.Length, 2);
        for (int i = 0; i < 2; ++i)
        {
            folds[i] = splittings.Find(x => x == i);
        }

        int[] indicesTrain = folds[0];
        int[] indicesValidation = folds[1];

        // Let's now grab the training data:
        var trainingInputs = input.Get(indicesTrain);
        var trainingOutputs = responses.Get(indicesTrain);

        // And now the validation data:
        var validationInputs = input.Get(indicesValidation);
        var validationOutputs = responses.Get(indicesValidation);

        // Predict
        int[] prediction;

        switch (Kernel)
        {
            case Kernel.Gaussian:
                var gaussianLearningKfold = new MulticlassSupportVectorLearning<Gaussian>
                {
                    Kernel = Gaussian.FromGamma(Gamma),
                    Learner = p => new SequentialMinimalOptimization<Gaussian>
                    {
                        UseKernelEstimation = false,
                        UseComplexityHeuristic = false,
                        Complexity = Complexity,
                        Token = CancellationTokenSource.Token,
                        Tolerance = 0.01
                    }
                };
                var svmGaussian = gaussianLearningKfold.Learn(trainingInputs, trainingOutputs);
                prediction = svmGaussian.Decide(validationInputs);
                break;

            case Kernel.Linear:
                var linearLearning = new MulticlassSupportVectorLearning<Linear>
                {
                    Learner = p => new LinearDualCoordinateDescent<Linear>
                    {
                        Complexity = Complexity,
                        UseComplexityHeuristic = false,
                        Token = CancellationTokenSource.Token
                    }
                };
                var svmLinear = linearLearning.Learn(trainingInputs, trainingOutputs);
                prediction = svmLinear.Decide(validationInputs);
                break;

            default:
                throw new NotImplementedException();
        }

        GeneralConfusionMatrix confusionMatrix = new GeneralConfusionMatrix(classificationModel.LandCoverTypes.Count, prediction, validationOutputs);

        return confusionMatrix;
    });
}
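The returned GeneralConfusionMatrix also exposes aggregate measures; a brief consumer sketch (the Accuracy and Kappa property names from Accord.Statistics.Analysis are stated here as an assumption):

// Usage sketch, assuming an async caller that can await the task.
GeneralConfusionMatrix cm = await ComputeConfusionMatrixAsync(classificationModel);
double accuracy = cm.Accuracy; // fraction of validation samples classified correctly
double kappa = cm.Kappa;       // Cohen's kappa agreement coefficient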
public override Task TrainAsync(ClassificationModel classificationModel)
{
    int numFeatures = classificationModel.FeatureVectors.Count;
    double[][] input = new double[numFeatures][];
    int[] responses = new int[numFeatures];

    for (int featureIndex = 0; featureIndex < classificationModel.FeatureVectors.Count; ++featureIndex)
    {
        var featureVector = classificationModel.FeatureVectors[featureIndex];
        input[featureIndex] = Array.ConvertAll(featureVector.FeatureVector.BandIntensities, s => (double)s / ushort.MaxValue);
        responses[featureIndex] = featureVector.FeatureClass;
    }

    switch (Kernel)
    {
        case Kernel.Linear:
            var linearLearning = new MulticlassSupportVectorLearning<Linear>
            {
                Learner = p => new LinearDualCoordinateDescent<Linear>
                {
                    Complexity = Complexity,
                    UseComplexityHeuristic = false,
                    Token = CancellationTokenSource.Token
                }
            };
            return Task.Factory.StartNew(() => { _lSvm = linearLearning.Learn(input, responses); });

        case Kernel.Gaussian:
            var gaussianLearning = new MulticlassSupportVectorLearning<Gaussian>
            {
                Learner = p => new SequentialMinimalOptimization<Gaussian>
                {
                    Complexity = Complexity,
                    UseComplexityHeuristic = false,
                    UseKernelEstimation = false,
                    Token = CancellationTokenSource.Token,
                    Kernel = Gaussian.FromGamma(Gamma),
                }
            };
            return Task.Factory.StartNew(() => { _gSvm = gaussianLearning.Learn(input, responses); });

        case Kernel.Polynomial:
            var polynomialLearning = new MulticlassSupportVectorLearning<Polynomial>
            {
                Learner = p => new SequentialMinimalOptimization<Polynomial>
                {
                    Complexity = Complexity,
                    UseKernelEstimation = false,
                    UseComplexityHeuristic = false,
                    Token = CancellationTokenSource.Token,
                    Kernel = new Polynomial(Degree, 1)
                }
            };
            return Task.Factory.StartNew(() => { _pSvm = polynomialLearning.Learn(input, responses); });

        default:
            throw new InvalidOperationException();
    }
}
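TrainAsync only stores the trained machine in a field (_lSvm, _gSvm or _pSvm); a hypothetical companion method, sketched here and not part of the original class, would dispatch prediction the same way the switch above does:

// Hypothetical sketch: predict with whichever machine was trained.
private int[] Predict(double[][] input)
{
    switch (Kernel)
    {
        case Kernel.Linear: return _lSvm.Decide(input);
        case Kernel.Gaussian: return _gSvm.Decide(input);
        case Kernel.Polynomial: return _pSvm.Decide(input);
        default: throw new InvalidOperationException();
    }
}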
public void multilabel_calibration_generic_kernel()
{
    // Let's say we have the following data to be classified
    // into three possible classes. Those are the samples:
    //
    double[][] inputs =
    {
        //          input              output
        new double[] { 0, 1, 1, 0 }, //   0
        new double[] { 0, 1, 0, 0 }, //   0
        new double[] { 0, 0, 1, 0 }, //   0
        new double[] { 0, 1, 1, 0 }, //   0
        new double[] { 0, 1, 0, 0 }, //   0
        new double[] { 1, 0, 0, 1 }, //   1
        new double[] { 0, 0, 0, 1 }, //   1
        new double[] { 0, 0, 0, 1 }, //   1
        new double[] { 1, 0, 1, 1 }, //   2
        new double[] { 1, 1, 0, 1 }, //   2
        new double[] { 0, 1, 1, 1 }, //   2
        new double[] { 1, 1, 1, 1 }, //   2
    };

    int[] outputs = // those are the class labels
    {
        0, 0, 0, 0, 0,
        1, 1, 1,
        2, 2, 2, 2,
    };

    // Create the multi-label learning algorithm for the machine
    var teacher = new MultilabelSupportVectorLearning<IKernel>()
    {
        // Configure the learning algorithm to use SMO to train the
        // underlying SVMs in each of the binary class subproblems.
        Learner = (param) => new SequentialMinimalOptimization<IKernel>()
        {
            UseKernelEstimation = false,
            Kernel = Gaussian.FromGamma(0.5)
        }
    };

    // Learn a machine
    var machine = teacher.Learn(inputs, outputs);

    // Create the calibration algorithm for the machine
    var calibration = new MultilabelSupportVectorLearning<IKernel>(machine)
    {
        // Configure the learning algorithm to use probabilistic output
        // calibration on each of the already-trained binary SVMs.
        Learner = (p) => new ProbabilisticOutputCalibration<IKernel>(p.Model)
    };

    // Configure parallel execution options
    calibration.ParallelOptions.MaxDegreeOfParallelism = 1;

    // Learn a machine
    calibration.Learn(inputs, outputs);

    // Obtain class predictions for each sample
    bool[][] predicted = machine.Decide(inputs);

    // Get class scores for each sample
    double[][] scores = machine.Scores(inputs);

    // Get log-likelihoods (should be same as scores)
    double[][] logl = machine.LogLikelihoods(inputs);

    // Get probability for each sample
    double[][] prob = machine.Probabilities(inputs);

    // Compute the classification error (Hamming loss)
    double error = new HammingLoss(outputs).Loss(predicted);
    double loss = new CategoryCrossEntropyLoss(outputs).Loss(prob);

    string a = scores.ToCSharp();
    string b = logl.ToCSharp();
    string c = prob.ToCSharp();

    double[][] expectedScores =
    {
        new double[] { 1.85316017783605, -2.59688389729331, -2.32170102153988 },
        new double[] { 1.84933597524124, -1.99399145231446, -2.2920299547693 },
        new double[] { 1.44477953581274, -1.98592298465108, -2.27356092239125 },
        new double[] { 1.85316017783605, -2.59688389729331, -2.32170102153988 },
        new double[] { 1.84933597524124, -1.99399145231446, -2.2920299547693 },
        new double[] { -2.40815576360914, 0.328362962196791, -0.932721757919691 },
        new double[] { -2.13111157264226, 1.809192096031, -2.2920299547693 },
        new double[] { -2.13111157264226, 1.809192096031, -2.2920299547693 },
        new double[] { -2.14888646926108, -1.99399145231447, 1.33101148524982 },
        new double[] { -2.12915064678299, -1.98592298465108, 1.3242171079396 },
        new double[] { -1.47197826667149, -1.96368715704762, 0.843414180834243 },
        new double[] { -2.14221021749314, -2.83117892529093, 2.61354519154994 }
    };

    double[][] expectedLogL =
    {
        new double[] { -0.145606614365135, -2.66874434442222, -2.41528841111469 },
        new double[] { -0.146125659911391, -2.12163759796483, -2.3883043096263 },
        new double[] { -0.211716960454159, -2.11453945718522, -2.37154474995633 },
        new double[] { -0.145606614365135, -2.66874434442222, -2.41528841111469 },
        new double[] { -0.146125659911391, -2.12163759796483, -2.3883043096263 },
        new double[] { -2.4943161092787, -0.542383360363463, -1.26452689970624 },
        new double[] { -2.24328358118314, -0.151678833375872, -2.3883043096263 },
        new double[] { -2.24328358118314, -0.151678833375872, -2.3883043096263 },
        new double[] { -2.25918730624753, -2.12163759796483, -0.234447327588685 },
        new double[] { -2.24153091066541, -2.11453945718522, -0.2358711195715 },
        new double[] { -1.67856232802554, -2.0950136294762, -0.357841632335707 },
        new double[] { -2.25321037906455, -2.88845047104229, -0.0707140798850236 }
    };

    double[][] expectedProbs =
    {
        new double[] { 0.844913862516144, 0.0677684640174953, 0.0873176734663607 },
        new double[] { 0.803266328757473, 0.111405242674824, 0.0853284285677024 },
        new double[] { 0.790831391595502, 0.117950175028754, 0.0912184333757438 },
        new double[] { 0.844913862516144, 0.0677684640174953, 0.0873176734663607 },
        new double[] { 0.803266328757473, 0.111405242674824, 0.0853284285677024 },
        new double[] { 0.0872387667998771, 0.614360294206236, 0.298400938993887 },
        new double[] { 0.100372339295793, 0.812805149315815, 0.0868225113883914 },
        new double[] { 0.100372339295793, 0.812805149315815, 0.0868225113883914 },
        new double[] { 0.102863726210119, 0.11803188195247, 0.779104391837411 },
        new double[] { 0.104532503226998, 0.118686968710368, 0.776780528062634 },
        new double[] { 0.184996665350572, 0.121983586443407, 0.693019748206021 },
        new double[] { 0.0961702585148881, 0.0509517983210315, 0.85287794316408 }
    };

    int[] actual = predicted.ArgMax(dimension: 1);
    Assert.IsTrue(actual.IsEqual(outputs));

    // Must be exactly the same as test above
    Assert.AreEqual(0, error);
    Assert.AreEqual(0.5, ((Gaussian)machine[0].Kernel).Gamma);
    Assert.AreEqual(0.5, ((Gaussian)machine[1].Kernel).Gamma);
    Assert.AreEqual(0.5, ((Gaussian)machine[2].Kernel).Gamma);
    Assert.AreEqual(2.9395943260892361, loss);
    Assert.IsTrue(expectedScores.IsEqual(scores, 1e-10));
    Assert.IsTrue(expectedLogL.IsEqual(logl, 1e-10));
    Assert.IsTrue(expectedProbs.IsEqual(prob, 1e-10));

    double[] probabilities = CorrectProbabilities(machine, inputs[0]);
    double[] actualProb = machine.Probabilities(inputs[0]);
    Assert.IsTrue(probabilities.IsEqual(actualProb, 1e-8));
}
public void multilabel_calibration_generic_kernel()
{
    // Let's say we have the following data to be classified
    // into three possible classes. Those are the samples:
    //
    double[][] inputs =
    {
        //          input              output
        new double[] { 0, 1, 1, 0 }, //   0
        new double[] { 0, 1, 0, 0 }, //   0
        new double[] { 0, 0, 1, 0 }, //   0
        new double[] { 0, 1, 1, 0 }, //   0
        new double[] { 0, 1, 0, 0 }, //   0
        new double[] { 1, 0, 0, 1 }, //   1
        new double[] { 0, 0, 0, 1 }, //   1
        new double[] { 0, 0, 0, 1 }, //   1
        new double[] { 1, 0, 1, 1 }, //   2
        new double[] { 1, 1, 0, 1 }, //   2
        new double[] { 0, 1, 1, 1 }, //   2
        new double[] { 1, 1, 1, 1 }, //   2
    };

    int[] outputs = // those are the class labels
    {
        0, 0, 0, 0, 0,
        1, 1, 1,
        2, 2, 2, 2,
    };

    // Create the multi-label learning algorithm for the machine
    var teacher = new MultilabelSupportVectorLearning<IKernel>()
    {
        // Configure the learning algorithm to use SMO to train the
        // underlying SVMs in each of the binary class subproblems.
        Learner = (param) => new SequentialMinimalOptimization<IKernel>()
        {
            UseKernelEstimation = false,
            Kernel = Gaussian.FromGamma(0.5)
        }
    };

    // Learn a machine
    var machine = teacher.Learn(inputs, outputs);

    // Create the calibration algorithm for the machine
    var calibration = new MultilabelSupportVectorLearning<IKernel>(machine)
    {
        // Configure the learning algorithm to use probabilistic output
        // calibration on each of the already-trained binary SVMs.
        Learner = (p) => new ProbabilisticOutputCalibration<IKernel>(p.Model)
    };

    // Configure parallel execution options
    calibration.ParallelOptions.MaxDegreeOfParallelism = 1;

    // Learn a machine
    calibration.Learn(inputs, outputs);

    // Obtain class predictions for each sample
    bool[][] predicted = machine.Decide(inputs);

    // Get class scores for each sample
    double[][] scores = machine.Scores(inputs);

    // Get log-likelihoods (should be same as scores)
    double[][] logl = machine.LogLikelihoods(inputs);

    // Get probability for each sample
    double[][] prob = machine.Probabilities(inputs);

    // Compute the classification error (Hamming loss)
    double error = new HammingLoss(outputs).Loss(predicted);
    double loss = new CategoryCrossEntropyLoss(outputs).Loss(prob);

    string a = scores.ToCSharp();
    string b = logl.ToCSharp();
    string c = prob.ToCSharp();

    double[][] expectedScores =
    {
        new double[] { 1.85316017783605, -2.59688389729331, -2.32170102153988 },
        new double[] { 1.84933597524124, -1.99399145231446, -2.2920299547693 },
        new double[] { 1.44477953581274, -1.98592298465108, -2.27356092239125 },
        new double[] { 1.85316017783605, -2.59688389729331, -2.32170102153988 },
        new double[] { 1.84933597524124, -1.99399145231446, -2.2920299547693 },
        new double[] { -2.40815576360914, 0.328362962196791, -0.932721757919691 },
        new double[] { -2.13111157264226, 1.809192096031, -2.2920299547693 },
        new double[] { -2.13111157264226, 1.809192096031, -2.2920299547693 },
        new double[] { -2.14888646926108, -1.99399145231447, 1.33101148524982 },
        new double[] { -2.12915064678299, -1.98592298465108, 1.3242171079396 },
        new double[] { -1.47197826667149, -1.96368715704762, 0.843414180834243 },
        new double[] { -2.14221021749314, -2.83117892529093, 2.61354519154994 }
    };

    double[][] expectedLogL =
    {
        new double[] { 1.85316017783605, -2.59688389729331, -2.32170102153988 },
        new double[] { 1.84933597524124, -1.99399145231446, -2.2920299547693 },
        new double[] { 1.44477953581274, -1.98592298465108, -2.27356092239125 },
        new double[] { 1.85316017783605, -2.59688389729331, -2.32170102153988 },
        new double[] { 1.84933597524124, -1.99399145231446, -2.2920299547693 },
        new double[] { -2.40815576360914, 0.328362962196791, -0.932721757919691 },
        new double[] { -2.13111157264226, 1.809192096031, -2.2920299547693 },
        new double[] { -2.13111157264226, 1.809192096031, -2.2920299547693 },
        new double[] { -2.14888646926108, -1.99399145231447, 1.33101148524982 },
        new double[] { -2.12915064678299, -1.98592298465108, 1.3242171079396 },
        new double[] { -1.47197826667149, -1.96368715704762, 0.843414180834243 },
        new double[] { -2.14221021749314, -2.83117892529093, 2.61354519154994 }
    };

    double[][] expectedProbs =
    {
        new double[] { 6.37994947365835, 0.0745053832890827, 0.0981065622139132 },
        new double[] { 6.35559784678136, 0.136150899620619, 0.101061104020747 },
        new double[] { 4.24091706941419, 0.137253872418087, 0.102944947658882 },
        new double[] { 6.37994947365835, 0.0745053832890827, 0.0981065622139132 },
        new double[] { 6.35559784678136, 0.136150899620619, 0.101061104020747 },
        new double[] { 0.0899810880411361, 1.38869292386051, 0.393481290780948 },
        new double[] { 0.118705270957796, 6.10551277113228, 0.101061104020747 },
        new double[] { 0.118705270957796, 6.10551277113228, 0.101061104020747 },
        new double[] { 0.116613938707895, 0.136150899620619, 3.78486979203385 },
        new double[] { 0.118938271567046, 0.137253872418087, 3.75924112261421 },
        new double[] { 0.229471080877097, 0.140340010119971, 2.3242889884131 },
        new double[] { 0.11739508739354, 0.0589433229176013, 13.6473476521179 }
    };

    int[] actual = predicted.ArgMax(dimension: 1);
    Assert.IsTrue(actual.IsEqual(outputs));

    // Must be exactly the same as test above
    Assert.AreEqual(0, error);
    Assert.AreEqual(0.5, ((Gaussian)machine[0].Kernel).Gamma);
    Assert.AreEqual(0.5, ((Gaussian)machine[1].Kernel).Gamma);
    Assert.AreEqual(0.5, ((Gaussian)machine[2].Kernel).Gamma);
    Assert.AreEqual(-18.908706961799737, loss);
    Assert.IsTrue(expectedScores.IsEqual(scores, 1e-10));
    Assert.IsTrue(expectedLogL.IsEqual(logl, 1e-10));
    Assert.IsTrue(expectedProbs.IsEqual(prob, 1e-10));
}
public DetectionResults Filter(DocumentClusters document)
{
    if (document.Clusters.Length < 3)
    {
        logger.Info("Not enough text clusters for clustering");
        return new DetectionResults(document.Clusters);
    }

    double[][] observations = vectorSource.GetVectors(document.Clusters, NormalizationType.None);
    var standardizer = Standardizer.GetNumericStandardizer(observations);
    observations = standardizer.StandardizeAll(observations);
    var data = observations.ToArray();

    // Replace NaNs produced by standardization (e.g. zero-variance features) with zero
    for (int i = 0; i < observations.Length; i++)
    {
        for (int j = 0; j < observations[i].Length; j++)
        {
            if (double.IsNaN(observations[i][j]))
            {
                observations[i][j] = 0;
            }
        }
    }

    // Train a one-class SVM to score how typical each text cluster is
    var teacher = new OneclassSupportVectorLearning<Gaussian>
    {
        Kernel = Gaussian.FromGamma(1.0 / data.Length),
        Nu = 0.5,
        Shrinking = true,
        Tolerance = 0.001
    };

    var svm = teacher.Learn(data);
    double[] prediction = svm.Score(data);

    // Accumulate the scores of every cluster a sentence appears in
    Dictionary<int, List<double>> weights = new Dictionary<int, List<double>>();
    for (int i = 0; i < prediction.Length; i++)
    {
        foreach (var sentenceItem in document.Clusters[i].Sentences)
        {
            if (!weights.TryGetValue(sentenceItem.Index, out var classType))
            {
                classType = new List<double>();
                weights[sentenceItem.Index] = classType;
            }

            classType.Add(prediction[i]);
        }
    }

    List<ProcessingTextBlock> anomaly = new List<ProcessingTextBlock>();
    List<ProcessingTextBlock> resultData = new List<ProcessingTextBlock>();
    List<SentenceItem> sentences = new List<SentenceItem>();
    ProcessingTextBlock cluster;
    bool? lastResult = null;

    // Sentences whose summed score falls at or below the 20th-percentile cutoff are treated as anomalous
    var cutoffIndex = (int)(weights.Count * 0.2);
    var cutoff = weights.Select(item => item.Value.Sum()).OrderBy(item => item).Skip(cutoffIndex).First();

    var allSentences = document.Clusters.SelectMany(item => item.Sentences)
        .Distinct()
        .OrderBy(item => item.Index)
        .ToArray();

    if (allSentences.Length != weights.Count)
    {
        throw new ArgumentOutOfRangeException(nameof(document), "Sentence length mismatch");
    }

    // Group consecutive sentences with the same normal/anomalous verdict into blocks
    foreach (var sentence in allSentences)
    {
        var current = weights[sentence.Index].Sum();
        var result = current > cutoff;
        if (lastResult != null && result != lastResult)
        {
            cluster = new ProcessingTextBlock(sentences.ToArray());
            sentences.Clear();
            if (lastResult.Value)
            {
                resultData.Add(cluster);
            }
            else
            {
                anomaly.Add(cluster);
            }
        }

        sentences.Add(sentence);
        lastResult = result;
    }

    cluster = new ProcessingTextBlock(sentences.ToArray());
    sentences.Clear();
    if (lastResult.Value)
    {
        resultData.Add(cluster);
    }
    else
    {
        anomaly.Add(cluster);
    }

    StringBuilder builder = new StringBuilder();
    foreach (var textCluster in anomaly)
    {
        foreach (var sentenceItem in textCluster.Sentences)
        {
            builder.AppendLine(sentenceItem.Text);
        }
    }

    return new DetectionResults(resultData.ToArray(), anomaly.ToArray());
}
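The thresholding step above keeps roughly the lowest-scoring fifth of sentences as anomalies; an isolated sketch of that rule (standalone illustration reusing the same variable names, assumes System.Linq):

// Standalone sketch of the cutoff rule: a sentence is anomalous when the sum of the
// one-class SVM scores of the clusters containing it is at or below the 20th-percentile value.
var summed = weights.ToDictionary(kv => kv.Key, kv => kv.Value.Sum());
int cutoffIndex = (int)(summed.Count * 0.2);
double cutoff = summed.Values.OrderBy(v => v).Skip(cutoffIndex).First();
bool IsAnomalous(int sentenceIndex) => summed[sentenceIndex] <= cutoff;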
private Tuple<MulticlassSupportVectorMachine<Gaussian>, double, double, double> TrainingPaper(List<double[]> inputsList, List<int> outputsList)
{
    var gridsearch = GridSearch<double[], int>.CrossValidate(
        // Here we can specify the range of the parameters to be included in the search
        ranges: new
        {
            Complexity = GridSearch.Values(Math.Pow(2, -12), Math.Pow(2, -11), Math.Pow(2, -10), Math.Pow(2, -8),
                Math.Pow(2, -6), Math.Pow(2, -4), Math.Pow(2, -2), Math.Pow(2, 0), Math.Pow(2, 2), Math.Pow(2, 4),
                Math.Pow(2, 6), Math.Pow(2, 8), Math.Pow(2, 10), Math.Pow(2, 11), Math.Pow(2, 12)),
            Gamma = GridSearch.Values(Math.Pow(2, -12), Math.Pow(2, -11), Math.Pow(2, -10), Math.Pow(2, -8),
                Math.Pow(2, -6), Math.Pow(2, -4), Math.Pow(2, -2), Math.Pow(2, 0), Math.Pow(2, 2), Math.Pow(2, 4),
                Math.Pow(2, 6), Math.Pow(2, 8), Math.Pow(2, 10), Math.Pow(2, 11), Math.Pow(2, 12))
        },

        // Indicate how learning algorithms for the models should be created
        learner: (p, ss) => new MulticlassSupportVectorLearning<Gaussian>()
        {
            // Configure the learning algorithm to use SMO to train the
            // underlying SVMs in each of the binary class subproblems,
            // using the Complexity and Gamma candidates from the grid.
            Learner = (param) => new SequentialMinimalOptimization<Gaussian>()
            {
                //UseComplexityHeuristic = true,
                //UseKernelEstimation = true
                Complexity = p.Complexity,
                Kernel = Gaussian.FromGamma(p.Gamma)
            }
        },

        // Define how the model should be learned, if needed
        fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

        // Define how the performance of the models should be measured
        /*loss: (actual, expected, m) =>
        {
            double totalError = 0;
            foreach (var input in _originalInputsList)
            {
                if (!m.Decide(input.Item1).Equals(input.Item2))
                {
                    totalError++;
                }
            }
            return totalError / _originalInputsList.Count;
        },*/
        loss: (actual, expected, m) => new HammingLoss(expected).Loss(actual),

        folds: 10
    );

    gridsearch.ParallelOptions.MaxDegreeOfParallelism = _paralelism;

    Console.WriteLine("Starting to learn");

    // Search for the best model parameters
    var result = gridsearch.Learn(inputsList.ToArray(), outputsList.ToArray());
    Console.WriteLine("Model error: " + result.BestModelError);

    var model = CreateModel(inputsList, outputsList, result.BestParameters.Complexity, result.BestParameters.Gamma);

    double error = 0;
    Console.WriteLine("Length: " + _originalInputsList.Count);
    foreach (var input in _originalInputsList)
    {
        if (!model.Decide(input.Item1).Equals(input.Item2))
        {
            error++;
        }
    }

    error = error / _originalInputsList.Count;
    Console.WriteLine("Actual error: " + error);

    return new Tuple<MulticlassSupportVectorMachine<Gaussian>, double, double, double>(model, error, result.BestParameters.Gamma.Value, result.BestParameters.Complexity.Value);
}
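A hypothetical consumer of the tuple returned by TrainingPaper, sketched only to document what the four components hold:

// Usage sketch: unpack the result of TrainingPaper.
var trained = TrainingPaper(inputsList, outputsList);
MulticlassSupportVectorMachine<Gaussian> model = trained.Item1;
double realError = trained.Item2;       // error recomputed over _originalInputsList
double bestGamma = trained.Item3;       // Gamma chosen by the grid search
double bestComplexity = trained.Item4;  // Complexity chosen by the grid search
int predictedClass = model.Decide(inputsList[0]);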