Beispiel #1
0
        /// <summary>
        /// Trains a multi-class support vector machine with the kernel selected by the
        /// <c>Kernel</c> property and predicts class labels for both the training and
        /// the validation inputs.
        /// </summary>
        /// <param name="complexity">Value assigned to the <c>Complexity</c> setting of every binary learner.</param>
        /// <param name="gamma">Gamma handed to <c>Gaussian.FromGamma</c>; only read for the Gaussian kernel.</param>
        /// <param name="degree">Degree handed to the <c>Polynomial</c> constructor; only read for the polynomial kernel.</param>
        /// <param name="trainingInputs">Samples the machine is trained on.</param>
        /// <param name="trainingOutputs">Class labels belonging to <paramref name="trainingInputs"/>.</param>
        /// <param name="validationInputs">Samples that are predicted but not used for training.</param>
        /// <param name="predictedTraining">Receives the decisions for <paramref name="trainingInputs"/>.</param>
        /// <param name="predictedValidation">Receives the decisions for <paramref name="validationInputs"/>.</param>
        /// <exception cref="NotImplementedException">The selected kernel is not Gaussian, linear or polynomial.</exception>
        private void TrainAndPredict(double complexity, double gamma, int degree, double[][] trainingInputs, int[] trainingOutputs, double[][] validationInputs, out int[] predictedTraining, out int[] predictedValidation)
        {
            switch (Kernel)
            {
            case Kernel.Gaussian:
            {
                // SMO learner with a fixed gamma; both heuristics are switched off so
                // the supplied complexity and kernel are used as given.
                var gaussianTeacher = new MulticlassSupportVectorLearning<Gaussian>
                {
                    Learner = subproblem => new SequentialMinimalOptimization<Gaussian>
                    {
                        UseKernelEstimation    = false,
                        UseComplexityHeuristic = false,
                        Complexity             = complexity,
                        Token                  = CancellationTokenSource.Token,
                        Tolerance              = 0.01,
                        Kernel                 = Gaussian.FromGamma(gamma),
                    }
                };

                var gaussianMachine = gaussianTeacher.Learn(trainingInputs, trainingOutputs);
                predictedTraining   = gaussianMachine.Decide(trainingInputs);
                predictedValidation = gaussianMachine.Decide(validationInputs);
                break;
            }

            case Kernel.Linear:
            {
                // The linear kernel is trained with dual coordinate descent instead of SMO.
                var linearTeacher = new MulticlassSupportVectorLearning<Linear>
                {
                    Learner = subproblem => new LinearDualCoordinateDescent<Linear>
                    {
                        Complexity             = complexity,
                        UseComplexityHeuristic = false,
                        Token                  = CancellationTokenSource.Token
                    }
                };

                var linearMachine   = linearTeacher.Learn(trainingInputs, trainingOutputs);
                predictedTraining   = linearMachine.Decide(trainingInputs);
                predictedValidation = linearMachine.Decide(validationInputs);
                break;
            }

            case Kernel.Polynomial:
            {
                // SMO learner with a polynomial kernel of the requested degree.
                var polynomialTeacher = new MulticlassSupportVectorLearning<Polynomial>
                {
                    Learner = subproblem => new SequentialMinimalOptimization<Polynomial>
                    {
                        UseKernelEstimation    = false,
                        UseComplexityHeuristic = false,
                        Complexity             = complexity,
                        Token                  = CancellationTokenSource.Token,
                        Kernel                 = new Polynomial(degree, 1)
                    }
                };

                var polynomialMachine = polynomialTeacher.Learn(trainingInputs, trainingOutputs);
                predictedTraining     = polynomialMachine.Decide(trainingInputs);
                predictedValidation   = polynomialMachine.Decide(validationInputs);
                break;
            }

            default:
                throw new NotImplementedException();
            }
        }
        /// <summary>
        /// Trains a multi-class support vector machine with a Gaussian kernel on the given samples.
        /// </summary>
        /// <param name="inputsList">Training samples; copied into an array before learning.</param>
        /// <param name="outputsList">Class label of each sample in <paramref name="inputsList"/>.</param>
        /// <param name="complexity">Value assigned to the <c>Complexity</c> setting of every binary learner.</param>
        /// <param name="gamma">Gamma handed to <c>Gaussian.FromGamma</c> to build the kernel.</param>
        /// <returns>The machine returned by the learning algorithm.</returns>
        private MulticlassSupportVectorMachine <Gaussian> CreateModel(List <double[]> inputsList,
                                                                      List <int> outputsList, double complexity, double gamma)
        {
            double[][] samples = inputsList.ToArray();
            int[]      labels  = outputsList.ToArray();

            var learning = new MulticlassSupportVectorLearning<Gaussian>();

            // Every binary sub-problem is solved by SMO using the caller's
            // complexity and a Gaussian kernel built from the caller's gamma.
            learning.Learner = subproblem => new SequentialMinimalOptimization<Gaussian>()
            {
                Complexity = complexity,
                Kernel     = Gaussian.FromGamma(gamma)
            };

            return learning.Learn(samples, labels);
        }
        /// <summary>
        /// Regression test: trains a multi-class SVM through the generic <c>IKernel</c> interface,
        /// calibrates it with <c>ProbabilisticOutputCalibration</c> and compares decisions, scores,
        /// log-likelihoods and probabilities against pinned values.
        /// </summary>
        public void multiclass_calibration_generic_kernel()
        {
            // Twelve four-dimensional samples spread over three classes.
            double[][] samples =
            {
                //               input         class
                new double[] { 0, 1, 1, 0 }, //  0
                new double[] { 0, 1, 0, 0 }, //  0
                new double[] { 0, 0, 1, 0 }, //  0
                new double[] { 0, 1, 1, 0 }, //  0
                new double[] { 0, 1, 0, 0 }, //  0
                new double[] { 1, 0, 0, 1 }, //  1
                new double[] { 0, 0, 0, 1 }, //  1
                new double[] { 0, 0, 0, 1 }, //  1
                new double[] { 1, 0, 1, 1 }, //  2
                new double[] { 1, 1, 0, 1 }, //  2
                new double[] { 0, 1, 1, 1 }, //  2
                new double[] { 1, 1, 1, 1 }, //  2
            };

            // Class label of each sample above, in the same order.
            int[] labels =
            {
                0, 0, 0, 0, 0,
                1, 1, 1,
                2, 2, 2, 2,
            };

            // Step 1: train the machine. Each binary sub-problem is solved by SMO
            // with a Gaussian kernel (gamma = 0.5) and kernel estimation disabled.
            var trainer = new MulticlassSupportVectorLearning<IKernel>()
            {
                Learner = (p) => new SequentialMinimalOptimization<IKernel>()
                {
                    UseKernelEstimation = false,
                    Kernel = Gaussian.FromGamma(0.5)
                }
            };

            var svm = trainer.Learn(samples, labels);

            // Step 2: calibrate the already-trained machine so that it can
            // produce probabilities; each binary model is calibrated in turn.
            var calibrator = new MulticlassSupportVectorLearning<IKernel>(svm)
            {
                Learner = (p) => new ProbabilisticOutputCalibration<IKernel>(p.Model)
            };

            // Restrict the calibration to a single degree of parallelism.
            calibrator.ParallelOptions.MaxDegreeOfParallelism = 1;

            calibrator.Learn(samples, labels);

            // Step 3: query the calibrated machine.
            int[]      decisions           = svm.Decide(samples);
            double[]   actualScores        = svm.Score(samples);
            double[][] actualLogLikelihood = svm.LogLikelihoods(samples);
            double[][] actualProbabilities = svm.Probabilities(samples);

            // Step 4: summarise the quality of the predictions.
            double zeroOneError = new ZeroOneLoss(labels).Loss(decisions);
            double crossEntropy = new CategoryCrossEntropyLoss(labels).Loss(actualProbabilities);

            // Pinned reference values.
            double[] expectedScores =
            {
                1.87436400885238, 1.81168086449304, 1.74038320983522,
                1.87436400885238, 1.81168086449304, 1.55446926953952,
                1.67016543853596, 1.67016543853596, 1.83135194001403,
                1.83135194001403, 1.59836868669125, 2.0618816310294
            };

            double[][] expectedLogL =
            {
                new double[] {   1.87436400885238, -1.87436400885238,   -1.7463646841257 },
                new double[] {   1.81168086449304, -1.81168086449304,  -1.73142460658826 },
                new double[] {   1.74038320983522, -1.58848669816072,  -1.74038320983522 },
                new double[] {   1.87436400885238, -1.87436400885238,   -1.7463646841257 },
                new double[] {   1.81168086449304, -1.81168086449304,  -1.73142460658826 },
                new double[] {  -1.55446926953952,  1.55446926953952, -0.573599079216229 },
                new double[] { -0.368823000428743,  1.67016543853596,  -1.67016543853596 },
                new double[] { -0.368823000428743,  1.67016543853596,  -1.67016543853596 },
                new double[] {  -1.83135194001403, -1.20039293330558,   1.83135194001403 },
                new double[] {  -1.83135194001403, -1.20039293330558,   1.83135194001403 },
                new double[] { -0.894598978116595, -1.59836868669125,   1.59836868669125 },
                new double[] {  -1.87336852014759,  -2.0618816310294,    2.0618816310294 }
            };

            double[][] expectedProbs =
            {
                new double[] {   0.95209908906855, 0.0224197237689656, 0.0254811871624848 },
                new double[] {  0.947314032745205, 0.0252864560196241, 0.0273995112351714 },
                new double[] {  0.937543314993345, 0.0335955309754816,  0.028861154031173 },
                new double[] {   0.95209908906855, 0.0224197237689656, 0.0254811871624848 },
                new double[] {  0.947314032745205, 0.0252864560196241, 0.0273995112351714 },
                new double[] { 0.0383670466237636,  0.859316640577158,  0.102316312799079 },
                new double[] {  0.111669460983068,  0.857937888238824, 0.0303926507781076 },
                new double[] {  0.111669460983068,  0.857937888238824, 0.0303926507781076 },
                new double[] { 0.0238971617859334, 0.0449126146360623,  0.931190223578004 },
                new double[] { 0.0238971617859334, 0.0449126146360623,  0.931190223578004 },
                new double[] { 0.0735735561383806, 0.0363980776342206,  0.890028366227399 },
                new double[] { 0.0188668069460003, 0.0156252941482294,   0.96550789890577 }
            };

            // Every sample is classified correctly and each binary machine kept gamma = 0.5.
            Assert.AreEqual(0, zeroOneError);
            Assert.AreEqual(0.5, ((Gaussian)svm[0].Value.Kernel).Gamma);
            Assert.AreEqual(0.5, ((Gaussian)svm[1].Value.Kernel).Gamma);
            Assert.AreEqual(0.5, ((Gaussian)svm[2].Value.Kernel).Gamma);
            Assert.AreEqual(1.0231652126930515, crossEntropy);
            Assert.IsTrue(decisions.IsEqual(labels));

            // Numeric outputs must match the pinned tables within 1e-10.
            Assert.IsTrue(expectedScores.IsEqual(actualScores, 1e-10));
            Assert.IsTrue(expectedLogL.IsEqual(actualLogLikelihood, 1e-10));
            Assert.IsTrue(expectedProbs.IsEqual(actualProbabilities, 1e-10));
        }
Beispiel #4
0
        /// <summary>
        /// Estimates a confusion matrix for the current classifier settings: the model's feature
        /// vectors are split into two folds, a machine is trained on the first fold and its
        /// predictions on the second fold are compared with the known classes.
        /// </summary>
        /// <param name="classificationModel">Supplies the feature vectors, their classes and the land cover types.</param>
        /// <returns>A task that produces the confusion matrix of the validation fold.</returns>
        /// <exception cref="NotImplementedException">
        /// Raised inside the returned task when the selected kernel is neither Gaussian nor linear.
        /// </exception>
        public override Task <GeneralConfusionMatrix> ComputeConfusionMatrixAsync(ClassificationModel classificationModel)
        {
            return(Task.Factory.StartNew(() =>
            {
                int numFeatures = classificationModel.FeatureVectors.Count;

                double[][] input = new double[numFeatures][];
                int[] responses = new int[numFeatures];

                for (int featureIndex = 0; featureIndex < classificationModel.FeatureVectors.Count; ++featureIndex)
                {
                    var featureVector = classificationModel.FeatureVectors[featureIndex];

                    // Each band intensity is divided by ushort.MaxValue before training.
                    input[featureIndex] = Array.ConvertAll(featureVector.FeatureVector.BandIntensities, s => (double)s / ushort.MaxValue);
                    responses[featureIndex] = featureVector.FeatureClass;
                }

                // Assign every sample to fold 0 (training) or fold 1 (validation).
                // Exactly two folds exist, so they are read directly instead of going
                // through a scratch array sized by the number of samples.
                var splittings = CrossValidation.Splittings(input.Length, 2);
                int[] indicesTrain = splittings.Find(x => x == 0);
                int[] indicesValidation = splittings.Find(x => x == 1);

                // Training data:
                var trainingInputs = input.Get(indicesTrain);
                var trainingOutputs = responses.Get(indicesTrain);

                // Validation data:
                var validationInputs = input.Get(indicesValidation);
                var validationOutputs = responses.Get(indicesValidation);

                // Train on the first fold and predict the second one.
                int[] prediction;
                switch (Kernel)
                {
                case Kernel.Gaussian:
                    var gaussianLearningKfold = new MulticlassSupportVectorLearning <Gaussian>
                    {
                        Learner = p => new SequentialMinimalOptimization <Gaussian>
                        {
                            UseKernelEstimation = false,
                            UseComplexityHeuristic = false,
                            Complexity = Complexity,
                            Token = CancellationTokenSource.Token,
                            Tolerance = 0.01,
                            // The kernel is configured on the binary learner itself, as the
                            // training code does, so evaluation uses the same gamma as training.
                            Kernel = Gaussian.FromGamma(Gamma)
                        }
                    };
                    var svmGaussian = gaussianLearningKfold.Learn(trainingInputs, trainingOutputs);
                    prediction = svmGaussian.Decide(validationInputs);
                    break;

                case Kernel.Linear:
                    var linearLearning = new MulticlassSupportVectorLearning <Linear>
                    {
                        Learner = p => new LinearDualCoordinateDescent <Linear>
                        {
                            Complexity = Complexity,
                            UseComplexityHeuristic = false,
                            Token = CancellationTokenSource.Token
                        }
                    };
                    var svmLinear = linearLearning.Learn(trainingInputs, trainingOutputs);
                    prediction = svmLinear.Decide(validationInputs);
                    break;

                default:
                    throw new NotImplementedException();
                }

                GeneralConfusionMatrix confusionMatrix = new GeneralConfusionMatrix(classificationModel.LandCoverTypes.Count, prediction, validationOutputs);

                return confusionMatrix;
            }));
        }
Beispiel #5
0
        /// <summary>
        /// Converts the model's feature vectors into training data and starts a background task
        /// that trains the support vector machine matching the selected kernel.
        /// </summary>
        /// <param name="classificationModel">Supplies the feature vectors and their classes.</param>
        /// <returns>
        /// A task that completes once the machine has been trained and stored in the field
        /// belonging to the selected kernel.
        /// </returns>
        /// <exception cref="InvalidOperationException">The selected kernel is not linear, Gaussian or polynomial.</exception>
        public override Task TrainAsync(ClassificationModel classificationModel)
        {
            int sampleCount = classificationModel.FeatureVectors.Count;

            var samples = new double[sampleCount][];
            var labels  = new int[sampleCount];

            for (int i = 0; i < classificationModel.FeatureVectors.Count; ++i)
            {
                var classified = classificationModel.FeatureVectors[i];

                // Each band intensity is divided by ushort.MaxValue before training.
                samples[i] = Array.ConvertAll(classified.FeatureVector.BandIntensities, intensity => (double)intensity / ushort.MaxValue);
                labels[i]  = classified.FeatureClass;
            }

            // The teacher is configured on the calling thread; only Learn runs in the task.
            switch (Kernel)
            {
            case Kernel.Linear:
            {
                var linearTeacher = new MulticlassSupportVectorLearning<Linear>
                {
                    Learner = subproblem => new LinearDualCoordinateDescent<Linear>
                    {
                        Complexity             = Complexity,
                        UseComplexityHeuristic = false,
                        Token                  = CancellationTokenSource.Token
                    }
                };

                return Task.Factory.StartNew(() =>
                {
                    _lSvm = linearTeacher.Learn(samples, labels);
                });
            }

            case Kernel.Gaussian:
            {
                var gaussianTeacher = new MulticlassSupportVectorLearning<Gaussian>
                {
                    Learner = subproblem => new SequentialMinimalOptimization<Gaussian>
                    {
                        Complexity             = Complexity,
                        UseComplexityHeuristic = false,
                        UseKernelEstimation    = false,
                        Token                  = CancellationTokenSource.Token,
                        Kernel                 = Gaussian.FromGamma(Gamma),
                    }
                };

                return Task.Factory.StartNew(() =>
                {
                    _gSvm = gaussianTeacher.Learn(samples, labels);
                });
            }

            case Kernel.Polynomial:
            {
                var polynomialTeacher = new MulticlassSupportVectorLearning<Polynomial>
                {
                    Learner = subproblem => new SequentialMinimalOptimization<Polynomial>
                    {
                        Complexity             = Complexity,
                        UseKernelEstimation    = false,
                        UseComplexityHeuristic = false,
                        Token                  = CancellationTokenSource.Token,
                        Kernel                 = new Polynomial(Degree, 1)
                    }
                };

                return Task.Factory.StartNew(() =>
                {
                    _pSvm = polynomialTeacher.Learn(samples, labels);
                });
            }

            default:
                throw new InvalidOperationException();
            }
        }
Beispiel #6
0
        /// <summary>
        /// Regression test: trains a multi-label SVM through the generic <c>IKernel</c> interface,
        /// calibrates it with <c>ProbabilisticOutputCalibration</c> and compares decisions, scores,
        /// log-likelihoods and probabilities against pinned values. The probabilities for the
        /// first sample are additionally compared with the result of <c>CorrectProbabilities</c>.
        /// </summary>
        public void multilabel_calibration_generic_kernel()
        {
            // Let's say we have the following data to be classified
            // into three possible classes. Those are the samples:
            //
            double[][] inputs =
            {
                //               input         output
                new double[] { 0, 1, 1, 0 }, //  0
                new double[] { 0, 1, 0, 0 }, //  0
                new double[] { 0, 0, 1, 0 }, //  0
                new double[] { 0, 1, 1, 0 }, //  0
                new double[] { 0, 1, 0, 0 }, //  0
                new double[] { 1, 0, 0, 1 }, //  1
                new double[] { 0, 0, 0, 1 }, //  1
                new double[] { 0, 0, 0, 1 }, //  1
                new double[] { 1, 0, 1, 1 }, //  2
                new double[] { 1, 1, 0, 1 }, //  2
                new double[] { 0, 1, 1, 1 }, //  2
                new double[] { 1, 1, 1, 1 }, //  2
            };

            int[] outputs = // those are the class labels
            {
                0, 0, 0, 0, 0,
                1, 1, 1,
                2, 2, 2, 2,
            };

            // Create the multi-label learning algorithm for the machine
            var teacher = new MultilabelSupportVectorLearning <IKernel>()
            {
                // Configure the learning algorithm to use SMO to train the
                //  underlying SVMs in each of the binary class subproblems.
                Learner = (param) => new SequentialMinimalOptimization <IKernel>()
                {
                    UseKernelEstimation = false,
                    Kernel = Gaussian.FromGamma(0.5)
                }
            };

            // Learn a machine
            var machine = teacher.Learn(inputs, outputs);


            // Create a second learning pass over the same machine that
            // calibrates each underlying binary SVM instead of retraining it.
            var calibration = new MultilabelSupportVectorLearning <IKernel>(machine)
            {
                Learner = (p) => new ProbabilisticOutputCalibration <IKernel>(p.Model)
            };


            // Configure parallel execution options
            calibration.ParallelOptions.MaxDegreeOfParallelism = 1;

            // Calibrate the machine
            calibration.Learn(inputs, outputs);

            // Obtain class predictions for each sample
            bool[][] predicted = machine.Decide(inputs);

            // Get class scores for each sample
            double[][] scores = machine.Scores(inputs);

            // Get per-class log-likelihoods for each sample
            double[][] logl = machine.LogLikelihoods(inputs);

            // Get probability for each sample
            double[][] prob = machine.Probabilities(inputs);

            // Compute the Hamming loss of the decisions and the
            // cross-entropy loss of the probabilities
            double error = new HammingLoss(outputs).Loss(predicted);
            double loss  = new CategoryCrossEntropyLoss(outputs).Loss(prob);

            double[][] expectedScores =
            {
                new double[] {  1.85316017783605, -2.59688389729331,  -2.32170102153988 },
                new double[] {  1.84933597524124, -1.99399145231446,   -2.2920299547693 },
                new double[] {  1.44477953581274, -1.98592298465108,  -2.27356092239125 },
                new double[] {  1.85316017783605, -2.59688389729331,  -2.32170102153988 },
                new double[] {  1.84933597524124, -1.99399145231446,   -2.2920299547693 },
                new double[] { -2.40815576360914, 0.328362962196791, -0.932721757919691 },
                new double[] { -2.13111157264226,    1.809192096031,   -2.2920299547693 },
                new double[] { -2.13111157264226,    1.809192096031,   -2.2920299547693 },
                new double[] { -2.14888646926108, -1.99399145231447,   1.33101148524982 },
                new double[] { -2.12915064678299, -1.98592298465108,    1.3242171079396 },
                new double[] { -1.47197826667149, -1.96368715704762,  0.843414180834243 },
                new double[] { -2.14221021749314, -2.83117892529093,   2.61354519154994 }
            };

            double[][] expectedLogL =
            {
                new double[] { -0.145606614365135,  -2.66874434442222,   -2.41528841111469 },
                new double[] { -0.146125659911391,  -2.12163759796483,    -2.3883043096263 },
                new double[] { -0.211716960454159,  -2.11453945718522,   -2.37154474995633 },
                new double[] { -0.145606614365135,  -2.66874434442222,   -2.41528841111469 },
                new double[] { -0.146125659911391,  -2.12163759796483,    -2.3883043096263 },
                new double[] {   -2.4943161092787, -0.542383360363463,   -1.26452689970624 },
                new double[] {  -2.24328358118314, -0.151678833375872,    -2.3883043096263 },
                new double[] {  -2.24328358118314, -0.151678833375872,    -2.3883043096263 },
                new double[] {  -2.25918730624753,  -2.12163759796483,  -0.234447327588685 },
                new double[] {  -2.24153091066541,  -2.11453945718522,    -0.2358711195715 },
                new double[] {  -1.67856232802554,   -2.0950136294762,  -0.357841632335707 },
                new double[] {  -2.25321037906455,  -2.88845047104229, -0.0707140798850236 }
            };

            double[][] expectedProbs =
            {
                new double[] {  0.844913862516144, 0.0677684640174953, 0.0873176734663607 },
                new double[] {  0.803266328757473,  0.111405242674824, 0.0853284285677024 },
                new double[] {  0.790831391595502,  0.117950175028754, 0.0912184333757438 },
                new double[] {  0.844913862516144, 0.0677684640174953, 0.0873176734663607 },
                new double[] {  0.803266328757473,  0.111405242674824, 0.0853284285677024 },
                new double[] { 0.0872387667998771,  0.614360294206236,  0.298400938993887 },
                new double[] {  0.100372339295793,  0.812805149315815, 0.0868225113883914 },
                new double[] {  0.100372339295793,  0.812805149315815, 0.0868225113883914 },
                new double[] {  0.102863726210119,   0.11803188195247,  0.779104391837411 },
                new double[] {  0.104532503226998,  0.118686968710368,  0.776780528062634 },
                new double[] {  0.184996665350572,  0.121983586443407,  0.693019748206021 },
                new double[] { 0.0961702585148881, 0.0509517983210315,   0.85287794316408 }
            };

            // The position of the largest decision entry in each row must be the class label.
            int[] actual = predicted.ArgMax(dimension: 1);
            Assert.IsTrue(actual.IsEqual(outputs));

            // Pinned regression values
            Assert.AreEqual(0, error);
            Assert.AreEqual(0.5, ((Gaussian)machine[0].Kernel).Gamma);
            Assert.AreEqual(0.5, ((Gaussian)machine[1].Kernel).Gamma);
            Assert.AreEqual(0.5, ((Gaussian)machine[2].Kernel).Gamma);
            Assert.AreEqual(2.9395943260892361, loss);
            Assert.IsTrue(expectedScores.IsEqual(scores, 1e-10));
            Assert.IsTrue(expectedLogL.IsEqual(logl, 1e-10));
            Assert.IsTrue(expectedProbs.IsEqual(prob, 1e-10));

            // Cross-check the single-sample probabilities against the reference helper.
            double[] probabilities = CorrectProbabilities(machine, inputs[0]);
            double[] actualProb    = machine.Probabilities(inputs[0]);
            Assert.IsTrue(probabilities.IsEqual(actualProb, 1e-8));
        }
        /// <summary>
        /// Regression test: trains a multi-label SVM through the generic <c>IKernel</c> interface,
        /// calibrates it with <c>ProbabilisticOutputCalibration</c> and compares decisions, scores,
        /// log-likelihoods and probabilities against pinned values.
        /// </summary>
        public void multilabel_calibration_generic_kernel()
        {
            // Let's say we have the following data to be classified
            // into three possible classes. Those are the samples:
            //
            double[][] inputs =
            {
                //               input         output
                new double[] { 0, 1, 1, 0 }, //  0
                new double[] { 0, 1, 0, 0 }, //  0
                new double[] { 0, 0, 1, 0 }, //  0
                new double[] { 0, 1, 1, 0 }, //  0
                new double[] { 0, 1, 0, 0 }, //  0
                new double[] { 1, 0, 0, 1 }, //  1
                new double[] { 0, 0, 0, 1 }, //  1
                new double[] { 0, 0, 0, 1 }, //  1
                new double[] { 1, 0, 1, 1 }, //  2
                new double[] { 1, 1, 0, 1 }, //  2
                new double[] { 0, 1, 1, 1 }, //  2
                new double[] { 1, 1, 1, 1 }, //  2
            };

            int[] outputs = // those are the class labels
            {
                0, 0, 0, 0, 0,
                1, 1, 1,
                2, 2, 2, 2,
            };

            // Create the multi-label learning algorithm for the machine
            var teacher = new MultilabelSupportVectorLearning <IKernel>()
            {
                // Configure the learning algorithm to use SMO to train the
                //  underlying SVMs in each of the binary class subproblems.
                Learner = (param) => new SequentialMinimalOptimization <IKernel>()
                {
                    UseKernelEstimation = false,
                    Kernel = Gaussian.FromGamma(0.5)
                }
            };

            // Learn a machine
            var machine = teacher.Learn(inputs, outputs);


            // Create a second learning pass over the same machine that
            // calibrates each underlying binary SVM instead of retraining it.
            var calibration = new MultilabelSupportVectorLearning <IKernel>(machine)
            {
                Learner = (p) => new ProbabilisticOutputCalibration <IKernel>(p.Model)
            };


            // Configure parallel execution options
            calibration.ParallelOptions.MaxDegreeOfParallelism = 1;

            // Calibrate the machine
            calibration.Learn(inputs, outputs);

            // Obtain class predictions for each sample
            bool[][] predicted = machine.Decide(inputs);

            // Get class scores for each sample
            double[][] scores = machine.Scores(inputs);

            // Get log-likelihoods (the pinned table below is identical to the scores table)
            double[][] logl = machine.LogLikelihoods(inputs);

            // Get probability for each sample
            double[][] prob = machine.Probabilities(inputs);

            // Compute the Hamming loss of the decisions and the
            // cross-entropy loss of the probabilities
            double error = new HammingLoss(outputs).Loss(predicted);
            double loss  = new CategoryCrossEntropyLoss(outputs).Loss(prob);

            double[][] expectedScores =
            {
                new double[] {  1.85316017783605, -2.59688389729331,  -2.32170102153988 },
                new double[] {  1.84933597524124, -1.99399145231446,   -2.2920299547693 },
                new double[] {  1.44477953581274, -1.98592298465108,  -2.27356092239125 },
                new double[] {  1.85316017783605, -2.59688389729331,  -2.32170102153988 },
                new double[] {  1.84933597524124, -1.99399145231446,   -2.2920299547693 },
                new double[] { -2.40815576360914, 0.328362962196791, -0.932721757919691 },
                new double[] { -2.13111157264226,    1.809192096031,   -2.2920299547693 },
                new double[] { -2.13111157264226,    1.809192096031,   -2.2920299547693 },
                new double[] { -2.14888646926108, -1.99399145231447,   1.33101148524982 },
                new double[] { -2.12915064678299, -1.98592298465108,    1.3242171079396 },
                new double[] { -1.47197826667149, -1.96368715704762,  0.843414180834243 },
                new double[] { -2.14221021749314, -2.83117892529093,   2.61354519154994 }
            };

            double[][] expectedLogL =
            {
                new double[] {  1.85316017783605, -2.59688389729331,  -2.32170102153988 },
                new double[] {  1.84933597524124, -1.99399145231446,   -2.2920299547693 },
                new double[] {  1.44477953581274, -1.98592298465108,  -2.27356092239125 },
                new double[] {  1.85316017783605, -2.59688389729331,  -2.32170102153988 },
                new double[] {  1.84933597524124, -1.99399145231446,   -2.2920299547693 },
                new double[] { -2.40815576360914, 0.328362962196791, -0.932721757919691 },
                new double[] { -2.13111157264226,    1.809192096031,   -2.2920299547693 },
                new double[] { -2.13111157264226,    1.809192096031,   -2.2920299547693 },
                new double[] { -2.14888646926108, -1.99399145231447,   1.33101148524982 },
                new double[] { -2.12915064678299, -1.98592298465108,    1.3242171079396 },
                new double[] { -1.47197826667149, -1.96368715704762,  0.843414180834243 },
                new double[] { -2.14221021749314, -2.83117892529093,   2.61354519154994 }
            };

            // NOTE(review): several pinned "probabilities" below exceed 1 and the pinned
            // cross-entropy loss further down is negative. Presumably these values were
            // recorded from a library version that returned unnormalized values here;
            // confirm against the library version under test before relying on them.
            double[][] expectedProbs =
            {
                new double[] {   6.37994947365835, 0.0745053832890827, 0.0981065622139132 },
                new double[] {   6.35559784678136,  0.136150899620619,  0.101061104020747 },
                new double[] {   4.24091706941419,  0.137253872418087,  0.102944947658882 },
                new double[] {   6.37994947365835, 0.0745053832890827, 0.0981065622139132 },
                new double[] {   6.35559784678136,  0.136150899620619,  0.101061104020747 },
                new double[] { 0.0899810880411361,   1.38869292386051,  0.393481290780948 },
                new double[] {  0.118705270957796,   6.10551277113228,  0.101061104020747 },
                new double[] {  0.118705270957796,   6.10551277113228,  0.101061104020747 },
                new double[] {  0.116613938707895,  0.136150899620619,   3.78486979203385 },
                new double[] {  0.118938271567046,  0.137253872418087,   3.75924112261421 },
                new double[] {  0.229471080877097,  0.140340010119971,    2.3242889884131 },
                new double[] {   0.11739508739354, 0.0589433229176013,   13.6473476521179 }
            };

            // The position of the largest decision entry in each row must be the class label.
            int[] actual = predicted.ArgMax(dimension: 1);
            Assert.IsTrue(actual.IsEqual(outputs));

            // Pinned regression values
            Assert.AreEqual(0, error);
            Assert.AreEqual(0.5, ((Gaussian)machine[0].Kernel).Gamma);
            Assert.AreEqual(0.5, ((Gaussian)machine[1].Kernel).Gamma);
            Assert.AreEqual(0.5, ((Gaussian)machine[2].Kernel).Gamma);
            Assert.AreEqual(-18.908706961799737, loss);
            Assert.IsTrue(expectedScores.IsEqual(scores, 1e-10));
            Assert.IsTrue(expectedLogL.IsEqual(logl, 1e-10));
            Assert.IsTrue(expectedProbs.IsEqual(prob, 1e-10));
        }
        /// <summary>
        /// Splits the document's sentences into regular and anomalous text blocks. A one-class
        /// SVM with a Gaussian kernel is trained on the standardized cluster vectors, every
        /// cluster is scored, and each sentence accumulates the scores of the clusters that
        /// contain it. Sentences whose total is not above the cutoff (the total found after
        /// skipping the lowest 20% of sentence totals) are treated as anomalies.
        /// </summary>
        /// <param name="document">Clustered document to filter.</param>
        /// <returns>
        /// All clusters unchanged when there are fewer than three clusters or when the clusters
        /// hold no sentences; otherwise runs of consecutive sentences grouped into regular
        /// blocks and anomaly blocks.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="document"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// The number of distinct sentences differs from the number of scored sentence indexes.
        /// </exception>
        public DetectionResults Filter(DocumentClusters document)
        {
            if (document == null)
            {
                throw new ArgumentNullException(nameof(document));
            }

            if (document.Clusters.Length < 3)
            {
                logger.Info("Not enought text clusters for clustering");
                return new DetectionResults(document.Clusters);
            }

            var allSentences = document.Clusters.SelectMany(item => item.Sentences)
                               .Distinct()
                               .OrderBy(item => item.Index)
                               .ToArray();

            // Without sentences there is nothing to score or group; previously this case
            // failed later with an InvalidOperationException from an empty sequence.
            if (allSentences.Length == 0)
            {
                logger.Info("No sentences found in text clusters");
                return new DetectionResults(document.Clusters);
            }

            double[][] observations = vectorSource.GetVectors(document.Clusters, NormalizationType.None);
            var standardizer = Standardizer.GetNumericStandardizer(observations);
            observations = standardizer.StandardizeAll(observations);

            // The learner must not see NaN values, so they are replaced with zero in place.
            foreach (double[] row in observations)
            {
                for (int j = 0; j < row.Length; j++)
                {
                    if (double.IsNaN(row[j]))
                    {
                        row[j] = 0;
                    }
                }
            }

            // NOTE(review): gamma is 1 / (number of observations); a common default is
            // 1 / (number of features) - confirm this is intended.
            var teacher = new OneclassSupportVectorLearning <Gaussian>
            {
                Kernel    = Gaussian.FromGamma(1.0 / observations.Length),
                Nu        = 0.5,
                Shrinking = true,
                Tolerance = 0.001
            };

            var svm = teacher.Learn(observations);
            double[] prediction = svm.Score(observations);

            // Total score per sentence index: a sentence that appears in several clusters
            // receives the sum of those clusters' scores.
            var sentenceScores = new Dictionary <int, double>();
            for (int i = 0; i < prediction.Length; i++)
            {
                foreach (var sentenceItem in document.Clusters[i].Sentences)
                {
                    // total stays 0.0 when the index has not been seen yet.
                    sentenceScores.TryGetValue(sentenceItem.Index, out var total);
                    sentenceScores[sentenceItem.Index] = total + prediction[i];
                }
            }

            // Checked before the cutoff is computed so the lookups below cannot miss.
            if (allSentences.Length != sentenceScores.Count)
            {
                throw new ArgumentOutOfRangeException(nameof(document), "Sentence length mismatch");
            }

            var cutoffIndex = (int)(sentenceScores.Count * 0.2);
            var cutoff      = sentenceScores.Values.OrderBy(item => item).Skip(cutoffIndex).First();

            var anomaly      = new List <ProcessingTextBlock>();
            var resultData   = new List <ProcessingTextBlock>();
            var run          = new List <SentenceItem>();
            bool runIsRegular = false;

            // Closes the current run of same-class sentences and files it into the matching list.
            void FlushRun()
            {
                var block = new ProcessingTextBlock(run.ToArray());
                run.Clear();
                (runIsRegular ? resultData : anomaly).Add(block);
            }

            foreach (var sentence in allSentences)
            {
                bool isRegular = sentenceScores[sentence.Index] > cutoff;
                if (run.Count > 0 && isRegular != runIsRegular)
                {
                    FlushRun();
                }

                run.Add(sentence);
                runIsRegular = isRegular;
            }

            // allSentences is non-empty here, so there is always a final run to close.
            FlushRun();

            return new DetectionResults(resultData.ToArray(), anomaly.ToArray());
        }
        /// <summary>
        /// Runs a 10-fold cross-validated grid search over the Complexity and Gamma values of a
        /// Gaussian-kernel multiclass SVM (trained with SMO), builds a model through
        /// <c>CreateModel</c> using the best parameters found, and measures that model's
        /// misclassification rate over <c>_originalInputsList</c>.
        /// </summary>
        /// <param name="inputsList">Training input vectors.</param>
        /// <param name="outputsList">Class labels matching <paramref name="inputsList"/>.</param>
        /// <returns>
        /// A tuple of (model, misclassification rate, best gamma, best complexity).
        /// </returns>
        private Tuple <MulticlassSupportVectorMachine <Gaussian>, double, double, double> TrainingPaper(List <double[]> inputsList, List <int> outputsList)
        {
            // Shorthand for the power-of-two grid points shared by both parameter ranges.
            Func<int, double> pow2 = exponent => Math.Pow(2, exponent);

            var search = GridSearch<double[], int>.CrossValidate(
                // Candidate values explored for each hyper-parameter.
                ranges: new
                {
                    Complexity = GridSearch.Values(
                        pow2(-12), pow2(-11), pow2(-10), pow2(-8), pow2(-6),
                        pow2(-4), pow2(-2), pow2(0), pow2(2), pow2(4),
                        pow2(6), pow2(8), pow2(10), pow2(11), pow2(12)),
                    Gamma = GridSearch.Values(
                        pow2(-12), pow2(-11), pow2(-10), pow2(-8), pow2(-6),
                        pow2(-4), pow2(-2), pow2(0), pow2(2), pow2(4),
                        pow2(6), pow2(8), pow2(10), pow2(11), pow2(12))
                },

                // One multiclass learner per candidate; each binary sub-problem is solved by
                // SMO with the candidate's complexity and a Gaussian kernel built from its gamma.
                learner: (candidate, subset) => new MulticlassSupportVectorLearning<Gaussian>()
                {
                    Learner = binaryProblem => new SequentialMinimalOptimization<Gaussian>()
                    {
                        Complexity = candidate.Complexity,
                        Kernel     = Gaussian.FromGamma(candidate.Gamma)
                    }
                },

                // How a learner is fitted to a fold's data.
                fit: (algorithm, foldInputs, foldLabels, foldWeights) => algorithm.Learn(foldInputs, foldLabels, foldWeights),

                // Performance is measured as the Hamming loss between expected and predicted labels.
                loss: (predictedLabels, expectedLabels, fittedModel) => new HammingLoss(expectedLabels).Loss(predictedLabels),
                folds: 10
                );

            search.ParallelOptions.MaxDegreeOfParallelism = _paralelism;

            Console.WriteLine("y nos ponemos a aprender");

            // Run the search for the best parameter combination.
            var searchResult = search.Learn(inputsList.ToArray(), outputsList.ToArray());

            Console.WriteLine("Error modelo: " + searchResult.BestModelError);

            var bestModel = CreateModel(inputsList, outputsList, searchResult.BestParameters.Complexity, searchResult.BestParameters.Gamma);

            Console.WriteLine("Largo: " + _originalInputsList.Count);

            // Count the original samples that the final model labels incorrectly.
            int misses = 0;
            foreach (var sample in _originalInputsList)
            {
                if (bestModel.Decide(sample.Item1).Equals(sample.Item2))
                {
                    continue;
                }

                misses++;
            }

            double error = (double)misses / _originalInputsList.Count;
            Console.WriteLine("Error real: " + error);

            double bestGamma      = searchResult.BestParameters.Gamma.Value;
            double bestComplexity = searchResult.BestParameters.Complexity.Value;

            return new Tuple<MulticlassSupportVectorMachine<Gaussian>, double, double, double>(bestModel, error, bestGamma, bestComplexity);
        }