/// <summary>
/// Cross-validates a Naive Bayes learner on the given data and stores the
/// aggregated confusion matrix in <c>ConfusionMatrix</c>.
/// </summary>
/// <param name="folds">Number of cross-validation folds to use.</param>
/// <param name="inputs">Feature vectors; every value is converted to an integer with <c>Convert.ToInt32</c> before learning.</param>
/// <param name="outputs">Class label for each feature vector.</param>
public void ApplyCrossValidation(int folds, double[][] inputs, int[] outputs)
        {
            // Convert the inputs a single time: the same integer matrix is needed to
            // build the validator, to run it and to compute the confusion matrix.
            int[][] discreteInputs = inputs
                .Select(row => row.Select(value => Convert.ToInt32(value)).ToArray())
                .ToArray();

            crossValidation = CrossValidation.Create(

                k: folds, // honour the fold count requested by the caller

                learner: (p) => new B.NaiveBayesLearning(), // factory for the learning algorithm

                // Performance is measured with the zero-one (misclassification) loss:
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),

                // No special preparation is needed before learning, so the
                // fitting step simply delegates to the learner:
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

                // The data that will be used in cross-validation:
                x: discreteInputs, y: outputs
                );

            var result = crossValidation.Learn(discreteInputs, outputs);

            ConfusionMatrix = result.ToConfusionMatrix(discreteInputs, outputs).Matrix;
        }
    /// <summary>
    /// Runs a 5-fold cross-validation of support vector regression on 1000
    /// generated rows, waits for it to finish and writes the execution time
    /// to the test context. Rethrows any exception stored in <c>ex</c>.
    /// </summary>
    public void SupportVectorMachinePerformanceTest() {
      ex = null;

      var crossValidation = new CrossValidation();
      crossValidation.Algorithm = new SupportVectorRegression();

      // Build the data set: ten input columns plus the target column "y".
      var random = new HeuristicLab.Random.MersenneTwister();
      double[,] values = GenerateData(1000, random);
      var columnNames = new List<string> { "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "y" };
      var dataset = new Dataset(columnNames, values);

      var problemData = new RegressionProblemData(dataset, columnNames.Take(10), columnNames.Last());
      crossValidation.Problem.ProblemDataParameter.ActualValue = problemData;
      crossValidation.Folds.Value = 5;
      crossValidation.SamplesStart.Value = 0;
      crossValidation.SamplesEnd.Value = 999;

      crossValidation.ExceptionOccurred += cv_ExceptionOccurred;
      crossValidation.Stopped += cv_Stopped;

      crossValidation.Prepare();
      crossValidation.Start();

      // Block until the wait handle is signalled.
      trigger.WaitOne();
      if (ex != null) {
        throw ex;
      }

      TestContext.WriteLine("Runtime: {0}", crossValidation.ExecutionTime.ToString());
    }
        /// <summary>
        /// Cross-validates a multiclass support vector machine (Gaussian kernel,
        /// trained with sequential minimal optimization) on the given data and
        /// stores the aggregated confusion matrix in <c>ConfusionMatrix</c>.
        /// </summary>
        /// <param name="folds">Number of cross-validation folds to use.</param>
        /// <param name="inputs">Feature vectors, one per sample.</param>
        /// <param name="outputs">Class label for each feature vector.</param>
        public override void ApplyCrossValidation(int folds, double[][] inputs, int[] outputs)
        {
            crossValidation = CrossValidation.Create(

                k: folds, // honour the fold count requested by the caller

                learner: (p) => new MulticlassSupportVectorLearning<Gaussian>()
                {
                    // Use SMO to train the underlying SVMs in each of the
                    // binary class subproblems.
                    Learner = (param) => new SequentialMinimalOptimization<Gaussian>()
                    {
                        // Estimate a starting guess for the Gaussian kernel's parameters.
                        UseKernelEstimation = true
                    }
                },

                // Performance is measured with the zero-one (misclassification) loss:
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),

                // No special preparation is needed before learning, so the
                // fitting step simply delegates to the learner:
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

                // The data that will be used in cross-validation:
                x: inputs, y: outputs
                );

            var result = crossValidation.Learn(inputs, outputs);

            ConfusionMatrix = result.ToConfusionMatrix(inputs, outputs).Matrix;
        }
Пример #4
0
        /// <summary>
        /// Runs a 3-fold cross-validation of Naive Bayes on <c>inputsInt</c> /
        /// <c>outputs</c> and appends the mean training error, mean validation
        /// error and aggregate accuracy to <c>message</c>.
        /// </summary>
        public void nativeBayesValidation()
        {
            // Only the cross-validated statistics are reported, so no separate
            // model is trained on the full data set here.
            var cv = CrossValidation.Create(
                k: 3,

                learner: (p) => new NaiveBayesLearning(),

                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),

                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

                x: inputsInt, y: outputs
                );

            var result = cv.Learn(inputsInt, outputs);

            double trainingError       = result.Training.Mean;
            double validationError     = result.Validation.Mean;
            GeneralConfusionMatrix gcm = result.ToConfusionMatrix(inputsInt, outputs);
            double accuracy            = gcm.Accuracy;

            message += "Native Bayes Validacja\n";
            message += "trainingError " + trainingError.ToString() + "\n";
            message += "validationError " + validationError.ToString() + "\n";
            message += "accuracy " + accuracy.ToString() + "\n\n";
        }
        /// <summary>
        /// Runs a 5-fold cross-validation of support vector regression on 1000
        /// generated rows and writes the execution time to the test context.
        /// Rethrows any exception stored in <c>ex</c>.
        /// </summary>
        public void SupportVectorMachinePerformanceTest()
        {
            ex = null;

            var crossValidation = new CrossValidation();
            crossValidation.Algorithm = new SupportVectorRegression();

            // Build the data set: ten input columns plus the target column "y".
            var random = new HeuristicLab.Random.MersenneTwister();
            double[,] values = GenerateData(1000, random);
            var columnNames = new List<string>
            {
                "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "y"
            };
            var dataset = new Dataset(columnNames, values);

            var problemData = new RegressionProblemData(dataset, columnNames.Take(10), columnNames.Last());
            crossValidation.Problem.ProblemDataParameter.ActualValue = problemData;
            crossValidation.Folds.Value        = 5;
            crossValidation.SamplesStart.Value = 0;
            crossValidation.SamplesEnd.Value   = 999;

            crossValidation.ExceptionOccurred += cv_ExceptionOccurred;

            crossValidation.Prepare();
            crossValidation.Start();

            if (ex != null)
            {
                throw ex;
            }

            TestContext.WriteLine("Runtime: {0}", crossValidation.ExecutionTime.ToString());
        }
Пример #6
0
        /// <summary>
        /// Builds 40 shuffled labels (ten 1s, thirty 0s) and checks that a
        /// 10-fold cross-validation constructed from those labels gives every
        /// validation fold exactly one label equal to 1 and every training
        /// fold exactly nine.
        /// </summary>
        public void NotEnoughSamplesTest2()
        {
            Accord.Math.Tools.SetupGenerator(0);

            var positives = Matrix.Vector(10, 1);
            var negatives = Matrix.Vector(30, 0);
            int[] labels  = positives.Concatenate(negatives);

            Vector.Shuffle(labels);

            var crossvalidation = new CrossValidation<MulticlassSupportVectorMachine>(labels, 2, folds: 10);

            crossvalidation.RunInParallel = false;

            crossvalidation.Fitting = (index, indicesTrain, indicesValidation) =>
            {
                // Count the labels equal to 1 on each side of the split.
                int positivesInValidation = labels.Submatrix(indicesValidation).Count(label => label == 1);
                Assert.AreEqual(1, positivesInValidation);

                int positivesInTraining = labels.Submatrix(indicesTrain).Count(label => label == 1);
                Assert.AreEqual(9, positivesInTraining);

                // No model is trained; only the partitioning is under test.
                return new CrossValidationValues<MulticlassSupportVectorMachine>(null, 0, 0);
            };

            crossvalidation.Compute();
        }
Пример #7
0
        /// <summary>
        /// Evaluates the project's <c>SVM</c> wrapper with 10-fold
        /// cross-validation and prints per-fold training time and error,
        /// followed by the mean training and validation errors.
        /// </summary>
        /// <param name="inputs">Feature vectors, one per sample.</param>
        /// <param name="outputs">Class label for each feature vector.</param>
        public static void TestSVM(double[][] inputs, int[] outputs)
        {
            var crossValidation = new CrossValidation(inputs.Length, 10);

            crossValidation.Fitting = (k, indicesTrain, indicesValidation) =>
            {
                // Slice out the training portion of this fold...
                var foldTrainInputs  = inputs.Submatrix(indicesTrain);
                var foldTrainOutputs = outputs.Submatrix(indicesTrain);

                // ...and the held-out validation portion.
                var foldTestInputs  = inputs.Submatrix(indicesValidation);
                var foldTestOutputs = outputs.Submatrix(indicesValidation);

                // Time the construction and training of the machine.
                var timer         = Stopwatch.StartNew();
                var machine       = new SVM();
                var trainingError = machine.TrainSVM(new RationalQuadratic(1), 3, foldTrainInputs, foldTrainOutputs);
                timer.Stop();
                Console.WriteLine("Training for: " + timer.ElapsedMilliseconds + "ms with errors: " + trainingError);

                var validationError = machine.GetSMO().ComputeError(foldTestInputs, foldTestOutputs);

                // Hand the model and both error figures back to the validator.
                return new CrossValidationValues(machine, trainingError, validationError);
            };

            // Run all folds.
            var summary = crossValidation.Compute();

            // Report the averaged performance.
            var meanTrainingError   = summary.Training.Mean;
            var meanValidationError = summary.Validation.Mean;

            Console.WriteLine("Finished with " + meanTrainingError + " training errors and " + meanValidationError + " validation errors");
        }
 /// <summary>
 /// Creates a grid-search model selection through the native
 /// <c>new_GridSearchModelSelection__SWIG_1</c> entry point and forwards the
 /// returned value, together with <c>true</c>, to another constructor overload.
 /// </summary>
 /// <param name="model_parameters">Model selection parameters; passed to the native call via <c>getCPtr</c>.</param>
 /// <param name="cross_validation">Cross-validation object; passed to the native call via <c>getCPtr</c>.</param>
 public GridSearchModelSelection(ModelSelectionParameters model_parameters, CrossValidation cross_validation)
     : this(
         modshogunPINVOKE.new_GridSearchModelSelection__SWIG_1(
             ModelSelectionParameters.getCPtr(model_parameters),
             CrossValidation.getCPtr(cross_validation)),
         true)
 {
     // Nothing more to do unless the native call left an exception pending.
     if (!modshogunPINVOKE.SWIGPendingException.Pending)
     {
         return;
     }

     throw modshogunPINVOKE.SWIGPendingException.Retrieve();
 }
Пример #9
0
        /// <summary>
        /// Evaluates the project's <c>KNN</c> wrapper with 10-fold
        /// cross-validation and prints the mean training and validation errors.
        /// The training error is always reported as 0 because only the
        /// validation error is computed per fold.
        /// </summary>
        /// <param name="inputs">Feature vectors, one per sample.</param>
        /// <param name="outputs">Class label for each feature vector.</param>
        /// <param name="kValue">Value forwarded to <c>KNN.TrainKNN</c> (presumably the number of neighbours — confirm against that method).</param>
        public static void TestKNN(double[][] inputs, int[] outputs, int kValue)
        {
            // The validator partitions sample indices, so it must be sized by the
            // number of samples (rows) rather than by the width of the first row.
            var crossValidation = new CrossValidation(inputs.Length, 10);

            crossValidation.Fitting = delegate(int k, int[] indicesTrain, int[] indicesValidation)
            {
                var trainingInputs  = inputs.Submatrix(indicesTrain);
                var trainingOutputs = outputs.Submatrix(indicesTrain);

                // And now the validation data:
                var validationInputs  = inputs.Submatrix(indicesValidation);
                var validationOutputs = outputs.Submatrix(indicesValidation);

                var knn = new KNN();
                knn.TrainKNN(trainingInputs, trainingOutputs, kValue);

                var error = knn.ComputeError(validationInputs, validationOutputs);

                // No training error is measured for this classifier, hence the 0.
                return(new CrossValidationValues(knn, 0, error));
            };

            // Compute the cross-validation
            var result = crossValidation.Compute();

            // Finally, access the measured performance.
            var trainingErrors   = result.Training.Mean;
            var validationErrors = result.Validation.Mean;

            Console.WriteLine("Finished with " + trainingErrors + " training errors and " + validationErrors + " validation errors");
        }
Пример #10
0
        /// <summary>
        /// Cross-validates a classifier on the given training data. For every
        /// fold a new classifier instance is created and trained, its zero-one
        /// loss on the training and validation portions is computed, and a
        /// tab-separated line of fold statistics is printed to the console.
        /// </summary>
        /// <param name="classifier">Prototype used to create one classifier per fold via <c>CreateInstance</c>.</param>
        /// <param name="trainingData">Inputs, outputs and feature metadata to validate on.</param>
        /// <param name="folds">Number of folds; defaults to 10.</param>
        /// <returns>The result returned by the validator's <c>Compute</c> call.</returns>
        public CrossValidationResult<object> Validate(IClassifier classifier, TrainingData trainingData, int folds = 10)
        {
            var validator = new CrossValidation(size: trainingData.Inputs.Length, folds: folds);

            validator.Fitting = (k, indicesTrain, indicesValidation) =>
            {
                // Partition the data for this fold.
                var foldTrainX = trainingData.Inputs.Get(indicesTrain);
                var foldTrainY = trainingData.Outputs.Get(indicesTrain);

                var foldTestX = trainingData.Inputs.Get(indicesValidation);
                var foldTestY = trainingData.Outputs.Get(indicesValidation);

                // Train a classifier created specifically for this fold.
                var foldClassifier = classifier.CreateInstance(trainingData.FeatureDefaultsValueTypes, trainingData.FeatureGranularities);
                foldClassifier.Train(foldTrainX, foldTrainY);

                var trainPredictions = foldClassifier.Decide(foldTrainX);
                var testPredictions  = foldClassifier.Decide(foldTestX);

                double trainingError   = new ZeroOneLoss(foldTrainY).Loss(trainPredictions);
                double validationError = new ZeroOneLoss(foldTestY).Loss(testPredictions);

                // Binary confusion matrix on the held-out part (1 = positive, 0 = negative).
                var confusionMatrix = new ConfusionMatrix(testPredictions, foldTestY, positiveValue: 1, negativeValue: 0);

                Console.WriteLine($"{k}\t{trainingError}\t{validationError}\t{confusionMatrix.Accuracy}\t{confusionMatrix.TruePositives}\t{confusionMatrix.TrueNegatives}\t{confusionMatrix.FalsePositives}\t{confusionMatrix.FalseNegatives}\t{confusionMatrix.FalsePositiveRate}");

                return new CrossValidationValues(foldClassifier, trainingError, validationError);
            };

            return validator.Compute();
        }
Пример #11
0
        /// <summary>
        /// Runs a 3-fold cross-validation of a 4-nearest-neighbours classifier
        /// on <c>inputs</c> / <c>outputs</c> and appends the mean training
        /// error, mean validation error and aggregate accuracy to <c>message</c>.
        /// </summary>
        public void knnValidation()
        {
            var validator = CrossValidation.Create(
                k: 3,
                learner: (p) => new KNearestNeighbors(k: 4),
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                x: inputs, y: outputs
                );

            var summary = validator.Learn(inputs, outputs);

            // Basic facts about the problem, read from the result:
            int sampleCount = summary.NumberOfSamples;
            int inputCount  = summary.NumberOfInputs;
            int outputCount = summary.NumberOfOutputs;

            double meanTrainingError   = summary.Training.Mean;
            double meanValidationError = summary.Validation.Mean;

            // Confusion matrix computed from the result on the full data set:
            GeneralConfusionMatrix confusion = summary.ToConfusionMatrix(inputs, outputs);
            double overallAccuracy           = confusion.Accuracy;

            message += "Knn Validacja\n";
            message += "trainingError " + meanTrainingError + "\n";
            message += "validationError " + meanValidationError + "\n";
            message += "accuracy " + overallAccuracy + "\n\n";
        }
        /// <summary>
        /// Cross-validates kernel discriminant analysis (quadratic kernel) on
        /// the given data and stores the aggregated confusion matrix in
        /// <c>ConfusionMatrix</c>.
        /// </summary>
        /// <param name="folds">Number of cross-validation folds to use.</param>
        /// <param name="inputs">Feature vectors, one per sample.</param>
        /// <param name="outputs">Class label for each feature vector.</param>
        public override void ApplyCrossValidation(int folds, double[][] inputs, int[] outputs)
        {
            crossValidation = CrossValidation.Create(

                k: folds, // honour the fold count requested by the caller

                learner: (p) => new KernelDiscriminantAnalysis() // factory for the learning algorithm
                {
                    Kernel = new Quadratic() // any kernel function could be chosen here
                },

                // Performance is measured with the zero-one (misclassification) loss:
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),

                // No special preparation is needed before learning, so the
                // fitting step simply delegates to the learner:
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

                // The data that will be used in cross-validation:
                x: inputs, y: outputs
                );

            var result = crossValidation.Learn(inputs, outputs);

            ConfusionMatrix = result.ToConfusionMatrix(inputs, outputs).Matrix;
        }
Пример #13
0
        /// <summary>
        /// Example: 10-fold cross-validation of a C4.5 decision tree
        /// (<c>Join = 2</c>, <c>MaxHeight = 5</c>) on the Wisconsin Diagnostic
        /// Breast Cancer data set, printing the aggregate accuracy.
        /// </summary>
        private static void breastCancerExample()
        {
            // Fix the seed so that runs are reproducible.
            Accord.Math.Random.Generator.Seed = 0;

            // Wisconsin (Diagnostic) Breast Cancer data: the task is to tell
            // malignant from benign tumours from features extracted from an exam.
            var dataset = new WisconsinDiagnosticBreastCancer();

            double[][] features = dataset.Features;    // 569 samples, 30-dimensional features
            int[]      classes  = dataset.ClassLabels; // 569 samples, 2 different class labels

            // Describe the experiment: which learner to build for each fold,
            // how to score it and which data to use.
            var validator = CrossValidation.Create(

                k: 10, // ten folds

                // Decision tree limited to height 5, where a variable may join
                // the decision path at most twice:
                learner: (p) => new C45Learning()
                {
                    Join      = 2,
                    MaxHeight = 5
                },

                // Score with the zero-one (misclassification) loss:
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),

                // Hook for custom work before learning; here it just learns:
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

                // Data to be used in cross-validation:
                x: features, y: classes
                );

            // Run the cross-validation on the data to be partitioned into folds.
            var summary = validator.Learn(features, classes);

            // Problem dimensions reported by the result:
            int sampleCount = summary.NumberOfSamples;    // expected: 569
            int inputCount  = summary.NumberOfInputs;     // expected: 30
            int classCount  = summary.NumberOfOutputs;    // expected: 2

            double meanTrainingError   = summary.Training.Mean;   // expected: 0.017771153143274855
            double meanValidationError = summary.Validation.Mean; // expected: 0.0755952380952381

            // Aggregate confusion matrix for the validation sets:
            GeneralConfusionMatrix confusion = summary.ToConfusionMatrix(features, classes);
            double overallAccuracy           = confusion.Accuracy; // expected: 0.92442882249560632

            Console.WriteLine("C45Learning learning algorithm accuracy is %" + (overallAccuracy * 100).ToString("N2"));
        }
Пример #14
0
        /// <summary>
        /// Runs the classifier once, using train fold 0 and test fold 0 of
        /// the given cross-validation.
        /// </summary>
        /// <param name="classifier">Classifier to run.</param>
        /// <param name="parameter">Parameter object passed to <c>SingleRun</c>.</param>
        /// <param name="crossValidation">Source of the train and test folds.</param>
        /// <returns>The value returned by <c>classifier.SingleRun</c>.</returns>
        protected Performance.Performance runExperiment(Classifier.Classifier classifier, Parameter.Parameter parameter,
                                                        CrossValidation<Instance.Instance> crossValidation)
        {
            // Only fold index 0 is used on both sides.
            var trainingInstances = new InstanceList.InstanceList(crossValidation.GetTrainFold(0));
            var testInstances     = new InstanceList.InstanceList(crossValidation.GetTestFold(0));

            var performance = classifier.SingleRun(parameter, trainingInstances, testInstances);
            return performance;
        }
Пример #15
0
        /// <summary>
        /// Verifies that a sequential 10-fold cross-validation over 100 samples
        /// calls the fitting function once per fold, in order, with 90 training
        /// and 10 validation indices, and that the returned per-fold values are
        /// aggregated into the expected means, deviations and sizes.
        /// </summary>
        public void FittingTest()
        {
            int[] foldAssignments = CrossValidation.Splittings(100, 10);
            int[] samples         = Matrix.Indices(0, 100);

            // Sequential execution is required: the callback checks the fold order.
            var validator = new CrossValidation(foldAssignments, 10)
            {
                RunInParallel = false
            };

            int expectedFold = 0;

            validator.Fitting = (k, trainingSamples, validationSamples) =>
            {
                Assert.AreEqual(expectedFold, k);
                Assert.AreEqual(90, trainingSamples.Length);
                Assert.AreEqual(10, validationSamples.Length);

                int[] trainingSet   = samples.Submatrix(trainingSamples);
                int[] validationSet = samples.Submatrix(validationSamples);

                // Submatrix must select exactly the elements the indices point at.
                for (int t = 0; t < trainingSet.Length; t++)
                {
                    Assert.AreEqual(samples[trainingSamples[t]], trainingSet[t]);
                }

                for (int v = 0; v < validationSet.Length; v++)
                {
                    Assert.AreEqual(samples[validationSamples[v]], validationSet[v]);
                }

                expectedFold++;

                // Fold k reports k as training value and 2k as validation value.
                return new CrossValidationValues(k, 2 * k);
            };

            var summary = validator.Compute();

            Assert.AreEqual(10, expectedFold);
            Assert.AreEqual(4.5, summary.Training.Mean);
            Assert.AreEqual(9.0, summary.Validation.Mean);
            Assert.AreEqual(
                2 * summary.Training.StandardDeviation,
                summary.Validation.StandardDeviation);

            Assert.AreEqual(validator.Folds.Length, summary.Training.Sizes.Length);
            Assert.AreEqual(validator.Folds.Length, summary.Validation.Sizes.Length);

            foreach (var trainingSize in summary.Training.Sizes)
            {
                Assert.AreEqual(90, trainingSize);
            }

            foreach (var validationSize in summary.Validation.Sizes)
            {
                Assert.AreEqual(10, validationSize);
            }
        }
Пример #16
0
        /// <summary>
        /// Builds a nine-feature data set from <c>trainingSets</c>, trains a
        /// C4.5 tree on all of it (stored in <c>decisionTreeLib</c>) and stores
        /// the 5-fold cross-validated accuracy, rounded to three decimals, in
        /// <c>accuracyC45lib</c>.
        /// </summary>
        private void trainingC45lib()
        {
            Accord.Math.Random.Generator.Seed = 0;

            c45Learning = new C45Learning()
            {
                Join      = 2,
                MaxHeight = 5
            };

            int sampleCount = trainingSets.Count;

            double[][] inputs1  = new double[sampleCount][];
            int[]      outputs1 = new int[sampleCount];

            int row = 0;
            foreach (Patient patient in trainingSets)
            {
                double[] features = new double[9];

                // Attribute 1 is bucketed into three bands: below 30 -> 0,
                // below 60 -> 1, otherwise 2.
                if (patient.get(1) < 30)
                {
                    features[0] = 0;
                }
                else if (patient.get(1) < 60)
                {
                    features[0] = 1;
                }
                else
                {
                    features[0] = 2;
                }

                // Attributes 2..9 are copied unchanged.
                for (int attribute = 2; attribute <= 9; attribute++)
                {
                    features[attribute - 1] = patient.get(attribute);
                }

                inputs1[row]  = features;
                outputs1[row] = patient.get(10); // attribute 10 is used as the class label
                row++;
            }

            var validator = CrossValidation.Create(
                k: 5,
                learner: (p) => new C45Learning()
                {
                    Join      = 2,
                    MaxHeight = 5
                },
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                x: inputs1, y: outputs1
                );

            // Train the tree on the full data set first, then cross-validate.
            decisionTreeLib = c45Learning.Learn(inputs1, outputs1);
            var summary = validator.Learn(inputs1, outputs1);

            GeneralConfusionMatrix confusion = summary.ToConfusionMatrix(inputs1, outputs1);

            accuracyC45lib = Math.Round(confusion.Accuracy, 3);
        }
Пример #17
0
        /// <summary>
        /// Cross-validates a regression decision tree learner on the decision
        /// tree data set and returns the mean squared error of the predictions.
        /// </summary>
        /// <param name="folds">Number of cross-validation folds.</param>
        /// <returns>Mean squared error between the targets and the cross-validated predictions.</returns>
        double CrossValidate(int folds)
        {
            var (observations, targets) = DataSetUtilities.LoadDecisionTreeDataSet();

            // Index sampler constructed with the fixed value 42.
            var sampler = new RandomIndexSampler<double>(42);
            var sut     = new CrossValidation<double>(sampler, folds);

            var learner     = new RegressionDecisionTreeLearner();
            var predictions = sut.CrossValidate(learner, observations, targets);

            var errorMetric = new MeanSquaredErrorRegressionMetric();
            return errorMetric.Error(targets, predictions);
        }
        /// <summary>
        /// Cross-validates a C4.5 decision tree on the feature vectors of the
        /// given model and collects one confusion matrix per fold, computed on
        /// that fold's validation samples.
        /// </summary>
        /// <param name="classificationModel">Supplies the bands (one continuous decision variable each) and the labelled feature vectors.</param>
        /// <param name="folds">Number of cross-validation folds.</param>
        /// <returns>
        /// A task producing the per-fold confusion matrices. The order of the
        /// list follows the order in which folds finish and is not guaranteed
        /// to match the fold index.
        /// </returns>
        public override Task <List <GeneralConfusionMatrix> > ComputeFoldedConfusionMatrixAsync(ClassificationModel classificationModel, int folds)
        {
            return(Task.Factory.StartNew(() =>
            {
                int numFeatures = classificationModel.FeatureVectors.Count;
                DecisionVariable[] decisionVariables = classificationModel.Bands
                    .Select(b => DecisionVariable.Continuous(b.ToString()))
                    .ToArray();

                // Number of enum members; evaluated once instead of once per use in every fold.
                int landcoverTypeCount = Enum.GetValues(typeof(LandcoverTypeViewModel)).Length;

                double[][] input = new double[numFeatures][];
                int[] responses = new int[numFeatures];

                for (int featureIndex = 0; featureIndex < numFeatures; ++featureIndex)
                {
                    var featureVector = classificationModel.FeatureVectors[featureIndex];

                    // Scale the band intensities by ushort.MaxValue.
                    input[featureIndex] = Array.ConvertAll(featureVector.FeatureVector.BandIntensities, s => (double)s / ushort.MaxValue);
                    responses[featureIndex] = featureVector.FeatureClass;
                }

                List <GeneralConfusionMatrix> confusionMatrices = new List <GeneralConfusionMatrix>();

                // The validator may invoke the fitting function for several folds
                // concurrently, and List<T>.Add is not thread-safe, so additions
                // to the shared list are serialised through this lock.
                object confusionMatricesLock = new object();

                // Create a new Cross-validation algorithm passing the data set size and the number of folds
                var crossvalidation = new CrossValidation(input.Length, folds);

                crossvalidation.Fitting = delegate(int k, int[] indicesTrain, int[] indicesValidation)
                {
                    // Lets now grab the training data:
                    var trainingInputs = input.Get(indicesTrain);
                    var trainingOutputs = responses.Get(indicesTrain);

                    // And now the validation data:
                    var validationInputs = input.Get(indicesValidation);
                    var validationOutputs = responses.Get(indicesValidation);

                    var tree = new DecisionTree(decisionVariables, landcoverTypeCount);
                    C45Learning c45Learning = new C45Learning(tree);
                    c45Learning.Learn(trainingInputs, trainingOutputs);

                    var predictedTraining = tree.Decide(trainingInputs);
                    var predictedValidation = tree.Decide(validationInputs);

                    double trainingError = new ZeroOneLoss(trainingOutputs).Loss(predictedTraining);
                    double validationError = new ZeroOneLoss(validationOutputs).Loss(predictedValidation);

                    // NOTE(review): the matrix is built with one class fewer than the tree;
                    // presumably one enum member is not a real class — confirm.
                    GeneralConfusionMatrix confusionMatrix = new GeneralConfusionMatrix(landcoverTypeCount - 1, validationOutputs, predictedValidation);

                    lock (confusionMatricesLock)
                    {
                        confusionMatrices.Add(confusionMatrix);
                    }

                    // Return a new information structure containing the errors achieved.
                    return new CrossValidationValues(trainingError, validationError);
                };

                // Only the matrices gathered by the callback are needed; the aggregate result is discarded.
                crossvalidation.Compute();

                return confusionMatrices;
            }));
        }
Пример #19
0
 /// <summary>
 /// For each of the <c>K</c> folds, trains the classifier on that fold's
 /// training instances and records its performance on the shared test set.
 /// </summary>
 /// <param name="classifier">Classifier to train and test.</param>
 /// <param name="parameter">Parameter object passed to <c>Train</c>.</param>
 /// <param name="experimentPerformance">Collector that receives one test result per fold.</param>
 /// <param name="crossValidation">Source of the training folds.</param>
 /// <param name="testSet">Test set used for every fold.</param>
 protected void RunExperiment(Classifier.Classifier classifier, Parameter.Parameter parameter,
                              ExperimentPerformance experimentPerformance, CrossValidation<Instance.Instance> crossValidation,
                              InstanceList.InstanceList testSet)
 {
     for (var fold = 0; fold < K; fold++)
     {
         var foldTrainingInstances = new InstanceList.InstanceList(crossValidation.GetTrainFold(fold));
         classifier.Train(foldTrainingInstances, parameter);

         // The same test set is reused for all folds.
         var foldPerformance = classifier.Test(testSet);
         experimentPerformance.Add(foldPerformance);
     }
 }
Пример #20
0
        /// <summary>
        /// Verifies that splitting 100 samples into 10 folds assigns exactly
        /// 10 samples to each fold index 0..9.
        /// </summary>
        public void SplittingsTest()
        {
            const int expectedPerFold = 10;

            int[] foldAssignments = CrossValidation.Splittings(100, 10);

            foreach (int fold in Enumerable.Range(0, 10))
            {
                int samplesInFold = foldAssignments.Count(assigned => assigned == fold);

                Assert.AreEqual(expectedPerFold, samplesInFold);
            }
        }
Пример #21
0
        /// <summary>
        /// Builds 40 shuffled labels (ten 1s, thirty 0s) and checks two things:
        /// a 10-fold cross-validation created from the sample count alone is
        /// expected to throw when its fitting function asserts a fixed number
        /// of 1-labels per fold, whereas one created from the labels yields
        /// exactly one 1-label per validation fold and nine per training fold.
        /// </summary>
        public void NotEnoughSamplesTest1()
        {
            Accord.Math.Random.Generator.Seed = 0;

            var positives = Matrix.Vector(10, 1);
            var negatives = Matrix.Vector(30, 0);
            int[] labels  = positives.Concatenate(negatives);

            Vector.Shuffle(labels);

            // First validator: knows only the number of samples.
            var crossvalidation = new CrossValidation<MulticlassSupportVectorMachine>(size: 40, folds: 10);
            crossvalidation.RunInParallel = false;
            crossvalidation.Fitting = (index, indicesTrain, indicesValidation) =>
            {
                int positivesInValidation = labels.Submatrix(indicesValidation).Count(label => label == 1);
                Assert.AreEqual(2, positivesInValidation);

                int positivesInTraining = labels.Submatrix(indicesTrain).Count(label => label == 1);
                Assert.AreEqual(9 * 2, positivesInTraining);

                return new CrossValidationValues<MulticlassSupportVectorMachine>(null, 0, 0);
            };

            // Computing it must fail with some exception.
            bool thrown;
            try
            {
                crossvalidation.Compute();
                thrown = false;
            }
            catch (Exception)
            {
                thrown = true;
            }
            Assert.IsTrue(thrown);

            // Second validator: built from the labels themselves (two classes).
            crossvalidation = new CrossValidation<MulticlassSupportVectorMachine>(labels, 2, folds: 10);
            crossvalidation.RunInParallel = false;
            crossvalidation.Fitting = (index, indicesTrain, indicesValidation) =>
            {
                int positivesInValidation = labels.Submatrix(indicesValidation).Count(label => label == 1);
                Assert.AreEqual(1, positivesInValidation);

                int positivesInTraining = labels.Submatrix(indicesTrain).Count(label => label == 1);
                Assert.AreEqual(9, positivesInTraining);

                return new CrossValidationValues<MulticlassSupportVectorMachine>(null, 0, 0);
            };

            crossvalidation.Compute();
        }
Пример #22
0
        /// <summary>
        /// Fits a multinomial logistic regression with the lower-bound
        /// Newton-Raphson estimator, reports cross-validated and full-data
        /// statistics through <c>Funcs.Utility.OutPutStats</c>, saves the model
        /// to disk and returns the model's decisions for the inputs.
        /// </summary>
        /// <param name="input1">Feature vectors, one per sample.</param>
        /// <param name="labels">Class label for each feature vector.</param>
        /// <param name="SaveFile">Path from which the model file name is derived by replacing ".csv" with ".MLR.save".</param>
        /// <returns>The class decided by the fitted model for every row of <paramref name="input1"/>.</returns>
        public static int[] MultiNomialLogRegressionLowerBoundNewtonRaphson(double[][] input1, int[] labels, string SaveFile)
        {
            // http://accord-framework.net/docs/html/T_Accord_Statistics_Models_Regression_MultinomialLogisticRegression.htm

            // Estimator settings shared by the final model and by the learners
            // created for cross-validation, so that the reported cross-validation
            // figures describe the same configuration as the saved model.
            const int    maxIterations = 10;
            const double tolerance     = 1e-6;

            // Estimation algorithm for the final model.
            LowerBoundNewtonRaphson lbnr = new LowerBoundNewtonRaphson()
            {
                MaxIterations = maxIterations,
                Tolerance     = tolerance
            };

            var cv = CrossValidation.Create(

                k: 10,     // We will be using 10-fold cross validation

                // First we define the learning algorithm:
                learner: (p) => new LowerBoundNewtonRaphson()
                {
                    MaxIterations = maxIterations,
                    Tolerance     = tolerance
                },

                // Performance is measured with the zero-one (misclassification) loss:
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),

                // No special preparation is needed before learning, so the
                // fitting step simply delegates to the learner:
                fit: (teach, x, y, w) => teach.Learn(x, y, w),

                // The data that will be used in cross-validation:
                x: input1, y: labels
                );

            // Generate a cross-validation of the data.
            var cvresult = cv.Learn(input1, labels);

            // Estimate the final model on the full data set.
            MultinomialLogisticRegression mlr = lbnr.Learn(input1, labels);

            // Generate statistics from confusion matrices: `cm` comes from the final
            // model on the full data, `gcm` from the cross-validation result.
            ConfusionMatrix        cm  = ConfusionMatrix.Estimate(mlr, input1, labels);
            GeneralConfusionMatrix gcm = cvresult.ToConfusionMatrix(input1, labels);

            Funcs.Utility.OutPutStats(cvresult.NumberOfSamples, cvresult.NumberOfInputs,
                                      cvresult.Training.Mean, gcm.Accuracy, cm.FalsePositives, cm.FalseNegatives, cm.FScore);

            // We can compute the model answers
            int[]  answers       = mlr.Decide(input1);
            string modelsavefile = SaveFile.Replace(".csv", ".MLR.save");

            mlr.Save(modelsavefile, compression: SerializerCompression.None);

            return answers;
        }
        /// <summary>
        /// Cross-validates a <c>RegressionDecisionTreeLearner</c> on the CSV text stored in
        /// <c>Resources.DecisionTreeData</c> and scores the cross-validated predictions
        /// against the targets with <c>MeanSquaredErrorRegressionMetric</c>.
        /// </summary>
        /// <param name="folds">Fold count passed to the cross-validation object.</param>
        /// <returns>The error reported by the metric for targets versus predictions.</returns>
        double CrossValidate(int folds)
        {
            const string targetColumn = "T";

            // The parser re-opens the embedded CSV text through this factory each time it is enumerated.
            var csv = new CsvParser(() => new StringReader(Resources.DecisionTreeData));

            // Observations: everything selected by "does not contain the target name";
            // targets: the "T" selection only.
            var featureMatrix = csv.EnumerateRows(name => !name.Contains(targetColumn)).ToF64Matrix();
            var targetVector = csv.EnumerateRows(targetColumn).ToF64Vector();

            // Fixed seed (42) keeps the fold assignment reproducible.
            var sampler = new RandomIndexSampler<double>(42);
            var validator = new CrossValidation<double>(sampler, folds);
            var predicted = validator.CrossValidate(new RegressionDecisionTreeLearner(), featureMatrix, targetVector);

            return new MeanSquaredErrorRegressionMetric().Error(targetVector, predicted);
        }
Пример #24
0
        /// <summary>
        /// Runs k-fold cross-validation over the feature vectors of
        /// <paramref name="classificationModel"/> on a background task and builds one
        /// confusion matrix per fold from that fold's validation samples.
        /// </summary>
        /// <param name="classificationModel">Supplies the feature vectors, their classes and the land cover types.</param>
        /// <param name="folds">Number of cross-validation folds.</param>
        /// <returns>A task whose result lists the per-fold confusion matrices, ordered by fold index.</returns>
        public override Task <List <GeneralConfusionMatrix> > ComputeFoldedConfusionMatrixAsync(ClassificationModel classificationModel, int folds)
        {
            return(Task.Factory.StartNew(() =>
            {
                int numFeatures = classificationModel.FeatureVectors.Count;

                double[][] input = new double[numFeatures][];
                int[] responses = new int[numFeatures];

                for (int featureIndex = 0; featureIndex < numFeatures; ++featureIndex)
                {
                    var featureVector = classificationModel.FeatureVectors[featureIndex];

                    // Scale each band intensity by ushort.MaxValue before it is used as an input.
                    input[featureIndex] = Array.ConvertAll(featureVector.FeatureVector.BandIntensities, s => (double)s / ushort.MaxValue);
                    responses[featureIndex] = featureVector.FeatureClass;
                }

                // One slot per fold. The fitting function may be invoked concurrently for
                // different folds, and List<T>.Add is not safe for concurrent writers;
                // writing to distinct array slots is, and it also keeps the results in
                // fold order regardless of which fold finishes first.
                var foldMatrices = new GeneralConfusionMatrix[folds];

                // Create a new Cross-validation algorithm passing the data set size and the number of folds
                var crossvalidation = new CrossValidation(input.Length, folds);

                crossvalidation.Fitting = delegate(int k, int[] indicesTrain, int[] indicesValidation)
                {
                    // Training subset for fold k:
                    var trainingInputs = input.Get(indicesTrain);
                    var trainingOutputs = responses.Get(indicesTrain);

                    // Validation subset for fold k:
                    var validationInputs = input.Get(indicesValidation);
                    var validationOutputs = responses.Get(indicesValidation);

                    int[] predictedTraining;
                    int[] predictedValidation;
                    TrainAndPredict(Complexity, Gamma, Degree, trainingInputs, trainingOutputs, validationInputs, out predictedTraining, out predictedValidation);

                    double trainingError = new ZeroOneLoss(trainingOutputs).Loss(predictedTraining);
                    double validationError = new ZeroOneLoss(validationOutputs).Loss(predictedValidation);

                    foldMatrices[k] = new GeneralConfusionMatrix(classificationModel.LandCoverTypes.Count, validationOutputs, predictedValidation);

                    // Report the errors achieved on this fold back to the cross-validation.
                    return new CrossValidationValues(trainingError, validationError);
                };

                crossvalidation.Compute();

                return new List <GeneralConfusionMatrix>(foldMatrices);
            }));
        }
Пример #25
0
        /// <summary>
        /// Checks that a sequential 10-fold cross-validation over 100 samples calls the
        /// fitting function once per fold, in fold order, with 90 training and 10
        /// validation indices, and that the aggregated statistics reflect the values
        /// each fold reported.
        /// </summary>
        public void FittingTest()
        {
            int[] foldAssignments = CrossValidation.Splittings(100, 10);
            int[] samples = Matrix.Indices(0, 100);

            var validator = new CrossValidation(foldAssignments, 10)
            {
                // Folds must run one after another so the fold counter below stays meaningful.
                RunInParallel = false
            };

            int expectedFold = 0;
            validator.Fitting = (fold, trainIdx, validIdx) =>
            {
                Assert.AreEqual(expectedFold, fold);
                Assert.AreEqual(90, trainIdx.Length);
                Assert.AreEqual(10, validIdx.Length);

                int[] trainSubset = samples.Submatrix(trainIdx);
                int[] validSubset = samples.Submatrix(validIdx);

                // Submatrix must pick exactly the elements addressed by the index arrays.
                for (int n = 0; n < trainSubset.Length; n++)
                {
                    Assert.AreEqual(samples[trainIdx[n]], trainSubset[n]);
                }

                for (int n = 0; n < validSubset.Length; n++)
                {
                    Assert.AreEqual(samples[validIdx[n]], validSubset[n]);
                }

                expectedFold++;

                // Report "fold" as the training value and "2 * fold" as the validation value.
                return new CrossValidationValues<object>(new object(), fold, 2 * fold);
            };

            var summary = validator.Compute();

            Assert.AreEqual(10, expectedFold);

            // Mean of 0..9 is 4.5; the validation values are exactly twice the training ones.
            Assert.AreEqual(4.5, summary.Training.Mean);
            Assert.AreEqual(9.0, summary.Validation.Mean);
            Assert.AreEqual(2 * summary.Training.StandardDeviation, summary.Validation.StandardDeviation);

            Assert.AreEqual(validator.Folds.Length, summary.Training.Sizes.Length);
            Assert.AreEqual(validator.Folds.Length, summary.Validation.Sizes.Length);

            foreach (var trainingSize in summary.Training.Sizes)
            {
                Assert.AreEqual(90, trainingSize);
            }

            foreach (var validationSize in summary.Validation.Sizes)
            {
                Assert.AreEqual(10, validationSize);
            }
        }
Пример #26
0
        /// <summary>
        /// Fits a multinomial logistic regression with an L-BFGS based learner, reports
        /// cross-validated statistics through <c>Funcs.Utility.OutPutStats</c>, saves the
        /// fitted model to <paramref name="fName"/> and returns the model's decisions for
        /// the training inputs.
        /// </summary>
        /// <param name="input">Training input vectors.</param>
        /// <param name="labels">Class label of each input vector.</param>
        /// <param name="fName">Path the fitted model is saved to (uncompressed).</param>
        /// <returns>The class decided by the fitted model for every row of <paramref name="input"/>.</returns>
        public static int[] MultiNomialLogisticRegressionBFGS(double [][] input, int [] labels, string fName)
        {
            /* The L-BFGS algorithm is a member of the broad family of quasi-Newton optimization methods.
             * L-BFGS stands for 'Limited memory BFGS'. Indeed, L-BFGS uses a limited memory variation of
             * the Broyden–Fletcher–Goldfarb–Shanno (BFGS) update to approximate the inverse Hessian matrix
             * (denoted by Hk). Unlike the original BFGS method which stores a dense approximation, L-BFGS
             * stores only a few vectors that represent the approximation implicitly. Due to its moderate
             * memory requirement, L-BFGS method is particularly well suited for optimization problems with
             * a large number of variables.
             */

            // Fit the final model on the complete data set.
            var mlbfgs = new MultinomialLogisticLearning <BroydenFletcherGoldfarbShanno>();
            MultinomialLogisticRegression mlr = mlbfgs.Learn(input, labels);

            // Cross-validate the same kind of learner to estimate how well the model is
            // expected to generalize; each fold trains a fresh learner instance.
            const int folds = 4; // could play around with this later
            var cv = CrossValidation.Create(k: folds, learner: (p) => new MultinomialLogisticLearning <BroydenFletcherGoldfarbShanno>(),
                                            loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                                            fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                                            x: input, y: labels);
            var result = cv.Learn(input, labels);

            // gcm aggregates the cross-validation folds; cm is computed from the model fitted on all data.
            GeneralConfusionMatrix gcm = result.ToConfusionMatrix(input, labels);
            ConfusionMatrix        cm  = ConfusionMatrix.Estimate(mlr, input, labels);

            // Output relevant statistics.
            Funcs.Utility.OutPutStats(result.NumberOfSamples, result.NumberOfInputs,
                                      result.Training.Mean, gcm.Accuracy, cm.FalsePositives, cm.FalseNegatives, cm.FScore);

            // Compute the model predictions that are returned to the caller.
            int[] answers = mlr.Decide(input);

            mlr.Save(fName, compression: SerializerCompression.None);

            return(answers);
        }
Пример #27
0
        /// <summary>
        /// Click handler that runs a 10-fold cross-validation of a kernel support vector
        /// machine, trained with sequential minimal optimization, on the table currently
        /// bound to <c>dgvLearningSource</c>. Columns 0 and 1 are used as inputs and
        /// column 2 as the integer label.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event arguments (unused).</param>
        private void button1_Click(object sender, EventArgs e)
        {
            // Nothing to validate until a DataTable has been bound to the grid; bail out
            // instead of failing on a null data source.
            var sourceTable = dgvLearningSource.DataSource as DataTable;
            if (sourceTable == null)
            {
                return;
            }

            // Creates a matrix from the source data table
            double[,] sourceMatrix = sourceTable.ToMatrix(out sourceColumns);

            // Get only the input vector values (all rows, columns 0..1)
            var inputs = sourceMatrix.Submatrix(0, sourceMatrix.GetLength(0) - 1, 0, 1).ToArray();

            // Get only the label outputs (column 2)
            var outputs = new int[sourceMatrix.GetLength(0)];

            for (int i = 0; i < outputs.Length; i++)
            {
                outputs[i] = (int)sourceMatrix[i, 2];
            }

            var cv = new CrossValidation <KernelSupportVectorMachine>(inputs.Length, 10);

            cv.Fitting = (int k, int[] training, int[] testing) =>
            {
                var trainingInputs  = inputs.Submatrix(training);
                var trainingOutputs = outputs.Submatrix(training);
                var testingInputs   = inputs.Submatrix(testing);
                var testingOutputs  = outputs.Submatrix(testing);

                // Create the specified Kernel
                IKernel kernel = getKernel();

                // Creates the Support Vector Machine using the selected kernel
                var svm = new KernelSupportVectorMachine(kernel, 2);

                // Creates a new instance of the SMO learning algorithm
                var smo = new SequentialMinimalOptimization(svm, trainingInputs, trainingOutputs);

                // Set learning parameters from the numeric controls
                smo.Complexity = (double)numC.Value;
                smo.Tolerance  = (double)numT.Value;

                // Train on this fold, then score the held-out part
                double trainingError   = smo.Run();
                double validationError = smo.ComputeError(testingInputs, testingOutputs);

                return(new CrossValidationValues <KernelSupportVectorMachine>(svm, trainingError, validationError));
            };

            cv.Compute();
        }
Пример #28
0
        /// <summary>
        /// Prepares the input/output variables and cross-validates a Bernoulli naive
        /// Bayes learner on them, storing the outcome in <c>Result</c>.
        /// </summary>
        /// <param name="numOfFolds">Number of cross-validation folds.</param>
        /// <param name="minOccurences">Forwarded to <c>CalcInputAndOutputVariables</c>.</param>
        private void ClassifyDataByNaiveBayes(int numOfFolds = 3, int minOccurences = 1)
        {
            CalcInputAndOutputVariables(minOccurences);

            var validator = CrossValidation.Create(
                k: numOfFolds,

                // A fresh learner is created for every fold.
                learner: parameters => new NaiveBayesLearning<BernoulliDistribution>(),

                // Plain training step, no per-fold preprocessing.
                fit: (learner, foldInputs, foldOutputs, weights) => learner.Learn(foldInputs, foldOutputs, weights),

                // Folds are scored with the zero-one loss.
                loss: (predicted, truth, parameters) => new ZeroOneLoss(truth).Loss(predicted),

                x: InputVariables,
                y: OutputVariables);

            // Run the cross-validation; Result becomes null when the outcome is not of the expected type.
            var outcome = validator.Learn(InputVariables, OutputVariables);
            Result = outcome as CrossValidationResult<TModel, double[], int>;
        }
        /// <summary>
        /// Loads a stored multi-layer perceptron and the normalised iris data set, runs a
        /// cross-validation with a multi-class evaluator attached and prints the result.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        public static void Main(string[] args)
        {
            // Network previously saved to disk.
            var network = (MultiLayerPerceptron)NeuralNetwork.createFromFile("irisNet.nnet");

            // Comma-separated data file; 4 and 3 are presumably the input and output column counts.
            DataSet irisData = DataSet.createFromFile("data_sets/iris_data_normalised.txt", 4, 3, ",");

            string[] labels = { "Virginica", "Setosa", "Versicolor" };

            var validation = new CrossValidation(network, irisData, 5);
            validation.addEvaluator(new ClassifierEvaluator.MultiClass(labels));
            validation.run();

            CrossValidationResult summary = validation.Result;
            Console.WriteLine(summary);
        }
        /// <summary>
        /// Estimates the accuracy of an ID3 decision tree on the <c>inputs</c> /
        /// <c>outputs</c> data using 5-fold cross-validation.
        /// </summary>
        /// <returns>
        /// Accuracy of the confusion matrix aggregated over the validation folds,
        /// expressed as a percentage (accuracy * 100).
        /// </returns>
        public double Accuracy()
        {
            var cv = CrossValidation.Create(

                k: 5,                             // We will be using 5-fold cross validation

                // Each fold gets a fresh ID3 learner. Join = 1 lets a variable join the
                // decision path at most once. MaxHeight is 0 here, not 6 as an earlier
                // comment claimed. NOTE(review): Accord.NET documents MaxHeight = 0 as
                // "no height limit" - confirm this is the intended setting.
                learner: (p) => new ID3Learning()
                {
                    Join      = 1,
                    MaxHeight = 0
                },

                // This function can be used to perform any special
                // operations before the actual learning is done, but
                // here we will just leave it as simple as it can be:
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

                // Now we have to specify how the tree performance should be measured:
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),

                // Finally, we have to pass the input and output data
                // that will be used in cross-validation.
                x: inputs, y: outputs
                );

            // After the cross-validation object has been created,
            // we can call its .Learn method with the input and
            // output data that will be partitioned into the folds:
            var result = cv.Learn(inputs, outputs);

            // Aggregate confusion matrix for the validation sets:
            GeneralConfusionMatrix gcm = result.ToConfusionMatrix(inputs, outputs);

            return(gcm.Accuracy * 100);
        }
Пример #31
0
        /// <summary>
        /// Trains and cross-validates one multi-layer perceptron per candidate
        /// architecture and keeps the one whose first classification metric has the
        /// highest F-measure.
        /// </summary>
        /// <param name="dataSet"> training set used for error estimation </param>
        /// <returns> neural network model with optimized architecture for provided data set </returns>
        public virtual NeuralNetwork createOptimalModel(DataSet dataSet)
        {
            // Seed the architecture search with a single hidden layer of the minimum size.
            List<int> seedLayers = new List<int> { minNeuronsPerLayer };
            findArchitectures(1, minNeuronsPerLayer, seedLayers);

            LOG.info("Total [{}] different network topologies found", allArchitectures.Count);

            foreach (List<int> architecture in allArchitectures)
            {
                // Wrap the hidden layers with the data set's input and output sizes.
                architecture.Insert(0, dataSet.InputSize);
                architecture.Add(dataSet.OutputSize);

                LOG.info("Architecture: [{}]", architecture);

                MultiLayerPerceptron candidate = new MultiLayerPerceptron(architecture);
                LearningListener progress = new LearningListener(10, learningRule.MaxIterations);
                learningRule.addListener(progress);
                candidate.LearningRule = learningRule;

                // 10-fold cross-validation of this candidate.
                errorEstimationMethod = new CrossValidation(candidate, dataSet, 10);
                errorEstimationMethod.run();

                // FIX
                var evaluator = errorEstimationMethod.getEvaluator<ClassifierEvaluator.MultiClass>(typeof(ClassifierEvaluator.MultiClass));

                ClassificationMetrics[] metrics = ClassificationMetrics.createFromMatrix(evaluator.Result);

                // Keep this candidate when nothing has been chosen yet or when its
                // F-measure is higher than the best one seen so far.
                if (optimalResult == null || optimalResult.FMeasure < metrics[0].FMeasure)
                {
                    LOG.info("Architecture [{}] became optimal architecture  with metrics {}", architecture, metrics);
                    optimalResult      = metrics[0];
                    optimalClassifier  = candidate;
                    optimalArchitecure = architecture;
                }

                LOG.info("#################################################################");
            }

            LOG.info("Optimal Architecture: {}", optimalArchitecure);
            return optimalClassifier;
        }
Пример #32
0
        /// <summary>
        /// Prepares the input/output variables and cross-validates a logistic regression
        /// trained with iteratively reweighted least squares, storing the outcome in
        /// <c>Result</c>.
        /// </summary>
        /// <param name="numOfFolds">Number of cross-validation folds.</param>
        /// <param name="minOccurences">Forwarded to <c>CalcInputAndOutputVariables</c>.</param>
        /// <param name="maxIterations">Iteration cap applied to every fold's learner.</param>
        private void ClassifyDataByLogisticRegression(int numOfFolds = 3, int minOccurences = 1, int maxIterations = 100)
        {
            CalcInputAndOutputVariables(minOccurences);

            var cvLogisticRegressionClassifier = CrossValidation.Create(
                k: numOfFolds,
                learner: (p) => new IterativeReweightedLeastSquares <LogisticRegression>()
                {
                    // Honour the caller's limit; this used to be hard-coded to 100,
                    // which silently ignored the maxIterations argument.
                    MaxIterations  = maxIterations,
                    Regularization = 1e-6
                },
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                x: InputVariables,
                y: OutputVariables
                );

            // Run Cross-Validation; Result becomes null when the outcome is not of the expected type.
            Result = cvLogisticRegressionClassifier.Learn(InputVariables, OutputVariables) as CrossValidationResult <TModel, double[], int>;
        }
Пример #33
0
        /// <summary>
        /// Runs a seeded, randomized k-fold cross-validation of <c>LuMiiTagger</c> over the
        /// supplied sentences, writes the per-fold results to the debug output and asserts
        /// that the mean accuracy lies between <paramref name="minAccuracy"/> and 97%.
        /// </summary>
        /// <param name="name">Label used in the debug output.</param>
        /// <param name="minAccuracy">
        /// Lower bound for the mean accuracy; values below 1 are treated as fractions and
        /// scaled to a percentage.
        /// </param>
        /// <param name="folds">Number of folds.</param>
        /// <param name="sentences">One or more sentence collections to evaluate on.</param>
        public void CrossValidation(string name, double minAccuracy, int folds, params IEnumerable<Sentence>[] sentences)
        {
            CrossValidation<LuMiiTagger> evaluation = new CrossValidation<LuMiiTagger>();
            evaluation.Folds = folds;
            evaluation.Randomize = true;
            evaluation.RandomSeed = 1;

            // Iterate as IEnumerable<Sentence>: the previous "Sentence[]" loop variable
            // forced a cast that throws InvalidCastException for any non-array argument.
            foreach (IEnumerable<Sentence> group in sentences)
            {
                evaluation.Sentences.AddRange(group);
            }

            Assert.Greater(evaluation.Sentences.Count, 0);

            var results = evaluation.Evaluate();

            Debug.WriteLine("{0}-fold cross validation for {1}", evaluation.Folds, name);
            Debug.WriteLine("{0} sentences, {1} tokens", evaluation.Sentences.Count, evaluation.Sentences.SelectMany(t => t).Count());
            Debug.WriteLine("Mean: {0:0.00}% [{1:0.00}..{2:0.00} @ 99%]", results.Mean, results.ConfidenceIntervalAt99.Lower, results.ConfidenceIntervalAt99.Upper);
            foreach (var fold in results.OrderBy(f => f.Fold))
            {
                Debug.WriteLine("Fold {0}: {1:0.00}%", fold.Fold, fold.CorrectPercentage);
            }
            Debug.WriteLine("Duration: {0}", results.Duration);

            // The mean is compared as a percentage, so a fractional bound is scaled by 100.
            Assert.Greater(results.Mean, minAccuracy < 1 ? minAccuracy * 100 : minAccuracy);
            Assert.Less(results.Mean, 0.97 * 100);
        }
Пример #34
0
 /// <summary>
 /// Returns the native handle wrapped by <paramref name="obj"/>, or a handle with a
 /// null wrapper and <c>IntPtr.Zero</c> when <paramref name="obj"/> is null.
 /// </summary>
 internal static HandleRef getCPtr(CrossValidation obj) {
   if (obj == null) {
     return new HandleRef(null, IntPtr.Zero);
   }

   return obj.swigCPtr;
 }
Пример #35
0
        /// <summary>
        /// Click handler that runs a 10-fold cross-validation of a kernel support vector
        /// machine, trained with sequential minimal optimization, on the table currently
        /// bound to <c>dgvLearningSource</c>. Columns 0 and 1 are used as inputs and
        /// column 2 as the integer label.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event arguments (unused).</param>
        private void button1_Click(object sender, EventArgs e)
        {
            // Nothing to validate until a DataTable has been bound to the grid; bail out
            // instead of failing on a null data source.
            var sourceTable = dgvLearningSource.DataSource as DataTable;
            if (sourceTable == null)
            {
                return;
            }

            // Creates a matrix from the source data table
            double[,] sourceMatrix = sourceTable.ToMatrix(out sourceColumns);

            // Get only the input vector values (all rows, columns 0..1)
            var inputs = sourceMatrix.Submatrix(0, sourceMatrix.GetLength(0) - 1, 0, 1).ToArray();

            // Get only the label outputs (column 2)
            var outputs = new int[sourceMatrix.GetLength(0)];
            for (int i = 0; i < outputs.Length; i++)
            {
                outputs[i] = (int)sourceMatrix[i, 2];
            }

            var cv = new CrossValidation<KernelSupportVectorMachine>(inputs.Length, 10);
            cv.Fitting = (int k, int[] training, int[] testing) =>
            {
                var trainingInputs = inputs.Submatrix(training);
                var trainingOutputs = outputs.Submatrix(training);
                var testingInputs = inputs.Submatrix(testing);
                var testingOutputs = outputs.Submatrix(testing);

                // Create the specified Kernel
                IKernel kernel = getKernel();

                // Creates the Support Vector Machine using the selected kernel
                var svm = new KernelSupportVectorMachine(kernel, 2);

                // Creates a new instance of the SMO learning algorithm
                var smo = new SequentialMinimalOptimization(svm, trainingInputs, trainingOutputs);

                // Set learning parameters from the numeric controls
                smo.Complexity = (double)numC.Value;
                smo.Tolerance = (double)numT.Value;

                // Train on this fold, then score the held-out part
                double trainingError = smo.Run();
                double validationError = smo.ComputeError(testingInputs, testingOutputs);

                return new CrossValidationValues<KernelSupportVectorMachine>(svm, trainingError, validationError);
            };

            cv.Compute();
        }
Пример #36
0
        /// <summary>
        /// Uses 40 shuffled labels (10 ones, 30 zeros) and 10 folds. A cross-validation
        /// built from the sample count alone is expected to throw from <c>Compute</c>,
        /// whereas one built from the labels must hand every fold exactly one positive
        /// validation sample and nine positive training samples.
        /// </summary>
        public void NotEnoughSamplesTest1()
        {
            Accord.Math.Random.Generator.Seed = 0;

            var ones = Matrix.Vector(10, 1);
            int[] labels = ones.Concatenate(Matrix.Vector(30, 0));

            Vector.Shuffle(labels);

            // Counts how many of the selected samples carry label 1.
            Func<int[], int> countOnes = indices => labels.Submatrix(indices).Count(x => x == 1);

            // Size-based splitting: the per-fold expectations below are asserted inside
            // the fitting function, and Compute() is expected to end in an exception.
            var bySize = new CrossValidation<MulticlassSupportVectorMachine>(size: 40, folds: 10)
            {
                RunInParallel = false,

                Fitting = (int index, int[] indicesTrain, int[] indicesValidation) =>
                {
                    int validationOnes = countOnes(indicesValidation);
                    Assert.AreEqual(2, validationOnes);

                    int trainingOnes = countOnes(indicesTrain);
                    Assert.AreEqual(9 * 2, trainingOnes);

                    return new CrossValidationValues<MulticlassSupportVectorMachine>(null, 0, 0);
                }
            };

            bool computeFailed = false;
            try
            {
                bySize.Compute();
            }
            catch (Exception)
            {
                computeFailed = true;
            }

            Assert.IsTrue(computeFailed);

            // Label-based splitting over 2 classes: Compute() must complete.
            var byLabels = new CrossValidation<MulticlassSupportVectorMachine>(labels, 2, folds: 10)
            {
                RunInParallel = false,

                Fitting = (int index, int[] indicesTrain, int[] indicesValidation) =>
                {
                    int validationOnes = countOnes(indicesValidation);
                    Assert.AreEqual(1, validationOnes);

                    int trainingOnes = countOnes(indicesTrain);
                    Assert.AreEqual(9, trainingOnes);

                    return new CrossValidationValues<MulticlassSupportVectorMachine>(null, 0, 0);
                }
            };

            byLabels.Compute();
        }
Пример #37
0
        /// <summary>
        /// Form load handler: fills the <c>ContinuousData</c> table and then runs a sample
        /// 3-fold cross-validation of a polynomial-kernel support vector machine on a
        /// small XOR data set.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event arguments (unused).</param>
        private void Test_Load(object sender, EventArgs e)
        {
            // TODO: This line of code loads data into the 'diabetesDataSetB.ContinuousData' table. You can move, or remove it, as needed.
            this.continuousDataTableAdapter.Fill(this.diabetesDataSetB.ContinuousData);

            // Sample: using cross-validation to assess how well a support
            // vector machine performs on a binary XOR problem.

            // The four XOR corner points, repeated four times.
            double[][] data =
            {
                new double[] { -1, -1 },
                new double[] {  1, -1 },
                new double[] { -1,  1 },
                new double[] {  1,  1 },

                new double[] { -1, -1 },
                new double[] {  1, -1 },
                new double[] { -1,  1 },
                new double[] {  1,  1 },

                new double[] { -1, -1 },
                new double[] {  1, -1 },
                new double[] { -1,  1 },
                new double[] {  1,  1 },

                new double[] { -1, -1 },
                new double[] {  1, -1 },
                new double[] { -1,  1 },
                new double[] {  1,  1 },
            };

            // XOR label of each sample above, in the same order.
            int[] xor =
            {
                -1, 1, 1, -1,
                -1, 1, 1, -1,
                -1, 1, 1, -1,
                -1, 1, 1, -1,
            };

            // Cross-validation over the whole data set using three folds.
            var crossvalidation = new CrossValidation<KernelSupportVectorMachine>(size: data.Length, folds: 3);

            // The fitting function receives, for fold k, the indices of the original set
            // to train on and the indices to validate on. It trains an SVM on the former
            // and measures its error on the latter.
            crossvalidation.Fitting = (int k, int[] indicesTrain, int[] indicesValidation) =>
            {
                // Training part of the fold.
                var foldInputs = data.Submatrix(indicesTrain);
                var foldLabels = xor.Submatrix(indicesTrain);

                // Held-out part of the fold.
                var heldOutInputs = data.Submatrix(indicesValidation);
                var heldOutLabels = xor.Submatrix(indicesValidation);

                // Second-degree polynomial kernel machine with two inputs.
                var machine = new KernelSupportVectorMachine(new Polynomial(2), 2);

                // Train with sequential minimal optimization.
                var teacher = new SequentialMinimalOptimization(machine, foldInputs, foldLabels);
                double errorOnTraining = teacher.Run();

                // Error of the trained machine on the held-out samples.
                double errorOnValidation = teacher.ComputeError(heldOutInputs, heldOutLabels);

                // Hand the model and both errors back to the cross-validation.
                return new CrossValidationValues<KernelSupportVectorMachine>(machine, errorOnTraining, errorOnValidation);
            };

            // Run all folds.
            var summary = crossvalidation.Compute();

            // Measured performance, averaged over the folds.
            double meanTrainingError = summary.Training.Mean;
            double meanValidationError = summary.Validation.Mean;
        }
Пример #38
0
        /// <summary>
        /// Runs a seeded 3-fold cross-validation of a hidden Markov classifier on twenty
        /// short binary sequences (two classes) and checks the fold count, the number of
        /// returned models and the mean training and validation errors.
        /// </summary>
        public void CrossvalidationConstructorTest2()
        {

            // Fix the random generator seed so the exact values asserted below are reproducible.
            Accord.Math.Tools.SetupGenerator(0);

            // This is a sample code on how to use Cross-Validation
            // to assess the performance of Hidden Markov Models.

            // Declare some testing data
            int[][] inputs = new int[][]
            {
                new int[] { 0,1,1,0 },   // Class 0
                new int[] { 0,0,1,0 },   // Class 0
                new int[] { 0,1,1,1,0 }, // Class 0
                new int[] { 0,1,1,1,0 }, // Class 0
                new int[] { 0,1,1,0 },   // Class 0
                new int[] { 0,1,1,1,0 }, // Class 0
                new int[] { 0,1,1,1,0 }, // Class 0
                new int[] { 0,1,0,1,0 }, // Class 0
                new int[] { 0,1,0 },     // Class 0
                new int[] { 0,1,1,0 },   // Class 0

                new int[] { 1,0,0,1 },   // Class 1
                new int[] { 1,1,0,1 },   // Class 1
                new int[] { 1,0,0,0,1 }, // Class 1
                new int[] { 1,0,1 },     // Class 1
                new int[] { 1,1,0,1 },   // Class 1
                new int[] { 1,0,1 },     // Class 1
                new int[] { 1,0,0,1 },   // Class 1
                new int[] { 1,0,0,0,1 }, // Class 1
                new int[] { 1,0,1 },     // Class 1
                new int[] { 1,0,0,0,1 }, // Class 1
            };

            int[] outputs = new int[]
            {
                0,0,0,0,0,0,0,0,0,0, // First 10 sequences are of class 0
                1,1,1,1,1,1,1,1,1,1, // Last 10 sequences are of class 1
            };



            // Create a new Cross-validation algorithm passing the data set size and the number of folds
            var crossvalidation = new CrossValidation<HiddenMarkovClassifier>(size: inputs.Length, folds: 3);

            // Define a fitting function using a hidden Markov classifier. The objective of this
            // function is to learn the classifier on the subset of the data indicated by cross-validation.

            crossvalidation.Fitting = delegate(int k, int[] indicesTrain, int[] indicesValidation)
            {
                // The fitting function is passing the indices of the original set which
                // should be considered training data and the indices of the original set
                // which should be considered validation data.

                // Lets now grab the training data:
                var trainingInputs = inputs.Submatrix(indicesTrain);
                var trainingOutputs = outputs.Submatrix(indicesTrain);

                // And now the validation data:
                var validationInputs = inputs.Submatrix(indicesValidation);
                var validationOutputs = outputs.Submatrix(indicesValidation);


                // We are trying to predict two different classes
                int classes = 2;

                // Each sequence may have up to two symbols (0 or 1)
                int symbols = 2;

                // Nested models will have two states each
                int[] states = new int[] { 2, 2 };

                // Creates a new Hidden Markov Model Classifier with the given parameters
                HiddenMarkovClassifier classifier = new HiddenMarkovClassifier(classes, states, symbols);


                // Create a new learning algorithm to train the sequence classifier
                var teacher = new HiddenMarkovClassifierLearning(classifier,

                    // Train each model until the log-likelihood changes less than 0.001
                    // (Iterations = 0 presumably means "no iteration limit" - TODO confirm)
                    modelIndex => new BaumWelchLearning(classifier.Models[modelIndex])
                    {
                        Tolerance = 0.001,
                        Iterations = 0
                    }
                );

                // Train the sequence classifier using the algorithm
                double likelihood = teacher.Run(trainingInputs, trainingOutputs);

                // Error of the trained classifier on the data it was trained on
                double trainingError = teacher.ComputeError(trainingInputs, trainingOutputs);

                // Now we can compute the validation error on the validation data:
                double validationError = teacher.ComputeError(validationInputs, validationOutputs);

                // Return a new information structure containing the model and the errors achieved.
                return new CrossValidationValues<HiddenMarkovClassifier>(classifier, trainingError, validationError);
            };


            // Compute the cross-validation
            var result = crossvalidation.Compute();

            // Finally, access the measured performance.
            double trainingErrors = result.Training.Mean;
            double validationErrors = result.Validation.Mean;

            // Expected values for the fixed seed set at the top of the test.
            Assert.AreEqual(3, crossvalidation.K);
            Assert.AreEqual(0, result.Training.Mean);
            Assert.AreEqual(0.055555555555555552, result.Validation.Mean);

            // One fold and one trained model per requested fold.
            Assert.AreEqual(3, crossvalidation.Folds.Length);
            Assert.AreEqual(3, result.Models.Length);
        }
Пример #39
0
        /// <summary>
        /// Uses 40 shuffled labels (10 ones, 30 zeros) and a label-based 10-fold
        /// cross-validation over 2 classes, and checks that every fold receives exactly
        /// one positive validation sample and nine positive training samples.
        /// </summary>
        public void NotEnoughSamplesTest2()
        {
            Accord.Math.Tools.SetupGenerator(0);

            var ones = Matrix.Vector(10, 1);
            int[] labels = ones.Concatenate(Matrix.Vector(30, 0));

            Accord.Statistics.Tools.Shuffle(labels);

            var byLabels = new CrossValidation<MulticlassSupportVectorMachine>(labels, 2, folds: 10)
            {
                RunInParallel = false,

                Fitting = (int index, int[] indicesTrain, int[] indicesValidation) =>
                {
                    // Positive samples that ended up in the validation part of this fold.
                    int validationOnes = labels.Submatrix(indicesValidation).Count(x => x == 1);
                    Assert.AreEqual(1, validationOnes);

                    // Positive samples left for training.
                    int trainingOnes = labels.Submatrix(indicesTrain).Count(x => x == 1);
                    Assert.AreEqual(9, trainingOnes);

                    return new CrossValidationValues<MulticlassSupportVectorMachine>(null, 0, 0);
                }
            };

            byLabels.Compute();
        }
Пример #40
0
        public void KNearestNeighbor_CrossValidation()
        {
            // Sample learning data: nine three-dimensional points
            // split into three classes of sizes two, four and three.
            double[][] inputs = 
            {
                // Class 0 (two instances)
                new double[] { -5, -2, -1 },
                new double[] { -5, -5, -6 },

                // Class 1 (four instances)
                new double[] {  2,  1,  1 },
                new double[] {  1,  1,  2 },
                new double[] {  1,  2,  2 },
                new double[] {  3,  1,  2 },

                // Class 2 (three instances)
                new double[] { 11,  5,  4 },
                new double[] { 15,  5,  6 },
                new double[] { 10,  5,  6 },
            };

            // Class label of each row of `inputs`, in the same order.
            int[] outputs =
            {
                0, 0,
                1, 1, 1, 1,
                2, 2, 2
            };

            // Three-fold cross-validation over the whole data set.
            var crossvalidation = new CrossValidation(size: inputs.Length, folds: 3);

            // The fitting function receives, for every fold, the indices of the
            // samples to be treated as training data and as validation data. It
            // evaluates a K-Nearest Neighbors classifier on both partitions.
            crossvalidation.Fitting = (int fold, int[] indicesTrain, int[] indicesValidation) =>
            {
                // Slice out the training partition
                var trainingInputs = inputs.Submatrix(indicesTrain);
                var trainingOutputs = outputs.Submatrix(indicesTrain);

                // Slice out the validation partition
                var validationInputs = inputs.Submatrix(indicesValidation);
                var validationOutputs = outputs.Submatrix(indicesValidation);

                // K-Nearest Neighbors with k = 4 and three classes: the four
                // nearest neighbors of an instance are used to cast a decision.
                // NOTE(review): the classifier is built from the complete
                // `inputs`/`outputs` arrays rather than the training partition,
                // so the validation samples are part of its reference data.
                // Confirm whether this is intentional before changing it; the
                // exact-1 assertions below may depend on it.
                KNearestNeighbors knn = new KNearestNeighbors(k: 4, classes: 3,
                    inputs: inputs, outputs: outputs);

                // Classify both partitions
                int[] train_predicted = trainingInputs.Apply(knn.Compute);
                int[] test_predicted = validationInputs.Apply(knn.Compute);

                // Accuracy on the training partition
                double trainingAcc = new ConfusionMatrix(train_predicted, trainingOutputs).Accuracy;

                // Accuracy on the validation partition
                double validationAcc = new ConfusionMatrix(test_predicted, validationOutputs).Accuracy;

                // Package the model with its two scores. Note that these are
                // accuracies (higher is better), not error values.
                return new CrossValidationValues(knn, trainingAcc, validationAcc);
            };

            // Run every fold
            var result = crossvalidation.Compute();

            // Mean accuracy across the folds
            double trainingAccs = result.Training.Mean;
            double validationAccs = result.Validation.Mean;

            Assert.AreEqual(1, trainingAccs);
            Assert.AreEqual(1, validationAccs);
        }
Пример #41
0
        public void CrossvalidationConstructorTest()
        {
            // Seed Accord's generator (presumably so the fold split is repeatable).
            Accord.Math.Tools.SetupGenerator(0);

            // Sample usage of cross-validation to assess how well a kernel
            // Support Vector Machine learns the XOR problem: the four corner
            // points of the XOR truth table, repeated four times.
            double[][] data =
            {
                new double[] { -1, -1 }, new double[] {  1, -1 },
                new double[] { -1,  1 }, new double[] {  1,  1 },
                new double[] { -1, -1 }, new double[] {  1, -1 },
                new double[] { -1,  1 }, new double[] {  1,  1 },
                new double[] { -1, -1 }, new double[] {  1, -1 },
                new double[] { -1,  1 }, new double[] {  1,  1 },
                new double[] { -1, -1 }, new double[] {  1, -1 },
                new double[] { -1,  1 }, new double[] {  1,  1 },
            };

            // Expected XOR output (-1 / +1) for each row of `data`
            int[] xor =
            {
                -1,       1,
                 1,      -1,
                -1,       1,
                 1,      -1,
                -1,       1,
                 1,      -1,
                -1,       1,
                 1,      -1,
            };

            // Three-fold cross-validation over the whole data set
            var crossvalidation = new CrossValidation<KernelSupportVectorMachine>(size: data.Length, folds: 3);

            // For every fold the fitting function is handed the indices of the
            // training samples and of the validation samples; it trains an SVM
            // on the former and measures its error on the latter.
            crossvalidation.Fitting = (int k, int[] indicesTrain, int[] indicesValidation) =>
            {
                // Training partition
                var trainingInputs = data.Submatrix(indicesTrain);
                var trainingOutputs = xor.Submatrix(indicesTrain);

                // Validation partition
                var validationInputs = data.Submatrix(indicesValidation);
                var validationOutputs = xor.Submatrix(indicesValidation);

                // Kernel SVM with a Polynomial(2) kernel (the trailing 2 is
                // presumably the number of inputs per sample).
                var svm = new KernelSupportVectorMachine(new Polynomial(2), 2);

                // Train with Sequential Minimal Optimization; Run() yields
                // the value used here as the training error.
                var smo = new SequentialMinimalOptimization(svm, trainingInputs, trainingOutputs);
                double trainingError = smo.Run();

                // Error of the trained machine on the held-out samples
                double validationError = smo.ComputeError(validationInputs, validationOutputs);

                // Package the model together with both errors
                return new CrossValidationValues<KernelSupportVectorMachine>(svm, trainingError, validationError);
            };

            // Run every fold
            var result = crossvalidation.Compute();

            // Mean errors across the folds
            double trainingErrors = result.Training.Mean;
            double validationErrors = result.Validation.Mean;

            Assert.AreEqual(3, crossvalidation.K);
            Assert.AreEqual(0, trainingErrors);
            Assert.AreEqual(0, validationErrors);

            Assert.AreEqual(3, crossvalidation.Folds.Length);
            Assert.AreEqual(3, result.Models.Length);
        }