Exemple #1
0
        /// <summary>
        /// Trains a random forest on the training set, prints its zero-one loss on the
        /// test set to the console, and saves the trained forest to disk.
        /// </summary>
        /// <param name="train_data">Feature vectors used to train the forest.</param>
        /// <param name="test_data">Feature vectors used to evaluate the trained forest.</param>
        /// <param name="train_label">Labels passed to the learner together with <paramref name="train_data"/>.</param>
        /// <param name="test_label">Expected labels the predictions for <paramref name="test_data"/> are compared with.</param>
        /// <param name="Classifier_Path">First path segment of the location the forest is saved to.</param>
        /// <param name="Classifier_Name">Second path segment (file name) of the location the forest is saved to.</param>
        /// <param name="NumOfTrees">Number of trees the learner is configured with.</param>
        public void RandomForestLearning(double[][] train_data, double[][] test_data, int[] train_label, int[] test_label, String Classifier_Path, String Classifier_Name, int NumOfTrees = 20)
        {
            var learner = new RandomForestLearning();
            learner.NumberOfTrees = NumOfTrees;

            var model = learner.Learn(train_data, train_label);

            // Score the forest on the test samples and report the loss.
            int[] predictions = model.Decide(test_data);
            var lossFunction = new ZeroOneLoss(test_label);
            double testError = lossFunction.Loss(predictions);
            Console.WriteLine(testError);

            // Persist the trained forest under <Classifier_Path>/<Classifier_Name>.
            string destination = Path.Combine(Classifier_Path, Classifier_Name);
            model.Save(destination);
        }
Exemple #2
0
        public void LearnTest()
        {
            // The four corner points of the XOR problem with -1/+1 class labels.
            double[][] points =
            {
                new double[] { -1, -1 },
                new double[] { -1,  1 },
                new double[] {  1, -1 },
                new double[] {  1,  1 }
            };

            int[] labels = { -1, 1, 1, -1 };

            // Pass every point through the polynomial kernel's Transform so the
            // learner below works on the transformed features.
            var polynomial = new Polynomial(2, 0.0);

            double[][] expanded = new double[points.Length][];
            for (int row = 0; row < points.Length; row++)
            {
                expanded[row] = polynomial.Transform(points[row]);
            }

            // Train with stochastic gradient descent.
            var teacher = new StochasticGradientDescent();
            teacher.LearningRate = 1e-3;

            var machine = teacher.Learn(expanded, labels);

            // All training points must be classified correctly.
            bool[] decisions = machine.Decide(expanded);
            double error = new ZeroOneLoss(labels).Loss(decisions);

            Assert.AreEqual(0, error);

            // The sign of the machine's raw output must agree with the label's sign.
            int[] signs = expanded.Apply(p => Math.Sign(machine.Compute(p)));
            for (int k = 0; k < signs.Length; k++)
            {
                int expectedSign = System.Math.Sign(labels[k]);
                int actualSign = System.Math.Sign(signs[k]);
                Assert.AreEqual(expectedSign, actualSign);
            }
        }
Exemple #3
0
        /// <summary>
        /// Click handler that, on a background task, builds the training data from
        /// <c>Images</c>, trains a multiclass support vector machine and writes its
        /// zero-one loss on the training data into the <c>Error</c> control.
        /// </summary>
        /// <param name="sender">Event source; not used.</param>
        /// <param name="args">Event data; not used.</param>
        public void Learn_Clicked(object sender, EventArgs args)
        {
            Task.Factory.StartNew(() =>
            {
                var bow = CreateBow();
                foreach (var image in Images)
                {
                    TrainingData.Add(GetData(image, bow));
                }

                // Materialize samples and labels once; training and evaluation
                // both work on these same arrays.
                var inputs  = TrainingData.ToArray();
                var outputs = Tags.ToArray();

                var kernel  = new Polynomial();
                var teacher = new MulticlassSupportVectorLearning <IKernel>()
                {
                    Kernel  = kernel,
                    Learner = (param) => new SequentialMinimalOptimization <IKernel>()
                    {
                        Kernel = kernel
                    }
                };

                var svm   = teacher.Learn(inputs, outputs);
                var error = new ZeroOneLoss(outputs).Loss(svm.Decide(inputs));

                // This lambda runs on a task thread, so the control is updated
                // through its dispatcher.
                Error.Dispatcher.Invoke(() => Error.Text = error.ToString());
            });
        }
    // Initialization entry point (presumably Unity's Start callback): trains an
    // ID3 decision tree on the logical AND truth table, then logs the training
    // loss and the tree's answers for a set of query rows.
    void Start()
    {
        // Truth-table inputs (1 is true, 0 is false).
        int[][] truthTable =
        {
            new int[] { 1, 0 },
            new int[] { 0, 1 },
            new int[] { 0, 0 },
            new int[] { 1, 1 },
        };

        // Result of the AND operation for each row of the truth table.
        int[] andResults = { 0, 0, 0, 1 };

        // Rows the trained tree is queried with afterwards.
        int[][] queries =
        {
            new int[] { 1, 1 },
            new int[] { 0, 0 },
            new int[] { 1, 0 },
            new int[] { 0, 1 },
        };

        // Learn the tree directly from the integer matrices above.
        ID3Learning learner = new ID3Learning();
        var decisionTree = learner.Learn(truthTable, andResults);

        // Zero-one loss of the tree on its own training rows.
        int[] fitted = decisionTree.Decide(truthTable);
        double error = new ZeroOneLoss(andResults).Loss(fitted);

        Debug.Log("Houve erro?" + error);

        // Query the tree; for AND the expected answers are { 1, 0, 0, 0 }.
        int[] answers = decisionTree.Decide(queries);
        foreach (int answer in answers)
        {
            Debug.Log(answer);
        }
    }
Exemple #5
0
        /// <summary>
        /// Trains a C4.5 decision tree on <c>DataSets[1]</c>, evaluates it on
        /// <c>DataSets[0]</c>, and reports the predictions, the zero-one loss and
        /// the elapsed wall-clock time.
        /// </summary>
        public void TreeLearning()
        {
            Console.WriteLine("SottoProgramma chiamato: TreeLearning.");
            this.clock = DateTime.Now;

            // Fit the tree on the second data set.
            C45Learning learner = new C45Learning();
            DecisionTree tree = learner.Learn(DataSets[1].ItemsFeatures, DataSets[1].CatIDs);

            // Evaluate it on the first data set.
            int[] predictions = tree.Decide(DataSets[0].ItemsFeatures);
            var loss = new ZeroOneLoss(DataSets[0].CatIDs);
            double error = loss.Loss(predictions);

            PrintReport(predictions, error, "Tree");

            Console.WriteLine("SottoProgramma TreeLearning terminato.\nErrore: {0}", error);

            // Time elapsed since the clock field was set at the top of the method.
            double elapsedSeconds = (DateTime.Now - clock).TotalSeconds;
            Console.WriteLine("Tempo richiesto per l'operazione: " + elapsedSeconds + " secondi.");
        }
Exemple #6
0
        public void kaggle_digits_with_compress()
        {
            string root = Environment.CurrentDirectory;
            var trainingCsv   = Properties.Resources.trainingsample;
            var validationCsv = Properties.Resources.validationsample;

            // Train a multiclass linear SVM on the training sample.
            var trainingSet = readData(trainingCsv);
            var samples     = trainingSet.Item1;
            var targets     = trainingSet.Item2;

            var learner = new MulticlassSupportVectorLearning <Linear>();
            var machine = learner.Learn(samples, targets);

            // Compress() is expected to leave a single support vector in each
            // of the two inner machines checked here.
            Assert.AreEqual(50, machine.Models[0][0].SupportVectors.Length);
            Assert.AreEqual(127, machine.Models[1][0].SupportVectors.Length);
            machine.Compress();
            Assert.AreEqual(1, machine.Models[0][0].SupportVectors.Length);
            Assert.AreEqual(1, machine.Models[1][0].SupportVectors.Length);

            // Mean zero-one loss on the training sample.
            var trainingLoss = new ZeroOneLoss(targets);
            trainingLoss.Mean = true;

            double trainingError = trainingLoss.Loss(machine.Decide(samples));
            Assert.AreEqual(0.054, trainingError);

            // Mean zero-one loss on the validation sample.
            var validationSet     = readData(validationCsv);
            var validationSamples = validationSet.Item1;
            var validationTargets = validationSet.Item2;

            var validationLoss = new ZeroOneLoss(validationTargets);
            validationLoss.Mean = true;

            double validationError = validationLoss.Loss(machine.Decide(validationSamples));
            Assert.AreEqual(0.082, validationError);
        }
Exemple #7
0
        /// <summary>
        /// Fills <c>dtStatic</c> with the play-tennis sample table, builds the
        /// codebook <c>myCodeBook</c> from it, trains an ID3 decision tree into
        /// <c>myTreeModel</c> and prints the tree's training accuracy in percent.
        /// </summary>
        private static void initDecisionTreeModel()
        {
            dtStatic.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");
            dtStatic.Rows.Add("D1", "Sunny", "Hot", "High", "Weak", "No");
            dtStatic.Rows.Add("D2", "Sunny", "Hot", "High", "Strong", "No");
            dtStatic.Rows.Add("D3", "Overcast", "Hot", "High", "Weak", "Yes");
            dtStatic.Rows.Add("D4", "Rain", "Mild", "High", "Weak", "Yes");
            dtStatic.Rows.Add("D5", "Rain", "Cool", "Normal", "Weak", "Yes");
            dtStatic.Rows.Add("D6", "Rain", "Cool", "Normal", "Strong", "No");
            dtStatic.Rows.Add("D7", "Overcast", "Cool", "Normal", "Strong", "Yes");
            dtStatic.Rows.Add("D8", "Sunny", "Mild", "High", "Weak", "No");
            dtStatic.Rows.Add("D9", "Sunny", "Cool", "Normal", "Weak", "Yes");
            dtStatic.Rows.Add("D10", "Rain", "Mild", "Normal", "Weak", "Yes");
            dtStatic.Rows.Add("D11", "Sunny", "Mild", "Normal", "Strong", "Yes");
            dtStatic.Rows.Add("D12", "Overcast", "Mild", "High", "Strong", "Yes");
            dtStatic.Rows.Add("D13", "Overcast", "Hot", "Normal", "Weak", "Yes");
            dtStatic.Rows.Add("D14", "Rain", "Mild", "High", "Strong", "No");
            dtStatic.Rows.Add("D15", "Rain", "Cool", "High", "Strong", "No");
            dtStatic.Rows.Add("D16", "Rain", "Hot", "High", "Strong", "Yes");
            dtStatic.Rows.Add("D17", "Rain", "Hot", "High", "Weak", "Yes");
            dtStatic.Rows.Add("D18", "Rain", "Cool", "High", "Weak", "No");
            dtStatic.Rows.Add("D19", "Rain", "Cool", "High", "Weak", "Yes");
            dtStatic.Rows.Add("D20", "Rain", "Mild", "High", "Strong", "Yes");

            // Translate the string cells into integer symbols.
            myCodeBook = new Codification(dtStatic);

            DataTable symbols = myCodeBook.Apply(dtStatic);

            // "Day" is only a row identifier and is not used as a feature.
            int[][] inputs      = symbols.ToJagged <int>("Outlook", "Temperature", "Humidity", "Wind");
            int[]   outputs     = symbols.ToArray <int>("PlayTennis");
            var     id3learning = new ID3Learning()
            {
                new DecisionVariable("Outlook", 3),     // 3 possible values (Sunny, overcast, rain)
                new DecisionVariable("Temperature", 3), // 3 possible values (Hot, mild, cool)
                new DecisionVariable("Humidity", 2),    // 2 possible values (High, normal)
                new DecisionVariable("Wind", 2)         // 2 possible values (Weak, strong)
            };

            myTreeModel = id3learning.Learn(inputs, outputs);

            // Request the mean loss explicitly so that `error` is the fraction of
            // misclassified rows, whatever the library default is.
            var loss = new ZeroOneLoss(outputs)
            {
                Mean = true
            };
            double error = loss.Loss(myTreeModel.Decide(inputs));

            // Convert the error fraction into an accuracy percentage. The previous
            // "100 - error" treated the fraction as if it were already a percentage.
            double accuracyPercent = 100.0 * (1.0 - error);

            Console.WriteLine("learnt model training accuracy is: " + accuracyPercent.ToString("N2"));
        }
Exemple #8
0
        /// <summary>
        /// Trains a random forest <c>iterations</c> times, each time evaluating it on a
        /// different window of samples, and returns the forest with the highest accuracy.
        /// </summary>
        /// <param name="trainingData">Samples forwarded to <c>PrepareInputOutput</c>.</param>
        /// <param name="testingData">Samples forwarded to <c>PrepareInputOutput</c>; its count also defines the test window size (one tenth).</param>
        /// <param name="precision">Receives one minus the average zero-one loss over all iterations.</param>
        /// <returns>
        /// The forest with the highest accuracy, or <c>null</c> when no iteration ran.
        /// </returns>
        static RandomForest RandomForestClassification(List <int[]> trainingData, List <int[]> testingData, out double precision)
        {
            int    testingCount      = testingData.Count / 10;
            double errorAverage      = 0;
            int    indexTestingStart = testingData.Count - testingCount;
            int    indexTestingEnd   = testingData.Count;
            double prec = 0;

            Console.WriteLine("Random Forest Classification");
            RandomForest bestforest = null;

            for (int i = 0; i < iterations; i++)
            {
                var watch = System.Diagnostics.Stopwatch.StartNew();
                Console.WriteLine("Testing from: {0} to {1}", indexTestingStart, indexTestingEnd);
                int[][] inputData, testinputData;
                int[]   outputData, testoutputData;

                // Presumably splits the samples into train/test parts around the
                // [indexTestingStart, indexTestingEnd) window; the helper is defined elsewhere.
                PrepareInputOutput(out inputData, out outputData, out testinputData, out testoutputData, trainingData, testingData, indexTestingStart, indexTestingEnd);
                var RanForest = new RandomForestLearning()
                {
                    NumberOfTrees = 100,
                };
                var forest = RanForest.Learn(inputData, outputData);
                Console.WriteLine("Medis sukurtas - ismokta");
                double er       = new ZeroOneLoss(testoutputData).Loss(forest.Decide(testinputData));
                double accuracy = 1 - er;
                Console.WriteLine("Apmokymo tikslumas: {0}", accuracy);

                // Always accept the first forest, so that a trained model is returned
                // even when no iteration reaches an accuracy above zero.
                if (bestforest == null || accuracy > prec)
                {
                    prec       = accuracy;
                    bestforest = forest;
                }
                watch.Stop();
                var elapsedMs = watch.ElapsedMilliseconds;
                Console.WriteLine("Iteracija baigta per: {0}ms", elapsedMs);

                // Move the test window one step towards the start of the data.
                indexTestingEnd    = indexTestingStart;
                indexTestingStart -= testingCount;
                errorAverage      += er;
                Console.WriteLine("------------------------------------------------------------------------------");
            }
            precision = 1 - (errorAverage / iterations);
            return(bestforest);
        }
        /// <summary>
        /// Flattens <c>trainingVects</c> into input/output arrays, trains a C4.5
        /// decision tree with two continuous attributes on them and queries the tree.
        /// All results are kept in locals only.
        /// </summary>
        private void ParameterLearning()
        {
            // Size the flat arrays from the actual number of samples, so groups of
            // different lengths neither overlap nor overflow the arrays.
            int total = 0;
            for (int i = 0; i < trainingVects.Length; i++)
            {
                total += trainingVects[i].Count;
            }

            int[][] inputs  = new int[total][];
            int[]   outputs = new int[total];

            // Running write position into the flat arrays.
            int next = 0;
            for (int i = 0; i < trainingVects.Length; i++)
            {
                for (int j = 0; j < trainingVects[i].Count; j++)
                {
                    // Elements 0 and 1 are the two features; element 2 is the
                    // label, shifted down by one.
                    inputs[next] = new int[] {
                        (int)trainingVects[i][j][0],
                        (int)trainingVects[i][j][1]
                    };
                    outputs[next] = (int)trainingVects[i][j][2] - 1;
                    next++;
                }
            }

            // Create a C4.5 learning algorithm with two continuous attributes
            C45Learning      teacher = new C45Learning();
            DecisionVariable var1    = new DecisionVariable("A", new Accord.DoubleRange(0, 100));
            DecisionVariable var2    = new DecisionVariable("B", new Accord.DoubleRange(0, 100));

            var1.Nature = DecisionVariableKind.Continuous;
            var2.Nature = DecisionVariableKind.Continuous;
            teacher.Attributes.Add(var1);
            teacher.Attributes.Add(var2);
            var tree = teacher.Learn(inputs, outputs);

            var r = tree.ToRules();

            // NOTE(review): the values read below are not used afterwards;
            // presumably they are kept for inspection in a debugger.
            for (int i = 0; i < r.Count; i++)
            {
                double o     = r.ElementAt(i).Output;
                string name1 = r.ElementAt(i).Variables.ElementAt(0).Name;
                string name2 = r.ElementAt(i).Variables.ElementAt(1).Name;
            }

            // Zero-one loss of the tree on its own training data.
            double error = new ZeroOneLoss(outputs).Loss(tree.Decide(inputs));

            int[]   predicted = tree.Decide(inputs);

            // Query the tree with a single hand-written sample.
            int[][] inputs2   = new int[1][];
            inputs2[0] = new int[2] {
                80, 81
            };
            var tmp = tree.Decide(inputs2);
        }
Exemple #10
0
        static void Main(string[] args)
        {
            // Learn a decision tree directly from integer matrices that define
            // the inputs and outputs of the learning problem.
            int[][] samples =
            {
                new int[] { 0, 0 },
                new int[] { 0, 1 },
                new int[] { 1, 0 },
                new int[] { 1, 1 },
            };

            // One class label per sample row.
            int[] labels = { 0, 2, 1, 2 };

            // Describe both input columns as continuous attributes.
            DecisionVariable first  = new DecisionVariable("0", new Accord.DoubleRange(0, 999));
            DecisionVariable second = new DecisionVariable("1", new Accord.DoubleRange(0, 999));
            first.Nature  = DecisionVariableKind.Continuous;
            second.Nature = DecisionVariableKind.Continuous;

            // Create a C4.5 learning algorithm and register the attributes.
            C45Learning learner = new C45Learning();
            learner.Attributes.Add(first);
            learner.Attributes.Add(second);

            // Learn the tree and convert it into a rule set.
            var tree  = learner.Learn(samples, labels);
            var rules = tree.ToRules();

            // Read each rule's output and variable names (the values are not used further).
            for (int index = 0; index < rules.Count; index++)
            {
                var    rule  = rules.ElementAt(index);
                double o     = rule.Output;
                string name1 = rule.Variables.ElementAt(0).Name;
                string name2 = rule.Variables.ElementAt(1).Name;
            }

            // Zero-one loss of the tree on its own training samples.
            var    loss  = new ZeroOneLoss(labels);
            double error = loss.Loss(tree.Decide(samples));

            // The tree can now be queried; here it is asked about the training samples again.
            int[] predicted = tree.Decide(samples);
        }
        /// <summary>
        /// Loads the comma-separated training rows from <c>../runData2_1.txt</c>,
        /// encodes them with a codebook stored in <c>codebook</c>, and trains an ID3
        /// decision tree stored in <c>tree</c>.
        /// </summary>
        public ClassifierWReview()
        {
            // Alternative data file: "../runData2.txt"
            string filedata = System.IO.File.ReadAllText("../runData2_1.txt");

            string[] inputColumns =
            {
                "Day", "Outlook", "Temperature", "Humidity", "Wind", "SprintReview"
            };

            string outputColumn = "GoRun";

            DataTable data = new DataTable("Internet Services Run Calculator");

            data.Columns.Add(inputColumns);
            data.Columns.Add(outputColumn);

            // Accept both Windows (CRLF) and Unix (LF) line endings. With only
            // "\r\n" as separator an LF-terminated file would become a single row.
            // "\r\n" is listed first so a CRLF pair is consumed as one separator.
            string[] lines = filedata.Split(
                new[] { "\r\n", "\n" }, StringSplitOptions.RemoveEmptyEntries);

            foreach (var line in lines)
            {
                data.Rows.Add(line.Split(','));
            }

            // Create the codebook that turns the strings into number representations
            codebook = new Accord.Statistics.Filters.Codification(data);

            // Translate the training data into integer symbols using the codebook
            DataTable symbols = codebook.Apply(data);

            // "Day" is loaded but not used as a feature.
            int[][] inputs  = symbols.ToJagged <int>("Outlook", "Temperature", "Humidity", "Wind", "SprintReview");
            int[]   outputs = symbols.ToArray <int>("GoRun");

            string[]           decisionVariables = { "Outlook", "Temperature", "Humidity", "Wind", "SprintReview" };
            DecisionVariable[] attributes        = DecisionVariable.FromCodebook(codebook, decisionVariables);

            // Create the ID3 teacher and learn the tree
            var id3learning = new ID3Learning(attributes);

            tree = id3learning.Learn(inputs, outputs);

            // Training error when predicting the training instances (not used further)
            double error = new ZeroOneLoss(outputs).Loss(tree.Decide(inputs));
        }
Exemple #12
0
        /// <summary>
        /// Standardizes the given samples together with the stored <c>_inputs</c>,
        /// optionally applies the stored PCA transform, runs probabilistic output
        /// calibration on <c>_svm</c>, and stores and prints the resulting
        /// confusion-matrix statistics.
        /// </summary>
        /// <param name="inputs">Samples to calibrate on.</param>
        /// <param name="outputs">Expected class labels for <paramref name="inputs"/>.</param>
        /// <param name="weights">Not used by this method.</param>
        public void CalibrateSVM(double[][] inputs, int[] outputs, double[] weights)
        {
            // Z-score the new samples together with the stored ones, then cut the
            // new samples back out of the standardized set.
            var combined     = _inputs.Concat(inputs).ToArray();
            var standardized = Accord.Statistics.Tools.ZScores(combined);
            inputs = standardized.Skip(_inputs.Count).Take(inputs.Length).ToArray();

            if (_pcaTransform)
            {
                inputs = _pca.Transform(inputs);
            }

            // Calibrate each inner machine of the existing model.
            var calibrator = new MulticlassSupportVectorLearning <Gaussian>();
            calibrator.Model   = _svm;
            calibrator.Learner = (p) => new ProbabilisticOutputCalibration <Gaussian>()
            {
                Model = p.Model
            };

            calibrator.Learn(inputs, outputs);

            _svm.Method = MulticlassComputeMethod.Elimination;

            // Evaluate the calibrated machine on the calibration samples.
            var decisions = _svm.Decide(inputs);
            var error     = new ZeroOneLoss(outputs).Loss(decisions);
            var matrix    = new GeneralConfusionMatrix(3, outputs, decisions);

            Console.WriteLine("Accuracy: {0} Variance: {1} Kappa: {2} Error: {3}", matrix.Accuracy, matrix.Variance, matrix.Kappa, error);

            _cm = matrix;
        }
Exemple #13
0
        /// <summary>
        /// Trains an ID3 decision tree <c>iterations</c> times, each time evaluating it on
        /// a different window of samples, and returns the tree with the highest accuracy.
        /// </summary>
        /// <param name="trainingData">Samples forwarded to <c>PrepareInputOutput</c>.</param>
        /// <param name="testingData">Samples forwarded to <c>PrepareInputOutput</c>; its count also defines the test window size (one tenth).</param>
        /// <param name="precision">Receives one minus the average zero-one loss over all iterations.</param>
        /// <returns>
        /// The tree with the highest accuracy, or <c>null</c> when no iteration ran.
        /// </returns>
        static DecisionTree DecisionTreeClassification(List <int[]> trainingData, List <int[]> testingData, out double precision)
        {
            int    testingCount      = testingData.Count / 10;
            double errorAverage      = 0;
            int    indexTestingStart = testingData.Count - testingCount;
            int    indexTestingEnd   = testingData.Count;
            double prec = 0;

            Console.WriteLine("Decision Tree Classification");
            DecisionTree bestDecision = null;

            for (int i = 0; i < iterations; i++)
            {
                var watch = System.Diagnostics.Stopwatch.StartNew();
                Console.WriteLine("Testing from: {0} to {1}", indexTestingStart, indexTestingEnd);
                int[][] inputData, testinputData;
                int[]   outputData, testoutputData;

                // Presumably splits the samples into train/test parts around the
                // [indexTestingStart, indexTestingEnd) window; the helper is defined elsewhere.
                PrepareInputOutput(out inputData, out outputData, out testinputData, out testoutputData, trainingData, testingData, indexTestingStart, indexTestingEnd);

                ID3Learning teacher  = new ID3Learning();
                var         decision = teacher.Learn(inputData, outputData);
                Console.WriteLine("Medis sukurtas - ismokta");
                double error    = new ZeroOneLoss(testoutputData).Loss(decision.Decide(testinputData));
                double accuracy = 1 - error;
                Console.WriteLine("Apmokymo tikslumas: {0}", accuracy);

                // Keep the tree only when it beats the best accuracy so far. The
                // first tree is always accepted, so a trained model is returned
                // even when every iteration scores zero accuracy.
                if (bestDecision == null || accuracy > prec)
                {
                    prec         = accuracy;
                    bestDecision = decision;
                }
                watch.Stop();
                var elapsedMs = watch.ElapsedMilliseconds;
                Console.WriteLine("Iteracija baigta per: {0}ms", elapsedMs);

                // Move the test window one step towards the start of the data.
                // The best tree is deliberately NOT overwritten here; doing so
                // made the method return the last tree instead of the best one.
                indexTestingEnd    = indexTestingStart;
                indexTestingStart -= testingCount;
                errorAverage      += error;
                Console.WriteLine("------------------------------------------------------------------------------");
            }
            precision = 1 - (errorAverage / iterations);
            return(bestDecision);
        }
        /// <summary>
        /// Fits a random forest to the given samples and, when requested, stores
        /// its zero-one loss on those same samples in <c>TrainingError</c>.
        /// </summary>
        /// <param name="trainingData">Feature vectors the forest is trained on.</param>
        /// <param name="trainingLabels">Labels passed to the learner together with <paramref name="trainingData"/>.</param>
        /// <param name="calculateError">When <c>true</c>, the training error is computed after training.</param>
        public override void Train(List <double[]> trainingData, List <int> trainingLabels, bool calculateError = true)
        {
            LearningAlgorithm = new RandomForestLearning();

            // Settings that are not positive are not applied to the learner.
            if (NumTrees > 0)
            {
                LearningAlgorithm.NumberOfTrees = NumTrees;
            }

            if (SamplePropotion > 0)
            {
                LearningAlgorithm.SampleRatio = SamplePropotion;
            }

            Model = LearningAlgorithm.Learn(trainingData.ToArray(), trainingLabels.ToArray());

            if (!calculateError)
            {
                return;
            }

            // Evaluate the fitted model on the data it was trained on.
            var loss = new ZeroOneLoss(trainingLabels.ToArray());
            TrainingError = loss.Loss(Model.Decide(trainingData.ToArray()));
        }
    // Fills keyTable with the key-order samples, encodes them with a codebook,
    // trains an ID3 decision tree into `tree` and logs its training error.
    void Start()
    {
        // Add the attribute columns and the class column to the table.
        foreach (string columnName in new[] { "First key", "Second key", "Third key", "Exit" })
        {
            keyTable.Columns.Add(columnName, typeof(string));
        }

        // Add the records to the table.
        keyTable.Rows.Add("Yellow", "Purple", "Blue", "First");
        keyTable.Rows.Add("Yellow", "Blue", "Purple", "Second");
        keyTable.Rows.Add("Purple", "Yellow", "Blue", "First");
        keyTable.Rows.Add("Purple", "Blue", "Yellow", "Second");
        keyTable.Rows.Add("Blue", "Purple", "Yellow", "First");
        keyTable.Rows.Add("Blue", "Yellow", "Purple", "Second");

        // To keep the computation cheap, Accord converts the strings into
        // integer symbols; the codebook performs that conversion.
        codebook = new Codification(keyTable);

        // Convert the table data into integer symbols using the codebook.
        DataTable encoded = codebook.Apply(keyTable);

        int[][] features = encoded.ToJagged <int> ("First key", "Second key", "Third key");
        int[]   exits    = encoded.ToArray <int> ("Exit");

        // Create the ID3 algorithm. Each column has three possible values:
        // yellow, purple and blue.
        var learner = new ID3Learning()
        {
            new DecisionVariable("First key", 3),
            new DecisionVariable("Second key", 3),
            new DecisionVariable("Third key", 3)
        };

        // Train the tree.
        tree = learner.Learn(features, exits);

        // Check whether the tree makes errors on its own training data.
        var    loss          = new ZeroOneLoss(exits);
        double errorTraining = loss.Loss(tree.Decide(features));

        Debug.Log("Tree error? (0 = no, 1 = yes) \n" + errorTraining);
    }
Exemple #16
0
        public void sparse_zero_vector_test()
        {
            // Linear SVM learner working on sparse inputs; the very large
            // complexity asks for a hard-margin model.
            var learner = new LinearNewtonMethod <Linear, Sparse <double> >();
            learner.Tolerance  = 1e-10;
            learner.Complexity = 1e+10;

            // Four dense points converted to sparse vectors; the last one is all zeros.
            double[][] dense =
            {
                new double[] { 1, 1, 2 },
                new double[] { 0, 1, 6 },
                new double[] { 1, 0, 8 },
                new double[] { 0, 0, 0 },
            };
            Sparse <double>[] samples = Sparse.FromDense(dense);

            int[] labels = { 1, -1, 1, -1 };

            // Learn the support vector machine
            var machine = learner.Learn(samples, labels);

            // Classify the training points and compute the zero-one loss
            bool[] decisions = machine.Decide(samples);
            double error = new ZeroOneLoss(labels).Loss(decisions);

            // Shape of the learned model
            Assert.AreEqual(3, machine.NumberOfInputs);
            Assert.AreEqual(1, machine.NumberOfOutputs);
            Assert.AreEqual(2, machine.NumberOfClasses);

            Assert.AreEqual(1, machine.Weights.Length);
            Assert.AreEqual(1, machine.SupportVectors.Length);

            // Learned weight and the single support vector
            Assert.AreEqual(1.0, machine.Weights[0], 1e-6);
            Assert.AreEqual(2.0056922148257597, machine.SupportVectors[0][0], 1e-6);
            Assert.AreEqual(-0.0085361347231909836, machine.SupportVectors[0][1], 1e-6);
            Assert.AreEqual(0.0014225721169379331, machine.SupportVectors[0][2], 1e-6);
            Assert.AreEqual(0.0, error);
        }
        public void kaggle_digits()
        {
            string root = TestContext.CurrentContext.TestDirectory;
            var trainingCsv   = Properties.Resources.trainingsample;
            var validationCsv = Properties.Resources.validationsample;

            // Train a multiclass linear SVM on the training sample.
            var trainingSet = readData(trainingCsv);
            var samples     = trainingSet.Item1;
            var targets     = trainingSet.Item2;

            var learner = new MulticlassSupportVectorLearning <Linear>();

#if MONO
            learner.ParallelOptions.MaxDegreeOfParallelism = 1;
#endif

            var machine = learner.Learn(samples, targets);

            // Mean zero-one loss on the training sample.
            var trainingLoss = new ZeroOneLoss(targets);
            trainingLoss.Mean = true;

            double trainingError = trainingLoss.Loss(machine.Decide(samples));
            Assert.AreEqual(0.054, trainingError);

            // Mean zero-one loss on the validation sample.
            var validationSet     = readData(validationCsv);
            var validationSamples = validationSet.Item1;
            var validationTargets = validationSet.Item2;

            var validationLoss = new ZeroOneLoss(validationTargets);
            validationLoss.Mean = true;

            double validationError = validationLoss.Loss(machine.Decide(validationSamples));
            Assert.AreEqual(0.082, validationError);
        }
        /// <summary>
        /// Trains a linear multi-class support vector machine on the given samples and,
        /// when a location is supplied, saves the trained machine to disk.
        /// </summary>
        /// <param name="features">Training samples, one feature vector per entry.</param>
        /// <param name="labels">Class label of each training sample.</param>
        /// <param name="modelSaveLocation">Path the machine is saved to; nothing is saved when null or empty.</param>
        /// <returns>The trained machine.</returns>
        public static MulticlassSupportVectorMachine<Linear> RunSVM(double[][] features, int[] labels, string modelSaveLocation = null)
        {
            var teacher = new MulticlassSupportVectorLearning<Linear>()
            {
                // "Learner" is the factory that creates the learning algorithm
                // used for the underlying binary machines.
                Learner = (p) => new LinearDualCoordinateDescent()
                {
                    Loss = Loss.L2
                }
            };

            var svm = teacher.Learn(features, labels);

            if (!string.IsNullOrEmpty(modelSaveLocation))
            {
                Serializer.Save(obj: svm, path: modelSaveLocation);
            }

            return svm;
        }
Exemple #19
0
        /// <summary>
        /// Trains a random forest on the given samples and stores it in <c>Model</c>.
        /// </summary>
        /// <param name="trainingData">Training samples; converted to integer vectors before learning.</param>
        /// <param name="trainingLabels">Label of each sample; converted to integers before learning.</param>
        /// <param name="calculateError">When true, the zero-one loss on the training samples is stored in <c>TrainingError</c>.</param>
        public override void Train(List<double[]> trainingData, List<double> trainingLabels, bool calculateError = true)
        {
            LearningAlgorithm = new RandomForestLearning();

            // Only positive settings are applied to the learner.
            if (NumTrees > 0)
            {
                LearningAlgorithm.NumberOfTrees = NumTrees;
            }
            if (SamplePropotion > 0)
            {
                LearningAlgorithm.SampleRatio = SamplePropotion;
            }

            int[][] samples = TypeCasters.DoubleMultiArrayToInt(trainingData).ToArray();
            int[]   classes = TypeCasters.DoubleArrayToInt(trainingLabels).ToArray();

            Model = LearningAlgorithm.Learn(samples, classes);

            if (!calculateError)
            {
                return;
            }

            var predictions = Model.Decide(samples);
            TrainingError = new ZeroOneLoss(classes).Loss(predictions);
        }
Exemple #20
0
        /// <summary>
        /// Learns a multinomial logistic regression with conjugate gradient on the shared
        /// example data and checks the resulting zero-one loss on the training samples.
        /// </summary>
        public void RegressTest2()
        {
            // Seed the generator with a fixed value before learning.
            Accord.Math.Random.Generator.Seed = 0;

            double[][] samples;
            int[]      classes;
            MultinomialLogisticRegressionTest.CreateInputOutputsExample1(out samples, out classes);

            // Learning algorithm that estimates the regression
            var learner = new MultinomialLogisticLearning<ConjugateGradient>();

            // Estimate the model from the example data
            MultinomialLogisticRegression regression = learner.Learn(samples, classes);

            // Classify the same samples and measure how many were missed
            int[] answers = regression.Decide(samples);
            var   loss    = new ZeroOneLoss(classes);
            double error  = loss.Loss(answers);

            Assert.AreEqual(0.61088435374149663, error, 1e-8);
        }
Exemple #21
0
    /// <summary>
    /// Learns an ID3 decision tree from the training table and saves it to disk.
    /// The codebook and the tree are stored in the <c>codebook</c> and <c>tree</c> members.
    /// </summary>
    private void Train()
    {
        DataTable data = GetDataTable(Application.dataPath + "/" + trainData);

        // Build the codebook from the table and use it to turn the table into integer symbols.
        codebook = new Codification(data);
        DataTable symbols = codebook.Apply(data);

        int[][] inputs  = symbols.ToArray<int>("LIFE", "TOWERS", "MELIANTS", "TIME", "ENEMY_COINS");
        int[]   outputs = symbols.ToArray<int>("POSITION");

        var id3learning = new ID3Learning();

        // Describe the input attributes from the data itself.
        id3learning.Attributes = DecisionVariable.FromData(inputs);

        tree = id3learning.Learn(inputs, outputs);

        tree.Save(Application.dataPath + "/" + treeLocation);
    }
Exemple #22
0
        /// <summary>
        /// Learns an ID3 decision tree for the "Cannabis" column of the drug consumption
        /// data and measures its zero-one loss on a separate test file.
        /// </summary>
        /// <param name="show">When true, the target name, the learned rules and the error are
        /// written to the console, waiting for a key press after the rules and after the error.</param>
        /// <returns>The zero-one loss of the tree on the test data.</returns>
        static double Decision_Tree(bool show)
        {
            const string target = "Cannabis";

            // Input columns, shared by the training and the test tables.
            string[] featureColumns =
            {
                "Age", "Gender", "Education", "Country", "Eticnity", "Nscore",
                "Escore", "Oscore", "Ascore", "Cscore", "Impulsive", "SS"
            };

            DataTable data       = DataController.MakeDataTable("../../drug_consumption.txt");
            DataTable entireData = DataController.MakeDataTable("../../drug_consumption.txt");
            DataTable tests      = DataController.MakeDataTable("../../drug_consumption_test2.txt");

            Codification codebook = new Codification(entireData);

            DecisionVariable[] attributes = DataController.GetAttributes();

            // (7) "Never Used", "Used over a Decade Ago", "Used in Last Decade", "Used in Last Year",
            // "Used in Last Month", "Used in Last Week", and "Used in Last Day"
            int classCount = 7;

            DecisionTree tree    = new DecisionTree(attributes, classCount);
            ID3Learning  learner = new ID3Learning(tree);
            learner.MaxHeight = 7;

            // Learn from the codified training table.
            DataTable symbols = codebook.Apply(data);
            int[][]   inputs  = symbols.ToJagged<int>(featureColumns);
            int[]     outputs = symbols.ToArray<int>(target);
            learner.Learn(inputs, outputs);

            // Codify the test table the same way and evaluate the tree on it.
            DataTable testSymbols = codebook.Apply(tests);
            int[][]   testIn      = testSymbols.ToJagged<int>(featureColumns);
            int[]     testOut     = testSymbols.ToArray<int>(target);

            DecisionSet rules    = tree.ToRules();
            string      ruleText = rules.ToString(codebook, target, System.Globalization.CultureInfo.InvariantCulture);
            double      error    = new ZeroOneLoss(testOut).Loss(tree.Decide(testIn));

            if (!show)
            {
                return error;
            }

            Console.WriteLine(target);
            Console.WriteLine();
            Console.WriteLine(ruleText);
            Console.ReadKey();
            Console.WriteLine("Blad - " + Math.Round(error, 4) + "%");
            Console.ReadKey();

            return error;
        }
Exemple #23
0
        /// <summary>
        /// Runs a k-fold cross-validation of a random forest built with the given parameters.
        /// </summary>
        /// <param name="appIdentAcordSource">Source of the samples, integer labels and decision variables.</param>
        /// <param name="bestParameters">Parameters handed to <c>CreateRandomForestModel</c> for every fold.</param>
        /// <param name="folds">Number of folds.</param>
        /// <returns>The result computed by the cross-validation.</returns>
        public CrossValidationResult<RandomForest> GetCrossValidationResultsOfRandomForestModel(AppIdentAcordSource appIdentAcordSource, GridSearchParameterCollection bestParameters, int folds = 10)
        {
            var samples           = appIdentAcordSource.Samples;
            var labels            = appIdentAcordSource.LabelsAsIntegers;
            var decisionVariables = appIdentAcordSource.DecisionVariables;

            // The cross-validation is sized by the number of samples and the number of folds.
            var crossvalidation = new CrossValidation<RandomForest>(samples.Length, folds);

            // For each fold the fitting function receives the indices of the original
            // set to train on and the indices of the original set to validate on.
            crossvalidation.Fitting = (k, indicesTrain, indicesValidation) =>
            {
                Console.WriteLine($"{DateTime.Now} RandomForest cross validation.");

                // Training part of this fold
                var trainingInputs  = samples.Get(indicesTrain);
                var trainingOutputs = labels.Get(indicesTrain);

                // Validation part of this fold
                var validationInputs  = samples.Get(indicesValidation);
                var validationOutputs = labels.Get(indicesValidation);

                // Forest built with the supplied parameters
                var rfcModel = CreateRandomForestModel(decisionVariables, bestParameters, trainingInputs, trainingOutputs);

                // Zero-one loss on the training part, then on the validation part
                var trainingLoss  = new ZeroOneLoss(trainingOutputs);
                var trainingError = trainingLoss.Loss(rfcModel.Decide(trainingInputs));

                var validationLoss  = new ZeroOneLoss(validationOutputs);
                var validationError = validationLoss.Loss(rfcModel.Decide(validationInputs));

                // Bundle the model and both errors; the validation data travels along in the tag.
                var values = new CrossValidationValues<RandomForest>(rfcModel, trainingError, validationError);
                values.Tag = new ValidationDataSource(validationInputs, validationOutputs);
                return values;
            };

            return crossvalidation.Compute();
        }
Exemple #24
0
        /// <summary>
        /// Learns a C4.5 tree from the Wisconsin breast cancer data, whose features are
        /// nullable integers, and checks the tree height and its zero-one loss.
        /// </summary>
        public void missing_values()
        {
            var dataset = new WisconsinOriginalBreastCancer();
            int?[][] inputs  = dataset.Features;
            int[]    outputs = dataset.ClassLabels;

            // Learner with its default settings
            var learner = new C45Learning();
            var tree    = learner.Learn(inputs, outputs);

            int height = tree.GetHeight();
            Assert.AreEqual(4, height);

            // Error of the tree on the samples it was learned from
            int[] predicted = tree.Decide(inputs);
            var   loss      = new ZeroOneLoss(outputs);
            double error    = loss.Loss(predicted);

            Assert.AreEqual(0.0028612303290414878, error, 1e-8);
        }
Exemple #25
0
        /// <summary>
        /// Learns a C4.5 decision tree from the rows of <c>dt</c> and returns its rules as text.
        /// Each row is read as 30 sensor values followed by the class value in column 30.
        /// </summary>
        /// <returns>The rules of the learned tree, formatted with the invariant culture.</returns>
        public string Rules2String()
        {
            // Number of sensor columns; the class value sits in the column right after them.
            const int sensorCount = 30;

            int count = dt.Rows.Count;

            int[][]  inputs = new int[count][];
            string[] labels = new string[count];
            int      num    = 0;

            foreach (DataRow dr in dt.Rows)
            {
                int res = Convert.ToInt32(dr[sensorCount]);
                inputs[num] = new int[sensorCount];
                for (int sensor_i = 0; sensor_i < sensorCount; sensor_i++)
                {
                    inputs[num][sensor_i] = Convert.ToInt32(dr[sensor_i]);
                }
                labels[num] = "class-" + res.ToString();
                num++;
            }

            // Map the textual class labels to integer symbols.
            var codebook = new Codification("Output", labels);

            int[] outputs = codebook.Transform("Output", labels);

            // Every sensor is declared as a continuous variable named sensor_1 .. sensor_30.
            DecisionVariable[] dv = new DecisionVariable[sensorCount];
            for (int i = 0; i < sensorCount; i++)
            {
                string name = "sensor_" + (i + 1).ToString();
                dv[i] = new DecisionVariable(name, DecisionVariableKind.Continuous);
            }

            // Learn the decision tree with the C4.5 algorithm
            var          C45  = new C45Learning(dv);
            DecisionTree tree = C45.Learn(inputs, outputs);

            DecisionSet rules = tree.ToRules();

            return rules.ToString(codebook, "Output", System.Globalization.CultureInfo.InvariantCulture);
        }
        /// <summary>
        /// Learns a random forest of 10 trees on the Iris data set and checks the number
        /// of trees and the zero-one loss on the training samples.
        /// </summary>
        public void test_learn()
        {
            #region doc_iris
            // Fix random seed for reproducibility
            Accord.Math.Random.Generator.Seed = 1;

            // In this example, we will process the famous Fisher's Iris dataset in
            // which the task is to classify whether the features of an Iris flower
            // belong to an Iris setosa, an Iris versicolor, or an Iris virginica:
            //
            //  - https://en.wikipedia.org/wiki/Iris_flower_data_set
            //

            // First, let's load the dataset:
            var        iris    = new DataSets.Iris();
            double[][] inputs  = iris.Instances;   // flower features
            int[]      outputs = iris.ClassLabels; // flower categories

            // Create the forest learning algorithm
            var teacher = new RandomForestLearning()
            {
                NumberOfTrees = 10, // use 10 trees in the forest
            };

            // Finally, learn a random forest from data
            var forest = teacher.Learn(inputs, outputs);

            // We can estimate class labels using
            int[] predicted = forest.Decide(inputs);

            // And the classification error can be computed from those predictions as
            double error = new ZeroOneLoss(outputs).Loss(predicted);
            #endregion

            Assert.AreEqual(10, forest.Trees.Length);

            Assert.IsTrue(error < 0.015);
        }
        /// <summary>
        /// Learns an ID3 decision tree for the XOR problem from integer matrices and
        /// checks that the tree reproduces every training output.
        /// </summary>
        public void learn_doc()
        {
            #region doc_learn_simplest
            // This example learns a decision tree straight from integer matrices
            // holding the inputs and the outputs of the learning problem.

            int[][] inputs =
            {
                new int[] { 0, 0 },
                new int[] { 0, 1 },
                new int[] { 1, 0 },
                new int[] { 1, 1 },
            };

            // xor between inputs[0] and inputs[1]
            int[] outputs = { 0, 1, 1, 0 };

            // The learning algorithm is ID3
            ID3Learning teacher = new ID3Learning();

            // Learn a decision tree for the XOR problem
            var tree = teacher.Learn(inputs, outputs);

            // Error of the learned tree on the training samples
            var    loss  = new ZeroOneLoss(outputs);
            double error = loss.Loss(tree.Decide(inputs));

            // The tree can now be queried for new examples:
            int[] predicted = tree.Decide(inputs); // should be { 0, 1, 1, 0 }
            #endregion

            Assert.AreEqual(0, error);

            int[] expected = { 0, 1, 1, 0 };
            for (int i = 0; i < expected.Length; i++)
            {
                Assert.AreEqual(expected[i], predicted[i]);
            }
        }
Exemple #28
0
        /// <summary>
        /// Evaluates <c>decisionTreeLib</c> on the given table after codifying it with the codebook.
        /// </summary>
        /// <param name="data">Table holding the feature columns and the "TYPE" column.</param>
        /// <param name="codebook">Codebook applied to the table before evaluation.</param>
        /// <returns>One minus the zero-one loss of the tree on the table.</returns>
        public double DecisionTreeAccuracyPercentageLib(DataTable data, Codification codebook)
        {
            // Columns used as inputs of the tree.
            string[] featureColumns =
            {
                "CAP SHAPE", "CAP SURFACE", "CAP COLOR",
                "BRUISES", "ODOR", "GILL ATTACHMENT",
                "GILL SPACING", "GILL SIZE", "GILL COLOR",
                "STALK SHAPE", "STALK ROOT", "STALK SURFACE ABOVE RING",
                "STALK SURFACE BELOW RING", "STALK COLOR ABOVE RING", "STALK COLOR BELOW RING",
                "VEIL TYPE", "VEIL COLOR", "RING NUMBER",
                "RING TYPE", "SPORE PRINT COLOR", "POPULATION",
                "HABITAT"
            };

            DataTable symbols = codebook.Apply(data);

            int[][] inputs     = DataTableToMatrix(symbols, featureColumns);
            int[][] typeColumn = DataTableToMatrix(symbols, new string[] { "TYPE" });

            // Flatten the single-column matrix into a plain label vector.
            int[] outputs = new int[typeColumn.Length];
            int   row     = 0;
            foreach (int[] cells in typeColumn)
            {
                outputs[row] = cells[0];
                row++;
            }

            var    loss  = new ZeroOneLoss(outputs);
            double error = loss.Loss(decisionTreeLib.Decide(inputs));

            return 1 - error;
        }
        /// <summary>
        /// Trains a multi-class machine with SMO on a precomputed polynomial kernel matrix,
        /// evaluates it on training and test data, and checks that its weights match those
        /// of a machine trained directly with the polynomial kernel.
        /// </summary>
        public void multiclass_precomputed_matrix_smo()
        {
            #region doc_precomputed
            // Let's say we have the following data to be classified
            // into three possible classes. Those are the samples:
            //
            double[][] trainInputs =
            {
                //               input         output
                new double[] { 0, 1, 1, 0 }, //  0
                new double[] { 0, 1, 0, 0 }, //  0
                new double[] { 0, 0, 1, 0 }, //  0
                new double[] { 0, 1, 1, 0 }, //  0
                new double[] { 0, 1, 0, 0 }, //  0
                new double[] { 1, 0, 0, 0 }, //  1
                new double[] { 1, 0, 0, 0 }, //  1
                new double[] { 1, 0, 0, 1 }, //  1
                new double[] { 0, 0, 0, 1 }, //  1
                new double[] { 0, 0, 0, 1 }, //  1
                new double[] { 1, 1, 1, 1 }, //  2
                new double[] { 1, 0, 1, 1 }, //  2
                new double[] { 1, 1, 0, 1 }, //  2
                new double[] { 0, 1, 1, 1 }, //  2
                new double[] { 1, 1, 1, 1 }, //  2
            };

            int[] trainOutputs = // those are the training set class labels
            {
                0, 0, 0, 0, 0,
                1, 1, 1, 1, 1,
                2, 2, 2, 2, 2,
            };

            // Let's choose a kernel function
            Polynomial kernel = new Polynomial(2);

            // Get the kernel matrix for the training set
            double[][] K = kernel.ToJagged(trainInputs);

            // Create a pre-computed kernel
            var pre = new Precomputed(K);

            // Create a one-vs-one learning algorithm using SMO
            var teacher = new MulticlassSupportVectorLearning<Precomputed, int>()
            {
                Learner = (p) => new SequentialMinimalOptimization<Precomputed, int>()
                {
                    Kernel = pre
                }
            };

#if DEBUG
            teacher.ParallelOptions.MaxDegreeOfParallelism = 1;
#endif

            // Learn a machine
            var machine = teacher.Learn(pre.Indices, trainOutputs);

            // Compute the machine's prediction for the training set
            int[] trainPrediction = machine.Decide(pre.Indices);

            // Evaluate prediction error for the training set using the zero-one loss
            double trainingError = new ZeroOneLoss(trainOutputs).Loss(trainPrediction);

            // Now let's compute the machine's prediction for a test set
            double[][] testInputs = // test-set inputs
            {
                //               input         output
                new double[] { 0, 1, 1, 0 }, //  0
                new double[] { 0, 1, 0, 0 }, //  0
                new double[] { 0, 0, 0, 1 }, //  1
                new double[] { 1, 1, 1, 1 }, //  2
            };

            int[] testOutputs = // those are the test set class labels
            {
                0, 0, 1, 2,
            };

            // Compute precomputed matrix between train and testing
            pre.Values = kernel.ToJagged2(trainInputs, testInputs);

            // Update the kernel
            machine.Kernel = pre;

            // Compute the machine's prediction for the test set
            int[] testPrediction = machine.Decide(pre.Indices);

            // Evaluate prediction error for the test set using the zero-one loss
            double testError = new ZeroOneLoss(testOutputs).Loss(testPrediction);
            #endregion


            Assert.AreEqual(0, trainingError);
            Assert.AreEqual(0, testError);

            // Create a second one-vs-one learning algorithm using SMO, this time
            // working on the polynomial kernel directly instead of a precomputed matrix
            var teacher2 = new MulticlassSupportVectorLearning<Polynomial>()
            {
                Learner = (p) => new SequentialMinimalOptimization<Polynomial>()
                {
                    Kernel = kernel
                }
            };

#if DEBUG
            // Configure the second teacher: the first one has already finished learning.
            teacher2.ParallelOptions.MaxDegreeOfParallelism = 1;
#endif

            // Learn the reference machine
            var expected = teacher2.Learn(trainInputs, trainOutputs);

            Assert.AreEqual(4, expected.NumberOfInputs);
            Assert.AreEqual(3, expected.NumberOfOutputs);
            Assert.AreEqual(0, machine.NumberOfInputs);
            Assert.AreEqual(3, machine.NumberOfOutputs);

            // Pair each binary machine of the precomputed model with its reference counterpart
            var machines = Enumerable.Zip(machine, expected, (a, b) => Tuple.Create(a.Value, b.Value));

            foreach (var pair in machines)
            {
                var a = pair.Item1;
                var e = pair.Item2;

                Assert.AreEqual(0, a.NumberOfInputs);
                Assert.AreEqual(2, a.NumberOfOutputs);

                Assert.AreEqual(4, e.NumberOfInputs);
                Assert.AreEqual(2, e.NumberOfOutputs);

                Assert.IsTrue(a.Weights.IsEqual(e.Weights));
            }
        }
        /// <summary>
        /// Trains a multi-class machine with SMO on a precomputed polynomial kernel matrix,
        /// evaluates it on training and test data, and checks that its weights match those
        /// of a machine trained directly with the polynomial kernel.
        /// </summary>
        public void multiclass_precomputed_matrix_smo()
        {
            #region doc_precomputed
            // Let's say we have the following data to be classified
            // into three possible classes. Those are the samples:
            //
            double[][] trainInputs =
            {
                //               input         output
                new double[] { 0, 1, 1, 0 }, //  0
                new double[] { 0, 1, 0, 0 }, //  0
                new double[] { 0, 0, 1, 0 }, //  0
                new double[] { 0, 1, 1, 0 }, //  0
                new double[] { 0, 1, 0, 0 }, //  0
                new double[] { 1, 0, 0, 0 }, //  1
                new double[] { 1, 0, 0, 0 }, //  1
                new double[] { 1, 0, 0, 1 }, //  1
                new double[] { 0, 0, 0, 1 }, //  1
                new double[] { 0, 0, 0, 1 }, //  1
                new double[] { 1, 1, 1, 1 }, //  2
                new double[] { 1, 0, 1, 1 }, //  2
                new double[] { 1, 1, 0, 1 }, //  2
                new double[] { 0, 1, 1, 1 }, //  2
                new double[] { 1, 1, 1, 1 }, //  2
            };

            int[] trainOutputs = // those are the training set class labels
            {
                0, 0, 0, 0, 0,
                1, 1, 1, 1, 1,
                2, 2, 2, 2, 2,
            };

            // Let's choose a kernel function
            Polynomial kernel = new Polynomial(2);

            // Get the kernel matrix for the training set
            double[][] K = kernel.ToJagged(trainInputs);

            // Create a pre-computed kernel
            var pre = new Precomputed(K);

            // Create a one-vs-one learning algorithm using SMO
            var teacher = new MulticlassSupportVectorLearning<Precomputed, int>()
            {
                Learner = (p) => new SequentialMinimalOptimization<Precomputed, int>()
                {
                    Kernel = pre
                }
            };

#if DEBUG
            teacher.ParallelOptions.MaxDegreeOfParallelism = 1;
#endif

            // Learn a machine
            var machine = teacher.Learn(pre.Indices, trainOutputs);

            // Compute the machine's prediction for the training set
            int[] trainPrediction = machine.Decide(pre.Indices);

            // Evaluate prediction error for the training set using the zero-one loss
            double trainingError = new ZeroOneLoss(trainOutputs).Loss(trainPrediction);

            // Now let's compute the machine's prediction for a test set
            double[][] testInputs = // test-set inputs
            {
                //               input         output
                new double[] { 0, 1, 1, 0 }, //  0
                new double[] { 0, 1, 0, 0 }, //  0
                new double[] { 0, 0, 0, 1 }, //  1
                new double[] { 1, 1, 1, 1 }, //  2
            };

            int[] testOutputs = // those are the test set class labels
            {
                0, 0, 1, 2,
            };

            // Compute precomputed matrix between train and testing
            pre.Values = kernel.ToJagged2(trainInputs, testInputs);

            // Update the kernel
            machine.Kernel = pre;

            // Compute the machine's prediction for the test set
            int[] testPrediction = machine.Decide(pre.Indices);

            // Evaluate prediction error for the test set using the zero-one loss
            double testError = new ZeroOneLoss(testOutputs).Loss(testPrediction);
            #endregion


            Assert.AreEqual(0, trainingError);
            Assert.AreEqual(0, testError);

            // Create a second one-vs-one learning algorithm using SMO, this time
            // working on the polynomial kernel directly instead of a precomputed matrix
            var teacher2 = new MulticlassSupportVectorLearning<Polynomial>()
            {
                Learner = (p) => new SequentialMinimalOptimization<Polynomial>()
                {
                    Kernel = kernel
                }
            };

#if DEBUG
            // Configure the second teacher: the first one has already finished learning.
            teacher2.ParallelOptions.MaxDegreeOfParallelism = 1;
#endif

            // Learn the reference machine
            var expected = teacher2.Learn(trainInputs, trainOutputs);

            Assert.AreEqual(4, expected.NumberOfInputs);
            Assert.AreEqual(3, expected.NumberOfOutputs);
            Assert.AreEqual(0, machine.NumberOfInputs);
            Assert.AreEqual(3, machine.NumberOfOutputs);

            // Pair each binary machine of the precomputed model with its reference counterpart
            var machines = Enumerable.Zip(machine, expected, (a, b) => Tuple.Create(a.Value, b.Value));

            foreach (var pair in machines)
            {
                var a = pair.Item1;
                var e = pair.Item2;

                Assert.AreEqual(0, a.NumberOfInputs);
                Assert.AreEqual(2, a.NumberOfOutputs);

                Assert.AreEqual(4, e.NumberOfInputs);
                Assert.AreEqual(2, e.NumberOfOutputs);

                Assert.IsTrue(a.Weights.IsEqual(e.Weights));
            }
        }
        /// <summary>
        /// Trains a multi-class machine with SMO and a Gaussian kernel accessed through the
        /// generic <c>IKernel</c> interface, calibrates it with probabilistic output calibration,
        /// and checks decisions, scores, log-likelihoods and probabilities against known values.
        /// </summary>
        public void multiclass_calibration_generic_kernel()
        {
            // Let's say we have the following data to be classified
            // into three possible classes. Those are the samples:
            //
            double[][] inputs =
            {
                //               input         output
                new double[] { 0, 1, 1, 0 }, //  0
                new double[] { 0, 1, 0, 0 }, //  0
                new double[] { 0, 0, 1, 0 }, //  0
                new double[] { 0, 1, 1, 0 }, //  0
                new double[] { 0, 1, 0, 0 }, //  0
                new double[] { 1, 0, 0, 1 }, //  1
                new double[] { 0, 0, 0, 1 }, //  1
                new double[] { 0, 0, 0, 1 }, //  1
                new double[] { 1, 0, 1, 1 }, //  2
                new double[] { 1, 1, 0, 1 }, //  2
                new double[] { 0, 1, 1, 1 }, //  2
                new double[] { 1, 1, 1, 1 }, //  2
            };

            int[] outputs = // those are the class labels
            {
                0, 0, 0, 0, 0,
                1, 1, 1,
                2, 2, 2, 2,
            };

            // Create the multi-class learning algorithm for the machine
            var teacher = new MulticlassSupportVectorLearning<IKernel>()
            {
                // Configure the learning algorithm to use SMO to train the
                //  underlying SVMs in each of the binary class subproblems.
                Learner = (param) => new SequentialMinimalOptimization<IKernel>()
                {
                    UseKernelEstimation = false,
                    Kernel = Gaussian.FromGamma(0.5)
                }
            };

            // Learn a machine
            var machine = teacher.Learn(inputs, outputs);


            // Create the multi-class calibration algorithm for the machine learned above
            var calibration = new MulticlassSupportVectorLearning<IKernel>(machine)
            {
                // Configure the algorithm to run probabilistic output calibration
                //  on the existing model of each binary class subproblem.
                Learner = (param) => new ProbabilisticOutputCalibration<IKernel>(param.Model)
            };


            // Configure parallel execution options
            calibration.ParallelOptions.MaxDegreeOfParallelism = 1;

            // Run the calibration on the same data
            calibration.Learn(inputs, outputs);

            // Obtain class predictions for each sample
            int[] predicted = machine.Decide(inputs);

            // Get class scores for each sample
            double[] scores = machine.Score(inputs);

            // Get log-likelihoods (should be same as scores)
            double[][] logl = machine.LogLikelihoods(inputs);

            // Get probability for each sample
            double[][] prob = machine.Probabilities(inputs);

            // Compute classification error
            double error = new ZeroOneLoss(outputs).Loss(predicted);
            double loss = new CategoryCrossEntropyLoss(outputs).Loss(prob);


            //string str = logl.ToCSharp();

            double[] expectedScores =
            {
                1.87436400885238, 1.81168086449304, 1.74038320983522,
                1.87436400885238, 1.81168086449304, 1.55446926953952,
                1.67016543853596, 1.67016543853596, 1.83135194001403,
                1.83135194001403, 1.59836868669125, 2.0618816310294
            };

            double[][] expectedLogL =
            {
                new double[] { 1.87436400885238, -1.87436400885238, -1.7463646841257 },
                new double[] { 1.81168086449304, -1.81168086449304, -1.73142460658826 },
                new double[] { 1.74038320983522, -1.58848669816072, -1.74038320983522 },
                new double[] { 1.87436400885238, -1.87436400885238, -1.7463646841257 },
                new double[] { 1.81168086449304, -1.81168086449304, -1.73142460658826 },
                new double[] { -1.55446926953952, 1.55446926953952, -0.573599079216229 },
                new double[] { -0.368823000428743, 1.67016543853596, -1.67016543853596 },
                new double[] { -0.368823000428743, 1.67016543853596, -1.67016543853596 },
                new double[] { -1.83135194001403, -1.20039293330558, 1.83135194001403 },
                new double[] { -1.83135194001403, -1.20039293330558, 1.83135194001403 },
                new double[] { -0.894598978116595, -1.59836868669125, 1.59836868669125 },
                new double[] { -1.87336852014759, -2.0618816310294, 2.0618816310294 }
            };

            double[][] expectedProbs =
            {
                new double[] { 0.95209908906855, 0.0224197237689656, 0.0254811871624848 },
                new double[] { 0.947314032745205, 0.0252864560196241, 0.0273995112351714 },
                new double[] { 0.937543314993345, 0.0335955309754816, 0.028861154031173 },
                new double[] { 0.95209908906855, 0.0224197237689656, 0.0254811871624848 },
                new double[] { 0.947314032745205, 0.0252864560196241, 0.0273995112351714 },
                new double[] { 0.0383670466237636, 0.859316640577158, 0.102316312799079 },
                new double[] { 0.111669460983068, 0.857937888238824, 0.0303926507781076 },
                new double[] { 0.111669460983068, 0.857937888238824, 0.0303926507781076 },
                new double[] { 0.0238971617859334, 0.0449126146360623, 0.931190223578004 },
                new double[] { 0.0238971617859334, 0.0449126146360623, 0.931190223578004 },
                new double[] { 0.0735735561383806, 0.0363980776342206, 0.890028366227399 },
                new double[] { 0.0188668069460003, 0.0156252941482294, 0.96550789890577 }
            };

            Assert.AreEqual(0, error);

            // The gamma configured on the learner must be the one found on every binary machine
            Assert.AreEqual(0.5, ((Gaussian)machine[0].Value.Kernel).Gamma);
            Assert.AreEqual(0.5, ((Gaussian)machine[1].Value.Kernel).Gamma);
            Assert.AreEqual(0.5, ((Gaussian)machine[2].Value.Kernel).Gamma);

            // Computed value: compare with the same tolerance as the array checks below
            Assert.AreEqual(1.0231652126930515, loss, 1e-10);
            Assert.IsTrue(predicted.IsEqual(outputs));
            Assert.IsTrue(expectedScores.IsEqual(scores, 1e-10));
            Assert.IsTrue(expectedLogL.IsEqual(logl, 1e-10));
            Assert.IsTrue(expectedProbs.IsEqual(prob, 1e-10));
        }
        /// <summary>
        /// Learns a C4.5 tree from the Iris resource data, converts it to a rule set and
        /// checks the mean zero-one loss of both the tree and the rules, plus the rule text.
        /// </summary>
        public void new_method_create_tree()
        {
            // Each non-empty line of the resource is one comma-separated sample.
            string[][] text = Resources.iris_data.Split(new[] { '\n' }, StringSplitOptions.RemoveEmptyEntries).Apply(x => x.Split(','));

            // The first four columns are the inputs, the fifth is the class label.
            double[][] inputs = text.GetColumns(0, 1, 2, 3).To<double[][]>();

            string[] labels = text.GetColumn(4);

            // Map the textual labels to integer symbols.
            var codebook = new Codification("Output", labels);
            int[] outputs = codebook.Translate("Output", labels);

            // And we can use the C4.5 for learning:
            var teacher = new C45Learning();

            // And finally induce the tree:
            var tree = teacher.Learn(inputs, outputs);

            // To get the estimated class labels, we can use
            int[] predicted = tree.Decide(inputs);

            // And the classification error can be computed from those labels as
            double error = new ZeroOneLoss(outputs) // 0.0266
            {
                Mean = true
            }.Loss(predicted);

            // Moreover, we may decide to convert our tree to a set of rules:
            DecisionSet rules = tree.ToRules();

            // And using the codebook, we can inspect the tree reasoning:
            string ruleText = rules.ToString(codebook, "Output",
                System.Globalization.CultureInfo.InvariantCulture);

            // The output is:
            string expected = @"Iris-setosa =: (2 <= 2.45)
Iris-versicolor =: (2 > 2.45) && (3 <= 1.75) && (0 <= 7.05) && (1 <= 2.85)
Iris-versicolor =: (2 > 2.45) && (3 <= 1.75) && (0 <= 7.05) && (1 > 2.85)
Iris-versicolor =: (2 > 2.45) && (3 > 1.75) && (0 <= 5.95) && (1 > 3.05)
Iris-virginica =: (2 > 2.45) && (3 <= 1.75) && (0 > 7.05)
Iris-virginica =: (2 > 2.45) && (3 > 1.75) && (0 > 5.95)
Iris-virginica =: (2 > 2.45) && (3 > 1.75) && (0 <= 5.95) && (1 <= 3.05)
";

            Assert.AreEqual(0.026666666666666668, error, 1e-10);

            // The rule set is expected to make the same mistakes as the tree it came from.
            double newError = ComputeError(rules, inputs, outputs);
            Assert.AreEqual(0.026666666666666668, newError, 1e-10);
            Assert.AreEqual(expected, ruleText);
        }
        /// <summary>
        ///   Trains a multi-class support vector machine with a Linear kernel on a
        ///   four-sample problem and checks its decisions and per-class scores.
        /// </summary>
        public void learn_test()
        {
            #region doc_learn
            // Fix the seed so the random number generator is reproducible
            Accord.Math.Random.Generator.Seed = 0;

            // A tiny auto-association problem: the value in the last column
            // of every sample is also the class that the sample belongs to.
            // A Linear kernel should be enough to solve it.

            // The samples
            double[][] inputs =
            {
                new double[] { 1, 2, 0 },
                new double[] { 6, 2, 3 },
                new double[] { 1, 1, 1 },
                new double[] { 7, 6, 2 },
            };

            // The class of each sample
            int[] outputs = { 0, 3, 1, 2 };

            // The multi-class learning algorithm
            var teacher = new MulticlassSupportVectorLearning<Linear>();

            // Each binary subproblem is trained with SMO. To try another kernel,
            // swap the generic parameter (and the Kernel instance) for the
            // desired kernel class, such as Polynomial or Gaussian.
            teacher.Learner = (param) => new SequentialMinimalOptimization<Linear>()
            {
                Kernel = new Linear()
            };

            // Learn the machine
            MulticlassSupportVectorMachine<Linear> ovo = teacher.Learn(inputs, outputs);

            // Class predictions for the training samples
            int[] predicted = ovo.Decide(inputs);

            // Zero-one loss between the expected and predicted classes
            var zeroOne = new ZeroOneLoss(outputs);
            double error = zeroOne.Loss(predicted);
            #endregion

            Assert.AreEqual(0, error);
            Assert.IsTrue(predicted.IsEqual(outputs));

            // Expected per-class scores for the first three samples
            double[][] expectedScores =
            {
                new double[] { 0.62, -0.25, -0.59, -0.62 },
                new double[] { -0.62, -0.57, -0.13, 0.62 },
                new double[] { -0.25, 0.63, -0.63, -0.51 },
            };

            for (int i = 0; i < expectedScores.Length; i++)
            {
                Assert.IsTrue(ovo.Scores(inputs[i]).IsEqual(expectedScores[i], 1e-2));
            }
        }
        /// <summary>
        ///   Learns a discrete naive Bayes model with the Laplace rule enabled on
        ///   data where one declared symbol never occurs, then checks the decision,
        ///   probability and mean training error of the model.
        /// </summary>
        public void laplace_smoothing_missing_sample()
        {
            #region doc_laplace
            // To test the effectiveness of the Laplace rule for when
            // an example of a symbol is not present in the training set,
            // let's create a dataset where the second column could contain
            // the values 0, 1 or 2 but actually only contains examples
            // with the values 1 and 2:

            int[][] inputs =
            {
                //      input     output
                new [] { 0, 1 }, //  0 
                new [] { 0, 2 }, //  0
                new [] { 0, 1 }, //  0
                new [] { 1, 2 }, //  1
                new [] { 0, 2 }, //  1
                new [] { 0, 2 }, //  1
                new [] { 1, 1 }, //  2
                new [] { 0, 1 }, //  2
                new [] { 1, 1 }, //  2
            };

            int[] outputs = // those are the class labels
            {
                0, 0, 0, 1, 1, 1, 2, 2, 2, 
            };

            // Since the data is not enough to determine which symbols we are
            // expecting in our model, we will have to specify the model by
            // hand. The first column can assume 2 different values, whereas
            // the second column can assume 3:
            var bayes = new NaiveBayes(classes: 3, symbols: new[] { 2, 3 });

            // Now we can create a learning algorithm
            var learning = new NaiveBayesLearning()
            {
                Model = bayes
            };

            // Enable the use of the Laplace rule
            learning.Options.InnerOption.UseLaplaceRule = true;

            // Learn the Naive Bayes model
            learning.Learn(inputs, outputs);

            // Classify the sample { 0, 1 }.
            // NOTE(review): the narrative above suggests querying the unseen symbol 0
            // in the second column, but this sample has 1 there. The expected values
            // asserted below are tied to this exact sample; confirm the intent.
            int answer = bayes.Decide(new int[] { 0, 1 });
            #endregion

            Assert.AreEqual(0, answer);

            double prob = bayes.Probability(new int[] { 0, 1 }, out answer);
            Assert.AreEqual(0, answer);
            Assert.AreEqual(0.52173913043478259, prob, 1e-10);

            // Mean zero-one loss over the training set
            double error = new ZeroOneLoss(outputs)
            {
                Mean = true
            }.Loss(bayes.Decide(inputs));

            // Compare with a tolerance, like the probability above, instead
            // of relying on exact floating-point equality.
            Assert.AreEqual(2 / 9.0, error, 1e-10);
        }
        /// <summary>
        ///   Learns a C4.5 decision tree with named input variables on the Iris
        ///   data, then checks the training error, the tree dimensions and the
        ///   rules derived from the learned tree.
        /// </summary>
        public void IrisDatasetTest()
        {
            #region doc_iris
            // In this example, we will process the famous Fisher's Iris dataset in 
            // which the task is to classify whether the features of an Iris flower 
            // belong to an Iris setosa, an Iris versicolor, or an Iris virginica:
            //
            //  - https://en.wikipedia.org/wiki/Iris_flower_data_set
            //

            // First, let's load the dataset into an array of text that we can process
            string[][] text = Resources.iris_data.Split(new[] { '\n' }, StringSplitOptions.RemoveEmptyEntries).Apply(x => x.Split(','));

            // The first four columns contain the flower features
            double[][] inputs = text.GetColumns(0, 1, 2, 3).To<double[][]>();

            // The last column contains the expected flower type
            string[] labels = text.GetColumn(4);

            // Since the labels are represented as text, the first step is to convert
            // those text labels into integer class labels, so we can process them
            // more easily. For this, we will create a codebook to encode class labels:
            //
            var codebook = new Codification("Output", labels);

            // With the codebook, we can convert the labels:
            int[] outputs = codebook.Translate("Output", labels);

            // Let's declare the names of our input variables:
            DecisionVariable[] features =
            {
                new DecisionVariable("sepal length", DecisionVariableKind.Continuous), 
                new DecisionVariable("sepal width", DecisionVariableKind.Continuous), 
                new DecisionVariable("petal length", DecisionVariableKind.Continuous), 
                new DecisionVariable("petal width", DecisionVariableKind.Continuous), 
            };

            // Now, we can finally create our tree for the 3 classes:
            var tree = new DecisionTree(inputs: features, classes: 3);

            // And we can use the C4.5 for learning:
            var teacher = new C45Learning(tree);

            // And finally induce the tree:
            teacher.Learn(inputs, outputs);

            // To get the estimated class labels, we can use
            int[] predicted = tree.Decide(inputs);

            // And the classification error can be computed from those
            // predictions (no need to run the tree over the inputs again):
            double error = new ZeroOneLoss(outputs) // 0.0266
            {
                Mean = true
            }.Loss(predicted);

            // Moreover, we may decide to convert our tree to a set of rules:
            DecisionSet rules = tree.ToRules();

            // And using the codebook, we can inspect the tree reasoning:
            string ruleText = rules.ToString(codebook, "Output",
                System.Globalization.CultureInfo.InvariantCulture);

            // The output is:
            string expected = @"Iris-setosa =: (petal length <= 2.45)
Iris-versicolor =: (petal length > 2.45) && (petal width <= 1.75) && (sepal length <= 7.05) && (sepal width <= 2.85)
Iris-versicolor =: (petal length > 2.45) && (petal width <= 1.75) && (sepal length <= 7.05) && (sepal width > 2.85)
Iris-versicolor =: (petal length > 2.45) && (petal width > 1.75) && (sepal length <= 5.95) && (sepal width > 3.05)
Iris-virginica =: (petal length > 2.45) && (petal width <= 1.75) && (sepal length > 7.05)
Iris-virginica =: (petal length > 2.45) && (petal width > 1.75) && (sepal length > 5.95)
Iris-virginica =: (petal length > 2.45) && (petal width > 1.75) && (sepal length <= 5.95) && (sepal width <= 3.05)
";
            #endregion

            Assert.AreEqual(0.026666666666666668, error, 1e-10);
            Assert.AreEqual(4, tree.NumberOfInputs);
            Assert.AreEqual(3, tree.NumberOfOutputs);

            // The rule set is expected to reach the same error as the tree it came from
            double newError = ComputeError(rules, inputs, outputs);
            Assert.AreEqual(0.026666666666666668, newError, 1e-10);
            Assert.AreEqual(expected, ruleText);
        }
        /// <summary>
        ///   Trains a multi-class support vector machine with a Gaussian kernel
        ///   (using kernel estimation) on a 15-sample, 3-class problem and checks
        ///   its decisions, scores, log-likelihoods, probabilities and losses
        ///   against previously recorded values.
        /// </summary>
        public void multiclass_gaussian_new_usage()
        {
            #region doc_learn_gaussian
            // Let's say we have the following data to be classified
            // into three possible classes. Those are the samples:
            //
            double[][] inputs =
            {
                //               input         output
                new double[] { 0, 1, 1, 0 }, //  0 
                new double[] { 0, 1, 0, 0 }, //  0
                new double[] { 0, 0, 1, 0 }, //  0
                new double[] { 0, 1, 1, 0 }, //  0
                new double[] { 0, 1, 0, 0 }, //  0
                new double[] { 1, 0, 0, 0 }, //  1
                new double[] { 1, 0, 0, 0 }, //  1
                new double[] { 1, 0, 0, 1 }, //  1
                new double[] { 0, 0, 0, 1 }, //  1
                new double[] { 0, 0, 0, 1 }, //  1
                new double[] { 1, 1, 1, 1 }, //  2
                new double[] { 1, 0, 1, 1 }, //  2
                new double[] { 1, 1, 0, 1 }, //  2
                new double[] { 0, 1, 1, 1 }, //  2
                new double[] { 1, 1, 1, 1 }, //  2
            };

            int[] outputs = // those are the class labels
            {
                0, 0, 0, 0, 0,
                1, 1, 1, 1, 1,
                2, 2, 2, 2, 2,
            };

            // Create the multi-class learning algorithm for the machine
            var teacher = new MulticlassSupportVectorLearning<Gaussian>()
            {
                // Configure the learning algorithm to use SMO to train the
                //  underlying SVMs in each of the binary class subproblems.
                Learner = (param) => new SequentialMinimalOptimization<Gaussian>()
                {
                    // Estimate a suitable guess for the Gaussian kernel's parameters.
                    // This estimate can serve as a starting point for a grid search.
                    UseKernelEstimation = true
                }
            };

            // Restrict learning to a single degree of parallelism
            teacher.ParallelOptions.MaxDegreeOfParallelism = 1;

            // Learn a machine
            var machine = teacher.Learn(inputs, outputs);

            // Obtain class predictions for each sample
            int[] predicted = machine.Decide(inputs);

            // Get one score per sample (compared against expectedScores below)
            double[] scores = machine.Score(inputs);

            // Compute the zero-one classification error of the predictions
            double error = new ZeroOneLoss(outputs).Loss(predicted);
            #endregion

            // Get the per-class log-likelihoods of each sample
            // (presumably the largest entry of each row matches the score of
            // that sample, as the expected tables below suggest - TODO confirm)
            double[][] logl = machine.LogLikelihoods(inputs);

            // Get the per-class probabilities of each sample
            double[][] prob = machine.Probabilities(inputs);

            // Compute the categorical cross-entropy loss of those probabilities
            double loss = new CategoryCrossEntropyLoss(outputs).Loss(prob);

            // NOTE(review): 'str' is never read afterwards; presumably a leftover
            // used to regenerate the expected arrays below - confirm before removing.
            string str = scores.ToCSharp();
 
            // Previously recorded score for each of the 15 samples
            double[] expectedScores =
            { 
                1.00888999727541, 1.00303259868784, 1.00068403386636, 1.00888999727541,
                1.00303259868784, 1.00831890183328, 1.00831890183328, 0.843757409449037, 
                0.996768862332386, 0.996768862332386, 1.02627325826713, 1.00303259868784,
                0.996967401312164, 0.961947708617365, 1.02627325826713
            };

            // Previously recorded log-likelihoods (one row per sample, one column per class)
            double[][] expectedLogL =
            {
                new double[] { 1.00888999727541, -1.00888999727541, -1.00135670089335 },
                new double[] { 1.00303259868784, -0.991681098166717, -1.00303259868784 },
                new double[] { 1.00068403386636, -0.54983354268499, -1.00068403386636 },
                new double[] { 1.00888999727541, -1.00888999727541, -1.00135670089335 },
                new double[] { 1.00303259868784, -0.991681098166717, -1.00303259868784 },
                new double[] { -1.00831890183328, 1.00831890183328, -0.0542719287771535 },
                new double[] { -1.00831890183328, 1.00831890183328, -0.0542719287771535 },
                new double[] { -0.843757409449037, 0.843757409449037, -0.787899083913034 },
                new double[] { -0.178272229157676, 0.996768862332386, -0.996768862332386 },
                new double[] { -0.178272229157676, 0.996768862332386, -0.996768862332386 },
                new double[] { -1.02627325826713, -1.00323113766761, 1.02627325826713 },
                new double[] { -1.00303259868784, -0.38657999872922, 1.00303259868784 },
                new double[] { -0.996967401312164, -0.38657999872922, 0.996967401312164 },
                new double[] { -0.479189991343958, -0.961947708617365, 0.961947708617365 },
                new double[] { -1.02627325826713, -1.00323113766761, 1.02627325826713 } 
            };

            // Previously recorded probabilities (one row per sample, one column per class)
            double[][] expectedProbs =
            {
                new double[] { 0.789324598208647, 0.104940932711551, 0.105734469079803 },
                new double[] { 0.78704862182644, 0.107080012017624, 0.105871366155937 },
                new double[] { 0.74223157627093, 0.157455631737191, 0.100312791991879 },
                new double[] { 0.789324598208647, 0.104940932711551, 0.105734469079803 },
                new double[] { 0.78704862182644, 0.107080012017624, 0.105871366155937 },
                new double[] { 0.0900153422818135, 0.676287261796794, 0.233697395921392 },
                new double[] { 0.0900153422818135, 0.676287261796794, 0.233697395921392 },
                new double[] { 0.133985810363445, 0.72433118122885, 0.141683008407705 },
                new double[] { 0.213703968297751, 0.692032433073136, 0.0942635986291124 },
                new double[] { 0.213703968297751, 0.692032433073136, 0.0942635986291124 },
                new double[] { 0.10192623206507, 0.104302095948601, 0.79377167198633 },
                new double[] { 0.0972161784678357, 0.180077937396817, 0.722705884135347 },
                new double[] { 0.0981785890979593, 0.180760971768703, 0.721060439133338 },
                new double[] { 0.171157270099157, 0.105617610634377, 0.723225119266465 },
                new double[] { 0.10192623206507, 0.104302095948601, 0.79377167198633 } 
            };

            // Every sample must be classified correctly, and all numeric
            // outputs must match the recorded values within 1e-10.
            Assert.AreEqual(0, error);
            Assert.AreEqual(4.5289447815997672, loss, 1e-10);
            Assert.IsTrue(predicted.IsEqual(outputs));
            Assert.IsTrue(expectedScores.IsEqual(scores, 1e-10));
            Assert.IsTrue(expectedLogL.IsEqual(logl, 1e-10));
            Assert.IsTrue(expectedProbs.IsEqual(prob, 1e-10));
        }
        /// <summary>
        ///   Learns a naive Bayes model with NormalDistribution components, using
        ///   a regularization option, and checks that the training samples and one
        ///   extra query are classified as expected.
        /// </summary>
        public void learn_test_with_options()
        {
            #region doc_learn_options
            // Fifteen samples with four features each; the trailing
            // comment on every row is the class the sample belongs to.
            double[][] inputs =
            {
                //               input         output
                new double[] { 0, 1, 1, 0 }, //  0 
                new double[] { 0, 1, 0, 0 }, //  0
                new double[] { 0, 0, 1, 0 }, //  0
                new double[] { 0, 1, 1, 0 }, //  0
                new double[] { 0, 1, 0, 0 }, //  0
                new double[] { 1, 0, 0, 0 }, //  1
                new double[] { 1, 0, 0, 0 }, //  1
                new double[] { 1, 0, 0, 1 }, //  1
                new double[] { 0, 0, 0, 1 }, //  1
                new double[] { 0, 0, 0, 1 }, //  1
                new double[] { 1, 1, 1, 1 }, //  2
                new double[] { 1, 0, 1, 1 }, //  2
                new double[] { 1, 1, 0, 1 }, //  2
                new double[] { 0, 1, 1, 1 }, //  2
                new double[] { 1, 1, 1, 1 }, //  2
            };

            // The class labels, in the same order as the rows above
            int[] outputs =
            {
                0, 0, 0, 0, 0,
                1, 1, 1, 1, 1,
                2, 2, 2, 2, 2,
            };

            // A naive Bayes learner whose components are normal distributions
            var learner = new NaiveBayesLearning<NormalDistribution, NormalOptions>();

            // Regularize the component distributions to avoid zero variances
            learner.Options.InnerOption.Regularization = 1e-5;

            // Fit the model to the samples
            NaiveBayes<NormalDistribution> model = learner.Learn(inputs, outputs);

            // Predict the class of every training sample
            int[] predicted = model.Decide(inputs);

            // Zero-one loss of those predictions; it should be zero
            var zeroOne = new ZeroOneLoss(outputs);
            double error = zeroOne.Loss(predicted);

            // Query a single sample; the same values appear
            // among the class 1 rows above, so this should be 1
            int answer = model.Decide(new double[] { 1, 0, 0, 1 });
            #endregion

            Assert.AreEqual(0, error);
            Assert.AreEqual(1, answer);
            Assert.IsTrue(predicted.IsEqual(outputs));
        }
        /// <summary>
        ///   Trains a multi-class linear SVM on the digits training sample, checks
        ///   the support vector counts before and after compressing the machine,
        ///   and verifies the mean error on the training and validation samples.
        /// </summary>
        public void kaggle_digits_with_compress()
        {
            var training = Properties.Resources.trainingsample;
            var validation = Properties.Resources.validationsample;

            // readData yields the observations (Item1) and their labels (Item2)
            var tset = readData(training);
            var observations = tset.Item1;
            var labels = tset.Item2;

            var teacher = new MulticlassSupportVectorLearning<Linear>();

            var svm = teacher.Learn(observations, labels);

            // Before compression each inner machine keeps several support vectors...
            Assert.AreEqual(50, svm.Models[0][0].SupportVectors.Length);
            Assert.AreEqual(127, svm.Models[1][0].SupportVectors.Length);
            svm.Compress();
            // ...and after compression each one is left with a single vector
            Assert.AreEqual(1, svm.Models[0][0].SupportVectors.Length);
            Assert.AreEqual(1, svm.Models[1][0].SupportVectors.Length);

            {
                var trainingLoss = new ZeroOneLoss(labels)
                {
                    Mean = true
                };

                // Mean error on the training sample, compared with a
                // tolerance rather than exact floating-point equality
                double error = trainingLoss.Loss(svm.Decide(observations));
                Assert.AreEqual(0.054, error, 1e-10);
            }

            {
                var vset = readData(validation);
                var validationData = vset.Item1;
                var validationLabels = vset.Item2;

                var validationLoss = new ZeroOneLoss(validationLabels)
                {
                    Mean = true
                };

                // Mean error on the validation sample, again with a tolerance
                double val = validationLoss.Loss(svm.Decide(validationData));
                Assert.AreEqual(0.082, val, 1e-10);
            }
        }
Exemple #39
0
        /// <summary>
        ///   Calibrates the current Support Vector Machine to produce
        ///   probabilistic outputs using ProbabilisticOutputCalibration,
        ///   then reports the elapsed time and the mean training error.
        /// </summary>
        /// 
        /// <param name="sender">The control that raised the event (not used).</param>
        /// <param name="e">The event arguments (not used).</param>
        /// 
        private void btnRunCalibration_Click(object sender, EventArgs e)
        {
            // There is nothing to calibrate until a machine has been trained
            if (ksvm == null)
            {
                MessageBox.Show("Please train the machines first.");
                return;
            }

            // Extract inputs and outputs from the training grid
            int rows = dgvTrainingSource.Rows.Count;
            double[][] input = new double[rows][];
            int[] output = new int[rows];
            for (int i = 0; i < rows; i++)
            {
                input[i] = (double[])dgvTrainingSource.Rows[i].Cells["colTrainingFeatures"].Value;
                output[i] = (int)dgvTrainingSource.Rows[i].Cells["colTrainingLabel"].Value;
            }



            // Create the calibration algorithm using the training data
            var ml = new MulticlassSupportVectorLearning<IKernel>()
            {
                // Calibrate the existing machine instead of learning a new one
                Model = ksvm,

                // Configure the calibration algorithm
                Learner = (p) => new ProbabilisticOutputCalibration<IKernel>()
                {
                    Model = p.Model
                }
            };


            lbStatus.Text = "Calibrating the classifiers. This may take a (very) significant amount of time...";
            // Let the status text be painted before the long-running call
            Application.DoEvents();

            Stopwatch sw = Stopwatch.StartNew();

            // Train the machines. It should take a while.
            ml.Learn(input, output);

            sw.Stop();

            // Report the mean error rate, i.e. the same quantity that
            // the training handler shows in its "{1}er" status message.
            double error = new ZeroOneLoss(output)
            {
                Mean = true
            }.Loss(ksvm.Decide(input));

            lbStatus.Text = String.Format(
                "Calibration complete ({0}ms, {1}er). Click Classify to test the classifiers.",
                sw.ElapsedMilliseconds, error);

            btnClassifyVoting.Enabled = true;
        }
        /// <summary>
        ///   Learns a discrete naive Bayes model on a 15-sample, 3-class problem
        ///   and checks that every training sample is classified correctly through
        ///   both Decide and Compute.
        /// </summary>
        public void ComputeTest3()
        {
            #region doc_multiclass
            // Let's say we have the following data to be classified
            // into three possible classes. Those are the samples:
            //
            int[][] inputs =
            {
                //               input      output
                new int[] { 0, 1, 1, 0 }, //  0 
                new int[] { 0, 1, 0, 0 }, //  0
                new int[] { 0, 0, 1, 0 }, //  0
                new int[] { 0, 1, 1, 0 }, //  0
                new int[] { 0, 1, 0, 0 }, //  0
                new int[] { 1, 0, 0, 0 }, //  1
                new int[] { 1, 0, 0, 0 }, //  1
                new int[] { 1, 0, 0, 1 }, //  1
                new int[] { 0, 0, 0, 1 }, //  1
                new int[] { 0, 0, 0, 1 }, //  1
                new int[] { 1, 1, 1, 1 }, //  2
                new int[] { 1, 0, 1, 1 }, //  2
                new int[] { 1, 1, 0, 1 }, //  2
                new int[] { 0, 1, 1, 1 }, //  2
                new int[] { 1, 1, 1, 1 }, //  2
            };

            int[] outputs = // those are the class labels
            {
                0, 0, 0, 0, 0,
                1, 1, 1, 1, 1,
                2, 2, 2, 2, 2,
            };

            // Let us create a learning algorithm
            var learner = new NaiveBayesLearning();

            // and teach a model on the data examples
            NaiveBayes nb = learner.Learn(inputs, outputs);

            // Now, let's test the model output for the first input sample:
            int answer = nb.Decide(new int[] { 0, 1, 1, 0 }); // should be 0
            #endregion

            // The first input sample is labelled as class 0
            Assert.AreEqual(0, answer);

            double error = new ZeroOneLoss(outputs).Loss(nb.Decide(inputs));
            Assert.AreEqual(0, error);

            // Compute must agree with the expected class of every training sample
            for (int i = 0; i < inputs.Length; i++)
            {
                double actual = nb.Compute(inputs[i]);
                double expected = outputs[i];
                Assert.AreEqual(expected, actual);
            }
        }
Exemple #41
0
        /// <summary>
        ///   Creates a Support Vector Machine and estimate 
        ///   its parameters using a learning algorithm. Afterwards,
        ///   updates the status text, the list of inner machines and
        ///   the approximate model size shown in the interface.
        /// </summary>
        /// 
        /// <param name="sender">The control that raised the event (not used).</param>
        /// <param name="e">The event arguments (not used).</param>
        /// 
        private void btnRunTraining_Click(object sender, EventArgs e)
        {
            // Training requires data to have been loaded into the grid
            if (dgvTrainingSource.Rows.Count == 0)
            {
                MessageBox.Show("Please load the training data before clicking this button");
                return;
            }

            lbStatus.Text = "Gathering data. This may take a while...";
            // Let the status text be painted before the long-running work
            Application.DoEvents();



            // Extract inputs and outputs
            int rows = dgvTrainingSource.Rows.Count;
            double[][] input = new double[rows][];
            int[] output = new int[rows];
            for (int i = 0; i < rows; i++)
            {
                input[i] = (double[])dgvTrainingSource.Rows[i].Cells["colTrainingFeatures"].Value;
                output[i] = (int)dgvTrainingSource.Rows[i].Cells["colTrainingLabel"].Value;
            }

            // Create the chosen kernel function 
            // using the user interface parameters
            //
            IKernel kernel = createKernel();

            // Extract training parameters from the interface
            double complexity = (double)numComplexity.Value;
            double tolerance = (double)numTolerance.Value;
            int cacheSize = (int)numCache.Value;
            SelectionStrategy strategy = (SelectionStrategy)cbStrategy.SelectedItem;

            // Create the learning algorithm using the machine and the training data
            var ml = new MulticlassSupportVectorLearning<IKernel>()
            {
                // Configure the learning algorithm
                Learner = (param) => new SequentialMinimalOptimization<IKernel>()
                {
                    Complexity = complexity,
                    Tolerance = tolerance,
                    CacheSize = cacheSize,
                    Strategy = strategy,
                    Kernel = kernel
                }
            };


            lbStatus.Text = "Training the classifiers. This may take a (very) significant amount of time...";
            Application.DoEvents();

            Stopwatch sw = Stopwatch.StartNew();

            // Train the machines. It should take a while.
            ksvm = ml.Learn(input, output);

            // If we created a linear machine, compress the support vectors 
            // into one single parameter vector for increased performance:
            if (ksvm.Kernel is Linear)
            {
                ksvm.Compress();
            }

            sw.Stop();

            // Mean zero-one error of the trained machine on the training data
            double error = new ZeroOneLoss(output)
            {
                Mean = true
            }.Loss(ksvm.Decide(input));


            lbStatus.Text = String.Format(
                "Training complete ({0}ms, {1}er). Click Classify to test the classifiers.",
                sw.ElapsedMilliseconds, error);

            // Update the interface status
            btnClassifyVoting.Enabled = true;
            btnClassifyElimination.Enabled = true;
            btnCalibration.Enabled = true;


            // Populate the information tab with the machines
            // NOTE(review): the loop bound assumes the machine was trained
            // on exactly 10 classes - confirm against the loaded dataset.
            dgvMachines.Rows.Clear();
            int k = 1;
            for (int i = 0; i < 10; i++)
            {
                for (int j = 0; j < i; j++, k++)
                {
                    var machine = ksvm[i, j];

                    int sv = machine.SupportVectors == null ? 0 : machine.SupportVectors.Length;

                    int c = dgvMachines.Rows.Add(k, i + "-vs-" + j, sv, machine.Threshold);
                    dgvMachines.Rows[c].Tag = machine;
                }
            }

            // approximate size in bytes = 
            //   number of support vectors * number of doubles in a support vector * size of double
            //
            // The product is computed in 64-bit arithmetic so it cannot overflow,
            // and the division is done in floating point so that sizes below
            // 1 MB are no longer truncated to zero by integer division.
            long bytes = (long)ksvm.SupportVectorUniqueCount * 1024 * sizeof(double);
            double megabytes = bytes / (1024.0 * 1024.0);
            lbSize.Text = String.Format("{0} ({1:0.##} MB)", ksvm.SupportVectorUniqueCount, megabytes);
        }
        /// <summary>
        ///   Learns a Gaussian-kernel SVM with SMO on four two-dimensional samples,
        ///   calibrates it with ProbabilisticOutputCalibration, and checks the
        ///   resulting decisions, probabilities, loss and weights.
        /// </summary>
        public void learn_test()
        {
            #region doc_learn
            // Four binary samples; the label is 1 when both
            // components are equal and 0 when they differ.
            double[][] inputs =
            {
                new double[] { 0, 0 }, // equal:     label 1
                new double[] { 0, 1 }, // different: label 0
                new double[] { 1, 0 }, // different: label 0
                new double[] { 1, 1 }  // equal:     label 1
            };

            // The label of each sample
            int[] outputs =
            {
                1, 0, 0, 1
            };

            // The SMO learning algorithm, configured with a Gaussian kernel
            var teacher = new SequentialMinimalOptimization<Gaussian>();
            teacher.Kernel = new Gaussian(0.1);
            teacher.Complexity = 1.0;

            // Learn the machine
            var svm = teacher.Learn(inputs, outputs);

            // Hard decisions for every sample
            bool[] predicted = svm.Decide(inputs);

            // Zero-one loss of the hard decisions
            var zeroOne = new ZeroOneLoss(outputs);
            double error = zeroOne.Loss(predicted);

            // Probabilistic calibration (Platt's scaling) of the learned machine
            var calibrator = new ProbabilisticOutputCalibration<Gaussian>(svm);

            // Calibrating returns the same machine that was passed in
            calibrator.Learn(inputs, outputs);

            // Probabilities for every sample
            double[] probabilities = svm.Probability(inputs);

            // Binary cross-entropy loss of those probabilities
            var crossEntropy = new BinaryCrossEntropyLoss(outputs);
            double loss = crossEntropy.Loss(probabilities);

            // Decision for the first sample, together
            // with the probability of that decision
            bool decision;
            double probability = svm.Probability(inputs[0], out decision);
            #endregion

            // The first sample is decided as true with a probability of about 75%

            Assert.AreEqual(true, decision);
            Assert.AreEqual(0, error);
            Assert.AreEqual(5.5451735748925355, loss);
            Assert.AreEqual(0.74999975815069375, probability, 1e-10);
            Assert.IsTrue(svm.IsProbabilistic);

            // The weights alternate in sign across the four samples
            double[] expectedWeights =
            {
                -1.0986109988055595,
                1.0986109988055595,
                -1.0986109988055595,
                1.0986109988055595
            };

            for (int i = 0; i < expectedWeights.Length; i++)
            {
                Assert.AreEqual(expectedWeights[i], svm.Weights[i]);
            }
        }
Exemple #43
0
        /// <summary>
        /// Trains an ID3 decision tree that predicts the "Review" value (the review's
        /// <c>overall</c> field) from the review year and reviewer id, then plots the
        /// expected and the predicted values for a held-out slice of the reviews.
        /// </summary>
        /// <remarks>
        /// After shuffling, reviews [0, 1500) are used for training and reviews
        /// [1500, 2000) for evaluation, so at least 2000 reviews must be available.
        /// </remarks>
        public static void BuildDecisionTreeOnYearAndUser()
        {
            const int trainCount = 1500;
            const int evaluationEnd = 2000;

            DataHandler.ImportReviewData(5);
            DataHandler.Reviews.Shuffle();

            DataTable data = BuildReviewTable(0, trainCount);

            // Create a new codification codebook to
            // convert strings into integer symbols
            var       codebook = new Codification(data, "Year", "User", "Review");
            DataTable symbols  = codebook.Apply(data, "Year", "User", "Review");

            int[][] inputs  = symbols.ToJagged<int>("Year", "User");
            int[]   outputs = symbols.ToArray<int>("Review");

            // Gather information about decision variables.
            // NOTE(review): the symbol counts below are hard-coded; confirm they match
            // the number of distinct values the codebook actually produced.
            DecisionVariable[] attributes =
            {
                new DecisionVariable("Year", 7),  // 7 possible year symbols
                new DecisionVariable("User", 18), // 18 possible users
            };

            // Create a new instance of the ID3 algorithm
            var id3learning = new ID3Learning(attributes);
            // Learn the training instances!
            DecisionTree tree = id3learning.Learn(inputs, outputs);

            // Compute the training error when predicting training instances
            // (not used further in this method).
            double error = new ZeroOneLoss(outputs).Loss(tree.Decide(inputs));

            // Evaluate on the held-out reviews. The evaluation table itself must be
            // encoded here; encoding the training table again would only re-test the
            // tree on the rows it was trained on.
            // NOTE(review): the codebook was built from the training rows only, so
            // evaluation rows containing a year/user/review value that never occurred
            // in training may not be translatable -- confirm Codification's behaviour.
            DataTable newData    = BuildReviewTable(trainCount, evaluationEnd);
            DataTable newSymbols = codebook.Apply(newData, "Year", "User", "Review");

            int[][] newInputs  = newSymbols.ToJagged<int>("Year", "User");
            int[]   newOutputs = newSymbols.ToArray<int>("Review");

            int[] answers = tree.Decide(newInputs);

            // Plot ground truth and predictions; the second plot shows the tree's
            // answers, not the expected outputs again.
            ScatterplotBox.Show("Expected results", newOutputs.Select(i => (double)i).ToArray());
            ScatterplotBox.Show("Decision Tree results", answers.Select(i => (double)i).ToArray())
            .Hold();
        }

        // Builds a Year/User/Review table from DataHandler.Reviews[startIndex, endIndex).
        private static DataTable BuildReviewTable(int startIndex, int endIndex)
        {
            DataTable table = new DataTable("Review Simple Input Data");

            table.Columns.Add("Year");
            table.Columns.Add("User");
            table.Columns.Add("Review");

            for (int i = startIndex; i < endIndex; i++)
            {
                var      currentReview = DataHandler.Reviews[i];
                object[] values        = new object[3];
                values[0] = currentReview.reviewTime.Year;
                values[1] = currentReview.reviewerID;
                values[2] = currentReview.overall;

                table.Rows.Add(values);
            }

            return table;
        }
        /// <summary>
        /// Learns a multi-class linear SVM on a small three-class data set and checks
        /// that the learned machine reproduces every training label.
        /// </summary>
        public void multiclass_linear_new_usage()
        {
            #region doc_learn_ldcd
            // Class labels for the fifteen samples declared below:
            // five samples for each of the classes 0, 1 and 2.
            int[] labels =
            {
                0, 0, 0, 0, 0,
                1, 1, 1, 1, 1,
                2, 2, 2, 2, 2,
            };

            // The samples to be classified, listed in the same order as the labels.
            double[][] samples =
            {
                // class 0
                new double[] { 0, 1, 1, 0 },
                new double[] { 0, 1, 0, 0 },
                new double[] { 0, 0, 1, 0 },
                new double[] { 0, 1, 1, 0 },
                new double[] { 0, 1, 0, 0 },

                // class 1
                new double[] { 1, 0, 0, 0 },
                new double[] { 1, 0, 0, 0 },
                new double[] { 1, 0, 0, 1 },
                new double[] { 0, 0, 0, 1 },
                new double[] { 0, 0, 0, 1 },

                // class 2
                new double[] { 1, 1, 1, 1 },
                new double[] { 1, 0, 1, 1 },
                new double[] { 1, 1, 0, 1 },
                new double[] { 0, 1, 1, 1 },
                new double[] { 1, 1, 1, 1 },
            };

            // Multi-class SVM learning algorithm; every inner binary SVM is
            // trained by a linear dual coordinate descent learner with L2 loss.
            var learning = new MulticlassSupportVectorLearning<Linear>()
            {
                Learner = (parameters) => new LinearDualCoordinateDescent()
                {
                    Loss = Loss.L2
                }
            };

            // Restrict the learning to a single degree of parallelism
            learning.ParallelOptions.MaxDegreeOfParallelism = 1;

            // Train the machine on the samples
            var svm = learning.Learn(samples, labels);

            // Ask the trained machine to classify the very same samples
            int[] decisions = svm.Decide(samples);

            // Zero-one loss of the decisions against the true labels
            double zeroOneError = new ZeroOneLoss(labels).Loss(decisions);
            #endregion

            Assert.AreEqual(0, zeroOneError);
            Assert.IsTrue(decisions.IsEqual(labels));
        }