Example #1
        public override Task TrainAsync(ClassificationModel classificationModel)
        {
            int numFeatures = classificationModel.FeatureVectors.Count;

            double[][] input     = new double[numFeatures][];
            int[]      responses = new int[numFeatures];

            for (int featureIndex = 0;
                 featureIndex < classificationModel.FeatureVectors.Count;
                 ++featureIndex)
            {
                var featureVector = classificationModel.FeatureVectors[featureIndex];
                input[featureIndex]     = Array.ConvertAll(featureVector.FeatureVector.BandIntensities, s => (double)s / ushort.MaxValue);
                responses[featureIndex] = (int)featureVector.FeatureClass;
            }

            NaiveBayesLearning <NormalDistribution> learning = new NaiveBayesLearning <NormalDistribution>();

            return(Task.Factory.StartNew(() =>
            {
                learning.Options.InnerOption = new NormalOptions()
                {
                    Regularization = 1e-5
                };

                _bayes = learning.Learn(input, responses);
            }));
        }
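The trained model can later be queried with the same normalization as above. A minimal sketch of such a prediction step, assuming `_bayes` is the `NaiveBayes<NormalDistribution>` field assigned in TrainAsync (the method name `Predict` is hypothetical):

        public int Predict(ushort[] bandIntensities)
        {
            // Reuse the same [0, 1] normalization applied during training
            double[] features = Array.ConvertAll(bandIntensities, s => (double)s / ushort.MaxValue);

            // Decide returns the zero-based index of the most likely class
            return _bayes.Decide(features);
        }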
        private static void BuildNBModel(double[][] trainInput, int[] trainOutput, double[][] testInput, int[] testOutput)
        {
            var teacher = new NaiveBayesLearning <NormalDistribution>();
            var nbModel = teacher.Learn(trainInput, trainOutput);

            int[] inSamplePreds  = nbModel.Decide(trainInput);
            int[] outSamplePreds = nbModel.Decide(testInput);

            // Accuracy
            double inSampleAccuracy  = 1 - new ZeroOneLoss(trainOutput).Loss(inSamplePreds);
            double outSampleAccuracy = 1 - new ZeroOneLoss(testOutput).Loss(outSamplePreds);

            Console.WriteLine("* In-Sample Accuracy: {0:0.0000}", inSampleAccuracy);
            Console.WriteLine("* Out-of-Sample Accuracy: {0:0.0000}", outSampleAccuracy);

            // Build confusion matrix
            int[][] confMatrix = BuildConfusionMatrix(
                testOutput, outSamplePreds, 10
                );
            System.IO.File.WriteAllLines(
                Path.Combine(
                    @"\\Mac\Home\Documents\c-sharp-machine-learning\ch.8\input-data",
                    "nb-conf-matrix.csv"
                    ),
                confMatrix.Select(x => String.Join(",", x))
                );

            // Precision Recall
            PrintPrecisionRecall(confMatrix);
            DrawROCCurve(testOutput, outSamplePreds, 10, "NB");
        }
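The `BuildConfusionMatrix`, `PrintPrecisionRecall`, and `DrawROCCurve` helpers are not shown here. A minimal sketch of what the confusion-matrix helper might look like, assuming rows index the actual class and columns the predicted class (the convention the CSV export above appears to use):

        private static int[][] BuildConfusionMatrix(int[] actual, int[] predicted, int numClasses)
        {
            // Allocate an empty numClasses x numClasses matrix
            int[][] matrix = new int[numClasses][];
            for (int i = 0; i < numClasses; i++)
                matrix[i] = new int[numClasses];

            // Rows index the actual class, columns the predicted class
            for (int i = 0; i < actual.Length; i++)
                matrix[actual[i]][predicted[i]]++;

            return matrix;
        }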
Example #3
        private void DoBayesAnalyze()
        {
            Codification codebook = new Codification(specialValueColumnNames, data);

            int[][] symbols = codebook.Transform(data);
            int[][] inputs  = symbols.Get(null, 0, -1);
            int[]   outputs = symbols.GetColumn(-1);

            // Create a new Naive Bayes learning
            var        learner       = new NaiveBayesLearning();
            NaiveBayes nb            = learner.Learn(inputs, outputs);
            var        currentSprint = App.GetReleaseScrumData().CurrentSprintProxy.CurrentSprint;

            foreach (var story in currentSprint.Stories)
            {
                if (excludeOwners.Contains(story.Owner))
                {
                    story.PredicateSuccessRate = noResultReasonExcludeUser;
                    continue;
                }
                if (story.Size <= 0)
                {
                    story.PredicateSuccessRate = noResultReasonStorySize0;
                    continue;
                }

                int[]    storyInstance = codebook.Transform(new string[] { story.Size.ToString(), story.Owner });
                double[] probs         = nb.Probabilities(storyInstance);
                var      successRate   = probs[0] * 100;
                var      pSuccessRate  = string.Format("{0:N2}", successRate);
                story.PredicateSuccessRate = string.Format("{0}%", pSuccessRate);
            }
        }
Example #4
        public void nativeBayesValidation()
        {
            var        learn = new NaiveBayesLearning();
            NaiveBayes nb    = learn.Learn(inputsInt, outputs);

            var cv = CrossValidation.Create(
                k: 3,

                learner: (p) => new NaiveBayesLearning(),

                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),

                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),

                x: inputsInt, y: outputs
                );

            var result = cv.Learn(inputsInt, outputs);

            int numberOfSamples = result.NumberOfSamples;
            int numberOfInputs  = result.NumberOfInputs;
            int numberOfOutputs = result.NumberOfOutputs;

            double trainingError       = result.Training.Mean;
            double validationError     = result.Validation.Mean;
            GeneralConfusionMatrix gcm = result.ToConfusionMatrix(inputsInt, outputs);
            double accuracy            = gcm.Accuracy;

            message += "Native Bayes Validacja\n";
            message += "trainingError " + trainingError.ToString() + "\n";
            message += "validationError " + validationError.ToString() + "\n";
            message += "accuracy " + accuracy.ToString() + "\n\n";
        }
Example #5
        private static void naiveBayes(double[][] inputs, int[] outputs)
        {
            // Create a new Naive-Bayes teaching algorithm with normal distributions.
            // Note: the generic parameters are optional, but they help increase the
            // type safety of the teacher. For example, by specifying the NormalOptions
            // argument, we will be able to configure settings that are very specific
            // to normal distributions in the NaiveBayes teacher:
            var teacher = new NaiveBayesLearning <NormalDistribution, NormalOptions>()
            {
                Empirical = true // Estimate class priors from the data
            };

            // As mentioned above, because we specified the NormalOptions generic
            // parameter, now we can configure settings such as Gaussian regularization,
            // robustness and constraints (note: setting those is completely optional).
            teacher.Options.InnerOption.Regularization = 1e-10;
            teacher.Options.InnerOption.Diagonal       = false;

            // Learn the Naive Bayes classifier
            var nb = teacher.Learn(inputs, outputs);

            // Get predictions from the Naive Bayes model
            int[] predicted = nb.Decide(inputs);

            // Create a confusion matrix to check the quality of the predictions:
            var cm = new ConfusionMatrix(predicted: predicted, expected: outputs);

            // Check the accuracy measure:
            double accuracy = cm.Accuracy; // (should be 1.0 or 100%)
        }
        private void Train(IEnumerable <RestaurantReview> trainingDataset)
        {
            // Our independent variable is the review text
            string[] inputs = trainingDataset.Select(x => x.Review).ToArray();

            // Our dependent variable is whether or not the review is positive
            int[] outputs = trainingDataset.Select(x => Convert.ToInt32(x.IsPositive)).ToArray();

            // Clean review text
            inputs = inputs.Select(this.CleanReview).ToArray();

            // Convert the reviews into a multidimensional array. Each entry will contain the words of one review.
            // This also removes any punctuation and other marks.
            string[][] wordsPerReview = inputs.Tokenize();

            // Use the bag-of-words model to create a sparse matrix indicating whether or not a review contains a certain word.
            // Each word becomes a column.
            this._bagOfWordsModel = new BagOfWords();
            this._bagOfWordsModel.Learn(wordsPerReview);
            double[][] bagOfWordsResult = this._bagOfWordsModel.Transform(wordsPerReview);

            // Use the naive bayes algorithm for our text classification.
            NaiveBayesLearning <NormalDistribution> naiveBayesTeacher = new NaiveBayesLearning <NormalDistribution>();

            naiveBayesTeacher.Options.InnerOption = new NormalOptions()
            {
                Regularization = 1e-5 // to avoid zero variances exceptions
            };
            this._naiveBayesModel = naiveBayesTeacher.Learn(bagOfWordsResult, outputs);
        }
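A minimal sketch of the matching prediction step, assuming the same cleaning and tokenization pipeline as `Train` above (`IsPositiveReview` is a hypothetical name; `CleanReview` is the class's own helper):

        private bool IsPositiveReview(string review)
        {
            // Apply the same cleaning and tokenization used during training
            string cleaned = this.CleanReview(review);
            string[][] words = new[] { cleaned }.Tokenize();

            // Project the review into the same bag-of-words feature space
            double[][] features = this._bagOfWordsModel.Transform(words);

            // Class 1 encoded a positive review during training
            return this._naiveBayesModel.Decide(features[0]) == 1;
        }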
        /// <summary>
        /// Constructs a new NaiveBayes classification machine.
        /// </summary>
        public AINaiveBayes(string[][] data, List <string> columnList, string outputColumn)
        {
            // validation
            if (data == null || columnList == null || outputColumn == null)
            {
                throw new ArgumentNullException("Neither the input data, the column list, nor the output column can be null");
            }

            // initialise seed value
            Generator.Seed = new Random().Next();

            // process input and output lists into arrays
            this.dataset      = data;
            this.Outputs      = columnList.ToArray();
            this.outputColumn = outputColumn;

            // Create a new codification codebook to
            // convert strings into discrete symbols
            this.codebook = new Codification(Outputs, this.dataset);

            // Extract input and output pairs to train
            int[][] symbols = this.codebook.Transform(this.dataset);
            this.inputs  = symbols.Get(null, 0, -1); // Gets all rows and all columns except the last
            this.outputs = symbols.GetColumn(-1);    // Gets only the last column

            // Create a new Naive Bayes learning
            this.learner = new NaiveBayesLearning();

            // nulls
            testValue    = null;
            result       = null;
            probs        = null;
            this.learned = false;
        }
Example #8
        public void gh_758()
        {
            // Let's say we have the following data to be classified into three
            // non-mutually-exclusive possible classes. Those are the samples:
            //
            int[][] inputs =
            {
                //               input         output
                new int[] { 0, 1, 1, 0 }, //  0
                new int[] { 0, 1, 0, 0 }, //  0
                new int[] { 0, 0, 1, 0 }, //  0
                new int[] { 0, 1, 1, 0 }, //  0, 1
                new int[] { 0, 1, 0, 0 }, //  0, 1
                new int[] { 1, 0, 0, 0 }, //     1
                new int[] { 1, 0, 0, 0 }, //     1
                new int[] { 1, 0, 0, 1 }, //     1, 2
                new int[] { 0, 0, 0, 1 }, //     1, 2
                new int[] { 0, 0, 0, 1 }, //     1, 2
                new int[] { 1, 1, 1, 1 }, //        2
                new int[] { 1, 0, 1, 1 }, //        2
                new int[] { 1, 1, 0, 1 }, //        2
                new int[] { 0, 1, 1, 1 }, //        2
                new int[] { 1, 1, 1, 1 }, //        2
            };

            int[][] outputs = // those are the class labels
            {
                new[] { 1, 0, 0 },
                new[] { 1, 0, 0 },
                new[] { 1, 0, 0 },
                new[] { 1, 1, 0 },
                new[] { 1, 1, 0 },
                new[] { 0, 1, 0 },
                new[] { 0, 1, 0 },
                new[] { 0, 1, 1 },
                new[] { 0, 1, 1 },
                new[] { 0, 1, 1 },
                new[] { 0, 0, 1 },
                new[] { 0, 0, 1 },
                new[] { 0, 0, 1 },
                new[] { 0, 0, 1 },
                new[] { 0, 0, 1 },
            };

            // Create a new Naive Bayes teacher
            var teacher = new NaiveBayesLearning();

            teacher.ParallelOptions.MaxDegreeOfParallelism = 1;

            var bayes = teacher.Learn(inputs, outputs);

            double[][] prediction = bayes.Probabilities(inputs);

            // Compute the cross-entropy loss of the probability predictions:
            double error = new BinaryCrossEntropyLoss(outputs).Loss(prediction);

            Assert.AreEqual(11.566909963298386, error, 1e-8);

            Assert.IsTrue(teacher.optimized);
        }
        public UsersClassificationService()
        {
            var learningSet = new List <Tuple <UserType, UserStatistics> >()
            {
                Tuple.Create(UserType.Average, new UserStatistics(3, 3, 1, 100.5)),
                Tuple.Create(UserType.Average, new UserStatistics(2, 2, 1, 100.5)),
                Tuple.Create(UserType.Average, new UserStatistics(3, 0, 4, 200.5)),
                Tuple.Create(UserType.Average, new UserStatistics(3, 1, 3, 175.0)),
                Tuple.Create(UserType.Average, new UserStatistics(1, 3, 1, 50.0)),
                Tuple.Create(UserType.Average, new UserStatistics(0, 1, 3, 100.5)),
                Tuple.Create(UserType.Good, new UserStatistics(4, 1, 7, 400.5)),
                Tuple.Create(UserType.Good, new UserStatistics(2, 5, 7, 330.7)),
                Tuple.Create(UserType.Good, new UserStatistics(5, 4, 6, 300.5)),
                Tuple.Create(UserType.Good, new UserStatistics(6, 6, 6, 200.5)),
                Tuple.Create(UserType.Good, new UserStatistics(4, 5, 6, 323.5)),
                Tuple.Create(UserType.Good, new UserStatistics(6, 3, 3, 350.5)),
            };

            var learner = new NaiveBayesLearning <NormalDistribution>();

            var inputs  = learningSet.Select(x => x.Item2.ToArray()).ToArray();
            var outputs = learningSet.Select(x => (int)x.Item1).ToArray();

            _naiveBayes = learner.Learn(inputs, outputs);
        }
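A minimal sketch of how the trained field might be used afterwards, assuming the `UserType` enum values match the integer class labels used above (`Classify` is a hypothetical name):

        public UserType Classify(UserStatistics statistics)
        {
            // ToArray() must produce features in the same order as during training
            int decision = _naiveBayes.Decide(statistics.ToArray());
            return (UserType)decision;
        }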
        public void gh1056()
        {
            string basePath      = NUnit.Framework.TestContext.CurrentContext.WorkDirectory;
            string worksheetPath = Path.Combine(basePath, "Resources", "examples.xls");
            // https://github.com/accord-net/framework/issues/1056
            DataTable table = new ExcelReader(worksheetPath).GetWorksheet("Classification - Yin Yang");

            double[][] inputs  = table.ToJagged <double>("X", "Y");
            int[]      outputs = Classes.ToZeroOne(table.Columns["G"].ToArray <int>());

            var teacher = new NaiveBayesLearning <NormalDistribution>();

            var nb = teacher.Learn(inputs, outputs);

            int numberOfClasses = nb.NumberOfClasses;
            int numberOfInputs  = nb.NumberOfInputs;

            int[] answers = nb.Decide(inputs);

            var    cm  = ConfusionMatrix.Estimate(nb, inputs, outputs);
            double acc = cm.Accuracy;

            Assert.AreEqual(0.859999, cm.Accuracy, 1e-4);
            Assert.AreEqual(2, numberOfClasses);
            Assert.AreEqual(2, numberOfInputs);
        }
Example #11
    //START
    private void Start()
    {
        BlockLearn = new NaiveBayesLearning();
        //Get scripts
        bossStats = GetComponent <Stats>();
        //Bullet pattern
        bulletPattern = GameObject.FindGameObjectWithTag("BossAbilities").GetComponent <BulletPattern>();

        //Populate dictionary
        GameObject[] positions = GameObject.FindGameObjectsWithTag("Orientation");
        foreach (var position in positions)
        {
            switch (position.name)
            {
            case "Top":
                orientationDictionary.Add(Orientation.Top, position.transform);
                break;

            case "Left":
                orientationDictionary.Add(Orientation.Left, position.transform);
                break;

            case "Right":
                orientationDictionary.Add(Orientation.Right, position.transform);
                break;
            }
        }
    }
Example #12
        public void learn_test()
        {
            #region doc_learn
            // Let's say we have the following data to be classified
            // into three possible classes. Those are the samples:
            //
            double[][] inputs =
            {
                //               input         output
                new double[] { 0, 1, 1, 0 }, //  0
                new double[] { 0, 1, 0, 0 }, //  0
                new double[] { 0, 0, 1, 0 }, //  0
                new double[] { 0, 1, 1, 0 }, //  0
                new double[] { 0, 1, 0, 0 }, //  0
                new double[] { 1, 0, 0, 0 }, //  1
                new double[] { 1, 0, 0, 0 }, //  1
                new double[] { 1, 0, 0, 1 }, //  1
                new double[] { 0, 0, 0, 1 }, //  1
                new double[] { 0, 0, 0, 1 }, //  1
                new double[] { 1, 1, 1, 1 }, //  2
                new double[] { 1, 0, 1, 1 }, //  2
                new double[] { 1, 1, 0, 1 }, //  2
                new double[] { 0, 1, 1, 1 }, //  2
                new double[] { 1, 1, 1, 1 }, //  2
            };

            int[] outputs = // those are the class labels
            {
                0, 0, 0, 0, 0,
                1, 1, 1, 1, 1,
                2, 2, 2, 2, 2,
            };

            // Create a new Gaussian distribution naive Bayes learner
            var teacher = new NaiveBayesLearning <NormalDistribution>();

            // Set options for the component distributions
            teacher.Options.InnerOption = new NormalOptions
            {
                Regularization = 1e-5 // to avoid zero variances
            };

            // Learn the naive Bayes model
            NaiveBayes <NormalDistribution> bayes = teacher.Learn(inputs, outputs);

            // Use the model to predict class labels
            int[] predicted = bayes.Decide(inputs);

            // Estimate the model error. The error should be zero:
            double error = new ZeroOneLoss(outputs).Loss(predicted);

            // Now, let's test the model output for one of the input samples:
            int answer = bayes.Decide(new double[] { 1, 0, 0, 1 }); // should be 1
            #endregion

            Assert.AreEqual(0, error);
            Assert.AreEqual(1, answer);
            Assert.IsTrue(predicted.IsEqual(outputs));
        }
Example #13
        public void learn_no_datatable()
        {
            #region doc_mitchell_no_datatable
            string[] columnNames = { "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis" };

            string[][] data =
            {
                new string[] { "Sunny",    "Hot",  "High",   "Weak",   "No"  },
                new string[] { "Sunny",    "Hot",  "High",   "Strong", "No"  },
                new string[] { "Overcast", "Hot",  "High",   "Weak",   "Yes" },
                new string[] { "Rain",     "Mild", "High",   "Weak",   "Yes" },
                new string[] { "Rain",     "Cool", "Normal", "Weak",   "Yes" },
                new string[] { "Rain",     "Cool", "Normal", "Strong", "No"  },
                new string[] { "Overcast", "Cool", "Normal", "Strong", "Yes" },
                new string[] { "Sunny",    "Mild", "High",   "Weak",   "No"  },
                new string[] { "Sunny",    "Cool", "Normal", "Weak",   "Yes" },
                new string[] { "Rain",     "Mild", "Normal", "Weak",   "Yes" },
                new string[] { "Sunny",    "Mild", "Normal", "Strong", "Yes" },
                new string[] { "Overcast", "Mild", "High",   "Strong", "Yes" },
                new string[] { "Overcast", "Hot",  "Normal", "Weak",   "Yes" },
                new string[] { "Rain",     "Mild", "High",   "Strong", "No"  },
            };

            // Create a new codification codebook to
            // convert strings into discrete symbols
            Codification codebook = new Codification(columnNames, data);

            // Extract input and output pairs to train
            int[][] symbols = codebook.Transform(data);
            int[][] inputs  = symbols.Get(null, 0, -1); // Gets all rows and all columns except the last
            int[]   outputs = symbols.GetColumn(-1);    // Gets only the last column

            // Create a new Naive Bayes learning
            var learner = new NaiveBayesLearning();

            NaiveBayes nb = learner.Learn(inputs, outputs);

            // Consider that we would like to know whether one should play tennis on a
            // sunny, cool, humid and windy day. Let us first encode this instance
            int[] instance = codebook.Translate("Sunny", "Cool", "High", "Strong");

            // Let us obtain the numeric output that represents the answer
            int c = nb.Decide(instance); // answer will be 0

            // Now let us convert the numeric output to an actual "Yes" or "No" answer
            string result = codebook.Translate("PlayTennis", c); // answer will be "No"

            // We can also extract the probabilities for each possible answer
            double[] probs = nb.Probabilities(instance); // { 0.795, 0.205 }
            #endregion

            Assert.AreEqual("No", result);
            Assert.AreEqual(0, c);
            Assert.AreEqual(0.795, probs[0], 1e-3);
            Assert.AreEqual(0.205, probs[1], 1e-3);
            Assert.AreEqual(1, probs.Sum(), 1e-10);
            Assert.IsFalse(double.IsNaN(probs[0]));
            Assert.AreEqual(2, probs.Length);
        }
Example #14
        public void no_sample_test()
        {
            // Declare some boolean data
            bool[,] source =
            {
                // v1,v2,v3,v4,v5,v6,v7,v8,result
                { true,  true,  false, true,  true,  false, false, false, false },
                { true,  true,  true,  true,  true,  false, false, false, false },
                { true,  false, true,  true,  true,  false, false, true,  false },
                { true,  true,  true,  true,  true,  false, false, true,  false },
                { false, false, true,  true,  true,  false, false, true,  false },
                { true,  true,  true,  true,  false, false, false, false, false },
                { false, true,  true,  false, true,  false, false, false, false },
                { true,  true,  true,  false, true,  false, false, false, false },
                { false, true,  true,  false, true,  false, false, true,  false },
                { false, true,  true,  true,  true,  false, false, true,  false },
                { false, true,  true,  false, false, false, false, false, false },
                { true,  false, false, true,  false, false, false, true,  true  },
                { true,  true,  false, true,  false, false, false, true,  true  },
                { true,  true,  true,  true,  false, false, false, true,  true  },
                { false, true,  true,  true,  false, true,  true,  true,  true  },
                { true,  true,  false, false, false, true,  true,  true,  true  },
                { false, true,  false, false, false, true,  true,  true,  true  },
                { true,  true,  true,  true,  false, true,  true,  true,  true  },
                { false, false, false, false, false, true,  true,  true,  true  },
                { true,  true,  false, true,  false, true,  true,  true,  true  },
                { false, true,  false, true,  false, true,  true,  true,  true  },
                { false, true,  true,  false, false, true,  true,  true,  true  },
            };

            // Evaluation of a single point
            int[] sp = new[] { false, false, false, false, true, true, true, true }.ToInt32();


            // Transform to integers, then to jagged (matrix with [][] instead of [,])
            int[][] data = source.ToInt32().ToJagged();

            // Classification setup
            var inputs  = data.Get(null, 0, 8); // select all rows, with columns 0 up to (but excluding) 8
            var outputs = data.GetColumn(8);    // select last column

            var learner2 = new NaiveBayesLearning <GeneralDiscreteDistribution, GeneralDiscreteOptions, int>();

            learner2.Options.InnerOption.UseLaplaceRule = true;
            learner2.Distribution = (i, j) => new GeneralDiscreteDistribution(symbols: 2);
            learner2.ParallelOptions.MaxDegreeOfParallelism = 1;
            var nb2 = learner2.Learn(inputs, outputs);

            test(nb2, inputs, sp);


            var learner1 = new NaiveBayesLearning();

            learner1.Options.InnerOption.UseLaplaceRule     = true;
            learner1.ParallelOptions.MaxDegreeOfParallelism = 1;
            var nb1 = learner1.Learn(inputs, outputs);

            test(nb1, inputs, sp);
        }
Example #15
        public void buildModel()
        {
            // Now, let's create the naive Bayes learning algorithm
            var teacher = new NaiveBayesLearning <NormalDistribution>();

            // Finally, learn a naive Bayes model from the data
            this.nb = teacher.Learn(inputs, outputs);
        }
Example #16
        public void runNaiveBayes()
        {
            codebook = new Codification(trainingData, "Feature1", "Feature2", "GeneratedByProgram");


            // Convert the training data to symbols
            DataTable trainingsymbols = codebook.Apply(trainingData);

            int[][] trainingInputs  = trainingsymbols.ToJagged <int>("Feature1", "Feature2");
            int[]   trainingOutputs = trainingsymbols.ToArray <int>("GeneratedByProgram");

            // Create a new Naive Bayes learning
            var learner = new NaiveBayesLearning();

            learner.Options.InnerOption.UseLaplaceRule = true;

            // We learn the algorithm:
            NaiveBayes nb = learner.Learn(trainingInputs, trainingOutputs);

            DataTable testdata = new DataTable("Sample Data");

            testdata.Columns.Add("Feature1", "Feature2", "GeneratedByProgram");

            testdata.Rows.Add("This", " is real", "No");
            testdata.Rows.Add("a", "8", "Yes");
            testdata.Rows.Add("b", "2000", "Yes");
            testdata.Rows.Add("a", "9", "Yes");
            testdata.Rows.Add("a", "90", "Yes");
            testdata.Rows.Add("a", "12", "Yes");
            testdata.Rows.Add("b", "15", "Yes");
            testdata.Rows.Add("b", "18", "Yes");
            testdata.Rows.Add("b", "200", "Yes");
            testdata.Rows.Add("a", "5", "Yes");
            testdata.Rows.Add("a", "62", "Yes");
            testdata.Rows.Add("b", "5000", "Yes");
            testdata.Rows.Add("b", "17", "Yes");
            testdata.Rows.Add("b", "62", "Yes");
            testdata.Rows.Add("b", "90", "Yes");
            testdata.Rows.Add("b", "123", "Yes");
            testdata.Rows.Add("This", " is Ok", "Yes");
            testdata.Rows.Add("b", "1", "Yes");
            testdata.Rows.Add("b", "64", "Yes");
            testdata.Rows.Add("I ", "am god", "No");
            testdata.Rows.Add("b", "33", "Yes");

            String[] inst = { "b", "15" };
            testForInstance(nb, inst, "Yes");

            DataTable testsymbols = codebook.Apply(testdata);

            int[][] testInput  = testsymbols.ToJagged <int>("Feature1", "Feature2");
            int[]   testOutput = testsymbols.ToArray <int>("GeneratedByProgram");
            int[]   answers    = nb.Decide(testInput);


            Console.WriteLine("\n Accuracy (Tested on 20 data set): " + calculateAccuracy(answers, testOutput));
        }
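Neither `testForInstance` nor `calculateAccuracy` is shown above; `testForInstance` would encode the raw instance through the same codebook and compare `nb.Decide` against the expected label. A minimal sketch of what the accuracy helper might look like (hypothetical implementation):

        private static double calculateAccuracy(int[] answers, int[] expected)
        {
            // Fraction of predictions that match the expected labels
            int correct = 0;
            for (int i = 0; i < answers.Length; i++)
                if (answers[i] == expected[i])
                    correct++;
            return (double)correct / answers.Length;
        }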
Example #17
        private NaiveBayes <NormalDistribution> LearnNB(double[][] XKnownTrainSet, int[] YKnownTrainSet)
        {
            var NBLearning = new NaiveBayesLearning <NormalDistribution>();

            NBLearning.Options.InnerOption = new NormalOptions {
                Regularization = 1e-6,
            };

            return(NBLearning.Learn(XKnownTrainSet, YKnownTrainSet));
        }
        //Make this class a singleton so that it is not retrained by every class that uses it
        private TextAnalyzer()
        {
            //Usage of a Naive Bayes classifier
            //Create the trainer, allowing for some regularization
            var teacher = new NaiveBayesLearning <NormalDistribution, NormalOptions>()
            {
                Options = { InnerOption = { Regularization = 1e-6 } }
            };

            //Read in the training data and stop words
            string liberalTrainingPath      = System.Web.Hosting.HostingEnvironment.MapPath(@"~/Data/liberal_training.txt");
            string conservativeTrainingPath = System.Web.Hosting.HostingEnvironment.MapPath(@"~/Data/conservative_training.txt");
            string stopWordsPath            = System.Web.Hosting.HostingEnvironment.MapPath(@"~/Data/stop_words.txt");

            string[] liberalSamples      = File.ReadAllLines(liberalTrainingPath);
            string[] conservativeSamples = File.ReadAllLines(conservativeTrainingPath);
            stopWords = File.ReadAllLines(stopWordsPath);

            //Concat the samples into one array (they are first read into their own arrays so we know the number of samples in each file)
            string[] samples = liberalSamples.Concat(conservativeSamples).ToArray();

            //Break the text up into individual words
            string[][] words = samples.Tokenize();

            //If for some reason we didn't actually read any training data, throw an exception because the classifier won't work
            if (words.Length == 0)
            {
                throw new Exception("No training data for TextAnalyzer");
            }

            //Remove common english words
            words = TrimStopWords(words);

            //Create a bag of words using the tokenized sample data
            bagOfWords = new BagOfWords();
            bagOfWords.Learn(words);

            //Populate the output array using the known lengths of the sample files
            int[] outputs = new int[samples.Length];
            for (int i = 0; i < samples.Length; i++)
            {
                if (i < liberalSamples.Length)
                {
                    outputs[i] = 0;
                }
                else
                {
                    outputs[i] = 1;
                }
            }

            //Train the classifier
            double[][] inputs = bagOfWords.Transform(words);
            nbClassifier = teacher.Learn(inputs, outputs);
        }
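The `TrimStopWords` helper is the class's own and is not shown; a minimal sketch of one plausible implementation, assuming the `stopWords` array loaded above:

        private string[][] TrimStopWords(string[][] words)
        {
            // Drop any token that appears in the stop-word list (case-insensitive)
            return words
                .Select(tokens => tokens
                    .Where(w => !stopWords.Contains(w, StringComparer.OrdinalIgnoreCase))
                    .ToArray())
                .ToArray();
        }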
        public void Learn()
        {
            DataTable symbols = codeBook.Apply(data);

            int[][] inputs  = symbols.ToJagged <int>(headers);
            int[]   outputs = symbols.ToArray <int>(headerToPredict);

            var learner = new NaiveBayesLearning();

            nativeBayes = learner.Learn(inputs, outputs);
        }
Example #20
        public void ComputeTest3()
        {
            #region doc_multiclass
            // Let's say we have the following data to be classified
            // into three possible classes. Those are the samples:
            //
            int[][] inputs =
            {
                //               input      output
                new int[] { 0, 1, 1, 0 }, //  0
                new int[] { 0, 1, 0, 0 }, //  0
                new int[] { 0, 0, 1, 0 }, //  0
                new int[] { 0, 1, 1, 0 }, //  0
                new int[] { 0, 1, 0, 0 }, //  0
                new int[] { 1, 0, 0, 0 }, //  1
                new int[] { 1, 0, 0, 0 }, //  1
                new int[] { 1, 0, 0, 1 }, //  1
                new int[] { 0, 0, 0, 1 }, //  1
                new int[] { 0, 0, 0, 1 }, //  1
                new int[] { 1, 1, 1, 1 }, //  2
                new int[] { 1, 0, 1, 1 }, //  2
                new int[] { 1, 1, 0, 1 }, //  2
                new int[] { 0, 1, 1, 1 }, //  2
                new int[] { 1, 1, 1, 1 }, //  2
            };

            int[] outputs = // those are the class labels
            {
                0, 0, 0, 0, 0,
                1, 1, 1, 1, 1,
                2, 2, 2, 2, 2,
            };

            // Let us create a learning algorithm
            var learner = new NaiveBayesLearning();

            // and teach a model on the data examples
            NaiveBayes nb = learner.Learn(inputs, outputs);

            // Now, let's test the model output for the first input sample:
            int answer = nb.Decide(new int[] { 0, 1, 1, 0 }); // should be 0
            #endregion

            double error = new ZeroOneLoss(outputs).Loss(nb.Decide(inputs));
            Assert.AreEqual(0, error);

            for (int i = 0; i < inputs.Length; i++)
            {
                error = nb.Compute(inputs[i]);
                double expected = outputs[i];
                Assert.AreEqual(expected, error);
            }
        }
Example #21
        public override Task <List <GeneralConfusionMatrix> > ComputeFoldedConfusionMatrixAsync(ClassificationModel classificationModel, int folds)
        {
            return(Task.Factory.StartNew(() =>
            {
                int numFeatures = classificationModel.FeatureVectors.Count;

                double[][] input = new double[numFeatures][];
                int[] responses = new int[numFeatures];

                for (int featureIndex = 0; featureIndex < classificationModel.FeatureVectors.Count; ++featureIndex)
                {
                    var featureVector = classificationModel.FeatureVectors[featureIndex];

                    input[featureIndex] = Array.ConvertAll(featureVector.FeatureVector.BandIntensities, s => (double)s / ushort.MaxValue);
                    responses[featureIndex] = featureVector.FeatureClass;
                }

                List <GeneralConfusionMatrix> confusionMatrices = new List <GeneralConfusionMatrix>();

                // Create a new Cross-validation algorithm passing the data set size and the number of folds
                var crossvalidation = new CrossValidation(input.Length, folds);

                crossvalidation.Fitting = delegate(int k, int[] indicesTrain, int[] indicesValidation)
                {
                    // Let's now grab the training data:
                    var trainingInputs = input.Get(indicesTrain);
                    var trainingOutputs = responses.Get(indicesTrain);

                    // And now the validation data:
                    var validationInputs = input.Get(indicesValidation);
                    var validationOutputs = responses.Get(indicesValidation);

                    NaiveBayesLearning <NormalDistribution> learning = new NaiveBayesLearning <NormalDistribution>();
                    var bayes = learning.Learn(trainingInputs, trainingOutputs);

                    var predictedTraining = bayes.Decide(trainingInputs);
                    var predictedValidation = bayes.Decide(validationInputs);

                    double trainingError = new ZeroOneLoss(trainingOutputs).Loss(predictedTraining);
                    double validationError = new ZeroOneLoss(validationOutputs).Loss(predictedValidation);

                    GeneralConfusionMatrix confusionMatrix = new GeneralConfusionMatrix(Enum.GetValues(typeof(LandcoverTypeViewModel)).Length - 1, validationOutputs, predictedValidation);
                    confusionMatrices.Add(confusionMatrix);

                    // Return a new information structure containing the model and the errors achieved.
                    return new CrossValidationValues(trainingError, validationError);
                };

                crossvalidation.Compute();

                return confusionMatrices;
            }));
        }
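The returned list holds one validation confusion matrix per fold. A minimal sketch of how a caller might summarize them afterwards (hypothetical helper, using the same `GeneralConfusionMatrix.Accuracy` property seen in the cross-validation example above):

        public static double MeanFoldAccuracy(List<GeneralConfusionMatrix> confusionMatrices)
        {
            // Average the per-fold validation accuracies
            return confusionMatrices.Average(cm => cm.Accuracy);
        }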
Example #22
        private void btLearn_Click(object sender, EventArgs e)
        {
            mDT = (DataTable)dataGridView1.DataSource;

            double[][] inputs = mDT.ToJagged <double>("Height", "Weight", "FootSize");
            int[]      output = mDT.ToArray <int>("Sex");

            var learner = new NaiveBayesLearning <NormalDistribution, NormalOptions>();

            learner.Options.InnerOption.Regularization = 1e-5;

            nb = learner.Learn(inputs, output);
        }
Example #23
        private void Train(IEnumerable <SubstanceData> trainingDataset)
        {
            double[][] inputs = trainingDataset.Select(x => new double[] { x.Type, x.Concentration }).ToArray();

            int[] outputs = trainingDataset.Select(x => x.IsDangerous ? 1 : 0).ToArray();

            NaiveBayesLearning <NormalDistribution> naiveBayesTeacher = new NaiveBayesLearning <NormalDistribution>();

            naiveBayesTeacher.Options.InnerOption = new NormalOptions()
            {
                Regularization = 1e-5
            };
            this._naiveBayesModel = naiveBayesTeacher.Learn(inputs, outputs);
        }
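A minimal sketch of the matching prediction step (`GetDangerProbability` is a hypothetical name; the feature order must match `Train`: type first, then concentration):

        private double GetDangerProbability(SubstanceData substance)
        {
            double[] features = { substance.Type, substance.Concentration };

            // Index 1 is the probability of the class that encoded IsDangerous == true above
            return this._naiveBayesModel.Probabilities(features)[1];
        }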
Example #24
        public NaiveBayes <NormalDistribution> MachineLearning()
        {
            var teacher = new NaiveBayesLearning <NormalDistribution>();

            // Set options for the component distributions
            teacher.Options.InnerOption = new NormalOptions
            {
                Regularization = 1e-5 // to avoid zero variances
            };

            // Learn the naive Bayes model
            NaiveBayes <NormalDistribution> bayes = teacher.Learn(DataTrainInput, DataTrainOutput);

            return(bayes);
        }
        public void should_not_allow_negative_classes()
        {
            double[][] inputs =
            {
                new double[] { 0, 0 },
                new double[] { 0, 1 },
                new double[] { 1, 0 },
                new double[] { 1, 1 },
            };

            int[] outputs = { -1, 1, 1, -1 };

            var teacher = new NaiveBayesLearning <NormalDistribution>();

            Assert.Throws <ArgumentException>(() => teacher.Learn(inputs, outputs));
        }
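Since `Learn` rejects negative class labels, data labeled {-1, +1} has to be remapped to {0, 1} first. A minimal sketch reusing the `inputs` and `outputs` arrays from the test above, with the same `Classes.ToZeroOne` helper the gh1056 example uses (assuming -1 maps to 0):

            // Remap the {-1, +1} labels to {0, 1}
            int[] zeroOneOutputs = Classes.ToZeroOne(outputs); // { 0, 1, 1, 0 }, assuming -1 maps to 0

            // Now learning succeeds instead of throwing
            var remappedTeacher = new NaiveBayesLearning<NormalDistribution>();
            var model = remappedTeacher.Learn(inputs, zeroOneOutputs);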
        public TrainerHelper Train(System.Data.DataTable table, string columnName)
        {
            var container            = new TrainerHelper();
            var trainingCodification = new Codification()
            {
                DefaultMissingValueReplacement = Double.NaN
            };

            trainingCodification.Learn(table);
            DataTable symbols = trainingCodification.Apply(table);

            container.columnNamesArray =
                table.Columns.Cast <DataColumn>().Select(x => x.ColumnName).Where(s => s != columnName).ToArray();

            var columnOrdinal = table.Columns[columnName].Ordinal;

            double[][] tempInputs = symbols.ToJagged(container.columnNamesArray);
            double[][] inputs     = new double[tempInputs.Length][];
            for (var i = 0; i < tempInputs.Length; i++)
            {
                var flattened = this.ExpandRow(trainingCodification, tempInputs[i], columnOrdinal);
                inputs[i] = flattened;
            }


            int[] outputs = symbols.ToArray <int>(columnName);

            var teacher = new NaiveBayesLearning <NormalDistribution>();

            // Set options for the component distributions
            teacher.Options.InnerOption = new NormalOptions
            {
                Regularization = 1e-5 // to avoid zero variances
            };

            if (inputs.Length > 0)
            {
                NaiveBayes <NormalDistribution> learner = teacher.Learn(inputs, outputs);
                container.trainer = learner;
            }

            //var lbnr = new LowerBoundNewtonRaphson() { MaxIterations = 100, Tolerance = 1e-6 };
            //var mlr = lbnr.Learn(inputs, outputs);
            container.codification = trainingCodification;
            container.symbols      = symbols;
            return(container);
        }
Example #27
        public void  Train(List <TrainingValue> trainingData)
        {
            List <DecisionVariable> trainingVariables = new List <DecisionVariable>();

            for (int i = 0; i < featureSize; i++)
            {
                trainingVariables.Add(DecisionVariable.Continuous(i.ToString()));
            }

            tree = new DecisionTree(inputs: trainingVariables, classes: 2);


            double[][] featuresArray = new double[trainingData.Count][];
            int[]      labels        = new int[trainingData.Count];

            for (int i = 0; i < featuresArray.Length; i++)
            {
                featuresArray[i] = trainingData[i].Features;
                labels[i]        = Convert.ToInt32(trainingData[i].State);
            }

            switch (type)
            {
            case ClassifierType.DecisionTree:
                C45Learning teacher = new C45Learning(tree);
                teacher.Learn(featuresArray, labels);
                break;

            case ClassifierType.LDA:
                LinearDiscriminantAnalysis lda = new LinearDiscriminantAnalysis();
                pipeline = lda.Learn(featuresArray, labels);
                break;

            case ClassifierType.SVM:
                LinearCoordinateDescent svmLearner = new LinearCoordinateDescent();
                svm = svmLearner.Learn(featuresArray, labels);
                break;

            case ClassifierType.Bayes:
                NaiveBayesLearning <NormalDistribution> learner = new NaiveBayesLearning <NormalDistribution>();
                bayes = learner.Learn(featuresArray, labels);
                break;
            }

            Trained = true;
        }
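A minimal sketch of the matching prediction step for the Bayes branch, assuming class 1 encoded `State == true` above (`Predict` is a hypothetical name; the other branches would dispatch on `tree`, `pipeline`, or `svm` in the same way):

        public bool Predict(double[] features)
        {
            // Class 1 encoded TrainingValue.State == true during training
            return bayes.Decide(features) == 1;
        }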
Example #28
        public NaiveBayesClassifier()
        {
            Name           = "Naive Bayes Classifier";
            Type           = AlgorithmType.Classifier;
            IsTrained      = false;
            PredictionType = typeof(string[]);
            ResultType     = typeof(string);
            Dataset        = null;
            Outputs        = null;
            TestValue      = null;
            Result         = null;
            Probabilities  = null;

            // initialise seed value for Accord framework
            Generator.Seed = new Random().Next();

            // Create a new Naive Bayes learner
            this.learner = new NaiveBayesLearning();
        }
Example #29
        public override void DoTraining(DocumentSetCaseCollectionSet trainingSet, classifierTools tools, ILogBuilder logger)
        {
            var state = states.SetState(trainingSet, GetExperimentSufix());

            if (isMultinominal)
            {
                NaiveBayesLearning <GeneralizedBetaDistribution> teacher = new NaiveBayesLearning <GeneralizedBetaDistribution>();

                // Set options for the component distributions
                teacher.Options.InnerOption = new NormalOptions
                {
                    Regularization = 1e-5 // to avoid zero variances
                };

                // The following line is only needed to ensure reproducible results. Please remove it to enable full parallelization
                teacher.ParallelOptions.MaxDegreeOfParallelism = 1; // (Remove, comment, or change this line to enable full parallelism)
                _teacher = teacher;

                // Learn a machine
                //  state.machine = teacher.Learn(state.data.inputs, state.data.outputs);
            }
            else
            {
                NaiveBayesLearning <NormalDistribution> teacher = new NaiveBayesLearning <NormalDistribution>();

                // Set options for the component distributions
                teacher.Options.InnerOption = new NormalOptions
                {
                    Regularization = 1e-5 // to avoid zero variances
                };

                // The following line is only needed to ensure reproducible results. Please remove it to enable full parallelization
                teacher.ParallelOptions.MaxDegreeOfParallelism = 1; // (Remove, comment, or change this line to enable full parallelism)
                _teacher = teacher;

                // Learn a machine
                state.machine = teacher.Learn(state.data.inputs, state.data.outputs);
            }


            state.SaveState();
        }
        public static void learnNaiveBayes(double percent, string newsType, Guid companyId)
        {
            try
            {
                var classNames = new string[] { "Percent", "NewsType", "CompanyId" };

                DataTable dt = new DataTable();
                dt.Columns.Add("Percent");
                dt.Columns.Add("NewsType");
                dt.Columns.Add("CompanyId");

                DataRow dr = dt.NewRow();
                dr["Percent"]   = percent.ToString();
                dr["NewsType"]  = newsType;
                dr["CompanyId"] = companyId.ToString();
                dt.Rows.Add(dr);

                // Creates a matrix from the source data table
                //double[,] table = dt.ToMatrix(out columnNames);

                // Get only the input vector values
                //double[][] inputs = table.GetColumns(0, 1).ToJagged();

                // Get only the label outputs
                // int[] outputs = table.GetColumn(2).ToInt32();
                if (codebook == null)
                {
                    codebook = new Codification(dt, "NewsType", "CompanyId", "Percent");
                }
                DataTable symbols = codebook.Apply(dt);
                int[][]   inputs  = symbols.ToJagged <int>("NewsType", "CompanyId");
                int[]     outputs = symbols.ToArray <int>("Percent");
                //var teacher = new NaiveBayesLearning<NormalDistribution>();
                var teacher = new NaiveBayesLearning();

                naiveBayes = teacher.Learn(inputs, outputs);
            }
            catch (Exception e)
            {
            }
        }
        public void learn_test_with_options()
        {
            #region doc_learn_options
            // Let's say we have the following data to be classified
            // into three possible classes. Those are the samples:
            //
            double[][] inputs =
            {
                //               input         output
                new double[] { 0, 1, 1, 0 }, //  0 
                new double[] { 0, 1, 0, 0 }, //  0
                new double[] { 0, 0, 1, 0 }, //  0
                new double[] { 0, 1, 1, 0 }, //  0
                new double[] { 0, 1, 0, 0 }, //  0
                new double[] { 1, 0, 0, 0 }, //  1
                new double[] { 1, 0, 0, 0 }, //  1
                new double[] { 1, 0, 0, 1 }, //  1
                new double[] { 0, 0, 0, 1 }, //  1
                new double[] { 0, 0, 0, 1 }, //  1
                new double[] { 1, 1, 1, 1 }, //  2
                new double[] { 1, 0, 1, 1 }, //  2
                new double[] { 1, 1, 0, 1 }, //  2
                new double[] { 0, 1, 1, 1 }, //  2
                new double[] { 1, 1, 1, 1 }, //  2
            };

            int[] outputs = // those are the class labels
            {
                0, 0, 0, 0, 0,
                1, 1, 1, 1, 1,
                2, 2, 2, 2, 2,
            };

            // Create a new Gaussian distribution naive Bayes learner
            var teacher = new NaiveBayesLearning<NormalDistribution, NormalOptions>();

            // Set options for the component distributions
            teacher.Options.InnerOption.Regularization = 1e-5; // to avoid zero variances

            // Learn the naive Bayes model
            NaiveBayes<NormalDistribution> bayes = teacher.Learn(inputs, outputs);

            // Use the model to predict class labels
            int[] predicted = bayes.Decide(inputs);

            // Estimate the model error. The error should be zero:
            double error = new ZeroOneLoss(outputs).Loss(predicted);

            // Now, let's test the model output for one of the input samples:
            int answer = bayes.Decide(new double[] { 1, 0, 0, 1 }); // should be 1
            #endregion

            Assert.AreEqual(0, error);
            Assert.AreEqual(1, answer);
            Assert.IsTrue(predicted.IsEqual(outputs));
        }
Example #32
        public void ComputeTest()
        {
            #region doc_mitchell
            DataTable data = new DataTable("Mitchell's Tennis Example");

            data.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            data.Rows.Add("D1", "Sunny", "Hot", "High", "Weak", "No");
            data.Rows.Add("D2", "Sunny", "Hot", "High", "Strong", "No");
            data.Rows.Add("D3", "Overcast", "Hot", "High", "Weak", "Yes");
            data.Rows.Add("D4", "Rain", "Mild", "High", "Weak", "Yes");
            data.Rows.Add("D5", "Rain", "Cool", "Normal", "Weak", "Yes");
            data.Rows.Add("D6", "Rain", "Cool", "Normal", "Strong", "No");
            data.Rows.Add("D7", "Overcast", "Cool", "Normal", "Strong", "Yes");
            data.Rows.Add("D8", "Sunny", "Mild", "High", "Weak", "No");
            data.Rows.Add("D9", "Sunny", "Cool", "Normal", "Weak", "Yes");
            data.Rows.Add("D10", "Rain", "Mild", "Normal", "Weak", "Yes");
            data.Rows.Add("D11", "Sunny", "Mild", "Normal", "Strong", "Yes");
            data.Rows.Add("D12", "Overcast", "Mild", "High", "Strong", "Yes");
            data.Rows.Add("D13", "Overcast", "Hot", "Normal", "Weak", "Yes");
            data.Rows.Add("D14", "Rain", "Mild", "High", "Strong", "No");
            #endregion

            #region doc_codebook
            // Create a new codification codebook to
            // convert strings into discrete symbols
            Codification codebook = new Codification(data,
                "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            // Extract input and output pairs to train
            DataTable symbols = codebook.Apply(data);
            int[][] inputs = symbols.ToArray<int>("Outlook", "Temperature", "Humidity", "Wind");
            int[] outputs = symbols.ToArray<int>("PlayTennis");
            #endregion

            #region doc_learn
            // Create a new Naive Bayes learning
            var learner = new NaiveBayesLearning();

            // Learn a Naive Bayes model from the examples
            NaiveBayes nb = learner.Learn(inputs, outputs);
            #endregion


            #region doc_test
            // Consider that we would like to know whether one should play tennis on a
            // sunny, cool, humid and windy day. Let us first encode this instance
            int[] instance = codebook.Translate("Sunny", "Cool", "High", "Strong");

            // Let us obtain the numeric output that represents the answer
            int c = nb.Decide(instance); // answer will be 0

            // Now let us convert the numeric output to an actual "Yes" or "No" answer
            string result = codebook.Translate("PlayTennis", c); // answer will be "No"

            // We can also extract the probabilities for each possible answer
            double[] probs = nb.Probabilities(instance); // { 0.795, 0.205 }
            #endregion

            Assert.AreEqual("No", result);
            Assert.AreEqual(0, c);
            Assert.AreEqual(0.795, probs[0], 1e-3);
            Assert.AreEqual(0.205, probs[1], 1e-3);
            Assert.AreEqual(1, probs.Sum(), 1e-10);
            Assert.IsFalse(double.IsNaN(probs[0]));
            Assert.AreEqual(2, probs.Length);
        }
Example #33
        public void laplace_smoothing_missing_sample()
        {
            #region doc_laplace
            // To test the effectiveness of the Laplace rule for when
            // an example of a symbol is not present in the training set,
            // let's create a dataset where the second column could contain
            // the values 0, 1 or 2, but actually only contains examples
            // with the values 1 and 2:

            int[][] inputs =
            {
                //      input     output
                new [] { 0, 1 }, //  0 
                new [] { 0, 2 }, //  0
                new [] { 0, 1 }, //  0
                new [] { 1, 2 }, //  1
                new [] { 0, 2 }, //  1
                new [] { 0, 2 }, //  1
                new [] { 1, 1 }, //  2
                new [] { 0, 1 }, //  2
                new [] { 1, 1 }, //  2
            };

            int[] outputs = // those are the class labels
            {
                0, 0, 0, 1, 1, 1, 2, 2, 2, 
            };

            // Since the data is not enough to determine which symbols we are
            // expecting in our model, we will have to specify the model by
            // hand. The first column can assume 2 different values, whereas
            // the second column can assume 3:
            var bayes = new NaiveBayes(classes: 3, symbols: new[] { 2, 3 });

            // Now we can create a learning algorithm
            var learning = new NaiveBayesLearning()
            {
                Model = bayes
            };

            // Enable the use of the Laplace rule
            learning.Options.InnerOption.UseLaplaceRule = true;

            // Learn the Naive Bayes model
            learning.Learn(inputs, outputs);

            // Estimate a new sample using the Laplace-smoothed model
            int answer = bayes.Decide(new int[] { 0, 1 });
            #endregion

            Assert.AreEqual(0, answer);

            double prob = bayes.Probability(new int[] { 0, 1 }, out answer);
            Assert.AreEqual(0, answer);
            Assert.AreEqual(0.52173913043478259, prob, 1e-10);

            double error = new ZeroOneLoss(outputs)
            {
                Mean = true
            }.Loss(bayes.Decide(inputs));

            Assert.AreEqual(2 / 9.0, error);
        }
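For reference, the asserted probability can be reproduced by hand: the Laplace rule adds one to each symbol count and the number of symbols to each denominator. A short worked derivation for the sample { 0, 1 } (arithmetic added here, not part of the original test):

            // Priors: P(y) = 3/9 = 1/3 for every class.
            //
            // First column (2 symbols), value 0:
            //   P(x1=0 | y=0) = (3+1)/(3+2) = 4/5
            //   P(x1=0 | y=1) = (2+1)/(3+2) = 3/5
            //   P(x1=0 | y=2) = (1+1)/(3+2) = 2/5
            //
            // Second column (3 symbols), value 1:
            //   P(x2=1 | y=0) = (2+1)/(3+3) = 1/2
            //   P(x2=1 | y=1) = (0+1)/(3+3) = 1/6
            //   P(x2=1 | y=2) = (3+1)/(3+3) = 2/3
            //
            // Joint scores: 2/15, 1/30, 4/45 -> normalized: 12/23, 3/23, 8/23,
            // so class 0 wins with probability 12/23 = 0.52173913...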
Example #35
        /// <summary>
        ///   Creates and learns a Naive Bayes classifier to recognize
        ///   the previously loaded dataset using the current settings.
        /// </summary>
        /// 
        private void btnCreate_Click(object sender, EventArgs e)
        {
            if (dgvLearningSource.DataSource == null)
            {
                MessageBox.Show("Please load some data first.");
                return;
            }

            classNames = new string[] { "G1", "G2" };


            // Finishes and save any pending changes to the given data
            dgvLearningSource.EndEdit();

            // Creates a matrix from the source data table
            double[,] table = (dgvLearningSource.DataSource as DataTable).ToMatrix(out columnNames);

            // Get only the input vector values
            double[][] inputs = table.Submatrix(null, 0, 1).ToArray();

            // Get only the label outputs
            int[] outputs = table.GetColumn(2).ToInt32();
            string[] colNames = columnNames.Submatrix(first: 2);

            // Create the Bayes classifier and perform classification
            var teacher = new NaiveBayesLearning<NormalDistribution>();

            // Estimate the model using the data
            bayes = teacher.Learn(inputs, outputs);

            // Show the estimated distributions and class probabilities
            dataGridView1.DataSource = new ArrayDataView(bayes.Distributions, colNames);


            // Generate samples for class 1
            var x1 = bayes.Distributions[0, 0].Generate(1000);
            var y1 = bayes.Distributions[0, 1].Generate(1000);

            // Generate samples for class 2
            var x2 = bayes.Distributions[1, 0].Generate(1000);
            var y2 = bayes.Distributions[1, 1].Generate(1000);

            // Combine in a single graph
            double[,] w1 = Matrix.Stack(x1, y1).Transpose();
            double[,] w2 = Matrix.Stack(x2, y2).Transpose();

            double[] z = Vector.Ones(2000);
            for (int i = 0; i < 1000; i++) 
                z[i] = 0;

            var a = Matrix.Stack<double>(new double[][,] { w1, w2 });
            var graph = a.Concatenate(z);

            CreateScatterplot(zedGraphControl2, graph);


            lbStatus.Text = "Classifier created! See the other tabs for details!";
        }
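A follow-up that could fit at the end of this handler: once `bayes` is learned, its in-sample fit can be checked the same way the tests in this document do. A hedged sketch reusing the example's own variables and the ZeroOneLoss seen elsewhere in these examples:

            // Sketch: measure the in-sample error of the freshly trained model
            int[] predicted = bayes.Decide(inputs);
            double error = new ZeroOneLoss(outputs).Loss(predicted);
            lbStatus.Text = String.Format(
                "Classifier created! In-sample error: {0:0.0000}", error);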
Example #36
        public void learn_test_mitchell()
        {
            #region doc_mitchell_1
            // We will represent Mitchell's Tennis example using a DataTable. However,
            // the use of a DataTable is not required in order to use the Naive Bayes. 
            // Please take a look at the other examples below for simpler approaches.
            DataTable data = new DataTable("Mitchell's Tennis Example");
            data.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");
            // We will set Temperature and Humidity to be continuous
            data.Columns["Temperature"].DataType = typeof(double);
            data.Columns["Humidity"].DataType = typeof(double);
            // Add some data
            data.Rows.Add("D1", "Sunny", 38.0, 96.0, "Weak", "No");
            data.Rows.Add("D2", "Sunny", 39.0, 90.0, "Strong", "No");
            data.Rows.Add("D3", "Overcast", 38.0, 75.0, "Weak", "Yes");
            data.Rows.Add("D4", "Rain", 25.0, 87.0, "Weak", "Yes");
            data.Rows.Add("D5", "Rain", 12.0, 30.0, "Weak", "Yes");
            data.Rows.Add("D6", "Rain", 11.0, 35.0, "Strong", "No");
            data.Rows.Add("D7", "Overcast", 10.0, 40.0, "Strong", "Yes");
            data.Rows.Add("D8", "Sunny", 24.0, 90.0, "Weak", "No");
            data.Rows.Add("D9", "Sunny", 12.0, 26.0, "Weak", "Yes");
            data.Rows.Add("D10", "Rain", 25, 30.0, "Weak", "Yes");
            data.Rows.Add("D11", "Sunny", 26.0, 40.0, "Strong", "Yes");
            data.Rows.Add("D12", "Overcast", 27.0, 97.0, "Strong", "Yes");
            data.Rows.Add("D13", "Overcast", 39.0, 41.0, "Weak", "Yes");
            data.Rows.Add("D14", "Rain", 23.0, 98.0, "Strong", "No");
            #endregion

            #region doc_mitchell_2
            // Create a new codification codebook to
            // convert strings into discrete symbols
            Codification codebook = new Codification(data);
            #endregion

            #region doc_mitchell_3
            // Some distributions require constructor parameters, and as such, cannot 
            // be automatically initialized by the learning algorithm. For this reason, 
            // we might need to specify how each component should be initialized:
            IUnivariateFittableDistribution[] priors =
            {
                new GeneralDiscreteDistribution(codebook["Outlook"].Symbols),   // 3 possible values (Sunny, overcast, rain)
                new NormalDistribution(),                                       // Continuous value (Celsius)
                new NormalDistribution(),                                       // Continuous value (percentage)
                new GeneralDiscreteDistribution(codebook["Wind"].Symbols)       // 2 possible values (Weak, strong)
            };

            // Create a new Naive Bayes classifier for the two classes
            var learner = new NaiveBayesLearning<IUnivariateFittableDistribution>()
            {
                // Tell the learner how to initialize the distributions
                Distribution = (classIndex, variableIndex) => priors[variableIndex]
            };

            // Extract symbols from data and train the classifier
            DataTable symbols = codebook.Apply(data);
            double[][] inputs = symbols.ToArray("Outlook", "Temperature", "Humidity", "Wind");
            int[] outputs = symbols.ToArray<int>("PlayTennis");

            // Learn the Naive Bayes model
            var naiveBayes = learner.Learn(inputs, outputs);
            #endregion

            #region doc_mitchell_4
            // Create an instance representing a "sunny, cool, humid and windy day":
            double[] instance = new double[] 
            {
                codebook.Translate(columnName:"Outlook", value:"Sunny"), //n 0
                12.0, 
                90.0,
                codebook.Translate(columnName:"Wind", value:"Strong") // 1
            };

            // We can obtain a class prediction using
            int predicted = naiveBayes.Decide(instance);

            // Or compute probabilities of each class using
            double[] probabilities = naiveBayes.Probabilities(instance);

            // Or obtain the log-likelihood of prediction
            double ll = naiveBayes.LogLikelihood(instance);

            // Finally, the result can be translated back using
            string result = codebook.Translate("PlayTennis", predicted); // Should be "No"
            #endregion

            Assert.AreEqual("No", result);
            Assert.AreEqual(0, predicted);
            Assert.AreEqual(0.840, probabilities[0], 1e-3);
            Assert.AreEqual(-10.493243476691351, ll, 1e-6);
            Assert.AreEqual(1, probabilities.Sum(), 1e-10);
            Assert.AreEqual(2, probabilities.Length);
        }
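As a usage note, the codebook also makes the probability vector readable. A small sketch, assuming the same variables as in the test above and reusing the Translate overload it already demonstrates:

            // Sketch: decode each class probability back to its label
            for (int i = 0; i < probabilities.Length; i++)
            {
                string label = codebook.Translate("PlayTennis", i);
                Console.WriteLine("{0}: {1:0.000}", label, probabilities[i]);
            }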
Example #37
        public void no_sample_test()
        {
            // Declare some boolean data
            bool[,] source = 
            {
                // v1,v2,v3,v4,v5,v6,v7,v8,result
                { true,  true,  false, true,  true,  false, false, false, false },
                { true,  true,  true,  true,  true,  false, false, false, false },
                { true,  false, true,  true,  true,  false, false, true,  false },
                { true,  true,  true,  true,  true,  false, false, true,  false },
                { false, false, true,  true,  true,  false, false, true,  false },
                { true,  true,  true,  true,  false, false, false, false, false },
                { false, true,  true,  false, true,  false, false, false, false },
                { true,  true,  true,  false, true,  false, false, false, false },
                { false, true,  true,  false, true,  false, false, true,  false },
                { false, true,  true,  true,  true,  false, false, true,  false },
                { false, true,  true,  false, false, false, false, false, false },
                { true,  false, false, true,  false, false, false, true,  true  },
                { true,  true,  false, true,  false, false, false, true,  true  },
                { true,  true,  true,  true,  false, false, false, true,  true  },
                { false, true,  true,  true,  false, true,  true,  true,  true  },
                { true,  true,  false, false, false, true,  true,  true,  true  },
                { false, true,  false, false, false, true,  true,  true,  true  },
                { true,  true,  true,  true,  false, true,  true,  true,  true  },
                { false, false, false, false, false, true,  true,  true,  true  },
                { true,  true,  false, true,  false, true,  true,  true,  true  },
                { false, true,  false, true,  false, true,  true,  true,  true  },
                { false, true,  true,  false, false, true,  true,  true,  true  },
            };

            // Evaluation of a single point
            int[] sp = new[] { false, false, false, false, true, true, true, true }.ToInt32();


            // Transform to integers, then to jagged (matrix with [][] instead of [,])
            int[][] data = source.ToInt32().ToJagged();

            // Classification setup
            var inputs = data.Get(null, 0, 8); // all rows, columns 0-7 (the end index is exclusive)
            var outputs = data.GetColumn(8);   // select last column

            var learner2 = new NaiveBayesLearning<GeneralDiscreteDistribution, GeneralDiscreteOptions, int>();
            learner2.Options.InnerOption.UseLaplaceRule = true;
            learner2.Distribution = (i, j) => new GeneralDiscreteDistribution(symbols: 2);
            learner2.ParallelOptions.MaxDegreeOfParallelism = 1;
            var nb2 = learner2.Learn(inputs, outputs);

            test(nb2, inputs, sp);


            var learner1 = new NaiveBayesLearning();
            learner1.Options.InnerOption.UseLaplaceRule = true;
            learner1.ParallelOptions.MaxDegreeOfParallelism = 1;
            var nb1 = learner1.Learn(inputs, outputs);

            test(nb1, inputs, sp);
        }
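The test(...) helper is not shown in this snippet, but since both learners enable the Laplace rule on the same data, the two models should agree. A hypothetical stand-in check (names and assertions assumed, not the original helper):

            // Hypothetical stand-in for the unshown test(...) helper:
            // both Laplace-smoothed models should classify the held-out
            // point identically and make the same in-sample decisions
            Assert.AreEqual(nb1.Decide(sp), nb2.Decide(sp));
            Assert.IsTrue(nb1.Decide(inputs).IsEqual(nb2.Decide(inputs)));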