public void NaiveBayesConstructorTest4()
        {
            int classes = 2;
            int inputCount = 3;
            double[] priors = { 0.4, 0.6 };
            UniformDiscreteDistribution initial = new UniformDiscreteDistribution(0, 3);

            var target = new NaiveBayes<UniformDiscreteDistribution>(classes, inputCount, initial, priors);

            Assert.AreEqual(classes, target.ClassCount);
            Assert.AreEqual(inputCount, target.InputCount);
            Assert.AreEqual(priors.Length, target.Priors.Length);
            Assert.AreEqual(0.4, target.Priors[0]);
            Assert.AreEqual(0.6, target.Priors[1]);

            Assert.AreEqual(2, target.Distributions.GetLength(0));
            Assert.AreEqual(3, target.Distributions.GetLength(1));

            for (int i = 0; i < classes; i++)
            {
                for (int j = 0; j < inputCount; j++)
                {
                    Assert.AreNotSame(initial, target.Distributions[i, j]);
                    Assert.AreEqual(0, target.Distributions[i, j].Minimum);
                    Assert.AreEqual(3, target.Distributions[i, j].Maximum);
                }
            }
        }
        /// <summary>
        /// Use the object's inputs and outputs to learn the model of the linear regression, using OrdinaryLeastSquares
        /// </summary>
        public AINaiveBayes Learn()
        {
            this.classifier = this.learner.Learn(inputs, outputs);
            this.learned    = true;

            return(this);
        }
 public void Train(double[][] inputs, int[] outputs)
 {
     if (Initialized)
     {
         classifier = learner.Learn(inputs.Select(v => v.Select(u => Convert.ToInt32(u)).ToArray()).ToArray(), outputs);
     }
 }
示例#4
0
        public void NaiveBayesConstructorTest4()
        {
            int classes = 2;
            int[] symbols = { 2, 3, 1 };
            double[] priors = { 0.4, 0.6 };
            NaiveBayes target = new NaiveBayes(classes, priors, symbols);

            Assert.AreEqual(2, target.ClassCount);
            Assert.AreEqual(3, target.InputCount);
            Assert.AreEqual(2, target.Priors.Length);
            Assert.AreEqual(0.4, target.Priors[0]);
            Assert.AreEqual(0.6, target.Priors[1]);
        }
示例#5
0
        public override void TrainningModel(TrainningData trainningData)
        {
            Codification codification = trainningData.CodificationData;            
            int[][] inputs = trainningData.TrainningAttributes.ToInt32();
            int[] outputs = trainningData.ClassificationAttribute;

            int lastIndex = codification.Columns.Count - 1;            

            List<int> symbolCounts = new List<int>();

            for (int indexColumn = 0; indexColumn < lastIndex; indexColumn++)
            {
                symbolCounts.Add(codification[indexColumn].Symbols);
            }

            this.bayes = new NaiveBayes(2, symbolCounts.ToArray());
            this.bayes.Estimate(inputs, outputs); 
        }
示例#6
0
        private static void naiveBayes(double[][] inputs, int[] outputs)
        {
            // In our problem, we have 2 classes (samples can be either
            // positive or negative), and 2 inputs (x and y coordinates).

            var nb = new NaiveBayes<NormalDistribution>(classes: 2,
                inputs: 2, initial: new NormalDistribution());

            // The Naive Bayes expects the class labels to 
            // range from 0 to k, so we convert -1 to be 0:
            //
            outputs = outputs.Apply(x => x < 0 ? 0 : x);

            // Estimate the Naive Bayes
            double error = nb.Estimate(inputs, outputs);

            // Classify the samples using the model
            int[] answers = inputs.Apply(nb.Compute);

            // Plot the results
            ScatterplotBox.Show("Expected results", inputs, outputs);
            ScatterplotBox.Show("Naive Bayes results", inputs, answers)
                .Hold();
        }
示例#7
0
 public void build_model()
 {
     nb = naiveBayes.Learn(trainingInput, trainingOutput);
 }
示例#8
0
        private void btnCreate_Click(object sender, EventArgs e)
        {
            if (dgvLearningSource.DataSource == null)
            {
                MessageBox.Show("Please load some data first.");
                return;
            }

            classNames = new string[] { "G1", "G2" };


            // Finishes and save any pending changes to the given data
            dgvLearningSource.EndEdit();

            // Creates a matrix from the source data table
            double[,] table = (dgvLearningSource.DataSource as DataTable).ToMatrix(out columnNames);

            // Get only the input vector values
            double[][] inputs = table.Submatrix(null, 0, 1).ToArray();

            // Get only the label outputs
            int[] outputs = table.GetColumn(2).ToInt32();
            string[] colNames = columnNames.Submatrix(first: 2);


            // Create the Bayes classifier and perform classification
            bayes = new NaiveBayes<NormalDistribution>(2, 2, new NormalDistribution());

            // Estimate the model parameters from the data
            double error = bayes.Estimate(inputs, outputs);


            // Show the estimated distributions and class probabilities
            dataGridView1.DataSource = new ArrayDataView(bayes.Distributions, colNames, classNames);


            // Generate samples for class 1
            var x1 = bayes.Distributions[0, 0].Generate(1000);
            var y1 = bayes.Distributions[0, 1].Generate(1000);

            // Generate samples for class 2
            var x2 = bayes.Distributions[1, 0].Generate(1000);
            var y2 = bayes.Distributions[1, 1].Generate(1000);

            // Combine in a single graph
            var w1 = Matrix.Stack(x1, y1).Transpose();
            var w2 = Matrix.Stack(x2, y2).Transpose();

            var z = Matrix.Vector(2000, value: 1.0);
            for (int i = 0; i < 1000; i++) z[i] = 0;

            var graph = Matrix.Stack(w1, w2).Concatenate(z);

            CreateScatterplot(zedGraphControl2, graph);
        }
示例#9
0
        public void ComputeTest2()
        {
            DataTable data = new DataTable("Mitchell's Tennis Example");

            data.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            // We will set Temperature and Humidity to be continuous
            data.Columns["Temperature"].DataType = typeof(double);
            data.Columns["Humidity"].DataType = typeof(double);

            data.Rows.Add("D1", "Sunny", 38.0, 96.0, "Weak", "No");
            data.Rows.Add("D2", "Sunny", 39.0, 90.0, "Strong", "No");
            data.Rows.Add("D3", "Overcast", 38.0, 75.0, "Weak", "Yes");
            data.Rows.Add("D4", "Rain", 25.0, 87.0, "Weak", "Yes");
            data.Rows.Add("D5", "Rain", 12.0, 30.0, "Weak", "Yes");
            data.Rows.Add("D6", "Rain", 11.0, 35.0, "Strong", "No");
            data.Rows.Add("D7", "Overcast", 10.0, 40.0, "Strong", "Yes");
            data.Rows.Add("D8", "Sunny", 24.0, 90.0, "Weak", "No");
            data.Rows.Add("D9", "Sunny", 12.0, 26.0, "Weak", "Yes");
            data.Rows.Add("D10", "Rain", 25, 30.0, "Weak", "Yes");
            data.Rows.Add("D11", "Sunny", 26.0, 40.0, "Strong", "Yes");
            data.Rows.Add("D12", "Overcast", 27.0, 97.0, "Strong", "Yes");
            data.Rows.Add("D13", "Overcast", 39.0, 41.0, "Weak", "Yes");
            data.Rows.Add("D14", "Rain", 23.0, 98.0, "Strong", "No");

            // Create a new codification codebook to
            // convert strings into discrete symbols
            Codification codebook = new Codification(data);

            int classCount = codebook["PlayTennis"].Symbols; // 2 possible values (yes, no)
            int inputCount = 4; // 4 variables (Outlook, Temperature, Humidity, Wind)

            IUnivariateDistribution[] priors =
            {
                new GeneralDiscreteDistribution(codebook["Outlook"].Symbols),   // 3 possible values (Sunny, overcast, rain)
                new NormalDistribution(),                                       // Continuous value (celsius)
                new NormalDistribution(),                                       // Continuous value (percentage)
                new GeneralDiscreteDistribution(codebook["Wind"].Symbols)       // 2 possible values (Weak, strong)
            };

            // Create a new Naive Bayes classifiers for the two classes
            var target = new NaiveBayes<IUnivariateDistribution>(classCount, inputCount, priors);

            // Extract symbols from data and train the classifier
            DataTable symbols = codebook.Apply(data);
            double[][] inputs = symbols.ToArray("Outlook", "Temperature", "Humidity", "Wind");
            int[] outputs = symbols.ToArray<int>("PlayTennis");

            // Compute the Naive Bayes model
            target.Estimate(inputs, outputs);


            double logLikelihood;
            double[] responses;

            // Compute the result for a sunny, cool, humid and windy day:
            double[] instance = new double[] 
            {
                codebook.Translate(columnName:"Outlook", value:"Sunny"), 
                12.0, 
                90.0,
                codebook.Translate(columnName:"Wind", value:"Strong")
            };

            int c = target.Compute(instance, out logLikelihood, out responses);

            string result = codebook.Translate("PlayTennis", c);

            Assert.AreEqual("No", result);
            Assert.AreEqual(0, c);
            Assert.AreEqual(0.840, responses[0], 1e-3);
            Assert.AreEqual(1, responses.Sum(), 1e-10);
            Assert.IsFalse(double.IsNaN(responses[0]));
            Assert.AreEqual(2, responses.Length);
        }
示例#10
0
        public void DistributionsTest()
        {
            int classes = 3;
            int[] symbols = { 2, 1 };
            NaiveBayes target = new NaiveBayes(classes, symbols);
            double[,][] actual = target.Distributions;

            Assert.IsNotNull(actual);
            Assert.AreEqual(classes, actual.GetLength(0));
            Assert.AreEqual(symbols.Length, actual.GetLength(1));
        }
        public void NaiveBayesConstructorTest5()
        {
            const int classes = 2;
            const int inputCount = 3;
            double[] classPriors = {0.4, 0.6};
            var inputPriors = new [,]
            {
                {new UniformDiscreteDistribution(0,10), new UniformDiscreteDistribution(0,10), new UniformDiscreteDistribution(0,10)},
                {new UniformDiscreteDistribution(0,10), new UniformDiscreteDistribution(0,10), new UniformDiscreteDistribution(0,10)}
            };

            var target = new NaiveBayes<UniformDiscreteDistribution>(classes, inputCount, inputPriors, classPriors);

            Assert.AreEqual(classes, target.ClassCount);
            Assert.AreEqual(inputCount, target.InputCount);
            Assert.AreEqual(classPriors.Length, target.Priors.Length);
            Assert.AreEqual(0.4, target.Priors[0]);
            Assert.AreEqual(0.6, target.Priors[1]);

            Assert.AreEqual(2, target.Distributions.GetLength(0));
            Assert.AreEqual(3, target.Distributions.GetLength(1));            
        }
示例#12
0
        public void ComputeTest()
        {
            DataTable data = new DataTable("Mitchell's Tennis Example");

            data.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            data.Rows.Add("D1", "Sunny", "Hot", "High", "Weak", "No");
            data.Rows.Add("D2", "Sunny", "Hot", "High", "Strong", "No");
            data.Rows.Add("D3", "Overcast", "Hot", "High", "Weak", "Yes");
            data.Rows.Add("D4", "Rain", "Mild", "High", "Weak", "Yes");
            data.Rows.Add("D5", "Rain", "Cool", "Normal", "Weak", "Yes");
            data.Rows.Add("D6", "Rain", "Cool", "Normal", "Strong", "No");
            data.Rows.Add("D7", "Overcast", "Cool", "Normal", "Strong", "Yes");
            data.Rows.Add("D8", "Sunny", "Mild", "High", "Weak", "No");
            data.Rows.Add("D9", "Sunny", "Cool", "Normal", "Weak", "Yes");
            data.Rows.Add("D10", "Rain", "Mild", "Normal", "Weak", "Yes");
            data.Rows.Add("D11", "Sunny", "Mild", "Normal", "Strong", "Yes");
            data.Rows.Add("D12", "Overcast", "Mild", "High", "Strong", "Yes");
            data.Rows.Add("D13", "Overcast", "Hot", "Normal", "Weak", "Yes");
            data.Rows.Add("D14", "Rain", "Mild", "High", "Strong", "No");

            // Create a new codification codebook to
            // convert strings into discrete symbols
            Codification codebook = new Codification(data);

            int classCount = codebook["PlayTennis"].Symbols; // 2 possible values (yes, no)
            int inputCount = 4; // 4 variables (Outlook, Temperature, Humidity, Wind)

            GeneralDiscreteDistribution[] priors =
            {
                new GeneralDiscreteDistribution(codebook["Outlook"].Symbols),     // 3 possible values (Sunny, overcast, rain)
                new GeneralDiscreteDistribution(codebook["Temperature"].Symbols), // 3 possible values (Hot, mild, cool)
                new GeneralDiscreteDistribution(codebook["Humidity"].Symbols),    // 2 possible values (High, normal)
                new GeneralDiscreteDistribution(codebook["Wind"].Symbols)         // 2 possible values (Weak, strong)
            };

            // Create a new Naive Bayes classifiers for the two classes
            var target = new NaiveBayes<GeneralDiscreteDistribution>(classCount, inputCount, priors);

            // Extract symbols from data and train the classifier
            DataTable symbols = codebook.Apply(data);
            double[][] inputs = symbols.ToArray("Outlook", "Temperature", "Humidity", "Wind");
            int[] outputs = symbols.ToArray<int>("PlayTennis");

            // Compute the Naive Bayes model
            target.Estimate(inputs, outputs);


            double logLikelihood;
            double[] responses;

            // Compute the result for a sunny, cool, humid and windy day:
            double[] instance = codebook.Translate("Sunny", "Cool", "High", "Strong").ToDouble();

            int c = target.Compute(instance, out logLikelihood, out responses);

            string result = codebook.Translate("PlayTennis", c);

            Assert.AreEqual("No", result);
            Assert.AreEqual(0, c);
            Assert.AreEqual(0.795, responses[0], 1e-3);
            Assert.AreEqual(1, responses.Sum(), 1e-10);
            Assert.IsFalse(double.IsNaN(responses[0]));
            Assert.AreEqual(2, responses.Length);
        }
 // Creates Naive Bayes Machine
 public void CreateNaiveBayes()
 {
     string[] cols = { "Array Size", "Runs" };
     nb = new NaiveBayes(ClassCount, SymbolCounts);
 }
示例#14
0
        public void ComputeTest3()
        {
            // Let's say we have the following data to be classified
            // into three possible classes. Those are the samples:
            //
            double[][] inputs =
            {
                //               input         output
                new double[] { 0, 1, 1, 0 }, //  0 
                new double[] { 0, 1, 0, 0 }, //  0
                new double[] { 0, 0, 1, 0 }, //  0
                new double[] { 0, 1, 1, 0 }, //  0
                new double[] { 0, 1, 0, 0 }, //  0
                new double[] { 1, 0, 0, 0 }, //  1
                new double[] { 1, 0, 0, 0 }, //  1
                new double[] { 1, 0, 0, 1 }, //  1
                new double[] { 0, 0, 0, 1 }, //  1
                new double[] { 0, 0, 0, 1 }, //  1
                new double[] { 1, 1, 1, 1 }, //  2
                new double[] { 1, 0, 1, 1 }, //  2
                new double[] { 1, 1, 0, 1 }, //  2
                new double[] { 0, 1, 1, 1 }, //  2
                new double[] { 1, 1, 1, 1 }, //  2
            };

            int[] outputs = // those are the class labels
            {
                0, 0, 0, 0, 0,
                1, 1, 1, 1, 1,
                2, 2, 2, 2, 2,
            };

            // Create a new continuous naive Bayes model for 3 classes using 4-dimensional Gaussian distributions
            var bayes = new NaiveBayes<NormalDistribution>(inputs: 4, classes: 3, initial: NormalDistribution.Standard);

            // Teach the Naive Bayes model. The error should be zero:
            double error = bayes.Estimate(inputs, outputs, options: new NormalOptions
            {
                Regularization = 1e-5 // to avoid zero variances
            });

            // Now, let's test  the model output for the first input sample:
            int answer = bayes.Compute(new double[] { 0, 1, 1, 0 }); // should be 1


            Assert.AreEqual(0, error);
            for (int i = 0; i < inputs.Length; i++)
            {
                double actual = bayes.Compute(inputs[i]);
                double expected = outputs[i];
                Assert.AreEqual(expected, actual);
            }
        }
示例#15
0
        /// <summary>
        /// Learns a model that can map the given inputs to the given outputs.
        /// </summary>
        ///
        /// <param name="x">The model inputs.</param>
        /// <param name="y">The desired outputs associated with each <paramref name="x">inputs</paramref>.</param>
        /// <param name="weight">The weight of importance for each input-output pair.</param>
        ///
        /// <returns>
        ///   A model that has learned how to produce <paramref name="y" /> given <paramref name="x" />.
        /// </returns>
        ///
        public override NaiveBayes Learn(int[][] x, int[] y, double[] weight = null)
        {
            CheckArgs(x, y);

            if (Model == null)
            {
                int[] inputs  = x.DistinctCount();
                int   outputs = y.DistinctCount();
                Model = new NaiveBayes(outputs, inputs);
            }

            // For each class
            Parallel.For(0, Model.NumberOfOutputs, ParallelOptions, i =>
            {
                // Estimate conditional distributions
                // Get variables values in class i
                int[] idx      = y.Find(y_i => y_i == i);
                int[][] values = x.Get(idx);

                int n = idx.Length;

                if (Empirical)
                {
                    Model.Priors[i] = n / (double)x.Length;
                }

                double regularization = Options.InnerOption.Regularization;
                if (Options.InnerOptions != null)
                {
                    regularization = Options.InnerOptions[i].Regularization;
                }

                // For each variable (col)
                Parallel.For(0, Model.NumberOfInputs, ParallelOptions, j =>
                {
                    // Count value occurrences and store
                    // frequencies to form probabilities
                    int numberOfSymbols    = Model.NumberOfSymbols[j];
                    double[] frequencies   = new double[numberOfSymbols];
                    double[] probabilities = Model.Distributions[i, j];

                    // For each input row (instance)
                    // belonging to the current class
                    for (int k = 0; k < values.Length; k++)
                    {
                        frequencies[values[k][j]]++;
                    }

                    // Transform into probabilities
                    for (int k = 0; k < frequencies.Length; k++)
                    {
                        // Use a M-estimator using the previously
                        // available probabilities as priors.
                        double prior = probabilities[k];
                        double num   = frequencies[k] + regularization;
                        double den   = values.Length + regularization;

                        probabilities[k] = (num / den) * prior;
                    }
                });
            });

            return(Model);
        }
示例#16
0
        public void ComputeTest2()
        {

            // Some sample texts
            string[] spamTokens = Tokenize(@"I decided to sign up for the Disney Half Marathon. Half of a marathon is 13.1 miles. A full marathon is 26.2 miles. You may wonder why the strange number of miles. “26.2” is certainly not an even number. And after running 26 miles who cares about the point two? You might think that 26.2 miles is a whole number of kilometers. It isn’t. In fact, it is even worse in kilometers – 42.1648128. I bet you don’t see many t-shirts in England with that number printed on the front.");

            string[] loremTokens = Tokenize(@"Lorem ipsum dolor sit amet,  Nulla nec tortor. Donec id elit quis purus consectetur consequat. Nam congue semper tellus. Sed erat dolor, dapibus sit amet, venenatis ornare, ultrices ut, nisi. Aliquam ante. Suspendisse scelerisque dui nec velit. Duis augue augue, gravida euismod, vulputate ac, facilisis id, sem. Morbi in orci. Nulla purus lacus, pulvinar vel, malesuada ac, mattis nec, quam. Nam molestie scelerisque quam. Nullam feugiat cursus lacus.orem ipsum dolor sit amet.");

            // Their respective classes
            string[] classes = { "spam", "lorem" };


            // Create a new Bag-of-Words for the texts
            BagOfWords bow = new BagOfWords(spamTokens, loremTokens)
            {
                // Limit the maximum number of occurrences in 
                // the feature vector to a single instance
                MaximumOccurance = 1
            };

            // Define the symbols for the Naïve Bayes
            int[] symbols = new int[bow.NumberOfWords];
            for (int i = 0; i < symbols.Length; i++)
                symbols[i] = bow.MaximumOccurance + 1;

            // Create input and outputs for training
            int[][] inputs =
            {
                bow.GetFeatureVector(spamTokens),
                bow.GetFeatureVector(loremTokens)
            };

            int[] outputs =
            {
                0, // spam
                1, // lorem
            };

            // Create the naïve Bayes model
            NaiveBayes bayes = new NaiveBayes(2, symbols);

            for (int i = 0; i < bayes.ClassCount; i++)
                for (int j = 0; j < bayes.SymbolCount.Length; j++)
                    for (int k = 0; k < bayes.SymbolCount[j]; k++)
                        bayes.Distributions[i, j][k] = 1e-10;

            // Estimate the model
            bayes.Estimate(inputs, outputs);


            // Initialize with prior probabilities
            for (int i = 0; i < bayes.ClassCount; i++)
                for (int j = 0; j < bayes.SymbolCount.Length; j++)
                {
                    double sum = bayes.Distributions[i, j].Sum();
                    Assert.AreEqual(1, sum, 1e-5);
                }

            // Consume the model
            {
                // First an example to classify as lorem
                int[] input = bow.GetFeatureVector(loremTokens);
                int answer = bayes.Compute(input);
                string result = classes[answer];

                Assert.AreEqual("lorem", result);
            }

            {
                // Then an example to classify as spam
                int[] input = bow.GetFeatureVector(spamTokens);
                int answer = bayes.Compute(input);
                string result = classes[answer];

                Assert.AreEqual("spam", result);
            }

        }
示例#17
0
        private static void test(NaiveBayes<GeneralDiscreteDistribution, int> nb, int[][] inputs, int[] sp)
        {
            int c = nb.Decide(sp); // 1
            double[] p = nb.Probabilities(sp); // 0.015, 0.985

            // Evaluation of all points
            int[] actual = nb.Decide(inputs);

            Assert.AreEqual(1, c);
            Assert.AreEqual(0.015197568389057824, p[0], 1e-10);
            Assert.AreEqual(0.98480243161094227, p[1], 1e-10);

            Assert.AreEqual(nb.Distributions[0].Components[0].Frequencies[0], 0.46153846153846156);
            Assert.AreEqual(nb.Distributions[0].Components[1].Frequencies[0], 0.23076923076923078);
            Assert.AreEqual(nb.Distributions[0].Components[2].Frequencies[0], 0.15384615384615385);
            Assert.AreEqual(nb.Distributions[0].Components[3].Frequencies[0], 0.38461538461538464);
            Assert.AreEqual(nb.Distributions[0].Components[4].Frequencies[0], 0.23076923076923078);
            Assert.AreEqual(nb.Distributions[0].Components[5].Frequencies[0], 0.92307692307692313);
            Assert.AreEqual(nb.Distributions[0].Components[6].Frequencies[0], 0.92307692307692313);
            Assert.AreEqual(nb.Distributions[0].Components[7].Frequencies[0], 0.53846153846153844);

            Assert.AreEqual(nb.Distributions[1].Components[0].Frequencies[0], 0.46153846153846156);
            Assert.AreEqual(nb.Distributions[1].Components[1].Frequencies[0], 0.23076923076923078);
            Assert.AreEqual(nb.Distributions[1].Components[2].Frequencies[0], 0.61538461538461542);
            Assert.AreEqual(nb.Distributions[1].Components[3].Frequencies[0], 0.38461538461538464);
            Assert.AreEqual(nb.Distributions[1].Components[4].Frequencies[0], 0.92307692307692313);
            Assert.AreEqual(nb.Distributions[1].Components[5].Frequencies[0], 0.30769230769230771);
            Assert.AreEqual(nb.Distributions[1].Components[6].Frequencies[0], 0.30769230769230771);
            Assert.AreEqual(nb.Distributions[1].Components[7].Frequencies[0], 0.076923076923076927);

            int[] last = actual.Get(new[] { 11, 12 }.Concatenate(Vector.Range(14, 22)));
            int[] others = actual.Get(Vector.Range(0, 10).Concatenate(13));
            Assert.IsTrue(1.IsEqual(last));
            Assert.IsTrue(0.IsEqual(others));
        }
示例#18
0
        public void SerializeTest()
        {
            int classes = 2;
            int[] symbols = { 2, 3, 1 };
            double[] priors = { 0.4, 0.6 };
            NaiveBayes target = new NaiveBayes(classes, priors, symbols);

            Assert.AreEqual(2, target.ClassCount);
            Assert.AreEqual(3, target.InputCount);
            Assert.AreEqual(2, target.Priors.Length);
            Assert.AreEqual(0.4, target.Priors[0]);
            Assert.AreEqual(0.6, target.Priors[1]);

            //target.Save(@"C:\Projects\Accord.NET\framework\nb2.bin");
            target = Serializer.Load<NaiveBayes>(Properties.Resources.nb2);

            Assert.AreEqual(2, target.NumberOfOutputs);
            Assert.AreEqual(3, target.NumberOfInputs);
            Assert.AreEqual(2, target.Priors.Length);
            Assert.AreEqual(0.4, target.Priors[0]);
            Assert.AreEqual(0.6, target.Priors[1]);
        }
示例#19
0
        public void laplace_smoothing_missing_sample()
        {
            #region doc_laplace
            // To test the effectiveness of the Laplace rule for when
            // an example of a symbol is not present in the training set,
            // lets create dataset where the second column could contain
            // values 0, 1 or 2 but only actually contains examples with
            // containing 1 and 2:

            int[][] inputs =
            {
                //      input     output
                new [] { 0, 1 }, //  0 
                new [] { 0, 2 }, //  0
                new [] { 0, 1 }, //  0
                new [] { 1, 2 }, //  1
                new [] { 0, 2 }, //  1
                new [] { 0, 2 }, //  1
                new [] { 1, 1 }, //  2
                new [] { 0, 1 }, //  2
                new [] { 1, 1 }, //  2
            };

            int[] outputs = // those are the class labels
            {
                0, 0, 0, 1, 1, 1, 2, 2, 2, 
            };

            // Since the data is not enough to determine which symbols we are
            // expecting in our model, we will have to specify the model by
            // hand. The first column can assume 2 different values, whereas
            // the third column can assume 3:
            var bayes = new NaiveBayes(classes: 3, symbols: new[] { 2, 3 });

            // Now we can create a learning algorithm
            var learning = new NaiveBayesLearning()
            {
                Model = bayes
            };

            // Enable the use of the Laplace rule
            learning.Options.InnerOption.UseLaplaceRule = true;

            // Learn the Naive Bayes model
            learning.Learn(inputs, outputs);

            // Estimate a sample with 0 in the second col
            int answer = bayes.Decide(new int[] { 0, 1 });
            #endregion

            Assert.AreEqual(0, answer);

            double prob = bayes.Probability(new int[] { 0, 1 }, out answer);
            Assert.AreEqual(0, answer);
            Assert.AreEqual(0.52173913043478259, prob, 1e-10);

            double error = new ZeroOneLoss(outputs)
            {
                Mean = true
            }.Loss(bayes.Decide(inputs));

            Assert.AreEqual(2 / 9.0, error);
        }
示例#20
0
        public void ComputeTest3_Obsolete()
        {
            // Let's say we have the following data to be classified
            // into three possible classes. Those are the samples:
            //
            int[][] inputs =
            {
                //               input      output
                new int[] { 0, 1, 1, 0 }, //  0 
                new int[] { 0, 1, 0, 0 }, //  0
                new int[] { 0, 0, 1, 0 }, //  0
                new int[] { 0, 1, 1, 0 }, //  0
                new int[] { 0, 1, 0, 0 }, //  0
                new int[] { 1, 0, 0, 0 }, //  1
                new int[] { 1, 0, 0, 0 }, //  1
                new int[] { 1, 0, 0, 1 }, //  1
                new int[] { 0, 0, 0, 1 }, //  1
                new int[] { 0, 0, 0, 1 }, //  1
                new int[] { 1, 1, 1, 1 }, //  2
                new int[] { 1, 0, 1, 1 }, //  2
                new int[] { 1, 1, 0, 1 }, //  2
                new int[] { 0, 1, 1, 1 }, //  2
                new int[] { 1, 1, 1, 1 }, //  2
            };

            int[] outputs = // those are the class labels
            {
                0, 0, 0, 0, 0,
                1, 1, 1, 1, 1,
                2, 2, 2, 2, 2,
            };

            // Create a discrete naive Bayes model for 3 classes and 4 binary inputs
            int[] symbols = new int[] { 2, 2, 2, 2 };
            var bayes = new NaiveBayes(3, symbols);

            // Teach the model. The error should be zero:
            double error = bayes.Estimate(inputs, outputs);

            // Now, let's test  the model output for the first input sample:
            int answer = bayes.Compute(new int[] { 0, 1, 1, 0 }); // should be 1


            Assert.AreEqual(0, error);
            for (int i = 0; i < inputs.Length; i++)
            {
                error = bayes.Compute(inputs[i]);
                double expected = outputs[i];
                Assert.AreEqual(expected, error);
            }
        }
        private string bayes(DataTable tbl)
        {
            Codification codebook = new Codification(tbl,
             "Clump Thickness", "Uniformity of Cell Size", "Uniformity of Cell Shape", "Marginal Adhesion", "Single Epithelial Cell Size", "Bare Nuclei", "Bland Chromatin", "Normal Nucleoli", "Mitoses", "Class");

            // Translate our training data into integer symbols using our codebook:
            DataTable symbols = codebook.Apply(tbl);
            int[][] inputs = symbols.ToIntArray("Clump Thickness", "Uniformity of Cell Size", "Uniformity of Cell Shape", "Marginal Adhesion", "Single Epithelial Cell Size", "Bare Nuclei", "Bland Chromatin", "Normal Nucleoli", "Mitoses");
            int[] outputs = symbols.ToIntArray("Class").GetColumn(0);

            // Gather information about decision variables
            int[] symbolCounts =
            {
                codebook["Clump Thickness"].Symbols,     // 3 possible values (Sunny, overcast, rain)
                codebook["Uniformity of Cell Size"].Symbols, // 3 possible values (Hot, mild, cool)
                codebook["Uniformity of Cell Shape"].Symbols,    // 2 possible values (High, normal)
                codebook["Marginal Adhesion"].Symbols ,        // 2 possible values (Weak, strong)
                codebook["Single Epithelial Cell Size"].Symbols  ,
                codebook["Bare Nuclei"].Symbols  ,
                codebook["Bland Chromatin"].Symbols ,
                codebook["Normal Nucleoli"].Symbols ,
                codebook["Mitoses"].Symbols
            };

            int classCount = codebook["Class"].Symbols; // 2 possible values (yes, no)

            // Create a new Naive Bayes classifiers for the two classes
            NaiveBayes target = new NaiveBayes(classCount, symbolCounts);

            // Compute the Naive Bayes model
            target.Estimate(inputs, outputs);

            // We will be computing the label for a sunny, cool, humid and windy day:
            int[] instance = codebook.Translate(inputlar[0], inputlar[1], inputlar[2], inputlar[3],
                inputlar[4], inputlar[5], inputlar[6], inputlar[7], inputlar[8]);

            // Now, we can feed this instance to our model
            int output = target.Compute(instance);

            // Finally, the result can be translated back to one of the codewords using
            string result = codebook.Translate("Class", output); // result is "No"
            return result;
        }