/// <summary>
/// Trains a naive Bayes classifier on Mitchell's tennis data where Outlook and
/// Wind are discrete symbols and Temperature and Humidity are continuous
/// values, then checks the decision and class responses for one query instance.
/// </summary>
public void ComputeTest2()
{
    DataTable table = new DataTable("Mitchell's Tennis Example");
    table.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

    // Temperature and Humidity are stored as doubles so that they are
    // handled as continuous variables rather than codified symbols.
    table.Columns["Temperature"].DataType = typeof(double);
    table.Columns["Humidity"].DataType = typeof(double);

    table.Rows.Add("D1", "Sunny", 38.0, 96.0, "Weak", "No");
    table.Rows.Add("D2", "Sunny", 39.0, 90.0, "Strong", "No");
    table.Rows.Add("D3", "Overcast", 38.0, 75.0, "Weak", "Yes");
    table.Rows.Add("D4", "Rain", 25.0, 87.0, "Weak", "Yes");
    table.Rows.Add("D5", "Rain", 12.0, 30.0, "Weak", "Yes");
    table.Rows.Add("D6", "Rain", 11.0, 35.0, "Strong", "No");
    table.Rows.Add("D7", "Overcast", 10.0, 40.0, "Strong", "Yes");
    table.Rows.Add("D8", "Sunny", 24.0, 90.0, "Weak", "No");
    table.Rows.Add("D9", "Sunny", 12.0, 26.0, "Weak", "Yes");
    table.Rows.Add("D10", "Rain", 25, 30.0, "Weak", "Yes");
    table.Rows.Add("D11", "Sunny", 26.0, 40.0, "Strong", "Yes");
    table.Rows.Add("D12", "Overcast", 27.0, 97.0, "Strong", "Yes");
    table.Rows.Add("D13", "Overcast", 39.0, 41.0, "Weak", "Yes");
    table.Rows.Add("D14", "Rain", 23.0, 98.0, "Strong", "No");

    // The codebook maps the string-valued columns to integer symbols.
    Codification codebook = new Codification(table);

    int numberOfClasses = codebook["PlayTennis"].Symbols;
    int numberOfInputs = 4; // Outlook, Temperature, Humidity, Wind

    // One initial distribution per input variable, in column order.
    IUnivariateDistribution[] initialDistributions =
    {
        new GeneralDiscreteDistribution(codebook["Outlook"].Symbols), // discrete
        new NormalDistribution(),                                     // Temperature, continuous
        new NormalDistribution(),                                     // Humidity, continuous
        new GeneralDiscreteDistribution(codebook["Wind"].Symbols)     // discrete
    };

    var classifier = new NaiveBayes<IUnivariateDistribution>(
        numberOfClasses, numberOfInputs, initialDistributions);

    // Codify the table and split it into training inputs and labels.
    DataTable codified = codebook.Apply(table);
    double[][] trainingInputs = codified.ToArray("Outlook", "Temperature", "Humidity", "Wind");
    int[] trainingLabels = codified.ToArray<int>("PlayTennis");

    // Fit the model to the training data.
    classifier.Estimate(trainingInputs, trainingLabels);

    // Query: sunny outlook, temperature 12.0, humidity 90.0, strong wind.
    double outlookCode = codebook.Translate(columnName: "Outlook", value: "Sunny");
    double windCode = codebook.Translate(columnName: "Wind", value: "Strong");
    double[] query = new double[] { outlookCode, 12.0, 90.0, windCode };

    double logLikelihood;
    double[] responses;
    int decision = classifier.Compute(query, out logLikelihood, out responses);

    // Map the predicted class index back to its codeword.
    string label = codebook.Translate("PlayTennis", decision);

    Assert.AreEqual("No", label);
    Assert.AreEqual(0, decision);
    Assert.AreEqual(0.840, responses[0], 1e-3);
    Assert.AreEqual(1, responses.Sum(), 1e-10);
    Assert.IsFalse(double.IsNaN(responses[0]));
    Assert.AreEqual(2, responses.Length);
}
/// <summary>
/// Trains a naive Bayes classifier with discrete distributions on Mitchell's
/// tennis data and checks the decision and class responses obtained for the
/// instance ("Sunny", "Cool", "High", "Strong").
/// </summary>
public void ComputeTest()
{
    DataTable table = new DataTable("Mitchell's Tennis Example");
    table.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

    table.Rows.Add("D1", "Sunny", "Hot", "High", "Weak", "No");
    table.Rows.Add("D2", "Sunny", "Hot", "High", "Strong", "No");
    table.Rows.Add("D3", "Overcast", "Hot", "High", "Weak", "Yes");
    table.Rows.Add("D4", "Rain", "Mild", "High", "Weak", "Yes");
    table.Rows.Add("D5", "Rain", "Cool", "Normal", "Weak", "Yes");
    table.Rows.Add("D6", "Rain", "Cool", "Normal", "Strong", "No");
    table.Rows.Add("D7", "Overcast", "Cool", "Normal", "Strong", "Yes");
    table.Rows.Add("D8", "Sunny", "Mild", "High", "Weak", "No");
    table.Rows.Add("D9", "Sunny", "Cool", "Normal", "Weak", "Yes");
    table.Rows.Add("D10", "Rain", "Mild", "Normal", "Weak", "Yes");
    table.Rows.Add("D11", "Sunny", "Mild", "Normal", "Strong", "Yes");
    table.Rows.Add("D12", "Overcast", "Mild", "High", "Strong", "Yes");
    table.Rows.Add("D13", "Overcast", "Hot", "Normal", "Weak", "Yes");
    table.Rows.Add("D14", "Rain", "Mild", "High", "Strong", "No");

    // The codebook maps the string-valued columns to integer symbols.
    Codification codebook = new Codification(table);

    int numberOfClasses = codebook["PlayTennis"].Symbols;
    int numberOfInputs = 4; // Outlook, Temperature, Humidity, Wind

    // One discrete initial distribution per input variable, each sized by
    // the number of symbols the codebook found for that column.
    GeneralDiscreteDistribution[] initialDistributions =
    {
        new GeneralDiscreteDistribution(codebook["Outlook"].Symbols),
        new GeneralDiscreteDistribution(codebook["Temperature"].Symbols),
        new GeneralDiscreteDistribution(codebook["Humidity"].Symbols),
        new GeneralDiscreteDistribution(codebook["Wind"].Symbols)
    };

    var classifier = new NaiveBayes<GeneralDiscreteDistribution>(
        numberOfClasses, numberOfInputs, initialDistributions);

    // Codify the table and split it into training inputs and labels.
    DataTable codified = codebook.Apply(table);
    double[][] trainingInputs = codified.ToArray("Outlook", "Temperature", "Humidity", "Wind");
    int[] trainingLabels = codified.ToArray<int>("PlayTennis");

    // Fit the model to the training data.
    classifier.Estimate(trainingInputs, trainingLabels);

    // Query: a sunny, cool, humid and windy day.
    var encodedQuery = codebook.Translate("Sunny", "Cool", "High", "Strong");
    double[] query = encodedQuery.ToDouble();

    double logLikelihood;
    double[] responses;
    int decision = classifier.Compute(query, out logLikelihood, out responses);

    // Map the predicted class index back to its codeword.
    string label = codebook.Translate("PlayTennis", decision);

    Assert.AreEqual("No", label);
    Assert.AreEqual(0, decision);
    Assert.AreEqual(0.795, responses[0], 1e-3);
    Assert.AreEqual(1, responses.Sum(), 1e-10);
    Assert.IsFalse(double.IsNaN(responses[0]));
    Assert.AreEqual(2, responses.Length);
}
/// <summary>
/// Builds a bag-of-words model from two sample texts, trains a discrete naive
/// Bayes classifier to tell them apart, and checks that each text is assigned
/// to its own class.
/// </summary>
public void ComputeTest2()
{
    // Sample texts. These are verbatim string literals: every character,
    // including the line break inside the second one, is part of the data
    // and must not be re-wrapped.
    string[] spamWords = Tokenize(@"I decided to sign up for the Disney Half Marathon. Half of a marathon is 13.1 miles. A full marathon is 26.2 miles. You may wonder why the strange number of miles. “26.2” is certainly not an even number. And after running 26 miles who cares about the point two? You might think that 26.2 miles is a whole number of kilometers. It isn’t. In fact, it is even worse in kilometers – 42.1648128. I bet you don’t see many t-shirts in England with that number printed on the front.");
    string[] loremWords = Tokenize(@"Lorem ipsum dolor sit amet, Nulla nec tortor. Donec id elit quis purus consectetur consequat. Nam congue semper tellus. Sed erat dolor, dapibus sit amet, venenatis ornare, ultrices ut, nisi. Aliquam ante. Suspendisse scelerisque dui nec velit. Duis augue augue, gravida euismod, vulputate ac, facilisis id, sem. Morbi in orci. Nulla purus lacus, pulvinar vel, malesuada ac, mattis nec, quam. Nam molestie scelerisque quam. 
Nullam feugiat cursus lacus.orem ipsum dolor sit amet.");

    // Class names, indexed by class label.
    string[] classes = { "spam", "lorem" };

    // Bag-of-words over both texts; each word counts at most once
    // in a feature vector.
    BagOfWords bow = new BagOfWords(spamWords, loremWords)
    {
        MaximumOccurance = 1
    };

    // Every word is one input variable whose number of symbols is
    // MaximumOccurance + 1.
    int[] symbols = new int[bow.NumberOfWords];
    for (int w = 0; w < symbols.Length; w++)
    {
        symbols[w] = bow.MaximumOccurance + 1;
    }

    // Training set: one feature vector per text.
    int[][] inputs =
    {
        bow.GetFeatureVector(spamWords),
        bow.GetFeatureVector(loremWords)
    };

    int[] outputs =
    {
        0, // spam
        1, // lorem
    };

    NaiveBayes bayes = new NaiveBayes(2, symbols);

    // Seed every entry of every distribution with a small value
    // before estimating the model.
    for (int c = 0; c < bayes.ClassCount; c++)
    {
        for (int v = 0; v < bayes.SymbolCount.Length; v++)
        {
            for (int s = 0; s < bayes.SymbolCount[v]; s++)
            {
                bayes.Distributions[c, v][s] = 1e-10;
            }
        }
    }

    // Estimate the model from the training set.
    bayes.Estimate(inputs, outputs);

    // After estimation each distribution must sum to one.
    for (int c = 0; c < bayes.ClassCount; c++)
    {
        for (int v = 0; v < bayes.SymbolCount.Length; v++)
        {
            double sum = bayes.Distributions[c, v].Sum();
            Assert.AreEqual(1, sum, 1e-5);
        }
    }

    // The lorem text must be classified as "lorem" ...
    {
        int[] features = bow.GetFeatureVector(loremWords);
        int predicted = bayes.Compute(features);
        string label = classes[predicted];
        Assert.AreEqual("lorem", label);
    }

    // ... and the spam text as "spam".
    {
        int[] features = bow.GetFeatureVector(spamWords);
        int predicted = bayes.Compute(features);
        string label = classes[predicted];
        Assert.AreEqual("spam", label);
    }
}
/// <summary>
/// Trains a discrete naive Bayes model on 15 binary samples split into three
/// classes and checks that the training error is zero and that every
/// training sample is assigned its own label.
/// </summary>
public void ComputeTest3_Obsolete()
{
    // Samples to be classified into three possible classes.
    int[][] inputs =
    {
        //               input         output
        new int[] { 0, 1, 1, 0 }, //  0
        new int[] { 0, 1, 0, 0 }, //  0
        new int[] { 0, 0, 1, 0 }, //  0
        new int[] { 0, 1, 1, 0 }, //  0
        new int[] { 0, 1, 0, 0 }, //  0
        new int[] { 1, 0, 0, 0 }, //  1
        new int[] { 1, 0, 0, 0 }, //  1
        new int[] { 1, 0, 0, 1 }, //  1
        new int[] { 0, 0, 0, 1 }, //  1
        new int[] { 0, 0, 0, 1 }, //  1
        new int[] { 1, 1, 1, 1 }, //  2
        new int[] { 1, 0, 1, 1 }, //  2
        new int[] { 1, 1, 0, 1 }, //  2
        new int[] { 0, 1, 1, 1 }, //  2
        new int[] { 1, 1, 1, 1 }, //  2
    };

    int[] outputs = // class labels, one per sample above
    {
        0, 0, 0, 0, 0,
        1, 1, 1, 1, 1,
        2, 2, 2, 2, 2,
    };

    // Discrete naive Bayes model for 3 classes and 4 binary inputs.
    int[] symbols = new int[] { 2, 2, 2, 2 };
    var bayes = new NaiveBayes(3, symbols);

    // Teach the model. The error should be zero.
    double error = bayes.Estimate(inputs, outputs);
    Assert.AreEqual(0, error);

    // The first input sample is labelled class 0, so that is the expected
    // answer. This value was previously computed but never checked.
    int answer = bayes.Compute(new int[] { 0, 1, 1, 0 });
    Assert.AreEqual(0, answer);

    // Every training sample must be assigned its own label.
    for (int i = 0; i < inputs.Length; i++)
    {
        int actual = bayes.Compute(inputs[i]);
        int expected = outputs[i];
        Assert.AreEqual(expected, actual);
    }
}
/// <summary>
/// Trains a discrete naive Bayes classifier on the given table and returns
/// the "Class" codeword predicted for the instance taken from
/// <c>inputlar</c>.
/// </summary>
/// <param name="tbl">
/// Table holding the nine feature columns listed below plus the "Class" column.
/// </param>
/// <returns>The predicted class, translated back to its original codeword.</returns>
private string bayes(DataTable tbl)
{
    // Single source of truth for the feature columns, so the codebook, the
    // input extraction and the symbol counts cannot drift apart.
    string[] featureColumns =
    {
        "Clump Thickness",
        "Uniformity of Cell Size",
        "Uniformity of Cell Shape",
        "Marginal Adhesion",
        "Single Epithelial Cell Size",
        "Bare Nuclei",
        "Bland Chromatin",
        "Normal Nucleoli",
        "Mitoses"
    };
    const string classColumn = "Class";

    // The codebook covers the feature columns followed by the class column.
    string[] codedColumns = new string[featureColumns.Length + 1];
    featureColumns.CopyTo(codedColumns, 0);
    codedColumns[featureColumns.Length] = classColumn;

    Codification codebook = new Codification(tbl, codedColumns);

    // Translate the training data into integer symbols using the codebook.
    DataTable symbols = codebook.Apply(tbl);
    int[][] inputs = symbols.ToIntArray(featureColumns);
    int[] outputs = symbols.ToIntArray(classColumn).GetColumn(0);

    // Number of distinct symbols the codebook holds for each feature column.
    int[] symbolCounts = new int[featureColumns.Length];
    for (int i = 0; i < featureColumns.Length; i++)
    {
        symbolCounts[i] = codebook[featureColumns[i]].Symbols;
    }

    // Number of distinct class labels.
    int classCount = codebook[classColumn].Symbols;

    // Create and estimate the naive Bayes model.
    NaiveBayes target = new NaiveBayes(classCount, symbolCounts);
    target.Estimate(inputs, outputs);

    // NOTE(review): inputlar is declared outside this method; presumably it
    // holds the nine raw feature values in the column order above - confirm.
    int[] instance = codebook.Translate(
        inputlar[0], inputlar[1], inputlar[2],
        inputlar[3], inputlar[4], inputlar[5],
        inputlar[6], inputlar[7], inputlar[8]);

    // Classify the instance and translate the result back to its codeword.
    int output = target.Compute(instance);
    string result = codebook.Translate(classColumn, output);

    return result;
}
/// <summary>
/// Trains a naive Bayes model with normal distributions on 15 samples split
/// into three classes and checks that the training error is zero and that
/// every training sample is assigned its own label.
/// </summary>
public void ComputeTest3()
{
    // Samples to be classified into three possible classes.
    double[][] inputs =
    {
        //                  input         output
        new double[] { 0, 1, 1, 0 }, //  0
        new double[] { 0, 1, 0, 0 }, //  0
        new double[] { 0, 0, 1, 0 }, //  0
        new double[] { 0, 1, 1, 0 }, //  0
        new double[] { 0, 1, 0, 0 }, //  0
        new double[] { 1, 0, 0, 0 }, //  1
        new double[] { 1, 0, 0, 0 }, //  1
        new double[] { 1, 0, 0, 1 }, //  1
        new double[] { 0, 0, 0, 1 }, //  1
        new double[] { 0, 0, 0, 1 }, //  1
        new double[] { 1, 1, 1, 1 }, //  2
        new double[] { 1, 0, 1, 1 }, //  2
        new double[] { 1, 1, 0, 1 }, //  2
        new double[] { 0, 1, 1, 1 }, //  2
        new double[] { 1, 1, 1, 1 }, //  2
    };

    int[] outputs = // class labels, one per sample above
    {
        0, 0, 0, 0, 0,
        1, 1, 1, 1, 1,
        2, 2, 2, 2, 2,
    };

    // Naive Bayes model for 3 classes and 4 inputs, with the standard
    // normal distribution as the initial distribution.
    var bayes = new NaiveBayes<NormalDistribution>(inputs: 4, classes: 3,
        initial: NormalDistribution.Standard);

    // Teach the model. The error should be zero.
    double error = bayes.Estimate(inputs, outputs, options: new NormalOptions
    {
        Regularization = 1e-5 // to avoid zero variances
    });
    Assert.AreEqual(0, error);

    // The first input sample is labelled class 0, so that is the expected
    // answer. This value was previously computed but never checked.
    int answer = bayes.Compute(new double[] { 0, 1, 1, 0 });
    Assert.AreEqual(0, answer);

    // Every training sample must be assigned its own label.
    for (int i = 0; i < inputs.Length; i++)
    {
        int actual = bayes.Compute(inputs[i]);
        int expected = outputs[i];
        Assert.AreEqual(expected, actual);
    }
}