/// <summary>
/// Verifies that <c>NaiveBayesModel.Train</c> throws an
/// <c>InvalidDataFormatException</c> when the training rows do not all
/// contain the same number of elements.
/// </summary>
public void TestTrainInconsistentTrainingDataLength()
        {
            const int numTrainingExamples = 6;
            const int numDataElements     = 5;
            const int maxRowLength        = 1000;

            List<List<object>> trainingData = new List<List<object>>();
            List<object>       targetData   = GenerateValidData(numTrainingExamples);
            Random             rand         = new Random();

            // Guarantee the inconsistency instead of hoping the random draws produce it:
            // one row always keeps the expected length and a *different* row always
            // gets a wrong length. The offset is in [1, numTrainingExamples - 1], so the
            // two indices can never coincide.
            int consistentIndex   = rand.Next(numTrainingExamples);
            int inconsistentIndex = (consistentIndex + 1 + rand.Next(numTrainingExamples - 1)) % numTrainingExamples;

            for (int i = 0; i < numTrainingExamples; i++)
            {
                // The remaining rows are still chosen at random, as before.
                bool makeInconsistent = i == inconsistentIndex
                                        || (i != consistentIndex && rand.Next(2) == 0);

                int rowLength = numDataElements;
                if (makeInconsistent)
                {
                    // Either strictly shorter ([0, numDataElements)) or strictly longer
                    // ([numDataElements + 1, maxRowLength)); never equal to numDataElements.
                    rowLength = rand.Next(2) == 0
                        ? rand.Next(numDataElements)
                        : rand.Next(numDataElements + 1, maxRowLength);
                }

                trainingData.Add(GenerateValidData(rowLength));
            }

            try
            {
                NaiveBayesModel.Train(trainingData, targetData);
                Assert.Fail("Model was required to throw an InvalidDataFormatException, but did not");
            }
            catch (InvalidDataFormatException)
            {
                // Expected: the model rejected the ragged training data.
            }
        }
        /// <summary>
        /// Click handler for the "train" button. When both the description and genre
        /// text boxes are non-empty, trains the classifier with the description once
        /// per parsed genre, refreshes the log and reports the trained genres.
        /// Any exception is reported to the user through a message box.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event arguments (unused).</param>
        private void buttonTrain_Click(object sender, RoutedEventArgs e)
        {
            try
            {
                // Nothing to do unless both inputs have been filled in.
                if (textBoxDescription.Text.Length == 0 || textBoxGenre.Text.Length == 0)
                {
                    return;
                }

                List<string> genresArray = parseGenres();
                if (genresArray.Count == 0)
                {
                    // Same message the user saw before, without throwing a bare
                    // Exception just to reach the catch block below.
                    MessageBox.Show("Genres not found!");
                    return;
                }

                StringBuilder genres = new StringBuilder();
                foreach (var genre in genresArray)
                {
                    // Separate by position, not by comparing with the last value:
                    // a value comparison drops the separator whenever an earlier
                    // genre happens to equal the last one (duplicate entries).
                    if (genres.Length > 0)
                    {
                        genres.Append("; ");
                    }
                    genres.Append("'" + genre + "'");

                    classifier.Train(textBoxDescription.Text, genre);
                }

                showlog();
                MessageBox.Show("Train successfull!\n" + "Trained genres: " + genres);

                textBoxGenre.Text = "";
            }
            catch (Exception ex)
            {
                MessageBox.Show(ex.Message);
            }
        }
Example #3
0
        /// <summary>
        /// Trains and evaluates a Naive Bayes classifier on the SMS spam collection
        /// twice: first with a lower-casing tokenizer and a generated vocabulary,
        /// then with a plain word tokenizer and a hand-picked vocabulary.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        public static void Main(string[] args)
        {
            // Materialize once: the collection is sliced with Skip/Take several times
            // below, and a deferred sequence would be re-read on every enumeration.
            var smsCollection    = new DataReader($@"{Environment.CurrentDirectory}\Data\SMSSpamCollection").Read().ToArray();
            var trainingData     = smsCollection.Skip(1000);
            var verificationData = smsCollection.Take(1000);

            ITokenizer           tokenizer  = new ToLowerWordsTokenizer();
            var                  classifier = new NaiveBayes(tokenizer);
            IVocabularyGenerator vocabulary = new TopWordsVocabulary();

            classifier.Train(trainingData.ToArray(), vocabulary.GetVocabulary(tokenizer, trainingData));
            Evaluate(classifier, verificationData);

            Console.WriteLine("___________________________________");

            trainingData     = smsCollection.Take(4800);
            verificationData = smsCollection.Skip(4800);

            // The classifier receives its tokenizer in the constructor, so swapping the
            // local variable alone has no effect on it. Build a new classifier so the
            // second run really uses WordsTokenizer.
            tokenizer  = new WordsTokenizer();
            classifier = new NaiveBayes(tokenizer);
            classifier.Train(trainingData.ToArray(), new string[] { "FREE", "txt", "car", "call", "i", "mobile", "you", "me" });
            Evaluate(classifier, verificationData);

            Console.ReadKey();
        }
Example #4
0
        /// <summary>
        /// Trains a single <c>NaiveBayes</c> instance on each dataset in turn and
        /// checks that the error rate, measured on the same instances it was trained
        /// on and expressed as a percentage, matches the expected value within 0.01.
        /// </summary>
        public void TestTrain()
        {
            const double tolerance = 0.01;
            var bayes = new NaiveBayes();

            bayes.Train(iris.GetInstanceList(), null);
            var irisError = 100 * bayes.Test(iris.GetInstanceList()).GetErrorRate();
            Assert.AreEqual(5.33, irisError, tolerance);

            bayes.Train(bupa.GetInstanceList(), null);
            var bupaError = 100 * bayes.Test(bupa.GetInstanceList()).GetErrorRate();
            Assert.AreEqual(38.55, bupaError, tolerance);

            bayes.Train(dermatology.GetInstanceList(), null);
            var dermatologyError = 100 * bayes.Test(dermatology.GetInstanceList()).GetErrorRate();
            Assert.AreEqual(69.40, dermatologyError, tolerance);

            bayes.Train(car.GetInstanceList(), null);
            var carError = 100 * bayes.Test(car.GetInstanceList()).GetErrorRate();
            Assert.AreEqual(12.91, carError, tolerance);

            bayes.Train(tictactoe.GetInstanceList(), null);
            var tictactoeError = 100 * bayes.Test(tictactoe.GetInstanceList()).GetErrorRate();
            Assert.AreEqual(30.17, tictactoeError, tolerance);

            bayes.Train(nursery.GetInstanceList(), null);
            var nurseryError = 100 * bayes.Test(nursery.GetInstanceList()).GetErrorRate();
            Assert.AreEqual(9.70, nurseryError, tolerance);
        }
Example #5
0
        /// <summary>
        /// Evaluates several spam classifiers on the SMS spam collection: a HAM
        /// classifier, a primitive classifier, and two Naive Bayes classifiers (one
        /// with a generated vocabulary, one with hand-picked tokens). The first 1000
        /// entries are used for verification and the rest for training.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        public static void Main(string[] args)
        {
            // Materialize once: the collection is sliced and enumerated by every
            // experiment below, and a deferred sequence would be re-read each time.
            var smsCollection    = new DataReader($@"{Environment.CurrentDirectory}\Data\SMSSpamCollection").Read().ToArray();
            var trainingData     = smsCollection.Skip(1000);
            var verificationData = smsCollection.Take(1000);
            var tokenizer        = new WordsTokenizer();

            // NOTE(review): Run is defined elsewhere; presumably it prints the label and
            // invokes the supplied action - confirm against its definition.
            Run("Using HAM classifier ... ", () =>
            {
                var hamClassifier = new HamClassifier();
                Evaluate(hamClassifier, verificationData);
            });

            Run("Using Primitive classifier ... ", () =>
            {
                var primitiveSpamClassifier = new PrimitiveSpamClassifier();
                Evaluate(primitiveSpamClassifier, verificationData);
            });

            Run("Using Primitive Naive Bayer classifier #1 (automatic token selection - 30 most popular tokens) ... ", () =>
            {
                var naiveBayesClassifier = new NaiveBayes(tokenizer);
                naiveBayesClassifier.Train(trainingData.ToArray(), new TopWordsVocabulary().GetVocabulary(tokenizer, trainingData));
                Evaluate(naiveBayesClassifier, verificationData);
            });

            Run("Using Primitive Naive Bayer classifier #2 (manually selected tokens) ... ", () =>
            {
                var naiveBayesClassifier = new NaiveBayes(tokenizer);
                naiveBayesClassifier.Train(trainingData.ToArray(), new [] { "FREE", "txt", "car", "call", "i", "mobile", "you", "me" });
                Evaluate(naiveBayesClassifier, verificationData);
            });

            Console.WriteLine("Press any key to terminate.");
            Console.ReadKey();
        }
Example #6
0
 /// <summary>
 /// Replaces <c>Model</c> with a newly constructed <c>NaiveBayes</c> instance and
 /// trains it on the supplied data.
 /// </summary>
 /// <param name="X">Training data, passed through unchanged to <c>NaiveBayes.Train</c>.</param>
 /// <param name="Y">Target data, passed through unchanged to <c>NaiveBayes.Train</c>.</param>
 public void Train(List<List<object>> X, List<object> Y)
 {
     // Start from a fresh model on every call; any previously held model is discarded.
     Model = new NaiveBayes();

     Model.Train(X, Y);
 }