예제 #1
0
        /// <summary>
        /// Runs two Naive Bayes experiments over the SMS collection read from
        /// <c>Data\SMSSpamCollection</c> under the current directory and prints an evaluation of each.
        /// The first uses a <c>ToLowerWordsTokenizer</c> with a vocabulary produced by
        /// <c>TopWordsVocabulary</c>; the second uses a <c>WordsTokenizer</c> with a hand-picked vocabulary.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        public static void Main(string[] args)
        {
            var smsCollection = new DataReader($@"{Environment.CurrentDirectory}\Data\SMSSpamCollection").Read();

            // Experiment 1: first 1000 messages are held out for verification, the rest are used for training.
            // The training split is materialized once because it is consumed twice below
            // (vocabulary generation and training).
            var trainingData     = smsCollection.Skip(1000).ToArray();
            var verificationData = smsCollection.Take(1000);

            ITokenizer           tokenizer  = new ToLowerWordsTokenizer();
            var                  classifier = new NaiveBayes(tokenizer);
            IVocabularyGenerator vocabulary = new TopWordsVocabulary();

            classifier.Train(trainingData, vocabulary.GetVocabulary(tokenizer, trainingData));
            Evaluate(classifier, verificationData);

            Console.WriteLine("___________________________________");

            // Experiment 2: first 4800 messages for training, the remainder for verification.
            trainingData     = smsCollection.Take(4800).ToArray();
            verificationData = smsCollection.Skip(4800);

            // Reassigning the local variable does not affect the tokenizer instance that was handed to the
            // first classifier, so a new classifier must be created for the new tokenizer to take effect.
            tokenizer  = new WordsTokenizer();
            classifier = new NaiveBayes(tokenizer);

            classifier.Train(trainingData, new string[] { "FREE", "txt", "car", "call", "i", "mobile", "you", "me" });
            Evaluate(classifier, verificationData);

            Console.ReadKey();
        }
예제 #2
0
        /// <summary>
        /// Evaluates several classifiers against the same verification split of the SMS collection read
        /// from <c>Data\SMSSpamCollection</c> under the current directory: a <c>HamClassifier</c>, a
        /// <c>PrimitiveSpamClassifier</c>, and two <c>NaiveBayes</c> classifiers trained with different
        /// vocabularies. Each experiment is passed to <c>Run</c> together with a descriptive label.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        public static void Main(string[] args)
        {
            var smsCollection = new DataReader($@"{Environment.CurrentDirectory}\Data\SMSSpamCollection").Read();

            // First 1000 messages are held out for verification, the rest are used for training.
            // Both splits are materialized once: they are consumed by several experiments below, and
            // re-running the Skip/Take queries each time would re-enumerate the source collection.
            var trainingData     = smsCollection.Skip(1000).ToArray();
            var verificationData = smsCollection.Take(1000).ToArray();
            var tokenizer        = new WordsTokenizer();

            Run("Using HAM classifier ... ", () =>
            {
                var hamClassifier = new HamClassifier();
                Evaluate(hamClassifier, verificationData);
            });

            Run("Using Primitive classifier ... ", () =>
            {
                var primitiveSpamClassifier = new PrimitiveSpamClassifier();
                Evaluate(primitiveSpamClassifier, verificationData);
            });

            Run("Using Primitive Naive Bayes classifier #1 (automatic token selection - 30 most popular tokens) ... ", () =>
            {
                // Vocabulary is derived from the training data by TopWordsVocabulary.
                var naiveBayesClassifier = new NaiveBayes(tokenizer);
                naiveBayesClassifier.Train(trainingData, new TopWordsVocabulary().GetVocabulary(tokenizer, trainingData));
                Evaluate(naiveBayesClassifier, verificationData);
            });

            Run("Using Primitive Naive Bayes classifier #2 (manually selected tokens) ... ", () =>
            {
                // Vocabulary is a fixed, hand-picked token list.
                var naiveBayesClassifier = new NaiveBayes(tokenizer);
                naiveBayesClassifier.Train(trainingData, new [] { "FREE", "txt", "car", "call", "i", "mobile", "you", "me" });
                Evaluate(naiveBayesClassifier, verificationData);
            });

            Console.WriteLine("Press any key to terminate.");
            Console.ReadKey();
        }