static void Main(string[] args)
        {
            // Hand-written labelled examples: a party label followed by three boolean
            // features. NOTE(review): the meaning of each boolean is defined by the
            // Record constructor, which is not visible here.
            var labelledExamples = new List<Record>
            {
                new Record("Conservative", false, false, true),
                new Record("Conservative", true, true, false),
                new Record("Conservative", true, false, false),

                new Record("Socialist", false, true, false),
                new Record("Socialist", true, true, true),

                new Record("Libertarian", true, true, true),
            };

            // The record to label; "Unknown" is only a placeholder until it is classified.
            var candidate = new Record("Unknown", true, true, true);

            var classifier = new NaiveBayesClassifier(labelledExamples);

            // Store the classifier's answer back on the record itself.
            var predictedLabel = classifier.GetClassification(candidate);
            candidate.classification = predictedLabel;
        }
Exemple #2
0
        static void Main(string[] args)
        {
            // Load the data and break it into (training, test) pairs.
            var records = DataReader.ReadCsv();
            var splits = TrainTest.Split(records);

            var scores = new List<double>();

            // Materialize the splits first, then fit and score a fresh model on each one.
            foreach (var split in splits.ToList())
            {
                var classifier = new NaiveBayesClassifier();
                classifier.Fit(split.training);

                // Solve's return value is collected as this split's accuracy.
                var score = classifier.Solve(split.test);
                scores.Add(score);
            }

            // Mean of the per-split scores (Sum is already a double, so no cast is needed).
            double averageAccuracy = scores.Sum() / scores.Count;

            Console.WriteLine();
            Console.WriteLine($"Average accuracy: {averageAccuracy}");
        }
Exemple #3
0
        /// <summary>
        /// Runs the SMS spam pipeline end to end: reads the train and test sets, builds a
        /// vocabulary and bag-of-words features, fits a <c>NaiveBayesClassifier</c>, prints
        /// the model metrics and writes the test messages with their predictions to
        /// "results.tsv".
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // Reading data from .txt files
            List <_DataSetItem> raw_data_train = ReadInData(Properties.Resources.SMSSpamTrain);
            List <_DataSetItem> raw_data_test  = ReadInData(Properties.Resources.SMSSpamTest);
            List <_DataSetItem> full_dataset   = new List <_DataSetItem>(raw_data_test.Count + raw_data_train.Count);

            full_dataset.AddRange(raw_data_train);
            full_dataset.AddRange(raw_data_test);

            // Extracting the vocabulary of the dataset. Only words that occur 3 or more times are considered part of the vocabulary.
            // NOTE(review): the vocabulary is built from train + test together, so the test
            // messages influence the feature space - confirm this is intended.
            HashSet <string> vocabulary = ExtractVocabulary(full_dataset, 3);

            // Transforming the raw SMS data into a bag of words from the previously extracted vocabulary
            DataFeaturesContainer X_train = BagOfWordsFromVocabularyAndSMS(raw_data_train, vocabulary);
            DataFeaturesContainer X_test  = BagOfWordsFromVocabularyAndSMS(raw_data_test, vocabulary);

            // Assigning the target values: "spam" is encoded as 0, every other label as 1
            List <int> Y_train = raw_data_train.Select(x => x.target == "spam" ? 0 : 1).ToList();
            List <int> Y_test  = raw_data_test.Select(x => x.target == "spam" ? 0 : 1).ToList();

            // Defining the priors (keyed by the class encoding above: 0 = spam, 1 = not spam).
            // Only unbiased_prior is passed to Fit below; the other two are alternatives that
            // can be swapped into that call.

            // Unbiased prior, carries no information, every class is as likely to occur
            Dictionary <int, double> unbiased_prior = new Dictionary <int, double>()
            {
                { 1, 0.5 },
                { 0, 0.5 }
            };

            // Researched prior found in the Wikipedia article
            // @"https://en.wikipedia.org/wiki/Naive_Bayes_spam_filtering"
            Dictionary <int, double> wiki_prior = new Dictionary <int, double>()
            {
                { 1, 0.2 },
                { 0, 0.8 }
            };

            // Calculating the prior from our dataset, which includes finding the frequency of
            // actual spam messages vs ham messages
            List <string>            targets    = full_dataset.Select(x => x.target).ToList();
            double                   spam_prior = CalculateDatasetPrior(targets);
            Dictionary <int, double> dataset_calculated_prior = new Dictionary <int, double>()
            {
                { 1, 1.0 - spam_prior },
                { 0, spam_prior }
            };

            // Training the model and doing the predictions
            NaiveBayesClassifier model = new NaiveBayesClassifier();

            model.Fit(X_train, Y_train, unbiased_prior);
            List <int> predictions = model.Predict(X_test.Data);

            // Measuring model performance
            // NOTE(review): the trailing 0 is presumably the label treated as the positive
            // class (spam) - confirm against ModelMetrics.
            ModelMetrics modelMetrics = new ModelMetrics(Y_test, predictions, 0);

            modelMetrics.PrintMeasures();

            // Saving the results. The predictions were made on the test set, so the messages
            // written next to them must also come from the test set (not the training set).
            List <string> test_sms = raw_data_test.Select(x => x.sms.Trim()).ToList();

            WriteResults("results.tsv", test_sms, predictions);
        }