예제 #1
0
        /// <summary>
        /// Builds a <see cref="NaiveBayesCalculator"/> from the supplied training samples.
        /// </summary>
        /// <param name="trainingValues">Training samples; each exposes a boolean <c>Output</c>.</param>
        /// <returns>
        /// A calculator constructed from the fraction of samples whose <c>Output</c> is true
        /// and the structure returned by <c>NaiveBayesDataTransform.CountSamples</c>.
        /// </returns>
        private static NaiveBayesCalculator TrainNaiveBayes(List <DataSetValue> trainingValues)
        {
            Dictionary <string, BucketCount> sampleCounts = NaiveBayesDataTransform.CountSamples(trainingValues);

            // Prior: share of training samples whose Output is true.
            // Floating-point division, so an empty list yields NaN rather than throwing.
            int positiveSamples = trainingValues.Count(sample => sample.Output);
            double probabilityOfOne = (double)positiveSamples / trainingValues.Count;

            return new NaiveBayesCalculator(probabilityOfOne, sampleCounts);
        }
예제 #2
0
        /// <summary>
        /// Runs the predictor over every example in <paramref name="testData"/> and writes the
        /// accuracy, hit/miss counts and false-positive/false-negative counts to the console.
        /// </summary>
        /// <param name="testData">Parsed test set; its <c>Values</c> are enumerated once.</param>
        /// <param name="naiveBayesPredictor">Predictor whose <c>CalculatePrediction</c> is called per example.</param>
        private static void EvaluateNaiveBayesPredictor(ParserResults testData, NaiveBayesCalculator naiveBayesPredictor)
        {
            Console.WriteLine("Making predictions...");
            uint hits = 0, misses = 0;
            uint falsePositives = 0, falseNegatives = 0;

            foreach (var testExample in testData.Values)
            {
                var isOnePrediction = naiveBayesPredictor.CalculatePrediction(testExample);
                bool isOneActual = testExample.Output;

                // Two booleans have exactly four combinations: agreement is a hit,
                // disagreement is a miss classified by which way the prediction erred.
                // (The former trailing "else throw" could never execute.)
                if (isOnePrediction == isOneActual)
                {
                    hits++;
                }
                else
                {
                    misses++;
                    if (isOnePrediction)
                    {
                        falsePositives++;
                    }
                    else
                    {
                        falseNegatives++;
                    }
                }
            }

            // Floating-point division: with zero examples the score prints as NaN instead of throwing.
            Console.WriteLine("Score: {0}%. Hits: {1}, Misses: {2}", 100.0 * hits / (misses + hits), hits, misses);
            Console.WriteLine("FalsePositives: {0}. FalseNegatives: {1}", falsePositives, falseNegatives);
        }
예제 #3
0
        /// <summary>
        /// Entry point. Verifies that the training and test data files exist, parses both,
        /// classifies every test email and prints accuracy statistics and the elapsed time.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            string errorMessage = "";

            // Interpolation holes in C# are written {expr}; a '$' in front of the brace
            // is emitted literally, so none is used here.
            if (!File.Exists(TrainingDataPath))
            {
                errorMessage += $"Failed to find file {TrainingDataPath} - please update variable {nameof(TrainingDataPath)} or create that file.\n";
            }
            if (!File.Exists(TestDataPath))
            {
                errorMessage += $"Failed to find file {TestDataPath} - please update variable {nameof(TestDataPath)} or create that file.\n";
            }
            if (errorMessage != "")
            {
                Console.ForegroundColor = ConsoleColor.Red;
                Console.WriteLine("Not all files available - not running!");
                Console.WriteLine(errorMessage);
                Console.ResetColor();
                Console.WriteLine("Press any key to continue...");
                Console.ReadKey();
                return;
            }

            var startTime = DateTime.Now;

            Console.WriteLine(startTime);

            Console.WriteLine("Parsing...");
            List <EmailExample>            trainingEmails;
            Dictionary <string, WordCount> trainingCounts = CsvParserUtils.ParseEmailExamples(TrainingDataPath, out trainingEmails);
            List <EmailExample>            testEmails;

            // Only the parsed examples are needed for the test set; the returned counts are discarded.
            CsvParserUtils.ParseEmailExamples(TestDataPath, out testEmails);

            // Prior: share of training emails flagged as spam (NaN if the training set is empty).
            double probabilitySpam = 1.0 * trainingEmails.Count(t => t.IsSpam) / trainingEmails.Count;

            Console.WriteLine("Making predictions...");
            uint hits = 0, misses = 0;
            uint falsePositives = 0, falseNegatives = 0;

            foreach (var emailExample in testEmails)
            {
                // Item1 / Item2 are presumably the spam and non-spam scores - confirm against
                // NaiveBayesCalculator.ObtainProbabilityOfSpam. Spam is predicted when Item1 is larger.
                var  probabilityOfSpam = NaiveBayesCalculator.ObtainProbabilityOfSpam(emailExample.WordsInEmail, trainingCounts, probabilitySpam, trainingCounts.Count);
                bool isSpamPrediction  = probabilityOfSpam.Item1 > probabilityOfSpam.Item2;
                bool isSpamActual      = emailExample.IsSpam;

                // Agreement is a hit; disagreement is a miss classified by the direction of the error.
                // (The former trailing "else throw" could never execute.)
                if (isSpamPrediction == isSpamActual)
                {
                    hits++;
                }
                else
                {
                    misses++;
                    if (isSpamPrediction)
                    {
                        falsePositives++;
                    }
                    else
                    {
                        falseNegatives++;
                    }
                }
            }

            // Floating-point division: with zero test emails the score prints as NaN instead of throwing.
            Console.WriteLine("Score: {0}%. Hits: {1}, Misses: {2}", 100.0 * hits / (misses + hits), hits, misses);
            Console.WriteLine("FalsePositives: {0}. FalseNegatives: {1}", falsePositives, falseNegatives);

            var endTime = DateTime.Now;

            Console.WriteLine(endTime);
            var totalMinutes = (endTime - startTime).TotalMinutes;

            Console.WriteLine("Took {0} minutes.", totalMinutes);
            Console.WriteLine("Press any key to quit...");
            Console.ReadKey();
        }