// Round-trips a single sample line through CsvParserUtils.ParseEmailExamples and checks
// both results: the per-email list (out parameter) and the per-word counts (return value).
public void TestMethod1()
        {
            // One record of sample data. Judging by the assertions below, the second token
            // ("ham") is the label and the rest are word/count pairs ("don 2", "dave 2", ...).
            // NOTE(review): the exact format is defined by the parser, which is not visible here.
            string s =
                @"/000/025 ham don 2 fw 1 35 1 jeff 2 dave 2 lynn 2 over 1 jones 2 thread 2 40 1 year 2 correlator 1 california 1 williams 2 mon 2 copyright 1 168 1 content 4 2 2 1 1 price 1 5 1 9 1 privileged 1 go 2 frank 4 19 1 harris 2 john 8 use 3 kevin 2 11 1 12 1 current 1 ferc 1 classes 1 g 2 may 2 e 1 kimberly 2 markets 1 09 2 a 3 05 1 smith 2 m 1 produced 1 w 1 new 1 u 1 shelley 2 s 4 v6 1 part 1 comments 1 2001 3 week 1 x 3 to 7 bob 4 basis 1 110 1 change 3 terms 1 mike 4 smtpsvc 1 has 2 allen 2 ken 2 any 2 michelle 2 jan 2 follow 1 be 2 index 1 text 1 electric 1 likely 1 strictly 1 and 10 that 2 urn 1 make 1 natural 1 1600 1 58 1 bill 2 steven 4 well 1 corp 3 area 1 complete 1 tom 2 corman 2 above 1 65 1 plain 1 chris 2 confidential 1 mail 1 as 3 blair 2 sheila 2 which 1 prohibited 1 michael 6 an 1 there 2 off 2 for 3 tim 2 of 6 are 3 page 1 only 1 on 3 exchange 1 kay 2 topic 1 information 1 transfer 1 or 3 msmbx01v 1 4418 1 questions 1 distribution 1 gas 3 rob 2 http 4 will 2 eric 2 disclosure 1 david 2 mime 1 some 1 scott 4 binary 1 subject 1 tnef 1 nahou 4 version 2 karen 2 craig 2 larry 4 encoding 1 thomas 2 end 1 ms 2 return 1 0500 2 attach 1 laura 2 but 1 mimeole 1 last 2 type 1 192 1 2195 1 enron 82 inc 1 january 2 power 2 miller 2 robert 2 class 1 market 2 contain 1 this 2 call 1 june 1 watson 2 one 1 was 1 steve 6 order 1 if 1 path 1 stephen 2 attachments 2 is 4 with 3 rick 2 stephanie 2 your 2 into 2 susan 2 the 12 msmbx03v 3 in 7 prices 4 discussed 1 also 1 energy 9 lisa 2 changes 1";

            // Use a uniquely named scratch file in the system temp directory (keeping the
            // .txt extension) so the test neither pollutes the working directory nor
            // collides with another run using the same fixed file name.
            string tempPath = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName() + ".txt");

            try
            {
                File.WriteAllText(tempPath, s);
                List <EmailExample> emailExamples;
                var example = CsvParserUtils.ParseEmailExamples(tempPath, out emailExamples);

                // Per-email view: exactly one non-spam email carrying its word counts.
                emailExamples.Should().HaveCount(1);
                emailExamples[0].IsSpam.Should().BeFalse();
                emailExamples[0].WordsInEmail.Should().HaveCount(c => c > 2);
                emailExamples[0].WordsInEmail["don"].Should().Be(2);

                // Per-word view: a word seen only in this ham email has no spam occurrences.
                example["dave"].HamCount.Should().Be(2);
                example["dave"].SpamCount.Should().Be(0);
            }
            finally
            {
                // Always remove the scratch file, even when an assertion above fails.
                File.Delete(tempPath);
            }
        }
Example #2
0
        /// <summary>
        /// Console entry point. Verifies that the training and test data files exist, parses both,
        /// classifies every test email with <c>NaiveBayesCalculator.ObtainProbabilityOfSpam</c>,
        /// and prints the hit/miss statistics together with the elapsed time.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            string errorMessage = "";

            // Interpolation holes in C# are written {expr}. The previous "${expr}" form emitted
            // a stray literal '$' in front of the path and the variable name.
            if (!File.Exists(TrainingDataPath))
            {
                errorMessage += $"Failed to find file {TrainingDataPath} - please update variable {nameof(TrainingDataPath)} or create that file.\n";
            }
            if (!File.Exists(TestDataPath))
            {
                errorMessage += $"Failed to find file {TestDataPath} - please update variable {nameof(TestDataPath)} or create that file.\n";
            }
            if (errorMessage != "")
            {
                Console.ForegroundColor = ConsoleColor.Red;
                Console.WriteLine("Not all files available - not running!");
                Console.WriteLine(errorMessage);
                Console.ResetColor();
                Console.WriteLine("Press any key to continue...");
                Console.ReadKey();
                return;
            }

            // The wall-clock start/end times are only displayed; the duration is measured with a
            // Stopwatch so that a clock adjustment during the run cannot distort it.
            var startTime = DateTime.Now;
            var stopwatch = System.Diagnostics.Stopwatch.StartNew();

            Console.WriteLine(startTime);

            Console.WriteLine("Parsing...");
            List <EmailExample>            trainingEmails;
            Dictionary <string, WordCount> trainingCounts = CsvParserUtils.ParseEmailExamples(TrainingDataPath, out trainingEmails);
            List <EmailExample>            testEmails;

            // Only the email list is needed from the test set; its word counts are discarded.
            CsvParserUtils.ParseEmailExamples(TestDataPath, out testEmails);

            // Fraction of training emails labelled as spam (1.0 * forces floating-point division).
            double probabilitySpam = 1.0 * trainingEmails.Count(t => t.IsSpam) / trainingEmails.Count;

            Console.WriteLine("Making predictions...");
            uint hits = 0, misses = 0;
            uint falsePositives = 0, falseNegatives = 0;

            foreach (var emailExample in testEmails)
            {
                // The email is predicted to be spam when Item1 exceeds Item2.
                // NOTE(review): presumably Item1/Item2 are the spam/ham scores - confirm in NaiveBayesCalculator.
                var  probabilityOfSpam = NaiveBayesCalculator.ObtainProbabilityOfSpam(emailExample.WordsInEmail, trainingCounts, probabilitySpam, trainingCounts.Count);
                bool isSpamPrediction  = probabilityOfSpam.Item1 > probabilityOfSpam.Item2;

                if (isSpamPrediction == emailExample.IsSpam)
                {
                    hits++;
                }
                else
                {
                    misses++;

                    // A wrong "spam" verdict is a false positive; a wrong "not spam" verdict is a false negative.
                    if (isSpamPrediction)
                    {
                        falsePositives++;
                    }
                    else
                    {
                        falseNegatives++;
                    }
                }
            }

            Console.WriteLine("Score: {0}%. Hits: {1}, Misses: {2}", 100.0 * hits / (misses + hits), hits, misses);
            Console.WriteLine("FalsePositives: {0}. FalseNegatives: {1}", falsePositives, falseNegatives);

            stopwatch.Stop();
            var endTime = DateTime.Now;

            Console.WriteLine(endTime);
            var totalMinutes = stopwatch.Elapsed.TotalMinutes;

            Console.WriteLine("Took {0} minutes.", totalMinutes);
            Console.WriteLine("Press any key to quit...");
            Console.ReadKey();
        }