/// <summary>
/// Classifies <paramref name="corpus"/> as spam or not. The belief starts as the ratio of
/// <c>SpamCorpus</c> occurrences to <c>CompleteCorpus</c> occurrences and is rescaled once
/// for each word that <c>corpus.ForEveryOccurenceOfEachWord</c> passes to the callback.
/// </summary>
/// <param name="corpus">The corpus to classify.</param>
/// <returns><c>true</c> when the final belief is greater than 0.16; otherwise <c>false</c>.</returns>
/// <remarks>
/// All divisions are floating-point. If <c>CompleteCorpus</c> reports zero occurrences the
/// belief becomes NaN or infinity rather than raising an exception.
/// </remarks>
public bool IsSpam(Corpus corpus)
{
    // Factor applied when a word was never counted in SpamCorpus
    // (presumably chosen so that an unseen word dampens the belief without zeroing it).
    const double unseenInSpamFactor = .4;

    // The corpus is reported as spam only when the belief exceeds this value.
    const double spamThreshold = .16;

    var countAllOccurences = (double) CompleteCorpus.CountAllOccurences();
    var beliefInCorpusBeingSpam = SpamCorpus.CountAllOccurences() / countAllOccurences;

    corpus.ForEveryOccurenceOfEachWord(word =>
    {
        // Looked up once and reused below for both the zero check and the ratio.
        var occurencesOfWordInSpam = SpamCorpus.CountOccurencesOf(word);

        if (occurencesOfWordInSpam == 0)
        {
            beliefInCorpusBeingSpam = beliefInCorpusBeingSpam * unseenInSpamFactor;
        }
        else
        {
            var beliefInWordOccuringAtAll = CompleteCorpus.CountOccurencesOf(word) / countAllOccurences;
            var beliefInEvidenceAndSpam = occurencesOfWordInSpam / countAllOccurences;

            beliefInCorpusBeingSpam = beliefInCorpusBeingSpam * beliefInEvidenceAndSpam / beliefInWordOccuringAtAll;
        }
    });

    Debug.WriteLine(String.Format("Belief: {0}", beliefInCorpusBeingSpam));

    return beliefInCorpusBeingSpam > spamThreshold;
}
/// <summary>
/// Checks whether every word key of <paramref name="corpus"/> is also a key in this
/// instance's <c>Words</c>.
/// </summary>
/// <param name="corpus">The corpus whose words are looked up.</param>
/// <returns>
/// <c>true</c> if no word of <paramref name="corpus"/> is missing from <c>Words</c>
/// (including when it has no words at all); otherwise <c>false</c>.
/// </returns>
public bool Contains(Corpus corpus)
{
    foreach (var word in corpus.Words)
    {
        // A single missing word decides the answer, so stop scanning right away.
        if (!Words.ContainsKey(word.Key))
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Merges the word counts of <paramref name="corpus"/> into this instance's <c>Words</c>.
/// </summary>
/// <param name="corpus">The corpus whose word/count pairs are merged in.</param>
/// <remarks>
/// A word that is not yet present is stored with the incoming count; a word that is already
/// present has the incoming count added to its stored count.
/// </remarks>
public void Add(Corpus corpus)
{
    foreach (var pair in corpus.Words)
    {
        var word = pair.Key;
        var count = pair.Value;

        if (Words.ContainsKey(word))
        {
            // Add the full incoming count; incrementing by one would drop repeated occurrences.
            Words[word] += count;
        }
        else
        {
            Words.Add(word, count);
        }
    }
}
/// <summary>
/// Returns the ratio of the occurrences of <paramref name="word"/> in
/// <paramref name="corpus"/> to the count of all occurrences in that corpus.
/// </summary>
/// <param name="word">The word to look up.</param>
/// <param name="corpus">The corpus queried for both counts.</param>
/// <returns>The word's occurrence count divided by the total occurrence count, as a double.</returns>
public double BeliefOf(string word, Corpus corpus)
{
    var wordHits = corpus.CountOccurencesOf(word);

    // Converting the total to double makes the division floating-point.
    var totalHits = (double) corpus.CountAllOccurences();

    return wordHits / totalHits;
}
/// <summary>
/// Records <paramref name="corpus"/> as spam by adding it to <c>SpamCorpus</c> and then
/// to <c>CompleteCorpus</c>.
/// </summary>
/// <param name="corpus">The corpus to add to both collections.</param>
public void SpamFound(Corpus corpus)
{
    // Spam-specific counts first, then the overall counts.
    SpamCorpus.Add(corpus);
    CompleteCorpus.Add(corpus);
}