/// <summary>
/// Creates an index processor and stores the supplied collaborators in its fields.
/// </summary>
/// <param name="wordsExtractor">Words extractor kept for later use by this instance.</param>
/// <param name="documentWithExtractedWordsStore">Store kept for later use by this instance.</param>
public IndexProcessor(
    IWordsExtractor wordsExtractor,
    IDocumentWithExtractedWordsStore documentWithExtractedWordsStore)
{
    // The two assignments are independent of each other.
    this.documentWithExtractedWordsStore = documentWithExtractedWordsStore;
    this.wordsExtractor = wordsExtractor;
}
/// <summary>
/// Computes the per-word message counts and the total message counts for the
/// spam and ham files exposed by <paramref name="trainingSample"/>.
/// </summary>
/// <param name="trainingSample">Supplies the spam and ham message files.</param>
/// <param name="wordsExtractor">Passed through to <c>CalculateWordsCount</c> for both file sets.</param>
/// <param name="wordsPreprocessor">Passed through to <c>CalculateWordsCount</c> for both file sets.</param>
public ProbabilityProvider(ITrainingSample trainingSample, IWordsExtractor wordsExtractor, IWordsPreprocessor wordsPreprocessor)
{
    // Materialize each file sequence exactly once: the per-word counts and the
    // total are then guaranteed to be derived from the same set of files, and a
    // lazily evaluated sequence is not enumerated twice.
    var spamFiles = trainingSample.SpamFiles.ToList();
    spamCountWithWord = CalculateWordsCount(spamFiles, wordsExtractor, wordsPreprocessor);
    totalSpamCount = spamFiles.Count;

    var hamFiles = trainingSample.HamFiles.ToList();
    hamCountWithWord = CalculateWordsCount(hamFiles, wordsExtractor, wordsPreprocessor);
    totalHamCount = hamFiles.Count;
}
/// <summary>
/// Creates a spam filter and stores the supplied collaborators in its fields.
/// </summary>
/// <param name="probabilityProvider">Probability provider kept for later use by this instance.</param>
/// <param name="wordsExtractor">Words extractor kept for later use by this instance.</param>
/// <param name="wordsPreprocessor">Words preprocessor kept for later use by this instance.</param>
public BayesianSpamFilter(
    IProbabilityProvider probabilityProvider,
    IWordsExtractor wordsExtractor,
    IWordsPreprocessor wordsPreprocessor)
{
    // The three assignments are independent of each other.
    this.wordsPreprocessor = wordsPreprocessor;
    this.wordsExtractor = wordsExtractor;
    this.probabilityProvider = probabilityProvider;
}
/// <summary>
/// Counts, for every preprocessed word, the number of message files in which it occurs.
/// </summary>
/// <param name="msgFiles">Message files to scan; each one is opened for reading.</param>
/// <param name="wordsExtractor">Extracts the raw words from an opened file stream.</param>
/// <param name="wordsPreprocessor">Transforms the raw words before they are counted.</param>
/// <returns>
/// A dictionary mapping each word to the number of files that contain it at least once
/// (repeated occurrences within one file are counted once).
/// </returns>
private static Dictionary<string, int> CalculateWordsCount(IEnumerable<FileInfo> msgFiles, IWordsExtractor wordsExtractor, IWordsPreprocessor wordsPreprocessor)
{
    var messageCountByWord = new Dictionary<string, int>();

    foreach (var messageFile in msgFiles)
    {
        // Dispose the stream deterministically so file handles are not left open
        // until finalization. The whole per-file pipeline stays inside the using
        // block because extraction/preprocessing may read the stream lazily.
        using (var messageStream = messageFile.OpenRead())
        {
            var rawWords = wordsExtractor.ExtractWords(messageStream);
            var preprocessedWords = wordsPreprocessor.PreprocessWords(rawWords);

            // Distinct() ensures a word contributes at most one count per file.
            foreach (var word in preprocessedWords.Distinct())
            {
                // TryGetValue leaves the counter at 0 for unseen words and
                // avoids the ContainsKey + indexer double lookup.
                int seenInMessages;
                messageCountByWord.TryGetValue(word, out seenInMessages);
                messageCountByWord[word] = seenInMessages + 1;
            }
        }
    }

    return messageCountByWord;
}