Пример #1
0
 /// <summary>
 /// Creates an index processor and stores the supplied collaborators on the instance.
 /// </summary>
 /// <param name="wordsExtractor">Words extractor kept in the <c>wordsExtractor</c> member.</param>
 /// <param name="documentWithExtractedWordsStore">Store kept in the <c>documentWithExtractedWordsStore</c> member.</param>
 public IndexProcessor(IWordsExtractor wordsExtractor, IDocumentWithExtractedWordsStore documentWithExtractedWordsStore)
 {
     // The two assignments are independent of each other.
     this.documentWithExtractedWordsStore = documentWithExtractedWordsStore;
     this.wordsExtractor = wordsExtractor;
 }
Пример #2
0
        /// <summary>
        /// Computes, for both the spam and the ham part of the training sample, the per-word
        /// message counts and the total number of message files.
        /// </summary>
        /// <param name="trainingSample">Provides the spam and ham message files.</param>
        /// <param name="wordsExtractor">Forwarded to <c>CalculateWordsCount</c>.</param>
        /// <param name="wordsPreprocessor">Forwarded to <c>CalculateWordsCount</c>.</param>
        public ProbabilityProvider(ITrainingSample trainingSample, IWordsExtractor wordsExtractor, IWordsPreprocessor wordsPreprocessor)
        {
            // Materialise each file sequence exactly once. Counting and word extraction then
            // operate on the same snapshot, and a lazily produced sequence is not enumerated twice.
            var spamFiles = trainingSample.SpamFiles.ToList();
            spamCountWithWord = CalculateWordsCount(spamFiles, wordsExtractor, wordsPreprocessor);
            totalSpamCount    = spamFiles.Count;

            var hamFiles = trainingSample.HamFiles.ToList();
            hamCountWithWord = CalculateWordsCount(hamFiles, wordsExtractor, wordsPreprocessor);
            totalHamCount    = hamFiles.Count;
        }
 /// <summary>
 /// Creates a spam filter and stores the supplied collaborators on the instance.
 /// </summary>
 /// <param name="probabilityProvider">Provider kept in the <c>probabilityProvider</c> member.</param>
 /// <param name="wordsExtractor">Words extractor kept in the <c>wordsExtractor</c> member.</param>
 /// <param name="wordsPreprocessor">Words preprocessor kept in the <c>wordsPreprocessor</c> member.</param>
 public BayesianSpamFilter(
     IProbabilityProvider probabilityProvider,
     IWordsExtractor wordsExtractor,
     IWordsPreprocessor wordsPreprocessor)
 {
     // The assignments are independent of each other.
     this.wordsPreprocessor = wordsPreprocessor;
     this.wordsExtractor = wordsExtractor;
     this.probabilityProvider = probabilityProvider;
 }
Пример #4
0
        /// <summary>
        /// Counts, for every distinct preprocessed word, how many of the given message files
        /// contain that word at least once.
        /// </summary>
        /// <param name="msgFiles">Message files to scan; each one is opened for reading.</param>
        /// <param name="wordsExtractor">Extracts the raw words from an opened message stream.</param>
        /// <param name="wordsPreprocessor">Transforms the raw words before they are counted.</param>
        /// <returns>
        /// A dictionary mapping each word to the number of files whose preprocessed words include it.
        /// </returns>
        private static Dictionary<string, int> CalculateWordsCount(IEnumerable<FileInfo> msgFiles, IWordsExtractor wordsExtractor, IWordsPreprocessor wordsPreprocessor)
        {
            var messageCountByWord = new Dictionary<string, int>();

            foreach (var messageFile in msgFiles)
            {
                // Close the file handle deterministically instead of leaving it to the finalizer.
                // The words are consumed inside the using block so that a lazily evaluated
                // extractor still reads from an open stream.
                using (var messageStream = messageFile.OpenRead())
                {
                    var rawWords          = wordsExtractor.ExtractWords(messageStream);
                    var preprocessedWords = wordsPreprocessor.PreprocessWords(rawWords);

                    // Distinct() makes each word count at most once per message.
                    foreach (var word in preprocessedWords.Distinct())
                    {
                        // TryGetValue leaves the count at 0 for an unseen word; single lookup.
                        int messagesSoFar;
                        messageCountByWord.TryGetValue(word, out messagesSoFar);
                        messageCountByWord[word] = messagesSoFar + 1;
                    }
                }
            }

            return messageCountByWord;
        }