/// <summary>
/// Combines the per-word spam probabilities of a message into a single value.
/// </summary>
/// <param name="mimeMessageStream">Stream handed to the words extractor to obtain the message's words.</param>
/// <returns>
/// <c>1 / (exp(S) + 1)</c>, where <c>S</c> is the sum of <c>ln(1 - p) - ln(p)</c> over every
/// preprocessed word for which the probability provider has a value. When no word qualifies,
/// <c>S</c> is 0 and the result is 0.5.
/// </returns>
public double GetProbabilityOfSpam(Stream mimeMessageStream)
        {
            var words = wordsPreprocessor.PreprocessWords(wordsExtractor.ExtractWords(mimeMessageStream));

            // Work in log space: accumulate ln(1 - p) - ln(p) for each known word.
            // Note: a per-word probability of exactly 0 or 1 makes one of the logarithms
            // negative infinity, so such a word contributes an infinite term to the sum.
            double logRatioSum = 0;
            foreach (var word in words)
            {
                if (!probabilityProvider.HasProbability(word))
                {
                    // Words without a known probability do not take part in the estimate.
                    continue;
                }

                var spamGivenWord = probabilityProvider.GetProbabilityOf(MsgClass.Spam, word);
                logRatioSum += Math.Log(1 - spamGivenWord) - Math.Log(spamGivenWord);
            }

            // Map the accumulated sum back to a value in [0, 1].
            return 1 / (Math.Exp(logRatioSum) + 1);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Counts, for each preprocessed word, the number of message files in which it occurs.
        /// A word is counted at most once per file, however many times it appears in that file.
        /// </summary>
        /// <param name="msgFiles">Message files to scan; each one is opened for reading.</param>
        /// <param name="wordsExtractor">Extracts raw words from an opened message stream.</param>
        /// <param name="wordsPreprocessor">Transforms the raw words before they are counted.</param>
        /// <returns>A dictionary mapping each word to the number of files that contain it.</returns>
        private static Dictionary <string, int> CalculateWordsCount(IEnumerable <FileInfo> msgFiles, IWordsExtractor wordsExtractor, IWordsPreprocessor wordsPreprocessor)
        {
            var wordToCountMessagesWithThisWord = new Dictionary <string, int>();

            foreach (var messageFile in msgFiles)
            {
                // Dispose the file stream once this message has been fully processed so that
                // file handles are not leaked. The using block spans the whole enumeration,
                // which keeps the stream open even if extraction/preprocessing is lazy.
                using (var messageStream = messageFile.OpenRead())
                {
                    var rawWords          = wordsExtractor.ExtractWords(messageStream);
                    var preprocessedWords = wordsPreprocessor.PreprocessWords(rawWords);

                    // Distinct() ensures each word is counted once per message.
                    foreach (var word in preprocessedWords.Distinct())
                    {
                        // TryGetValue leaves the count at 0 for an unseen word (single lookup).
                        int messagesWithWord;
                        wordToCountMessagesWithThisWord.TryGetValue(word, out messagesWithWord);
                        wordToCountMessagesWithThisWord[word] = messagesWithWord + 1;
                    }
                }
            }

            return wordToCountMessagesWithThisWord;
        }