示例#1
0
        /// <summary>
        /// Splits <paramref name="text"/> into words, runs them through the words
        /// preprocessor and hands the result to <c>ProcessWords</c>.
        /// </summary>
        /// <param name="text">Text to split into words.</param>
        /// <param name="action">Passed through unchanged to <c>ProcessWords</c>.</param>
        /// <returns>Whatever <c>ProcessWords</c> returns for the preprocessed words.</returns>
        private Result ProcessText(string text, Action action)
        {
            var preprocessedWords = wordsPreprocessor.PreprocessWords(TextUtils.SplitTextByWords(text));

            return ProcessWords(preprocessedWords, action);
        }
示例#2
0
        /// <summary>
        /// Reads the lines of the file at <c>wordsDirectoryProvider.WordsDirectory</c>,
        /// preprocesses them and counts how many times each resulting word occurs.
        /// </summary>
        /// <param name="wordsDirectoryProvider">Supplies the path passed to <c>File.ReadLines</c>.</param>
        /// <param name="preprocessor">Preprocesses the lines read from the file.</param>
        /// <returns>Map from each preprocessed word to its number of occurrences.</returns>
        private static Dictionary <string, int> ReadWords(IWordsDirectoryProvider wordsDirectoryProvider, IWordsPreprocessor preprocessor)
        {
            var rawLines          = File.ReadLines(wordsDirectoryProvider.WordsDirectory);
            var preprocessedWords = preprocessor.PreprocessWords(rawLines);
            var sameWordGroups    = preprocessedWords.GroupBy(word => word);

            // Key is the word itself, value is the size of its group.
            return sameWordGroups.ToDictionary(sameWords => sameWords.Key, sameWords => sameWords.Count());
        }
示例#3
0
 /// <summary>
 /// Builds the tag collection: reads text via <c>textReader.ReadText</c>, preprocesses the
 /// words, orders the entries by descending value, keeps the first
 /// <c>cloudSettings.WordsToDisplay</c> of them and passes them to <c>algorithm.GenerateTags</c>.
 /// </summary>
 /// <returns>
 /// The result of the chain below. NOTE(review): presumably a failed result when any
 /// step fails - confirm against the Result implementation.
 /// </returns>
 public Result <IReadOnlyCollection <Tag> > GetData()
 {
     return(Result.Of(textReader.ReadText)
            // NOTE(review): lines.Value implies the value handed to this step is itself a wrapper
            // exposing Value - confirm what ReadText returns.
            .Then(lines => wordsPreprocessor.PreprocessWords(lines.Value))
            // logger.Log is registered here and once more at the end of the chain.
            // NOTE(review): if OnFail passes the failure along, an early failure may be logged twice - confirm.
            .OnFail(logger.Log)
            // Entries with the largest Value come first.
            .Then(preprocessedWords => preprocessedWords
                  .OrderByDescending(e => e.Value))
            // Only the first WordsToDisplay entries are turned into a dictionary for GenerateTags;
            // the Value of what GenerateTags returns is passed on.
            .Then(orderedWords => algorithm.GenerateTags(orderedWords
                                                         .Take(cloudSettings.WordsToDisplay).ToDictionary(e => e.Key, e => e.Value)).Value)
            .OnFail(logger.Log));
 }
        /// <summary>
        /// Estimates the probability that the message in <paramref name="mimeMessageStream"/> is spam.
        /// </summary>
        /// <remarks>
        /// Words are extracted from the stream and preprocessed. For every word for which
        /// <c>probabilityProvider.HasProbability</c> is true, ln(1 - p) - ln(p) is accumulated,
        /// where p is the value of <c>GetProbabilityOf(MsgClass.Spam, word)</c>. The result is
        /// 1 / (e^sum + 1); when no word contributes, the sum is 0 and the result is 0.5.
        /// </remarks>
        /// <param name="mimeMessageStream">Stream passed to the words extractor.</param>
        /// <returns>The combined spam probability computed from the contributing words.</returns>
        public double GetProbabilityOfSpam(Stream mimeMessageStream)
        {
            var words = wordsPreprocessor.PreprocessWords(wordsExtractor.ExtractWords(mimeMessageStream));

            double exponent = 0;
            foreach (var word in words)
            {
                // Words without a known probability do not contribute to the sum.
                if (!probabilityProvider.HasProbability(word))
                {
                    continue;
                }

                var spamGivenWord = probabilityProvider.GetProbabilityOf(MsgClass.Spam, word);
                exponent += Math.Log(1 - spamGivenWord) - Math.Log(spamGivenWord);
            }

            return 1 / (Math.Exp(exponent) + 1);
        }
示例#5
0
        /// <summary>
        /// Counts, for every word, the number of message files that contain it at least once.
        /// </summary>
        /// <param name="msgFiles">Message files to scan; each one is opened for reading.</param>
        /// <param name="wordsExtractor">Extracts raw words from an opened message stream.</param>
        /// <param name="wordsPreprocessor">Preprocesses the extracted words before counting.</param>
        /// <returns>Map from word to the number of files in which that word occurs.</returns>
        private static Dictionary <string, int> CalculateWordsCount(IEnumerable <FileInfo> msgFiles, IWordsExtractor wordsExtractor, IWordsPreprocessor wordsPreprocessor)
        {
            var wordToCountMessagesWithThisWord = new Dictionary <string, int>();

            foreach (var messageFile in msgFiles)
            {
                // Dispose the stream opened for each file so file handles are not leaked.
                // The words are fully enumerated inside the using block because the
                // extractor may read from the stream lazily.
                using (var messageStream = messageFile.OpenRead())
                {
                    var rawWords          = wordsExtractor.ExtractWords(messageStream);
                    var preprocessedWords = wordsPreprocessor.PreprocessWords(rawWords);

                    // Distinct() makes each word count at most once per file.
                    foreach (var word in preprocessedWords.Distinct())
                    {
                        // TryGetValue leaves the count at 0 for a word not seen before.
                        int messagesWithThisWord;
                        wordToCountMessagesWithThisWord.TryGetValue(word, out messagesWithThisWord);
                        wordToCountMessagesWithThisWord[word] = messagesWithThisWord + 1;
                    }
                }
            }

            return(wordToCountMessagesWithThisWord);
        }