/// <summary>
/// Splits <paramref name="text"/> into words, passes them through the words
/// preprocessor and forwards the outcome to <c>ProcessWords</c>.
/// </summary>
/// <param name="text">Raw text that is split into words.</param>
/// <param name="action">Forwarded unchanged to <c>ProcessWords</c>.</param>
/// <returns>The value produced by <c>ProcessWords</c> for the preprocessed words.</returns>
private Result ProcessText(string text, Action action)
{
    // Stage 1: tokenise the raw text.
    var tokens = TextUtils.SplitTextByWords(text);

    // Stage 2: preprocess; the same local is reused so its declared type stays
    // the one returned by SplitTextByWords.
    tokens = wordsPreprocessor.PreprocessWords(tokens);

    // Stage 3: delegate the actual work.
    return ProcessWords(tokens, action);
}
/// <summary>
/// Reads lines from the file at <c>wordsDirectoryProvider.WordsDirectory</c>,
/// preprocesses them and counts how many times each resulting word occurs.
/// </summary>
/// <param name="wordsDirectoryProvider">
/// Supplies the path handed to <see cref="File.ReadLines(string)"/>; despite the
/// property name, the value is used as a file path.
/// </param>
/// <param name="preprocessor">Transforms the raw lines into the words that are counted.</param>
/// <returns>A dictionary mapping each preprocessed word to its number of occurrences.</returns>
private static Dictionary<string, int> ReadWords(IWordsDirectoryProvider wordsDirectoryProvider, IWordsPreprocessor preprocessor)
{
    var lines = File.ReadLines(wordsDirectoryProvider.WordsDirectory);

    // Bucket identical words together; each bucket's size is the word's frequency.
    var buckets =
        from word in preprocessor.PreprocessWords(lines)
        group word by word;

    return buckets.ToDictionary(bucket => bucket.Key, bucket => bucket.Count());
}
/// <summary>
/// Builds the tag collection: reads the text, preprocesses it into weighted words,
/// orders them by descending value, keeps the first
/// <c>cloudSettings.WordsToDisplay</c> entries and passes them to
/// <c>algorithm.GenerateTags</c>.
/// </summary>
/// <returns>
/// The result of the chain; failures are reported through <c>logger.Log</c> at two
/// points (after preprocessing and at the very end).
/// </returns>
public Result<IReadOnlyCollection<Tag>> GetData()
{
    // Read + preprocess, logging if either step failed.
    var preprocessed = Result.Of(textReader.ReadText)
        .Then(text => wordsPreprocessor.PreprocessWords(text.Value))
        .OnFail(logger.Log);

    // Highest-valued words first.
    var ranked = preprocessed
        .Then(words => words.OrderByDescending(entry => entry.Value));

    // NOTE(review): GenerateTags' result is unwrapped with .Value inside Then;
    // presumably a failure from GenerateTags is not propagated as a failure here — confirm.
    var tags = ranked
        .Then(words => algorithm.GenerateTags(
            words.Take(cloudSettings.WordsToDisplay)
                 .ToDictionary(entry => entry.Key, entry => entry.Value)).Value);

    // NOTE(review): if OnFail passes a failed result through unchanged, an early
    // failure would be logged by both OnFail calls — verify against Result's implementation.
    return tags.OnFail(logger.Log);
}
/// <summary>
/// Estimates the probability that the message in <paramref name="mimeMessageStream"/>
/// is spam by combining per-word spam probabilities in log space:
/// <c>1 / (1 + exp(sum(ln(1 - p) - ln(p))))</c>.
/// </summary>
/// <param name="mimeMessageStream">Stream handed to the words extractor; it is not closed here.</param>
/// <returns>
/// The combined probability. When no word has a known probability the sum is 0
/// and the result is 0.5.
/// </returns>
public double GetProbabilityOfSpam(Stream mimeMessageStream)
{
    var extractedWords = wordsExtractor.ExtractWords(mimeMessageStream);

    // Accumulates ln(1 - p) - ln(p) over every word with a known probability.
    // NOTE(review): a per-word probability of exactly 0 or 1 makes one of the
    // logarithms infinite — confirm the provider never returns those values.
    var exponent = 0.0;
    foreach (var word in wordsPreprocessor.PreprocessWords(extractedWords))
    {
        if (!probabilityProvider.HasProbability(word))
        {
            continue;
        }

        var spamGivenWord = probabilityProvider.GetProbabilityOf(MsgClass.Spam, word);
        exponent += Math.Log(1 - spamGivenWord) - Math.Log(spamGivenWord);
    }

    return 1 / (Math.Exp(exponent) + 1);
}
/// <summary>
/// For every distinct preprocessed word, counts in how many of the given message
/// files that word appears at least once.
/// </summary>
/// <param name="msgFiles">Message files to scan; each one is opened for reading.</param>
/// <param name="wordsExtractor">Extracts raw words from an opened message stream.</param>
/// <param name="wordsPreprocessor">Transforms the raw words before they are counted.</param>
/// <returns>A dictionary mapping each word to the number of messages containing it.</returns>
private static Dictionary<string, int> CalculateWordsCount(IEnumerable<FileInfo> msgFiles, IWordsExtractor wordsExtractor, IWordsPreprocessor wordsPreprocessor)
{
    var wordToCountMessagesWithThisWord = new Dictionary<string, int>();
    foreach (var messageFile in msgFiles)
    {
        // The stream is opened here, so it is released here. The words are fully
        // enumerated inside this scope, so a lazily evaluated extractor still
        // reads from an open stream.
        using (var messageStream = messageFile.OpenRead())
        {
            var rawWords = wordsExtractor.ExtractWords(messageStream);
            var preprocessedWords = wordsPreprocessor.PreprocessWords(rawWords);

            // Distinct() ensures a word contributes at most once per message.
            foreach (var word in preprocessedWords.Distinct())
            {
                // TryGetValue leaves the count at 0 for unseen words, giving a
                // single lookup instead of ContainsKey followed by the indexer.
                int messagesSoFar;
                wordToCountMessagesWithThisWord.TryGetValue(word, out messagesSoFar);
                wordToCountMessagesWithThisWord[word] = messagesSoFar + 1;
            }
        }
    }

    return wordToCountMessagesWithThisWord;
}