/// <summary>
/// Returns the merged context words of every word form of <paramref name="word"/>,
/// with each count divided by the number of word forms. Results are stored in and
/// served from <c>AllWordFormsContextCache</c>.
/// </summary>
/// <param name="word">The word whose word forms are looked up.</param>
/// <param name="langCode">Language code passed through to the dictionary, retrieval and cache calls.</param>
/// <returns>The normalized context words and counts for all word forms of the word.</returns>
private static WordsAndCounts RetrieveContextWordsForAllWordForms(string word, string langCode)
{
    // Fast path: a previously computed result is returned straight from the cache.
    if (AllWordFormsContextCache.IsInCache(word, langCode))
    {
        return AllWordFormsContextCache.LoadFromCache(word, langCode);
    }

    // Retrieve and merge the contexts of all word forms of the given word.
    ICollection<string> wordForms = LemmaDictionaryUtils.GetAllWordForms(word, langCode);
    WordsAndCounts merged = new WordsAndCounts();
    foreach (string form in wordForms)
    {
        merged.AddAll(RetrieveContextWords(form, langCode));
    }

    // Normalize the counts. The keys are copied into a separate list first so that
    // the entries can be rewritten while walking over them.
    List<string> keys = new List<string>(merged.Words);
    int formCount = wordForms.Count;
    for (int i = 0; i < keys.Count; i++)
    {
        string key = keys[i];
        double rawCount = merged[key];
        merged[key] = rawCount / formCount;
    }

    AllWordFormsContextCache.AddToCache(word, langCode, merged);
    return merged;
}
/// <summary>
/// Looks up how often <paramref name="word"/> occurs in <paramref name="context"/>.
/// When <c>useLemmatization</c> is set, the counts of all word forms of the word are
/// summed; otherwise only the count stored for the word itself is returned.
/// </summary>
/// <param name="word">The word to look up.</param>
/// <param name="langCode">Language code used to obtain the word forms.</param>
/// <param name="context">The context words and counts to read from.</param>
/// <returns>The count of the word (or the sum over its word forms) in the context.</returns>
private static double GetWordCountInContext(string word, String langCode, WordsAndCounts context)
{
    // Without lemmatization only the exact word is counted.
    if (!useLemmatization)
    {
        return context[word];
    }

    // With lemmatization every word form contributes to the total.
    double total = 0;
    foreach (string form in LemmaDictionaryUtils.GetAllWordForms(word, langCode))
    {
        total += context[form];
    }

    return total;
}
/// <summary>
/// Builds a lookup dictionary whose keys are the word forms of <paramref name="word"/>.
/// When <c>useLemmatization</c> is set, the keys are all word forms returned by
/// <c>LemmaDictionaryUtils.GetAllWordForms</c>; otherwise the only key is the word itself.
/// Every key maps to <c>true</c>.
/// </summary>
/// <param name="word">The word whose word forms become the dictionary keys.</param>
/// <param name="langCode">Language code used to obtain the word forms.</param>
/// <returns>A new dictionary containing one entry per distinct word form.</returns>
private static Dictionary<string, bool> GetAllWordFormsDictionary(string word, string langCode)
{
    Dictionary<string, bool> allWordFormsDict = new Dictionary<string, bool>();

    if (!useLemmatization)
    {
        allWordFormsDict[word] = true;
        return allWordFormsDict;
    }

    foreach (string wordForm in LemmaDictionaryUtils.GetAllWordForms(word, langCode))
    {
        // Indexer assignment instead of Add: Add throws ArgumentException when the
        // same word form occurs twice in the collection, while the indexer simply
        // overwrites the existing entry with the same value.
        allWordFormsDict[wordForm] = true;
    }

    return allWordFormsDict;
}