/// <summary>
/// Returns the combined context words of every word form of <paramref name="word"/>,
/// with each count divided by the number of word forms.
/// </summary>
/// <remarks>
/// The result is looked up in <c>AllWordFormsContextCache</c> first and stored there
/// after it has been computed.
/// </remarks>
/// <param name="word">The word whose word forms are looked up.</param>
/// <param name="langCode">Language code passed through to the dictionary, context and cache lookups.</param>
/// <returns>The merged and normalized context words with their counts.</returns>
private static WordsAndCounts RetrieveContextWordsForAllWordForms(string word, string langCode)
{
    // Serve a previously computed result when one is available.
    if (AllWordFormsContextCache.IsInCache(word, langCode))
    {
        return AllWordFormsContextCache.LoadFromCache(word, langCode);
    }

    // Retrieve the context of each word form and merge everything into one result.
    ICollection<string> forms = LemmaDictionaryUtils.GetAllWordForms(word, langCode);
    WordsAndCounts merged = new WordsAndCounts();
    foreach (string form in forms)
    {
        merged.AddAll(RetrieveContextWords(form, langCode));
    }

    // Normalize the counts by the number of word forms. The words are copied into
    // a separate list first so that writing through the indexer does not happen
    // while the live Words collection is being enumerated.
    int formCount = forms.Count;
    List<string> wordSnapshot = new List<string>(merged.Words);
    foreach (string contextWord in wordSnapshot)
    {
        double normalized = merged[contextWord] / formCount;
        merged[contextWord] = normalized;
    }

    AllWordFormsContextCache.AddToCache(word, langCode, merged);
    return merged;
}
/// <summary>
/// Returns the context words of <paramref name="word"/> extended with the context
/// words of each of its own context words (the "indirect" context).
/// </summary>
/// <remarks>
/// Only context words whose count is at least
/// <c>SemanticSimilarityUtils.minWordOccurencesForReverseOrIndirectContext</c>
/// contribute their own context. The result is looked up in
/// <c>IndirectContextCache</c> first and stored there after it has been computed.
/// </remarks>
/// <param name="word">The word whose direct and indirect context is retrieved.</param>
/// <param name="langCode">Language code passed through to the context and cache lookups.</param>
/// <returns>The direct context merged with the qualifying indirect contexts.</returns>
public static WordsAndCounts RetrieveContextWordsWithIndirectContext(
    string word, string langCode)
{
    if (IndirectContextCache.IsInCache(word, langCode))
    {
        return IndirectContextCache.LoadFromCache(word, langCode);
    }

    WordsAndCounts directContext = RetrieveContextWords(word, langCode);

    // Accumulate into a fresh instance instead of mutating the object returned by
    // RetrieveContextWords: this method does not own that object, and if it is
    // shared (for example held in a cache) merging into it would silently alter
    // the direct context seen by every other caller.
    // NOTE(review): assumes AddAll on an empty instance reproduces the entries of
    // its argument, as RetrieveContextWordsForAllWordForms relies on - confirm.
    WordsAndCounts contextWordsAndCounts = new WordsAndCounts();
    contextWordsAndCounts.AddAll(directContext);

    // Snapshot the direct context words and their counts so that the loop below
    // works on the direct occurrences only, independent of what gets merged in.
    string[] contextWords = new string[directContext.Words.Count];
    directContext.Words.CopyTo(contextWords, 0);
    double[] contextWordsCounts = new double[directContext.Counts.Count];
    directContext.Counts.CopyTo(contextWordsCounts, 0);

    // Perform the indirect context lookup and merge in the occurrences of every
    // sufficiently frequent direct context word.
    for (int i = 0; i < contextWords.Length; i++)
    {
        string contextWord = contextWords[i];
        double occurences = contextWordsCounts[i];
        if (occurences >= SemanticSimilarityUtils.minWordOccurencesForReverseOrIndirectContext)
        {
            WordsAndCounts indirectContextWordsAndCounts =
                RetrieveContextWords(contextWord, langCode);
            contextWordsAndCounts.AddAll(indirectContextWordsAndCounts);
        }
    }

    IndirectContextCache.AddToCache(word, langCode, contextWordsAndCounts);
    return contextWordsAndCounts;
}