Example #1
0
        /// <summary>
        /// Builds the combined context of every word form of <paramref name="word"/>,
        /// with each count divided by the number of word forms.
        /// </summary>
        /// <param name="word">Word whose word forms are looked up.</param>
        /// <param name="langCode">Language code passed through to the lookups and the cache.</param>
        /// <returns>
        /// The entry stored in <c>AllWordFormsContextCache</c> when one exists for the
        /// word/language pair; otherwise a newly built result, which is added to that
        /// cache before being returned.
        /// </returns>
        private static WordsAndCounts RetrieveContextWordsForAllWordForms(string word, string langCode)
        {
            // Fast path: reuse a previously computed result.
            bool alreadyCached = AllWordFormsContextCache.IsInCache(word, langCode);
            if (alreadyCached)
            {
                return AllWordFormsContextCache.LoadFromCache(word, langCode);
            }

            // Retrieve and merge the contexts of all word forms of the given word.
            ICollection<string> wordForms = LemmaDictionaryUtils.GetAllWordForms(word, langCode);
            WordsAndCounts merged = new WordsAndCounts();
            foreach (string form in wordForms)
            {
                merged.AddAll(RetrieveContextWords(form, langCode));
            }

            // Normalize the counts. The keys are snapshotted first so that the
            // entries can be reassigned while we walk over them.
            List<string> keys = new List<string>(merged.Words);
            int formCount = wordForms.Count;
            for (int i = 0; i < keys.Count; i++)
            {
                string key = keys[i];
                double current = merged[key];
                merged[key] = current / formCount;
            }

            AllWordFormsContextCache.AddToCache(word, langCode, merged);
            return merged;
        }
Example #2
0
        /// <summary>
        /// Retrieves the context words of <paramref name="word"/> and merges into them
        /// the context words of every direct context word whose occurrence count is at
        /// least <c>SemanticSimilarityUtils.minWordOccurencesForReverseOrIndirectContext</c>.
        /// </summary>
        /// <param name="word">Word whose direct and indirect context is collected.</param>
        /// <param name="langCode">Language code passed through to the lookups and the cache.</param>
        /// <returns>
        /// The entry stored in <c>IndirectContextCache</c> when one exists for the
        /// word/language pair; otherwise a newly built result, which is added to that
        /// cache before being returned.
        /// </returns>
        public static WordsAndCounts RetrieveContextWordsWithIndirectContext(
            string word, string langCode)
        {
            if (IndirectContextCache.IsInCache(word, langCode))
            {
                WordsAndCounts cachedContextWordsAndCounts =
                    IndirectContextCache.LoadFromCache(word, langCode);
                return(cachedContextWordsAndCounts);
            }

            WordsAndCounts directContextWordsAndCounts = RetrieveContextWords(word, langCode);

            // Snapshot the direct context words and their counts before merging anything.
            string[] contextWords = new string[directContextWordsAndCounts.Words.Count];
            directContextWordsAndCounts.Words.CopyTo(contextWords, 0);
            double[] contextWordsCounts = new double[directContextWordsAndCounts.Counts.Count];
            directContextWordsAndCounts.Counts.CopyTo(contextWordsCounts, 0);

            // Accumulate into a fresh instance instead of the object returned by
            // RetrieveContextWords. That object may be shared with other callers
            // (e.g. held in a cache -- not visible from this method), and merging
            // into it in place would alter the direct context they see later.
            WordsAndCounts contextWordsAndCounts = new WordsAndCounts();
            contextWordsAndCounts.AddAll(directContextWordsAndCounts);

            // Perform indirect context lookup and recalculate the occurrences
            for (int i = 0; i < contextWords.Length; i++)
            {
                string contextWord = contextWords[i];
                double occurences  = contextWordsCounts[i];
                if (occurences >= SemanticSimilarityUtils.minWordOccurencesForReverseOrIndirectContext)
                {
                    WordsAndCounts indirectContextWordsAndCounts =
                        RetrieveContextWords(contextWord, langCode);
                    contextWordsAndCounts.AddAll(indirectContextWordsAndCounts);
                }
            }

            IndirectContextCache.AddToCache(word, langCode, contextWordsAndCounts);

            return(contextWordsAndCounts);
        }