/// <summary>
/// Builds (or loads from <c>VectorsCache</c>) the dictionary context vector for a Russian word.
/// The vector has one slot per entry of <c>BgRuDictionary.DictionaryBgWords</c>; each slot is the
/// sum of the context values of all Russian translations of that Bulgarian dictionary word.
/// </summary>
/// <param name="ruWord">The Russian word whose context vector is requested.</param>
/// <returns>
/// The cached vector when one exists for <paramref name="ruWord"/>; otherwise a newly computed
/// vector, which is also stored in the cache before being returned.
/// </returns>
private static double[] CalculateRuDictionaryContextVector(string ruWord)
{
    // A cached vector short-circuits all of the work below.
    if (VectorsCache.IsInCache(ruWord, LANG_CODE_RU))
    {
        return VectorsCache.LoadFromCache(ruWord, LANG_CODE_RU);
    }

    // Pick the context retrieval strategy; the indirect-context flag takes
    // precedence over the query-lemmatization flag.
    WordsAndCounts context;
    if (useIndirectContext)
    {
        context = SemanticSimilarityUtils.RetrieveContextWordsWithIndirectContext(ruWord, LANG_CODE_RU);
    }
    else if (useQueryLemmatization)
    {
        context = SemanticSimilarityUtils.RetrieveContextWordsForAllWordForms(ruWord, LANG_CODE_RU);
    }
    else
    {
        context = SemanticSimilarityUtils.RetrieveContextWords(ruWord, LANG_CODE_RU);
    }

    if (useTFIDF)
    {
        ApplyTFIDFWeighting(context, LANG_CODE_RU);
    }

    // Forward pass: for every Bulgarian dictionary word, add up the context
    // values of each of its Russian translations.
    string[] bgEntries = BgRuDictionary.DictionaryBgWords;
    double[] vector = new double[bgEntries.Length];
    for (int slot = 0; slot < bgEntries.Length; slot++)
    {
        double forwardSum = 0;
        foreach (string translation in BgRuDictionary.GetTranslations(bgEntries[slot]))
        {
            forwardSum += context[translation];
        }
        vector[slot] = forwardSum;
    }

    if (useReverseContext)
    {
        // Reverse pass: a slot keeps a value only if the forward weight reaches
        // the threshold; the kept value is capped by how often ruWord shows up
        // in the contexts of the translations themselves.
        for (int slot = 0; slot < bgEntries.Length; slot++)
        {
            double forward = vector[slot];

            // Negated ">=" (rather than "<") so that a NaN weight is zeroed too.
            if (!(forward >= SemanticSimilarityUtils.minWordOccurencesForReverseOrIndirectContext))
            {
                vector[slot] = 0;
                continue;
            }

            double reverseTotal = 0;
            List<string> translations = BgRuDictionary.GetTranslations(bgEntries[slot]);
            foreach (string translation in translations)
            {
                WordsAndCounts translationContext =
                    SemanticSimilarityUtils.RetrieveContextWords(translation, LANG_CODE_RU);
                reverseTotal += GetWordCountInContext(ruWord, LANG_CODE_RU, translationContext);
            }

            vector[slot] = Math.Min(forward, reverseTotal);
        }
    }

    // Remember the result so the next request for this word hits the cache.
    VectorsCache.AddToCache(ruWord, LANG_CODE_RU, vector);
    return vector;
}
/// <summary>
/// Builds (or loads from <c>VectorsCache</c>) the dictionary context vector for a Bulgarian word.
/// The vector has one slot per entry of <c>BgRuDictionary.DictionaryBgWords</c>; each slot holds
/// the value the word's context reports for that dictionary word.
/// </summary>
/// <param name="bgWord">The Bulgarian word whose context vector is requested.</param>
/// <returns>
/// The cached vector when one exists for <paramref name="bgWord"/>; otherwise a newly computed
/// vector, which is also stored in the cache before being returned.
/// </returns>
private static double[] CalculateBgDictionaryContextVector(string bgWord)
{
    // A cached vector short-circuits all of the work below.
    if (VectorsCache.IsInCache(bgWord, LANG_CODE_BG))
    {
        return VectorsCache.LoadFromCache(bgWord, LANG_CODE_BG);
    }

    // Pick the context retrieval strategy; the indirect-context flag takes
    // precedence over the query-lemmatization flag.
    WordsAndCounts context;
    if (useIndirectContext)
    {
        context = SemanticSimilarityUtils.RetrieveContextWordsWithIndirectContext(bgWord, LANG_CODE_BG);
    }
    else if (useQueryLemmatization)
    {
        context = SemanticSimilarityUtils.RetrieveContextWordsForAllWordForms(bgWord, LANG_CODE_BG);
    }
    else
    {
        context = SemanticSimilarityUtils.RetrieveContextWords(bgWord, LANG_CODE_BG);
    }

    if (useTFIDF)
    {
        ApplyTFIDFWeighting(context, LANG_CODE_BG);
    }

    // Forward pass: look up every dictionary word directly in the word's context.
    string[] bgEntries = BgRuDictionary.DictionaryBgWords;
    double[] vector = new double[bgEntries.Length];
    int slot = 0;
    foreach (string entry in bgEntries)
    {
        vector[slot] = context[entry];
        slot++;
    }

    if (useReverseContext)
    {
        // Reverse pass: a slot keeps a value only if the forward weight reaches
        // the threshold; the kept value is capped by how often bgWord shows up
        // in the dictionary word's own context.
        for (int position = 0; position < bgEntries.Length; position++)
        {
            double forward = vector[position];

            // Negated ">=" (rather than "<") so that a NaN weight is zeroed too.
            if (!(forward >= SemanticSimilarityUtils.minWordOccurencesForReverseOrIndirectContext))
            {
                vector[position] = 0;
                continue;
            }

            WordsAndCounts entryContext =
                SemanticSimilarityUtils.RetrieveContextWords(bgEntries[position], LANG_CODE_BG);
            double reverse = GetWordCountInContext(bgWord, LANG_CODE_BG, entryContext);
            vector[position] = Math.Min(forward, reverse);
        }
    }

    // Remember the result so the next request for this word hits the cache.
    VectorsCache.AddToCache(bgWord, LANG_CODE_BG, vector);
    return vector;
}