Exemplo n.º 1
0
        /// <summary>
        /// Builds the dictionary context vector for <paramref name="ruWord"/>: one element per
        /// entry of <c>BgRuDictionary.DictionaryBgWords</c>, in the same order.
        /// </summary>
        /// <remarks>
        /// If <c>VectorsCache</c> already holds a vector for the word under <c>LANG_CODE_RU</c>,
        /// that vector is returned and nothing is recomputed. Otherwise the word's context is
        /// retrieved (strategy chosen by <c>useIndirectContext</c> / <c>useQueryLemmatization</c>),
        /// optionally passed through <c>ApplyTFIDFWeighting</c> when <c>useTFIDF</c> is set, and
        /// matched against the translations of every dictionary word. When
        /// <c>useReverseContext</c> is set, each element is additionally capped by the reverse
        /// count, or zeroed if it is below the minimum occurrence threshold. The result is stored
        /// in the cache before it is returned.
        /// </remarks>
        /// <param name="ruWord">The word whose context vector is requested.</param>
        /// <returns>
        /// An array as long as the dictionary word list; element <c>i</c> is the summed context
        /// count of all translations of dictionary word <c>i</c> (after the optional reverse step).
        /// </returns>
        private static double[] CalculateRuDictionaryContextVector(string ruWord)
        {
            // A vector computed earlier short-circuits everything below
            if (VectorsCache.IsInCache(ruWord, LANG_CODE_RU))
            {
                return VectorsCache.LoadFromCache(ruWord, LANG_CODE_RU);
            }

            // Pick the context retrieval strategy; the indirect context wins over lemmatization
            WordsAndCounts context;
            if (!useIndirectContext && !useQueryLemmatization)
            {
                context = SemanticSimilarityUtils.RetrieveContextWords(ruWord, LANG_CODE_RU);
            }
            else if (useIndirectContext)
            {
                context = SemanticSimilarityUtils.RetrieveContextWordsWithIndirectContext(ruWord, LANG_CODE_RU);
            }
            else
            {
                context = SemanticSimilarityUtils.RetrieveContextWordsForAllWordForms(ruWord, LANG_CODE_RU);
            }

            if (useTFIDF)
            {
                ApplyTFIDFWeighting(context, LANG_CODE_RU);
            }

            // Forward pass: for every dictionary word, add up the context counts of its translations
            string[] bgEntries = BgRuDictionary.DictionaryBgWords;
            double[] vector = new double[bgEntries.Length];
            for (int index = 0; index < vector.Length; index++)
            {
                List<string> translations = BgRuDictionary.GetTranslations(bgEntries[index]);
                double matched = 0;
                foreach (string translation in translations)
                {
                    matched += context[translation];
                }
                vector[index] = matched;
            }

            if (useReverseContext)
            {
                // Reverse pass: keep a forward count only as far as the translations' own
                // contexts contain the queried word as well
                for (int index = 0; index < vector.Length; index++)
                {
                    double forward = vector[index];

                    // Negated ">=" (rather than "<") so that a NaN count is zeroed too
                    if (!(forward >= SemanticSimilarityUtils.minWordOccurencesForReverseOrIndirectContext))
                    {
                        vector[index] = 0;
                        continue;
                    }

                    List<string> translations = BgRuDictionary.GetTranslations(bgEntries[index]);
                    double reverse = 0;
                    foreach (string translation in translations)
                    {
                        WordsAndCounts translationContext =
                            SemanticSimilarityUtils.RetrieveContextWords(translation, LANG_CODE_RU);
                        reverse += GetWordCountInContext(ruWord, LANG_CODE_RU, translationContext);
                    }

                    vector[index] = Math.Min(forward, reverse);
                }
            }

            // Remember the result for subsequent calls
            VectorsCache.AddToCache(ruWord, LANG_CODE_RU, vector);

            return vector;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Builds the dictionary context vector for <paramref name="bgWord"/>: one element per
        /// entry of <c>BgRuDictionary.DictionaryBgWords</c>, in the same order.
        /// </summary>
        /// <remarks>
        /// If <c>VectorsCache</c> already holds a vector for the word under <c>LANG_CODE_BG</c>,
        /// that vector is returned and nothing is recomputed. Otherwise the word's context is
        /// retrieved (strategy chosen by <c>useIndirectContext</c> / <c>useQueryLemmatization</c>),
        /// optionally passed through <c>ApplyTFIDFWeighting</c> when <c>useTFIDF</c> is set, and
        /// each dictionary word is looked up in it directly. When <c>useReverseContext</c> is
        /// set, each element is additionally capped by the reverse count, or zeroed if it is
        /// below the minimum occurrence threshold. The result is stored in the cache before it
        /// is returned.
        /// </remarks>
        /// <param name="bgWord">The word whose context vector is requested.</param>
        /// <returns>
        /// An array as long as the dictionary word list; element <c>i</c> is the context count of
        /// dictionary word <c>i</c> (after the optional reverse step).
        /// </returns>
        private static double[] CalculateBgDictionaryContextVector(string bgWord)
        {
            // A vector computed earlier short-circuits everything below
            if (VectorsCache.IsInCache(bgWord, LANG_CODE_BG))
            {
                return VectorsCache.LoadFromCache(bgWord, LANG_CODE_BG);
            }

            // Pick the context retrieval strategy; the indirect context wins over lemmatization
            WordsAndCounts context;
            if (!useIndirectContext && !useQueryLemmatization)
            {
                context = SemanticSimilarityUtils.RetrieveContextWords(bgWord, LANG_CODE_BG);
            }
            else if (useIndirectContext)
            {
                context = SemanticSimilarityUtils.RetrieveContextWordsWithIndirectContext(bgWord, LANG_CODE_BG);
            }
            else
            {
                context = SemanticSimilarityUtils.RetrieveContextWordsForAllWordForms(bgWord, LANG_CODE_BG);
            }

            if (useTFIDF)
            {
                ApplyTFIDFWeighting(context, LANG_CODE_BG);
            }

            // Forward pass: the count of each dictionary word inside the retrieved context
            string[] bgEntries = BgRuDictionary.DictionaryBgWords;
            double[] vector = new double[bgEntries.Length];
            for (int index = 0; index < vector.Length; index++)
            {
                vector[index] = context[bgEntries[index]];
            }

            if (useReverseContext)
            {
                // Reverse pass: keep a forward count only as far as the dictionary word's own
                // context contains the queried word as well
                for (int index = 0; index < vector.Length; index++)
                {
                    double forward = vector[index];

                    // Negated ">=" (rather than "<") so that a NaN count is zeroed too
                    if (!(forward >= SemanticSimilarityUtils.minWordOccurencesForReverseOrIndirectContext))
                    {
                        vector[index] = 0;
                        continue;
                    }

                    WordsAndCounts entryContext =
                        SemanticSimilarityUtils.RetrieveContextWords(bgEntries[index], LANG_CODE_BG);
                    double reverse = GetWordCountInContext(bgWord, LANG_CODE_BG, entryContext);

                    vector[index] = Math.Min(forward, reverse);
                }
            }

            // Remember the result for subsequent calls
            VectorsCache.AddToCache(bgWord, LANG_CODE_BG, vector);

            return vector;
        }