Beispiel #1
0
        /// <summary>
        /// Compares two words of the same language by the cosine between their
        /// context-word count vectors.
        /// </summary>
        /// <param name="firstWord">First word of the pair.</param>
        /// <param name="secondWord">Second word of the pair.</param>
        /// <param name="langCode">Language code passed through to context retrieval and TF-IDF weighting.</param>
        /// <returns>
        /// The value returned by <c>VectorUtils.CalcCosinusBetweenWordsCounts</c> for the
        /// two context vectors. NOTE(review): this is presumably a cosine similarity
        /// (larger means more similar) rather than a distance - confirm against VectorUtils.
        /// </returns>
        /// <exception cref="NotSupportedException">
        /// Thrown when <c>vectorDiffAlgorithm</c> is anything other than
        /// <c>VectorDiffAlgorithm.COSINE</c>.
        /// </exception>
        public static double SemSim(
            string firstWord, string secondWord, string langCode)
        {
            // Both words go through the same retrieval strategy; first word is fetched first.
            WordsAndCounts firstWordContext = RetrieveSemSimContext(firstWord, langCode);
            WordsAndCounts secondWordContext = RetrieveSemSimContext(secondWord, langCode);

            if (useTFIDF)
            {
                // Return value (if any) is not used; presumably re-weights the counts in place - TODO confirm.
                ApplyTFIDFWeighting(firstWordContext, langCode);
                ApplyTFIDFWeighting(secondWordContext, langCode);
            }

            // The check deliberately stays after retrieval/weighting so that the order of
            // operations matches the previous implementation.
            if (vectorDiffAlgorithm != VectorDiffAlgorithm.COSINE)
            {
                throw new NotSupportedException("Algorithm not supported!");
            }

            return VectorUtils.CalcCosinusBetweenWordsCounts(
                firstWordContext, secondWordContext);
        }

        /// <summary>
        /// Retrieves the context words for a single word, choosing the retrieval method
        /// from the configuration flags. Precedence: reverse context, then query
        /// lemmatization (all word forms), then indirect context, then plain retrieval.
        /// </summary>
        /// <param name="word">Word whose context is retrieved.</param>
        /// <param name="langCode">Language code passed through to the retrieval call.</param>
        /// <returns>The context words and counts returned by the selected retrieval method.</returns>
        private static WordsAndCounts RetrieveSemSimContext(string word, string langCode)
        {
            if (useReverseContext)
            {
                return SemanticSimilarityUtils.
                       RetrieveContextWordsWithReverseContext(word, langCode);
            }

            if (useQueryLemmatization)
            {
                return SemanticSimilarityUtils.
                       RetrieveContextWordsForAllWordForms(word, langCode);
            }

            if (useIndirectContext)
            {
                return SemanticSimilarityUtils.
                       RetrieveContextWordsWithIndirectContext(word, langCode);
            }

            return SemanticSimilarityUtils.
                   RetrieveContextWords(word, langCode);
        }