/// <summary>
        /// Calculates the cosine between the occurrence vectors of two sets of counted words.
        /// </summary>
        /// <remarks>
        /// Both vectors are laid out over the union of the words of the two sets, so the same
        /// position in each vector refers to the same word. Every word of the union is looked
        /// up through the indexer of both arguments, including words that were listed by only
        /// one of them (presumably the indexer yields 0 for an unknown word - TODO confirm
        /// against WordsAndCounts).
        /// </remarks>
        /// <param name="wordsCounts1">The first set of words with their counts.</param>
        /// <param name="wordsCounts2">The second set of words with their counts.</param>
        /// <returns>
        /// The result of <c>VectorUtils.CalcCosinusBetweenVectors</c> for the two occurrence vectors.
        /// </returns>
        public static double CalcCosinusBetweenWordsCounts(
            WordsAndCounts wordsCounts1, WordsAndCounts wordsCounts2)
        {
            // Build the union of all words from the two sets. A HashSet silently ignores
            // a word it already holds, so no explicit membership check is required.
            HashSet<string> allWords = new HashSet<string>();

            foreach (string word in wordsCounts1.Words)
            {
                allWords.Add(word);
            }
            foreach (string word in wordsCounts2.Words)
            {
                allWords.Add(word);
            }

            // Fill both occurrence vectors in a single pass over the union so that
            // they are guaranteed to use the same word order.
            double[] vector1 = new double[allWords.Count];
            double[] vector2 = new double[allWords.Count];
            int      index   = 0;

            foreach (string word in allWords)
            {
                vector1[index] = wordsCounts1[word];
                vector2[index] = wordsCounts2[word];
                index++;
            }

            return VectorUtils.CalcCosinusBetweenVectors(vector1, vector2);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Calculates the distance (similarity) between given Bulgarian and Russian words.
        /// </summary>
        /// <remarks>
        /// A context vector is calculated for each word, and the two vectors are then compared
        /// with the measure selected by <c>vectorDiffAlgorithm</c>.
        /// </remarks>
        /// <param name="bgWord">The Bulgarian word.</param>
        /// <param name="ruWord">The Russian word.</param>
        /// <returns>
        /// The cosine or the Dice coefficient between the two context vectors, depending on
        /// the configured algorithm.
        /// </returns>
        /// <exception cref="InvalidOperationException">
        /// <c>vectorDiffAlgorithm</c> is neither <c>COSINE</c> nor <c>DICE_COEFFICIENT</c>.
        /// </exception>
        public static double CrossSim(string bgWord, string ruWord)
        {
            double[] bgVector = CalculateBgDictionaryContextVector(bgWord);
            double[] ruVector = CalculateRuDictionaryContextVector(ruWord);

            if (vectorDiffAlgorithm == VectorDiffAlgorithm.COSINE)
            {
                return VectorUtils.CalcCosinusBetweenVectors(bgVector, ruVector);
            }

            if (vectorDiffAlgorithm == VectorDiffAlgorithm.DICE_COEFFICIENT)
            {
                return VectorUtils.CalcDiceCoeffBetweenVectors(bgVector, ruVector);
            }

            // The configured algorithm is part of the object's state rather than an argument,
            // so report it as an invalid state. InvalidOperationException derives from
            // Exception, which keeps existing catch (Exception) handlers working.
            throw new InvalidOperationException("Invalid vector diff algorithm!");
        }