/// <summary>
/// Calculates the cosine between the word-count vectors of two sets of words.
/// </summary>
/// <remarks>
/// Both vectors are laid out over the union of the words of the two sets, so the
/// same position refers to the same word in each vector. Every count is read
/// through the indexer of the corresponding <c>WordsAndCounts</c> instance.
/// </remarks>
/// <param name="wordsCounts1">The first set of words with their counts.</param>
/// <param name="wordsCounts2">The second set of words with their counts.</param>
/// <returns>
/// The value returned by <c>VectorUtils.CalcCosinusBetweenVectors</c> for the two
/// count vectors.
/// </returns>
public static double CalcCosinusBetweenWordsCounts(
    WordsAndCounts wordsCounts1, WordsAndCounts wordsCounts2)
{
    // Build the union of all words from the two sets. The dictionary is used as a
    // set (the values are never read). Indexer assignment is idempotent, so a word
    // that shows up more than once never throws and needs no ContainsKey check.
    Dictionary<string, bool> allWords = new Dictionary<string, bool>();
    foreach (string word in wordsCounts1.Words)
    {
        allWords[word] = true;
    }
    foreach (string word in wordsCounts2.Words)
    {
        allWords[word] = true;
    }

    // Fill both occurrence vectors in one pass so that they are guaranteed to share
    // the same word order.
    // NOTE(review): a word present in only one set is still looked up in the other;
    // presumably the WordsAndCounts indexer yields 0 for unknown words - confirm.
    double[] vector1 = new double[allWords.Count];
    double[] vector2 = new double[allWords.Count];
    int index = 0;
    foreach (string word in allWords.Keys)
    {
        vector1[index] = wordsCounts1[word];
        vector2[index] = wordsCounts2[word];
        index++;
    }

    return VectorUtils.CalcCosinusBetweenVectors(vector1, vector2);
}
/// <summary>
/// Calculates the distance (similarity) between given Bulgarian and Russian words.
/// </summary>
/// <remarks>
/// A context vector is computed for each word and the two vectors are compared
/// with the algorithm currently selected in <c>vectorDiffAlgorithm</c>.
/// </remarks>
/// <param name="bgWord">The Bulgarian word.</param>
/// <param name="ruWord">The Russian word.</param>
/// <returns>
/// The result of <c>VectorUtils.CalcCosinusBetweenVectors</c> or
/// <c>VectorUtils.CalcDiceCoeffBetweenVectors</c>, depending on the selected algorithm.
/// </returns>
/// <exception cref="InvalidOperationException">
/// <c>vectorDiffAlgorithm</c> is neither <c>COSINE</c> nor <c>DICE_COEFFICIENT</c>.
/// </exception>
public static double CrossSim(string bgWord, string ruWord)
{
    double[] bgVector = CalculateBgDictionaryContextVector(bgWord);
    double[] ruVector = CalculateRuDictionaryContextVector(ruWord);

    if (vectorDiffAlgorithm == VectorDiffAlgorithm.COSINE)
    {
        return VectorUtils.CalcCosinusBetweenVectors(bgVector, ruVector);
    }

    if (vectorDiffAlgorithm == VectorDiffAlgorithm.DICE_COEFFICIENT)
    {
        return VectorUtils.CalcDiceCoeffBetweenVectors(bgVector, ruVector);
    }

    // The configured algorithm is class state, not an argument, so an unsupported
    // value is reported as an invalid state. InvalidOperationException derives from
    // Exception, so existing catch (Exception) handlers keep working.
    throw new InvalidOperationException("Invalid vector diff algorithm!");
}