/// <summary>
/// Computes a weighted similarity score between two token arrays.
/// </summary>
/// <remarks>
/// For every non-null pattern token the first best-scoring non-null target token is located
/// (only scores above <c>DefaultThreshold</c> count as a match). The matched score is
/// multiplied by the product of both tokens' unit-vectorized semantic weights and the
/// products are summed. A pattern token without a match contributes a weight of zero.
/// </remarks>
/// <param name="patternTokens">The pattern tokens; <c>null</c> entries are skipped.</param>
/// <param name="targetTokens">The target tokens; <c>null</c> entries are skipped.</param>
/// <returns>The accumulated weighted score over all pattern tokens.</returns>
public double GetSimilarity(string[] patternTokens, string[] targetTokens)
{
    var weightsOfPattern = Vocabulary.GetSemanticWeight(patternTokens);
    var weightsOfTarget = Vocabulary.GetSemanticWeight(targetTokens);
    SimMetric tokenMetric = SimHelpers.GetSimMetric(InternalTokenSimilarity);

    // Unit-vectorize both weight vectors; the helper is invoked for its effect on the argument.
    Utils.UnitVectorizing(weightsOfPattern);
    Utils.UnitVectorizing(weightsOfTarget);

    double total = 0;

    for (int pi = 0; pi < patternTokens.Length; pi++)
    {
        string patternToken = patternTokens[pi];
        if (patternToken != null)
        {
            double bestScore = SimHelpers.MinimumScore;
            double bestWeight = 0;

            for (int ti = 0; ti < targetTokens.Length; ti++)
            {
                string targetToken = targetTokens[ti];
                if (targetToken == null)
                {
                    continue;
                }

                double score = tokenMetric(patternToken, targetToken);

                // Only a score above the threshold that also beats the running best is recorded.
                if (score > DefaultThreshold)
                {
                    if (score > bestScore)
                    {
                        bestScore = score;
                        bestWeight = weightsOfPattern[pi] * weightsOfTarget[ti];
                    }
                }

                // A maximum score cannot be improved on, so stop scanning the targets.
                if (Utils.Equals(score, SimHelpers.MaximumScore))
                {
                    break;
                }
            }

            total += bestWeight * bestScore;
        }
    }

    return total;
}
/// <summary>
/// Computes the average best-match similarity of the pattern tokens against the target tokens.
/// </summary>
/// <remarks>
/// When <c>IsSymmetric</c> is set and the pattern array is longer than the target array, the
/// two arrays exchange roles before scoring. The sum of the per-token best scores is divided
/// by the length of the array that ends up on the pattern side (null entries included).
/// </remarks>
/// <param name="patternTokens">The pattern tokens; <c>null</c> entries are skipped.</param>
/// <param name="targetTokens">The target tokens; <c>null</c> entries are skipped.</param>
/// <returns>The summed best scores divided by the pattern-side token count.</returns>
public double GetSimilarity(string[] patternTokens, string[] targetTokens)
{
    int patternCount = patternTokens.Length;
    int targetCount = targetTokens.Length;
    SimMetric tokenMetric = SimHelpers.GetSimMetric(InternalTokenSimilarity);

    // In symmetric mode the shorter array always plays the pattern role.
    if (IsSymmetric && patternCount > targetCount)
    {
        Utils.Swap(ref patternTokens, ref targetTokens);
        Utils.Swap(ref patternCount, ref targetCount);
    }

    double total = 0;

    for (int pi = 0; pi < patternTokens.Length; pi++)
    {
        string patternToken = patternTokens[pi];
        if (patternToken == null)
        {
            continue;
        }

        double best = 0;

        for (int ti = 0; ti < targetTokens.Length; ti++)
        {
            string targetToken = targetTokens[ti];
            if (targetToken == null)
            {
                continue;
            }

            double score = tokenMetric(patternToken, targetToken);
            bool isPerfect = Utils.Equals(score, SimHelpers.MaximumScore);

            // A maximum score cannot be improved on: record it and stop scanning the targets.
            best = isPerfect ? SimHelpers.MaximumScore : Math.Max(best, score);
            if (isPerfect)
            {
                break;
            }
        }

        total += best;
    }

    return total / patternCount;
}
/// <summary>
/// Gets the semantic similarity between two arrays of tokens. The position of a token in its
/// array doesn't have an impact on the resulting score.
/// </summary>
/// <remarks>
/// Each non-null pattern token is paired with the first non-null target token that achieves
/// its highest score. That score is weighted by the product of the two tokens' semantic
/// weights, and the result is the weighted average over the pattern tokens. A pattern token
/// for which no target beats <c>SimHelpers.MinimumScore</c> keeps its own squared weight.
/// When <c>IsSymmetric</c> is set and the pattern array is longer than the target array, the
/// arrays (and their weights) exchange roles before scoring.
/// </remarks>
/// <param name="tokensPattern">The tokens pattern; <c>null</c> entries are skipped.</param>
/// <param name="tokensTarget">The tokens target; <c>null</c> entries are skipped.</param>
/// <returns>
/// The weighted average of the best per-token scores (intended to lie between 0 and 1), or
/// <c>SimHelpers.MinimumScore</c> when no weight was accumulated.
/// </returns>
public double GetSimilarity(string[] tokensPattern, string[] tokensTarget)
{
    var patternWeights = Vocabulary.GetSemanticWeight(tokensPattern);
    var targetWeights = Vocabulary.GetSemanticWeight(tokensTarget);
    SimMetric simMetric = SimHelpers.GetSimMetric(InternalTokenSimilarity);

    int pLen = tokensPattern.Length;
    int tLen = tokensTarget.Length;

    // In symmetric mode the shorter array always plays the pattern role; the weights must
    // follow their tokens so the index correspondence is preserved.
    if (IsSymmetric && pLen > tLen)
    {
        Utils.Swap(ref tokensPattern, ref tokensTarget);
        Utils.Swap(ref patternWeights, ref targetWeights);
        Utils.Swap(ref pLen, ref tLen);
    }

    double sumOverTokens = 0;
    double sumWeights = 0;

    for (int p = 0; p < pLen; p++)
    {
        string pattern = tokensPattern[p];
        double pWeight = patternWeights[p];
        if (pattern == null)
        {
            continue;
        }

        double maxOverToken = SimHelpers.MinimumScore;

        // Fallback weight used when no target token beats the minimum score.
        double weightOverToken = Math.Pow(pWeight, 2.0);

        // Advance the target index here; the original header incremented the pattern index,
        // so the scan never moved past the first target and corrupted the outer loop.
        for (int t = 0; t < tLen; t++)
        {
            string target = tokensTarget[t];
            double tWeight = targetWeights[t];
            if (target == null)
            {
                continue;
            }

            double currentScore = simMetric(pattern, target);
            if (currentScore > maxOverToken)
            {
                maxOverToken = currentScore;
                weightOverToken = pWeight * tWeight;
            }

            // A maximum score cannot be improved on, so stop scanning the targets.
            if (Utils.Equals(currentScore, SimHelpers.MaximumScore))
            {
                break;
            }
        }

        sumOverTokens += weightOverToken * maxOverToken;
        sumWeights += weightOverToken;
    }

    // No pattern token contributed any weight (empty input, all-null tokens or all-zero
    // weights): avoid 0/0 producing NaN and report the minimum score instead.
    if (sumWeights == 0.0)
    {
        return SimHelpers.MinimumScore;
    }

    return sumOverTokens / sumWeights;
}