/// <summary>
/// Compute QGram distance using precomputed profiles.
/// The result is the sum, over every key that appears in either profile,
/// of the absolute difference between the two counts; a key that is
/// missing from one profile contributes a count of 0 for that profile.
/// </summary>
/// <param name="profile1">
/// First profile: a map from key to integer count (presumably n-gram to
/// number of occurrences; the code that builds profiles is not visible here).
/// </param>
/// <param name="profile2">Second profile, same shape as <paramref name="profile1"/>.</param>
/// <returns>The sum of absolute count differences, as a double.</returns>
/// <exception cref="ArgumentNullException">If profile1 or profile2 is null.</exception>
public double Distance(IDictionary<string, int> profile1, IDictionary<string, int> profile2)
{
    if (profile1 == null)
    {
        throw new ArgumentNullException(nameof(profile1));
    }

    if (profile2 == null)
    {
        throw new ArgumentNullException(nameof(profile2));
    }

    // Accumulate in a long so that very large counts cannot silently wrap.
    long agg = 0;

    // Pass 1: every key of profile1, compared against profile2 (0 when absent).
    foreach (var entry in profile1)
    {
        int v2 = profile2.TryGetValue(entry.Key, out var found) ? found : 0;
        agg += Math.Abs((long)entry.Value - v2);
    }

    // Pass 2: keys that exist only in profile2. Keys shared with profile1
    // were already handled above, so skipping them visits each key of the
    // union exactly once without materializing a temporary set.
    foreach (var entry in profile2)
    {
        if (!profile1.ContainsKey(entry.Key))
        {
            agg += Math.Abs((long)entry.Value);
        }
    }

    return agg;
}
/// <summary>
/// Compute jaccard index: |A inter B| / |A union B|, where A and B are the
/// key sets of the profiles returned by GetProfile for each string.
/// </summary>
/// <param name="s1">The first string to compare.</param>
/// <param name="s2">The second string to compare.</param>
/// <returns>
/// The Jaccard index in the range [0, 1]. Equal strings yield 1. Two
/// different strings whose profiles are both empty yield 0 (instead of the
/// NaN a 0/0 division would produce).
/// </returns>
/// <exception cref="ArgumentNullException">If s1 or s2 is null.</exception>
public double Similarity(string s1, string s2)
{
    if (s1 == null)
    {
        throw new ArgumentNullException(nameof(s1));
    }

    if (s2 == null)
    {
        throw new ArgumentNullException(nameof(s2));
    }

    // Identical inputs: skip profiling altogether.
    if (s1.Equals(s2))
    {
        return 1;
    }

    var profile1 = GetProfile(s1);
    var profile2 = GetProfile(s2);

    var union = new HashCollection<string>();
    union.UnionWith(profile1.Keys);
    union.UnionWith(profile2.Keys);

    // Neither profile has any key, so the ratio below would be 0/0 = NaN.
    // NOTE(review): presumably this happens for inputs too short to yield
    // a shingle - confirm against GetProfile. The strings are known to
    // differ at this point and share nothing, so report no similarity.
    if (union.Count == 0)
    {
        return 0;
    }

    // Inclusion-exclusion: |A inter B| = |A| + |B| - |A union B|.
    int inter = profile1.Keys.Count + profile2.Keys.Count - union.Count;

    return 1.0 * inter / union.Count;
}
/// <summary>
/// Similarity is computed as 2 * |A inter B| / (|A| + |B|), where A and B
/// are the key sets of the profiles returned by GetProfile for each string.
/// </summary>
/// <param name="s1">The first string to compare.</param>
/// <param name="s2">The second string to compare.</param>
/// <returns>
/// The computed Sorensen-Dice similarity. Equal strings yield 1. Two
/// different strings whose profiles are both empty yield 0 (instead of the
/// NaN a 0/0 division would produce).
/// </returns>
/// <exception cref="ArgumentNullException">If s1 or s2 is null.</exception>
public double Similarity(string s1, string s2)
{
    if (s1 == null)
    {
        throw new ArgumentNullException(nameof(s1));
    }

    if (s2 == null)
    {
        throw new ArgumentNullException(nameof(s2));
    }

    // Identical inputs: skip profiling altogether.
    if (s1.Equals(s2))
    {
        return 1;
    }

    var profile1 = GetProfile(s1);
    var profile2 = GetProfile(s2);

    int total = profile1.Count + profile2.Count;

    // Neither profile has any key, so the ratio below would be 0/0 = NaN.
    // NOTE(review): presumably this happens for inputs too short to yield
    // a shingle - confirm against GetProfile. The strings are known to
    // differ at this point and share nothing, so report no similarity.
    if (total == 0)
    {
        return 0;
    }

    // Count the keys present in both profiles. Walking one profile and
    // probing the other gives the intersection size directly; no
    // intermediate union set is required.
    int inter = 0;
    foreach (var key in profile1.Keys)
    {
        if (profile2.ContainsKey(key))
        {
            inter++;
        }
    }

    return 2.0 * inter / total;
}