Пример #1
0
        /// <summary>
        /// Compute QGram distance using precomputed profiles.
        /// The result is the sum, over every key present in either profile, of the
        /// absolute difference between the two counts; a key that is missing from
        /// a profile contributes a count of 0 for that profile.
        /// </summary>
        /// <param name="profile1">Key-to-count profile of the first input.</param>
        /// <param name="profile2">Key-to-count profile of the second input.</param>
        /// <returns>The summed absolute differences of the per-key counts.</returns>
        /// <exception cref="ArgumentNullException">If profile1 or profile2 is null.</exception>
        public double Distance(IDictionary<string, int> profile1, IDictionary<string, int> profile2)
        {
            if (profile1 == null)
            {
                throw new ArgumentNullException(nameof(profile1));
            }

            if (profile2 == null)
            {
                throw new ArgumentNullException(nameof(profile2));
            }

            int agg = 0;

            // First pass: every key of profile1, compared with profile2 (0 when absent).
            foreach (var entry in profile1)
            {
                int v2 = profile2.TryGetValue(entry.Key, out var found) ? found : 0;
                agg += Math.Abs(entry.Value - v2);
            }

            // Second pass: keys that exist only in profile2. Keys shared with profile1
            // were already handled above, so each key of the union is counted once
            // without materializing a separate union set.
            foreach (var entry in profile2)
            {
                if (!profile1.ContainsKey(entry.Key))
                {
                    agg += Math.Abs(entry.Value);
                }
            }

            return agg;
        }
Пример #2
0
        /// <summary>
        /// Compute jaccard index: |A inter B| / |A union B|, where A and B are the
        /// key sets of the profiles returned by GetProfile for s1 and s2.
        /// </summary>
        /// <param name="s1">The first string to compare.</param>
        /// <param name="s2">The second string to compare.</param>
        /// <returns>
        /// The Jaccard index in the range [0, 1]. Equal strings yield 1. If both
        /// profiles are empty (presumably when both strings are too short to
        /// produce any shingle - confirm against GetProfile), 0 is returned
        /// instead of NaN.
        /// </returns>
        /// <exception cref="ArgumentNullException">If s1 or s2 is null.</exception>
        public double Similarity(string s1, string s2)
        {
            if (s1 == null)
            {
                throw new ArgumentNullException(nameof(s1));
            }

            if (s2 == null)
            {
                throw new ArgumentNullException(nameof(s2));
            }

            // Same ordinal comparison as s1.Equals(s2), with the comparison made explicit.
            if (string.Equals(s1, s2, StringComparison.Ordinal))
            {
                return 1;
            }

            var profile1 = GetProfile(s1);
            var profile2 = GetProfile(s2);

            // Count the keys that occur in both profiles.
            int inter = 0;

            foreach (var key in profile1.Keys)
            {
                if (profile2.ContainsKey(key))
                {
                    inter++;
                }
            }

            // |A union B| = |A| + |B| - |A inter B|, so no union set has to be built.
            int unionCount = profile1.Keys.Count + profile2.Keys.Count - inter;

            if (unionCount == 0)
            {
                // Both key sets are empty: 0 / 0 would produce NaN, which is outside
                // the documented [0, 1] range. The strings are known to differ here,
                // so report no similarity.
                return 0;
            }

            return (double)inter / unionCount;
        }
        /// <summary>
        /// Similarity is computed as 2 * |A inter B| / (|A| + |B|), where A and B are
        /// the key sets of the profiles returned by GetProfile for s1 and s2.
        /// </summary>
        /// <param name="s1">The first string to compare.</param>
        /// <param name="s2">The second string to compare.</param>
        /// <returns>
        /// The computed Sorensen-Dice similarity. Equal strings yield 1. If both
        /// profiles are empty (presumably when both strings are too short to
        /// produce any shingle - confirm against GetProfile), 0 is returned
        /// instead of NaN.
        /// </returns>
        /// <exception cref="ArgumentNullException">If s1 or s2 is null.</exception>
        public double Similarity(string s1, string s2)
        {
            if (s1 == null)
            {
                throw new ArgumentNullException(nameof(s1));
            }

            if (s2 == null)
            {
                throw new ArgumentNullException(nameof(s2));
            }

            // Same ordinal comparison as s1.Equals(s2), with the comparison made explicit.
            if (string.Equals(s1, s2, StringComparison.Ordinal))
            {
                return 1;
            }

            var profile1 = GetProfile(s1);
            var profile2 = GetProfile(s2);

            int total = profile1.Count + profile2.Count;

            if (total == 0)
            {
                // Both profiles are empty: 2 * 0 / 0 would produce NaN. The strings
                // are known to differ here, so report no similarity.
                return 0;
            }

            // A key belongs to the intersection exactly when it is in both profiles,
            // so walking profile1's keys is enough; no union set is required.
            int inter = 0;

            foreach (var key in profile1.Keys)
            {
                if (profile2.ContainsKey(key))
                {
                    inter++;
                }
            }

            return 2.0 * inter / total;
        }