コード例 #1
0
        /// <summary>
        /// Compute the weighted Levenshtein distance between two strings, using the
        /// configured per-character substitution, insertion and deletion costs.
        /// </summary>
        /// <remarks>
        /// Insertion and deletion costs are applied for every character, including
        /// positions where the two strings happen to match, and the borders of the
        /// dynamic-programming table are seeded with the same weighted costs. Empty
        /// inputs are handled by the same recurrence, so the result for an empty
        /// <paramref name="s1"/> is the summed insertion cost of <paramref name="s2"/>,
        /// and vice versa with deletion costs.
        /// </remarks>
        /// <param name="s1">The first string to compare.</param>
        /// <param name="s2">The second string to compare.</param>
        /// <returns>The computed weighted Levenshtein distance.</returns>
        /// <exception cref="ArgumentNullException">If s1 or s2 is null.</exception>
        public double Distance(string s1, string s2)
        {
            if (s1 == null)
            {
                throw new ArgumentNullException(nameof(s1));
            }

            if (s2 == null)
            {
                throw new ArgumentNullException(nameof(s2));
            }

            if (s1.Equals(s2))
            {
                return 0;
            }

            // Two rolling rows of the edit-distance table: only the previous row is
            // needed to compute the current one.
            double[] previousRow = new double[s2.Length + 1];
            double[] currentRow  = new double[s2.Length + 1];

            // Row 0: transforming an empty prefix of s1 into each prefix of s2 costs
            // the cumulative insertion cost of the s2 characters seen so far.
            previousRow[0] = 0;
            for (int j = 0; j < s2.Length; j++)
            {
                previousRow[j + 1] = previousRow[j] + _characterSubstitution.InsertionCost(s2[j]);
            }

            for (int i = 0; i < s1.Length; i++)
            {
                char   s1Char       = s1[i];
                // The deletion cost depends only on the s1 character, so look it up
                // once per row.
                double deletionCost = _characterSubstitution.DeletionCost(s1Char);

                // Column 0: transforming s1[0..i] into the empty string costs the
                // cumulative deletion cost of those characters.
                currentRow[0] = previousRow[0] + deletionCost;

                for (int j = 0; j < s2.Length; j++)
                {
                    char s2Char = s2[j];

                    // Only the substitution is free when the characters match;
                    // inserting or deleting a character always carries its cost.
                    double substitutionCost = 0;
                    if (s1Char != s2Char)
                    {
                        substitutionCost = _characterSubstitution.Cost(s1Char, s2Char);
                    }

                    double insertionCost = _characterSubstitution.InsertionCost(s2Char);

                    currentRow[j + 1] = Math.Min(
                        currentRow[j] + insertionCost,           // insert s2[j]
                        Math.Min(
                            previousRow[j + 1] + deletionCost,   // delete s1[i]
                            previousRow[j] + substitutionCost)); // substitute / match
                }

                // Swap the row references instead of copying the array contents.
                double[] swap = previousRow;
                previousRow   = currentRow;
                currentRow    = swap;
            }

            // After the final swap the last computed row lives in previousRow.
            return previousRow[s2.Length];
        }
コード例 #2
0
        /// <summary>
        /// Compute the weighted Levenshtein distance between two strings, using the
        /// configured substitution, insertion and deletion costs, with an optional
        /// early exit.
        /// </summary>
        /// <remarks>
        /// The early exit triggers as soon as the smallest value in a freshly computed
        /// row is greater than or equal to <paramref name="limit"/>; in that case
        /// <paramref name="limit"/> itself is returned. The shortcuts for identical or
        /// empty inputs are not compared against <paramref name="limit"/>.
        /// </remarks>
        /// <param name="s1">The first string to compare.</param>
        /// <param name="s2">The second string to compare.</param>
        /// <param name="limit">The maximum result to compute before stopping. This
        /// means that the calculation can terminate early if you
        /// only care about strings with a certain similarity.
        /// Set this to Double.MaxValue if you want to run the
        /// calculation to completion in every case.</param>
        /// <returns>The computed weighted Levenshtein distance.</returns>
        /// <exception cref="ArgumentNullException">If s1 or s2 is null.</exception>
        public double Distance(string s1, string s2, double limit)
        {
            if (s1 == null)
            {
                throw new ArgumentNullException(nameof(s1));
            }

            if (s2 == null)
            {
                throw new ArgumentNullException(nameof(s2));
            }

            if (s1.Equals(s2))
            {
                return 0;
            }

            // Empty-input shortcuts. These must use the same weighted costs as the
            // table borders below; returning the raw length would disagree with the
            // main recurrence whenever a character's cost is not exactly 1.
            if (s1.Length == 0)
            {
                double totalInsertionCost = 0;
                foreach (char inserted in s2)
                {
                    totalInsertionCost += InsertionCost(inserted);
                }

                return totalInsertionCost;
            }

            if (s2.Length == 0)
            {
                double totalDeletionCost = 0;
                foreach (char deleted in s1)
                {
                    totalDeletionCost += DeletionCost(deleted);
                }

                return totalDeletionCost;
            }

            // Two rolling rows of floating point (i.e. weighted) distances.
            double[] v0 = new double[s2.Length + 1];
            double[] v1 = new double[s2.Length + 1];

            // Initialize v0 (the previous row of distances).
            // This row is A[0][i]: the edit distance from an empty s1, i.e. the
            // cumulative cost of inserting each character of s2.
            v0[0] = 0;
            for (int i = 1; i < v0.Length; i++)
            {
                v0[i] = v0[i - 1] + InsertionCost(s2[i - 1]);
            }

            for (int i = 0; i < s1.Length; i++)
            {
                char   s1i          = s1[i];
                // Depends only on the s1 character, so it is looked up once per row.
                double deletionCost = DeletionCost(s1i);

                // First element of v1 is A[i+1][0]: the cumulative cost of deleting
                // the first i+1 characters of s1 to match an empty s2.
                v1[0] = v0[0] + deletionCost;

                // Track the row minimum for the early-exit check below.
                double minv1 = v1[0];

                // Use the recurrence to fill in the rest of the row.
                for (int j = 0; j < s2.Length; j++)
                {
                    char   s2j  = s2[j];
                    double cost = 0;

                    // Matching characters substitute for free.
                    if (s1i != s2j)
                    {
                        cost = _characterSubstitution.Cost(s1i, s2j);
                    }

                    double insertionCost = InsertionCost(s2j);

                    v1[j + 1] = Math.Min(
                        v1[j] + insertionCost,        // Cost of insertion
                        Math.Min(
                            v0[j + 1] + deletionCost, // Cost of deletion
                            v0[j] + cost));           // Cost of substitution

                    minv1 = Math.Min(minv1, v1[j + 1]);
                }

                // Every entry of this row has already reached the limit, so stop
                // here and report the limit.
                if (minv1 >= limit)
                {
                    return limit;
                }

                // Flip references to current and previous row instead of copying.
                double[] vtemp = v0;
                v0             = v1;
                v1             = vtemp;
            }

            // After the final flip the last computed row lives in v0.
            return v0[s2.Length];
        }