/// <summary>
/// Compute the weighted Levenshtein distance between two strings, using the
/// substitution, insertion and deletion costs reported by
/// <c>_characterSubstitution</c>.
/// </summary>
/// <param name="s1">The first string to compare.</param>
/// <param name="s2">The second string to compare.</param>
/// <returns>The computed weighted Levenshtein distance.</returns>
/// <exception cref="ArgumentNullException">If s1 or s2 is null.</exception>
public double Distance(string s1, string s2)
{
    if (s1 == null)
    {
        throw new ArgumentNullException(nameof(s1));
    }

    if (s2 == null)
    {
        throw new ArgumentNullException(nameof(s2));
    }

    if (s1.Equals(s2))
    {
        return 0;
    }

    if (s1.Length == 0)
    {
        // Turning the empty string into s2 means inserting every character
        // of s2, so the result is the sum of their insertion costs rather
        // than the plain character count.
        double totalInsertionCost = 0;
        foreach (char inserted in s2)
        {
            totalInsertionCost += _characterSubstitution.InsertionCost(inserted);
        }

        return totalInsertionCost;
    }

    if (s2.Length == 0)
    {
        // Symmetric case: every character of s1 has to be deleted.
        double totalDeletionCost = 0;
        foreach (char deleted in s1)
        {
            totalDeletionCost += _characterSubstitution.DeletionCost(deleted);
        }

        return totalDeletionCost;
    }

    // Two work rows of the distance table: the row for the previous prefix
    // of s1 and the row currently being filled in.
    double[] previousRow = new double[s2.Length + 1];
    double[] currentRow = new double[s2.Length + 1];

    // Base row, for the empty prefix of s1: entry j is the cumulative cost
    // of inserting the first j characters of s2.
    previousRow[0] = 0;
    for (int j = 1; j < previousRow.Length; j++)
    {
        previousRow[j] = previousRow[j - 1] + _characterSubstitution.InsertionCost(s2[j - 1]);
    }

    for (int i = 0; i < s1.Length; i++)
    {
        char c1 = s1[i];

        // The deletion cost depends only on the character removed from s1,
        // so it is looked up once per row.
        double deletionCost = _characterSubstitution.DeletionCost(c1);

        // Base column, for the empty prefix of s2: cumulative cost of
        // deleting the first i + 1 characters of s1.
        currentRow[0] = previousRow[0] + deletionCost;

        for (int j = 0; j < s2.Length; j++)
        {
            char c2 = s2[j];

            // Only the substitution is free when the characters match.
            // Insertions and deletions always carry their own cost,
            // otherwise matching cells would let edits through for free.
            double substitutionCost = 0;
            if (c1 != c2)
            {
                substitutionCost = _characterSubstitution.Cost(c1, c2);
            }

            double insertionCost = _characterSubstitution.InsertionCost(c2);

            currentRow[j + 1] = Math.Min(
                currentRow[j] + insertionCost,             // Cost of insertion
                Math.Min(
                    previousRow[j + 1] + deletionCost,     // Cost of deletion
                    previousRow[j] + substitutionCost));   // Cost of substitution
        }

        // Swap the row references instead of copying: the row just computed
        // becomes the previous row of the next iteration.
        double[] swap = previousRow;
        previousRow = currentRow;
        currentRow = swap;
    }

    return previousRow[s2.Length];
}
/// <summary>
/// Compute the weighted Levenshtein distance between two strings, using
/// <c>_characterSubstitution</c> for substitution costs and this class's
/// <c>InsertionCost</c> / <c>DeletionCost</c> for insertions and deletions,
/// and stopping early once the distance is known to reach <paramref name="limit"/>.
/// </summary>
/// <param name="s1">The first string to compare.</param>
/// <param name="s2">The second string to compare.</param>
/// <param name="limit">The maximum result to compute before stopping. As soon as
/// every entry of a row of the distance table is at least this value, the
/// calculation terminates and <paramref name="limit"/> itself is returned.
/// This means that the calculation can terminate early if you only care
/// about strings with a certain similarity. Results that are returned without
/// triggering that check are not clamped to the limit.
/// Set this to Double.MaxValue if you want to run the calculation to
/// completion in every case.</param>
/// <returns>The computed weighted Levenshtein distance, or
/// <paramref name="limit"/> if the calculation was cut short.</returns>
/// <exception cref="ArgumentNullException">If s1 or s2 is null.</exception>
public double Distance(string s1, string s2, double limit)
{
    if (s1 == null)
    {
        throw new ArgumentNullException(nameof(s1));
    }

    if (s2 == null)
    {
        throw new ArgumentNullException(nameof(s2));
    }

    if (s1.Equals(s2))
    {
        return 0;
    }

    if (s1.Length == 0)
    {
        // Turning the empty string into s2 means inserting every character
        // of s2. Sum the insertion costs so this shortcut agrees with the
        // base row built below instead of returning the raw length.
        double totalInsertionCost = 0;
        foreach (char inserted in s2)
        {
            totalInsertionCost += InsertionCost(inserted);
        }

        return totalInsertionCost;
    }

    if (s2.Length == 0)
    {
        // Symmetric case: every character of s1 has to be deleted.
        double totalDeletionCost = 0;
        foreach (char deleted in s1)
        {
            totalDeletionCost += DeletionCost(deleted);
        }

        return totalDeletionCost;
    }

    // Two work rows of floating point (i.e. weighted) distances: the row for
    // the previous prefix of s1 and the row currently being filled in.
    double[] previousRow = new double[s2.Length + 1];
    double[] currentRow = new double[s2.Length + 1];

    // Base row, for the empty prefix of s1: entry j is the cumulative cost
    // of inserting the first j characters of s2.
    previousRow[0] = 0;
    for (int j = 1; j < previousRow.Length; j++)
    {
        previousRow[j] = previousRow[j - 1] + InsertionCost(s2[j - 1]);
    }

    for (int i = 0; i < s1.Length; i++)
    {
        char c1 = s1[i];

        // The deletion cost depends only on the character removed from s1,
        // so it is looked up once per row.
        double deletionCost = DeletionCost(c1);

        // Base column, for the empty prefix of s2: cumulative cost of
        // deleting the first i + 1 characters of s1.
        currentRow[0] = previousRow[0] + deletionCost;

        // Smallest value seen in the current row, used for the early exit.
        double rowMinimum = currentRow[0];

        for (int j = 0; j < s2.Length; j++)
        {
            char c2 = s2[j];

            // Substituting a character by itself is free.
            double substitutionCost = 0;
            if (c1 != c2)
            {
                substitutionCost = _characterSubstitution.Cost(c1, c2);
            }

            double insertionCost = InsertionCost(c2);

            currentRow[j + 1] = Math.Min(
                currentRow[j] + insertionCost,             // Cost of insertion
                Math.Min(
                    previousRow[j + 1] + deletionCost,     // Cost of deletion
                    previousRow[j] + substitutionCost));   // Cost of substitution

            rowMinimum = Math.Min(rowMinimum, currentRow[j + 1]);
        }

        // Every later row is derived from this one by adding costs, so once
        // the whole row has reached the limit the final result cannot drop
        // below it (presumably relies on all costs being non-negative).
        if (rowMinimum >= limit)
        {
            return limit;
        }

        // Swap the row references instead of copying: the row just computed
        // becomes the previous row of the next iteration.
        double[] swap = previousRow;
        previousRow = currentRow;
        currentRow = swap;
    }

    return previousRow[s2.Length];
}