/// <summary>
/// Calculate how well the given words match the string.
/// Uses case insensitive matching.
/// Specify custom costs for the operations.
/// </summary>
/// <remarks>
/// The words are concatenated into a single pattern that is aligned against
/// every substring of <paramref name="text"/> with a substring edit distance.
/// Skipping text characters is free at the pattern offsets where a word ends,
/// so the words may be separated by arbitrary text at no cost.
/// </remarks>
/// <param name="text">The string to search in.</param>
/// <param name="words">The words to look for; they are joined in the given order. A null entry is treated as an empty word.</param>
/// <param name="cReplace">Cost of aligning a pattern character with a different text character.</param>
/// <param name="cInsert">Cost of skipping a text character inside a word.</param>
/// <param name="cDelete">Cost of leaving a pattern character unmatched.</param>
/// <returns>
/// <c>StrMatch.Fail()</c> when no end position in the text is cheaper than deleting
/// the whole pattern; otherwise a <c>StrMatch</c> built from the matched substring,
/// a distance score, the start index, the match length, the text length and
/// <c>1 - best / pattern.Length</c>.
/// </returns>
/// <exception cref="ArgumentException">
/// <paramref name="text"/> or <paramref name="words"/> is null, or <paramref name="words"/> is empty.
/// </exception>
public static StrMatch WordMatch(string text, string[] words, int cReplace, int cInsert, int cDelete)
{
    if (text == null || words == null || words.Length == 0)
    {
        throw new ArgumentException("Arguments cannot be null or empty");
    }

    string pattern = string.Join("", words);

    // Mark the word borders: slack[i] is true when a word ends at pattern offset i.
    // string.Join treats a null entry as empty, so do the same here instead of
    // failing with a NullReferenceException.
    var slack = new bool[pattern.Length + 1];
    int index = 0;
    foreach (var word in words)
    {
        if (word != null)
        {
            index += word.Length;
        }
        slack[index] = true;
    }

    // startNode[x, y] is the text offset at which the cheapest alignment ending
    // in cell (x, y) began. Row 0 is the identity: a match may begin anywhere.
    var startNode = new int[pattern.Length + 1, text.Length + 1];
    for (int y = 0; y <= text.Length; y++)
    {
        startNode[0, y] = y;
    }

    // matrix[x, y] is the cheapest cost of aligning the first x pattern characters
    // with some substring of text ending at offset y. Row 0 stays at zero (free
    // start anywhere); column 0 is the cost of deleting the pattern prefix.
    var matrix = new int[pattern.Length + 1, text.Length + 1];
    for (int x = 0; x <= pattern.Length; x++)
    {
        matrix[x, 0] = x * cDelete;
    }

    for (int y = 1; y <= text.Length; y++)
    {
        for (int x = 1; x <= pattern.Length; x++)
        {
            // Invariant casing keeps the result independent of the current culture.
            bool isMatch = char.ToLowerInvariant(pattern[x - 1]) == char.ToLowerInvariant(text[y - 1]);

            int replace = matrix[x - 1, y - 1] + (isMatch ? 0 : cReplace);
            int insert = matrix[x, y - 1] + (slack[x] ? 0 : cInsert);
            int delete = matrix[x - 1, y] + cDelete;

            // Tie-break order: replace, then insert, then delete.
            if (replace <= insert && replace <= delete)
            {
                matrix[x, y] = replace;
                startNode[x, y] = startNode[x - 1, y - 1];
            }
            else if (insert <= delete)
            {
                matrix[x, y] = insert;
                startNode[x, y] = startNode[x, y - 1];
            }
            else
            {
                matrix[x, y] = delete;
                startNode[x, y] = startNode[x - 1, y];
            }
        }
    }

    // Find the best match distance and the first end position that achieves it.
    // The baseline (end == 0) is the cost of deleting the entire pattern.
    int best = matrix[pattern.Length, 0];
    int end = 0;
    for (int i = 1; i <= text.Length; i++)
    {
        int value = matrix[pattern.Length, i];
        if (value < best)
        {
            best = value;
            end = i;
        }
    }

    // Match failed: nothing in the text beat deleting the whole pattern.
    // Testing end (rather than best == pattern.Length) stays correct when
    // cDelete is not 1, and also covers an empty pattern.
    if (end == 0)
    {
        return StrMatch.Fail();
    }

    // Final values
    int start = startNode[pattern.Length, end];
    int matchLength = end - start;
    int middleInserts = matchLength - pattern.Length;
    int startInserts = end - pattern.Length - middleInserts; // equals start
    int distance = best * 100 + startInserts + middleInserts;

    return new StrMatch(
        text.Substring(start, matchLength),
        distance,
        start,
        matchLength,
        text.Length,
        1 - best / (double)pattern.Length);
}
/// <summary>
/// Find the best matching substring from a string. Substring version of Edit Distance.
/// Specify custom costs for the operations.
/// </summary>
/// <remarks>Characters are compared exactly (case sensitive).</remarks>
/// <param name="text">The string to search in.</param>
/// <param name="pattern">The string to look for.</param>
/// <param name="cReplace">Cost of aligning a pattern character with a different text character.</param>
/// <param name="cInsert">Cost of skipping a text character inside the match.</param>
/// <param name="cDelete">Cost of leaving a pattern character unmatched.</param>
/// <returns>
/// <c>StrMatch.Fail()</c> when no end position in the text is cheaper than deleting
/// the whole pattern; otherwise a <c>StrMatch</c> built from the matched substring,
/// the best cost, the start index, the match length, the text length and
/// <c>1 - best / pattern.Length</c>.
/// </returns>
/// <exception cref="ArgumentException">
/// <paramref name="text"/> or <paramref name="pattern"/> is null.
/// </exception>
public static StrMatch Find(string text, string pattern, int cReplace, int cInsert, int cDelete)
{
    if (text == null || pattern == null)
    {
        throw new ArgumentException("Strings can't be null");
    }

    // matrix[x, y] is the cheapest cost of aligning the first x pattern characters
    // with some substring of text ending at offset y. Row 0 stays at zero so a
    // match may start anywhere; column 0 is the cost of deleting the pattern prefix.
    int[,] matrix = new int[pattern.Length + 1, text.Length + 1];
    for (int i = 0; i <= pattern.Length; i++)
    {
        matrix[i, 0] = i * cDelete;
    }

    for (int y = 1; y <= text.Length; y++)
    {
        for (int x = 1; x <= pattern.Length; x++)
        {
            bool isMatch = pattern[x - 1] == text[y - 1];

            var replace = matrix[x - 1, y - 1] + (isMatch ? 0 : cReplace);
            var insert = matrix[x, y - 1] + cInsert;
            var delete = matrix[x - 1, y] + cDelete;

            matrix[x, y] = Math.Min(Math.Min(insert, delete), replace);
        }
    }

    // Pick the first end position with the lowest cost. The baseline (end == 0)
    // is the cost of deleting the entire pattern.
    var best = matrix[pattern.Length, 0];
    var end = 0;
    for (int i = 1; i <= text.Length; i++)
    {
        var value = matrix[pattern.Length, i];
        if (value < best)
        {
            best = value;
            end = i;
        }
    }

    // Match failed: nothing in the text beat deleting the whole pattern.
    // Testing end (rather than best == pattern.Length) stays correct when
    // cDelete is not 1; otherwise the walk below would start at Y == 0 and
    // index the matrix at -1. It also covers an empty pattern.
    if (end == 0)
    {
        return StrMatch.Fail();
    }

    // Walk back from the end cell towards row 0, always stepping to the cheapest
    // neighbour (ties prefer left, then corner, then up). Assumes non-negative
    // costs, under which Y stays at 1 or above for the whole walk.
    int X = pattern.Length;
    int Y = end;
    while (X != 0)
    {
        var left = matrix[X - 1, Y];
        var corner = matrix[X - 1, Y - 1];
        var up = matrix[X, Y - 1];

        if (left <= corner && left <= up)
        {
            X--;
        }
        else if (corner <= up)
        {
            X--;
            Y--;
        }
        else
        {
            Y--;
        }
    }

    // Y is the 1-based text column where the walk reached row 0; convert it to a
    // 0-based start index. The match covers text columns Y..end inclusive.
    var start = Y - 1;
    var length = end - Y + 1;

    return new StrMatch(
        text.Substring(start, length),
        best,
        start,
        length,
        text.Length,
        1 - best / (double)pattern.Length);
}