/// <summary>
/// Performs a fuzzy (in-order subsequence) search of <paramref name="pattern"/> inside
/// <paramref name="str_origin"/> and scores the best alignment found.
/// </summary>
/// <remarks>
/// The text is lower-cased with <c>ToLowerInvariant</c> before comparison while the pattern is
/// compared verbatim, so the pattern should already be lower-case.
/// <paramref name="outScore"/> is reset to -100000 on entry and is only overwritten when a
/// non-empty pattern is matched; an empty pattern returns true and leaves it at -100000.
/// </remarks>
/// <param name="pattern">Pattern to look for.</param>
/// <param name="str_origin">String that is searched.</param>
/// <param name="outScore">Score of the best match; a higher score means a better match.</param>
/// <param name="matches">
/// Receives the indices in <paramref name="str_origin"/> of the matched characters, in strictly
/// increasing order. The list is cleared on entry. May be null when the indices are not needed.
/// </param>
/// <returns>True when every pattern character was found in order; otherwise false.</returns>
public static bool FuzzyMatch(string pattern, string str_origin, ref long outScore, List<int> matches)
{
    int windowLength;
    int patternLength;
    int windowStart;
    string text;

    using (new ScopedProfiler("[FM] Init"))
    {
        outScore = -100000;
        matches?.Clear();

        if (string.IsNullOrEmpty(str_origin))
        {
            return false;
        }

        if (string.IsNullOrEmpty(pattern))
        {
            return true;
        }

        text = str_origin.ToLowerInvariant();
        patternLength = pattern.Length;

        // Restrict the search to the window [windowStart..windowEnd) delimited by the first
        // occurrence of the pattern's first letter and the last occurrence of its last letter.
        windowStart = text.IndexOf(pattern[0]);
        var windowEnd = text.LastIndexOf(pattern[patternLength - 1]) + 1;
        if (windowStart < 0 || windowEnd == 0)
        {
            return false;
        }

        windowLength = windowEnd - windowStart;
        if (windowLength < patternLength)
        {
            // The window is shorter than the pattern, it cannot contain it.
            return false;
        }

        // Cheap rejection: the pattern must be an in-order subsequence of the window.
        var matchedCount = 0;
        for (var k = windowStart; k < windowEnd && matchedCount < patternLength; ++k)
        {
            if (pattern[matchedCount] == text[k])
            {
                ++matchedCount;
            }
        }

        if (matchedCount < patternLength)
        {
            return false;
        }
    }

    using (new ScopedProfiler("[FM] Body"))
    using (var d = FuzzyMatchData.Request(windowLength, patternLength))
    {
        var slack = windowLength - patternLength + 1;

        // Smallest window index matched by the previous pattern letter (-1 before the first letter).
        var prevMinIndex = -1;

        using (new ScopedProfiler("[FM] Match loop"))
        {
            for (var j = 0; j < patternLength; ++j)
            {
                var minIndex = -1;
                var endIndex = slack + j;

                // Start strictly after the earliest match of the previous letter: a candidate at
                // or before that position could never have a valid predecessor.
                for (var i = Math.Max(j, prevMinIndex + 1); i < endIndex; ++i)
                {
                    // Skip existing <> tags. "i" is relative to the window, so the text must be
                    // read at windowStart + i.
                    if (text[windowStart + i] == '<')
                    {
                        while (i < endIndex && text[windowStart + i] != '>')
                        {
                            ++i;
                        }

                        if (i >= endIndex)
                        {
                            // Unterminated tag: nothing left to examine for this letter.
                            break;
                        }
                    }

                    var si = windowStart + i;
                    var isMatch = pattern[j] == text[si];

                    if (i >= d.matchData.GetLength(0) || j >= d.matchData.GetLength(1))
                    {
                        return false;
                    }

                    d.matchData[i, j] = isMatch;
                    if (!isMatch)
                    {
                        continue;
                    }

                    if (minIndex < 0)
                    {
                        minIndex = i;
                    }

                    d.matches_indx[j].Add(new ScoreIndx { i = i, score = 1, prev_mi = -1 });
                }

                if (minIndex < 0)
                {
                    return false; // no match for pattern[j]
                }

                prevMinIndex = minIndex;
            }
        }

        const int SequentialBonus = 75;          // bonus for adjacent matches
        const int SeparatorBonus = 30;           // bonus if match occurs after a separator
        const int CamelBonus = 30;               // bonus if match is uppercase and prev is not
        const int FirstLetterBonus = 35;         // bonus if the first letter of the string is matched
        const int LeadingLetterPenalty = -5;     // penalty for every letter before the first match
        const int MaxLeadingLetterPenalty = -15; // maximum penalty for leading letters
        const int UnmatchedLetterPenalty = -1;   // penalty applied per counted letter (see below)

        // The historical expression was "window length - matches.Count", but the list is empty at
        // this point, so every letter of the window is counted. Kept as-is to preserve scores.
        var unmatched = windowLength;

        // Score every candidate for the first pattern letter.
        using (new ScopedProfiler("[FM] Best score 0"))
        {
            var firstCandidates = d.matches_indx[0];
            for (var mi = 0; mi < firstCandidates.Count; ++mi)
            {
                var i = firstCandidates[mi].i;
                var si = windowStart + i;

                var s = 100 + UnmatchedLetterPenalty * unmatched;
                s += Math.Max(LeadingLetterPenalty * si, MaxLeadingLetterPenalty);

                if (si == 0)
                {
                    s += FirstLetterBonus;
                }
                else
                {
                    var currOrig = str_origin[si];
                    var prevOrig = str_origin[si - 1];
                    if (char.IsUpper(currOrig) && !char.IsUpper(prevOrig))
                    {
                        s += CamelBonus;
                    }
                    else if (prevOrig == '_' || prevOrig == ' ')
                    {
                        s += SeparatorBonus;
                    }
                }

                firstCandidates[mi] = new ScoreIndx { i = i, score = s, prev_mi = -1 };
            }
        }

        // For every later letter, chain each candidate to its best-scoring predecessor.
        using (new ScopedProfiler("[FM] Best score 1..pattern_n"))
        {
            for (var j = 1; j < patternLength; ++j)
            {
                var candidates = d.matches_indx[j];
                var previous = d.matches_indx[j - 1];

                for (var mi = 0; mi < candidates.Count; ++mi)
                {
                    var match = candidates[mi];
                    var si = windowStart + match.i; // match.i >= j >= 1, so si - 1 is valid
                    var currOrig = str_origin[si];
                    var prevOrig = str_origin[si - 1];

                    if (char.IsUpper(currOrig) && !char.IsUpper(prevOrig))
                    {
                        match.score += CamelBonus;
                    }
                    else if (prevOrig == '_' || prevOrig == ' ')
                    {
                        match.score += SeparatorBonus;
                    }

                    // Select the best predecessor. No numeric sentinel is used because running
                    // scores can legitimately be negative on long windows.
                    var bestPrev = -1;
                    var bestPrevScore = 0;
                    for (var pmi = 0; pmi < previous.Count; ++pmi)
                    {
                        var prev = previous[pmi];
                        if (prev.i >= match.i)
                        {
                            break; // candidates are stored in increasing index order
                        }

                        var prevScore = prev.score;
                        if (prev.i == match.i - 1)
                        {
                            prevScore += SequentialBonus;
                        }

                        if (bestPrev < 0 || bestPrevScore < prevScore)
                        {
                            bestPrevScore = prevScore;
                            bestPrev = pmi;
                        }
                    }

                    if (bestPrev < 0)
                    {
                        // Not expected: the match loop only records candidates located after
                        // the earliest match of the previous letter.
                        return false;
                    }

                    match.score += bestPrevScore;
                    match.prev_mi = bestPrev;
                    candidates[mi] = match;
                }
            }
        }

        // Pick the best candidate of the last pattern letter (first one wins on ties).
        var lastCandidates = d.matches_indx[patternLength - 1];
        var bestIndex = 0;
        for (var mi = 1; mi < lastCandidates.Count; ++mi)
        {
            if (lastCandidates[bestIndex].score < lastCandidates[mi].score)
            {
                bestIndex = mi;
            }
        }

        var best = lastCandidates[bestIndex];
        outScore = best.score;

        if (matches != null)
        {
            using (new ScopedProfiler("[FM] Matches calc"))
            {
                // Only grow the list; assigning a smaller capacity would force a reallocation.
                if (matches.Capacity < patternLength)
                {
                    matches.Capacity = patternLength;
                }

                // Walk the predecessor links backwards, then restore left-to-right order.
                matches.Add(best.i + windowStart);
                var mi = best.prev_mi;
                for (var j = patternLength - 2; j >= 0; --j)
                {
                    var step = d.matches_indx[j][mi];
                    matches.Add(step.i + windowStart);
                    mi = step.prev_mi;
                }

                matches.Reverse();
            }
        }

        return true;
    }
}
/// <summary>
/// Performs a fuzzy search on a string to see if it matches a pattern.
/// </summary>
/// <remarks>
/// The pattern matches when its characters appear in <paramref name="origin"/> in the same order,
/// not necessarily contiguously. <paramref name="origin"/> is lower-cased with
/// <c>ToLowerInvariant</c> before comparison while the pattern is compared verbatim, so the
/// pattern should already be lower-case.
/// <paramref name="outScore"/> is reset to -100000 on entry and is only overwritten when a
/// non-empty pattern is matched; an empty pattern returns true and leaves it at -100000.
/// </remarks>
/// <param name="pattern">Pattern that we try to match the source string</param>
/// <param name="origin">String we are looking into for a match</param>
/// <param name="outScore">Score of the match. A higher score means the pattern is a better match for the string.</param>
/// <param name="matches">
/// List of indices in the source string where a match was found, in strictly increasing order.
/// The list is cleared on entry. Pass null when the indices are not needed.
/// </param>
/// <returns>Returns true if a match was found</returns>
public static bool FuzzyMatch(string pattern, string origin, ref long outScore, List<int> matches = null)
{
    int windowLength;
    int patternLength;
    int windowStart;
    string text;

    using (new ScopedProfiler("[FM] Init"))
    {
        outScore = -100000;
        matches?.Clear();

        if (string.IsNullOrEmpty(origin))
        {
            return false;
        }

        if (string.IsNullOrEmpty(pattern))
        {
            return true;
        }

        text = origin.ToLowerInvariant();
        patternLength = pattern.Length;

        // Restrict the search to the window [windowStart..windowEnd) delimited by the first
        // occurrence of the pattern's first letter and the last occurrence of its last letter.
        windowStart = text.IndexOf(pattern[0]);
        var windowEnd = text.LastIndexOf(pattern[patternLength - 1]) + 1;
        if (windowStart < 0 || windowEnd == 0)
        {
            return false;
        }

        windowLength = windowEnd - windowStart;
        if (windowLength < patternLength)
        {
            // The window is shorter than the pattern, it cannot contain it.
            return false;
        }

        // Cheap rejection: the pattern must be an in-order subsequence of the window.
        var matchedCount = 0;
        for (var k = windowStart; k < windowEnd && matchedCount < patternLength; ++k)
        {
            if (pattern[matchedCount] == text[k])
            {
                ++matchedCount;
            }
        }

        if (matchedCount < patternLength)
        {
            return false;
        }
    }

    using (new ScopedProfiler("[FM] Body"))
    using (var d = FuzzyMatchData.Request(windowLength, patternLength))
    {
        var slack = windowLength - patternLength + 1;

        // Smallest window index matched by the previous pattern letter (-1 before the first letter).
        var prevMinIndex = -1;

        using (new ScopedProfiler("[FM] Match loop"))
        {
            for (var j = 0; j < patternLength; ++j)
            {
                var minIndex = -1;
                var endIndex = slack + j;

                // Start strictly after the earliest match of the previous letter: a candidate at
                // or before that position could never have a valid predecessor.
                for (var i = Math.Max(j, prevMinIndex + 1); i < endIndex; ++i)
                {
                    // Skip existing <> tags. "i" is relative to the window, so the text must be
                    // read at windowStart + i.
                    if (text[windowStart + i] == '<')
                    {
                        while (i < endIndex && text[windowStart + i] != '>')
                        {
                            ++i;
                        }

                        if (i >= endIndex)
                        {
                            // Unterminated tag: nothing left to examine for this letter.
                            break;
                        }
                    }

                    var si = windowStart + i;
                    var isMatch = pattern[j] == text[si];

                    if (i >= d.matchData.GetLength(0) || j >= d.matchData.GetLength(1))
                    {
                        return false;
                    }

                    d.matchData[i, j] = isMatch;
                    if (!isMatch)
                    {
                        continue;
                    }

                    if (minIndex < 0)
                    {
                        minIndex = i;
                    }

                    d.matches_indx[j].Add(new ScoreIndx { i = i, score = 1, prev_mi = -1 });
                }

                if (minIndex < 0)
                {
                    return false; // no match for pattern[j]
                }

                prevMinIndex = minIndex;
            }
        }

        const int SequentialBonus = 75;          // bonus for adjacent matches
        const int SeparatorBonus = 30;           // bonus if match occurs after a separator
        const int CamelBonus = 30;               // bonus if match is uppercase and prev is not
        const int FirstLetterBonus = 35;         // bonus if the first letter of the string is matched
        const int LeadingLetterPenalty = -5;     // penalty for every letter before the first match
        const int MaxLeadingLetterPenalty = -15; // maximum penalty for leading letters
        const int UnmatchedLetterPenalty = -1;   // penalty applied per counted letter (see below)

        // The historical expression was "window length - (matches?.Count ?? 0)", but the list is
        // empty (or null) at this point, so every letter of the window is counted. Kept as-is to
        // preserve scores.
        var unmatched = windowLength;

        // Score every candidate for the first pattern letter.
        using (new ScopedProfiler("[FM] Best score 0"))
        {
            var firstCandidates = d.matches_indx[0];
            for (var mi = 0; mi < firstCandidates.Count; ++mi)
            {
                var i = firstCandidates[mi].i;
                var si = windowStart + i;

                var s = 100 + UnmatchedLetterPenalty * unmatched;
                s += Math.Max(LeadingLetterPenalty * si, MaxLeadingLetterPenalty);

                if (si == 0)
                {
                    s += FirstLetterBonus;
                }
                else
                {
                    var currOrig = origin[si];
                    var prevOrig = origin[si - 1];
                    if (char.IsUpper(currOrig) && !char.IsUpper(prevOrig))
                    {
                        s += CamelBonus;
                    }
                    else if (prevOrig == '_' || prevOrig == ' ')
                    {
                        s += SeparatorBonus;
                    }
                }

                firstCandidates[mi] = new ScoreIndx { i = i, score = s, prev_mi = -1 };
            }
        }

        // For every later letter, chain each candidate to its best-scoring predecessor.
        using (new ScopedProfiler("[FM] Best score 1..pattern_n"))
        {
            for (var j = 1; j < patternLength; ++j)
            {
                var candidates = d.matches_indx[j];
                var previous = d.matches_indx[j - 1];

                for (var mi = 0; mi < candidates.Count; ++mi)
                {
                    var match = candidates[mi];
                    var si = windowStart + match.i; // match.i >= j >= 1, so si - 1 is valid
                    var currOrig = origin[si];
                    var prevOrig = origin[si - 1];

                    if (char.IsUpper(currOrig) && !char.IsUpper(prevOrig))
                    {
                        match.score += CamelBonus;
                    }
                    else if (prevOrig == '_' || prevOrig == ' ')
                    {
                        match.score += SeparatorBonus;
                    }

                    // Select the best predecessor. No numeric sentinel is used because running
                    // scores can legitimately be negative on long windows.
                    var bestPrev = -1;
                    var bestPrevScore = 0;
                    for (var pmi = 0; pmi < previous.Count; ++pmi)
                    {
                        var prev = previous[pmi];
                        if (prev.i >= match.i)
                        {
                            break; // candidates are stored in increasing index order
                        }

                        var prevScore = prev.score;
                        if (prev.i == match.i - 1)
                        {
                            prevScore += SequentialBonus;
                        }

                        if (bestPrev < 0 || bestPrevScore < prevScore)
                        {
                            bestPrevScore = prevScore;
                            bestPrev = pmi;
                        }
                    }

                    if (bestPrev < 0)
                    {
                        // Not expected: the match loop only records candidates located after
                        // the earliest match of the previous letter.
                        return false;
                    }

                    match.score += bestPrevScore;
                    match.prev_mi = bestPrev;
                    candidates[mi] = match;
                }
            }
        }

        // Pick the best candidate of the last pattern letter (first one wins on ties).
        var lastCandidates = d.matches_indx[patternLength - 1];
        var bestIndex = 0;
        for (var mi = 1; mi < lastCandidates.Count; ++mi)
        {
            if (lastCandidates[bestIndex].score < lastCandidates[mi].score)
            {
                bestIndex = mi;
            }
        }

        var best = lastCandidates[bestIndex];
        outScore = best.score;

        if (matches != null)
        {
            using (new ScopedProfiler("[FM] Matches calc"))
            {
                // Only grow the list; assigning a smaller capacity would force a reallocation.
                if (matches.Capacity < patternLength)
                {
                    matches.Capacity = patternLength;
                }

                // Walk the predecessor links backwards, then restore left-to-right order.
                matches.Add(best.i + windowStart);
                var mi = best.prev_mi;
                for (var j = patternLength - 2; j >= 0; --j)
                {
                    var step = d.matches_indx[j][mi];
                    matches.Add(step.i + windowStart);
                    mi = step.prev_mi;
                }

                matches.Reverse();
            }
        }

        return true;
    }
}