/// <summary>
/// Scores <paramref name="source"/> against <paramref name="target"/> with the fuzzy matcher,
/// optionally letting the pinyin score replace the fuzzy result when it is higher.
/// </summary>
/// <param name="source">Text that is evaluated by the matcher.</param>
/// <param name="target">Text the matcher is created from.</param>
/// <param name="pinyin">When true, the pinyin score is also computed and wins if it is strictly greater.</param>
/// <returns>
/// A result with <c>Score = 0</c> when either input is null or empty; otherwise the fuzzy result,
/// or a fresh result carrying only the pinyin score when that score is greater.
/// </returns>
public static MatchResult Match(string source, string target, bool pinyin = false)
{
    bool nothingToCompare = string.IsNullOrEmpty(source) || string.IsNullOrEmpty(target);
    if (nothingToCompare)
    {
        return new MatchResult { Score = 0 };
    }

    var fuzzyResult = FuzzyMatcher.Create(target).Evaluate(source);
    if (!pinyin)
    {
        return fuzzyResult;
    }

    // Pinyin only replaces the fuzzy result when it scores strictly better.
    var pinyinScore = ScoreForPinyin(source, target);
    if (pinyinScore > fuzzyResult.Score)
    {
        return new MatchResult { Score = pinyinScore };
    }

    return fuzzyResult;
}
/// <summary>
/// Fuzzy-matches <paramref name="query"/> against <paramref name="stringToCompare"/>,
/// caching each computed result under a key built from the lower-cased inputs.
/// </summary>
/// <param name="query">Search text; leading and trailing whitespace is ignored.</param>
/// <param name="stringToCompare">Candidate text; it is passed through the alphabet translation before matching.</param>
/// <returns>
/// An unsuccessful result when the candidate is null/empty or the query is null/blank;
/// otherwise the cached or freshly computed match.
/// </returns>
public MatchResult FuzzyMatch(string query, string stringToCompare)
{
    // Validate before trimming: calling Trim() on a null query would throw,
    // which defeats the null/empty check that follows.
    if (string.IsNullOrEmpty(stringToCompare) || string.IsNullOrWhiteSpace(query))
    {
        return new MatchResult(false, UserSettingSearchPrecision);
    }

    query = query.Trim();

    var queryWithoutCase = query.ToLower();
    string translated = _alphabet.Translate(stringToCompare);
    var fullStringToCompareWithoutCase = translated.ToLower();

    // NOTE(review): '|' can also occur inside either part, so two different
    // (query, candidate) pairs could map to the same key - confirm this is acceptable.
    string key = $"{queryWithoutCase}|{fullStringToCompareWithoutCase}";

    MatchResult match = _cache[key] as MatchResult;
    if (match == null)
    {
        match = FuzzyMatchInternal(queryWithoutCase, fullStringToCompareWithoutCase);

        // Entries expire after 12 hours without being accessed.
        CacheItemPolicy policy = new CacheItemPolicy
        {
            SlidingExpiration = new TimeSpan(12, 0, 0)
        };
        _cache.Set(key, match, policy);
    }

    return match;
}
/// <summary>
/// Current method:
/// Character matching + substring matching;
/// 1. The query is split into substrings, using a space as the separator.
/// 2. Each query substring's characters are checked against the full compare string.
/// 3. When a character in the substring matches, loop back to verify the previous characters.
/// 4. If the previous characters also match and start the substring, update the index list.
/// 5. Once the previous characters are verified, move on to the next character in the query substring.
/// 6. Move on to the next substring's characters until all substrings are checked.
/// 7. Consider it a success and move on to scoring if every char or substring without whitespace matched.
/// </summary>
/// <param name="query">Search text; surrounding whitespace is trimmed before matching.</param>
/// <param name="stringToCompare">Text the query is matched against.</param>
/// <param name="opt">Match options; <c>IgnoreCase</c> lower-cases both inputs before comparing.</param>
/// <returns>
/// An unsuccessful result when the compare string is null/empty, the query is null/blank,
/// or not every query substring was matched; otherwise a successful result with the matched
/// indexes and the larger of the search score and the pinyin score.
/// </returns>
public static MatchResult FuzzySearch(string query, string stringToCompare, MatchOption opt)
{
    // A whitespace-only query would be trimmed to "" and split into an empty array,
    // making querySubstrings[0] below throw; reject it here together with null/empty.
    if (string.IsNullOrEmpty(stringToCompare) || string.IsNullOrWhiteSpace(query))
    {
        return new MatchResult { Success = false };
    }

    query = query.Trim();

    var fullStringToCompareWithoutCase = opt.IgnoreCase ? stringToCompare.ToLower() : stringToCompare;
    var queryWithoutCase = opt.IgnoreCase ? query.ToLower() : query;

    var querySubstrings = queryWithoutCase.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
    int currentQuerySubstringIndex = 0;
    var currentQuerySubstring = querySubstrings[currentQuerySubstringIndex];
    var currentQuerySubstringCharacterIndex = 0;

    var firstMatchIndex = -1;
    var firstMatchIndexInWord = -1;
    var lastMatchIndex = 0;
    bool allQuerySubstringsMatched = false;
    bool matchFoundInPreviousLoop = false;
    bool allSubstringsContainedInCompareString = true;

    var indexList = new List<int>();

    for (var compareStringIndex = 0; compareStringIndex < fullStringToCompareWithoutCase.Length; compareStringIndex++)
    {
        if (fullStringToCompareWithoutCase[compareStringIndex] != currentQuerySubstring[currentQuerySubstringCharacterIndex])
        {
            matchFoundInPreviousLoop = false;
            continue;
        }

        if (firstMatchIndex < 0)
        {
            // first matched char will become the start of the compared string
            firstMatchIndex = compareStringIndex;
        }

        if (currentQuerySubstringCharacterIndex == 0)
        {
            // first letter of current word
            matchFoundInPreviousLoop = true;
            firstMatchIndexInWord = compareStringIndex;
        }
        else if (!matchFoundInPreviousLoop)
        {
            // we want to verify that there is not a better match if this is not a full word
            // in order to do so we need to verify all previous chars are part of the pattern
            var startIndexToVerify = compareStringIndex - currentQuerySubstringCharacterIndex;

            if (AllPreviousCharsMatched(startIndexToVerify, currentQuerySubstringCharacterIndex, fullStringToCompareWithoutCase, currentQuerySubstring))
            {
                matchFoundInPreviousLoop = true;

                // if it's the beginning character of the first query substring that is matched then we need to update start index
                firstMatchIndex = currentQuerySubstringIndex == 0 ? startIndexToVerify : firstMatchIndex;

                indexList = GetUpdatedIndexList(startIndexToVerify, currentQuerySubstringCharacterIndex, firstMatchIndexInWord, indexList);
            }
        }

        lastMatchIndex = compareStringIndex + 1;
        indexList.Add(compareStringIndex);

        currentQuerySubstringCharacterIndex++;

        // if finished looping through every character in the current substring
        if (currentQuerySubstringCharacterIndex == currentQuerySubstring.Length)
        {
            // if any of the substrings was not matched then consider as all are not matched
            allSubstringsContainedInCompareString = matchFoundInPreviousLoop && allSubstringsContainedInCompareString;

            currentQuerySubstringIndex++;

            allQuerySubstringsMatched = AllQuerySubstringsMatched(currentQuerySubstringIndex, querySubstrings.Length);
            if (allQuerySubstringsMatched)
            {
                break;
            }

            // otherwise move to the next query substring
            currentQuerySubstring = querySubstrings[currentQuerySubstringIndex];
            currentQuerySubstringCharacterIndex = 0;
        }
    }

    // proceed to calculate score if every char or substring without whitespaces matched
    if (allQuerySubstringsMatched)
    {
        var score = CalculateSearchScore(query, stringToCompare, firstMatchIndex, lastMatchIndex - firstMatchIndex, allSubstringsContainedInCompareString);
        var pinyinScore = ScoreForPinyin(stringToCompare, query);

        var result = new MatchResult
        {
            Success = true,
            MatchData = indexList,
            RawScore = Math.Max(score, pinyinScore)
        };

        return result;
    }

    return new MatchResult { Success = false };
}
/// <summary>
/// Recursively matches the characters of <paramref name="query"/>, in order and case-insensitively,
/// against <paramref name="stringToCompare"/>. At every matching character the alternative of
/// skipping that occurrence is explored recursively, and the higher-scoring alignment is kept.
/// </summary>
/// <param name="query">Text whose characters must all be found, in order.</param>
/// <param name="stringToCompare">Text being searched.</param>
/// <param name="queryCurrentIndex">Index in <paramref name="query"/> to resume matching from.</param>
/// <param name="stringCurrentIndex">Index in <paramref name="stringToCompare"/> to resume matching from.</param>
/// <param name="sourceMatchData">Indexes already matched by the caller; copied, never modified. Null is treated as empty.</param>
/// <returns>
/// An unsuccessful result when either index is already at the end of its string or the query
/// cannot be fully matched; otherwise a successful result with the matched indexes and score.
/// </returns>
public MatchResult FuzzyMatchRecurrsive(
    string query,
    string stringToCompare,
    int queryCurrentIndex,
    int stringCurrentIndex,
    List<int> sourceMatchData)
{
    if (queryCurrentIndex == query.Length || stringCurrentIndex == stringToCompare.Length)
    {
        return new MatchResult(false, UserSettingSearchPrecision);
    }

    bool recursiveMatch = false;
    List<int> bestRecursiveMatchData = new List<int>();
    int bestRecursiveScore = 0;

    // Work on a private copy so sibling recursion branches never see each other's matches.
    List<int> matchs = sourceMatchData == null
        ? new List<int>()
        : new List<int>(sourceMatchData);

    while (queryCurrentIndex < query.Length && stringCurrentIndex < stringToCompare.Length)
    {
        char queryLower = char.ToLower(query[queryCurrentIndex]);
        char stringToCompareLower = char.ToLower(stringToCompare[stringCurrentIndex]);

        if (queryLower == stringToCompareLower)
        {
            // Explore the alternative where this occurrence is skipped and the same
            // query character is matched later in the string.
            MatchResult match = FuzzyMatchRecurrsive(
                query,
                stringToCompare,
                queryCurrentIndex,
                stringCurrentIndex + 1,
                matchs);

            if (match.Success)
            {
                if (!recursiveMatch || match.RawScore > bestRecursiveScore)
                {
                    bestRecursiveMatchData = new List<int>(match.MatchData);

                    // Store the same quantity that is compared above. The original compared
                    // RawScore but stored Score, mixing two values in one running maximum.
                    // NOTE(review): assumes the 4-argument MatchResult constructor below
                    // expects a raw score - confirm against MatchResult.
                    bestRecursiveScore = match.RawScore;
                }

                recursiveMatch = true;
            }

            matchs.Add(stringCurrentIndex);
            queryCurrentIndex += 1;
        }

        stringCurrentIndex += 1;
    }

    bool matched = queryCurrentIndex == query.Length;
    int outScore;

    if (matched)
    {
        outScore = 100;

        // Penalize matches that start late in the string.
        int penality = 3 * matchs[0];
        outScore = outScore - penality;

        // Penalize every character of the string that was not matched.
        int unmatched = stringToCompare.Length - matchs.Count;
        outScore = outScore - (5 * unmatched);

        int consecutiveMatch = 0;
        for (int i = 0; i < matchs.Count; i++)
        {
            int indexCurent = matchs[i];

            if (i > 0)
            {
                int indexPrevious = matchs[i - 1];
                if (indexCurent == indexPrevious + 1)
                {
                    // The bonus grows with the length of the consecutive run.
                    consecutiveMatch += 1;
                    outScore += 10 * consecutiveMatch;
                }
                else
                {
                    consecutiveMatch = 0;
                }
            }

            char current = stringToCompare[indexCurent];
            bool currentUpper = char.IsUpper(current);

            if (indexCurent > 0)
            {
                char neighbor = stringToCompare[indexCurent - 1];

                // Lower-to-upper transition, e.g. the 'W' in "camelWord".
                if (currentUpper && char.IsLower(neighbor))
                {
                    outScore += 30;
                }

                bool isNeighbourSeparator = neighbor == '_' || neighbor == ' ';
                if (isNeighbourSeparator)
                {
                    outScore += 50;
                    if (currentUpper)
                    {
                        outScore += 50;
                    }
                }
            }
            else
            {
                // A match on the very first character gets the same bonus as a match after a separator.
                outScore += 50;
                if (currentUpper)
                {
                    outScore += 50;
                }
            }
        }
    }
    else
    {
        outScore = 0;
    }

    if (recursiveMatch && (!matched || bestRecursiveScore > outScore))
    {
        // The skipped-occurrence alignment wins.
        matchs = new List<int>(bestRecursiveMatchData);
        outScore = bestRecursiveScore;
        return new MatchResult(true, UserSettingSearchPrecision, matchs, outScore);
    }

    if (matched)
    {
        return new MatchResult(true, UserSettingSearchPrecision, matchs, outScore);
    }

    return new MatchResult(false, UserSettingSearchPrecision);
}
/// <summary>
/// Matches the space-separated words of <paramref name="query"/>, in order, against
/// <paramref name="stringToCompare"/>; refer to https://github.com/mattyork/fuzzy
/// </summary>
/// <param name="query">Search text; surrounding whitespace is trimmed and empty words are ignored.</param>
/// <param name="stringToCompare">Text the query is matched against.</param>
/// <param name="opt">Match options; <c>IgnoreCase</c> lower-cases both inputs before comparing.</param>
/// <returns>
/// An unsuccessful result when the compare string is null/empty, the query is null/blank,
/// or not every query word was matched; otherwise a successful result with the matched
/// indexes and the larger of the search score and the pinyin score.
/// </returns>
public static MatchResult FuzzySearch(string query, string stringToCompare, MatchOption opt)
{
    // A whitespace-only query would trim to "" and leave an empty word to index into.
    if (string.IsNullOrEmpty(stringToCompare) || string.IsNullOrWhiteSpace(query))
    {
        return new MatchResult { Success = false };
    }

    query = query.Trim();

    var fullStringToCompareWithoutCase = opt.IgnoreCase ? stringToCompare.ToLower() : stringToCompare;
    var queryWithoutCase = opt.IgnoreCase ? query.ToLower() : query;

    int currentQueryToCompareIndex = 0;

    // Drop empty entries: consecutive spaces inside the query would otherwise produce an
    // empty word, and reading its first character below throws.
    var queryToCompareSeparated = queryWithoutCase.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
    var currentQueryToCompare = queryToCompareSeparated[currentQueryToCompareIndex];

    var patternIndex = 0;
    var firstMatchIndex = -1;
    var firstMatchIndexInWord = -1;
    var lastMatchIndex = 0;
    bool allMatched = false;
    bool isFullWordMatched = false;
    bool allWordsFullyMatched = true;

    var indexList = new List<int>();

    for (var index = 0; index < fullStringToCompareWithoutCase.Length; index++)
    {
        if (fullStringToCompareWithoutCase[index] != currentQueryToCompare[patternIndex])
        {
            isFullWordMatched = false;
            continue;
        }

        if (firstMatchIndex < 0)
        {
            // first matched char will become the start of the compared string
            firstMatchIndex = index;
        }

        if (patternIndex == 0)
        {
            // first letter of current word
            isFullWordMatched = true;
            firstMatchIndexInWord = index;
        }
        else if (!isFullWordMatched)
        {
            // we want to verify that there is not a better match if this is not a full word
            // in order to do so we need to verify all previous chars are part of the pattern
            int startIndexToVerify = index - patternIndex;
            bool allMatch = true;
            for (int indexToCheck = 0; indexToCheck < patternIndex; indexToCheck++)
            {
                if (fullStringToCompareWithoutCase[startIndexToVerify + indexToCheck] != currentQueryToCompare[indexToCheck])
                {
                    allMatch = false;
                    break;
                }
            }

            if (allMatch)
            {
                // update to this as a full word
                isFullWordMatched = true;
                if (currentQueryToCompareIndex == 0)
                {
                    // first word so we need to update start index
                    firstMatchIndex = startIndexToVerify;
                }

                // replace the scattered indexes recorded for this word with the contiguous run
                indexList.RemoveAll(x => x >= firstMatchIndexInWord);
                for (int indexToCheck = 0; indexToCheck < patternIndex; indexToCheck++)
                {
                    indexList.Add(startIndexToVerify + indexToCheck);
                }
            }
        }

        lastMatchIndex = index + 1;
        indexList.Add(index);

        // increase the pattern matched index and check if everything was matched
        if (++patternIndex == currentQueryToCompare.Length)
        {
            // if any of the words was not fully matched all are not fully matched.
            // This is evaluated before the break below so the last word is counted too.
            if (!isFullWordMatched)
            {
                allWordsFullyMatched = false;
            }

            if (++currentQueryToCompareIndex >= queryToCompareSeparated.Length)
            {
                // moved over all the words
                allMatched = true;
                break;
            }

            // otherwise move to the next word
            currentQueryToCompare = queryToCompareSeparated[currentQueryToCompareIndex];
            patternIndex = 0;
        }
    }

    // proceed to scoring if we have a match for every char or all substrings without whitespaces matched
    if (allMatched)
    {
        // check if all query string was contained in string to compare
        bool containedFully = lastMatchIndex - firstMatchIndex == queryWithoutCase.Length;
        var score = CalculateSearchScore(query, stringToCompare, firstMatchIndex, lastMatchIndex - firstMatchIndex, containedFully, allWordsFullyMatched);
        var pinyinScore = ScoreForPinyin(stringToCompare, query);

        var result = new MatchResult
        {
            Success = true,
            MatchData = indexList,
            RawScore = Math.Max(score, pinyinScore)
        };

        return result;
    }

    return new MatchResult { Success = false };
}
/// <summary>
/// Returns the result's <c>Score</c> when <c>IsSearchPrecisionScoreMet()</c> reports true,
/// and 0 otherwise.
/// </summary>
/// <param name="matchResult">The match result to filter.</param>
/// <returns>The score, or 0 when the precision check is not met.</returns>
public static int ScoreAfterSearchPrecisionFilter(this MatchResult matchResult)
{
    if (!matchResult.IsSearchPrecisionScoreMet())
    {
        return 0;
    }

    return matchResult.Score;
}