/// <summary>
/// Case-insensitive fuzzy match of <paramref name="query"/> against
/// <paramref name="stringToCompare"/>, with results memoised in <c>_cache</c>.
/// </summary>
/// <param name="query">Search text; leading and trailing whitespace is ignored.</param>
/// <param name="stringToCompare">
/// Candidate text. When <c>_alphabet</c> is set, it is passed through
/// <c>_alphabet.Translate</c> before matching.
/// </param>
/// <returns>
/// The cached or freshly computed result of <c>FuzzyMatchInternal</c>; an unsuccessful
/// <see cref="MatchResult"/> when either input is null or empty, or the query is only whitespace.
/// </returns>
public MatchResult FuzzyMatch(string query, string stringToCompare)
{
    // Validate before touching the inputs: trimming first would throw
    // NullReferenceException for a null query. A whitespace-only query trims
    // to the empty string, so it is rejected here as well.
    if (string.IsNullOrEmpty(stringToCompare) || string.IsNullOrWhiteSpace(query))
    {
        return new MatchResult(false, UserSettingSearchPrecision);
    }

    query = query.Trim();

    if (_alphabet != null)
    {
        stringToCompare = _alphabet.Translate(stringToCompare);

        // Re-check after translation so an empty translated candidate is still a non-match.
        if (string.IsNullOrEmpty(stringToCompare))
        {
            return new MatchResult(false, UserSettingSearchPrecision);
        }
    }

    var fullStringToCompareWithoutCase = stringToCompare.ToLower();
    var queryWithoutCase = query.ToLower();

    // The cache key is built from the normalised (trimmed, lower-cased) inputs.
    string key = $"{queryWithoutCase}|{fullStringToCompareWithoutCase}";

    MatchResult match = _cache[key] as MatchResult;
    if (match == null)
    {
        match = FuzzyMatchInternal(queryWithoutCase, fullStringToCompareWithoutCase);

        // Entries are kept for 12 hours after their most recent access.
        var policy = new CacheItemPolicy
        {
            SlidingExpiration = new TimeSpan(12, 0, 0)
        };
        _cache.Set(key, match, policy);
    }

    return match;
}
/// <summary>
/// Current method:
/// Character matching + substring matching;
/// 1. Query search string is split into substrings, separator is whitespace.
/// 2. Check each query substring's characters against full compare string,
/// 3. if a character in the substring is matched, loop back to verify the previous character.
/// 4. If previous character also matches, and is the start of the substring, update list.
/// 5. Once the previous character is verified, move on to the next character in the query substring.
/// 6. Move onto the next substring's characters until all substrings are checked.
/// 7. Consider success and move onto scoring if every char or substring without whitespaces matched
/// </summary>
/// <param name="query">Search text; trimmed, then split on spaces into substrings.</param>
/// <param name="stringToCompare">Candidate text the query substrings are matched against.</param>
/// <param name="opt">When <c>opt.IgnoreCase</c> is true both inputs are lower-cased before comparison.</param>
/// <returns>
/// A successful <see cref="MatchResult"/> carrying the matched indices and score when every
/// query substring was matched; otherwise an unsuccessful result. Null, empty or
/// whitespace-only input yields an unsuccessful result.
/// </returns>
public MatchResult FuzzyMatch(string query, string stringToCompare, MatchOption opt)
{
    // A whitespace-only query trims to "" and would produce no substrings below,
    // so it is rejected together with null/empty input.
    if (string.IsNullOrEmpty(stringToCompare) || string.IsNullOrWhiteSpace(query))
    {
        return new MatchResult(false, UserSettingSearchPrecision);
    }

    query = query.Trim();

    if (_alphabet != null)
    {
        query = _alphabet.Translate(query);
        stringToCompare = _alphabet.Translate(stringToCompare);
    }

    var fullStringToCompareWithoutCase = opt.IgnoreCase ? stringToCompare.ToLower() : stringToCompare;
    var queryWithoutCase = opt.IgnoreCase ? query.ToLower() : query;

    var querySubstrings = queryWithoutCase.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

    // The query has been through Translate, so make sure there is still something
    // to match before indexing into the array.
    if (querySubstrings.Length == 0)
    {
        return new MatchResult(false, UserSettingSearchPrecision);
    }

    int currentQuerySubstringIndex = 0;
    var currentQuerySubstring = querySubstrings[currentQuerySubstringIndex];
    var currentQuerySubstringCharacterIndex = 0;

    var firstMatchIndex = -1;
    var firstMatchIndexInWord = -1;
    var lastMatchIndex = 0;
    bool allQuerySubstringsMatched = false;
    bool matchFoundInPreviousLoop = false;
    bool allSubstringsContainedInCompareString = true;

    var indexList = new List<int>();
    var spaceIndices = new List<int>();

    for (var compareStringIndex = 0; compareStringIndex < fullStringToCompareWithoutCase.Length; compareStringIndex++)
    {
        // To maintain a list of indices which correspond to spaces in the string to compare
        // To populate the list only for the first query substring
        if (fullStringToCompareWithoutCase[compareStringIndex] == ' ' && currentQuerySubstringIndex == 0)
        {
            spaceIndices.Add(compareStringIndex);
        }

        if (fullStringToCompareWithoutCase[compareStringIndex] != currentQuerySubstring[currentQuerySubstringCharacterIndex])
        {
            matchFoundInPreviousLoop = false;
            continue;
        }

        if (firstMatchIndex < 0)
        {
            // first matched char will become the start of the compared string
            firstMatchIndex = compareStringIndex;
        }

        if (currentQuerySubstringCharacterIndex == 0)
        {
            // first letter of current word
            matchFoundInPreviousLoop = true;
            firstMatchIndexInWord = compareStringIndex;
        }
        else if (!matchFoundInPreviousLoop)
        {
            // we want to verify that there is not a better match if this is not a full word
            // in order to do so we need to verify all previous chars are part of the pattern
            var startIndexToVerify = compareStringIndex - currentQuerySubstringCharacterIndex;

            if (AllPreviousCharsMatched(startIndexToVerify, currentQuerySubstringCharacterIndex, fullStringToCompareWithoutCase, currentQuerySubstring))
            {
                matchFoundInPreviousLoop = true;

                // if it's the beginning character of the first query substring that is matched
                // then we need to update start index
                firstMatchIndex = currentQuerySubstringIndex == 0 ? startIndexToVerify : firstMatchIndex;

                indexList = GetUpdatedIndexList(startIndexToVerify, currentQuerySubstringCharacterIndex, firstMatchIndexInWord, indexList);
            }
        }

        lastMatchIndex = compareStringIndex + 1;
        indexList.Add(compareStringIndex);

        currentQuerySubstringCharacterIndex++;

        // if finished looping through every character in the current substring
        if (currentQuerySubstringCharacterIndex == currentQuerySubstring.Length)
        {
            // if any of the substrings was not matched then consider as all are not matched
            allSubstringsContainedInCompareString = matchFoundInPreviousLoop && allSubstringsContainedInCompareString;

            currentQuerySubstringIndex++;

            allQuerySubstringsMatched = AllQuerySubstringsMatched(currentQuerySubstringIndex, querySubstrings.Length);
            if (allQuerySubstringsMatched)
            {
                break;
            }

            // otherwise move to the next query substring
            currentQuerySubstring = querySubstrings[currentQuerySubstringIndex];
            currentQuerySubstringCharacterIndex = 0;
        }
    }

    // proceed to calculate score if every char or substring without whitespaces matched
    if (allQuerySubstringsMatched)
    {
        var nearestSpaceIndex = CalculateClosestSpaceIndex(spaceIndices, firstMatchIndex);
        var score = CalculateSearchScore(query, stringToCompare, firstMatchIndex - nearestSpaceIndex - 1, lastMatchIndex - firstMatchIndex, allSubstringsContainedInCompareString);

        return new MatchResult(true, UserSettingSearchPrecision, indexList, score);
    }

    return new MatchResult(false, UserSettingSearchPrecision);
}
/// <summary>
/// Current method has two parts, Acronym Match and Fuzzy Search:
///
/// Acronym Match:
/// Character listed below will be considered as acronym
/// 1. Character on index 0
/// 2. Character appears after a space
/// 3. Character that is UpperCase
/// 4. Character that is number
///
/// Acronym Match will succeed when all query characters match with acronyms in stringToCompare.
/// If any of the characters in the query isn't matched with stringToCompare, Acronym Match will fail.
/// Score will be calculated based on the percentage of all query characters matched with total acronyms in stringToCompare.
///
/// Fuzzy Search:
/// Character matching + substring matching;
/// 1. Query search string is split into substrings, separator is whitespace.
/// 2. Check each query substring's characters against full compare string,
/// 3. if a character in the substring is matched, loop back to verify the previous character.
/// 4. If previous character also matches, and is the start of the substring, update list.
/// 5. Once the previous character is verified, move on to the next character in the query substring.
/// 6. Move onto the next substring's characters until all substrings are checked.
/// 7. Consider success and move onto scoring if every char or substring without whitespaces matched
/// </summary>
/// <param name="query">Search text; trimmed, then split on spaces into substrings for the fuzzy search.</param>
/// <param name="stringToCompare">
/// Candidate text. When <c>_alphabet</c> is set it is translated first, and matched indices are
/// mapped back through the returned translation mapping.
/// </param>
/// <param name="opt">When <c>opt.IgnoreCase</c> is true both inputs are lower-cased before comparison.</param>
/// <returns>
/// The acronym match when every query character matched an acronym character and its score reaches
/// <c>UserSettingSearchPrecision</c>; otherwise the fuzzy match when every query substring matched;
/// otherwise an unsuccessful result. Null, empty or whitespace-only input yields an unsuccessful result.
/// </returns>
public MatchResult FuzzyMatch(string query, string stringToCompare, MatchOption opt)
{
    // A whitespace-only query trims to "" and would leave querySubstrings empty,
    // so it is rejected together with null/empty input.
    if (string.IsNullOrEmpty(stringToCompare) || string.IsNullOrWhiteSpace(query))
    {
        return new MatchResult(false, UserSettingSearchPrecision);
    }

    query = query.Trim();

    TranslationMapping translationMapping;
    (stringToCompare, translationMapping) = _alphabet?.Translate(stringToCompare) ?? (stringToCompare, null);

    var currentAcronymQueryIndex = 0;
    var acronymMatchData = new List<int>();

    int acronymsTotalCount = 0;
    int acronymsMatched = 0;

    var fullStringToCompareWithoutCase = opt.IgnoreCase ? stringToCompare.ToLower() : stringToCompare;
    var queryWithoutCase = opt.IgnoreCase ? query.ToLower() : query;

    var querySubstrings = queryWithoutCase.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
    int currentQuerySubstringIndex = 0;
    var currentQuerySubstring = querySubstrings[currentQuerySubstringIndex];
    var currentQuerySubstringCharacterIndex = 0;

    var firstMatchIndex = -1;
    var firstMatchIndexInWord = -1;
    var lastMatchIndex = 0;
    bool allQuerySubstringsMatched = false;
    bool matchFoundInPreviousLoop = false;
    bool allSubstringsContainedInCompareString = true;

    var indexList = new List<int>();
    var spaceIndices = new List<int>();

    for (var compareStringIndex = 0; compareStringIndex < fullStringToCompareWithoutCase.Length; compareStringIndex++)
    {
        // If acronyms matching successfully finished, this gets the remaining not matched acronyms for score calculation
        if (currentAcronymQueryIndex >= query.Length && acronymsMatched == query.Length)
        {
            if (IsAcronymCount(stringToCompare, compareStringIndex))
            {
                acronymsTotalCount++;
            }

            continue;
        }

        // No separate "query exhausted" exit is needed here: currentAcronymQueryIndex and
        // acronymsMatched are only ever incremented together, so the branch above already
        // covers every iteration in which the acronym query index has reached query.Length.

        // To maintain a list of indices which correspond to spaces in the string to compare
        // To populate the list only for the first query substring
        if (fullStringToCompareWithoutCase[compareStringIndex] == ' ' && currentQuerySubstringIndex == 0)
        {
            spaceIndices.Add(compareStringIndex);
        }

        // Acronym Match
        if (IsAcronym(stringToCompare, compareStringIndex)
            && fullStringToCompareWithoutCase[compareStringIndex] == queryWithoutCase[currentAcronymQueryIndex])
        {
            acronymMatchData.Add(compareStringIndex);
            acronymsMatched++;
            currentAcronymQueryIndex++;
        }

        if (IsAcronymCount(stringToCompare, compareStringIndex))
        {
            acronymsTotalCount++;
        }

        // The allQuerySubstringsMatched test must come first: once the fuzzy search is complete,
        // currentQuerySubstringCharacterIndex equals the substring length and must not be used as an index.
        if (allQuerySubstringsMatched
            || fullStringToCompareWithoutCase[compareStringIndex] != currentQuerySubstring[currentQuerySubstringCharacterIndex])
        {
            matchFoundInPreviousLoop = false;
            continue;
        }

        if (firstMatchIndex < 0)
        {
            // first matched char will become the start of the compared string
            firstMatchIndex = compareStringIndex;
        }

        if (currentQuerySubstringCharacterIndex == 0)
        {
            // first letter of current word
            matchFoundInPreviousLoop = true;
            firstMatchIndexInWord = compareStringIndex;
        }
        else if (!matchFoundInPreviousLoop)
        {
            // we want to verify that there is not a better match if this is not a full word
            // in order to do so we need to verify all previous chars are part of the pattern
            var startIndexToVerify = compareStringIndex - currentQuerySubstringCharacterIndex;

            if (AllPreviousCharsMatched(startIndexToVerify, currentQuerySubstringCharacterIndex, fullStringToCompareWithoutCase, currentQuerySubstring))
            {
                matchFoundInPreviousLoop = true;

                // if it's the beginning character of the first query substring that is matched
                // then we need to update start index
                firstMatchIndex = currentQuerySubstringIndex == 0 ? startIndexToVerify : firstMatchIndex;

                indexList = GetUpdatedIndexList(startIndexToVerify, currentQuerySubstringCharacterIndex, firstMatchIndexInWord, indexList);
            }
        }

        lastMatchIndex = compareStringIndex + 1;
        indexList.Add(compareStringIndex);

        currentQuerySubstringCharacterIndex++;

        // if finished looping through every character in the current substring
        if (currentQuerySubstringCharacterIndex == currentQuerySubstring.Length)
        {
            // if any of the substrings was not matched then consider as all are not matched
            allSubstringsContainedInCompareString = matchFoundInPreviousLoop && allSubstringsContainedInCompareString;

            currentQuerySubstringIndex++;

            allQuerySubstringsMatched = AllQuerySubstringsMatched(currentQuerySubstringIndex, querySubstrings.Length);
            if (allQuerySubstringsMatched)
            {
                // Keep iterating (instead of breaking) so acronym matching and counting
                // continue over the rest of the string.
                continue;
            }

            // otherwise move to the next query substring
            currentQuerySubstring = querySubstrings[currentQuerySubstringIndex];
            currentQuerySubstringCharacterIndex = 0;
        }
    }

    // return acronym match if all query char matched
    // (the acronymsTotalCount check guards the division below against a zero divisor)
    if (acronymsMatched > 0 && acronymsMatched == query.Length && acronymsTotalCount > 0)
    {
        int acronymScore = acronymsMatched * 100 / acronymsTotalCount;

        if (acronymScore >= (int)UserSettingSearchPrecision)
        {
            acronymMatchData = acronymMatchData.Select(x => translationMapping?.MapToOriginalIndex(x) ?? x).Distinct().ToList();
            return new MatchResult(true, UserSettingSearchPrecision, acronymMatchData, acronymScore);
        }
    }

    // proceed to calculate score if every char or substring without whitespaces matched
    if (allQuerySubstringsMatched)
    {
        var nearestSpaceIndex = CalculateClosestSpaceIndex(spaceIndices, firstMatchIndex);
        var score = CalculateSearchScore(query, stringToCompare, firstMatchIndex - nearestSpaceIndex - 1, lastMatchIndex - firstMatchIndex, allSubstringsContainedInCompareString);

        var resultList = indexList.Select(x => translationMapping?.MapToOriginalIndex(x) ?? x).Distinct().ToList();
        return new MatchResult(true, UserSettingSearchPrecision, resultList, score);
    }

    return new MatchResult(false, UserSettingSearchPrecision);
}