/// <summary>
/// Builds a <see cref="FuzzyMatcher"/> for the given query and match options.
/// </summary>
/// <param name="query">Search text; must not be null.</param>
/// <param name="opt">Match options passed through to the matcher's constructor.</param>
/// <returns>A newly constructed matcher.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="query"/> is null.</exception>
public static FuzzyMatcher Create(string query, MatchOption opt)
{
    if (query != null)
    {
        return new FuzzyMatcher(query, opt);
    }

    throw new ArgumentNullException(nameof(query));
}
/// <summary>
/// Factory for <see cref="FuzzyMatcher"/> instances.
/// </summary>
/// <param name="query">Search text; must not be null.</param>
/// <param name="opt">Match options passed through to the matcher's constructor.</param>
/// <returns>A newly constructed matcher.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="query"/> is null.</exception>
public static FuzzyMatcher Create(string query, MatchOption opt)
{
    // Reject null here so the caller gets a descriptive ArgumentNullException
    // rather than a NullReferenceException from dereferencing the query later.
    if (query == null)
    {
        throw new ArgumentNullException(nameof(query));
    }

    return new FuzzyMatcher(query, opt);
}
/// <summary>
/// Initializes the matcher, storing the query with leading and trailing
/// whitespace removed together with the supplied match options.
/// </summary>
/// <param name="query">Search text; it is dereferenced, so it must not be null.</param>
/// <param name="opt">Match options stored as-is.</param>
private FuzzyMatcher(string query, MatchOption opt)
{
    var trimmedQuery = query.Trim();

    this.query = trimmedQuery;
    this.opt = opt;
}
/// <summary>
/// Current method:
/// Character matching + substring matching;
/// 1. Query search string is split into substrings, separator is whitespace.
/// 2. Check each query substring's characters against full compare string,
/// 3. if a character in the substring is matched, loop back to verify the previous character.
/// 4. If previous character also matches, and is the start of the substring, update list.
/// 5. Once the previous character is verified, move on to the next character in the query substring.
/// 6. Move onto the next substring's characters until all substrings are checked.
/// 7. Consider success and move onto scoring if every char or substring without whitespaces matched
/// </summary>
/// <param name="query">Search text; trimmed and split on spaces before matching.</param>
/// <param name="stringToCompare">Candidate string the query is matched against.</param>
/// <param name="opt">Match options; <c>IgnoreCase</c> selects case-insensitive comparison.</param>
/// <returns>
/// A <see cref="MatchResult"/> built with <c>true</c>, the matched indices and a score when every
/// query substring was matched; otherwise one built with <c>false</c>. The non-matching result is
/// also returned when either string is null or empty, or when the query contains only whitespace.
/// </returns>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="opt"/> is null and both strings are non-empty.
/// </exception>
public MatchResult FuzzyMatch(string query, string stringToCompare, MatchOption opt)
{
    if (string.IsNullOrEmpty(stringToCompare) || string.IsNullOrEmpty(query))
    {
        return new MatchResult(false, UserSettingSearchPrecision);
    }

    if (opt == null)
    {
        throw new ArgumentNullException(nameof(opt));
    }

    query = query.Trim();

    // Using InvariantCulture since this is internal
    var fullStringToCompareWithoutCase = opt.IgnoreCase ? stringToCompare.ToUpper(CultureInfo.InvariantCulture) : stringToCompare;
    var queryWithoutCase = opt.IgnoreCase ? query.ToUpper(CultureInfo.InvariantCulture) : query;

    var querySubstrings = queryWithoutCase.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

    // A query made up solely of whitespace trims to "" and splits into zero
    // substrings; without this guard the indexer below would throw.
    if (querySubstrings.Length == 0)
    {
        return new MatchResult(false, UserSettingSearchPrecision);
    }

    int currentQuerySubstringIndex = 0;
    var currentQuerySubstring = querySubstrings[currentQuerySubstringIndex];
    var currentQuerySubstringCharacterIndex = 0;

    var firstMatchIndex = -1;
    var firstMatchIndexInWord = -1;
    var lastMatchIndex = 0;
    bool allQuerySubstringsMatched = false;
    bool matchFoundInPreviousLoop = false;
    bool allSubstringsContainedInCompareString = true;

    var indexList = new List<int>();
    List<int> spaceIndices = new List<int>();

    for (var compareStringIndex = 0; compareStringIndex < fullStringToCompareWithoutCase.Length; compareStringIndex++)
    {
        // To maintain a list of indices which correspond to spaces in the string to compare
        // To populate the list only for the first query substring
        if (fullStringToCompareWithoutCase[compareStringIndex].Equals(' ') && currentQuerySubstringIndex == 0)
        {
            spaceIndices.Add(compareStringIndex);
        }

        // compareResult is true when the two characters DIFFER.
        bool compareResult;

        if (opt.IgnoreCase)
        {
            var fullStringToCompare = fullStringToCompareWithoutCase[compareStringIndex].ToString();
            var querySubstring = currentQuerySubstring[currentQuerySubstringCharacterIndex].ToString();
            compareResult = string.Compare(fullStringToCompare, querySubstring, CultureInfo.CurrentCulture, CompareOptions.IgnoreCase | CompareOptions.IgnoreNonSpace) != 0;
        }
        else
        {
            compareResult = fullStringToCompareWithoutCase[compareStringIndex] != currentQuerySubstring[currentQuerySubstringCharacterIndex];
        }

        if (compareResult)
        {
            matchFoundInPreviousLoop = false;
            continue;
        }

        if (firstMatchIndex < 0)
        {
            // first matched char will become the start of the compared string
            firstMatchIndex = compareStringIndex;
        }

        if (currentQuerySubstringCharacterIndex == 0)
        {
            // first letter of current word
            matchFoundInPreviousLoop = true;
            firstMatchIndexInWord = compareStringIndex;
        }
        else if (!matchFoundInPreviousLoop)
        {
            // we want to verify that there is not a better match if this is not a full word
            // in order to do so we need to verify all previous chars are part of the pattern
            var startIndexToVerify = compareStringIndex - currentQuerySubstringCharacterIndex;

            if (AllPreviousCharsMatched(startIndexToVerify, currentQuerySubstringCharacterIndex, fullStringToCompareWithoutCase, currentQuerySubstring))
            {
                matchFoundInPreviousLoop = true;

                // if it's the beginning character of the first query substring that is matched then we need to update start index
                firstMatchIndex = currentQuerySubstringIndex == 0 ? startIndexToVerify : firstMatchIndex;

                indexList = GetUpdatedIndexList(startIndexToVerify, currentQuerySubstringCharacterIndex, firstMatchIndexInWord, indexList);
            }
        }

        lastMatchIndex = compareStringIndex + 1;
        indexList.Add(compareStringIndex);

        currentQuerySubstringCharacterIndex++;

        // if finished looping through every character in the current substring
        if (currentQuerySubstringCharacterIndex == currentQuerySubstring.Length)
        {
            // if any of the substrings was not matched then consider as all are not matched
            allSubstringsContainedInCompareString = matchFoundInPreviousLoop && allSubstringsContainedInCompareString;

            currentQuerySubstringIndex++;

            allQuerySubstringsMatched = AllQuerySubstringsMatched(currentQuerySubstringIndex, querySubstrings.Length);

            if (allQuerySubstringsMatched)
            {
                break;
            }

            // otherwise move to the next query substring
            currentQuerySubstring = querySubstrings[currentQuerySubstringIndex];
            currentQuerySubstringCharacterIndex = 0;
        }
    }

    // proceed to calculate score if every char or substring without whitespaces matched
    if (allQuerySubstringsMatched)
    {
        var nearestSpaceIndex = CalculateClosestSpaceIndex(spaceIndices, firstMatchIndex);
        var score = CalculateSearchScore(query, stringToCompare, firstMatchIndex - nearestSpaceIndex - 1, lastMatchIndex - firstMatchIndex, allSubstringsContainedInCompareString);

        return new MatchResult(true, UserSettingSearchPrecision, indexList, score);
    }

    return new MatchResult(false, UserSettingSearchPrecision);
}
/// <summary>
/// Creates a <see cref="FuzzyMatcher"/> from a query and its match options.
/// </summary>
/// <param name="query">Search text; must not be null.</param>
/// <param name="opt">Match options passed through to the matcher's constructor.</param>
/// <returns>A newly constructed matcher.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="query"/> is null.</exception>
public static FuzzyMatcher Create(string query, MatchOption opt)
{
    // Validate at the public entry point: a null query would otherwise only
    // fail later, as a NullReferenceException, when it is dereferenced.
    if (query == null)
    {
        throw new ArgumentNullException(nameof(query));
    }

    return new FuzzyMatcher(query, opt);
}
/// <summary>
/// Current method:
/// Character matching + substring matching;
/// 1. Query search string is split into substrings, separator is whitespace.
/// 2. Check each query substring's characters against full compare string,
/// 3. if a character in the substring is matched, loop back to verify the previous character.
/// 4. If previous character also matches, and is the start of the substring, update list.
/// 5. Once the previous character is verified, move on to the next character in the query substring.
/// 6. Move onto the next substring's characters until all substrings are checked.
/// 7. Consider success and move onto scoring if every char or substring without whitespaces matched
/// </summary>
/// <param name="query">Search text; trimmed, optionally translated, and split on spaces before matching.</param>
/// <param name="stringToCompare">Candidate string the query is matched against.</param>
/// <param name="opt">Match options; <c>IgnoreCase</c> lower-cases both strings before comparing.</param>
/// <returns>
/// A <see cref="MatchResult"/> built with <c>true</c>, the matched indices and a score when every
/// query substring was matched; otherwise one built with <c>false</c>. The non-matching result is
/// also returned when either string is null or empty, or when the query yields no substrings.
/// </returns>
public MatchResult FuzzyMatch(string query, string stringToCompare, MatchOption opt)
{
    if (string.IsNullOrEmpty(stringToCompare) || string.IsNullOrEmpty(query))
    {
        return new MatchResult(false, UserSettingSearchPrecision);
    }

    query = query.Trim();

    // When an alphabet is configured, both strings are run through it so the
    // comparison happens on the translated text.
    if (_alphabet != null)
    {
        query = _alphabet.Translate(query);
        stringToCompare = _alphabet.Translate(stringToCompare);
    }

    var fullStringToCompareWithoutCase = opt.IgnoreCase ? stringToCompare.ToLower() : stringToCompare;
    var queryWithoutCase = opt.IgnoreCase ? query.ToLower() : query;

    var querySubstrings = queryWithoutCase.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

    // A query made up solely of whitespace trims to "" and splits into zero
    // substrings; without this guard the indexer below would throw.
    if (querySubstrings.Length == 0)
    {
        return new MatchResult(false, UserSettingSearchPrecision);
    }

    int currentQuerySubstringIndex = 0;
    var currentQuerySubstring = querySubstrings[currentQuerySubstringIndex];
    var currentQuerySubstringCharacterIndex = 0;

    var firstMatchIndex = -1;
    var firstMatchIndexInWord = -1;
    var lastMatchIndex = 0;
    bool allQuerySubstringsMatched = false;
    bool matchFoundInPreviousLoop = false;
    bool allSubstringsContainedInCompareString = true;

    var indexList = new List<int>();

    for (var compareStringIndex = 0; compareStringIndex < fullStringToCompareWithoutCase.Length; compareStringIndex++)
    {
        if (fullStringToCompareWithoutCase[compareStringIndex] != currentQuerySubstring[currentQuerySubstringCharacterIndex])
        {
            matchFoundInPreviousLoop = false;
            continue;
        }

        if (firstMatchIndex < 0)
        {
            // first matched char will become the start of the compared string
            firstMatchIndex = compareStringIndex;
        }

        if (currentQuerySubstringCharacterIndex == 0)
        {
            // first letter of current word
            matchFoundInPreviousLoop = true;
            firstMatchIndexInWord = compareStringIndex;
        }
        else if (!matchFoundInPreviousLoop)
        {
            // we want to verify that there is not a better match if this is not a full word
            // in order to do so we need to verify all previous chars are part of the pattern
            var startIndexToVerify = compareStringIndex - currentQuerySubstringCharacterIndex;

            if (AllPreviousCharsMatched(startIndexToVerify, currentQuerySubstringCharacterIndex, fullStringToCompareWithoutCase, currentQuerySubstring))
            {
                matchFoundInPreviousLoop = true;

                // if it's the beginning character of the first query substring that is matched then we need to update start index
                firstMatchIndex = currentQuerySubstringIndex == 0 ? startIndexToVerify : firstMatchIndex;

                indexList = GetUpdatedIndexList(startIndexToVerify, currentQuerySubstringCharacterIndex, firstMatchIndexInWord, indexList);
            }
        }

        lastMatchIndex = compareStringIndex + 1;
        indexList.Add(compareStringIndex);

        currentQuerySubstringCharacterIndex++;

        // if finished looping through every character in the current substring
        if (currentQuerySubstringCharacterIndex == currentQuerySubstring.Length)
        {
            // if any of the substrings was not matched then consider as all are not matched
            allSubstringsContainedInCompareString = matchFoundInPreviousLoop && allSubstringsContainedInCompareString;

            currentQuerySubstringIndex++;

            allQuerySubstringsMatched = AllQuerySubstringsMatched(currentQuerySubstringIndex, querySubstrings.Length);

            if (allQuerySubstringsMatched)
            {
                break;
            }

            // otherwise move to the next query substring
            currentQuerySubstring = querySubstrings[currentQuerySubstringIndex];
            currentQuerySubstringCharacterIndex = 0;
        }
    }

    // proceed to calculate score if every char or substring without whitespaces matched
    if (allQuerySubstringsMatched)
    {
        var score = CalculateSearchScore(query, stringToCompare, firstMatchIndex, lastMatchIndex - firstMatchIndex, allSubstringsContainedInCompareString);

        return new MatchResult(true, UserSettingSearchPrecision, indexList, score);
    }

    return new MatchResult(false, UserSettingSearchPrecision);
}
/// <summary>
/// Fuzzy-matches a space-separated query against a candidate string, word by word.
/// refer to https://github.com/mattyork/fuzzy
/// </summary>
/// <param name="query">Search text; trimmed and split on spaces (empty entries are ignored).</param>
/// <param name="stringToCompare">Candidate string the query is matched against.</param>
/// <param name="opt">Match options; <c>IgnoreCase</c> lower-cases both strings before comparing.</param>
/// <returns>
/// A <see cref="MatchResult"/> with <c>Success = true</c>, the matched character indices in
/// <c>MatchData</c> and the larger of the search score and the pinyin score in <c>RawScore</c>
/// when every query word was matched; otherwise a result with <c>Success = false</c>. The
/// unsuccessful result is also returned when either string is null or empty, or when the query
/// contains no words.
/// </returns>
public static MatchResult FuzzySearch(string query, string stringToCompare, MatchOption opt)
{
    if (string.IsNullOrEmpty(stringToCompare) || string.IsNullOrEmpty(query))
    {
        return new MatchResult { Success = false };
    }

    query = query.Trim();

    var fullStringToCompareWithoutCase = opt.IgnoreCase ? stringToCompare.ToLower() : stringToCompare;
    var queryWithoutCase = opt.IgnoreCase ? query.ToLower() : query;

    // Drop empty entries: consecutive spaces (or a whitespace-only query) would
    // otherwise produce "" words, and indexing into them below would throw.
    var queryToCompareSeparated = queryWithoutCase.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
    if (queryToCompareSeparated.Length == 0)
    {
        return new MatchResult { Success = false };
    }

    int currentQueryToCompareIndex = 0;
    var currentQueryToCompare = queryToCompareSeparated[currentQueryToCompareIndex];

    var patternIndex = 0;
    var firstMatchIndex = -1;
    var firstMatchIndexInWord = -1;
    var lastMatchIndex = 0;
    bool allMatched = false;
    bool isFullWordMatched = false;
    bool allWordsFullyMatched = true;

    var indexList = new List<int>();

    for (var index = 0; index < fullStringToCompareWithoutCase.Length; index++)
    {
        if (fullStringToCompareWithoutCase[index] != currentQueryToCompare[patternIndex])
        {
            isFullWordMatched = false;
            continue;
        }

        if (firstMatchIndex < 0)
        {
            // first matched char will become the start of the compared string
            firstMatchIndex = index;
        }

        if (patternIndex == 0)
        {
            // first letter of current word
            isFullWordMatched = true;
            firstMatchIndexInWord = index;
        }
        else if (!isFullWordMatched)
        {
            // we want to verify that there is not a better match if this is not a full word
            // in order to do so we need to verify all previous chars are part of the pattern
            int startIndexToVerify = index - patternIndex;
            bool allMatch = true;
            for (int indexToCheck = 0; indexToCheck < patternIndex; indexToCheck++)
            {
                if (fullStringToCompareWithoutCase[startIndexToVerify + indexToCheck] != currentQueryToCompare[indexToCheck])
                {
                    // one mismatch settles the answer; no need to scan further
                    allMatch = false;
                    break;
                }
            }

            if (allMatch)
            {
                // update to this as a full word
                isFullWordMatched = true;
                if (currentQueryToCompareIndex == 0)
                {
                    // first word so we need to update start index
                    firstMatchIndex = startIndexToVerify;
                }

                // replace the scattered indices recorded for this word with the contiguous run
                indexList.RemoveAll(x => x >= firstMatchIndexInWord);
                for (int indexToCheck = 0; indexToCheck < patternIndex; indexToCheck++)
                {
                    // update the index list
                    indexList.Add(startIndexToVerify + indexToCheck);
                }
            }
        }

        lastMatchIndex = index + 1;
        indexList.Add(index);

        // increase the pattern matched index and check if everything was matched
        if (++patternIndex == currentQueryToCompare.Length)
        {
            if (++currentQueryToCompareIndex >= queryToCompareSeparated.Length)
            {
                // moved over all the words
                // NOTE(review): the full-word flag of the final word is not folded into
                // allWordsFullyMatched because of this early exit - confirm that is intended.
                allMatched = true;
                break;
            }

            // otherwise move to the next word
            currentQueryToCompare = queryToCompareSeparated[currentQueryToCompareIndex];
            patternIndex = 0;
            if (!isFullWordMatched)
            {
                // if any of the words was not fully matched all are not fully matched
                allWordsFullyMatched = false;
            }
        }
    }

    // return rendered string if we have a match for every char or all substring without whitespaces matched
    if (allMatched)
    {
        // check if all query string was contained in string to compare
        bool containedFully = lastMatchIndex - firstMatchIndex == queryWithoutCase.Length;
        var score = CalculateSearchScore(query, stringToCompare, firstMatchIndex, lastMatchIndex - firstMatchIndex, containedFully, allWordsFullyMatched);
        var pinyinScore = ScoreForPinyin(stringToCompare, query);

        var result = new MatchResult
        {
            Success = true,
            MatchData = indexList,
            RawScore = Math.Max(score, pinyinScore)
        };

        return result;
    }

    return new MatchResult { Success = false };
}
/// <summary>
/// Matches the query characters, in order, against a candidate string and renders the
/// candidate with each matched character wrapped in the configured prefix and suffix.
/// refer to https://github.com/mattyork/fuzzy
/// </summary>
/// <param name="query">Search text; leading and trailing whitespace is ignored.</param>
/// <param name="stringToCompare">Candidate string the query is matched against.</param>
/// <param name="opt">
/// Match options; <c>IgnoreCase</c> lower-cases both strings before comparing, and
/// <c>Prefix</c>/<c>Suffix</c> are placed around every matched character in the rendered value.
/// </param>
/// <returns>
/// A <see cref="MatchResult"/> with <c>Success = true</c>, the rendered string in <c>Value</c> and
/// the result of <c>CalScore</c> in <c>Score</c> when every query character was found in order;
/// otherwise a result with <c>Success = false</c>. The unsuccessful result is also returned when
/// either string is null or empty, or when the query is empty after trimming.
/// </returns>
public static MatchResult FuzzySearch(string query, string stringToCompare, MatchOption opt)
{
    if (string.IsNullOrEmpty(stringToCompare) || string.IsNullOrEmpty(query))
    {
        return new MatchResult { Success = false };
    }

    // Trim returns a new string, so the result must be assigned for it to take effect.
    query = query.Trim();
    if (query.Length == 0)
    {
        // whitespace-only query: there is no pattern character to index below
        return new MatchResult { Success = false };
    }

    var len = stringToCompare.Length;
    var compareString = opt.IgnoreCase ? stringToCompare.ToLower() : stringToCompare;
    var pattern = opt.IgnoreCase ? query.ToLower() : query;

    // capacity: the candidate itself plus one prefix/suffix pair per query character
    var sb = new StringBuilder(stringToCompare.Length + (query.Length * (opt.Prefix.Length + opt.Suffix.Length)));
    var patternIdx = 0;
    var firstMatchIndex = -1;
    var lastMatchIndex = 0;

    for (var idx = 0; idx < len; idx++)
    {
        // emit the original-cased character even though comparison uses compareString
        var ch = stringToCompare[idx];
        if (compareString[idx] == pattern[patternIdx])
        {
            if (firstMatchIndex < 0)
            {
                firstMatchIndex = idx;
            }

            lastMatchIndex = idx + 1;

            sb.Append(opt.Prefix + ch + opt.Suffix);
            patternIdx += 1;
        }
        else
        {
            sb.Append(ch);
        }

        // match success, append remain char
        if (patternIdx == pattern.Length && (idx + 1) != compareString.Length)
        {
            sb.Append(stringToCompare.Substring(idx + 1));
            break;
        }
    }

    // return rendered string if we have a match for every char
    if (patternIdx == pattern.Length)
    {
        return new MatchResult
        {
            Success = true,
            Value = sb.ToString(),
            Score = CalScore(query, stringToCompare, firstMatchIndex, lastMatchIndex - firstMatchIndex)
        };
    }

    return new MatchResult { Success = false };
}