/// <summary>
/// Factory method: builds a <see cref="FuzzyMatcher"/> for the given query and match options
/// by forwarding both arguments to the private constructor.
/// </summary>
/// <param name="query">The search text to match with.</param>
/// <param name="opt">Options controlling how matching is performed.</param>
/// <returns>A newly constructed <see cref="FuzzyMatcher"/>.</returns>
public static FuzzyMatcher Create(string query, MatchOption opt) => new FuzzyMatcher(query, opt);
Пример #2
0
        /// <summary>
        /// Fuzzy-matches <paramref name="query"/> against <paramref name="stringToCompare"/>
        /// using character matching combined with substring matching:
        /// 1. The query is trimmed and split into substrings, using a space as the separator.
        /// 2. Each query substring's characters are checked against the full compare string.
        /// 3. If a character in the substring is matched, loop back to verify the previous characters.
        /// 4. If the previous characters also match, and form the start of the substring, update the index list.
        /// 5. Once the previous characters are verified, move on to the next character in the query substring.
        /// 6. Move on to the next substring's characters until all substrings are checked.
        /// 7. Consider it a success and move on to scoring if every char or substring (without whitespace) matched.
        /// </summary>
        /// <param name="query">The search text. A null, empty or whitespace-only query never matches.</param>
        /// <param name="stringToCompare">The text being searched. A null or empty value never matches.</param>
        /// <param name="opt">Match options; <c>IgnoreCase</c> lower-cases both strings before comparing.</param>
        /// <returns>
        /// A successful <see cref="MatchResult"/> carrying the matched character indices and a score when
        /// every query substring was found in order; otherwise an unsuccessful <see cref="MatchResult"/>.
        /// </returns>
        public MatchResult FuzzyMatch(string query, string stringToCompare, MatchOption opt)
        {
            // A whitespace-only query would trim down to "" and split into zero substrings,
            // so it has to be rejected here rather than by a plain IsNullOrEmpty check.
            if (string.IsNullOrEmpty(stringToCompare) || string.IsNullOrWhiteSpace(query))
            {
                return new MatchResult(false, UserSettingSearchPrecision);
            }

            query = query.Trim();

            if (_alphabet != null)
            {
                query           = _alphabet.Translate(query);
                stringToCompare = _alphabet.Translate(stringToCompare);
            }

            var fullStringToCompareWithoutCase = opt.IgnoreCase ? stringToCompare.ToLower() : stringToCompare;

            var queryWithoutCase = opt.IgnoreCase ? query.ToLower() : query;

            var querySubstrings = queryWithoutCase.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
            if (querySubstrings.Length == 0)
            {
                // Defensive: the alphabet translation above may have changed the query, and indexing
                // an empty substring array below would throw. Nothing to search for means no match.
                return new MatchResult(false, UserSettingSearchPrecision);
            }

            int currentQuerySubstringIndex          = 0;
            var currentQuerySubstring               = querySubstrings[currentQuerySubstringIndex];
            var currentQuerySubstringCharacterIndex = 0;

            var  firstMatchIndex                       = -1;
            var  firstMatchIndexInWord                 = -1;
            var  lastMatchIndex                        = 0;
            bool allQuerySubstringsMatched             = false;
            bool matchFoundInPreviousLoop              = false;
            bool allSubstringsContainedInCompareString = true;

            var indexList    = new List<int>();
            var spaceIndices = new List<int>();

            for (var compareStringIndex = 0; compareStringIndex < fullStringToCompareWithoutCase.Length; compareStringIndex++)
            {
                // Maintain a list of indices which correspond to spaces in the string to compare.
                // The list is only populated while the first query substring is being matched.
                if (fullStringToCompareWithoutCase[compareStringIndex] == ' ' && currentQuerySubstringIndex == 0)
                {
                    spaceIndices.Add(compareStringIndex);
                }

                if (fullStringToCompareWithoutCase[compareStringIndex] != currentQuerySubstring[currentQuerySubstringCharacterIndex])
                {
                    matchFoundInPreviousLoop = false;
                    continue;
                }

                if (firstMatchIndex < 0)
                {
                    // first matched char will become the start of the compared string
                    firstMatchIndex = compareStringIndex;
                }

                if (currentQuerySubstringCharacterIndex == 0)
                {
                    // first letter of current word
                    matchFoundInPreviousLoop = true;
                    firstMatchIndexInWord    = compareStringIndex;
                }
                else if (!matchFoundInPreviousLoop)
                {
                    // we want to verify that there is not a better match if this is not a full word
                    // in order to do so we need to verify all previous chars are part of the pattern
                    var startIndexToVerify = compareStringIndex - currentQuerySubstringCharacterIndex;

                    if (AllPreviousCharsMatched(startIndexToVerify, currentQuerySubstringCharacterIndex, fullStringToCompareWithoutCase, currentQuerySubstring))
                    {
                        matchFoundInPreviousLoop = true;

                        // if it's the beginning character of the first query substring that is matched then we need to update start index
                        firstMatchIndex = currentQuerySubstringIndex == 0 ? startIndexToVerify : firstMatchIndex;

                        indexList = GetUpdatedIndexList(startIndexToVerify, currentQuerySubstringCharacterIndex, firstMatchIndexInWord, indexList);
                    }
                }

                // lastMatchIndex is exclusive: one past the most recently matched character
                lastMatchIndex = compareStringIndex + 1;
                indexList.Add(compareStringIndex);

                currentQuerySubstringCharacterIndex++;

                // if finished looping through every character in the current substring
                if (currentQuerySubstringCharacterIndex == currentQuerySubstring.Length)
                {
                    // if any of the substrings was not matched then consider as all are not matched
                    allSubstringsContainedInCompareString = matchFoundInPreviousLoop && allSubstringsContainedInCompareString;

                    currentQuerySubstringIndex++;

                    allQuerySubstringsMatched = AllQuerySubstringsMatched(currentQuerySubstringIndex, querySubstrings.Length);
                    if (allQuerySubstringsMatched)
                    {
                        break;
                    }

                    // otherwise move to the next query substring
                    currentQuerySubstring = querySubstrings[currentQuerySubstringIndex];
                    currentQuerySubstringCharacterIndex = 0;
                }
            }

            // proceed to calculate score if every char or substring without whitespaces matched
            if (allQuerySubstringsMatched)
            {
                var nearestSpaceIndex = CalculateClosestSpaceIndex(spaceIndices, firstMatchIndex);
                var score             = CalculateSearchScore(query, stringToCompare, firstMatchIndex - nearestSpaceIndex - 1, lastMatchIndex - firstMatchIndex, allSubstringsContainedInCompareString);

                return new MatchResult(true, UserSettingSearchPrecision, indexList, score);
            }

            return new MatchResult(false, UserSettingSearchPrecision);
        }
 /// <summary>
 /// Initializes the matcher with a whitespace-trimmed copy of the query and the match options.
 /// Private: the only construction site visible here is the static <c>Create</c> factory.
 /// </summary>
 /// <param name="query">The search text; leading and trailing whitespace is removed before it is stored.</param>
 /// <param name="opt">Options controlling how matching is performed.</param>
 private FuzzyMatcher(string query, MatchOption opt)
 {
     var trimmedQuery = query.Trim();

     this.query = trimmedQuery;
     this.opt   = opt;
 }
Пример #4
0
        /// <summary>
        /// Current method has two parts, Acronym Match and Fuzzy Search:
        ///
        /// Acronym Match:
        /// Characters listed below will be considered as acronyms
        /// 1. Character on index 0
        /// 2. Character appears after a space
        /// 3. Character that is UpperCase
        /// 4. Character that is number
        ///
        /// Acronym Match will succeed when all query characters match with acronyms in stringToCompare.
        /// If any of the characters in the query isn't matched with stringToCompare, Acronym Match will fail.
        /// Score will be calculated based on the percentage of all query characters matched with total acronyms in stringToCompare.
        ///
        /// Fuzzy Search:
        /// Character matching + substring matching;
        /// 1. Query search string is split into substrings, separator is whitespace.
        /// 2. Check each query substring's characters against full compare string,
        /// 3. if a character in the substring is matched, loop back to verify the previous character.
        /// 4. If previous character also matches, and is the start of the substring, update list.
        /// 5. Once the previous character is verified, move on to the next character in the query substring.
        /// 6. Move onto the next substring's characters until all substrings are checked.
        /// 7. Consider success and move onto scoring if every char or substring without whitespaces matched
        /// </summary>
        /// <param name="query">The search text. A null, empty or whitespace-only query never matches.</param>
        /// <param name="stringToCompare">The text being searched. A null or empty value never matches.</param>
        /// <param name="opt">Match options; <c>IgnoreCase</c> lower-cases both strings before comparing.</param>
        /// <returns>
        /// A successful <see cref="MatchResult"/> for an acronym match whose score reaches
        /// <c>UserSettingSearchPrecision</c>, otherwise a successful fuzzy result when every query
        /// substring was found in order, otherwise an unsuccessful <see cref="MatchResult"/>.
        /// Returned indices are mapped back through the translation mapping when one exists.
        /// </returns>
        public MatchResult FuzzyMatch(string query, string stringToCompare, MatchOption opt)
        {
            // A whitespace-only query would trim down to "" and split into zero substrings,
            // making querySubstrings[0] below throw; reject it up front.
            if (string.IsNullOrEmpty(stringToCompare) || string.IsNullOrWhiteSpace(query))
            {
                return new MatchResult(false, UserSettingSearchPrecision);
            }

            query = query.Trim();
            TranslationMapping translationMapping;

            // Only the compare string is translated; translationMapping stays null when no alphabet is set.
            (stringToCompare, translationMapping) = _alphabet?.Translate(stringToCompare) ?? (stringToCompare, null);

            var currentAcronymQueryIndex = 0;
            var acronymMatchData         = new List<int>();
            int acronymsTotalCount       = 0;
            int acronymsMatched          = 0;

            var fullStringToCompareWithoutCase = opt.IgnoreCase ? stringToCompare.ToLower() : stringToCompare;
            var queryWithoutCase = opt.IgnoreCase ? query.ToLower() : query;

            var querySubstrings                     = queryWithoutCase.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
            int currentQuerySubstringIndex          = 0;
            var currentQuerySubstring               = querySubstrings[currentQuerySubstringIndex];
            var currentQuerySubstringCharacterIndex = 0;

            var  firstMatchIndex                       = -1;
            var  firstMatchIndexInWord                 = -1;
            var  lastMatchIndex                        = 0;
            bool allQuerySubstringsMatched             = false;
            bool matchFoundInPreviousLoop              = false;
            bool allSubstringsContainedInCompareString = true;

            var indexList    = new List<int>();
            var spaceIndices = new List<int>();

            for (var compareStringIndex = 0; compareStringIndex < fullStringToCompareWithoutCase.Length; compareStringIndex++)
            {
                // If acronyms matching successfully finished, this gets the remaining not matched acronyms for score calculation
                if (currentAcronymQueryIndex >= query.Length && acronymsMatched == query.Length)
                {
                    if (IsAcronymCount(stringToCompare, compareStringIndex))
                    {
                        acronymsTotalCount++;
                    }
                    continue;
                }

                // Bounds guard for the queryWithoutCase[currentAcronymQueryIndex] lookup below.
                // (The original condition was `A || (A && allQuerySubstringsMatched)`, which is just `A`.)
                if (currentAcronymQueryIndex >= query.Length)
                {
                    break;
                }

                // To maintain a list of indices which correspond to spaces in the string to compare
                // To populate the list only for the first query substring
                if (fullStringToCompareWithoutCase[compareStringIndex] == ' ' && currentQuerySubstringIndex == 0)
                {
                    spaceIndices.Add(compareStringIndex);
                }

                // Acronym Match
                if (IsAcronym(stringToCompare, compareStringIndex))
                {
                    if (fullStringToCompareWithoutCase[compareStringIndex] ==
                        queryWithoutCase[currentAcronymQueryIndex])
                    {
                        acronymMatchData.Add(compareStringIndex);
                        acronymsMatched++;

                        currentAcronymQueryIndex++;
                    }
                }

                if (IsAcronymCount(stringToCompare, compareStringIndex))
                {
                    acronymsTotalCount++;
                }

                // Once every query substring is matched the fuzzy part is done; the loop keeps
                // running only so the acronym bookkeeping above can see the rest of the string.
                if (allQuerySubstringsMatched || fullStringToCompareWithoutCase[compareStringIndex] !=
                    currentQuerySubstring[currentQuerySubstringCharacterIndex])
                {
                    matchFoundInPreviousLoop = false;

                    continue;
                }

                if (firstMatchIndex < 0)
                {
                    // first matched char will become the start of the compared string
                    firstMatchIndex = compareStringIndex;
                }

                if (currentQuerySubstringCharacterIndex == 0)
                {
                    // first letter of current word
                    matchFoundInPreviousLoop = true;
                    firstMatchIndexInWord    = compareStringIndex;
                }
                else if (!matchFoundInPreviousLoop)
                {
                    // we want to verify that there is not a better match if this is not a full word
                    // in order to do so we need to verify all previous chars are part of the pattern
                    var startIndexToVerify = compareStringIndex - currentQuerySubstringCharacterIndex;

                    if (AllPreviousCharsMatched(startIndexToVerify, currentQuerySubstringCharacterIndex,
                                                fullStringToCompareWithoutCase, currentQuerySubstring))
                    {
                        matchFoundInPreviousLoop = true;

                        // if it's the beginning character of the first query substring that is matched then we need to update start index
                        firstMatchIndex = currentQuerySubstringIndex == 0 ? startIndexToVerify : firstMatchIndex;

                        indexList = GetUpdatedIndexList(startIndexToVerify, currentQuerySubstringCharacterIndex,
                                                        firstMatchIndexInWord, indexList);
                    }
                }

                // lastMatchIndex is exclusive: one past the most recently matched character
                lastMatchIndex = compareStringIndex + 1;
                indexList.Add(compareStringIndex);

                currentQuerySubstringCharacterIndex++;

                // if finished looping through every character in the current substring
                if (currentQuerySubstringCharacterIndex == currentQuerySubstring.Length)
                {
                    // if any of the substrings was not matched then consider as all are not matched
                    allSubstringsContainedInCompareString =
                        matchFoundInPreviousLoop && allSubstringsContainedInCompareString;

                    currentQuerySubstringIndex++;

                    allQuerySubstringsMatched =
                        AllQuerySubstringsMatched(currentQuerySubstringIndex, querySubstrings.Length);

                    if (allQuerySubstringsMatched)
                    {
                        continue;
                    }

                    // otherwise move to the next query substring
                    currentQuerySubstring = querySubstrings[currentQuerySubstringIndex];
                    currentQuerySubstringCharacterIndex = 0;
                }
            }

            // return acronym match if all query char matched
            // (acronymsTotalCount > 0 guards the division; IsAcronym and IsAcronymCount are separate
            // helpers, so a non-zero total is not guaranteed from what is visible here)
            if (acronymsMatched > 0 && acronymsMatched == query.Length && acronymsTotalCount > 0)
            {
                int acronymScore = acronymsMatched * 100 / acronymsTotalCount;

                if (acronymScore >= (int)UserSettingSearchPrecision)
                {
                    acronymMatchData = acronymMatchData.Select(x => translationMapping?.MapToOriginalIndex(x) ?? x).Distinct().ToList();
                    return new MatchResult(true, UserSettingSearchPrecision, acronymMatchData, acronymScore);
                }
            }

            // proceed to calculate score if every char or substring without whitespaces matched
            if (allQuerySubstringsMatched)
            {
                var nearestSpaceIndex = CalculateClosestSpaceIndex(spaceIndices, firstMatchIndex);
                var score             = CalculateSearchScore(query, stringToCompare, firstMatchIndex - nearestSpaceIndex - 1,
                                                             lastMatchIndex - firstMatchIndex, allSubstringsContainedInCompareString);

                var resultList = indexList.Select(x => translationMapping?.MapToOriginalIndex(x) ?? x).Distinct().ToList();
                return new MatchResult(true, UserSettingSearchPrecision, resultList, score);
            }

            return new MatchResult(false, UserSettingSearchPrecision);
        }