/// <summary>
/// Construct a new PatternMatcher using the specified culture.
/// </summary>
/// <remarks>
/// The pattern is trimmed before use. One segment is always built for the whole trimmed pattern;
/// in addition the pattern is split on dots (empty pieces are dropped and each piece is trimmed)
/// and one segment is built per piece. The matcher is flagged as invalid when no dot-separated
/// segment remains or when any of those segments reports itself as invalid.
/// </remarks>
/// <param name="pattern">The pattern to make the pattern matcher for.</param>
/// <param name="culture">
/// The culture to use for string searching and comparison. When <c>null</c>,
/// <see cref="CultureInfo.CurrentCulture"/> is used.
/// </param>
/// <param name="allowFuzzyMatching">Whether or not close matches should count as matches.</param>
public PatternMatcher(
    string pattern,
    CultureInfo culture = null,
    bool allowFuzzyMatching = false)
{
    culture = culture ?? CultureInfo.CurrentCulture;
    pattern = pattern.Trim();

    _compareInfo = culture.CompareInfo;
    _allowFuzzyMatching = allowFuzzyMatching;
    _fullPatternSegment = new PatternSegment(pattern, allowFuzzyMatching);

    if (pattern.IndexOf('.') >= 0)
    {
        // Dotted pattern: build one segment per non-empty piece between the dots.
        var pieces = pattern.Split(s_dotCharacterArray, StringSplitOptions.RemoveEmptyEntries);
        var segments = new PatternSegment[pieces.Length];
        for (var i = 0; i < pieces.Length; i++)
        {
            segments[i] = new PatternSegment(pieces[i].Trim(), allowFuzzyMatching);
        }

        _dotSeparatedPatternSegments = segments;
    }
    else if (pattern.Length > 0)
    {
        // PERF: Avoid string.Split allocations when the pattern doesn't contain a dot.
        _dotSeparatedPatternSegments = new PatternSegment[1] { _fullPatternSegment };
    }
    else
    {
        // Nothing left after trimming: there are no segments at all.
        _dotSeparatedPatternSegments = Array.Empty<PatternSegment>();
    }

    var hasNoSegments = _dotSeparatedPatternSegments.Length == 0;
    _invalidPattern = hasNoSegments || _dotSeparatedPatternSegments.Any(s => s.IsInvalid);
}
/// <summary>
/// Creates a matcher that treats the whole (trimmed) pattern as a single segment.
/// </summary>
/// <param name="pattern">The pattern text; leading and trailing whitespace is removed.</param>
/// <param name="culture">Forwarded to the base constructor.</param>
/// <param name="includeMatchedSpans">Forwarded to the base constructor.</param>
/// <param name="allowFuzzyMatching">
/// Forwarded to the base constructor and used when building the pattern segment.
/// </param>
public SimplePatternMatcher(
    string pattern,
    CultureInfo culture,
    bool includeMatchedSpans,
    bool allowFuzzyMatching)
    : base(includeMatchedSpans, culture, allowFuzzyMatching)
{
    var trimmedPattern = pattern.Trim();

    _fullPatternSegment = new PatternSegment(trimmedPattern, allowFuzzyMatching);

    // The matcher is only as valid as its single segment.
    _invalidPattern = _fullPatternSegment.IsInvalid;
}
/// <summary>
/// Creates a matcher that treats the whole (trimmed) pattern as a single segment.
/// </summary>
/// <param name="pattern">The pattern text; leading and trailing whitespace is removed.</param>
/// <param name="culture">Forwarded to the base constructor.</param>
/// <param name="includeMatchedSpans">Forwarded to the base constructor.</param>
/// <param name="allowFuzzyMatching">
/// Forwarded to the base constructor and used when building the pattern segment.
/// </param>
/// <param name="allowSimpleSubstringMatching">Forwarded to the base constructor.</param>
/// <param name="linkedMatcher">Forwarded to the base constructor.</param>
public SimplePatternMatcher(
    string pattern,
    CultureInfo culture,
    bool includeMatchedSpans,
    bool allowFuzzyMatching,
    bool allowSimpleSubstringMatching = false,
    PatternMatcher linkedMatcher = null)
    : base(includeMatchedSpans, culture, allowFuzzyMatching, allowSimpleSubstringMatching, linkedMatcher)
{
    var trimmedPattern = pattern.Trim();

    _fullPatternSegment = new PatternSegment(trimmedPattern, allowFuzzyMatching);

    // The matcher is only as valid as its single segment.
    HasInvalidPattern = _fullPatternSegment.IsInvalid;
}
/// <summary>
/// Matches <paramref name="candidate"/> against <paramref name="patternSegment"/> and returns
/// the resulting matches as an immutable array.
/// </summary>
/// <param name="candidate">The text being tested against the segment.</param>
/// <param name="includeMatchSpans">Passed through to the underlying matching overload.</param>
/// <param name="patternSegment">The pattern segment to match.</param>
/// <param name="fuzzyMatch">Whether a fuzzy match is being requested.</param>
/// <returns>
/// An empty array when a fuzzy match is requested but fuzzy matching is not allowed; a
/// one-element array when the underlying overload reports a single match; otherwise the
/// array that overload produced through its <c>allMatches</c> output.
/// </returns>
private ImmutableArray<PatternMatch> MatchPatternSegment(
    string candidate,
    bool includeMatchSpans,
    PatternSegment patternSegment,
    bool fuzzyMatch)
{
    // Fuzzy matching was asked for, but this matcher was not set up to allow it.
    if (fuzzyMatch && !_allowFuzzyMatching)
    {
        return ImmutableArray<PatternMatch>.Empty;
    }

    var loneMatch = MatchPatternSegment(
        candidate,
        includeMatchSpans,
        patternSegment,
        wantAllMatches: true,
        fuzzyMatch: fuzzyMatch,
        allMatches: out var everyMatch);

    if (loneMatch.HasValue)
    {
        return ImmutableArray.Create(loneMatch.Value);
    }

    return everyMatch;
}
/// <summary>
/// Internal helper for MatchPatternInternal. Matches <paramref name="candidate"/> against a single
/// pattern segment and appends every match found to <paramref name="matches"/>.
/// </summary>
/// <remarks>
/// The segment is first tried as a whole (skipped when its text contains a space or an asterisk).
/// If that does not match, every sub-word chunk of the segment must match the candidate for the
/// segment to count as matched. Nothing is appended to <paramref name="matches"/> when the
/// segment does not match.
/// PERF: the single-chunk case writes straight into <paramref name="matches"/> and only the
/// multi-chunk case uses a temporary builder.
/// </remarks>
/// <param name="candidate">The word being tested.</param>
/// <param name="segment">The segment of the pattern to check against the candidate.</param>
/// <param name="matches">The builder that receives the matches.</param>
/// <param name="fuzzyMatch">If a fuzzy match should be performed.</param>
/// <returns><c>true</c> if at least one match was appended; otherwise <c>false</c>.</returns>
private bool MatchPatternSegment(
    string candidate,
    PatternSegment segment,
    ArrayBuilder<PatternMatch> matches,
    bool fuzzyMatch)
{
    // A fuzzy match can only succeed when this matcher allows fuzzy matching.
    if (fuzzyMatch && !_allowFuzzyMatching)
    {
        return false;
    }

    // Try the segment exactly as typed first. This keeps characters that would normally be
    // stripped when splitting into words: a segment "@int" matches a candidate "@int" here.
    //
    // A segment containing a space or an asterisk has to be treated as multi-word, so the
    // whole-segment attempt is skipped for it.
    if (!ContainsSpaceOrAsterisk(segment.TotalTextChunk.Text))
    {
        var wholeMatch = MatchPatternChunk(
            candidate, segment.TotalTextChunk, punctuationStripped: false, fuzzyMatch: fuzzyMatch);
        if (wholeMatch.HasValue)
        {
            matches.Add(wholeMatch.Value);
            return true;
        }
    }

    // From here on the segment is matched word by word:
    //
    //  1) The segment has already been broken into its individual alphanumeric words.
    //  2) Each word is matched against the candidate.
    //  3) The per-word rules are described on NonFuzzyMatchPatternChunk and are deliberately
    //     not repeated here, so there is only one place to keep up to date.
    //
    // The segment matches only if every word matches.
    var chunks = segment.SubWordTextChunks;

    // Common case: a simple pattern (alphanumeric, no spaces). Avoid the temporary builder.
    if (chunks.Length == 1)
    {
        var onlyMatch = MatchPatternChunk(
            candidate, chunks[0], punctuationStripped: true, fuzzyMatch: fuzzyMatch);
        if (onlyMatch.HasValue)
        {
            matches.Add(onlyMatch.Value);
            return true;
        }

        return false;
    }

    // Collect into a temporary builder so a failure part-way through leaves 'matches' untouched.
    using var _ = ArrayBuilder<PatternMatch>.GetInstance(out var pending);

    for (var i = 0; i < chunks.Length; i++)
    {
        var chunkMatch = MatchPatternChunk(
            candidate, chunks[i], punctuationStripped: true, fuzzyMatch: fuzzyMatch);
        if (!chunkMatch.HasValue)
        {
            return false;
        }

        pending.Add(chunkMatch.Value);
    }

    matches.AddRange(pending);
    return pending.Count > 0;
}
/// <summary>
/// Internal helper for MatchPatternInternal. Matches <paramref name="candidate"/> against a single
/// pattern segment and appends every match found to <paramref name="matches"/>.
/// </summary>
/// <remarks>
/// The segment is first tried as a whole (skipped when its text contains a space or an asterisk).
/// If that does not match, every sub-word chunk of the segment must match the candidate for the
/// segment to count as matched. Nothing is appended to <paramref name="matches"/> when the
/// segment does not match.
/// </remarks>
/// <param name="candidate">The word being tested.</param>
/// <param name="segment">The segment of the pattern to check against the candidate.</param>
/// <param name="matches">The builder that receives the matches.</param>
/// <param name="fuzzyMatch">If a fuzzy match should be performed.</param>
/// <returns><c>true</c> if at least one match was appended; otherwise <c>false</c>.</returns>
private bool MatchPatternSegment(
    string candidate,
    PatternSegment segment,
    ArrayBuilder<PatternMatch> matches,
    bool fuzzyMatch)
{
    // A fuzzy match can only succeed when this matcher allows fuzzy matching.
    if (fuzzyMatch && !_allowFuzzyMatching)
    {
        return false;
    }

    // Try the segment exactly as typed first. This keeps characters that would normally be
    // stripped when splitting into words: a segment "@int" matches a candidate "@int" here.
    //
    // A segment containing a space or an asterisk has to be treated as multi-word, so the
    // whole-segment attempt is skipped for it.
    if (!ContainsSpaceOrAsterisk(segment.TotalTextChunk.Text))
    {
        var wholeMatch = MatchPatternChunk(
            candidate, segment.TotalTextChunk, punctuationStripped: false, fuzzyMatch: fuzzyMatch);
        if (wholeMatch.HasValue)
        {
            matches.Add(wholeMatch.Value);
            return true;
        }
    }

    // From here on the segment is matched word by word:
    //
    //  1) The segment has already been broken into its individual alphanumeric words.
    //
    //  2) Each word is matched against the candidate.
    //
    //  3) Per-word matching (performed by MatchPatternChunk) is described as follows:
    //
    //     a) If the word matches the whole candidate, case sensitively or insensitively,
    //        it is an exact match.
    //
    //     b) If the word is a prefix of the candidate, case sensitively or insensitively,
    //        it is a prefix match.
    //
    //     c) If the word is entirely lowercase and occurs anywhere in the candidate, case
    //        insensitively, it is a substring match.
    //
    //        Note: this only counts when the lowercase text is a prefix of some word part of
    //        the candidate. Typing 'a' therefore does not match 'Class', but does match
    //        'FooAttribute' (because 'Attribute' starts with 'a').
    //
    //     d) If the word is not entirely lowercase and occurs in the candidate case
    //        *sensitively*, it is a substring match.
    //
    //     e) If the word is entirely lowercase, a special lowercase camel-case match is
    //        attempted, i.e. cofipro would match CodeFixProvider.
    //
    //     f) If the word is not entirely lowercase, a normal camel-case match is attempted,
    //        i.e. CoFiPro would match CodeFixProvider, but CofiPro would not.
    //
    //     g) If the word is all lowercase, check whether it is a case-insensitive substring
    //        of the candidate starting on a part boundary of the candidate.
    //
    // The segment matches only if every word matches.

    // Collect into a temporary builder so a failure part-way through leaves 'matches' untouched.
    var pending = ArrayBuilder<PatternMatch>.GetInstance();
    try
    {
        foreach (var chunk in segment.SubWordTextChunks)
        {
            var chunkMatch = MatchPatternChunk(
                candidate, chunk, punctuationStripped: true, fuzzyMatch: fuzzyMatch);
            if (!chunkMatch.HasValue)
            {
                return false;
            }

            pending.Add(chunkMatch.Value);
        }

        matches.AddRange(pending);
        return pending.Count > 0;
    }
    finally
    {
        // Always hand the temporary builder back, including on the early-return path.
        pending.Free();
    }
}