/// <summary>
/// Creates a chunk for <paramref name="text"/>, pre-computing its character spans and,
/// when requested, a similarity checker for the same text.
/// </summary>
/// <param name="text">The text this chunk represents.</param>
/// <param name="allowFuzzingMatching">
/// When <c>true</c> a <see cref="WordSimilarityChecker"/> is created for <paramref name="text"/>;
/// otherwise <c>SimilarityChecker</c> is left <c>null</c>.
/// </param>
public TextChunk(string text, bool allowFuzzingMatching)
{
    Text = text;
    CharacterSpans = StringBreaker.BreakIntoCharacterParts(text);

    if (allowFuzzingMatching)
    {
        SimilarityChecker = new WordSimilarityChecker(text, substringsAreSimilar: false);
    }
    else
    {
        SimilarityChecker = null;
    }
}
/// <summary>
/// Breaks an identifier string into its word parts, as computed by the word-parts generator.
/// </summary>
/// <param name="identifier">The identifier to break apart.</param>
/// <returns>The <see cref="StringBreaks"/> produced for <paramref name="identifier"/>.</returns>
public static StringBreaks BreakIntoWordParts(string identifier)
{
    return StringBreaks.Create(identifier, s_wordPartsGenerator);
}
/// <summary>
/// Breaks an identifier string into its character parts, as computed by the character-parts generator.
/// </summary>
/// <param name="identifier">The identifier to break apart.</param>
/// <returns>The <see cref="StringBreaks"/> produced for <paramref name="identifier"/>.</returns>
public static StringBreaks BreakIntoCharacterParts(string identifier)
{
    return StringBreaks.Create(identifier, s_characterPartsGenerator);
}
/// <summary>
/// Extracts the text of every span in <paramref name="parts"/> from <paramref name="identifier"/>.
/// </summary>
/// <param name="identifier">The string the spans in <paramref name="parts"/> index into.</param>
/// <param name="parts">The spans to extract, in order.</param>
/// <returns>One substring per span, in the same order as <paramref name="parts"/>.</returns>
private static IList<string> PartListToSubstrings(string identifier, StringBreaks parts)
{
    int partCount = parts.Count;

    // Exactly one substring is produced per part, so size the list up front instead of
    // letting it grow (and re-copy its backing array) while it is being filled.
    var result = new List<string>(partCount);

    for (int i = 0; i < partCount; i++)
    {
        var span = parts[i];
        result.Add(identifier.Substring(span.Start, span.Length));
    }

    return result;
}
/// <summary>
/// Tries to match the character spans of <paramref name="chunk"/> (the pattern) against the
/// parts of <paramref name="candidate"/> in camel-case fashion.
/// </summary>
/// <param name="candidate">The candidate string being matched against.</param>
/// <param name="candidateParts">The spans of <paramref name="candidate"/> to match against, in order.</param>
/// <param name="chunk">The pattern chunk; its <c>CharacterSpans</c> are the pattern parts.</param>
/// <param name="compareOption">Comparison options forwarded to <c>PartStartsWith</c>.</param>
/// <returns>
/// <c>null</c> if the candidate parts run out before every pattern part has been matched.
/// Otherwise a weight: +1 if the match was contiguous (no candidate part was skipped once
/// matching had started) and +2 if the first matched candidate part was part 0.
/// </returns>
private int? TryCamelCaseMatch(string candidate, StringBreaks candidateParts, TextChunk chunk, CompareOptions compareOption)
{
    var patternSpans = chunk.CharacterSpans;

    // Note: there may be more pattern parts than candidate parts, because several pattern
    // parts may match inside one candidate part. For example "SiUI" against "SimpleUI":
    // pattern parts Si/U/I against candidate parts Simple/UI, where U and I both match in UI.
    int patternIndex = 0;
    int? firstMatchedPart = null;
    bool? contiguous = null;

    for (int partIndex = 0; ; partIndex++)
    {
        // Termination case 1: every pattern part has been consumed, so we matched.
        if (patternIndex == patternSpans.Count)
        {
            Contract.Requires(firstMatchedPart.HasValue);
            Contract.Requires(contiguous.HasValue);

            // Weight: 1 for a contiguous match, plus 2 for starting at the first candidate part.
            return (contiguous.Value ? 1 : 0) + (firstMatchedPart.Value == 0 ? 2 : 0);
        }

        // Termination case 2: out of candidate parts while pattern parts remain, so no match.
        if (partIndex == candidateParts.Count)
        {
            return null;
        }

        var remainingPart = candidateParts[partIndex];
        bool matchedInThisPart = false;

        // Consider matching SiUI against SimpleUIElement. The candidate parts are
        // Simple/UI/Element and the pattern parts are Si/U/I. 'Si' matches 'Simple', then
        // 'U' matches 'UI', and we want to keep consuming pattern parts ('I') against the
        // rest of that same candidate part.
        while (patternIndex < patternSpans.Count)
        {
            var patternSpan = patternSpans[patternIndex];

            if (matchedInThisPart)
            {
                // One pattern part has already matched in this candidate part. Only keep
                // trying to consume pattern parts if the previous pattern part and this
                // one both start with an upper case character.
                bool bothStartUpper =
                    char.IsUpper(chunk.Text[patternSpans[patternIndex - 1].Start]) &&
                    char.IsUpper(chunk.Text[patternSpan.Start]);
                if (!bothStartUpper)
                {
                    break;
                }
            }

            if (!PartStartsWith(candidate, remainingPart, chunk.Text, patternSpan, compareOption))
            {
                break;
            }

            matchedInThisPart = true;

            if (!firstMatchedPart.HasValue)
            {
                firstMatchedPart = partIndex;
            }

            // An already-known contiguity value (true or false) is kept. If it is still
            // unknown this is the very first match, which is trivially contiguous.
            if (!contiguous.HasValue)
            {
                contiguous = true;
            }

            // Drop the matched prefix so the next pattern part is tested against the remainder.
            remainingPart = new TextSpan(
                remainingPart.Start + patternSpan.Length,
                remainingPart.Length - patternSpan.Length);

            patternIndex++;
        }

        // Nothing matched in this candidate part. If matching had already started, the
        // overall match is no longer contiguous. If it had not started yet, leave the
        // value unset so the first real match can still initialise it.
        if (!matchedInThisPart && contiguous.HasValue)
        {
            contiguous = false;
        }
    }
}
/// <summary>
/// Creates a chunk for <paramref name="text"/> and pre-computes its character spans.
/// </summary>
/// <param name="text">The text this chunk represents.</param>
public TextChunk(string text)
{
    Text = text;
    CharacterSpans = StringBreaker.BreakIntoCharacterParts(text);
}
/// <summary>
/// Tries to match the character spans of <paramref name="chunk"/> (the pattern) against the
/// parts of <paramref name="candidate"/> in camel-case fashion, optionally recording which
/// spans of the candidate were matched.
/// </summary>
/// <param name="candidate">The candidate string being matched against.</param>
/// <param name="includeMatchedSpans">Whether matched candidate spans should be collected.</param>
/// <param name="candidateParts">The spans of <paramref name="candidate"/> to match against, in order.</param>
/// <param name="chunk">The pattern chunk; its <c>CharacterSpans</c> are the pattern parts.</param>
/// <param name="compareOption">Comparison options forwarded to <c>PartStartsWith</c>.</param>
/// <param name="matchedSpans">
/// On a successful match with <paramref name="includeMatchedSpans"/> set, the matched spans in
/// the order they were matched; <c>null</c> when there is no match or spans were not requested.
/// </param>
/// <returns>
/// <c>null</c> if the candidate parts run out before every pattern part has been matched.
/// Otherwise a weight: +1 if the match was contiguous (no candidate part was skipped once
/// matching had started) and +2 if the first matched candidate part was part 0.
/// </returns>
private int? TryCamelCaseMatch(
    string candidate,
    bool includeMatchedSpans,
    StringBreaks candidateParts,
    TextChunk chunk,
    CompareOptions compareOption,
    out List<TextSpan> matchedSpans)
{
    matchedSpans = null;
    var patternSpans = chunk.CharacterSpans;

    // Note: there may be more pattern parts than candidate parts, because several pattern
    // parts may match inside one candidate part. For example "SiUI" against "SimpleUI":
    // pattern parts Si/U/I against candidate parts Simple/UI, where U and I both match in UI.
    int patternIndex = 0;
    int? firstMatchedPart = null;
    bool? contiguous = null;

    for (int partIndex = 0; ; partIndex++)
    {
        // Termination case 1: every pattern part has been consumed, so we matched.
        if (patternIndex == patternSpans.Count)
        {
            Contract.Requires(firstMatchedPart.HasValue);
            Contract.Requires(contiguous.HasValue);

            // Weight: 1 for a contiguous match, plus 2 for starting at the first candidate part.
            return (contiguous.Value ? 1 : 0) + (firstMatchedPart.Value == 0 ? 2 : 0);
        }

        // Termination case 2: out of candidate parts while pattern parts remain, so no
        // match. Any spans collected for the partial match are discarded.
        if (partIndex == candidateParts.Count)
        {
            matchedSpans = null;
            return null;
        }

        var remainingPart = candidateParts[partIndex];
        bool matchedInThisPart = false;

        // Consider matching SiUI against SimpleUIElement. The candidate parts are
        // Simple/UI/Element and the pattern parts are Si/U/I. 'Si' matches 'Simple', then
        // 'U' matches 'UI', and we want to keep consuming pattern parts ('I') against the
        // rest of that same candidate part.
        while (patternIndex < patternSpans.Count)
        {
            var patternSpan = patternSpans[patternIndex];

            if (matchedInThisPart)
            {
                // One pattern part has already matched in this candidate part. Only keep
                // trying to consume pattern parts if the previous pattern part and this
                // one both start with an upper case character.
                bool bothStartUpper =
                    char.IsUpper(chunk.Text[patternSpans[patternIndex - 1].Start]) &&
                    char.IsUpper(chunk.Text[patternSpan.Start]);
                if (!bothStartUpper)
                {
                    break;
                }
            }

            if (!PartStartsWith(candidate, remainingPart, chunk.Text, patternSpan, compareOption))
            {
                break;
            }

            if (includeMatchedSpans)
            {
                // Allocate lazily so callers that never match pay nothing.
                if (matchedSpans == null)
                {
                    matchedSpans = new List<TextSpan>();
                }

                // The match covers the first patternSpan.Length characters of what is left
                // of this candidate part.
                matchedSpans.Add(new TextSpan(remainingPart.Start, patternSpan.Length));
            }

            matchedInThisPart = true;

            if (!firstMatchedPart.HasValue)
            {
                firstMatchedPart = partIndex;
            }

            // An already-known contiguity value (true or false) is kept. If it is still
            // unknown this is the very first match, which is trivially contiguous.
            if (!contiguous.HasValue)
            {
                contiguous = true;
            }

            // Drop the matched prefix so the next pattern part is tested against the remainder.
            remainingPart = new TextSpan(
                remainingPart.Start + patternSpan.Length,
                remainingPart.Length - patternSpan.Length);

            patternIndex++;
        }

        // Nothing matched in this candidate part. If matching had already started, the
        // overall match is no longer contiguous. If it had not started yet, leave the
        // value unset so the first real match can still initialise it.
        if (!matchedInThisPart && contiguous.HasValue)
        {
            contiguous = false;
        }
    }
}