/// <summary>
/// Runs <paramref name="text"/> through the fuzzy analyzer and appends every
/// term it emits to <paramref name="tokenPatternMatcher"/>'s FuzzyTokens collection.
/// </summary>
/// <param name="tokenPatternMatcher">Matcher whose FuzzyTokens collection receives the emitted terms.</param>
/// <param name="text">Text to analyze with the fuzzy analyzer.</param>
private void AddFuzzyMatchTokens(TokenPatternMatcher tokenPatternMatcher, string text)
{
    using (var tokenStream = fuzzyAnalyzer.GetTokenStream("name", text))
    {
        var termAtt = tokenStream.GetAttribute<ICharTermAttribute>();
        tokenStream.Reset();

        while (tokenStream.IncrementToken())
        {
            tokenPatternMatcher.FuzzyTokens.Add(termAtt.ToString());
        }

        // The TokenStream consumer workflow requires End() after the last
        // IncrementToken() and before the stream is disposed.
        tokenStream.End();
    }
}
/// <summary>
/// Tokenizes <paramref name="text"/> with the exact analyzer and builds a pattern
/// matcher from the resulting tokens:
/// a token immediately preceded by '@' becomes an <c>EntityPatternMatcher</c>
/// (including a following ".subrange" token when present), a token ending with the
/// wildcard marker becomes a <c>WildcardPatternMatcher</c>, and any other token
/// becomes a <c>TokenPatternMatcher</c>.
/// </summary>
/// <param name="text">Pattern text to tokenize.</param>
/// <param name="fuzzyMatch">When true, fuzzy tokens are added to each plain token matcher.</param>
/// <returns>
/// <c>null</c> when no tokens were produced, the single matcher when exactly one was
/// produced, otherwise a <c>SequencePatternMatcher</c> containing all of them in order.
/// </returns>
private PatternMatcher CreateTextPatternMatcher(string text, bool fuzzyMatch)
{
    // massage wildcards text so it survives tokenization "foo:___" => foo_wildcard
    text = text.Replace("___", WildcardPatternMatcher.ENTITYTYPE);
    text = text.Replace(NAMEDWILDCARD, NAMEDWILDCARD_TOKEN);

    var sequence = new SequencePatternMatcher();

    using (TextReader reader = new StringReader(text))
    using (var tokenStream = exactAnalyzer.GetTokenStream("name", reader))
    {
        var termAtt = tokenStream.GetAttribute<ICharTermAttribute>();
        var offsetAtt = tokenStream.GetAttribute<IOffsetAttribute>();
        tokenStream.Reset();

        // hasToken tracks stream exhaustion so IncrementToken() is never called
        // again after it has returned false (possible via the look-ahead below).
        bool hasToken = tokenStream.IncrementToken();
        while (hasToken)
        {
            string token = termAtt.ToString();
            int start = offsetAtt.StartOffset;
            int end = offsetAtt.EndOffset;
            string tokenText = text.Substring(start, end - start);

            if (start > 0 && text[start - 1] == '@')
            {
                // handle datetime.subrange: consume the token after the '.' and
                // extend the entity text to cover it.
                if (end < text.Length && text[end] == '.')
                {
                    hasToken = tokenStream.IncrementToken();
                    if (hasToken)
                    {
                        // Only read the offsets when a token was actually produced;
                        // attribute state is undefined once the stream is exhausted.
                        end = offsetAtt.EndOffset;
                        tokenText = text.Substring(start, end - start);
                    }
                }

                sequence.PatternMatchers.Add(new EntityPatternMatcher(tokenText));
            }
            else if (token.EndsWith(WildcardPatternMatcher.ENTITYTYPE, System.StringComparison.Ordinal))
            {
                var sb = new StringBuilder(token);
                if (token.EndsWith(NAMEDWILDCARD_TOKEN, System.StringComparison.Ordinal))
                {
                    // Undo the massaging above: turn the last '_' back into ':'.
                    sb[token.LastIndexOf('_')] = ':';
                }

                sequence.PatternMatchers.Add(new WildcardPatternMatcher(sb.ToString()));
            }
            else
            {
                var tokenPatternMatcher = new TokenPatternMatcher(tokenText, token);
                if (fuzzyMatch)
                {
                    // Derive fuzzy tokens from this token's own text, not from the
                    // whole pattern, so a token matcher only carries its own variants.
                    AddFuzzyMatchTokens(tokenPatternMatcher, tokenText);
                }

                sequence.PatternMatchers.Add(tokenPatternMatcher);
            }

            if (hasToken)
            {
                hasToken = tokenStream.IncrementToken();
            }
        }

        // The TokenStream consumer workflow requires End() before disposal.
        tokenStream.End();
    }

    if (sequence.PatternMatchers.Count == 0)
    {
        return null;
    }

    if (sequence.PatternMatchers.Count == 1)
    {
        return sequence.PatternMatchers.Single();
    }

    return sequence;
}