示例#1
0
 /// <summary>
 /// Runs <paramref name="text"/> through <c>fuzzyAnalyzer</c> and appends every term the analyzer
 /// emits to the <c>FuzzyTokens</c> collection of <paramref name="tokenPatternMatcher"/>.
 /// </summary>
 /// <param name="tokenPatternMatcher">Matcher whose <c>FuzzyTokens</c> collection receives the analyzed terms.</param>
 /// <param name="text">Text to analyze.</param>
 private void AddFuzzyMatchTokens(TokenPatternMatcher tokenPatternMatcher, string text)
 {
     using (var tokenStream = fuzzyAnalyzer.GetTokenStream("name", text))
     {
         var termAtt = tokenStream.GetAttribute<ICharTermAttribute>();
         tokenStream.Reset();

         while (tokenStream.IncrementToken())
         {
             tokenPatternMatcher.FuzzyTokens.Add(termAtt.ToString());
         }

         // Complete the TokenStream consumer workflow (Reset -> IncrementToken* -> End -> Dispose)
         // so that filters in the analysis chain can perform their end-of-stream operations.
         tokenStream.End();
     }
 }
示例#2
0
        /// <summary>
        /// Tokenizes <paramref name="text"/> with <c>exactAnalyzer</c> and builds a pattern matcher for it.
        /// </summary>
        /// <remarks>
        /// Each token is turned into one matcher:
        /// <list type="bullet">
        /// <item><description>a token directly preceded by '@' becomes an <c>EntityPatternMatcher</c>; when the token is
        /// immediately followed by '.', the next token is folded into the same entity name (e.g. "datetime.subrange");</description></item>
        /// <item><description>a token ending with <c>WildcardPatternMatcher.ENTITYTYPE</c> becomes a <c>WildcardPatternMatcher</c>;</description></item>
        /// <item><description>any other token becomes a <c>TokenPatternMatcher</c>, with fuzzy tokens attached when
        /// <paramref name="fuzzyMatch"/> is true.</description></item>
        /// </list>
        /// </remarks>
        /// <param name="text">Pattern text to convert.</param>
        /// <param name="fuzzyMatch">When true, plain-token matchers also receive the terms produced by the fuzzy analyzer.</param>
        /// <returns>
        /// <c>null</c> when the text yields no tokens, the single matcher when it yields exactly one,
        /// otherwise a <c>SequencePatternMatcher</c> holding all matchers in token order.
        /// </returns>
        private PatternMatcher CreateTextPatternMatcher(string text, bool fuzzyMatch)
        {
            // massage wildcards text so it survives tokenization "foo:___" => foo_wildcard
            text = text.Replace("___", WildcardPatternMatcher.ENTITYTYPE);
            text = text.Replace(NAMEDWILDCARD, NAMEDWILDCARD_TOKEN);

            var sequence = new SequencePatternMatcher();

            using (TextReader reader = new StringReader(text))
            using (var tokenStream = exactAnalyzer.GetTokenStream("name", reader))
            {
                var termAtt   = tokenStream.GetAttribute<ICharTermAttribute>();
                var offsetAtt = tokenStream.GetAttribute<IOffsetAttribute>();
                tokenStream.Reset();

                // Set when the look-ahead below hits the end of the stream, so the loop does not
                // call IncrementToken() again on an exhausted stream.
                bool exhausted = false;

                while (!exhausted && tokenStream.IncrementToken())
                {
                    string token     = termAtt.ToString();
                    int    start     = offsetAtt.StartOffset;
                    int    end       = offsetAtt.EndOffset;
                    string tokenText = text.Substring(start, end - start);

                    if (start > 0 && text[start - 1] == '@')
                    {
                        // handle datetime.subrange: pull the token after the '.' into the entity name
                        if (end < text.Length && text[end] == '.')
                        {
                            if (tokenStream.IncrementToken())
                            {
                                end       = offsetAtt.EndOffset;
                                tokenText = text.Substring(start, end - start);
                            }
                            else
                            {
                                // Trailing '.' with nothing after it: the offset attribute is no longer
                                // valid, so keep the entity name as-is instead of slicing with stale offsets.
                                exhausted = true;
                            }
                        }

                        sequence.PatternMatchers.Add(new EntityPatternMatcher(tokenText));
                    }
                    else if (token.EndsWith(WildcardPatternMatcher.ENTITYTYPE, System.StringComparison.Ordinal))
                    {
                        var sb = new StringBuilder(token);
                        if (token.EndsWith(NAMEDWILDCARD_TOKEN, System.StringComparison.Ordinal))
                        {
                            // restore the ':' separator that was replaced before tokenization
                            sb[token.LastIndexOf('_')] = ':';
                        }

                        sequence.PatternMatchers.Add(new WildcardPatternMatcher(sb.ToString()));
                    }
                    else
                    {
                        var tokenPatternMatcher = new TokenPatternMatcher(tokenText, token);
                        if (fuzzyMatch)
                        {
                            // Analyze only this token's own text; passing the whole pattern would give
                            // every token matcher the fuzzy terms of all the other tokens as well.
                            AddFuzzyMatchTokens(tokenPatternMatcher, tokenText);
                        }

                        sequence.PatternMatchers.Add(tokenPatternMatcher);
                    }
                }

                // Complete the TokenStream consumer workflow before the stream is disposed.
                tokenStream.End();
            }

            if (sequence.PatternMatchers.Count == 0)
            {
                return null;
            }

            if (sequence.PatternMatchers.Count == 1)
            {
                // a one-element sequence is unwrapped to the matcher itself
                return sequence.PatternMatchers.Single();
            }

            return sequence;
        }