/// <summary>
/// Breaks a search string into terms by running it through a
/// <see cref="StandardAnalyzer"/> configured for <c>LuceneVersion.LUCENE_48</c>.
/// </summary>
/// <param name="search">Raw search text. Null or empty input yields no terms.</param>
/// <returns>
/// A lazily evaluated sequence of term strings, in the order the analyzer emits them.
/// The analyzer and token stream are released when enumeration completes or the
/// enumerator is disposed (as <c>foreach</c> does automatically).
/// </returns>
private IEnumerable <string> TokenizeSearch(string search)
        {
            // Nothing to analyse; also avoids handing a null string to the analyzer.
            if (string.IsNullOrEmpty(search))
            {
                yield break;
            }

            // Both the analyzer and the stream are IDisposable; the using blocks
            // guarantee cleanup even if the caller abandons enumeration early.
            using (var analyser = new StandardAnalyzer(LuceneVersion.LUCENE_48))
            using (var tokens = analyser.GetTokenStream(null, search))
            {
                // The attribute instance is reused by the stream; its content
                // changes on every successful IncrementToken() call.
                var attr = tokens.GetAttribute <ICharTermAttribute>();

                tokens.Reset();
                while (tokens.IncrementToken())
                {
                    yield return(attr.ToString());
                }

                // Completes the consumer workflow (Reset -> IncrementToken -> End -> Dispose).
                tokens.End();
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Splits <paramref name="value"/> into tokens, recording each token's text
        /// and its start/end character offsets within the input.
        /// </summary>
        /// <remarks>
        /// When the <c>LUCENE</c> symbol is defined, tokenization is delegated to a
        /// <c>StandardAnalyzer</c> and offsets come from the stream's offset attribute.
        /// Otherwise a built-in splitter is used: whitespace separates tokens, every
        /// punctuation character except <c>'-'</c> becomes its own single-character
        /// token, and all other characters (including <c>'-'</c>) accumulate into the
        /// current token. In that fallback, <c>EndOffset</c> is exclusive.
        /// </remarks>
        /// <param name="value">Text to tokenize. Null or empty input produces an empty list.</param>
        /// <returns>A new list of tokens in input order; never null.</returns>
        public static List <Token> GetTokens(string value)
        {
            List <Token> tokens = new List <Token>();

            // Guard once for both compilation variants: nothing to tokenize.
            if (string.IsNullOrEmpty(value))
            {
                return(tokens);
            }

#if LUCENE
            // Analyzer and stream are IDisposable; release them deterministically.
            using (var analyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48))
            using (TokenStream stream = analyzer.GetTokenStream("fullName", new StringReader(value)))
            {
                // Attribute instances are stable for the lifetime of the stream, so
                // look them up once instead of on every token.
                var termAttr   = stream.GetAttribute <ICharTermAttribute>();
                var offsetAttr = stream.GetAttribute <IOffsetAttribute>();

                stream.Reset();
                while (stream.IncrementToken())
                {
                    tokens.Add(new Token()
                    {
                        Text = termAttr.ToString(), StartOffset = offsetAttr.StartOffset, EndOffset = offsetAttr.EndOffset
                    });
                }

                // Completes the consumer workflow (Reset -> IncrementToken -> End -> Dispose).
                stream.End();
            }
#else
            StringBuilder sb     = new StringBuilder();
            int           iStart = 0;

            for (int iCh = 0; iCh < value.Length; iCh++)
            {
                char ch = value[iCh];

                if (char.IsWhiteSpace(ch))
                {
                    // Whitespace terminates the token being built and is itself discarded.
                    FlushPendingToken(tokens, sb, iStart, iCh);
                }
                else if (ch != '-' && char.IsPunctuation(ch))
                {
                    // Punctuation (other than '-') ends the current token...
                    FlushPendingToken(tokens, sb, iStart, iCh);

                    // ...and is emitted as a one-character token of its own.
                    tokens.Add(new Token()
                    {
                        Text = ch.ToString(), StartOffset = iCh, EndOffset = iCh + 1
                    });
                }
                else
                {
                    // First character of a new token fixes its start offset.
                    if (sb.Length == 0)
                    {
                        iStart = iCh;
                    }

                    sb.Append(ch);
                }
            }

            // Emit whatever is still buffered when the input ends mid-token.
            FlushPendingToken(tokens, sb, iStart, value.Length);
#endif
            return(tokens);
        }

#if !LUCENE
        /// <summary>
        /// Appends the text buffered in <paramref name="sb"/> to <paramref name="tokens"/>
        /// as a token spanning [<paramref name="start"/>, <paramref name="end"/>) and
        /// clears the buffer. Does nothing when the buffer is empty.
        /// </summary>
        private static void FlushPendingToken(List <Token> tokens, StringBuilder sb, int start, int end)
        {
            if (sb.Length == 0)
            {
                return;
            }

            tokens.Add(new Token()
            {
                Text = sb.ToString(), StartOffset = start, EndOffset = end
            });
            sb.Clear();
        }
#endif