Ejemplo n.º 1
0
        /*
         * Builds a token stream over an in-memory string, splitting it into
         * terms (words).
         *
         * Two fast paths are taken when Regex compares equal (==) to
         * NON_WORD_PATTERN or WHITESPACE_PATTERN: a FastStringTokenizer is
         * returned directly and is handed the stop words itself. For any other
         * pattern a RegexTokenizer is used, wrapped in a StopFilter only when
         * stop words are configured.
         *
         * @param fieldName
         *            the name of the field to tokenize (not read by this method).
         * @param text
         *            the string to tokenize; must not be null
         * @return a new token stream over text
         * @throws ArgumentException
         *             if text is null
         */
        public TokenStream TokenStream(String fieldName, String text)
        {
            // Ideally the Analyzer superclass would expose this same signature and
            // simply delegate to the StringReader flavour by default.
            if (text == null)
                throw new ArgumentException("text must not be null");

            // Fast path: non-word pattern.
            if (Regex == NON_WORD_PATTERN)
                return new FastStringTokenizer(text, true, toLowerCase, stopWords);

            // Fast path: whitespace pattern.
            if (Regex == WHITESPACE_PATTERN)
                return new FastStringTokenizer(text, false, toLowerCase, stopWords);

            // General path: regex-driven tokenizer, with optional stop-word filtering.
            TokenStream tokens = new RegexTokenizer(text, Regex, toLowerCase);
            if (stopWords == null)
                return tokens;

            return new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion), tokens, stopWords);
        }
Ejemplo n.º 2
0
        /**
         * Tokenizes the supplied string into token terms (aka words) and returns
         * the resulting token stream.
         *
         * When Regex compares equal (==) to NON_WORD_PATTERN or
         * WHITESPACE_PATTERN, a FastStringTokenizer that receives the stop words
         * directly is used. Otherwise a RegexTokenizer is created and, if stop
         * words are set, wrapped in a StopFilter.
         *
         * @param fieldName
         *            the name of the field to tokenize (not read by this method).
         * @param text
         *            the string to tokenize; must not be null
         * @return a new token stream
         * @throws ArgumentException
         *             if text is null
         */
        public TokenStream TokenStream(String fieldName, String text)
        {
            // Ideally the Analyzer superclass should offer a method with this signature
            // whose default implementation just delegates to the StringReader flavour.
            if (text == null)
            {
                throw new ArgumentException("text must not be null");
            }

            TokenStream result;
            if (Regex == NON_WORD_PATTERN)
                result = new FastStringTokenizer(text, true, toLowerCase, stopWords); // fast path
            else if (Regex == WHITESPACE_PATTERN)
                result = new FastStringTokenizer(text, false, toLowerCase, stopWords); // fast path
            else
            {
                TokenStream tokenizer = new RegexTokenizer(text, Regex, toLowerCase);

                // Only add the stop-word filter when stop words are configured.
                result = stopWords == null
                    ? tokenizer
                    : new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion), tokenizer, stopWords);
            }

            return result;
        }