Example #1
0
        /// <summary>
        /// Returns the next input Token with its term text replaced by the stemmed form.
        /// </summary>
        /// <returns>
        /// <c>null</c> when the input stream has no more tokens; the unchanged input token
        /// when stemming leaves its term text as it was; otherwise a new token carrying the
        /// stemmed text together with the original offsets, type and position increment.
        /// </returns>
        /// <exception cref="System.IO.IOException">
        /// Presumably propagated from the underlying input stream; it is not raised
        /// directly by this method.
        /// </exception>
        public override Token Next()
        {
            Token token = input.Next();
            if (token == null)
            {
                return null;
            }

            string original = token.TermText();
            string stemmed = stemmer.stem(original);

            // Value comparison: only allocate a replacement token when the text really changed.
            if (stemmed.Equals(original))
            {
                return token;
            }

            Token stemmedToken = new Token(stemmed, token.StartOffset(), token.EndOffset(), token.Type());

            // The Token constructor used here takes no position increment, so carry the
            // source token's value over explicitly instead of silently dropping it.
            stemmedToken.SetPositionIncrement(token.GetPositionIncrement());
            return stemmedToken;
        }
Example #2
0
        /// <summary>
        /// Returns the next token in the stream, or null at EOS.
        /// <para>A trailing <c>'s</c> (or <c>'S</c>) is cut from tokens of the apostrophe type.</para>
        /// <para>Dots are removed from tokens of the acronym type.</para>
        /// </summary>
        /// <param name="reusableToken">Token instance handed to the input stream; must not be null.</param>
        /// <returns>The token obtained from the input stream, edited in place, or null at EOS.</returns>
        public override Token Next(/* in */ Token reusableToken)
        {
            System.Diagnostics.Debug.Assert(reusableToken != null);

            Token current = input.Next(reusableToken);
            if (current == null)
            {
                return null;
            }

            char[] chars = current.TermBuffer();
            int length = current.TermLength();
            System.String tokenType = current.Type();

            // Work out up front whether the term ends in an apostrophe followed by s/S.
            bool endsWithPossessive = false;
            if (tokenType == APOSTROPHE_TYPE && length >= 2 && chars[length - 2] == '\'')
            {
                char last = chars[length - 1];
                endsWithPossessive = last == 's' || last == 'S';
            }

            if (endsWithPossessive)
            {
                // Drop the two trailing characters.
                current.SetTermLength(length - 2);
            }
            else if (tokenType == ACRONYM_TYPE)
            {
                // Compact the buffer in place, skipping every dot.
                int write = 0;
                for (int read = 0; read < length; read++)
                {
                    if (chars[read] == '.')
                    {
                        continue;
                    }

                    chars[write] = chars[read];
                    write++;
                }

                current.SetTermLength(write);
            }

            return current;
        }
Example #3
0
        /// <summary>
        /// Returns the next token in the stream, or null at EOS.
        /// <para>Strips a trailing <c>'s</c> / <c>'S</c> from apostrophe-typed tokens.</para>
        /// <para>Strips dots from acronym-typed tokens.</para>
        /// </summary>
        /// <param name="result">Token instance passed through to the input stream.</param>
        /// <returns>The token produced by the input stream, modified in place, or null at EOS.</returns>
        public override Token Next(Token result)
        {
            Token token = input.Next(result);
            if (token == null)
            {
                return null;
            }

            char[] termChars = token.TermBuffer();
            int termLength = token.TermLength();
            System.String tokenType = token.Type();

            bool isPossessive =
                tokenType == APOSTROPHE_TYPE
                && termLength >= 2
                && termChars[termLength - 2] == '\''
                && (termChars[termLength - 1] == 's' || termChars[termLength - 1] == 'S');

            if (isPossessive)
            {
                // Shorten the term by the apostrophe and the letter after it.
                token.SetTermLength(termLength - 2);
                return token;
            }

            if (tokenType == ACRONYM_TYPE)
            {
                // Copy every non-dot character down over the dots, then trim the length.
                int kept = 0;
                int index = 0;
                while (index < termLength)
                {
                    char ch = termChars[index++];
                    if (ch != '.')
                    {
                        termChars[kept++] = ch;
                    }
                }

                token.SetTermLength(kept);
            }

            return token;
        }
        /// <summary>Returns the next input Token, after being stemmed.</summary>
        /// <returns>
        /// <c>null</c> when the input stream has no more tokens; otherwise a new token holding
        /// the stemmer's current text with the source token's offsets, type and position increment.
        /// </returns>
        /// <exception cref="System.SystemException">
        /// Thrown when invoking the stem method fails; the message is the failure's string
        /// form and the failure itself is attached as the inner exception.
        /// </exception>
        public override Token Next()
        {
            Token token = input.Next();
            if (token == null)
            {
                return null;
            }

            // Feed the term text to the stemmer, run the stem method, then read the text back.
            stemmer.SetCurrent(token.TermText());
            try
            {
                stemMethod.Invoke(stemmer, (System.Object[])EMPTY_ARGS);
            }
            catch (System.Exception e)
            {
                // Same exception type and message as before so existing handlers keep working,
                // but attach the cause so it is not reduced to a string only.
                throw new System.SystemException(e.ToString(), e);
            }

            Token newToken = new Token(stemmer.GetCurrent(), token.StartOffset(), token.EndOffset(), token.Type());

            // The constructor above takes no position increment, so copy it across.
            newToken.SetPositionIncrement(token.GetPositionIncrement());
            return newToken;
        }