GetNextToken() public method

Resumes scanning until the next regular expression is matched, the end of input is encountered or an I/O-Error occurs.
if any I/O-Error occurs
public GetNextToken ( ) : int
return int
        /*
         * (non-Javadoc)
         *
         * @see Lucene.Net.Analysis.TokenStream#next()
         */
        public override Token Next(/* in */ Token reusableToken)
        {
            System.Diagnostics.Debug.Assert(reusableToken != null);
            int posIncr = 1;

            while (true)
            {
                int tokenType = scanner.GetNextToken();

                if (tokenType == StandardTokenizerImpl.YYEOF)
                {
                    return(null);
                }

                if (scanner.Yylength() <= maxTokenLength)
                {
                    reusableToken.Clear();
                    reusableToken.SetPositionIncrement(posIncr);
                    scanner.GetText(reusableToken);
                    int start = scanner.Yychar();
                    reusableToken.SetStartOffset(start);
                    reusableToken.SetEndOffset(start + reusableToken.TermLength());
                    // This 'if' should be removed in the next release. For now, it converts
                    // invalid acronyms to HOST. When removed, only the 'else' part should
                    // remain.
                    if (tokenType == StandardTokenizerImpl.ACRONYM_DEP)
                    {
                        if (replaceInvalidAcronym)
                        {
                            reusableToken.SetType(StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.HOST]);
                            reusableToken.SetTermLength(reusableToken.TermLength() - 1);                             // remove extra '.'
                        }
                        else
                        {
                            reusableToken.SetType(StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.ACRONYM]);
                        }
                    }
                    else
                    {
                        reusableToken.SetType(StandardTokenizerImpl.TOKEN_TYPES[tokenType]);
                    }
                    return(reusableToken);
                }
                // When we skip a too-long term, we still increment the
                // position increment
                else
                {
                    posIncr++;
                }
            }
        }
        ///<summary>
        /// (non-Javadoc)
        /// <see cref="Lucene.Net.Analysis.TokenStream.IncrementToken()" />
        ///</summary>
        public override bool IncrementToken()
        {
            ClearAttributes();
            int posIncr = 1;

            while (true)
            {
                int tokenType = scanner.GetNextToken();

                if (tokenType == StandardTokenizerImpl.YYEOF)
                {
                    return(false);
                }

                if (scanner.Yylength() <= maxTokenLength)
                {
                    posIncrAtt.PositionIncrement = posIncr;
                    scanner.GetText(termAtt);
                    int start = scanner.Yychar();
                    offsetAtt.SetOffset(CorrectOffset(start), CorrectOffset(start + termAtt.TermLength()));
                    // This 'if' should be removed in the next release. For now, it converts
                    // invalid acronyms to HOST. When removed, only the 'else' part should
                    // remain.
                    if (tokenType == StandardTokenizerImpl.ACRONYM_DEP)
                    {
                        if (replaceInvalidAcronym)
                        {
                            typeAtt.Type = StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.HOST];
                            termAtt.SetTermLength(termAtt.TermLength() - 1);                             // remove extra '.'
                        }
                        else
                        {
                            typeAtt.Type = StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.ACRONYM];
                        }
                    }
                    else
                    {
                        typeAtt.Type = StandardTokenizerImpl.TOKEN_TYPES[tokenType];
                    }
                    return(true);
                }
                // When we skip a too-long term, we still increment the
                // position increment
                else
                {
                    posIncr++;
                }
            }
        }
Esempio n. 3
0
  public static Token Next(StandardTokenizerImpl scanner)
  {
    int tokenType = scanner.GetNextToken();

    if (tokenType == StandardTokenizerImpl.YYEOF) {
      return null;
    }

    int startPosition = scanner.yychar();

    string tokenImage = scanner.yytext();
    return new Token(tokenImage, startPosition, startPosition
                     + tokenImage.Length,
                     StandardTokenizerImpl.TOKEN_TYPES[tokenType]);
  }
Esempio n. 4
0
        /// <summary>Returns the next token in the stream, or null at EOS.
        /// <p>The returned token's type is set to an element of {@link
        /// StandardTokenizerConstants#tokenImage}.
        /// </summary>
        public override Lucene.Net.Analysis.Token Next()
        {
            int tokenType = scanner.GetNextToken();

            if (tokenType == StandardTokenizerImpl.YYEOF)
            {
                return(null);
            }

            int startPosition = scanner.yychar();

            string tokenImage = scanner.yytext();

            return(new Token(tokenImage, startPosition, startPosition
                             + tokenImage.Length,
                             StandardTokenizerImpl.TOKEN_TYPES[tokenType]));
        }