/// <summary>
/// Fills <paramref name="reusableToken"/> with the next token produced by the
/// scanner and returns it, or returns <c>null</c> once the scanner reports
/// <c>YYEOF</c>. Tokens longer than <c>maxTokenLength</c> are skipped, but each
/// skipped token still adds one to the position increment of the token that is
/// eventually returned.
/// </summary>
/// <param name="reusableToken">Token instance to clear and populate; asserted non-null in debug builds.</param>
/// <returns><paramref name="reusableToken"/> populated with the next token, or <c>null</c> at end of input.</returns>
/// <seealso cref="Lucene.Net.Analysis.TokenStream"/>
public override Token Next(/* in */ Token reusableToken)
{
    System.Diagnostics.Debug.Assert(reusableToken != null);

    // The increment starts at 1 and grows by one for every over-long token skipped.
    for (int positionIncrement = 1; ; positionIncrement++)
    {
        int kind = scanner.GetNextToken();
        if (kind == StandardTokenizerImpl.YYEOF)
        {
            return null;
        }

        if (scanner.Yylength() > maxTokenLength)
        {
            // Too long: drop this term; the loop iterator bumps the increment.
            continue;
        }

        reusableToken.Clear();
        reusableToken.SetPositionIncrement(positionIncrement);
        scanner.GetText(reusableToken);

        int begin = scanner.Yychar();
        reusableToken.SetStartOffset(begin);
        reusableToken.SetEndOffset(begin + reusableToken.TermLength());

        // The ACRONYM_DEP handling is slated for removal in the next release.
        // Until then it converts invalid acronyms to HOST; once removed, only
        // the plain TOKEN_TYPES[kind] assignment should remain.
        if (kind != StandardTokenizerImpl.ACRONYM_DEP)
        {
            reusableToken.SetType(StandardTokenizerImpl.TOKEN_TYPES[kind]);
        }
        else if (replaceInvalidAcronym)
        {
            reusableToken.SetType(StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.HOST]);
            // Remove the extra '.' from the term.
            reusableToken.SetTermLength(reusableToken.TermLength() - 1);
        }
        else
        {
            reusableToken.SetType(StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.ACRONYM]);
        }

        return reusableToken;
    }
}
/// <summary>
/// Advances to the next token from the scanner and records it in this stream's
/// term, offset, position-increment and type attributes. Tokens longer than
/// <c>maxTokenLength</c> are skipped, but each skipped token still adds one to
/// the position increment of the token that is eventually emitted.
/// <see cref="Lucene.Net.Analysis.TokenStream.IncrementToken()" />
/// </summary>
/// <returns><c>true</c> if a token was produced; <c>false</c> once the scanner reports <c>YYEOF</c>.</returns>
public override bool IncrementToken()
{
    ClearAttributes();

    // The increment starts at 1 and grows by one for every over-long token skipped.
    for (int positionIncrement = 1; ; positionIncrement++)
    {
        int kind = scanner.GetNextToken();
        if (kind == StandardTokenizerImpl.YYEOF)
        {
            return false;
        }

        if (scanner.Yylength() > maxTokenLength)
        {
            // Too long: drop this term; the loop iterator bumps the increment.
            continue;
        }

        posIncrAtt.PositionIncrement = positionIncrement;
        scanner.GetText(termAtt);

        int begin = scanner.Yychar();
        int correctedBegin = CorrectOffset(begin);
        int correctedEnd = CorrectOffset(begin + termAtt.TermLength());
        offsetAtt.SetOffset(correctedBegin, correctedEnd);

        // The ACRONYM_DEP handling is slated for removal in the next release.
        // Until then it converts invalid acronyms to HOST; once removed, only
        // the plain TOKEN_TYPES[kind] assignment should remain.
        if (kind != StandardTokenizerImpl.ACRONYM_DEP)
        {
            typeAtt.Type = StandardTokenizerImpl.TOKEN_TYPES[kind];
        }
        else if (replaceInvalidAcronym)
        {
            typeAtt.Type = StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.HOST];
            // Remove the extra '.' from the term.
            termAtt.SetTermLength(termAtt.TermLength() - 1);
        }
        else
        {
            typeAtt.Type = StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.ACRONYM];
        }

        return true;
    }
}
/// <summary>
/// Reads the next token from <paramref name="scanner"/> and wraps it in a new
/// <see cref="Token"/>.
/// </summary>
/// <param name="scanner">Scanner to pull the next token from.</param>
/// <returns>
/// A new token holding the matched text, its start offset, an end offset of
/// start plus text length, and the type string looked up in
/// <c>StandardTokenizerImpl.TOKEN_TYPES</c>; or <c>null</c> when the scanner
/// reports <c>YYEOF</c>.
/// </returns>
public static Token Next(StandardTokenizerImpl scanner)
{
    int kind = scanner.GetNextToken();
    if (kind == StandardTokenizerImpl.YYEOF)
    {
        return null;
    }

    int begin = scanner.yychar();
    string text = scanner.yytext();
    int end = begin + text.Length;
    string typeName = StandardTokenizerImpl.TOKEN_TYPES[kind];

    return new Token(text, begin, end, typeName);
}
/// <summary>
/// Returns the next token in the stream, or <c>null</c> at end of stream.
/// <para>
/// The returned token's type is set to an element of
/// <c>StandardTokenizerImpl.TOKEN_TYPES</c>, selected by the token kind the
/// scanner reported.
/// </para>
/// </summary>
/// <returns>
/// A new token holding the matched text, its start offset and an end offset of
/// start plus text length; or <c>null</c> when the scanner reports <c>YYEOF</c>.
/// </returns>
public override Lucene.Net.Analysis.Token Next()
{
    int kind = scanner.GetNextToken();
    if (kind == StandardTokenizerImpl.YYEOF)
    {
        return null;
    }

    int begin = scanner.yychar();
    string text = scanner.yytext();
    int end = begin + text.Length;
    string typeName = StandardTokenizerImpl.TOKEN_TYPES[kind];

    return new Token(text, begin, end, typeName);
}