/// <summary>
/// Advances the stream to the next token whose length does not exceed
/// <c>maxTokenLength</c> and publishes its term text, offsets, position
/// increment and type through the tokenizer's attributes.
/// </summary>
/// <returns>
/// <c>true</c> when a token was produced; <c>false</c> once the scanner
/// reports <c>StandardTokenizerImpl.YYEOF</c>.
/// </returns>
/// <seealso cref="Lucene.Net.Analysis.TokenStream.IncrementToken()" />
public override bool IncrementToken()
{
    ClearAttributes();

    // Every over-long token that gets skipped still occupies a position, so the
    // increment grows by one per skipped token (via the loop's iterator clause).
    for (int positionIncrement = 1; ; positionIncrement++)
    {
        int scannedType = scanner.GetNextToken();
        if (scannedType == StandardTokenizerImpl.YYEOF)
        {
            return false;
        }

        if (scanner.Yylength() > maxTokenLength)
        {
            // Too long: drop it and move on to the next token.
            continue;
        }

        posIncrAtt.PositionIncrement = positionIncrement;
        scanner.GetText(termAtt);

        int startOffset = scanner.Yychar();
        int endOffset = startOffset + termAtt.TermLength();
        offsetAtt.SetOffset(CorrectOffset(startOffset), CorrectOffset(endOffset));

        // The ACRONYM_DEP special case is slated for removal in the next release.
        // Until then an invalid acronym is reported either as HOST (with its
        // trailing '.' trimmed) or as ACRONYM; once removed, every token simply
        // uses TOKEN_TYPES[scannedType].
        int typeIndex = scannedType;
        bool dropTrailingDot = false;
        if (scannedType == StandardTokenizerImpl.ACRONYM_DEP)
        {
            dropTrailingDot = replaceInvalidAcronym;
            typeIndex = replaceInvalidAcronym
                ? StandardTokenizerImpl.HOST
                : StandardTokenizerImpl.ACRONYM;
        }

        typeAtt.Type = StandardTokenizerImpl.TOKEN_TYPES[typeIndex];
        if (dropTrailingDot)
        {
            termAtt.SetTermLength(termAtt.TermLength() - 1); // remove extra '.'
        }

        return true;
    }
}
/// <summary>
/// Fills <paramref name="reusableToken"/> with the next token whose length does
/// not exceed <c>maxTokenLength</c>, skipping any longer tokens.
/// </summary>
/// <remarks>
/// Start and end offsets are passed through <c>CorrectOffset</c>, exactly as
/// <see cref="IncrementToken()"/> does, so that both APIs report the same
/// offsets for the same token.
/// </remarks>
/// <param name="reusableToken">
/// Token instance to populate; must not be <c>null</c> (checked with a debug
/// assertion only).
/// </param>
/// <returns>
/// <paramref name="reusableToken"/> populated with the next token, or
/// <c>null</c> once the scanner reports <c>StandardTokenizerImpl.YYEOF</c>.
/// </returns>
public override Token Next(/* in */ Token reusableToken)
{
    System.Diagnostics.Debug.Assert(reusableToken != null);
    int posIncr = 1;

    while (true)
    {
        int tokenType = scanner.GetNextToken();

        if (tokenType == StandardTokenizerImpl.YYEOF)
        {
            return null;
        }

        if (scanner.Yylength() > maxTokenLength)
        {
            // When we skip a too-long term, we still increment the
            // position increment.
            posIncr++;
            continue;
        }

        reusableToken.Clear();
        reusableToken.SetPositionIncrement(posIncr);
        scanner.GetText(reusableToken);

        int start = scanner.Yychar();
        // Route the offsets through CorrectOffset so this method agrees with
        // IncrementToken(), which already corrects both offsets.
        reusableToken.SetStartOffset(CorrectOffset(start));
        reusableToken.SetEndOffset(CorrectOffset(start + reusableToken.TermLength()));

        // This 'if' should be removed in the next release. For now, it converts
        // invalid acronyms to HOST. When removed, only the 'else' part should
        // remain.
        if (tokenType == StandardTokenizerImpl.ACRONYM_DEP)
        {
            if (replaceInvalidAcronym)
            {
                reusableToken.SetType(StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.HOST]);
                reusableToken.SetTermLength(reusableToken.TermLength() - 1); // remove extra '.'
            }
            else
            {
                reusableToken.SetType(StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.ACRONYM]);
            }
        }
        else
        {
            reusableToken.SetType(StandardTokenizerImpl.TOKEN_TYPES[tokenType]);
        }

        return reusableToken;
    }
}