/// <summary>
/// Advances the wrapped stream by one token and decorates that token with a
/// payload and a position increment.
/// </summary>
/// <remarks>
/// The payload holds the UTF-8 bytes of <c>"pos: " + pos</c>, using the value
/// <c>pos</c> had before this token's increment was added. The increment is 1
/// when the running counter <c>i</c> is odd and 0 otherwise.
/// </remarks>
/// <returns><c>true</c> if the wrapped stream produced a token; otherwise <c>false</c>.</returns>
public override bool IncrementToken()
{
    if (!input.IncrementToken())
    {
        return false;
    }

    // The payload is built before pos is advanced, so it records the previous position.
    byte[] payloadBytes = System.Text.Encoding.UTF8.GetBytes("pos: " + pos);
    payloadAttr.SetPayload(new Payload(payloadBytes));

    int increment = (i % 2 == 1) ? 1 : 0;
    posIncrAttr.SetPositionIncrement(increment);
    pos += increment;

    i++;
    return true;
}
/// <summary>
/// Pulls tokens from the wrapped stream, dropping every "the" token and
/// assigning a position increment to the first token that is kept.
/// </summary>
/// <remarks>
/// A kept token whose term is "quick" gets a position increment of 2; any
/// other kept token gets 1.
/// </remarks>
/// <returns><c>true</c> if a token was kept; <c>false</c> once the wrapped stream is exhausted.</returns>
public override bool IncrementToken()
{
    while (input.IncrementToken())
    {
        string term = termAtt.Term();

        if (term.Equals("the"))
        {
            // Stop word: swallow it and look at the next token.
            continue;
        }

        posIncrAtt.SetPositionIncrement(term.Equals("quick") ? 2 : 1);
        return true;
    }

    return false;
}
/// <summary>
/// Emits the next entry of <c>TOKENS</c> as a token.
/// </summary>
/// <remarks>
/// The term text comes from <c>TOKENS[i]</c>, the position increment from
/// <c>INCREMENTS[i]</c>, and both the start and end offset are set to the
/// index <c>i</c> itself.
/// </remarks>
/// <returns><c>false</c> when <c>i</c> equals <c>TOKENS.Length</c>; otherwise <c>true</c>.</returns>
public override bool IncrementToken()
{
    if (i == TOKENS.Length)
    {
        return false;
    }

    ClearAttributes();

    int current = i;
    termAtt.SetTermBuffer(TOKENS[current]);
    offsetAtt.SetOffset(current, current);
    posIncrAtt.SetPositionIncrement(INCREMENTS[current]);

    i = current + 1;
    return true;
}
/// <summary>
/// Emits the next <c>TestToken</c> from the enclosing instance's
/// <c>tokens</c> array.
/// </summary>
/// <remarks>
/// The term text and offsets are copied from the test token. The position
/// increment is the difference between this token's <c>pos</c> and the
/// previous token's <c>pos</c>; for the first token it is <c>pos + 1</c>.
/// </remarks>
/// <returns><c>false</c> once every token has been emitted; otherwise <c>true</c>.</returns>
public override bool IncrementToken()
{
    if (tokenUpto >= Enclosing_Instance.tokens.Length)
    {
        return false;
    }

    // Remember which slot we are emitting, then advance the cursor.
    int index = tokenUpto++;
    TestToken current = Enclosing_Instance.tokens[index];

    ClearAttributes();
    termAtt.SetTermBuffer(current.text);
    offsetAtt.SetOffset(current.startOffset, current.endOffset);

    int increment;
    if (index > 0)
    {
        increment = current.pos - Enclosing_Instance.tokens[index - 1].pos;
    }
    else
    {
        increment = current.pos + 1;
    }
    posIncrAtt.SetPositionIncrement(increment);

    return true;
}
/// <summary>
/// Advances to the next token produced by the scanner whose length does not
/// exceed <c>maxTokenLength</c>, filling in the term, offset, position
/// increment and type attributes.
/// </summary>
/// <remarks>
/// Each skipped over-long token adds one to the position increment of the
/// next token that is emitted.
/// </remarks>
/// <returns><c>false</c> when the scanner reports <c>YYEOF</c>; otherwise <c>true</c>.</returns>
public override bool IncrementToken()
{
    ClearAttributes();
    int positionIncrement = 1;

    for (;;)
    {
        int tokenType = scanner.GetNextToken();
        if (tokenType == StandardTokenizerImpl.YYEOF)
        {
            return false;
        }

        if (scanner.Yylength() > maxTokenLength)
        {
            // A too-long term is skipped, but it still occupies a position.
            positionIncrement++;
            continue;
        }

        posIncrAtt.SetPositionIncrement(positionIncrement);
        scanner.GetText(termAtt);

        int start = scanner.Yychar();
        offsetAtt.SetOffset(CorrectOffset(start), CorrectOffset(start + termAtt.TermLength()));

        // The ACRONYM_DEP special case should be removed in the next release.
        // For now it converts invalid acronyms to HOST (when
        // replaceInvalidAcronym is set) or reports them as ACRONYM. Once it is
        // removed, every token should simply use TOKEN_TYPES[tokenType].
        bool isDeprecatedAcronym = tokenType == StandardTokenizerImpl.ACRONYM_DEP;
        int typeIndex = !isDeprecatedAcronym
            ? tokenType
            : (replaceInvalidAcronym ? StandardTokenizerImpl.HOST : StandardTokenizerImpl.ACRONYM);
        typeAtt.SetType(StandardTokenizerImpl.TOKEN_TYPES[typeIndex]);

        if (isDeprecatedAcronym && replaceInvalidAcronym)
        {
            // Remove the extra trailing '.'.
            termAtt.SetTermLength(termAtt.TermLength() - 1);
        }

        return true;
    }
}
/// <summary>
/// Advances to the next input token whose term is not contained in
/// <c>stopWords</c>.
/// </summary>
/// <remarks>
/// The position increments of the skipped stop words are summed. When
/// <c>enablePositionIncrements</c> is set, that sum is added to the position
/// increment of the token that is returned.
/// </remarks>
/// <returns><c>true</c> if a non-stop-word token was found; <c>false</c> at end of stream.</returns>
public override bool IncrementToken()
{
    int skipped = 0;

    while (input.IncrementToken())
    {
        bool isStopWord = stopWords.Contains(termAtt.TermBuffer(), 0, termAtt.TermLength());
        if (isStopWord)
        {
            // Accumulate the gap left behind by the removed token.
            skipped += posIncrAtt.GetPositionIncrement();
            continue;
        }

        if (enablePositionIncrements)
        {
            posIncrAtt.SetPositionIncrement(posIncrAtt.GetPositionIncrement() + skipped);
        }
        return true;
    }

    // Reached end of stream without finding another non-stop word.
    return false;
}
/// <summary>
/// Emits extra tokens after the terms "multi" and "triplemulti", stacked at
/// the same position as the triggering token.
/// </summary>
/// <remarks>
/// While <c>TestMultiAnalyzer.multiToken</c> is positive, each call emits a
/// token with the term <c>"multi" + (multiToken + 1)</c>, the offsets and type
/// saved from the last real token, and a position increment of 0, and then
/// decrements the counter. Otherwise the next token is read from the wrapped
/// stream, its type and offsets are saved, and the counter is set to 2 for
/// "triplemulti" or 1 for "multi".
/// </remarks>
/// <returns><c>false</c> when the wrapped stream is exhausted; otherwise <c>true</c>.</returns>
public override bool IncrementToken()
{
    if (Lucene.Net.QueryParsers.TestMultiAnalyzer.multiToken > 0)
    {
        // Emit a pending synthetic token on top of the previous real token.
        termAtt.SetTermBuffer("multi" + (Lucene.Net.QueryParsers.TestMultiAnalyzer.multiToken + 1));
        offsetAtt.SetOffset(prevStartOffset, prevEndOffset);
        typeAtt.SetType(prevType);
        posIncrAtt.SetPositionIncrement(0);
        Lucene.Net.QueryParsers.TestMultiAnalyzer.multiToken--;
        return true;
    }

    if (!input.IncrementToken())
    {
        return false;
    }

    // Save what the synthetic tokens will need to copy.
    prevType = typeAtt.Type();
    prevStartOffset = offsetAtt.StartOffset();
    prevEndOffset = offsetAtt.EndOffset();

    string term = termAtt.Term();
    if (term.Equals("triplemulti"))
    {
        Lucene.Net.QueryParsers.TestMultiAnalyzer.multiToken = 2;
    }
    else if (term.Equals("multi"))
    {
        Lucene.Net.QueryParsers.TestMultiAnalyzer.multiToken = 1;
    }

    return true;
}