/// <summary>
/// Advances this filter to its next n-gram.
/// </summary>
/// <returns>
/// <c>true</c> when a gram has been written to the attributes;
/// <c>false</c> once the wrapped input has no more tokens.
/// </returns>
public override sealed bool IncrementToken()
{
    for (; ; )
    {
        if (curTermBuffer == null)
        {
            // No token is being expanded: pull the next one from the input.
            if (!m_input.IncrementToken())
            {
                return false;
            }

            // Snapshot the token, because the attributes are overwritten by every gram we emit.
            curTermBuffer = (char[])termAtt.Buffer.Clone();
            curTermLength = termAtt.Length;
            curCodePointCount = charUtils.CodePointCount(termAtt.ToString());
            curGramSize = minGram;
            curPos = 0;
            curPosInc = posIncAtt.PositionIncrement;
            curPosLen = posLenAtt.PositionLength;
            tokStart = offsetAtt.StartOffset;
            tokEnd = offsetAtt.EndOffset;
            // When the offset span and the term length disagree, treat the token as a
            // synonym and leave its offsets untouched.
            hasIllegalOffsets = (tokStart + curTermLength) != tokEnd;
        }

#pragma warning disable 612, 618
        bool codePointMode = version.OnOrAfter(LuceneVersion.LUCENE_44);
#pragma warning restore 612, 618

        if (codePointMode)
        {
            // 4.4+ behaviour: for each start position emit every size from minGram up,
            // measuring positions and sizes in code points.
            if (curGramSize > maxGram || curPos + curGramSize > curCodePointCount)
            {
                // Sizes exhausted for this position: move one code point to the right.
                curPos++;
                curGramSize = minGram;
            }

            if (curPos + curGramSize <= curCodePointCount)
            {
                ClearAttributes();
                int gramStart = charUtils.OffsetByCodePoints(curTermBuffer, 0, curTermLength, 0, curPos);
                int gramEnd = charUtils.OffsetByCodePoints(curTermBuffer, 0, curTermLength, gramStart, curGramSize);
                termAtt.CopyBuffer(curTermBuffer, gramStart, gramEnd - gramStart);
                // Only the first gram of a token carries its position increment; later grams get 0.
                posIncAtt.PositionIncrement = curPosInc;
                curPosInc = 0;
                posLenAtt.PositionLength = curPosLen;
                // Every gram reports the offsets of the whole original token.
                offsetAtt.SetOffset(tokStart, tokEnd);
                curGramSize++;
                return true;
            }
        }
        else
        {
            // Legacy behaviour: for each size emit every position, measured in chars.
            while (curGramSize <= maxGram)
            {
                if (curPos + curGramSize > curTermLength)
                {
                    // No room left for this size: restart at position 0 with the next size.
                    curGramSize++;
                    curPos = 0;
                    continue;
                }

                ClearAttributes();
                termAtt.CopyBuffer(curTermBuffer, curPos, curGramSize);
                int gramStartOffset = hasIllegalOffsets ? tokStart : tokStart + curPos;
                int gramEndOffset = hasIllegalOffsets ? tokEnd : tokStart + curPos + curGramSize;
                offsetAtt.SetOffset(gramStartOffset, gramEndOffset);
                curPos++;
                return true;
            }
        }

        // The current token is fully expanded; loop around to fetch the next one.
        curTermBuffer = null;
    }
}
/// <summary>
/// Advances this filter to its next edge n-gram, taken from the front or the back
/// of the current token depending on <c>side</c>.
/// </summary>
/// <returns>
/// <c>true</c> when a gram has been written to the attributes;
/// <c>false</c> once the wrapped input has no more tokens.
/// </returns>
public override sealed bool IncrementToken()
{
    for (; ; )
    {
        if (curTermBuffer == null)
        {
            // No token is being expanded: pull the next one from the input.
            if (!m_input.IncrementToken())
            {
                return false;
            }

            // Snapshot the token, because the attributes are overwritten by every gram we emit.
            curTermBuffer = (char[])termAtt.Buffer.Clone();
            curTermLength = termAtt.Length;
            curCodePointCount = charUtils.CodePointCount(termAtt);
            curGramSize = minGram;
            tokStart = offsetAtt.StartOffset;
            tokEnd = offsetAtt.EndOffset;

            // From 4.4 on, offsets are never adjusted. Before that they are adjusted only
            // when the offset span matches the term length; a mismatch is taken to mean
            // the token is a synonym.
#pragma warning disable 612, 618
            updateOffsets = !version.OnOrAfter(LuceneVersion.LUCENE_44)
#pragma warning restore 612, 618
                && (tokStart + curTermLength) == tokEnd;

            // Accumulated rather than assigned: it is reset only when a first gram is
            // emitted, so the increment of a token that yields no gram carries over.
            savePosIncr += posIncrAtt.PositionIncrement;
            savePosLen = posLenAtt.PositionLength;
        }

        // Emit a gram only while the size is within range and the token has enough code points.
        if (curGramSize <= maxGram && curGramSize <= curCodePointCount)
        {
            // Front grams start at 0; back grams start curGramSize code points before the end.
            int gramStart;
            if (side == Side.FRONT)
            {
                gramStart = 0;
            }
            else
            {
                gramStart = charUtils.OffsetByCodePoints(curTermBuffer, 0, curTermLength, curTermLength, -curGramSize);
            }
            int gramEnd = charUtils.OffsetByCodePoints(curTermBuffer, 0, curTermLength, gramStart, curGramSize);

            ClearAttributes();
            int startOffset = updateOffsets ? tokStart + gramStart : tokStart;
            int endOffset = updateOffsets ? tokStart + gramEnd : tokEnd;
            offsetAtt.SetOffset(startOffset, endOffset);

            // Only the first gram of a token carries the saved increment; later grams get 0.
            bool isFirstGram = curGramSize == minGram;
            posIncrAtt.PositionIncrement = isFirstGram ? savePosIncr : 0;
            if (isFirstGram)
            {
                savePosIncr = 0;
            }

            posLenAtt.PositionLength = savePosLen;
            termAtt.CopyBuffer(curTermBuffer, gramStart, gramEnd - gramStart);
            curGramSize++;
            return true;
        }

        // The current token is fully expanded (or too short); loop around to fetch the next one.
        curTermBuffer = null;
    }
}