/**
 * Advances to the next n-gram of the wrapped stream's terms.
 *
 * For each term pulled from the wrapped stream, every gram of size _minGram is
 * emitted left to right, then every gram of the next size, and so on up to
 * _maxGram. Offsets are the term's start offset plus the gram's position.
 *
 * Returns true when a gram has been written to the attributes, false once the
 * wrapped stream has no more tokens.
 */
public override bool IncrementToken()
{
    for (;;)
    {
        if (_curTermBuffer == null)
        {
            // No term in progress: pull the next one from the wrapped stream.
            if (!input.IncrementToken())
            {
                return false;
            }

            // Snapshot the term, because the shared attribute is overwritten
            // by every gram emitted below.
            _curTermBuffer = (char[])_termAtt.TermBuffer().Clone();
            _curTermLength = _termAtt.TermLength();
            _curGramSize = _minGram;
            _curPos = 0;
            _tokStart = _offsetAtt.StartOffset;
        }

        // Each pass of this loop handles one gram size; when a size is used up
        // the iterator moves to the next size and rewinds to the term start.
        for (; _curGramSize <= _maxGram; _curGramSize++, _curPos = 0)
        {
            if (_curPos + _curGramSize <= _curTermLength)
            {
                ClearAttributes();
                _termAtt.SetTermBuffer(_curTermBuffer, _curPos, _curGramSize);

                var gramStart = _tokStart + _curPos;
                _offsetAtt.SetOffset(gramStart, gramStart + _curGramSize);

                _curPos++;
                return true;
            }
        }

        // Every size has been emitted for this term; fetch another one.
        _curTermBuffer = null;
    }
}
/**
 * Advances to the next gram of the input text.
 *
 * On the first call, up to 1024 characters are read from the input, trimmed of
 * leading and trailing whitespace, and cached. Text beyond the first 1024
 * characters is not tokenized. Each call then emits the gram of
 * _mCurrentGramSize characters starting at the current position and moves the
 * position forward by one.
 *
 * Returns true when a gram has been written to the attributes, false when no
 * further gram fits in the cached text.
 */
public override bool IncrementToken()
{
    ClearAttributes();

    if (!_mStarted)
    {
        _mStarted = true;
        _mCurrentGramSize = _mGramSize;

        var chars = new char[1024];
        // ReadBlock keeps reading until the buffer is full or the reader is
        // exhausted. A single Read call may return fewer characters than the
        // reader still holds, which would silently truncate the input.
        var read = input.ReadBlock(chars, 0, chars.Length);

        // Trim strips whitespace from both ends of the text.
        // NOTE(review): offsets below index into the trimmed string, so
        // leading whitespace in the input shifts them - confirm this is intended.
        _mInStr = new string(chars, 0, read).Trim();
        _mInLen = _mInStr.Length;
    }

    if (_mPos + _mCurrentGramSize > _mInLen)
    {
        // The current gram would run past the end of the text: rewind and
        // move on to the next gram size.
        _mPos = 0;
        _mCurrentGramSize++;

        if (_mCurrentGramSize > _mGramSize)
        {
            // Gram sizes above the configured one are never emitted.
            return false;
        }

        if (_mPos + _mGramSize > _mInLen)
        {
            // The text is shorter than a single gram.
            return false;
        }
    }

    var oldPos = _mPos;
    _mPos++;

    _mTermAtt.SetTermBuffer(_mInStr, oldPos, _mCurrentGramSize);
    _mOffsetAtt.SetOffset(CorrectOffset(oldPos), CorrectOffset(oldPos + _mCurrentGramSize));
    return true;
}
/**
 * Advances the wrapped stream by one token and replaces that token's term
 * text with the result of lemmatizer.lemmatize.
 *
 * Returns true when a token was produced, false once the wrapped stream has
 * no more tokens.
 */
public override bool IncrementToken()
{
    if (!input.IncrementToken())
    {
        return false;
    }

    // Overwrite the term in place; other attributes are left untouched.
    string lemma = lemmatizer.lemmatize(_termAtt.Term);
    _termAtt.SetTermBuffer(lemma);
    return true;
}
/**
 * Advances to the next partial snippet and exposes it as the current token.
 *
 * The term is the slice of snippet starting at startTerm with length lenTerm;
 * the offsets span lenTerm characters from startOffset, passed through
 * CorrectOffset.
 *
 * Returns true when a token was produced, false when GetNextPartialSnippet
 * reports that nothing is left.
 */
public override bool IncrementToken()
{
    if (GetNextPartialSnippet())
    {
        ClearAttributes();
        termAtt.SetTermBuffer(snippet, startTerm, lenTerm);

        var correctedStart = CorrectOffset(startOffset);
        var correctedEnd = CorrectOffset(startOffset + lenTerm);
        offsetAtt.SetOffset(correctedStart, correctedEnd);

        return true;
    }

    return false;
}
/**
 * Advances the wrapped stream by one token and replaces that token's term
 * text with the output of _normalizer.Normalize, which receives the current
 * term and its length.
 *
 * Returns true when a token was produced, false once the wrapped stream has
 * no more tokens.
 */
public override bool IncrementToken()
{
    // Disabled length check, kept for reference:
    //if (_termAtt.Term.Length <= 2)
    //    return false;

    if (!input.IncrementToken())
    {
        return false;
    }

    // Overwrite the term in place; other attributes are left untouched.
    string normalizedTerm = _normalizer.Normalize(_termAtt.Term, _termAtt.TermLength());
    _termAtt.SetTermBuffer(normalizedTerm);
    return true;
}