Example #1
0
 /// <summary>
 /// Advances this stream to the next n-gram of the current input term.
 /// For each term pulled from <c>input</c>, grams are emitted for every size
 /// from <c>_minGram</c> up to <c>_maxGram</c>, and within each size from the
 /// start of the term to its end. When a term is used up, the next one is
 /// fetched from <c>input</c>.
 /// </summary>
 /// <returns>
 /// <c>true</c> if a gram was written to the term and offset attributes;
 /// <c>false</c> once <c>input</c> has no more tokens.
 /// </returns>
 public override bool IncrementToken()
 {
     for (;;)
     {
         if (_curTermBuffer == null)
         {
             // No term in progress: fetch the next one from upstream.
             if (!input.IncrementToken())
             {
                 return false;
             }

             // Work on a copy, because _termAtt is overwritten with each emitted gram.
             _curTermBuffer = (char[])_termAtt.TermBuffer().Clone();
             _curTermLength = _termAtt.TermLength();
             _curGramSize   = _minGram;
             _curPos        = 0;
             _tokStart      = _offsetAtt.StartOffset;
         }

         while (_curGramSize <= _maxGram)
         {
             if (_curPos + _curGramSize > _curTermLength)
             {
                 // No gram of this size fits at the current position:
                 // restart from the beginning of the term with a larger size.
                 _curGramSize++;
                 _curPos = 0;
                 continue;
             }

             ClearAttributes();
             _termAtt.SetTermBuffer(_curTermBuffer, _curPos, _curGramSize);

             // Offsets are relative to the start offset of the source token.
             var gramStart = _tokStart + _curPos;
             _offsetAtt.SetOffset(gramStart, gramStart + _curGramSize);

             _curPos++;
             return true;
         }

         // Every gram size has been emitted for this term; move on to the next one.
         _curTermBuffer = null;
     }
 }
Example #2
0
        /// <summary>
        /// Emits the next n-gram of the text read from <c>input</c>.
        /// </summary>
        /// <remarks>
        /// On the first call, one read of at most 1024 characters is taken from
        /// <c>input</c>; the result is trimmed and cached, and all grams are cut
        /// from that cached string.
        /// NOTE(review): a single <c>Read</c> may return fewer characters than
        /// the reader holds, and input beyond 1024 characters is never consumed
        /// here - confirm this is acceptable for the callers.
        /// </remarks>
        /// <returns>
        /// <c>true</c> if a gram was written to the term and offset attributes;
        /// <c>false</c> when no further gram is available.
        /// </returns>
        public override bool IncrementToken()
        {
            ClearAttributes();

            if (!_mStarted)
            {
                _mStarted         = true;
                _mCurrentGramSize = _mGramSize;

                var buffer    = new char[1024];
                var charsRead = input.Read(buffer, 0, buffer.Length);

                // Trim strips both leading and trailing whitespace from the text read.
                _mInStr = new string(buffer, 0, charsRead).Trim();
                _mInLen = _mInStr.Length;
            }

            if (_mPos + _mCurrentGramSize > _mInLen)
            {
                // The current gram size no longer fits: rewind and grow the gram size.
                _mPos = 0;
                _mCurrentGramSize++;

                // Stop when the grown size exceeds the configured size, or when
                // even a gram of the configured size does not fit in the text.
                if (_mCurrentGramSize > _mGramSize || _mPos + _mGramSize > _mInLen)
                {
                    return false;
                }
            }

            var gramStart = _mPos++;

            _mTermAtt.SetTermBuffer(_mInStr, gramStart, _mCurrentGramSize);
            _mOffsetAtt.SetOffset(CorrectOffset(gramStart), CorrectOffset(gramStart + _mCurrentGramSize));

            return true;
        }
 /// <summary>
 /// Advances <c>input</c> by one token and replaces its term text with the
 /// value returned by <c>lemmatizer.lemmatize</c> for that term.
 /// </summary>
 /// <returns>
 /// <c>true</c> if a token was available and rewritten; <c>false</c> when
 /// <c>input</c> has no more tokens.
 /// </returns>
 public override bool IncrementToken()
 {
     if (!input.IncrementToken())
     {
         return false;
     }

     string lemma = lemmatizer.lemmatize(_termAtt.Term);
     _termAtt.SetTermBuffer(lemma);
     return true;
 }
 /// <summary>
 /// Emits the next partial snippet as a token. The term is taken from
 /// <c>snippet</c> at <c>startTerm</c> with length <c>lenTerm</c>, and the
 /// offsets are derived from <c>startOffset</c> and <c>lenTerm</c> after
 /// passing through <c>CorrectOffset</c>.
 /// </summary>
 /// <returns>
 /// <c>true</c> if <c>GetNextPartialSnippet</c> produced a snippet and the
 /// attributes were populated; <c>false</c> otherwise.
 /// </returns>
 public override bool IncrementToken()
 {
     if (GetNextPartialSnippet())
     {
         ClearAttributes();
         termAtt.SetTermBuffer(snippet, startTerm, lenTerm);

         var correctedStart = CorrectOffset(startOffset);
         var correctedEnd   = CorrectOffset(startOffset + lenTerm);
         offsetAtt.SetOffset(correctedStart, correctedEnd);

         return true;
     }

     return false;
 }
Example #5
0
 /// <summary>
 /// Advances <c>input</c> by one token and replaces its term text with the
 /// result of <c>_normalizer.Normalize</c>, which is given the current term
 /// and its length.
 /// </summary>
 /// <returns>
 /// <c>true</c> if a token was available and rewritten; <c>false</c> when
 /// <c>input</c> has no more tokens.
 /// </returns>
 public override bool IncrementToken()
 {
     // Note: no minimum-length cut-off is applied; an earlier check that
     // rejected terms of length <= 2 is disabled.
     if (!input.IncrementToken())
     {
         return false;
     }

     string normalizedTerm = _normalizer.Normalize(_termAtt.Term, _termAtt.TermLength());
     _termAtt.SetTermBuffer(normalizedTerm);
     return true;
 }