/// <summary>
        /// Produces the next token: a run of consecutive characters accepted by
        /// <c>IsTokenChar</c>, each passed through <c>Normalize</c> before it is stored
        /// in the term buffer. A run is cut off once it reaches <c>MAX_WORD_LEN</c> characters.
        /// </summary>
        /// <returns>
        /// <c>true</c> when a token was written to the term and offset attributes;
        /// <c>false</c> when the input ran out before any token character was collected.
        /// </returns>
        public override bool IncrementToken()
        {
            ClearAttributes();

            int tokenLength = 0;
            int tokenStart = bufferIndex;
            char[] termChars = termAtt.TermBuffer();

            for (;;)
            {
                if (bufferIndex >= dataLen)
                {
                    // The I/O buffer is used up: add it to the running offset and refill.
                    offset += dataLen;
                    dataLen = input.Read(ioBuffer, 0, ioBuffer.Length);
                    if (dataLen <= 0)
                    {
                        // Reset to zero so a later "offset += dataLen" cannot move the offset backwards.
                        dataLen = 0;
                        if (tokenLength == 0)
                        {
                            return false;
                        }

                        // End of input in the middle of a token: emit what was collected.
                        break;
                    }
                    bufferIndex = 0;
                }

                char current = ioBuffer[bufferIndex++];

                if (!IsTokenChar(current))
                {
                    if (tokenLength > 0)
                    {
                        // A non-token character ends the token in progress.
                        break;
                    }

                    // Nothing collected yet: keep skipping non-token characters.
                    continue;
                }

                if (tokenLength == 0)
                {
                    // First character of the token: bufferIndex was already advanced past it.
                    tokenStart = offset + bufferIndex - 1;
                }
                else if (tokenLength == termChars.Length)
                {
                    // Term buffer is full: ask for room for at least one more character.
                    termChars = termAtt.ResizeTermBuffer(tokenLength + 1);
                }

                termChars[tokenLength++] = Normalize(current);

                if (tokenLength == MAX_WORD_LEN)
                {
                    // Length cap reached: emit the token as it stands.
                    break;
                }
            }

            termAtt.SetTermLength(tokenLength);
            int tokenEnd = tokenStart + tokenLength;
            offsetAtt.SetOffset(CorrectOffset(tokenStart), CorrectOffset(tokenEnd));
            return true;
        }
Пример #2
0
		/// <summary>
		/// Clears the <c>done</c> flag, registers the term and offset attributes, and
		/// asks the term attribute to resize its buffer to <paramref name="bufferSize"/>.
		/// </summary>
		/// <param name="bufferSize">Size passed to <c>ResizeTermBuffer</c> on the term attribute.</param>
		private void Init(int bufferSize)
		{
			done = false;

			// Attributes are registered in the same order as before: term first, then offset.
			termAtt = AddAttribute<ITermAttribute>();
			offsetAtt = AddAttribute<IOffsetAttribute>();

			termAtt.ResizeTermBuffer(bufferSize);
		}
Пример #3
0
 /// <summary>
 /// Emits everything that can be read from <c>input</c> as one single token.
 /// Only the first call (while <c>done</c> is false) produces a token.
 /// </summary>
 /// <returns>
 /// <c>true</c> when the token was produced by this call; <c>false</c> when
 /// <c>done</c> was already set.
 /// </returns>
 public override bool IncrementToken()
 {
     if (done)
     {
         return false;
     }

     ClearAttributes();
     done = true;

     int    upto   = 0;
     char[] buffer = termAtt.TermBuffer();
     while (true)
     {
         int length = input.Read(buffer, upto, buffer.Length - upto);

         // Any non-positive count means end of input. A reader that reports the end
         // with a negative value would otherwise decrement upto and never terminate.
         if (length <= 0)
         {
             break;
         }

         upto += length;
         if (upto == buffer.Length)
         {
             // Buffer is full: grow it before the next read so the requested count is never zero.
             buffer = termAtt.ResizeTermBuffer(1 + buffer.Length);
         }
     }

     termAtt.SetTermLength(upto);

     // The token spans the whole input: from offset 0 to the number of characters read.
     finalOffset = CorrectOffset(upto);
     offsetAtt.SetOffset(CorrectOffset(0), finalOffset);
     return true;
 }
Пример #4
0
 /// <summary>
 /// Prepares the instance for use: resets <c>done</c>, registers the term and offset
 /// attributes, and resizes the term buffer to <paramref name="bufferSize"/>.
 /// </summary>
 /// <param name="bufferSize">Size handed to <c>ResizeTermBuffer</c>.</param>
 private void Init(int bufferSize)
 {
     done = false;

     // Registration order is kept: term attribute first, offset attribute second.
     termAtt = AddAttribute<ITermAttribute>();
     offsetAtt = AddAttribute<IOffsetAttribute>();

     termAtt.ResizeTermBuffer(bufferSize);
 }
Пример #5
0
        /// <summary>
        /// Advances the wrapped stream and, when <c>suffixByTokenType</c> holds a suffix
        /// for the current token's type, appends that suffix to the term text.
        /// </summary>
        /// <returns>
        /// <c>false</c> when the wrapped stream has no more tokens; otherwise <c>true</c>,
        /// whether or not a suffix was appended.
        /// </returns>
        public override bool IncrementToken()
        {
            if (!input.IncrementToken())
            {
                // Wrapped stream is exhausted.
                return false;
            }

            if (suffixByTokenType == null)
            {
                // No suffix table configured: pass the token through unchanged.
                return true;
            }

            char[] suffix;
            if (!suffixByTokenType.TryGetValue(typeAtt.Type, out suffix))
            {
                // No suffix registered for this token type.
                return true;
            }

            char[] buffer    = termAtt.TermBuffer();
            int    length    = termAtt.TermLength();
            int    newLength = length + suffix.Length;

            // Grow whenever the suffix does not fit, not only when the buffer is already
            // full. With some free space but less than suffix.Length, Array.Copy below
            // would throw because the destination is too short.
            // NOTE(review): relies on ResizeTermBuffer keeping the existing term characters
            // - confirm against the ITermAttribute contract.
            if (buffer.Length < newLength)
            {
                buffer = termAtt.ResizeTermBuffer(newLength);
            }

            Array.Copy(suffix, 0, buffer, length, suffix.Length);
            termAtt.SetTermLength(newLength);

            return true;
        }
Пример #6
0
        /// <summary>
        /// Produces the next token: a run of consecutive characters accepted by
        /// <c>Helper.IsTokenChar</c>, each passed through <c>Helper.Normalize</c> before
        /// it is stored in the term buffer.
        /// </summary>
        /// <returns>
        /// <c>true</c> when a token was written to the term and offset attributes;
        /// <c>false</c> when the input ran out before any token character was collected.
        /// </returns>
        public override bool IncrementToken()
        {
            ClearAttributes();

            int tokenLength = 0;
            int tokenStart = _bufferIndex;
            char[] termChars = _termAtt.TermBuffer();

            for (;;)
            {
                if (_bufferIndex >= _dataLen)
                {
                    // The I/O buffer is used up: add it to the running offset and refill.
                    _offset += _dataLen;
                    _dataLen = input.Read(_ioBuffer, 0, _ioBuffer.Length);
                    if (_dataLen <= 0)
                    {
                        // Reset to zero so a later "_offset += _dataLen" cannot move the offset backwards.
                        _dataLen = 0;
                        if (tokenLength == 0)
                        {
                            return false;
                        }

                        // End of input in the middle of a token: emit what was collected.
                        break;
                    }
                    _bufferIndex = 0;
                }

                char current = _ioBuffer[_bufferIndex++];

                if (!Helper.IsTokenChar(current))
                {
                    if (tokenLength > 0)
                    {
                        // A non-token character ends the token in progress.
                        break;
                    }

                    // Nothing collected yet: keep skipping non-token characters.
                    continue;
                }

                if (tokenLength == 0)
                {
                    // First character of the token: _bufferIndex was already advanced past it.
                    tokenStart = _offset + _bufferIndex - 1;
                }
                else if (tokenLength == termChars.Length)
                {
                    // Term buffer is full: ask for room for at least one more character.
                    termChars = _termAtt.ResizeTermBuffer(tokenLength + 1);
                }

                termChars[tokenLength++] = Helper.Normalize(current);
            }

            _termAtt.SetTermLength(tokenLength);
            int tokenEnd = tokenStart + tokenLength;
            _offsetAtt.SetOffset(CorrectOffset(tokenStart), CorrectOffset(tokenEnd));

            return true;
        }
Пример #7
0
 /// <summary>
 /// Advances the wrapped stream and passes the term buffer to <c>Reverse</c>.
 /// When <c>marker</c> differs from <c>NOMARKER</c>, the marker character is first
 /// written just past the end of the term and included in the length passed on.
 /// </summary>
 /// <returns>
 /// <c>true</c> when the wrapped stream supplied a token; <c>false</c> when it is exhausted.
 /// </returns>
 public override bool IncrementToken()
 {
     if (!input.IncrementToken())
     {
         return false;
     }

     int termLength = termAtt.TermLength();
     if (marker != NOMARKER)
     {
         // Make room for one extra character and place the marker in the new last slot.
         termLength += 1;
         termAtt.ResizeTermBuffer(termLength);
         char[] grown = termAtt.TermBuffer();
         grown[termLength - 1] = marker;
     }

     Reverse(termAtt.TermBuffer(), termLength);
     termAtt.SetTermLength(termLength);
     return true;
 }