Example #1
0
        /// <summary>
        /// Moves to the next gram and publishes it through the term, position-increment,
        /// position-length and offset attributes.
        /// </summary>
        /// <returns>
        /// <c>true</c> when a gram has been written to the attributes; <c>false</c> when fewer than
        /// <c>minGram</c> buffered code points remain after the current start position.
        /// </returns>
        public override sealed bool IncrementToken()
        {
            ClearAttributes();

            // The loop terminates because each pass either returns, calls Consume()
            // (which, per the original note, advances the buffer) or increments gramSize.
            while (true)
            {
                // Compaction: when the start position is within maxGram + 1 of the end of the
                // buffered data and the input is not yet exhausted, slide the unread tail to
                // index 0 and top the buffer up from the input.
                if (bufferStart >= bufferEnd - maxGram - 1 && !exhausted)
                {
                    int shift = bufferStart;
                    int unread = bufferEnd - shift;

                    Array.Copy(buffer, shift, buffer, 0, unread);
                    bufferEnd = unread;
                    // Keep the bookkeeping indices relative to the shifted data.
                    lastCheckedChar -= shift;
                    lastNonTokenChar -= shift;
                    bufferStart = 0;

                    // Read as much as fits into the free space; Fill reporting false marks the input as exhausted.
                    exhausted = !charUtils.Fill(charBuffer, m_input, buffer.Length - bufferEnd);
                    // Append what was read to the buffer as code points.
                    bufferEnd += charUtils.ToCodePoints(charBuffer.Buffer, 0, charBuffer.Length, buffer, bufferEnd);
                }

                // Every gram size has been tried at this start position (or the gram would
                // run past the buffered data): move on to the next start position.
                if (gramSize > maxGram || bufferStart + gramSize > bufferEnd)
                {
                    // Not even a minGram-sized gram fits after advancing by one: no more tokens.
                    if (bufferStart + 1 + minGram > bufferEnd)
                    {
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(exhausted);
                        }
                        return false;
                    }

                    Consume();
                    gramSize = minGram;
                }

                UpdateLastNonTokenChar();

                // Skip this candidate when it would include a non-token char, or when only
                // edge grams are wanted and the char right before the start is not the last
                // non-token char seen.
                bool gramCoversNonTokenChar = bufferStart <= lastNonTokenChar && lastNonTokenChar < bufferStart + gramSize;
                bool edgeModeButNotAtEdge = edgesOnly && lastNonTokenChar != bufferStart - 1;
                if (gramCoversNonTokenChar || edgeModeButNotAtEdge)
                {
                    Consume();
                    gramSize = minGram;
                    continue;
                }

                // Emit the gram: convert the code points back to chars in the term buffer.
                int charCount = charUtils.ToChars(buffer, bufferStart, gramSize, termAtt.Buffer, 0);
                termAtt.Length = charCount;
                posIncAtt.PositionIncrement = 1;
                posLenAtt.PositionLength = 1;
                offsetAtt.SetOffset(CorrectOffset(offset), CorrectOffset(offset + charCount));

                // The next call tries a gram one code point longer at the same start position.
                gramSize++;
                return true;
            }
        }