Пример #1
0
        /// <summary>
        /// Advances to the next token: accumulates consecutive code points accepted by
        /// <c>isTokenChar</c> into the term buffer (each passed through <c>Normalize</c>),
        /// then records the term length and the corrected start/end offsets.
        /// </summary>
        /// <returns>
        /// <c>true</c> when a token was produced; <c>false</c> when the I/O buffer comes back
        /// empty after a fill and no characters have been collected, in which case
        /// <c>finalOffset</c> is set from the current <c>offset</c>.
        /// </returns>
        public override bool IncrementToken()
        {
            ClearAttributes();

            int termLength = 0;
            int tokenStart = -1; // assigned when the first token char is seen
            int tokenEnd   = -1;

            char[] termBuffer = termAtt.Buffer();

            for (;;)
            {
                // Everything read so far has been consumed: ask for more input.
                if (bufferIndex >= dataLen)
                {
                    offset += dataLen;
                    // presumably reloads ioBuffer from input (supplementary-char aware) - confirm against CharacterUtils
                    charUtils.fill(ioBuffer, input);

                    if (ioBuffer.Length == 0)
                    {
                        // Zero it so that a later "offset += dataLen" does not move the offset again.
                        dataLen = 0;

                        if (termLength <= 0)
                        {
                            // Nothing buffered and nothing left to read: no token.
                            finalOffset = CorrectOffset(offset);
                            return false;
                        }

                        // Emit the characters gathered before the input ran dry.
                        break;
                    }

                    dataLen     = ioBuffer.Length;
                    bufferIndex = 0;
                }

                // Read one full code point (possibly a surrogate pair) and step past it.
                int codePoint = charUtils.codePointAt(ioBuffer.Buffer, bufferIndex, ioBuffer.Length);
                int width     = Character.CharCount(codePoint);
                bufferIndex += width;

                if (!isTokenChar(codePoint))
                {
                    if (termLength > 0)
                    {
                        // A non-token char after collected chars terminates the token.
                        break;
                    }

                    // Still before the first token char: keep scanning.
                    continue;
                }

                if (termLength == 0)
                {
                    // First char of the token: bufferIndex has already moved past it,
                    // so subtract its width to get the position where it began.
                    Debug.Assert(tokenStart == -1);
                    tokenStart = offset + bufferIndex - width;
                    tokenEnd   = tokenStart;
                }
                else if (termLength >= termBuffer.Length - 1)
                {
                    // Grow so that a two-unit supplementary char cannot run out of bounds.
                    termBuffer = termAtt.ResizeBuffer(2 + termLength);
                }

                tokenEnd   += width;
                termLength += Character.ToChars(Normalize(codePoint), termBuffer, termLength);

                // ">=" rather than "==": a surrogate pair may step over the exact limit.
                if (termLength >= MAX_WORD_LEN)
                {
                    break;
                }
            }

            termAtt.Length = termLength;
            Debug.Assert(tokenStart != -1);

            int correctedStart = CorrectOffset(tokenStart);
            int correctedEnd   = CorrectOffset(tokenEnd);
            finalOffset = correctedEnd;
            offsetAtt.SetOffset(correctedStart, correctedEnd);
            return true;
        }