/// <summary>
/// Produces the next token from the input. Code points are read from
/// <c>ioBuffer</c> (refilled through <c>charUtils.fill</c> whenever it has been
/// fully consumed); consecutive code points accepted by <c>isTokenChar</c> are
/// normalized with <c>Normalize</c> and appended to the term buffer. A token ends
/// at the first rejected code point, at end of input, or once the term has
/// reached <c>MAX_WORD_LEN</c> chars.
/// </summary>
/// <returns>
/// <c>true</c> when a token was produced (term length and offsets are set);
/// <c>false</c> when the input is exhausted and nothing was buffered, in which
/// case <c>finalOffset</c> is set from the current <c>offset</c>.
/// </returns>
public override bool IncrementToken()
{
    ClearAttributes();

    int termLength = 0;
    int tokenStart = -1; // assigned when the first token char is seen; asserted before use
    int tokenEnd = -1;
    char[] termBuffer = termAtt.Buffer();

    for (;;)
    {
        // Refill the I/O buffer once everything in it has been consumed.
        if (bufferIndex >= dataLen)
        {
            offset += dataLen;
            charUtils.fill(ioBuffer, input); // CharacterUtils reads supplementary-char aware

            if (ioBuffer.Length == 0)
            {
                // Reset so that a later "offset += dataLen" cannot move the offset again.
                dataLen = 0;

                if (termLength > 0)
                {
                    break; // end of input, but a pending token still has to be emitted
                }

                finalOffset = CorrectOffset(offset);
                return false;
            }

            dataLen = ioBuffer.Length;
            bufferIndex = 0;
        }

        // Go through CharacterUtils to support < 3.1 UTF-16 code unit behavior
        // if the char based methods are gone.
        int codePoint = charUtils.codePointAt(ioBuffer.Buffer, bufferIndex, ioBuffer.Length);
        int codeUnits = Character.CharCount(codePoint);
        bufferIndex += codeUnits;

        if (!isTokenChar(codePoint))
        {
            if (termLength > 0)
            {
                break; // a non-token char terminates the token collected so far
            }

            continue; // still before the first token char: keep scanning
        }

        if (termLength == 0)
        {
            // First char of the token: record where it starts in the input.
            Debug.Assert(tokenStart == -1);
            tokenStart = offset + bufferIndex - codeUnits;
            tokenEnd = tokenStart;
        }
        else if (termLength >= termBuffer.Length - 1)
        {
            // A supplementary code point needs two chars; grow so it cannot run out of bounds.
            termBuffer = termAtt.ResizeBuffer(2 + termLength);
        }

        tokenEnd += codeUnits;

        // Append the normalized code point to the term buffer.
        int charsWritten = Character.ToChars(Normalize(codePoint), termBuffer, termLength);
        termLength += charsWritten;

        // ">=" rather than "==": a surrogate pair can step over the exact limit.
        if (termLength >= MAX_WORD_LEN)
        {
            break;
        }
    }

    termAtt.Length = termLength;
    Debug.Assert(tokenStart != -1);

    int correctedStart = CorrectOffset(tokenStart);
    int correctedEnd = CorrectOffset(tokenEnd);
    finalOffset = correctedEnd;
    offsetAtt.SetOffset(correctedStart, correctedEnd);
    return true;
}