//-------------------------------------------------------------------------
//  Read the next token from the input buffer into CurrentWord and return
//  it. Once the EndOfStream token has been reported, every further call
//  returns that same EndOfStream token again.
//-------------------------------------------------------------------------
public virtual Word getNextWord()
{
    CurrentWord.ClearMarks();

    // The end of the stream was already reported: hand back the same token.
    if (CurrentWord.Tag == Word.TokenType.eoEOS)
    {
        return CurrentWord;
    }

    SkipWhitespace();

    // The read position is at or past BufferLength: tag the current word
    // as EndOfStream and stamp it with the current sentence number.
    if (iOffset >= BufferLength)
    {
        CurrentWord.Tag = Word.TokenType.eoEOS;
        CurrentWord.SentenceNumber = iSentenceNumber;
        return CurrentWord;
    }

    int specialsCount;
    bool containsUpperCase;

    // Locate the last character of the token; CleanToken may then adjust
    // the count of special characters it was handed.
    iEndOffset = FindRightBorderOfToken(out specialsCount, out containsUpperCase);
    CleanToken(ref specialsCount);

    // iEndOffset is inclusive, hence the +1 when computing the length.
    int tokenLength = iEndOffset - iOffset + 1;
    string tokenText = strBuffer.Substring(iOffset, tokenLength);

    // Offset shifted by iShiftOffset and reduced modulo MaximalOffset.
    uint wrappedOffset = (uint)(iOffset + iShiftOffset) % MaximalOffset;

    CurrentWord.Init(tokenText, wrappedOffset, iSentenceNumber, iTokenNumber++, Word.TokenType.eoUndef);
    CurrentWord.SelfIdentifyType(specialsCount);

    // Advance past the consumed token and keep the token counter
    // below ushort.MaxValue.
    iOffset += tokenText.Length;
    iTokenNumber = (ushort)(iTokenNumber % ushort.MaxValue);

    if (containsUpperCase)
    {
        CurrentWord.NormalizeCase();
    }

    return CurrentWord;
}