// Drops the pending word (_nextWord) when its offset is at or beyond _endAt.
// Does nothing when there is no pending word.
private void CheckForNextWordPastEnd()
{
    if (_nextWord == null)
    {
        return;
    }

    if (_nextWord.Offset < _endAt)
    {
        return;
    }

    _nextWord = null;
}
// Returns the next word found in the chunks supplied by _src, or null once
// no chunks are left. The returned offset is the chunk offset plus the
// word's position within the chunk; the returned length is the length of
// the raw (still entity-escaped) text, while the returned text has been
// passed through HtmlUtils.UnEscapeEntities.
public TextWithOffsetAndLen Next()
{
    // Keep scanning for as long as there is a chunk to look at.
    while (_curr != null)
    {
        // Skip ahead to the next potential start of a word. The candidate
        // may still turn out not to be a word, in which case the extracted
        // text is empty and we simply go around again.
        AdvanceUntilWordStart();

        if (EOS())
        {
            // This chunk is exhausted: restart at the beginning of the next
            // chunk (a null chunk ends the outer loop).
            _offset = 0;
            _curr = _src.Next();
            continue;
        }

        // Walk to the end of the word. Characters that are neither breaks
        // nor letters/numbers are consumed but only count as part of the
        // word if a letter/number (or an included break char) follows.
        // For example, for the string "'that's'" (including the single
        // quotes) the word is "that's" -- the outer quotes are dropped.
        int wordStart = _offset;
        int wordEnd = wordStart;
        do
        {
            int consumed;
            CharClass kind = ClassifyChar(_curr.Text, _offset, out consumed);

            // A hard break ends the word without being consumed.
            if (Test(kind, CharClass.Break))
            {
                break;
            }

            _offset += consumed;

            // An included break char is kept in the word and terminates it.
            bool closesWord = Test(kind, CharClass.IncludedBreakChar);
            if (closesWord || Test(kind, CharClass.LetterOrNumber))
            {
                wordEnd = _offset;
            }

            if (closesWord)
            {
                break;
            }
        }
        while (!EOS());

        string rawWord = _curr.Text.Substring(wordStart, wordEnd - wordStart);
        if (rawWord.Length == 0)
        {
            // Nothing usable here; look for the next candidate.
            continue;
        }

        string unescaped = HtmlUtils.UnEscapeEntities(rawWord, HtmlUtils.UnEscapeMode.NonMarkupText);
        return new TextWithOffsetAndLen(unescaped, _curr.Offset + wordStart, rawWord.Length);
    }

    // No chunks left.
    return null;
}
// Reads words from _src into _nextWord until the source is exhausted or a
// word is found that is not entirely before _startAt, then discards that
// word if it lies past the end of the range.
private void AdvanceToStart()
{
    do
    {
        _nextWord = _src.Next();
    }
    while (_nextWord != null                                    // not at EOD
           && _nextWord.Offset + _nextWord.Len <= _startAt);    // word is entirely before startAt

    CheckForNextWordPastEnd();
}
// Positions _nextWord on the first word from _src that is not entirely
// before _startAt (or null if the source runs out first), then applies the
// past-end check to it.
private void AdvanceToStart()
{
    while (true)
    {
        _nextWord = _src.Next();

        // Source exhausted: nothing more to skip.
        bool atEndOfData = _nextWord == null;
        if (atEndOfData)
        {
            break;
        }

        // Stop at the first word that reaches past startAt.
        bool entirelyBeforeStart = _nextWord.Offset + _nextWord.Len <= _startAt;
        if (!entirelyBeforeStart)
        {
            break;
        }
    }

    CheckForNextWordPastEnd();
}
// Makes the pending word the current word and fetches a new pending word
// from _src. A fetched word whose offset is at or beyond _endAt is
// discarded, leaving _nextWord null.
public void Next()
{
    _currentWord = _nextWord;

    var upcoming = _src.Next();

    // Same rule as CheckForNextWordPastEnd: a word starting at or past
    // _endAt is treated as if there were no further word.
    bool pastEnd = upcoming != null && upcoming.Offset >= _endAt;
    _nextWord = pastEnd ? null : upcoming;
}
// Clears _nextWord if a pending word exists and its offset is at or past
// _endAt; otherwise leaves it untouched.
private void CheckForNextWordPastEnd()
{
    bool startsPastEnd = _nextWord != null && _nextWord.Offset >= _endAt;
    if (startsPastEnd)
    {
        _nextWord = null;
    }
}
// Produces the next word from the chunked text supplied by _src, or null
// when there are no chunks left. The result carries the word text after
// HtmlUtils.UnEscapeEntities, its offset (chunk offset plus position in the
// chunk) and the length of the raw text before unescaping.
public TextWithOffsetAndLen Next()
{
    for (;;)
    {
        // No chunks left.
        if (_curr == null)
        {
            return null;
        }

        // Move to the next potential start of a word. This may not turn out
        // to be an actual word; if so the extracted text is empty and the
        // loop simply tries again.
        AdvanceUntilWordStart();

        if (!EOS())
        {
            // Find the end of the word. Only hard breaks and included break
            // characters terminate it; other non-alphanumeric characters are
            // consumed and become part of the word only when followed by a
            // letter, number, or included break char. For example, for the
            // string "'that's'" (including single quotes), the word is
            // "that's" (note outer single quotes dropped).
            int first = _offset;
            int last = first;
            bool moreInChunk = true;
            while (moreInChunk)
            {
                int width;
                CharClass cls = ClassifyChar(_curr.Text, _offset, out width);

                if (Test(cls, CharClass.Break))
                {
                    // Hard break: stop without consuming it.
                    break;
                }

                _offset += width;

                if (Test(cls, CharClass.IncludedBreakChar))
                {
                    // Included in the word, and ends it.
                    last = _offset;
                    break;
                }

                if (Test(cls, CharClass.LetterOrNumber))
                {
                    last = _offset;
                }

                moreInChunk = !EOS();
            }

            string candidate = _curr.Text.Substring(first, last - first);
            if (candidate.Length > 0)
            {
                return new TextWithOffsetAndLen(
                    HtmlUtils.UnEscapeEntities(candidate, HtmlUtils.UnEscapeMode.NonMarkupText),
                    _curr.Offset + first,
                    candidate.Length);
            }
        }
        else
        {
            // Reached the end of this chunk: try again with a new chunk
            // (or null, in which case the next iteration exits).
            _offset = 0;
            _curr = _src.Next();
        }
    }
}
// Creates a word source over the given text source and immediately reads
// its first chunk into _curr.
public WordSource(HtmlTextSource src)
{
    _src = src;

    // Prime the scanner with the first chunk.
    _curr = _src.Next();
}