コード例 #1
0
 /// <summary>
 /// Drops the buffered look-ahead word when its offset is at or beyond
 /// <c>_endAt</c>.
 /// </summary>
 private void CheckForNextWordPastEnd()
 {
     // Nothing is buffered, so there is nothing to drop.
     if (_nextWord == null)
     {
         return;
     }

     bool startsAtOrAfterEnd = _nextWord.Offset >= _endAt;
     if (startsAtOrAfterEnd)
     {
         _nextWord = null;
     }
 }
コード例 #2
0
            /// <summary>
            /// Scans forward through the current chunk (and subsequent chunks pulled
            /// from <c>_src</c>) and returns the next word found.
            /// </summary>
            /// <returns>
            /// The word text after <c>HtmlUtils.UnEscapeEntities</c>, paired with its
            /// offset (chunk offset plus position inside the chunk) and the length of
            /// the raw, not-yet-unescaped text; <c>null</c> when no chunks are left.
            /// </returns>
            public TextWithOffsetAndLen Next()
            {
                // The loop ends as soon as the source stops supplying chunks.
                while (_curr != null)
                {
                    // Skip ahead to the next potential start of a word. It may not
                    // turn out to be an actual word, e.g. if it is all numbers.
                    AdvanceUntilWordStart();

                    if (EOS())
                    {
                        // This chunk is used up: restart at the beginning of the
                        // next chunk (or leave the loop if there is none).
                        _offset = 0;
                        _curr   = _src.Next();
                        continue;
                    }

                    // Walk to the end of the word. BoundaryWordBreak characters may
                    // not end the word: for "'that's'" (including the single quotes)
                    // the word is "that's", with the outer quotes dropped.
                    int wordStart = _offset;
                    int wordEnd   = wordStart;
                    while (true)
                    {
                        int       consumed;
                        CharClass kind = ClassifyChar(_curr.Text, _offset, out consumed);

                        // A hard break is left unconsumed and ends the scan.
                        if (Test(kind, CharClass.Break))
                        {
                            break;
                        }

                        _offset += consumed;

                        // An included break character belongs to the word but also
                        // terminates it; letters/numbers only extend the word.
                        bool endsWord = Test(kind, CharClass.IncludedBreakChar);
                        if (endsWord || Test(kind, CharClass.LetterOrNumber))
                        {
                            wordEnd = _offset;
                        }

                        if (endsWord || EOS())
                        {
                            break;
                        }
                    }

                    string raw = _curr.Text.Substring(wordStart, wordEnd - wordStart);
                    if (raw.Length == 0)
                    {
                        // Nothing usable at this position; keep looking.
                        continue;
                    }

                    return new TextWithOffsetAndLen(
                        HtmlUtils.UnEscapeEntities(raw, HtmlUtils.UnEscapeMode.NonMarkupText),
                        _curr.Offset + wordStart,
                        raw.Length);
                }

                return null;
            }
コード例 #3
0
        /// <summary>
        /// Reads words from <c>_src</c> into <c>_nextWord</c> until the source is
        /// exhausted or a word is found that does not lie entirely before
        /// <c>_startAt</c>, then runs the past-the-end check on it.
        /// </summary>
        private void AdvanceToStart()
        {
            do
            {
                _nextWord = _src.Next();
            }
            while (_nextWord != null                                      // not at EOD
                   && _nextWord.Offset + _nextWord.Len <= _startAt);      // word is entirely before startAt

            CheckForNextWordPastEnd();
        }
コード例 #4
0
        /// <summary>
        /// Skips every word that ends at or before <c>_startAt</c>, leaving the
        /// first remaining word (or <c>null</c> at end of data) in
        /// <c>_nextWord</c>, and then calls <c>CheckForNextWordPastEnd</c>.
        /// </summary>
        private void AdvanceToStart()
        {
            while (true)
            {
                _nextWord = _src.Next();

                // End of data: nothing more to skip.
                if (_nextWord == null)
                {
                    break;
                }

                // Stop at the first word that is not entirely before startAt.
                bool entirelyBeforeStart = _nextWord.Offset + _nextWord.Len <= _startAt;
                if (!entirelyBeforeStart)
                {
                    break;
                }
            }

            CheckForNextWordPastEnd();
        }
コード例 #5
0
 /// <summary>
 /// Makes the buffered look-ahead word the current word, fetches a new
 /// look-ahead word from <c>_src</c>, and runs the past-the-end check on it.
 /// </summary>
 public void Next()
 {
     // Promote the previously fetched word.
     _currentWord = _nextWord;

     // Refill the look-ahead slot, then let the end check clear it if needed.
     var upcoming = _src.Next();
     _nextWord = upcoming;
     CheckForNextWordPastEnd();
 }
コード例 #6
0
 /// <summary>
 /// Resets <c>_nextWord</c> to <c>null</c> when a word is buffered and its
 /// offset is greater than or equal to <c>_endAt</c>.
 /// </summary>
 private void CheckForNextWordPastEnd()
 {
     bool pastEnd = _nextWord != null && _nextWord.Offset >= _endAt;
     if (pastEnd)
     {
         _nextWord = null;
     }
 }
コード例 #7
0
            /// <summary>
            /// Produces the next word from the chunked text supplied by <c>_src</c>.
            /// </summary>
            /// <returns>
            /// A <c>TextWithOffsetAndLen</c> holding the entity-unescaped word, its
            /// offset (the chunk's offset plus the word's position in the chunk) and
            /// the length of the word as it appears in the raw chunk text; or
            /// <c>null</c> when there are no chunks left.
            /// </returns>
            public TextWithOffsetAndLen Next()
            {
                for (;;)
                {
                    // No chunks left.
                    if (_curr == null)
                    {
                        return null;
                    }

                    // Move to the next potential start of a word. This may not turn
                    // out to be an actual word, e.g. if it is all numbers.
                    AdvanceUntilWordStart();

                    if (EOS())
                    {
                        // End of this chunk: retry with the next chunk (a null
                        // chunk makes the check above return).
                        _offset = 0;
                        _curr = _src.Next();
                        continue;
                    }

                    // Find the end of the word. BoundaryWordBreak characters may not
                    // end the word: in "'that's'" (single quotes included) the word
                    // is "that's", without the outer quotes.
                    int begin = _offset;
                    int end = begin;
                    bool keepScanning = true;
                    while (keepScanning)
                    {
                        int consumed;
                        CharClass cls = ClassifyChar(_curr.Text, _offset, out consumed);
                        if (Test(cls, CharClass.Break))
                        {
                            // Hard break: stop without consuming it.
                            break;
                        }

                        _offset += consumed;

                        if (Test(cls, CharClass.IncludedBreakChar))
                        {
                            // Part of the word, but also its last character.
                            end = _offset;
                            keepScanning = false;
                        }
                        else
                        {
                            if (Test(cls, CharClass.LetterOrNumber))
                            {
                                end = _offset;
                            }

                            keepScanning = !EOS();
                        }
                    }

                    string rawWord = _curr.Text.Substring(begin, end - begin);
                    if (rawWord.Length > 0)
                    {
                        string unescaped = HtmlUtils.UnEscapeEntities(rawWord, HtmlUtils.UnEscapeMode.NonMarkupText);
                        return new TextWithOffsetAndLen(unescaped, _curr.Offset + begin, rawWord.Length);
                    }
                }
            }
コード例 #8
0
 /// <summary>
 /// Creates a word source over <paramref name="src"/> and immediately reads
 /// its first chunk into <c>_curr</c>.
 /// </summary>
 /// <param name="src">The text source to pull chunks from; must not be null.</param>
 /// <exception cref="System.ArgumentNullException">
 /// <paramref name="src"/> is null.
 /// </exception>
 public WordSource(HtmlTextSource src)
 {
     // Fail with a descriptive exception instead of a NullReferenceException
     // on the src.Next() call below.
     if (src == null)
     {
         throw new System.ArgumentNullException("src");
     }

     this._src = src;
     this._curr = src.Next();
 }
コード例 #9
0
 /// <summary>
 /// Advances by one word: the look-ahead word becomes the current word and a
 /// fresh look-ahead word is read from <c>_src</c>, after which
 /// <c>CheckForNextWordPastEnd</c> is called.
 /// </summary>
 public void Next()
 {
     this._currentWord = this._nextWord;

     // Read the following word, then apply the end check to it.
     this._nextWord = this._src.Next();
     this.CheckForNextWordPastEnd();
 }
コード例 #10
0
 /// <summary>
 /// Initializes the word source with the given text source and loads the
 /// first chunk from it.
 /// </summary>
 /// <param name="src">The text source that supplies chunks; must not be null.</param>
 /// <exception cref="System.ArgumentNullException">
 /// <paramref name="src"/> is null.
 /// </exception>
 public WordSource(HtmlTextSource src)
 {
     // Reject a null source up front; otherwise src.Next() below would
     // throw a NullReferenceException.
     if (src == null)
     {
         throw new System.ArgumentNullException("src");
     }

     this._src  = src;
     this._curr = src.Next();
 }