Read() public method

public Read ( ) : bool
return bool
Example #1
0
        /// <summary>
        /// Advances the parser to the next node of the input and stores its
        /// type, name, value, attributes and empty-element flag in the
        /// corresponding fields.
        /// </summary>
        /// <remarks>
        /// The method is a character-driven state machine. <c>_state</c> is a
        /// field, so a call can resume where the previous one stopped (for
        /// example in <c>State.Lt</c> after a text node was returned because a
        /// '&lt;' was seen). Attribute and entity buffers are locals: they never
        /// need to survive a call because no node is returned while they are
        /// being filled.
        /// </remarks>
        /// <returns>
        /// <c>true</c> when a node was read; <c>false</c> when the input ended
        /// without producing another node.
        /// </returns>
        public bool Read()
        {
            // Reset the per-node output fields.
            _nodeType       = HtmlNodeType.None;
            _name.Length    = 0;
            _value.Length   = 0;
            _isEmptyElement = false;

            var attrName  = new StringBuilder();
            var attrValue = new StringBuilder();

            // quoteStyle holds the opening quote of the attribute value being
            // read; a space is used as the sentinel for an unquoted value.
            var           quoteStyle    = '"';
            var           customDoctype = false;
            StringBuilder entity        = null;

            while (_reader.Read())
            {
                char c = _reader.Current;

                switch (_state)
                {
                case State.Text:
                    if (c == '&')
                    {
                        entity = new StringBuilder();
                        _state = State.Amp;
                    }
                    else if (c == '<')
                    {
                        _state = State.Lt;
                        // Emit the pending text first; the next call resumes in State.Lt.
                        if (_value.Length > 0)
                        {
                            _nodeType = HtmlNodeType.Text;
                            return true;
                        }
                    }
                    else
                    {
                        _value.Append(c);
                    }
                    break;

                case State.Amp:
                    if (c == ';')
                    {
                        _state = State.Text;
                        if (entity.Length > 0)
                        {
                            _value.Append(DecodeEntity("&" + entity + ";"));
                        }
                        else
                        {
                            // "&;" is kept literally.
                            _value.Append("&");
                            _value.Append(";");
                        }
                    }
                    else if (c == '#' && entity.Length == 0)
                    {
                        // '#' is only accepted as the first entity character.
                        entity.Append(c);
                    }
                    else if (Char.IsLetterOrDigit(c))
                    {
                        entity.Append(c);
                    }
                    else
                    {
                        // Entity without a terminating ';': decode what was
                        // collected and re-process the current character as text.
                        _state = State.Text;
                        _reader.Push(c);
                        if (entity.Length > 0)
                        {
                            _value.Append(DecodeEntity("&" + entity + ";"));
                        }
                        else
                        {
                            _value.Append("&");
                        }
                        entity = null;
                    }
                    break;

                case State.Lt:
                    // NOTE(review): _reader.Match(...) appears to look ahead without
                    // consuming and _reader.Read(n) to consume n characters - confirm
                    // against the reader implementation.
                    if (c == '/')
                    {
                        _state = State.ElemClose;
                    }
                    else if (c == '?' && _reader.Match("xml"))
                    {
                        _state = State.XmlDeclaration;
                        _reader.Read(3);
                    }
                    else if (c == '?')
                    {
                        _state = State.Pi;
                    }
                    else if (c == '!' && _reader.Match("--"))
                    {
                        _reader.Read(2);
                        _state = State.Comment;
                    }
                    else if (c == '!' && _reader.Match("[CDATA["))
                    {
                        _reader.Read(7);
                        _state = State.CData;
                    }
                    else if (c == '!' && _reader.Match("DOCTYPE"))
                    {
                        _reader.Read(7);
                        _state = State.DocType;
                    }
                    else if (!Char.IsLetter(c))
                    {
                        // Not a tag after all: keep "<" and the character as text.
                        _state = State.Text;
                        _value.Append('<');
                        _value.Append(c);
                    }
                    else
                    {
                        _attributes = new StringDictionary();
                        _state      = State.ElemName;
                        _name.Append(c);
                    }
                    break;

                case State.ElemName:
                    if (Char.IsWhiteSpace(c))
                    {
                        _state = State.ElemAttributes;
                    }
                    else if (c == '/')
                    {
                        _isEmptyElement = true;
                        _state          = State.ElemSingle;
                    }
                    else if (c == '>')
                    {
                        _state    = State.Text;
                        _nodeType = HtmlNodeType.Element;
                        return true;
                    }
                    else
                    {
                        _name.Append(c);
                    }
                    break;

                case State.ElemClose:
                    if (c == '>')
                    {
                        _state    = State.Text;
                        _nodeType = HtmlNodeType.EndElement;
                        return true;
                    }
                    _name.Append(c);
                    break;

                case State.ElemSingle:
                    if (c == '>')
                    {
                        _state    = State.Text;
                        _nodeType = HtmlNodeType.Element;
                        return true;
                    }
                    // '/' was not followed by '>': abandon the element and
                    // continue as text starting with the current character.
                    _state        = State.Text;
                    _nodeType     = HtmlNodeType.None;
                    _name.Length  = 0;
                    _value.Length = 0;
                    // Fix: the abandoned element must not leave the
                    // empty-element flag set on the text node returned next.
                    _isEmptyElement = false;
                    _value.Append(c);
                    break;

                case State.ElemAttributes:
                    if (c == '>')
                    {
                        _state    = State.Text;
                        _nodeType = HtmlNodeType.Element;
                        return true;
                    }
                    else if (c == '/')
                    {
                        _isEmptyElement = true;
                        _state          = State.ElemSingle;
                    }
                    else if (Char.IsWhiteSpace(c))
                    {
                        // Skip whitespace between attributes.
                    }
                    else
                    {
                        _state = State.AttrKey;
                        attrName.Append(c);
                    }
                    break;

                case State.Comment:
                    if (c == '-' && _reader.Match("->"))
                    {
                        _reader.Read(2);
                        _state    = State.Text;
                        _nodeType = HtmlNodeType.Comment;
                        return true;
                    }
                    _value.Append(c);
                    break;

                case State.CData:
                    if (c == ']' && _reader.Match("]>"))
                    {
                        _reader.Read(2);
                        _state    = State.Text;
                        _nodeType = HtmlNodeType.CDATA;
                        return true;
                    }
                    _value.Append(c);
                    break;

                case State.XmlDeclaration:
                    if (c == '?' && _reader.Match(">"))
                    {
                        _reader.Read(1);
                        _state    = State.Text;
                        _nodeType = HtmlNodeType.XmlDeclaration;
                        return true;
                    }
                    _value.Append(c);
                    break;

                case State.DocType:
                    if (c == '[')
                    {
                        // An internal subset follows; from here on the doctype
                        // ends at "]>" and its content goes into _value.
                        customDoctype = true;
                    }
                    else
                    {
                        if (customDoctype)
                        {
                            if (c == ']' && _reader.Match(">"))
                            {
                                _reader.Read(1);
                                _state    = State.Text;
                                _nodeType = HtmlNodeType.DocumentType;
                                return true;
                            }
                            _value.Append(c);
                        }
                        else
                        {
                            if (c == '>')
                            {
                                _state    = State.Text;
                                _nodeType = HtmlNodeType.DocumentType;
                                return true;
                            }
                            _name.Append(c);
                        }
                    }

                    break;

                case State.Pi:
                    if (c == '?' && _reader.Match(">"))
                    {
                        _reader.Read(1);
                        _state    = State.Text;
                        _nodeType = HtmlNodeType.ProcessingInstruction;
                        return true;
                    }
                    if (Char.IsWhiteSpace(c))
                    {
                        // The target name ends at the first whitespace.
                        _state = State.PiValue;
                    }
                    else
                    {
                        _name.Append(c);
                    }
                    break;

                case State.PiValue:
                    if (c == '?' && _reader.Match(">"))
                    {
                        _reader.Read(1);
                        _state    = State.Text;
                        _nodeType = HtmlNodeType.ProcessingInstruction;
                        return true;
                    }
                    _value.Append(c);
                    break;

                case State.AttrKey:
                    if (Char.IsWhiteSpace(c))
                    {
                        _state = State.AttrEq;
                    }
                    else if (c == '=')
                    {
                        _state = State.AttrValue;
                    }
                    else if (c == '>' || c == '/')
                    {
                        // Valueless attribute directly followed by the end of the
                        // tag. Fix: '/' is handled like '>' so that
                        // "<input disabled/>" yields the attribute "disabled" and
                        // an empty element instead of an attribute named "disabled/".
                        _attributes[attrName.ToString()] = null;
                        _state = State.ElemAttributes;
                        _reader.Push(c);
                        attrName.Length  = 0;
                        attrValue.Length = 0;
                    }
                    else
                    {
                        attrName.Append(c);
                    }
                    break;

                case State.AttrEq:
                    if (Char.IsWhiteSpace(c))
                    {
                        // Skip whitespace between the name and a possible '='.
                    }
                    else if (c == '=')
                    {
                        _state = State.AttrValue;
                    }
                    else
                    {
                        // No '=' follows: the attribute has no value. Re-process
                        // the current character as the start of whatever is next.
                        _attributes[attrName.ToString()] = null;
                        _state = State.ElemAttributes;
                        _reader.Push(c);
                        attrName.Length  = 0;
                        attrValue.Length = 0;
                    }
                    break;

                case State.AttrValue:
                    if (Char.IsWhiteSpace(c))
                    {
                        // Skip whitespace between '=' and the value.
                    }
                    else if (c == '"' || c == '\'')
                    {
                        quoteStyle = c;
                        _state     = State.AttrQuote;
                    }
                    else if (c == '>')
                    {
                        // Fix: "name=>" has an empty value. Previously '>' became
                        // the first character of an unquoted value and the tag
                        // swallowed the markup that followed it.
                        _attributes[attrName.ToString()] = string.Empty;
                        _state = State.ElemAttributes;
                        _reader.Push(c);
                        attrName.Length  = 0;
                        attrValue.Length = 0;
                    }
                    else
                    {
                        // Unquoted value: a space in quoteStyle marks this mode.
                        quoteStyle = ' ';
                        _state     = State.AttrQuote;
                        attrValue.Append(c);
                    }
                    break;

                case State.AttrQuote:
                    {
                        bool unquoted = ' ' == quoteStyle;

                        // Fix: an unquoted value ends at any whitespace (tab,
                        // newline, ...), not only at a literal space.
                        if (c == quoteStyle || (unquoted && (c == '>' || Char.IsWhiteSpace(c))))
                        {
                            _attributes[attrName.ToString()] = HttpUtility.HtmlDecode(attrValue.ToString());
                            _state = State.ElemAttributes;
                            if (unquoted && c == '>')
                            {
                                // '>' also closes the tag; let ElemAttributes see it.
                                _reader.Push(c);
                            }
                            attrName.Length  = 0;
                            attrValue.Length = 0;
                        }
                        else
                        {
                            attrValue.Append(c);
                        }
                    }
                    break;
                }
            }

            // End of input: flush whatever can still be reported as text.
            // NOTE(review): "_state = 0" presumably resets to the initial State
            // member - confirm against the State enum declaration.
            switch (_state)
            {
            case State.Text:
                _state = 0;
                if (_value.Length > 0)
                {
                    _nodeType = HtmlNodeType.Text;
                    return true;
                }
                return false;

            case State.Amp:
                _state = 0;
                // Fix: keep the entity characters collected so far instead of
                // dropping them; decode them the same way as an unterminated
                // entity met in the middle of the input.
                if (entity != null && entity.Length > 0)
                {
                    _value.Append(DecodeEntity("&" + entity + ";"));
                }
                else
                {
                    _value.Append('&');
                }
                _nodeType = HtmlNodeType.Text;
                return true;

            case State.Lt:
                _state = 0;
                _value.Append('<');
                _nodeType = HtmlNodeType.Text;
                return true;
            }

            // Input ended inside an unfinished construct (tag, comment, ...):
            // nothing is reported for it.
            return false;
        }