Example #1
0
        /// <summary>
        /// Scans a quoted attribute value beginning at the current position and running up to
        /// the next occurrence of the given quote character.
        /// </summary>
        /// <param name="quote">The character that terminates the value.</param>
        private void AttributeValue(char quote)
        {
            _attributeValueStart = Position();

            for (;;)
            {
                var ch = Current();

                if (ch == '\0')
                {
                    // NUL before the closing quote: the token is reported as a comment.
                    _type = HtmlTokenType.Comment;
                    Consume();
                    return;
                }

                if (ch == quote)
                {
                    // Value end is recorded before consuming the quote, attribute end after it.
                    _attributeValueEnd = Position();
                    Consume();
                    _attributeEnd = Position();
                    AddAttribute();
                    BeforeAttributeName();
                    return;
                }

                Consume();
            }
        }
Example #2
0
        /// <summary>
        /// Appends the attribute described by the current attribute position fields to the
        /// pooled attribute buffer, then clears the name range. Does nothing when the name
        /// range is empty.
        /// </summary>
        private void AddAttribute()
        {
            // An empty name range means there is no pending attribute to record.
            if (_attributeNameStart.Index == _attributeNameEnd.Index)
            {
                return;
            }

            var pool = ArrayPool<HtmlAttribute>.Shared;

            if (_attributes == null)
            {
                _attributes = pool.Rent(4);
            }

            if (_attributes.Length == _attributesLength)
            {
                // Buffer is full: move everything into a rented array of at least twice the size.
                var larger = pool.Rent(_attributes.Length * 2);

                Array.Copy(_attributes, larger, _attributes.Length);
                pool.Return(_attributes);

                _attributes = larger;
            }

            var nameLength = _attributeNameEnd.Index - _attributeNameStart.Index;
            var valueLength = _attributeValueEnd.Index - _attributeValueStart.Index;
            var rawLength = _attributeEnd.Index - _attributeStart.Index;

            _attributes[_attributesLength++] = new HtmlAttribute(
                _attributeType,
                _html.AsMemory(_attributeNameStart.Index, nameLength),
                _html.AsMemory(_attributeValueStart.Index, valueLength),
                _html.AsMemory(_attributeStart.Index, rawLength),
                new HtmlTextRange(_attributeStart, _attributeEnd),
                new HtmlTextRange(_attributeNameStart, _attributeNameEnd),
                new HtmlTextRange(_attributeValueStart, _attributeValueEnd));

            // Reset the name range so a repeated call does not add the same attribute again.
            _attributeNameEnd = default;
            _attributeNameStart = _attributeNameEnd;
        }
Example #3
0
        /// <summary>
        /// Classifies the token as an end tag and dispatches on the current character, which
        /// decides whether a tag name, an empty name, text, or a bogus comment follows.
        /// </summary>
        private void EndTagOpen()
        {
            _type = HtmlTokenType.EndTag;

            var ch = Current();

            if (ch == '>')
            {
                // The tag closes immediately: the name range is left empty.
                Consume();
                _nameEnd = _nameStart;
            }
            else if (ch == '\0')
            {
                // NUL here downgrades the token to text.
                Consume();
                _type = HtmlTokenType.Text;
            }
            else if (IsASCIIAlpha(ch))
            {
                // End tags are scanned without attribute parsing.
                TagName(readAttributes: false);
            }
            else
            {
                Consume();
                BogusComment();
            }
        }
Example #4
0
        /// <summary>
        /// Handles the input that follows an attribute name: skips whitespace, then either
        /// finishes the attribute, moves on to its value, or starts the next attribute name.
        /// </summary>
        private void AfterAttributeName()
        {
            for (;;)
            {
                var ch = Current();

                if (ch == '\0')
                {
                    // NUL inside the tag: the token is reported as a comment.
                    _type = HtmlTokenType.Comment;
                    Consume();
                    return;
                }

                if (ch == '=')
                {
                    Consume();
                    BeforeAttributeValue();
                    return;
                }

                var isWhitespace = ch == '\t' || ch == '\r' || ch == '\n' || ch == '\f' || ch == ' ';

                if (ch != '>' && ch != '/' && !isWhitespace)
                {
                    // Any other character begins a new attribute; store the pending one first.
                    AddAttribute();
                    AttributeName();
                    return;
                }

                // '>', '/' and whitespace all pin the attribute end, but only while it still
                // equals the attribute start.
                if (_attributeEnd.Index == _attributeStart.Index)
                {
                    _attributeEnd = Position();
                }

                Consume();

                if (isWhitespace)
                {
                    continue;
                }

                AddAttribute();

                if (ch == '/')
                {
                    SelfClosingStartTag();
                }

                return;
            }
        }
Example #5
0
        /// <summary>
        /// Scans a tag name starting at the current position and records its range in
        /// the name start and end fields.
        /// </summary>
        /// <param name="readAttributes">
        /// When true, whitespace after the name hands control to attribute parsing; when false,
        /// scanning simply continues until '&gt;', '/' or NUL.
        /// </param>
        private void TagName(bool readAttributes)
        {
            var start = Position();
            _nameEnd = start;
            _nameStart = start;

            for (;;)
            {
                var ch = Current();

                if (ch == '\0')
                {
                    // NUL inside the tag: the token is reported as a comment.
                    _type = HtmlTokenType.Comment;
                    Consume();
                    return;
                }

                var isWhitespace = ch == '\t' || ch == '\r' || ch == '\n' || ch == '\f' || ch == ' ';

                if (ch != '>' && ch != '/' && !isWhitespace)
                {
                    Consume();
                    continue;
                }

                // The first terminator seen fixes the end of the name; later ones leave it alone.
                if (_nameEnd.Index == _nameStart.Index)
                {
                    _nameEnd = Position();
                }

                Consume();

                if (ch == '>')
                {
                    return;
                }

                if (ch == '/')
                {
                    SelfClosingStartTag();
                    return;
                }

                // Whitespace: either switch to attribute parsing or keep scanning.
                if (readAttributes)
                {
                    BeforeAttributeName();
                    return;
                }
            }
        }
Example #6
0
        /// <summary>
        /// Reads the next token from the input.
        /// </summary>
        /// <param name="token">
        /// Receives the token that was read, or the default value when the input is exhausted.
        /// Its attribute memory is a slice of this reader's internal attribute buffer, whose
        /// length counter is reset at the start of every call.
        /// </param>
        /// <returns>True when a token was produced; false once the position has reached the input length.</returns>
        public bool Read(out HtmlToken token)
        {
            // Simplified parsing algorithm based on https://html.spec.whatwg.org/multipage/parsing.html
            _attributesLength = 0;

            var start = Position();
            _tokenEnd = start;
            _tokenStart = start;

            _nameEnd = default;
            _nameStart = _nameEnd;
            _isSelfClosing = false;

            if (_position >= _length)
            {
                // End of input: hand the rented attribute buffer back to the pool.
                if (_attributes != null)
                {
                    ArrayPool<HtmlAttribute>.Shared.Return(_attributes);
                    _attributes = null;
                }

                token = default;
                return false;
            }

            // The first character is consumed either way; only '<' opens a tag.
            var opensTag = Current() == '<';
            Consume();

            if (opensTag)
            {
                TagOpen();
            }
            else
            {
                Data();
            }

            _tokenEnd = Position();

            var name = _html.AsMemory(_nameStart.Index, _nameEnd.Index - _nameStart.Index);
            var raw = _html.AsMemory(_tokenStart.Index, _tokenEnd.Index - _tokenStart.Index);
            var attributes = _attributes.AsMemory(0, _attributesLength);

            token = new HtmlToken(
                _type,
                _isSelfClosing,
                name,
                raw,
                attributes,
                new HtmlTextRange(_tokenStart, _tokenEnd),
                new HtmlTextRange(_nameStart, _nameEnd));

            return true;
        }
Example #7
0
        /// <summary>
        /// Skips whitespace after '=' and then dispatches to the quoted or unquoted value
        /// scanner, or finishes the attribute with an empty value when '&gt;' comes first.
        /// </summary>
        private void BeforeAttributeValue()
        {
            for (;;)
            {
                var ch = Current();

                if (ch == '\t' || ch == '\r' || ch == '\n' || ch == '\f' || ch == ' ')
                {
                    Consume();
                    continue;
                }

                if (ch == '\'' || ch == '"')
                {
                    // The opening quote is consumed here; the same character closes the value.
                    _attributeType = ch == '"'
                        ? HtmlAttributeType.DoubleQuoted
                        : HtmlAttributeType.SingleQuoted;
                    Consume();
                    AttributeValue(ch);
                    return;
                }

                if (ch == '>')
                {
                    // Tag ends before any value: the value range is left empty.
                    _attributeValueEnd = _attributeValueStart;
                    _attributeEnd = Position();
                    Consume();
                    AddAttribute();
                    return;
                }

                // Anything else starts an unquoted value; the character is not consumed here.
                _attributeType = HtmlAttributeType.Unquoted;
                AttributeValueUnquoted();
                return;
            }
        }
Example #8
0
        /// <summary>
        /// Starts a new attribute at the current position and scans its name, then hands over
        /// to value parsing on '=' or to the after-name state on any other terminator.
        /// </summary>
        /// <param name="consumeOnce">
        /// When true, one character is consumed unconditionally before scanning, so it becomes
        /// part of the name even if it would otherwise terminate it.
        /// </param>
        private void AttributeName(bool consumeOnce = false)
        {
            _attributeType = HtmlAttributeType.NameOnly;

            // All four attribute positions begin at the same place.
            var start = Position();
            _attributeEnd = start;
            _attributeStart = start;
            _attributeNameEnd = start;
            _attributeNameStart = start;

            _attributeValueEnd = default;
            _attributeValueStart = _attributeValueEnd;

            if (consumeOnce)
            {
                Consume();
            }

            for (;;)
            {
                var ch = Current();
                var isEquals = ch == '=';
                var endsName = isEquals
                    || ch == '\t' || ch == '\r' || ch == '\n' || ch == '\f' || ch == ' '
                    || ch == '/' || ch == '>' || ch == '\0';

                if (!endsName)
                {
                    Consume();
                    continue;
                }

                _attributeNameEnd = Position();

                if (isEquals)
                {
                    Consume();
                    BeforeAttributeValue();
                }
                else
                {
                    // The terminator is left unconsumed for the after-name state to inspect.
                    AfterAttributeName();
                }

                return;
            }
        }
Example #9
0
        /// <summary>
        /// Scans an unquoted attribute value starting at the current position; the value ends
        /// at whitespace or '&gt;'.
        /// </summary>
        private void AttributeValueUnquoted()
        {
            _attributeValueStart = Position();

            for (;;)
            {
                var ch = Current();

                if (ch == '\0')
                {
                    // NUL inside the value: the token is reported as a comment.
                    _type = HtmlTokenType.Comment;
                    Consume();
                    return;
                }

                var closesTag = ch == '>';
                var isWhitespace = ch == '\t' || ch == '\r' || ch == '\n' || ch == '\f' || ch == ' ';

                if (!closesTag && !isWhitespace)
                {
                    Consume();
                    continue;
                }

                // Value and attribute both end just before the terminating character.
                _attributeValueEnd = Position();
                _attributeEnd = Position();
                Consume();
                AddAttribute();

                if (isWhitespace)
                {
                    // More attributes may follow within the same tag.
                    BeforeAttributeName();
                }

                return;
            }
        }