/// <summary>
/// Scans a quoted attribute value. The value start is recorded at the current
/// position on entry, so the caller is expected to have consumed the opening quote.
/// </summary>
/// <param name="quote">The character that terminates the value.</param>
private void AttributeValue(char quote)
{
    _attributeValueStart = Position();

    for (;;)
    {
        var ch = Current();

        if (ch == '\0')
        {
            // NUL inside the value (presumably what Current() yields at end of
            // input -- confirm): the token is reported as a comment and the
            // pending attribute is not added.
            _type = HtmlTokenType.Comment;
            Consume();
            return;
        }

        if (ch == quote)
        {
            // The value ends before the closing quote; the attribute as a whole
            // ends after it.
            _attributeValueEnd = Position();
            Consume();
            _attributeEnd = Position();

            AddAttribute();
            BeforeAttributeName();
            return;
        }

        Consume();
    }
}
/// <summary>
/// Appends the attribute described by the current <c>_attribute*</c> position
/// fields to the pooled attribute buffer, then clears the name positions.
/// Does nothing when the recorded name is empty.
/// </summary>
private void AddAttribute()
{
    if (_attributeNameEnd.Index == _attributeNameStart.Index)
    {
        return;
    }

    if (_attributes == null)
    {
        _attributes = ArrayPool<HtmlAttribute>.Shared.Rent(4);
    }

    // Grow array as needed
    if (_attributesLength == _attributes.Length)
    {
        var grown = ArrayPool<HtmlAttribute>.Shared.Rent(_attributes.Length * 2);
        Array.Copy(_attributes, grown, _attributes.Length);
        ArrayPool<HtmlAttribute>.Shared.Return(_attributes);
        _attributes = grown;
    }

    var nameText = _html.AsMemory(
        _attributeNameStart.Index,
        _attributeNameEnd.Index - _attributeNameStart.Index);
    var valueText = _html.AsMemory(
        _attributeValueStart.Index,
        _attributeValueEnd.Index - _attributeValueStart.Index);
    var fullText = _html.AsMemory(
        _attributeStart.Index,
        _attributeEnd.Index - _attributeStart.Index);

    var fullRange = new HtmlTextRange(_attributeStart, _attributeEnd);
    var nameRange = new HtmlTextRange(_attributeNameStart, _attributeNameEnd);
    var valueRange = new HtmlTextRange(_attributeValueStart, _attributeValueEnd);

    _attributes[_attributesLength] = new HtmlAttribute(
        _attributeType,
        nameText,
        valueText,
        fullText,
        fullRange,
        nameRange,
        valueRange);
    _attributesLength++;

    // With an empty name recorded, a second call for the same attribute hits
    // the guard at the top and returns.
    _attributeNameStart = default;
    _attributeNameEnd = default;
}
/// <summary>
/// Decides how to continue after the start of an end tag, based on the
/// current character. The token type starts out as
/// <see cref="HtmlTokenType.EndTag"/> and is overridden to
/// <see cref="HtmlTokenType.Text"/> when a NUL character is found.
/// </summary>
private void EndTagOpen()
{
    _type = HtmlTokenType.EndTag;

    var ch = Current();

    if (ch == '>')
    {
        // Immediately closed: the end tag gets an empty name range.
        Consume();
        _nameEnd = _nameStart;
        return;
    }

    if (ch == '\0')
    {
        Consume();
        _type = HtmlTokenType.Text;
        return;
    }

    if (IsASCIIAlpha(ch))
    {
        // The tag name is read without attribute parsing.
        TagName(readAttributes: false);
        return;
    }

    Consume();
    BogusComment();
}
/// <summary>
/// Handles the characters that follow an attribute name: skips whitespace,
/// then either finishes the attribute, moves on to its value after '=',
/// or starts the next attribute.
/// </summary>
private void AfterAttributeName()
{
    // Records where the attribute ends, but only the first time; later
    // whitespace must not move an end that was already captured.
    void MarkAttributeEndOnce()
    {
        if (_attributeEnd.Index == _attributeStart.Index)
        {
            _attributeEnd = Position();
        }
    }

    while (true)
    {
        var ch = Current();

        if (ch == '\0')
        {
            // NUL inside the tag: the token is reported as a comment.
            _type = HtmlTokenType.Comment;
            Consume();
            return;
        }

        if (ch == '=')
        {
            Consume();
            BeforeAttributeValue();
            return;
        }

        if (ch == '>')
        {
            MarkAttributeEndOnce();
            Consume();
            AddAttribute();
            return;
        }

        if (ch == '/')
        {
            MarkAttributeEndOnce();
            Consume();
            AddAttribute();
            SelfClosingStartTag();
            return;
        }

        if (ch == '\t' || ch == '\r' || ch == '\n' || ch == '\f' || ch == ' ')
        {
            MarkAttributeEndOnce();
            Consume();
            continue;
        }

        // Any other character begins a new attribute: store the finished
        // one, then read the next name.
        AddAttribute();
        AttributeName();
        return;
    }
}
/// <summary>
/// Reads a tag name starting at the current position and records its range
/// in <c>_nameStart</c> / <c>_nameEnd</c>.
/// </summary>
/// <param name="readAttributes">
/// When <c>true</c>, the first whitespace after the name hands control to
/// <c>BeforeAttributeName</c>. When <c>false</c>, everything up to the
/// closing '&gt;' (or a '/' or NUL) is consumed without attribute parsing.
/// </param>
private void TagName(bool readAttributes)
{
    var start = Position();
    _nameStart = start;
    _nameEnd = start;

    // The name end is captured at the first delimiter only; with
    // readAttributes == false the loop keeps running past the name.
    void MarkNameEndOnce()
    {
        if (_nameEnd.Index == _nameStart.Index)
        {
            _nameEnd = Position();
        }
    }

    while (true)
    {
        var ch = Current();

        if (ch == '\0')
        {
            // NUL inside the tag: the token is reported as a comment.
            _type = HtmlTokenType.Comment;
            Consume();
            return;
        }

        if (ch == '>')
        {
            MarkNameEndOnce();
            Consume();
            return;
        }

        if (ch == '/')
        {
            MarkNameEndOnce();
            Consume();
            SelfClosingStartTag();
            return;
        }

        if (ch == '\t' || ch == '\r' || ch == '\n' || ch == '\f' || ch == ' ')
        {
            MarkNameEndOnce();
            Consume();

            if (readAttributes)
            {
                BeforeAttributeName();
                return;
            }

            continue;
        }

        Consume();
    }
}
/// <summary>
/// Reads the next token from the input.
/// </summary>
/// <param name="token">
/// Receives the token that was read, or <c>default</c> when the input is
/// exhausted.
/// </param>
/// <returns>
/// <c>true</c> if a token was produced; <c>false</c> once the position has
/// reached the end of the input.
/// </returns>
public bool Read(out HtmlToken token)
{
    // Simplified parsing algorithm based on https://html.spec.whatwg.org/multipage/parsing.html

    // Reset the per-token state.
    _attributesLength = 0;
    var start = Position();
    _tokenStart = start;
    _tokenEnd = start;
    _nameStart = default;
    _nameEnd = default;
    _isSelfClosing = false;

    if (_position >= _length)
    {
        // Nothing left to read: hand the rented attribute buffer back to the pool.
        if (_attributes != null)
        {
            ArrayPool<HtmlAttribute>.Shared.Return(_attributes);
            _attributes = null;
        }

        token = default;
        return false;
    }

    // The first character decides between a tag-like token and plain data.
    var startsWithAngleBracket = Current() == '<';
    Consume();
    if (startsWithAngleBracket)
    {
        TagOpen();
    }
    else
    {
        Data();
    }

    _tokenEnd = Position();

    var nameText = _html.AsMemory(_nameStart.Index, _nameEnd.Index - _nameStart.Index);
    var tokenText = _html.AsMemory(_tokenStart.Index, _tokenEnd.Index - _tokenStart.Index);
    var attributes = _attributes.AsMemory(0, _attributesLength);
    var tokenRange = new HtmlTextRange(_tokenStart, _tokenEnd);
    var nameRange = new HtmlTextRange(_nameStart, _nameEnd);

    token = new HtmlToken(
        _type,
        _isSelfClosing,
        nameText,
        tokenText,
        attributes,
        tokenRange,
        nameRange);
    return true;
}
/// <summary>
/// Skips whitespace after '=' and dispatches on the first significant
/// character: a quote starts a quoted value, '&gt;' ends the tag with an
/// empty value, and anything else starts an unquoted value.
/// </summary>
private void BeforeAttributeValue()
{
    while (true)
    {
        var ch = Current();

        if (ch == '\t' || ch == '\r' || ch == '\n' || ch == '\f' || ch == ' ')
        {
            Consume();
            continue;
        }

        if (ch == '\'' || ch == '"')
        {
            _attributeType = ch == '"'
                ? HtmlAttributeType.DoubleQuoted
                : HtmlAttributeType.SingleQuoted;

            // Skip the opening quote; the same character closes the value.
            Consume();
            AttributeValue(ch);
            return;
        }

        if (ch == '>')
        {
            // The tag closes before any value: record an empty value range.
            _attributeValueEnd = _attributeValueStart;
            _attributeEnd = Position();
            Consume();
            AddAttribute();
            return;
        }

        // The current character is not consumed here; it is left for
        // AttributeValueUnquoted to read.
        _attributeType = HtmlAttributeType.Unquoted;
        AttributeValueUnquoted();
        return;
    }
}
/// <summary>
/// Starts a new attribute at the current position and reads its name up to
/// the first delimiter (whitespace, '/', '&gt;', NUL or '=').
/// </summary>
/// <param name="consumeOnce">
/// When <c>true</c>, the current character is consumed as part of the name
/// before any delimiter check, so a leading delimiter character is kept in
/// the name.
/// </param>
private void AttributeName(bool consumeOnce = false)
{
    _attributeType = HtmlAttributeType.NameOnly;

    // All attribute and name positions start out at the same spot.
    var here = Position();
    _attributeEnd = here;
    _attributeStart = here;
    _attributeNameEnd = here;
    _attributeNameStart = here;

    _attributeValueEnd = default;
    _attributeValueStart = default;

    if (consumeOnce)
    {
        Consume();
    }

    while (true)
    {
        var ch = Current();

        if (ch == '=')
        {
            _attributeNameEnd = Position();
            Consume();
            BeforeAttributeValue();
            return;
        }

        var endsName =
            ch == '\t' || ch == '\r' || ch == '\n' || ch == '\f' || ch == ' ' ||
            ch == '/' || ch == '>' || ch == '\0';

        if (endsName)
        {
            // The delimiter is not consumed here; AfterAttributeName handles it.
            _attributeNameEnd = Position();
            AfterAttributeName();
            return;
        }

        Consume();
    }
}
/// <summary>
/// Reads an unquoted attribute value starting at the current position.
/// The value ends at the first whitespace character or '&gt;'.
/// </summary>
private void AttributeValueUnquoted()
{
    _attributeValueStart = Position();

    while (true)
    {
        var ch = Current();

        if (ch == '\0')
        {
            // NUL inside the value: the token is reported as a comment and
            // the pending attribute is not added.
            _type = HtmlTokenType.Comment;
            Consume();
            return;
        }

        var closesTag = ch == '>';
        var isWhitespace =
            ch == '\t' || ch == '\r' || ch == '\n' || ch == '\f' || ch == ' ';

        if (closesTag || isWhitespace)
        {
            // The value and the whole attribute both end at the delimiter.
            _attributeValueEnd = Position();
            _attributeEnd = Position();
            Consume();
            AddAttribute();

            // After whitespace more attributes may follow; after '>' the tag is done.
            if (!closesTag)
            {
                BeforeAttributeName();
            }

            return;
        }

        Consume();
    }
}