/// <summary>
/// Verifies that the HTML token constructors and <c>WriteTo</c> methods
/// reject <c>null</c> arguments with an <see cref="ArgumentNullException"/>.
/// </summary>
public void TestArgumentExceptions()
{
    // Valid instances, used only to call WriteTo(null) on each token kind.
    var commentToken = new HtmlCommentToken("This is a comment.");
    var cdataToken = new HtmlCDataToken("This is some CDATA.");
    var dataToken = new HtmlDataToken("This is some character data.");
    var scriptToken = new HtmlScriptDataToken("This is some script data.");
    var docTypeToken = new HtmlDocTypeToken();
    var tagToken = new HtmlTagToken("name", false);
    var noAttributes = new HtmlAttribute[0];

    // Comment token.
    Assert.Throws<ArgumentNullException>(() => new HtmlCommentToken(null));
    Assert.Throws<ArgumentNullException>(() => commentToken.WriteTo(null));

    // CDATA token.
    Assert.Throws<ArgumentNullException>(() => new HtmlCDataToken(null));
    Assert.Throws<ArgumentNullException>(() => cdataToken.WriteTo(null));

    // Character data token.
    Assert.Throws<ArgumentNullException>(() => new HtmlDataToken(null));
    Assert.Throws<ArgumentNullException>(() => dataToken.WriteTo(null));

    // DOCTYPE token.
    Assert.Throws<ArgumentNullException>(() => docTypeToken.WriteTo(null));

    // Tag token: null name, null attribute list, null name (short overload).
    Assert.Throws<ArgumentNullException>(() => new HtmlTagToken(null, noAttributes, false));
    Assert.Throws<ArgumentNullException>(() => new HtmlTagToken("name", null, false));
    Assert.Throws<ArgumentNullException>(() => new HtmlTagToken(null, false));
    Assert.Throws<ArgumentNullException>(() => tagToken.WriteTo(null));

    // Script data token.
    Assert.Throws<ArgumentNullException>(() => new HtmlScriptDataToken(null));
    Assert.Throws<ArgumentNullException>(() => scriptToken.WriteTo(null));
}
/// <summary>
/// Advances to the next token produced by the state machine.
/// </summary>
/// <returns>
/// <c>true</c> when a tag, text, comment or doctype token was emitted;
/// <c>false</c> once the state machine reports the end of the stream.
/// </returns>
public bool Read()
{
    // Forget whatever the previous call produced.
    textBuffer = null;
    tagToken = null;
    stateMachine.ResetEmit();

    for (;;)
    {
        // Run one step of the state machine, then look for emitted output.
        stateMachine.State();

        if (stateMachine.EmitTagToken != null)
        {
            if (stateMachine.EmitTagToken.EndTag)
            {
                TokenKind = HtmlTokenKind.EndTag;
            }
            else
            {
                TokenKind = HtmlTokenKind.Tag;
            }

            stateMachine.SetNextStateFromTagName();

            // Only start tags are remembered as the "last start tag".
            if (!stateMachine.EmitTagToken.EndTag)
            {
                stateMachine.RememberLastStartTagName();
            }

            tagToken = stateMachine.EmitTagToken;
            return true;
        }

        if (stateMachine.EmitDataBuffer != null)
        {
            TokenKind = HtmlTokenKind.Text;
            textBuffer = stateMachine.EmitDataBuffer;
            return true;
        }

        if (stateMachine.EmitCommentBuffer != null)
        {
            TokenKind = HtmlTokenKind.Comment;
            textBuffer = stateMachine.EmitCommentBuffer;
            return true;
        }

        if (stateMachine.EmitDoctypeToken != null)
        {
            TokenKind = HtmlTokenKind.Doctype;
            tagToken = stateMachine.EmitDoctypeToken;
            return true;
        }

        // Nothing emitted in this step: stop only at end of input.
        if (stateMachine.Eof)
        {
            return false;
        }
    }
}
/// <summary>
/// Copies every attribute of the tag token onto the SVG element, passing
/// each attribute name through <c>AdjustToSvgAttribute</c> first.
/// </summary>
/// <param name="element">The element to set up.</param>
/// <param name="tag">The tag token supplying the attributes.</param>
/// <returns>The same element instance that was passed in.</returns>
public static SvgElement Setup(this SvgElement element, HtmlTagToken tag)
{
    foreach (var attribute in tag.Attributes)
    {
        element.AdjustAttribute(attribute.Key.AdjustToSvgAttribute(), attribute.Value);
    }

    return element;
}
/// <summary>
/// Copies every attribute of the tag token onto the math element, passing
/// each attribute name through <c>AdjustToMathAttribute</c> first.
/// </summary>
/// <param name="element">The element to set up.</param>
/// <param name="tag">The tag token supplying the attributes.</param>
/// <returns>The same element instance that was passed in.</returns>
public static MathElement Setup(this MathElement element, HtmlTagToken tag)
{
    var attributes = tag.Attributes;

    for (int index = 0, total = attributes.Count; index < total; index++)
    {
        var attribute = attributes[index];
        element.AdjustAttribute(attribute.Name.AdjustToMathAttribute(), attribute.Value);
    }

    return element;
}
/// <summary>
/// Handles a start tag that is reported as misplaced
/// (<c>TagCannotStartHere</c>) while processing foreign content.
/// </summary>
/// <param name="tag">The token to process.</param>
void ForeignNormalTag(HtmlTagToken tag)
{
    RaiseErrorOccurred(HtmlParseError.TagCannotStartHere, tag);

    // In the fragment case the tag is handed to the special-tag handler.
    if (IsFragmentCase)
    {
        ForeignSpecialTag(tag);
        return;
    }

    // Flags that stop the unwinding of open elements.
    var stopFlags = NodeFlags.HtmlTip | NodeFlags.MathTip | NodeFlags.HtmlMember;
    var current = CurrentNode;

    while (true)
    {
        // An annotation-xml element with an (X)HTML encoding takes the element directly.
        if (current.LocalName.Is(TagNames.AnnotationXml))
        {
            var encoding = current.GetAttribute(null, AttributeNames.Encoding);

            if (encoding.Isi(MimeTypeNames.Html) || encoding.Isi(MimeTypeNames.ApplicationXHtml))
            {
                AddElement(tag);
                return;
            }
        }

        CloseCurrentNode();
        current = CurrentNode;

        // Stop as soon as the new current node carries one of the stop flags.
        if ((current.Flags & stopFlags) != NodeFlags.None)
        {
            break;
        }
    }

    Consume(tag);
}
/// <summary>
/// Creates a foreign (MathML/SVG) element for the tag and attaches it to
/// the current node. Nothing happens when no foreign element can be created.
/// </summary>
/// <param name="tag">The tag token to process.</param>
void ForeignSpecialTag(HtmlTagToken tag)
{
    var foreignElement = CreateForeignElementFrom(tag);

    if (foreignElement == null)
    {
        return;
    }

    CurrentNode.AddNode(foreignElement);

    if (tag.IsSelfClosing)
    {
        // A self-closing script is followed by a synthetic closing script tag.
        if (tag.Name.Is(TagNames.Script))
        {
            Foreign(HtmlTagToken.Close(TagNames.Script));
        }

        return;
    }

    // Not self-closing: the element stays open and character data is accepted.
    _openElements.Add(foreignElement);
    _tokenizer.IsAcceptingCharacterData = true;
}
/// <summary>
/// Handles an end tag in the InBody state that has no dedicated handling
/// ("any other end tag").
/// </summary>
/// <remarks>
/// Walks the stack of open elements from the top downwards:
/// <list type="bullet">
/// <item>If an element with the token's tag name is found, implied end tags
/// (except for that name) are generated and every element from the top of
/// the stack down to and including the match is closed.</item>
/// <item>If an element flagged <c>NodeFlags.Special</c> is met first, a parse
/// error is raised and the token is ignored.</item>
/// </list>
/// The walk is bounded by the stack itself, so running off the bottom of the
/// stack simply ends the method instead of indexing at -1.
/// </remarks>
/// <param name="tag">The actual tag found.</param>
void InBodyEndTagAnythingElse(HtmlTagToken tag)
{
    for (var index = _openElements.Count - 1; index >= 0; index--)
    {
        var node = _openElements[index];

        if (node.LocalName.Is(tag.Name))
        {
            GenerateImpliedEndTagsExceptFor(tag.Name);

            // Parse error when the matched element is not the current node
            // after implied end tags were generated. Re-testing the tag name
            // here would always succeed, so the error could never fire.
            if (CurrentNode != node)
                RaiseErrorOccurred(HtmlParseError.TagClosedWrong, tag);

            // Pop everything from the top of the stack down to the match.
            for (var i = _openElements.Count - 1; index <= i; i--)
                CloseCurrentNode();

            return;
        }

        if (node.Flags.HasFlag(NodeFlags.Special))
        {
            RaiseErrorOccurred(HtmlParseError.TagClosedWrong, tag);
            return;
        }
    }
}
/// <summary>
/// Handles a block-level end tag in the InBody state.
/// </summary>
/// <param name="tag">The actual tag given.</param>
/// <returns>
/// <c>true</c> when the element was in scope and has been closed;
/// <c>false</c> when the token was ignored with a parse error.
/// </returns>
Boolean InBodyEndTagBlock(HtmlTagToken tag)
{
    // Not in scope: report and ignore the token.
    if (!IsInScope(tag.Name))
    {
        RaiseErrorOccurred(HtmlParseError.BlockNotInScope, tag);
        return false;
    }

    GenerateImpliedEndTags();

    var closesCurrentNode = CurrentNode.LocalName.Is(tag.Name);

    if (!closesCurrentNode)
    {
        RaiseErrorOccurred(HtmlParseError.TagDoesNotMatchCurrentNode, tag);
    }

    ClearStackBackTo(tag.Name);
    CloseCurrentNode();
    return true;
}
/// <summary>
/// Inserts an element for the tag, switches the tokenizer to RCData and
/// moves the tree builder into Text mode, remembering the mode it was in.
/// </summary>
/// <param name="tag">The given tag token.</param>
void RCDataAlgorithm(HtmlTagToken tag)
{
    AddElement(tag);

    // Tokenizer side: the following content is tokenized as RCDATA.
    _tokenizer.State = HtmlParseMode.RCData;

    // Tree-builder side: keep the current mode so it can be restored later.
    _previousMode = _currentMode;
    _currentMode = HtmlTreeMode.Text;
}
/// <summary>
/// Closes the open table section when one is in table scope, then lets the
/// InTable handling process the token; otherwise raises a parse error.
/// </summary>
/// <param name="tag">The tag to insert (closes table).</param>
void InTableBodyCloseTable(HtmlTagToken tag)
{
    if (!IsInTableScope(TagNames.AllTableSections))
    {
        RaiseErrorOccurred(HtmlParseError.TableSectionNotInScope, tag);
        return;
    }

    ClearStackBackTo(TagNames.AllTableSections);
    CloseCurrentNode();
    _currentMode = HtmlTreeMode.InTable;
    InTable(tag);
}
/// <summary>
/// See 8.2.4.37 Before attribute value state.
/// Skips whitespace and dispatches on the first character of the value.
/// </summary>
/// <param name="tag">The current tag token.</param>
/// <returns>The token produced by the state that takes over.</returns>
HtmlToken AttributeBeforeValue(HtmlTagToken tag)
{
    var c = SkipSpaces();

    switch (c)
    {
        case Symbols.DoubleQuote:
            return AttributeDoubleQuotedValue(tag);

        case Symbols.SingleQuote:
            return AttributeSingleQuotedValue(tag);

        case Symbols.Ampersand:
            // The ampersand itself is handed to the unquoted state.
            return AttributeUnquotedValue(c, tag);

        case Symbols.GreaterThan:
            RaiseErrorOccurred(HtmlParseError.TagClosedWrong);
            return EmitTag(tag);

        case Symbols.LessThan:
        case Symbols.Equality:
        case Symbols.CurvedQuote:
            RaiseErrorOccurred(HtmlParseError.AttributeValueInvalid);
            _stringBuffer.Append(c);
            return AttributeUnquotedValue(GetNext(), tag);

        case Symbols.Null:
            RaiseErrorOccurred(HtmlParseError.Null);
            _stringBuffer.Append(Symbols.Replacement);
            return AttributeUnquotedValue(GetNext(), tag);

        case Symbols.EndOfFile:
            return NewEof();

        default:
            // Any other character starts an unquoted value.
            _stringBuffer.Append(c);
            return AttributeUnquotedValue(GetNext(), tag);
    }
}
/// <summary>
/// See 8.2.4.36 After attribute name state.
/// Skips whitespace after an attribute name and decides whether the tag
/// ends, a value follows, or another attribute name starts.
/// </summary>
/// <remarks>
/// Uppercase ASCII letters are lowercased with
/// <see cref="Char.ToLowerInvariant(Char)"/>. The culture-sensitive
/// <c>Char.ToLower(Char)</c> would map 'I' to a dotless 'ı' under Turkish or
/// Azerbaijani cultures and corrupt attribute names.
/// </remarks>
/// <param name="tag">The current tag token.</param>
/// <returns>The token produced by the state that takes over.</returns>
HtmlToken AttributeAfterName(HtmlTagToken tag)
{
    var c = SkipSpaces();

    if (c == Symbols.GreaterThan)
    {
        return EmitTag(tag);
    }
    else if (c == Symbols.Equality)
    {
        return AttributeBeforeValue(tag);
    }
    else if (c == Symbols.Solidus)
    {
        return TagSelfClosing(tag);
    }
    else if (c.IsUppercaseAscii())
    {
        // Culture-independent lowercasing (see remarks).
        _stringBuffer.Append(Char.ToLowerInvariant(c));
        return AttributeName(tag);
    }
    else if (c == Symbols.DoubleQuote || c == Symbols.SingleQuote || c == Symbols.LessThan)
    {
        // Invalid in a name, but still consumed as part of it.
        RaiseErrorOccurred(HtmlParseError.AttributeNameInvalid);
        _stringBuffer.Append(c);
        return AttributeName(tag);
    }
    else if (c == Symbols.Null)
    {
        RaiseErrorOccurred(HtmlParseError.Null);
        _stringBuffer.Append(Symbols.Replacement);
        return AttributeName(tag);
    }
    else if (c != Symbols.EndOfFile)
    {
        _stringBuffer.Append(c);
        return AttributeName(tag);
    }
    else
    {
        return NewEof();
    }
}
/// <summary>
/// Tells whether the tag is a <c>script</c> tag carrying an attribute
/// <c>type</c> whose value is <c>blazor-boot</c>. All comparisons are
/// ordinal and therefore case-sensitive.
/// </summary>
/// <param name="tag">The tag token to inspect.</param>
/// <returns><c>true</c> for a matching tag; otherwise <c>false</c>.</returns>
private static bool IsBlazorBootTag(HtmlTagToken tag)
{
    if (!string.Equals(tag.Name, "script", StringComparison.Ordinal))
    {
        return false;
    }

    return tag.Attributes.Any(attribute =>
    {
        var isTypeAttribute = string.Equals(attribute.Key, "type", StringComparison.Ordinal);
        return isTypeAttribute && string.Equals(attribute.Value, "blazor-boot", StringComparison.Ordinal);
    });
}
/// <summary>
/// See 8.2.4.10 Tag name state.
/// Collects the (lowercased) tag name until a character that ends it.
/// </summary>
/// <param name="tag">The current tag token.</param>
/// <returns>The token produced by the state that takes over.</returns>
private HtmlToken TagName(HtmlTagToken tag)
{
    for (;;)
    {
        var c = GetNext();

        switch (c)
        {
            case Symbols.GreaterThan:
                tag.Name = FlushBuffer();
                return EmitTag(tag);

            case Symbols.Solidus:
                tag.Name = FlushBuffer();
                return TagSelfClosing(tag);

            case Symbols.Null:
                AppendReplacement();
                continue;

            case Symbols.EndOfFile:
                return NewEof();
        }

        // Whitespace ends the name and starts attribute parsing.
        if (c.IsSpaceCharacter())
        {
            tag.Name = FlushBuffer();
            return ParseAttributes(tag);
        }

        // Name character: uppercase ASCII is stored in lowercase.
        StringBuffer.Append(c.IsUppercaseAscii() ? Char.ToLowerInvariant(c) : c);
    }
}
/// <summary>
/// Parses the attribute list of a tag as a single loop over the attribute
/// states (before name, name, after name, before value, quoted value,
/// unquoted value, after value) instead of one method per state.
/// </summary>
/// <param name="tag">The tag token that receives the parsed attributes.</param>
/// <returns>
/// The result of EmitTag or TagSelfClosing when the tag ends, or the result of
/// NewEof when the end of the input is reached first.
/// </returns>
private HtmlToken ParseAttributes(HtmlTagToken tag)
{
    var state = AttributeState.BeforeName;
    // Quote character that opened the current quoted value (set in BeforeValue).
    var quote = Symbols.DoubleQuote;
    // Current character. The UnquotedValue state does NOT read a new character
    // on entry; it processes whatever the previous step left in 'c'.
    var c = Symbols.Null;

    while (true)
    {
        switch (state)
        {
            // See 8.2.4.34 Before attribute name state
            case AttributeState.BeforeName:
            {
                c = SkipSpaces();

                if (c == Symbols.Solidus)
                {
                    return TagSelfClosing(tag);
                }
                else if (c == Symbols.GreaterThan)
                {
                    return EmitTag(tag);
                }
                else if (c.IsUppercaseAscii())
                {
                    StringBuffer.Append(Char.ToLowerInvariant(c));
                    state = AttributeState.Name;
                }
                else if (c == Symbols.Null)
                {
                    AppendReplacement();
                    state = AttributeState.Name;
                }
                else if (c == Symbols.SingleQuote || c == Symbols.DoubleQuote || c == Symbols.Equality || c == Symbols.LessThan)
                {
                    // Reported as invalid, but still used as the first name character.
                    RaiseErrorOccurred(HtmlParseError.AttributeNameInvalid);
                    StringBuffer.Append(c);
                    state = AttributeState.Name;
                }
                else if (c != Symbols.EndOfFile)
                {
                    StringBuffer.Append(c);
                    state = AttributeState.Name;
                }
                else
                {
                    return NewEof();
                }

                break;
            }

            // See 8.2.4.35 Attribute name state
            case AttributeState.Name:
            {
                c = GetNext();

                if (c == Symbols.Equality)
                {
                    tag.AddAttribute(FlushBuffer());
                    state = AttributeState.BeforeValue;
                }
                else if (c == Symbols.GreaterThan)
                {
                    tag.AddAttribute(FlushBuffer());
                    return EmitTag(tag);
                }
                else if (c.IsSpaceCharacter())
                {
                    tag.AddAttribute(FlushBuffer());
                    state = AttributeState.AfterName;
                }
                else if (c == Symbols.Solidus)
                {
                    tag.AddAttribute(FlushBuffer());
                    return TagSelfClosing(tag);
                }
                else if (c.IsUppercaseAscii())
                {
                    StringBuffer.Append(Char.ToLowerInvariant(c));
                }
                else if (c == Symbols.DoubleQuote || c == Symbols.SingleQuote || c == Symbols.LessThan)
                {
                    RaiseErrorOccurred(HtmlParseError.AttributeNameInvalid);
                    StringBuffer.Append(c);
                }
                else if (c == Symbols.Null)
                {
                    AppendReplacement();
                }
                else if (c != Symbols.EndOfFile)
                {
                    StringBuffer.Append(c);
                }
                else
                {
                    return NewEof();
                }

                break;
            }

            // See 8.2.4.36 After attribute name state
            case AttributeState.AfterName:
            {
                c = SkipSpaces();

                if (c == Symbols.GreaterThan)
                {
                    return EmitTag(tag);
                }
                else if (c == Symbols.Equality)
                {
                    state = AttributeState.BeforeValue;
                }
                else if (c == Symbols.Solidus)
                {
                    return TagSelfClosing(tag);
                }
                else if (c.IsUppercaseAscii())
                {
                    // Any remaining character starts the next attribute name.
                    StringBuffer.Append(Char.ToLowerInvariant(c));
                    state = AttributeState.Name;
                }
                else if (c == Symbols.DoubleQuote || c == Symbols.SingleQuote || c == Symbols.LessThan)
                {
                    RaiseErrorOccurred(HtmlParseError.AttributeNameInvalid);
                    StringBuffer.Append(c);
                    state = AttributeState.Name;
                }
                else if (c == Symbols.Null)
                {
                    AppendReplacement();
                    state = AttributeState.Name;
                }
                else if (c != Symbols.EndOfFile)
                {
                    StringBuffer.Append(c);
                    state = AttributeState.Name;
                }
                else
                {
                    return NewEof();
                }

                break;
            }

            // See 8.2.4.37 Before attribute value state
            case AttributeState.BeforeValue:
            {
                c = SkipSpaces();

                if (c == Symbols.DoubleQuote || c == Symbols.SingleQuote)
                {
                    // Remember which quote has to close the value.
                    state = AttributeState.QuotedValue;
                    quote = c;
                }
                else if (c == Symbols.Ampersand)
                {
                    // 'c' is left untouched so that UnquotedValue handles the ampersand.
                    state = AttributeState.UnquotedValue;
                }
                else if (c == Symbols.GreaterThan)
                {
                    RaiseErrorOccurred(HtmlParseError.TagClosedWrong);
                    return EmitTag(tag);
                }
                else if (c == Symbols.LessThan || c == Symbols.Equality || c == Symbols.CurvedQuote)
                {
                    RaiseErrorOccurred(HtmlParseError.AttributeValueInvalid);
                    StringBuffer.Append(c);
                    state = AttributeState.UnquotedValue;
                    // Pre-fetch the character UnquotedValue will look at.
                    c = GetNext();
                }
                else if (c == Symbols.Null)
                {
                    AppendReplacement();
                    state = AttributeState.UnquotedValue;
                    c = GetNext();
                }
                else if (c != Symbols.EndOfFile)
                {
                    StringBuffer.Append(c);
                    state = AttributeState.UnquotedValue;
                    c = GetNext();
                }
                else
                {
                    return NewEof();
                }

                break;
            }

            // See 8.2.4.38 Attribute value (double-quoted) state
            // and 8.2.4.39 Attribute value (single-quoted) state
            case AttributeState.QuotedValue:
            {
                c = GetNext();

                if (c == quote)
                {
                    tag.SetAttributeValue(FlushBuffer());
                    state = AttributeState.AfterValue;
                }
                else if (c == Symbols.Ampersand)
                {
                    AppendCharacterReference(GetNext(), quote);
                }
                else if (c == Symbols.Null)
                {
                    AppendReplacement();
                }
                else if (c != Symbols.EndOfFile)
                {
                    StringBuffer.Append(c);
                }
                else
                {
                    return NewEof();
                }

                break;
            }

            // See 8.2.4.40 Attribute value (unquoted) state
            case AttributeState.UnquotedValue:
            {
                // No GetNext() here: 'c' was supplied by the previous step, and every
                // branch that stays in this state fetches the next character itself.
                if (c == Symbols.GreaterThan)
                {
                    tag.SetAttributeValue(FlushBuffer());
                    return EmitTag(tag);
                }
                else if (c.IsSpaceCharacter())
                {
                    tag.SetAttributeValue(FlushBuffer());
                    state = AttributeState.BeforeName;
                }
                else if (c == Symbols.Ampersand)
                {
                    AppendCharacterReference(GetNext(), Symbols.GreaterThan);
                    c = GetNext();
                }
                else if (c == Symbols.Null)
                {
                    AppendReplacement();
                    c = GetNext();
                }
                else if (c == Symbols.DoubleQuote || c == Symbols.SingleQuote || c == Symbols.LessThan || c == Symbols.Equality || c == Symbols.CurvedQuote)
                {
                    RaiseErrorOccurred(HtmlParseError.AttributeValueInvalid);
                    StringBuffer.Append(c);
                    c = GetNext();
                }
                else if (c != Symbols.EndOfFile)
                {
                    StringBuffer.Append(c);
                    c = GetNext();
                }
                else
                {
                    return NewEof();
                }

                break;
            }

            // See 8.2.4.42 After attribute value (quoted) state
            case AttributeState.AfterValue:
            {
                c = GetNext();

                if (c == Symbols.GreaterThan)
                {
                    return EmitTag(tag);
                }
                else if (c.IsSpaceCharacter())
                {
                    state = AttributeState.BeforeName;
                }
                else if (c == Symbols.Solidus)
                {
                    return TagSelfClosing(tag);
                }
                else if (c == Symbols.EndOfFile)
                {
                    return NewEof();
                }
                else
                {
                    // Unexpected character: report it, step back (presumably by one
                    // character — confirm against Back()) and treat it as the start
                    // of the next attribute.
                    RaiseErrorOccurred(HtmlParseError.AttributeNameExpected);
                    Back();
                    state = AttributeState.BeforeName;
                }

                break;
            }
        }
    }
}
/// <summary>
/// Applies the attributes of the tag token to the element and raises a
/// parse error when the tag is self-closing but the caller does not
/// acknowledge the self-closing flag.
/// </summary>
/// <param name="element">The element that receives the attributes.</param>
/// <param name="tag">The associated tag token.</param>
/// <param name="acknowledgeSelfClosing">Should the self-closing be acknowledged?</param>
void SetupElement(Element element, HtmlTagToken tag, Boolean acknowledgeSelfClosing)
{
    var unacknowledgedSelfClosing = tag.IsSelfClosing && !acknowledgeSelfClosing;

    if (unacknowledgedSelfClosing)
    {
        RaiseErrorOccurred(HtmlParseError.TagCannotBeSelfClosed, tag);
    }

    element.SetAttributes(tag.Attributes);
}
/// <summary>
/// Configures the given element from the tag token via SetupElement
/// (attributes and self-closing acknowledgement) and then appends it
/// through the single-argument AddElement overload.
/// </summary>
/// <param name="element">The element to configure and add.</param>
/// <param name="tag">The associated tag token.</param>
/// <param name="acknowledgeSelfClosing">
/// Should the self-closing be acknowledged? Defaults to <c>false</c>; the
/// value is passed through to SetupElement unchanged.
/// </param>
void AddElement(Element element, HtmlTagToken tag, Boolean acknowledgeSelfClosing = false)
{
    // Setup runs first, so the element is fully configured before it is added.
    SetupElement(element, tag, acknowledgeSelfClosing);
    AddElement(element);
}
/// <summary>
/// See 8.2.4.39 Attribute value (single-quoted) state.
/// Collects characters until the closing single quote.
/// </summary>
/// <param name="tag">The current tag token.</param>
/// <returns>The token produced by the state that takes over.</returns>
HtmlToken AttributeSingleQuotedValue(HtmlTagToken tag)
{
    for (;;)
    {
        var c = GetNext();

        switch (c)
        {
            case Symbols.SingleQuote:
                // Closing quote: store the collected value and continue after it.
                tag.SetAttributeValue(_stringBuffer.ToString());
                _stringBuffer.Clear();
                return AttributeAfterValue(tag);

            case Symbols.Ampersand:
                AppendCharacterReference(GetNext(), Symbols.SingleQuote);
                break;

            case Symbols.Null:
                RaiseErrorOccurred(HtmlParseError.Null);
                _stringBuffer.Append(Symbols.Replacement);
                break;

            case Symbols.EndOfFile:
                return NewEof();

            default:
                _stringBuffer.Append(c);
                break;
        }
    }
}
/// <summary>
/// Dispatches an end tag token in the InBody state to the handling that
/// matches its tag name. The branches are tested in order, so a name that
/// belongs to several groups is handled by the first matching branch.
/// </summary>
/// <param name="tag">The end tag token to process.</param>
void InBodyEndTag(HtmlTagToken tag)
{
    var tagName = tag.Name;

    if (tagName.Is(TagNames.Div))
    {
        InBodyEndTagBlock(tag);
    }
    else if (tagName.Is(TagNames.A))
    {
        HeisenbergAlgorithm(tag);
    }
    else if (tagName.Is(TagNames.Li))
    {
        if (IsInListItemScope())
        {
            GenerateImpliedEndTagsExceptFor(tagName);

            if (!CurrentNode.LocalName.Is(TagNames.Li))
                RaiseErrorOccurred(HtmlParseError.TagDoesNotMatchCurrentNode, tag);

            ClearStackBackTo(TagNames.Li);
            CloseCurrentNode();
        }
        else
        {
            RaiseErrorOccurred(HtmlParseError.ListItemNotInScope, tag);
        }
    }
    else if (tagName.Is(TagNames.P))
    {
        InBodyEndTagParagraph(tag);
    }
    else if (TagNames.AllBlocks.Contains(tagName))
    {
        InBodyEndTagBlock(tag);
    }
    else if (TagNames.AllFormatting.Contains(tagName))
    {
        HeisenbergAlgorithm(tag);
    }
    else if (tagName.Is(TagNames.Form))
    {
        // The form pointer is reset before anything else, whether or not
        // the form element turns out to be in scope.
        var node = _currentFormElement;
        _currentFormElement = null;

        if (node != null && IsInScope(node.LocalName))
        {
            GenerateImpliedEndTags();

            if (CurrentNode != node)
                RaiseErrorOccurred(HtmlParseError.FormClosedWrong, tag);

            // Only the form element itself is removed from the stack.
            _openElements.Remove(node);
        }
        else
        {
            RaiseErrorOccurred(HtmlParseError.FormNotInScope, tag);
        }
    }
    else if (tagName.Is(TagNames.Br))
    {
        // An end tag "br" is an error and is processed like a br start tag.
        RaiseErrorOccurred(HtmlParseError.TagCannotEndHere, tag);
        InBodyStartTagBreakrow(HtmlTagToken.Open(TagNames.Br));
    }
    else if (TagNames.AllHeadings.Contains(tagName))
    {
        // Scope check and stack clearing use the whole heading group; only
        // the mismatch check uses the specific tag name.
        if (IsInScope(TagNames.AllHeadings))
        {
            GenerateImpliedEndTags();

            if (!CurrentNode.LocalName.Is(tagName))
                RaiseErrorOccurred(HtmlParseError.TagDoesNotMatchCurrentNode, tag);

            ClearStackBackTo(TagNames.AllHeadings);
            CloseCurrentNode();
        }
        else
        {
            RaiseErrorOccurred(HtmlParseError.HeadingNotInScope, tag);
        }
    }
    else if (tagName.IsOneOf(TagNames.Dd, TagNames.Dt))
    {
        if (IsInScope(tagName))
        {
            GenerateImpliedEndTagsExceptFor(tagName);

            if (!CurrentNode.LocalName.Is(tagName))
                RaiseErrorOccurred(HtmlParseError.TagDoesNotMatchCurrentNode, tag);

            ClearStackBackTo(tagName);
            CloseCurrentNode();
        }
        else
        {
            RaiseErrorOccurred(HtmlParseError.ListItemNotInScope, tag);
        }
    }
    else if (tagName.IsOneOf(TagNames.Applet, TagNames.Marquee, TagNames.Object))
    {
        if (IsInScope(tagName))
        {
            GenerateImpliedEndTags();

            if (!CurrentNode.LocalName.Is(tagName))
                RaiseErrorOccurred(HtmlParseError.TagDoesNotMatchCurrentNode, tag);

            ClearStackBackTo(tagName);
            CloseCurrentNode();
            _formattingElements.ClearFormatting();
        }
        else
        {
            RaiseErrorOccurred(HtmlParseError.ObjectNotInScope, tag);
        }
    }
    else if (tagName.Is(TagNames.Body))
    {
        InBodyEndTagBody(tag);
    }
    else if (tagName.Is(TagNames.Html))
    {
        // Handled like an end tag "body"; the token is passed on to
        // AfterBody only when that handling reports success.
        if (InBodyEndTagBody(tag))
            AfterBody(tag);
    }
    else if (tagName.Is(TagNames.Template))
    {
        InHead(tag);
    }
    else
    {
        InBodyEndTagAnythingElse(tag);
    }
}
/// <summary>
/// See 8.2.4.40 Attribute value (unquoted) state.
/// Collects an unquoted value, starting with the character handed in.
/// </summary>
/// <param name="c">The next input character.</param>
/// <param name="tag">The current tag token.</param>
/// <returns>The token produced by the state that takes over.</returns>
HtmlToken AttributeUnquotedValue(Char c, HtmlTagToken tag)
{
    // Each pass handles 'c'; the loop header then fetches the next character.
    for (; ; c = GetNext())
    {
        switch (c)
        {
            case Symbols.GreaterThan:
                tag.SetAttributeValue(_stringBuffer.ToString());
                _stringBuffer.Clear();
                return EmitTag(tag);

            case Symbols.Ampersand:
                AppendCharacterReference(GetNext(), Symbols.GreaterThan);
                continue;

            case Symbols.Null:
                RaiseErrorOccurred(HtmlParseError.Null);
                _stringBuffer.Append(Symbols.Replacement);
                continue;

            case Symbols.DoubleQuote:
            case Symbols.SingleQuote:
            case Symbols.LessThan:
            case Symbols.Equality:
            case Symbols.CurvedQuote:
                // Invalid inside an unquoted value, but still kept.
                RaiseErrorOccurred(HtmlParseError.AttributeValueInvalid);
                _stringBuffer.Append(c);
                continue;

            case Symbols.EndOfFile:
                return NewEof();
        }

        // Whitespace ends the value; another attribute may follow.
        if (c.IsSpaceCharacter())
        {
            tag.SetAttributeValue(_stringBuffer.ToString());
            _stringBuffer.Clear();
            return AttributeBeforeName(tag);
        }

        _stringBuffer.Append(c);
    }
}
/// <summary>
/// Handles a body start tag in the AfterHead state: adds a new body
/// element for the token, clears the frameset flag and switches the tree
/// builder to the InBody mode.
/// </summary>
/// <param name="token">The actual tag token.</param>
void AfterHeadStartTagBody(HtmlTagToken token)
{
    var body = new HtmlBodyElement(_document);
    AddElement(body, token);

    _currentMode = HtmlTreeMode.InBody;
    _frameset = false;
}
/// <summary>
/// See 8.2.4.42 After attribute value (quoted) state.
/// Decides what follows a quoted attribute value.
/// </summary>
/// <param name="tag">The current tag token.</param>
/// <returns>The token produced by the state that takes over.</returns>
HtmlToken AttributeAfterValue(HtmlTagToken tag)
{
    var c = GetNext();

    switch (c)
    {
        case Symbols.GreaterThan:
            return EmitTag(tag);

        case Symbols.Solidus:
            return TagSelfClosing(tag);

        case Symbols.EndOfFile:
            return NewEof();
    }

    // Anything but whitespace is unexpected here: report it and step back
    // before continuing with the next attribute name.
    if (!c.IsSpaceCharacter())
    {
        RaiseErrorOccurred(HtmlParseError.AttributeNameExpected);
        Back();
    }

    return AttributeBeforeName(tag);
}
/// <summary>
/// Handles a dd or dt start tag in the InBody state: closes an open dd/dt
/// found on the stack of open elements, closes an open paragraph in button
/// scope, and then inserts the new element.
/// </summary>
/// <param name="tag">The actual tag given.</param>
void InBodyStartTagDefinitionItem(HtmlTagToken tag)
{
    _frameset = false;

    // Walk the stack of open elements from the top downwards.
    for (var position = _openElements.Count - 1; ; position--)
    {
        var current = _openElements[position];
        var localName = current.LocalName;

        // An open dd/dt is closed by processing a synthetic end tag.
        if (localName.IsOneOf(TagNames.Dd, TagNames.Dt))
        {
            InBody(HtmlTagToken.Close(localName));
            break;
        }

        // A special element that is not one of the basic blocks ends the search.
        if (current.Flags.HasFlag(NodeFlags.Special) && !TagNames.AllBasicBlocks.Contains(localName))
        {
            break;
        }
    }

    if (IsInButtonScope())
    {
        InBodyEndTagParagraph(tag);
    }

    AddElement(tag);
}
/// <summary>
/// See 8.2.4.19 Script data end tag name state.
/// Reads letters into the string buffer and checks whether they form an end
/// tag whose name equals the last start tag (compared case-insensitively).
/// </summary>
/// <param name="tag">The current tag token.</param>
/// <param name="offset">The tag name's offset within the string buffer.</param>
/// <returns>
/// The result of the state that takes over: NewCharacter, AttributeBeforeName,
/// TagSelfClosing, EmitTag, or ScriptData when the input is not a matching end tag.
/// </returns>
HtmlToken ScriptDataNameEndTag(HtmlTagToken tag, Int32 offset)
{
    var length = _lastStartTag.Length;

    while (true)
    {
        var c = GetNext();
        var isspace = c.IsSpaceCharacter();
        var isclosed = c == Symbols.GreaterThan;
        var isslash = c == Symbols.Solidus;
        // True when exactly 'length' characters were collected after 'offset'.
        var hasLength = _stringBuffer.Length - offset == length;

        // A candidate name is complete only when a terminating character follows it.
        if (hasLength && (isspace || isclosed || isslash))
        {
            var name = _stringBuffer.ToString(offset, length);

            if (name.Equals(_lastStartTag, StringComparison.OrdinalIgnoreCase))
            {
                if (offset > 2)
                {
                    // The buffer holds more than the end tag. The input is rewound by
                    // 3 + length characters and length + 2 characters starting at
                    // offset - 2 are removed from the buffer — presumably the "</"
                    // and the name, so that the preceding text is emitted first and
                    // the end tag is read again afterwards (TODO confirm).
                    Back(3 + length);
                    _stringBuffer.Remove(offset - 2, length + 2);
                    return NewCharacter();
                }

                _stringBuffer.Clear();

                if (isspace)
                {
                    tag.Name = _lastStartTag;
                    return AttributeBeforeName(tag);
                }
                else if (isslash)
                {
                    tag.Name = _lastStartTag;
                    return TagSelfClosing(tag);
                }
                else if (isclosed)
                {
                    tag.Name = _lastStartTag;
                    return EmitTag(tag);
                }
            }
        }

        // Not a matching end tag: any non-letter goes back to the script data handling.
        if (!c.IsLetter())
        {
            return ScriptData(c);
        }

        _stringBuffer.Append(c);
    }
}
/// <summary>
/// Handles an end tag for a formatting element in the InBody state by
/// re-parenting mis-nested content. The structure (outer loop limited to 8
/// passes, inner counter threshold of 3, formatting element, furthest block,
/// bookmark, common ancestor) appears to follow the "adoption agency
/// algorithm" of the HTML specification — confirm against the specification
/// before changing anything.
/// </summary>
/// <param name="tag">The actual tag given.</param>
void HeisenbergAlgorithm(HtmlTagToken tag)
{
    var outer = 0;
    var inner = 0;
    var bookmark = 0;
    var index = 0;

    // At most 8 passes of the outer loop.
    while (outer < 8)
    {
        var formattingElement = default(Element);
        var furthestBlock = default(Element);
        outer++;
        index = 0;
        inner = 0;

        // Search the formatting list backwards for an entry with the tag's
        // name; a null entry ends the search.
        for (var j = _formattingElements.Count - 1; j >= 0; j--)
        {
            if (_formattingElements[j] == null)
                break;

            if (_formattingElements[j].LocalName.Is(tag.Name))
            {
                index = j;
                formattingElement = _formattingElements[j];
                break;
            }
        }

        // No such formatting element: fall back to the generic end tag handling.
        if (formattingElement == null)
        {
            InBodyEndTagAnythingElse(tag);
            break;
        }

        var openIndex = _openElements.IndexOf(formattingElement);

        // Listed as formatting element but no longer open: drop the entry.
        if (openIndex == -1)
        {
            RaiseErrorOccurred(HtmlParseError.FormattingElementNotFound, tag);
            _formattingElements.Remove(formattingElement);
            break;
        }

        if (!IsInScope(formattingElement.LocalName))
        {
            RaiseErrorOccurred(HtmlParseError.ElementNotInScope, tag);
            break;
        }

        // Error (but continue) when the formatting element is not the current node.
        if (openIndex != _openElements.Count - 1)
            RaiseErrorOccurred(HtmlParseError.TagClosedWrong, tag);

        // Position of the formatting element within the formatting list.
        bookmark = index;

        // Furthest block: first special element above the formatting element
        // on the stack. 'index' now refers to the stack of open elements.
        for (var j = openIndex + 1; j < _openElements.Count; j++)
        {
            if (_openElements[j].Flags.HasFlag(NodeFlags.Special))
            {
                index = j;
                furthestBlock = _openElements[j];
                break;
            }
        }

        // No furthest block: close nodes up to and including the formatting
        // element and remove it from the formatting list.
        if (furthestBlock == null)
        {
            do
            {
                furthestBlock = CurrentNode;
                CloseCurrentNode();
            }
            while (furthestBlock != formattingElement);

            _formattingElements.Remove(formattingElement);
            break;
        }

        var commonAncestor = _openElements[openIndex - 1];
        var node = furthestBlock;
        var lastNode = furthestBlock;

        // Inner loop: walk down the stack from the furthest block towards
        // the formatting element.
        while (true)
        {
            inner++;
            node = _openElements[--index];

            if (node == formattingElement)
                break;

            // After more than 3 inner passes, formatting entries are dropped ...
            if (inner > 3 && _formattingElements.Contains(node))
                _formattingElements.Remove(node);

            // ... and nodes that are not formatting entries leave the stack.
            if (!_formattingElements.Contains(node))
            {
                _openElements.Remove(node);
                continue;
            }

            // Replace the node by a copy, both on the stack and in the
            // formatting list.
            var newElement = CopyElement(node);
            commonAncestor.AddNode(newElement);
            _openElements[index] = newElement;

            for (var l = 0; l != _formattingElements.Count; l++)
            {
                if (_formattingElements[l] == node)
                {
                    _formattingElements[l] = newElement;
                    break;
                }
            }

            node = newElement;

            if (lastNode == furthestBlock)
                bookmark++;

            // Move lastNode below the copied node.
            if (lastNode.Parent != null)
                lastNode.Parent.RemoveChild(lastNode);

            node.AddNode(lastNode);
            lastNode = node;
        }

        if (lastNode.Parent != null)
            lastNode.Parent.RemoveChild(lastNode);

        // Attach lastNode to the common ancestor; for ancestors contained in
        // TagNames.AllTableMajor, AddElementWithFoster is used instead.
        if (!TagNames.AllTableMajor.Contains(commonAncestor.LocalName))
            commonAncestor.AddNode(lastNode);
        else
            AddElementWithFoster(lastNode);

        // A copy of the formatting element takes over all children of the
        // furthest block and becomes its only child.
        var element = CopyElement(formattingElement);

        while (furthestBlock.ChildNodes.Length > 0)
        {
            var childNode = furthestBlock.ChildNodes[0];
            furthestBlock.RemoveNode(0, childNode);
            element.AddNode(childNode);
        }

        furthestBlock.AddNode(element);

        // Swap the old formatting element for the copy: at the bookmark in the
        // formatting list, and directly after the furthest block on the stack.
        _formattingElements.Remove(formattingElement);
        _formattingElements.Insert(bookmark, element);
        _openElements.Remove(formattingElement);
        _openElements.Insert(_openElements.IndexOf(furthestBlock) + 1, element);
    }
}
/// <summary>
/// See 8.2.4.27 Script data escaped end tag name state.
/// Reads letters into the string buffer until they form a terminated
/// "script" end tag name or a non-letter shows that they do not.
/// </summary>
/// <param name="tag">The current tag token.</param>
/// <param name="offset">The tag name's offset.</param>
/// <returns>The token produced by the state that takes over.</returns>
HtmlToken ScriptDataEscapedNameEndTag(HtmlTagToken tag, Int32 offset)
{
    var expectedName = Tags.Script;
    var expectedLength = expectedName.Length;

    for (;;)
    {
        var c = GetNext();
        var terminatesName = c == Symbols.Solidus || c == Symbols.GreaterThan || c.IsSpaceCharacter();

        // The buffer is only sliced once exactly the expected number of
        // characters has been collected after the offset.
        if (terminatesName &&
            _stringBuffer.Length - offset == expectedLength &&
            _stringBuffer.ToString(offset, expectedLength).Equals(expectedName, StringComparison.OrdinalIgnoreCase))
        {
            Back(expectedLength + 3);
            _stringBuffer.Remove(offset - 2, expectedLength + 2);
            return NewCharacter();
        }

        if (!c.IsLetter())
        {
            return ScriptDataEscaped(c);
        }

        _stringBuffer.Append(c);
    }
}
/// <summary>
/// Handles a br-like start tag in the InBody state: reconstructs the
/// formatting elements, adds an element for the tag and closes it again
/// right away, then clears the frameset flag.
/// </summary>
/// <param name="tag">The actual tag found.</param>
void InBodyStartTagBreakrow(HtmlTagToken tag)
{
    ReconstructFormatting();
    AddElement(tag);
    // The element is closed immediately after being added, so it never
    // stays the current node.
    CloseCurrentNode();
    _frameset = false;
}
/// <summary>
/// Finalizes a tag token before it is emitted. The parse mode is reset to
/// PCData; start tags lose duplicate attributes (the first occurrence of a
/// key wins) and are remembered as the last start tag; end tags are checked
/// for a self-closing flag and for attributes.
/// </summary>
/// <param name="tag">The tag token to emit.</param>
/// <returns>The same tag token.</returns>
HtmlToken EmitTag(HtmlTagToken tag)
{
    var attrs = tag.Attributes;
    _state = HtmlParseMode.PCData;

    var type = tag.Type;

    if (type == HtmlTokenType.StartTag)
    {
        // From the last attribute backwards: drop an attribute whenever an
        // earlier attribute carries the same key.
        for (var candidate = attrs.Count - 1; candidate > 0; candidate--)
        {
            var key = attrs[candidate].Key;
            var earlier = candidate - 1;

            while (earlier >= 0 && attrs[earlier].Key != key)
            {
                earlier--;
            }

            if (earlier >= 0)
            {
                attrs.RemoveAt(candidate);
                RaiseErrorOccurred(HtmlParseError.AttributeDuplicateOmitted, tag.Position);
            }
        }

        _lastStartTag = tag.Name;
    }
    else if (type == HtmlTokenType.EndTag)
    {
        if (tag.IsSelfClosing)
        {
            RaiseErrorOccurred(HtmlParseError.EndTagCannotBeSelfClosed, tag.Position);
        }

        if (attrs.Count != 0)
        {
            RaiseErrorOccurred(HtmlParseError.EndTagCannotHaveAttributes, tag.Position);
        }
    }

    return tag;
}
/// <summary>
/// Creates a MathML or SVG element for the tag, depending on the flags of
/// the adjusted current node, and sets it up with the tag's attributes.
/// </summary>
/// <param name="tag">The tag of the foreign element.</param>
/// <returns>
/// The element, or <c>null</c> when the adjusted current node is neither a
/// MathML nor an SVG member.
/// </returns>
Element CreateForeignElementFrom(HtmlTagToken tag)
{
    // MathML is checked first.
    if (AdjustedCurrentNode.Flags.HasFlag(NodeFlags.MathMember))
    {
        return Factory.MathElements.Create(_document, tag.Name).Setup(tag);
    }

    if (AdjustedCurrentNode.Flags.HasFlag(NodeFlags.SvgMember))
    {
        return Factory.SvgElements.CreateSanatized(_document, tag.Name).Setup(tag);
    }

    return null;
}
/// <summary>
/// See 8.2.4.10 Tag name state.
/// Collects the tag name in the string buffer until a character ends it.
/// </summary>
/// <remarks>
/// Uppercase ASCII letters are lowercased with
/// <see cref="Char.ToLowerInvariant(Char)"/>. The culture-sensitive
/// <c>Char.ToLower(Char)</c> would map 'I' to a dotless 'ı' under Turkish or
/// Azerbaijani cultures, turning a tag such as <c>DIV</c> into an unknown name.
/// </remarks>
/// <param name="tag">The current tag token.</param>
/// <returns>The token produced by the state that takes over.</returns>
HtmlToken TagName(HtmlTagToken tag)
{
    while (true)
    {
        var c = GetNext();

        if (c == Symbols.GreaterThan)
        {
            tag.Name = _stringBuffer.ToString();
            _stringBuffer.Clear();
            return EmitTag(tag);
        }
        else if (c.IsSpaceCharacter())
        {
            tag.Name = _stringBuffer.ToString();
            _stringBuffer.Clear();
            return AttributeBeforeName(tag);
        }
        else if (c == Symbols.Solidus)
        {
            tag.Name = _stringBuffer.ToString();
            _stringBuffer.Clear();
            return TagSelfClosing(tag);
        }
        else if (c.IsUppercaseAscii())
        {
            // Culture-independent lowercasing (see remarks).
            _stringBuffer.Append(Char.ToLowerInvariant(c));
        }
        else if (c == Symbols.Null)
        {
            RaiseErrorOccurred(HtmlParseError.Null);
            _stringBuffer.Append(Symbols.Replacement);
        }
        else if (c != Symbols.EndOfFile)
        {
            _stringBuffer.Append(c);
        }
        else
        {
            RaiseErrorOccurred(HtmlParseError.EOF);
            return NewEof();
        }
    }
}
/// <summary>
/// Creates the root (html) element, adds it to the document, applies the
/// token's attributes and pushes it onto the stack of open elements.
/// </summary>
/// <param name="tag">The token which started this process.</param>
void AddRoot(HtmlTagToken tag)
{
    var element = new HtmlHtmlElement(_document);
    // The element is attached to the document before its attributes are set.
    _document.AddNode(element);
    // The self-closing flag is never acknowledged for the root element.
    SetupElement(element, tag, false);
    _openElements.Add(element);
    _tokenizer.IsAcceptingCharacterData = false;
    // Runs last, once the root element and its attributes are in place
    // (presumably because the manifest is read from the root element — confirm).
    _document.ApplyManifest();
}
/// <summary>
/// See 8.2.4.43 Self-closing start tag state.
/// Looks at the character following a solidus inside a tag.
/// </summary>
/// <param name="tag">The current tag token.</param>
/// <returns>The token produced by the state that takes over.</returns>
HtmlToken TagSelfClosing(HtmlTagToken tag)
{
    var c = GetNext();

    // "/>" marks the tag as self-closing and emits it.
    if (c == Symbols.GreaterThan)
    {
        tag.IsSelfClosing = true;
        return EmitTag(tag);
    }

    if (c == Symbols.EndOfFile)
    {
        RaiseErrorOccurred(HtmlParseError.EOF);
        return NewEof();
    }

    // Stray solidus: report it, step back and continue with attributes.
    RaiseErrorOccurred(HtmlParseError.ClosingSlashMisplaced);
    Back();
    return AttributeBeforeName(tag);
}
/// <summary>
/// Creates an HTML element for the tag name through the element factory,
/// configures it from the tag token via SetupElement (attributes and
/// self-closing acknowledgement) and appends it through the single-argument
/// AddElement overload.
/// </summary>
/// <param name="tag">The associated tag token.</param>
/// <param name="acknowledgeSelfClosing">
/// Should the self-closing be acknowledged? Defaults to <c>false</c>; the
/// value is passed through to SetupElement unchanged.
/// </param>
/// <returns>The newly created element.</returns>
Element AddElement(HtmlTagToken tag, Boolean acknowledgeSelfClosing = false)
{
    var element = Factory.HtmlElements.Create(_document, tag.Name);
    // Setup runs first, so the element is fully configured before it is added.
    SetupElement(element, tag, acknowledgeSelfClosing);
    AddElement(element);
    return element;
}
/// <summary>
/// Returns the name of the given tag token.
/// </summary>
/// <remarks>
/// AngleSharp canonicalizes the case of tag names, while callers want what the
/// user typed. NOTE(review): this method returns <c>tag.Name</c> unchanged, so
/// it yields the original casing only if the token's name was never
/// canonicalized — confirm against the tokenizer in use. An end-tag offset
/// (skipping the '/') that was computed here was never used and has been removed.
/// </remarks>
/// <param name="tag">The tag token whose name is requested.</param>
/// <returns>The value of <c>tag.Name</c>.</returns>
public string GetTagNameOriginalCasing(HtmlTagToken tag)
{
    return tag.Name;
}
/// <summary>
/// Processes a start tag token by testing tag.Name against a fixed sequence
/// of single tag names and tag-name groups (TagNames.All*). The tests form one
/// if / else-if chain, so the first matching test decides which branch runs and
/// an earlier branch takes precedence if a name would satisfy several tests.
/// Depending on the branch the method may: call InBodyEndTagParagraph when
/// IsInButtonScope() is true, call ReconstructFormatting(), append a new
/// element through one of the AddElement overloads, register the element in
/// _formattingElements, set _frameset to false, switch _currentMode /
/// _previousMode, change _tokenizer.State, delegate to another handler
/// (InHead, InBody, RawtextAlgorithm, HeisenbergAlgorithm, ...), or only report
/// a parse error through RaiseErrorOccurred. A name that matches no test ends
/// in the final else, which calls ReconstructFormatting() and AddElement(tag).
/// </summary>
/// <param name="tag">
/// The start tag token to process. Its Name is rewritten to TagNames.Img in the
/// TagNames.Image branch before the token is handed on.
/// </param>
// The only early return is in the noscript branch when _options.IsScripting is set;
// every other branch runs to the end of the chain.
// The isindex branch feeds synthetic form / hr / label / input tokens back through InBody.
// NOTE(review): presumably this implements the start-tag rules of the HTML
// parsing specification's "in body" insertion mode - confirm against the spec.
// NOTE(review): the frameset branch guards with "_openElements.Count != 1" while the
// body branch uses "_openElements.Count > 1" before reading _openElements[1]; confirm
// the stack can never be empty here.
void InBodyStartTag(HtmlTagToken tag) { var tagName = tag.Name; if (tagName.Is(TagNames.Div)) { if (IsInButtonScope()) InBodyEndTagParagraph(tag); AddElement(tag); } else if (tagName.Is(TagNames.A)) { for (var i = _formattingElements.Count - 1; i >= 0; i--) { if (_formattingElements[i] == null) break; if (_formattingElements[i].LocalName.Is(TagNames.A)) { var format = _formattingElements[i]; RaiseErrorOccurred(HtmlParseError.AnchorNested, tag); HeisenbergAlgorithm(HtmlTagToken.Close(TagNames.A)); _openElements.Remove(format); _formattingElements.Remove(format); break; } } ReconstructFormatting(); var element = new HtmlAnchorElement(_document); AddElement(element, tag); _formattingElements.AddFormatting(element); } else if (tagName.Is(TagNames.Span)) { ReconstructFormatting(); AddElement(tag); } else if (tagName.Is(TagNames.Li)) { InBodyStartTagListItem(tag); } else if (tagName.Is(TagNames.Img)) { InBodyStartTagBreakrow(tag); } else if (tagName.IsOneOf(TagNames.Ul, TagNames.P)) { if (IsInButtonScope()) InBodyEndTagParagraph(tag); AddElement(tag); } else if (TagNames.AllSemanticFormatting.Contains(tagName)) { ReconstructFormatting(); _formattingElements.AddFormatting(AddElement(tag)); } else if (tagName.Is(TagNames.Script)) { InHead(tag); } else if (TagNames.AllHeadings.Contains(tagName)) { if (IsInButtonScope()) InBodyEndTagParagraph(tag); if (TagNames.AllHeadings.Contains(CurrentNode.LocalName)) { RaiseErrorOccurred(HtmlParseError.HeadingNested, tag); CloseCurrentNode(); } AddElement(new HtmlHeadingElement(_document, tagName), tag); } else if (tagName.Is(TagNames.Input)) { ReconstructFormatting(); AddElement(new HtmlInputElement(_document), tag, true); CloseCurrentNode(); if (!tag.GetAttribute(AttributeNames.Type).Isi(AttributeNames.Hidden)) _frameset = false; } else if (tagName.Is(TagNames.Form)) { if (_currentFormElement == null) { if (IsInButtonScope()) InBodyEndTagParagraph(tag); _currentFormElement = new HtmlFormElement(_document); 
AddElement(_currentFormElement, tag); } else RaiseErrorOccurred(HtmlParseError.FormAlreadyOpen, tag); } else if (TagNames.AllBody.Contains(tagName)) { if (IsInButtonScope()) InBodyEndTagParagraph(tag); AddElement(tag); } else if (TagNames.AllClassicFormatting.Contains(tagName)) { ReconstructFormatting(); _formattingElements.AddFormatting(AddElement(tag)); } else if (TagNames.AllHead.Contains(tagName)) { InHead(tag); } else if (tagName.IsOneOf(TagNames.Pre, TagNames.Listing)) { if (IsInButtonScope()) InBodyEndTagParagraph(tag); AddElement(tag); _frameset = false; PreventNewLine(); } else if (tagName.Is(TagNames.Button)) { if (IsInScope(TagNames.Button)) { RaiseErrorOccurred(HtmlParseError.ButtonInScope, tag); InBodyEndTagBlock(tag); InBody(tag); } else { ReconstructFormatting(); AddElement(new HtmlButtonElement(_document), tag); _frameset = false; } } else if (tagName.Is(TagNames.Table)) { if (_document.QuirksMode != QuirksMode.On && IsInButtonScope()) InBodyEndTagParagraph(tag); AddElement(new HtmlTableElement(_document), tag); _frameset = false; _currentMode = HtmlTreeMode.InTable; } else if (TagNames.AllBodyBreakrow.Contains(tagName)) { InBodyStartTagBreakrow(tag); } else if (TagNames.AllBodyClosed.Contains(tagName)) { AddElement(tag, true); CloseCurrentNode(); } else if (tagName.Is(TagNames.Hr)) { if (IsInButtonScope()) InBodyEndTagParagraph(tag); AddElement(new HtmlHrElement(_document), tag, true); CloseCurrentNode(); _frameset = false; } else if (tagName.Is(TagNames.Textarea)) { AddElement(new HtmlTextAreaElement(_document), tag); _tokenizer.State = HtmlParseMode.RCData; _previousMode = _currentMode; _frameset = false; _currentMode = HtmlTreeMode.Text; PreventNewLine(); } else if (tagName.Is(TagNames.Select)) { ReconstructFormatting(); AddElement(new HtmlSelectElement(_document), tag); _frameset = false; switch (_currentMode) { case HtmlTreeMode.InTable: case HtmlTreeMode.InTableBody: case HtmlTreeMode.InCaption: case HtmlTreeMode.InRow: case 
HtmlTreeMode.InCell: _currentMode = HtmlTreeMode.InSelectInTable; break; default: _currentMode = HtmlTreeMode.InSelect; break; } } else if (tagName.IsOneOf(TagNames.Optgroup, TagNames.Option)) { if (CurrentNode.LocalName.Is(TagNames.Option)) InBodyEndTagAnythingElse(HtmlTagToken.Close(TagNames.Option)); ReconstructFormatting(); AddElement(tag); } else if (tagName.IsOneOf(TagNames.Dd, TagNames.Dt)) { InBodyStartTagDefinitionItem(tag); } else if (tagName.Is(TagNames.Iframe)) { _frameset = false; RawtextAlgorithm(tag); } else if (TagNames.AllBodyObsolete.Contains(tagName)) { ReconstructFormatting(); AddElement(tag); _formattingElements.AddScopeMarker(); _frameset = false; } else if (tagName.Is(TagNames.Image)) { RaiseErrorOccurred(HtmlParseError.ImageTagNamedWrong, tag); tag.Name = TagNames.Img; InBodyStartTagBreakrow(tag); } else if (tagName.Is(TagNames.NoBr)) { ReconstructFormatting(); if (IsInScope(TagNames.NoBr)) { RaiseErrorOccurred(HtmlParseError.NobrInScope, tag); HeisenbergAlgorithm(tag); ReconstructFormatting(); } _formattingElements.AddFormatting(AddElement(tag)); } else if (tagName.Is(TagNames.Xmp)) { if (IsInButtonScope()) InBodyEndTagParagraph(tag); ReconstructFormatting(); _frameset = false; RawtextAlgorithm(tag); } else if (tagName.IsOneOf(TagNames.Rb, TagNames.Rtc)) { if (IsInScope(TagNames.Ruby)) { GenerateImpliedEndTags(); if (!CurrentNode.LocalName.Is(TagNames.Ruby)) RaiseErrorOccurred(HtmlParseError.TagDoesNotMatchCurrentNode, tag); } AddElement(tag); } else if (tagName.IsOneOf(TagNames.Rp, TagNames.Rt)) { if (IsInScope(TagNames.Ruby)) { GenerateImpliedEndTagsExceptFor(TagNames.Rtc); if (CurrentNode.LocalName.IsOneOf(TagNames.Ruby, TagNames.Rtc) == false) RaiseErrorOccurred(HtmlParseError.TagDoesNotMatchCurrentNode, tag); } AddElement(tag); } else if (tagName.Is(TagNames.NoEmbed)) { RawtextAlgorithm(tag); } else if (tagName.Is(TagNames.NoScript)) { if (_options.IsScripting) { RawtextAlgorithm(tag); return; } ReconstructFormatting(); 
AddElement(tag); } else if (tagName.Is(TagNames.Math)) { var element = new MathElement(_document, tagName); ReconstructFormatting(); AddElement(element.Setup(tag)); if (tag.IsSelfClosing) { _openElements.Remove(element); } } else if (tagName.Is(TagNames.Svg)) { var element = new SvgElement(_document, tagName); ReconstructFormatting(); AddElement(element.Setup(tag)); if (tag.IsSelfClosing) { _openElements.Remove(element); } } else if (tagName.Is(TagNames.Plaintext)) { if (IsInButtonScope()) { InBodyEndTagParagraph(tag); } AddElement(tag); _tokenizer.State = HtmlParseMode.Plaintext; } else if (tagName.Is(TagNames.Frameset)) { RaiseErrorOccurred(HtmlParseError.FramesetMisplaced, tag); if (_openElements.Count != 1 && _openElements[1].LocalName.Is(TagNames.Body) && _frameset) { _openElements[1].RemoveFromParent(); while (_openElements.Count > 1) { CloseCurrentNode(); } AddElement(new HtmlFrameSetElement(_document), tag); _currentMode = HtmlTreeMode.InFrameset; } } else if (tagName.Is(TagNames.Html)) { RaiseErrorOccurred(HtmlParseError.HtmlTagMisplaced, tag); if (_templateModes.Count == 0) { _openElements[0].SetUniqueAttributes(tag.Attributes); } } else if (tagName.Is(TagNames.Body)) { RaiseErrorOccurred(HtmlParseError.BodyTagMisplaced, tag); if (_templateModes.Count == 0 && _openElements.Count > 1 && _openElements[1].LocalName.Is(TagNames.Body)) { _frameset = false; _openElements[1].SetUniqueAttributes(tag.Attributes); } } else if (tagName.Is(TagNames.IsIndex)) { RaiseErrorOccurred(HtmlParseError.TagInappropriate, tag); if (_currentFormElement == null) { InBody(HtmlTagToken.Open(TagNames.Form)); if (tag.GetAttribute(AttributeNames.Action).Length > 0) _currentFormElement.SetAttribute(AttributeNames.Action, tag.GetAttribute(AttributeNames.Action)); InBody(HtmlTagToken.Open(TagNames.Hr)); InBody(HtmlTagToken.Open(TagNames.Label)); if (tag.GetAttribute(AttributeNames.Prompt).Length > 0) AddCharacters(tag.GetAttribute(AttributeNames.Prompt)); else AddCharacters("This is a 
searchable index. Enter search keywords: "); var input = HtmlTagToken.Open(TagNames.Input); input.AddAttribute(AttributeNames.Name, TagNames.IsIndex); for (int i = 0; i < tag.Attributes.Count; i++) { if (tag.Attributes[i].Key.IsOneOf(AttributeNames.Name, AttributeNames.Action, AttributeNames.Prompt) == false) input.AddAttribute(tag.Attributes[i].Key, tag.Attributes[i].Value); } InBody(input); InBody(HtmlTagToken.Close(TagNames.Label)); InBody(HtmlTagToken.Open(TagNames.Hr)); InBody(HtmlTagToken.Close(TagNames.Form)); } } else if (TagNames.AllNested.Contains(tagName)) { RaiseErrorOccurred(HtmlParseError.TagCannotStartHere, tag); } else { ReconstructFormatting(); AddElement(tag); } }
/// <summary>
/// Transfers every attribute of the tag token to the math element, passing
/// each attribute name through AdjustToMathAttribute() first, and then runs
/// the element's own SetupElement() step.
/// </summary>
/// <param name="element">The element to setup.</param>
/// <param name="tag">The tag token to use.</param>
/// <returns>The same element instance, after setup.</returns>
public static MathElement Setup(this MathElement element, HtmlTagToken tag)
{
    foreach (var attribute in tag.Attributes)
    {
        var adjustedName = attribute.Key.AdjustToMathAttribute();
        element.AdjustAttribute(adjustedName, attribute.Value);
    }

    element.SetupElement();
    return element;
}