public void TreeMisnestedTagsHeisenbergWithFurthest()
{
    // 8.2.8.2 Misnested tags: <b><p></b></p>
    // The parsed document must serialize exactly like the hand-built tree below.
    var document = DocumentBuilder.Html(@"<b>1<p>2</b>3</p>");

    // <html><head></head><body>
    var body = new HTMLHtmlElement()
        .AppendChild(new HTMLHeadElement()).ParentNode
        .AppendChild(new HTMLBodyElement());

    // <b>1</b>
    body.AppendChild(new HTMLElement() { NodeName = "b" })
        .AppendChild(new TextNode("1"));

    // <p><b>2</b>3</p>
    var paragraph = body.AppendChild(new HTMLParagraphElement());
    paragraph.AppendChild(new HTMLElement() { NodeName = "b" })
        .AppendChild(new TextNode("2"));
    paragraph.AppendChild(new TextNode("3"));

    // Step back up from the body to the root element before serializing.
    var expected = body.ParentNode.ToHtml();
    var actual = document.DocumentElement.ToHtml();
    Assert.AreEqual(expected, actual);
}
public void TreeNonConformingTable()
{
    // 8.2.5.4.7 The "in body" insertion mode - "In the non-conforming ..."
    // The parsed document must serialize exactly like the hand-built tree below.
    var document = DocumentBuilder.Html(@"<a href=""a"">a<table><a href=""b"">b</table>x");

    // <html><head></head><body>
    var body = new HTMLHtmlElement()
        .AppendChild(new HTMLHeadElement()).ParentNode
        .AppendChild(new HTMLBodyElement());

    // First anchor: holds the text "a", the nested anchor with "b" and the (empty) table.
    var outerAnchor = body.AppendChild(new HTMLAnchorElement().SetAttribute("href", "a"));
    outerAnchor.AppendChild(new TextNode("a"));
    outerAnchor.AppendChild(new HTMLAnchorElement().SetAttribute("href", "b"))
        .AppendChild(new TextNode("b"));
    outerAnchor.AppendChild(new HTMLTableElement());

    // Second anchor: a sibling of the first one, wrapping the trailing "x".
    body.AppendChild(new HTMLAnchorElement().SetAttribute("href", "b"))
        .AppendChild(new TextNode("x"));

    // Step back up from the body to the root element before serializing.
    var expected = body.ParentNode.ToHtml();
    var actual = document.DocumentElement.ToHtml();
    Assert.AreEqual(expected, actual);
}
public void TreeOneTextNodeTableBeforeABCD()
{
    // One Text node before the table, containing "ABCD"
    var document = DocumentBuilder.Html(@"A<table>B<tr>C</tr>D</table>");

    // <html><head></head><body>
    var body = new HTMLHtmlElement()
        .AppendChild(new HTMLHeadElement()).ParentNode
        .AppendChild(new HTMLBodyElement());

    // All character data is expected in a single text node in front of the table.
    body.AppendChild(new TextNode("ABCD"));

    // table > table section > row, with no text left inside.
    body.AppendChild(new HTMLTableElement())
        .AppendChild(new HTMLTableSectionElement())
        .AppendChild(new HTMLTableRowElement());

    // Step back up from the body to the root element before serializing.
    var expected = body.ParentNode.ToHtml();
    var actual = document.DocumentElement.ToHtml();
    Assert.AreEqual(expected, actual);
}
public void TreeUnexpectedTableMarkup()
{
    // 8.2.8.3 Unexpected markup in tables
    // The parsed document must serialize exactly like the hand-built tree below.
    var document = DocumentBuilder.Html(@"<table><b><tr><td>aaa</td></tr>bbb</table>ccc");

    // <html><head></head><body>
    var body = new HTMLHtmlElement()
        .AppendChild(new HTMLHeadElement()).ParentNode
        .AppendChild(new HTMLBodyElement());

    // An empty <b> comes first ...
    body.AppendChild(new HTMLElement() { NodeName = "b" });

    // ... followed by a <b> that wraps "bbb" ...
    body.AppendChild(new HTMLElement() { NodeName = "b" })
        .AppendChild(new TextNode("bbb"));

    // ... then the table itself: table > table section > row > cell > "aaa" ...
    body.AppendChild(new HTMLTableElement())
        .AppendChild(new HTMLTableSectionElement())
        .AppendChild(new HTMLTableRowElement())
        .AppendChild(new HTMLTableCellElement())
        .AppendChild(new TextNode("aaa"));

    // ... and finally a <b> that wraps the trailing "ccc".
    body.AppendChild(new HTMLElement() { NodeName = "b" })
        .AppendChild(new TextNode("ccc"));

    // Step back up from the body to the root element before serializing.
    var expected = body.ParentNode.ToHtml();
    var actual = document.DocumentElement.ToHtml();
    Assert.AreEqual(expected, actual);
}
public void TreeOneTextNodeTableBeforeAspaceBC()
{
    // One Text node before the table, containing "A BC" (A-space-B-C), and one Text node
    // inside the table (as a child of a tbody) with a single space character.
    var document = DocumentBuilder.Html(@"A<table><tr> B</tr> </em>C</table>");

    // <html><head></head><body>
    var body = new HTMLHtmlElement()
        .AppendChild(new HTMLHeadElement()).ParentNode
        .AppendChild(new HTMLBodyElement());

    // The text node in front of the table.
    body.AppendChild(new TextNode("A BC"));

    // table > table section, which holds the row followed by the single-space text node.
    var section = body.AppendChild(new HTMLTableElement())
        .AppendChild(new HTMLTableSectionElement());
    section.AppendChild(new HTMLTableRowElement());
    section.AppendChild(new TextNode(" "));

    // Step back up from the body to the root element before serializing.
    var expected = body.ParentNode.ToHtml();
    var actual = document.DocumentElement.ToHtml();
    Assert.AreEqual(expected, actual);
}
public void TreeUnclosedFormattingElements()
{
    // 8.2.8.6 Unclosed formatting elements
    // The parsed document must serialize exactly like the hand-built tree below.
    var document = DocumentBuilder.Html(@"<!DOCTYPE html> <p><b class=x><b class=x><b><b class=x><b class=x><b>X<p>X<p><b><b class=x><b>X<p></b></b></b></b></b></b>X");

    // <html><head></head><body>
    var body = new HTMLHtmlElement()
        .AppendChild(new HTMLHeadElement()).ParentNode
        .AppendChild(new HTMLBodyElement());

    // 1st paragraph: six nested <b> elements around "X".
    body.AppendChild(new HTMLParagraphElement())
        .AppendChild(new HTMLElement() { NodeName = "b" }.SetAttribute("class", "x"))
        .AppendChild(new HTMLElement() { NodeName = "b" }.SetAttribute("class", "x"))
        .AppendChild(new HTMLElement() { NodeName = "b" })
        .AppendChild(new HTMLElement() { NodeName = "b" }.SetAttribute("class", "x"))
        .AppendChild(new HTMLElement() { NodeName = "b" }.SetAttribute("class", "x"))
        .AppendChild(new HTMLElement() { NodeName = "b" })
        .AppendChild(new TextNode("X"));

    // 2nd paragraph: five nested <b> elements around "X".
    body.AppendChild(new HTMLParagraphElement())
        .AppendChild(new HTMLElement() { NodeName = "b" }.SetAttribute("class", "x"))
        .AppendChild(new HTMLElement() { NodeName = "b" })
        .AppendChild(new HTMLElement() { NodeName = "b" }.SetAttribute("class", "x"))
        .AppendChild(new HTMLElement() { NodeName = "b" }.SetAttribute("class", "x"))
        .AppendChild(new HTMLElement() { NodeName = "b" })
        .AppendChild(new TextNode("X"));

    // 3rd paragraph: eight nested <b> elements around "X".
    body.AppendChild(new HTMLParagraphElement())
        .AppendChild(new HTMLElement() { NodeName = "b" }.SetAttribute("class", "x"))
        .AppendChild(new HTMLElement() { NodeName = "b" })
        .AppendChild(new HTMLElement() { NodeName = "b" }.SetAttribute("class", "x"))
        .AppendChild(new HTMLElement() { NodeName = "b" }.SetAttribute("class", "x"))
        .AppendChild(new HTMLElement() { NodeName = "b" })
        .AppendChild(new HTMLElement() { NodeName = "b" })
        .AppendChild(new HTMLElement() { NodeName = "b" }.SetAttribute("class", "x"))
        .AppendChild(new HTMLElement() { NodeName = "b" })
        .AppendChild(new TextNode("X"));

    // 4th paragraph: just the text "X".
    body.AppendChild(new HTMLParagraphElement())
        .AppendChild(new TextNode("X"));

    // Step back up from the body to the root element before serializing.
    var expected = body.ParentNode.ToHtml();
    var actual = document.DocumentElement.ToHtml();
    Assert.AreEqual(expected, actual);
}
public void HtmlHasRightHeadElement()
{
    // Build document > html > head by hand.
    var document = new HTMLDocument();
    var html = new HTMLHtmlElement();
    var expectedHead = new HTMLHeadElement();

    document.AppendChild(html);
    html.AppendChild(expectedHead);

    // The document's Head property must hand back the very element appended above.
    Assert.AreEqual(expectedHead, document.Head);
}
public void HtmlHasRightBodyElement()
{
    // Build document > html > body by hand.
    var document = new HTMLDocument();
    var html = new HTMLHtmlElement();
    var expectedBody = new HTMLBodyElement();

    document.AppendChild(html);
    html.AppendChild(expectedBody);

    // The document's Body property must hand back the very element appended above.
    Assert.AreEqual(expectedBody, document.Body);
}
/// <summary>
/// Creates a new html element, appends it to the document as the root
/// and pushes it onto the stack of open elements.
/// </summary>
/// <param name="token">The token which started this process.</param>
void AddRoot(HtmlToken token)
{
    var root = new HTMLHtmlElement();

    // Attach the element to the document first, then configure it from the token.
    doc.AppendChild(root);
    SetupElement(root, token, false);
    open.Add(root);

    // Character data is only accepted by the tokenizer if the new root is not in the HTML namespace.
    tokenizer.AcceptsCharacterData = !root.IsInHtml;
}
/// <summary>
/// Prepares the parser for the fragment algorithm, using the specified context element.
/// </summary>
/// <param name="context">The context element where the algorithm is applied to.</param>
/// <exception cref="InvalidOperationException">Thrown if the parser has already been started.</exception>
public void SwitchToFragment(Node context)
{
    if (started)
        throw new InvalidOperationException("Fragment mode has to be activated before running the parser!");

    // Select the initial tokenizer mode from the tag name of the context element.
    // Names that are not listed leave the tokenizer untouched.
    switch (context.NodeName)
    {
        case HTMLTitleElement.Tag:
        case HTMLTextAreaElement.Tag:
            tokenizer.Switch(HtmlParseMode.RCData);
            break;

        case HTMLStyleElement.Tag:
        case HTMLSemanticElement.XmpTag:
        case HTMLIFrameElement.Tag:
        case HTMLNoElement.NoEmbedTag:
        case HTMLNoElement.NoFramesTag:
            tokenizer.Switch(HtmlParseMode.Rawtext);
            break;

        case HTMLScriptElement.Tag:
            tokenizer.Switch(HtmlParseMode.Script);
            break;

        case HTMLNoElement.NoScriptTag:
            // Only switched when scripting is enabled for the document.
            if (doc.IsScripting)
                tokenizer.Switch(HtmlParseMode.Rawtext);
            break;

        case HTMLSemanticElement.PlaintextTag:
            tokenizer.Switch(HtmlParseMode.Plaintext);
            break;
    }

    // Create the root html element and make it the only entry on the stack of open elements.
    var html = new HTMLHtmlElement();
    doc.AppendChild(html);
    open.Add(html);

    Reset(context);
    fragmentContext = context;
    tokenizer.AcceptsCharacterData = !AdjustedCurrentNode.IsInHtml;

    // Walk from the context element up through its ancestors; the first form
    // element found (the context itself included) becomes the form pointer.
    for (var node = context; node != null; node = node.ParentNode)
    {
        if (node is HTMLFormElement)
        {
            form = (HTMLFormElement)node;
            break;
        }
    }
}
/// <summary>
/// Handles a token in the "before html" insertion mode.
/// See 8.2.5.4.2 The "before html" insertion mode.
/// </summary>
/// <param name="token">The passed token.</param>
void BeforeHtml(HtmlToken token)
{
    // A doctype is not allowed at this point: report it and drop the token.
    if (token.Type == HtmlTokenType.DOCTYPE)
    {
        RaiseErrorOccurred(ErrorCode.DoctypeTagInappropriate);
        return;
    }

    // Comments are attached directly to the document.
    if (token.Type == HtmlTokenType.Comment)
    {
        AddComment(doc, token);
        return;
    }

    // An explicit <html> start tag creates the root element from that token.
    if (token.Type == HtmlTokenType.StartTag && ((HtmlTagToken)token).Name == HTMLHtmlElement.Tag)
    {
        AddElementToDocument(new HTMLHtmlElement(), token);

        //TODO
        //If the Document is being loaded as part of navigation of a browsing context, then:
        //  if the newly created element has a manifest attribute whose value is not the empty string,
        //    then resolve the value of that attribute to an absolute URL, relative to the newly created element,
        //    and if that is successful, run the application cache selection algorithm with the result of applying
        //    the URL serializer algorithm to the resulting parsed URL with the exclude fragment flag set;
        //  otherwise, if there is no such attribute, or its value is the empty string, or resolving its value fails,
        //    run the application cache selection algorithm with no manifest. The algorithm must be passed the Document object.

        insert = HtmlTreeMode.BeforeHead;
        return;
    }

    // End tags are reported as errors and dropped, except for those names accepted by
    // IsHtmlBodyOrBreakRowElement(true), which fall through to the default handling below.
    if (token.Type == HtmlTokenType.EndTag && !(((HtmlTagToken)token).Name.IsHtmlBodyOrBreakRowElement(true)))
    {
        RaiseErrorOccurred(ErrorCode.TagCannotEndHere);
        return;
    }

    // Ignorable tokens are skipped without any further action.
    if (token.IsIgnorable)
        return;

    // Anything else: create the root element from a synthetic <html> start tag,
    // then let the "before head" mode process the current token.
    AddElementToDocument(new HTMLHtmlElement(), HtmlToken.OpenTag(HTMLHtmlElement.Tag));

    //TODO
    //If the Document is being loaded as part of navigation of a browsing context, then:
    //  run the application cache selection algorithm with no manifest, passing it the Document object.

    insert = HtmlTreeMode.BeforeHead;
    BeforeHead(token);
}