Exemple #1
0
        /// <summary>
        /// Parses the misnested markup &lt;b&gt;1&lt;p&gt;2&lt;/b&gt;3&lt;/p&gt; and compares the
        /// serialized document element against a manually assembled tree.
        /// </summary>
        public void TreeMisnestedTagsHeisenbergWithFurthest()
        {
            // 8.2.8.2 Misnested tags: <b><p></b></p>
            var doc = DocumentBuilder.Html(@"<b>1<p>2</b>3</p>");

            // Expected skeleton: <html><head></head><body>...</body></html>
            var root = new HTMLHtmlElement();
            root.AppendChild(new HTMLHeadElement());
            var body = root.AppendChild(new HTMLBodyElement());

            // First body child: <b>1</b>
            body.AppendChild(new HTMLElement() { NodeName = "b" })
                .AppendChild(new TextNode("1"));

            // Second body child: <p><b>2</b>3</p>
            var paragraph = body.AppendChild(new HTMLParagraphElement());
            paragraph.AppendChild(new HTMLElement() { NodeName = "b" })
                .AppendChild(new TextNode("2"));
            paragraph.AppendChild(new TextNode("3"));

            Assert.AreEqual(root.ToHtml(), doc.DocumentElement.ToHtml());
        }
Exemple #2
0
        /// <summary>
        /// Parses an anchor that is re-opened inside a table and compares the serialized
        /// document element against a manually assembled tree.
        /// </summary>
        public void TreeNonConformingTable()
        {
            // 8.2.5.4.7 The "in body" insertion mode - "In the non-conforming ..."
            var doc = DocumentBuilder.Html(@"<a href=""a"">a<table><a href=""b"">b</table>x");

            // Expected skeleton: <html><head></head><body>...</body></html>
            var root = new HTMLHtmlElement();
            root.AppendChild(new HTMLHeadElement());
            var body = root.AppendChild(new HTMLBodyElement());

            // First body child: <a href="a"> holding the text "a", the nested
            // <a href="b">b</a> and finally the (empty) table.
            var outerAnchor = body.AppendChild(new HTMLAnchorElement().SetAttribute("href", "a"));
            outerAnchor.AppendChild(new TextNode("a"));
            outerAnchor.AppendChild(new HTMLAnchorElement().SetAttribute("href", "b"))
                .AppendChild(new TextNode("b"));
            outerAnchor.AppendChild(new HTMLTableElement());

            // Second body child: <a href="b">x</a>
            body.AppendChild(new HTMLAnchorElement().SetAttribute("href", "b"))
                .AppendChild(new TextNode("x"));

            Assert.AreEqual(root.ToHtml(), doc.DocumentElement.ToHtml());
        }
Exemple #3
0
        /// <summary>
        /// Parses text scattered in and around a table and expects a single text node
        /// "ABCD" in front of the table, which itself only contains a section with one row.
        /// </summary>
        public void TreeOneTextNodeTableBeforeABCD()
        {
            // One Text node before the table, containing "ABCD"
            var doc = DocumentBuilder.Html(@"A<table>B<tr>C</tr>D</table>");

            // Expected skeleton: <html><head></head><body>...</body></html>
            var root = new HTMLHtmlElement();
            root.AppendChild(new HTMLHeadElement());
            var body = root.AppendChild(new HTMLBodyElement());

            // The collected character data comes first ...
            body.AppendChild(new TextNode("ABCD"));

            // ... followed by table > table section > row.
            body.AppendChild(new HTMLTableElement())
                .AppendChild(new HTMLTableSectionElement())
                .AppendChild(new HTMLTableRowElement());

            Assert.AreEqual(root.ToHtml(), doc.DocumentElement.ToHtml());
        }
Exemple #4
0
        /// <summary>
        /// Parses a table containing unexpected markup and compares the serialized
        /// document element against a manually assembled tree.
        /// </summary>
        public void TreeUnexpectedTableMarkup()
        {
            // 8.2.8.3 Unexpected markup in tables
            var doc = DocumentBuilder.Html(@"<table><b><tr><td>aaa</td></tr>bbb</table>ccc");

            // Expected skeleton: <html><head></head><body>...</body></html>
            var root = new HTMLHtmlElement();
            root.AppendChild(new HTMLHeadElement());
            var body = root.AppendChild(new HTMLBodyElement());

            // An empty <b>, then <b>bbb</b>, both placed in front of the table.
            body.AppendChild(new HTMLElement() { NodeName = "b" });
            body.AppendChild(new HTMLElement() { NodeName = "b" })
                .AppendChild(new TextNode("bbb"));

            // table > table section > row > cell > "aaa"
            body.AppendChild(new HTMLTableElement())
                .AppendChild(new HTMLTableSectionElement())
                .AppendChild(new HTMLTableRowElement())
                .AppendChild(new HTMLTableCellElement())
                .AppendChild(new TextNode("aaa"));

            // Trailing <b>ccc</b> after the table.
            body.AppendChild(new HTMLElement() { NodeName = "b" })
                .AppendChild(new TextNode("ccc"));

            Assert.AreEqual(root.ToHtml(), doc.DocumentElement.ToHtml());
        }
Exemple #5
0
        /// <summary>
        /// Parses text and whitespace scattered in and around a table and expects the
        /// text "A BC" in front of the table plus a single space inside the table section.
        /// </summary>
        public void TreeOneTextNodeTableBeforeAspaceBC()
        {
            // One Text node before the table, containing "A BC" (A-space-B-C), and one Text node
            // inside the table (as a child of a tbody) with a single space character.
            var doc = DocumentBuilder.Html(@"A<table><tr> B</tr> </em>C</table>");

            // Expected skeleton: <html><head></head><body>...</body></html>
            var root = new HTMLHtmlElement();
            root.AppendChild(new HTMLHeadElement());
            var body = root.AppendChild(new HTMLBodyElement());

            // Text node in front of the table.
            body.AppendChild(new TextNode("A BC"));

            // table > table section, where the section holds the row and the lone space.
            var section = body.AppendChild(new HTMLTableElement())
                .AppendChild(new HTMLTableSectionElement());
            section.AppendChild(new HTMLTableRowElement());
            section.AppendChild(new TextNode(" "));

            Assert.AreEqual(root.ToHtml(), doc.DocumentElement.ToHtml());
        }
Exemple #6
0
        /// <summary>
        /// Parses markup with many unclosed &lt;b&gt; elements spread over several paragraphs
        /// and compares the serialized document element against a manually assembled tree.
        /// </summary>
        public void TreeUnclosedFormattingElements()
        {
            // 8.2.8.6 Unclosed formatting elements
            var doc = DocumentBuilder.Html(@"<!DOCTYPE html>
            <p><b class=x><b class=x><b><b class=x><b class=x><b>X<p>X<p><b><b class=x><b>X<p></b></b></b></b></b></b>X");

            // Expected skeleton: <html><head></head><body>...</body></html>
            var root = new HTMLHtmlElement();
            root.AppendChild(new HTMLHeadElement());
            var body = root.AppendChild(new HTMLBodyElement());

            // Paragraph 1: six nested <b> elements (x, x, -, x, x, -) around "X".
            body.AppendChild(new HTMLParagraphElement())
                .AppendChild(new HTMLElement() { NodeName = "b" }.SetAttribute("class", "x"))
                .AppendChild(new HTMLElement() { NodeName = "b" }.SetAttribute("class", "x"))
                .AppendChild(new HTMLElement() { NodeName = "b" })
                .AppendChild(new HTMLElement() { NodeName = "b" }.SetAttribute("class", "x"))
                .AppendChild(new HTMLElement() { NodeName = "b" }.SetAttribute("class", "x"))
                .AppendChild(new HTMLElement() { NodeName = "b" })
                .AppendChild(new TextNode("X"));

            // Paragraph 2: five nested <b> elements (x, -, x, x, -) around "X".
            body.AppendChild(new HTMLParagraphElement())
                .AppendChild(new HTMLElement() { NodeName = "b" }.SetAttribute("class", "x"))
                .AppendChild(new HTMLElement() { NodeName = "b" })
                .AppendChild(new HTMLElement() { NodeName = "b" }.SetAttribute("class", "x"))
                .AppendChild(new HTMLElement() { NodeName = "b" }.SetAttribute("class", "x"))
                .AppendChild(new HTMLElement() { NodeName = "b" })
                .AppendChild(new TextNode("X"));

            // Paragraph 3: eight nested <b> elements (x, -, x, x, -, -, x, -) around "X".
            body.AppendChild(new HTMLParagraphElement())
                .AppendChild(new HTMLElement() { NodeName = "b" }.SetAttribute("class", "x"))
                .AppendChild(new HTMLElement() { NodeName = "b" })
                .AppendChild(new HTMLElement() { NodeName = "b" }.SetAttribute("class", "x"))
                .AppendChild(new HTMLElement() { NodeName = "b" }.SetAttribute("class", "x"))
                .AppendChild(new HTMLElement() { NodeName = "b" })
                .AppendChild(new HTMLElement() { NodeName = "b" })
                .AppendChild(new HTMLElement() { NodeName = "b" }.SetAttribute("class", "x"))
                .AppendChild(new HTMLElement() { NodeName = "b" })
                .AppendChild(new TextNode("X"));

            // Paragraph 4: plain "X" without any formatting element.
            body.AppendChild(new HTMLParagraphElement())
                .AppendChild(new TextNode("X"));

            Assert.AreEqual(root.ToHtml(), doc.DocumentElement.ToHtml());
        }
Exemple #7
0
 /// <summary>
 /// Builds document &gt; html &gt; head by hand and checks that the document's
 /// Head property yields the head element that was appended.
 /// </summary>
 public void HtmlHasRightHeadElement()
 {
     var document = new HTMLDocument();
     var html = new HTMLHtmlElement();
     document.AppendChild(html);

     var expectedHead = new HTMLHeadElement();
     html.AppendChild(expectedHead);

     var actualHead = document.Head;
     Assert.AreEqual(expectedHead, actualHead);
 }
Exemple #8
0
 /// <summary>
 /// Builds document &gt; html &gt; body by hand and checks that the document's
 /// Body property yields the body element that was appended.
 /// </summary>
 public void HtmlHasRightBodyElement()
 {
     var document = new HTMLDocument();
     var html = new HTMLHtmlElement();
     document.AppendChild(html);

     var expectedBody = new HTMLBodyElement();
     html.AppendChild(expectedBody);

     var actualBody = document.Body;
     Assert.AreEqual(expectedBody, actualBody);
 }
Exemple #9
0
 /// <summary>
 /// Creates a new html root element, appends it to the document, sets it up
 /// from the given token and pushes it onto the list of open elements.
 /// </summary>
 /// <param name="token">The token which started this process.</param>
 void AddRoot(HtmlToken token)
 {
     var root = new HTMLHtmlElement();

     doc.AppendChild(root);
     SetupElement(root, token, false);
     open.Add(root);

     // The tokenizer accepts character data exactly when the root is not flagged as being in HTML.
     var rootIsInHtml = root.IsInHtml;
     tokenizer.AcceptsCharacterData = !rootIsInHtml;
 }
Exemple #10
0
        /// <summary>
        /// Switches to the fragment algorithm with the specified context element.
        /// The tokenizer state is chosen from the context's node name, a fresh html
        /// root is appended to the document and pushed onto the open elements, and the
        /// form pointer is set to the nearest form element among the context and its ancestors.
        /// </summary>
        /// <param name="context">The context element where the algorithm is applied to.</param>
        /// <exception cref="InvalidOperationException">The parser has already been started.</exception>
        /// <exception cref="ArgumentNullException">The given context is null.</exception>
        public void SwitchToFragment(Node context)
        {
            if (started)
                throw new InvalidOperationException("Fragment mode has to be activated before running the parser!");

            // Fail early with a descriptive exception instead of a NullReferenceException
            // when reading context.NodeName below.
            if (context == null)
                throw new ArgumentNullException("context");

            // Pick the initial tokenizer state according to the context element.
            switch (context.NodeName)
            {
                case HTMLTitleElement.Tag:
                case HTMLTextAreaElement.Tag:
                {
                    tokenizer.Switch(HtmlParseMode.RCData);
                    break;
                }
                case HTMLStyleElement.Tag:
                case HTMLSemanticElement.XmpTag:
                case HTMLIFrameElement.Tag:
                case HTMLNoElement.NoEmbedTag:
                case HTMLNoElement.NoFramesTag:
                {
                    tokenizer.Switch(HtmlParseMode.Rawtext);
                    break;
                }
                case HTMLScriptElement.Tag:
                {
                    tokenizer.Switch(HtmlParseMode.Script);
                    break;
                }
                case HTMLNoElement.NoScriptTag:
                {
                    // noscript content is only treated as raw text if scripting is enabled.
                    if (doc.IsScripting)
                        tokenizer.Switch(HtmlParseMode.Rawtext);

                    break;
                }
                case HTMLSemanticElement.PlaintextTag:
                {
                    tokenizer.Switch(HtmlParseMode.Plaintext);
                    break;
                }
            }

            // Create the root element that all parsed nodes will be attached to.
            var root = new HTMLHtmlElement();
            doc.AppendChild(root);
            open.Add(root);
            Reset(context);

            fragmentContext = context;
            tokenizer.AcceptsCharacterData = !AdjustedCurrentNode.IsInHtml;

            // Walk from the context element up the ancestor chain (context included)
            // and remember the first form element found, if any. A separate cursor
            // is used so that the parameter itself is not modified.
            for (var node = context; node != null; node = node.ParentNode)
            {
                var nearestForm = node as HTMLFormElement;

                if (nearestForm != null)
                {
                    form = nearestForm;
                    break;
                }
            }
        }
        /// <summary>
        /// See 8.2.5.4.2 The "before html" insertion mode.
        /// Handles a token that arrives before the html root element has been created.
        /// </summary>
        /// <param name="token">The passed token.</param>
        void BeforeHtml(HtmlToken token)
        {
            var type = token.Type;

            // A doctype is not allowed here: report it and drop the token.
            if (type == HtmlTokenType.DOCTYPE)
            {
                RaiseErrorOccurred(ErrorCode.DoctypeTagInappropriate);
                return;
            }

            // Comments are attached directly to the document.
            if (type == HtmlTokenType.Comment)
            {
                AddComment(doc, token);
                return;
            }

            // An explicit <html> start tag creates the root element from this token.
            if (type == HtmlTokenType.StartTag && ((HtmlTagToken)token).Name == HTMLHtmlElement.Tag)
            {
                AddElementToDocument(new HTMLHtmlElement(), token);

                //TODO
                //If the Document is being loaded as part of navigation of a browsing context, then:
                //  if the newly created element has a manifest attribute whose value is not the empty string,
                //    then resolve the value of that attribute to an absolute URL, relative to the newly created element,
                //    and if that is successful, run the application cache selection algorithm with the result of applying
                //    the URL serializer algorithm to the resulting parsed URL with the exclude fragment flag set;
                //  otherwise, if there is no such attribute, or its value is the empty string, or resolving its value fails,
                //    run the application cache selection algorithm with no manifest. The algorithm must be passed the Document object.
                insert = HtmlTreeMode.BeforeHead;
                return;
            }

            // End tags are rejected unless the name check below accepts them
            // (presumably html / head / body / br as listed in the spec - confirm against the helper).
            if (type == HtmlTokenType.EndTag && !((HtmlTagToken)token).Name.IsHtmlBodyOrBreakRowElement(true))
            {
                RaiseErrorOccurred(ErrorCode.TagCannotEndHere);
                return;
            }

            // Ignorable tokens are dropped silently.
            if (token.IsIgnorable)
                return;

            // Anything else: create the html root from a synthetic open tag and
            // let the "before head" mode process the current token.
            AddElementToDocument(new HTMLHtmlElement(), HtmlToken.OpenTag(HTMLHtmlElement.Tag));
            //TODO
            //If the Document is being loaded as part of navigation of a browsing context, then:
            //  run the application cache selection algorithm with no manifest, passing it the Document object.
            insert = HtmlTreeMode.BeforeHead;
            BeforeHead(token);
        }