/// <summary>
/// Checks that a heading keeps block content such as a div, while a heading opened
/// inside another heading ends up serialized as its following sibling.
/// </summary>
public void testHeaderContents()
        {
            // Browsers let h* tags (h1 .. h9) hold any content except another h*. No spec says so
            // (specs define headings as phrasing content only); the test follows reality over theory.
            const string markup   = "<h1>Hello <div>There</div> now</h1> <h2>More <h3>Content</h3></h2>";
            const string expected = "<h1>Hello <div>There</div> now</h1> <h2>More </h2><h3>Content</h3>";

            Document parsed = NSoupClient.Parse(markup);
            string   actual = TextUtil.StripNewLines(parsed.Body.Html());

            Assert.AreEqual(expected, actual);
        }
        /// <summary>
        /// Parses a paragraph opened inside a bold element that is closed before the
        /// paragraph ends, and checks the serialized body.
        /// </summary>
        public void handlesMisnestedTagsBP()
        {
            // whatwg case: <b><p></b></p>
            const string misnested = "<b>1<p>2</b>3</p>";
            const string expected  = "<b>1</b>\n<p><b>2</b>3</p>";

            string actual = NSoupClient.Parse(misnested).Body.Html();

            Assert.AreEqual(expected, actual);
        }
        /// <summary>
        /// Checks that the charset from an unquoted, malformed meta tag is readable
        /// as a <c>charset</c> attribute on the selected meta elements.
        /// </summary>
        public void findsCharsetInMalformedMeta()
        {
            // This markup is the example cited as the reason for html5's <meta charset> element.
            const string malformedMeta = "<meta http-equiv=Content-Type content=text/html; charset=gb2312>";

            Elements metas   = NSoupClient.Parse(malformedMeta).Select("meta");
            string   charset = metas.Attr("charset");

            Assert.AreEqual("gb2312", charset);
        }
        /// <summary>
        /// Parses a document with text scattered around html, head and body tags and
        /// checks the full serialized document.
        /// </summary>
        public void normalisesDocument()
        {
            const string scattered = "<!doctype html>One<html>Two<head>Three<link></head>Four<body>Five </body>Six </html>Seven ";
            const string expected  = "<!DOCTYPE html><html><head></head><body>OneTwoThree<link />FourFive Six Seven </body></html>";

            Document parsed = NSoupClient.Parse(scattered);
            string   actual = TextUtil.StripNewLines(parsed.Html());

            Assert.AreEqual(expected, actual);
        }
        /// <summary>
        /// Checks that a table placed after a frameset does not appear in the serialized document.
        /// </summary>
        public void ignoresContentAfterFrameset()
        {
            const string withTrailingTable = "<html><head><title>One</title></head><frameset><frame /><frame /></frameset><table></table></html>";
            const string expected          = "<html><head><title>One</title></head><frameset><frame /><frame /></frameset></html>";

            Document parsed = NSoupClient.Parse(withTrailingTable);
            string   actual = TextUtil.StripNewLines(parsed.Html());

            // Expected output has no body and no table; getting here also means no crash.
            Assert.AreEqual(expected, actual);
        }
        /// <summary>
        /// Checks that unclosed formatting elements, including their attributes and a
        /// repeated bold, are reproduced in the paragraph that follows.
        /// </summary>
        public void reconstructFormattingElements()
        {
            // Covers attributes and more than one <b>.
            const string unclosed = "<p><b class=one>One <i>Two <b>Three</p><p>Hello</p>";
            const string expected = "<p><b class=\"one\">One <i>Two <b>Three</b></i></b></p>\n<p><b class=\"one\"><i><b>Hello</b></i></b></p>";

            string actual = NSoupClient.Parse(unclosed).Body.Html();

            Assert.AreEqual(expected, actual);
        }
Exemple #7
0
        /// <summary>
        /// Checks that a tag cut off inside a quoted attribute value yields a document with empty text.
        /// </summary>
        public void dropsUnterminatedAttribute()
        {
            // NSoupClient used to parse this to <p id="foo">, but whatwg and webkit drop it.
            const string truncatedTag = "<p id=\"foo";

            string text = NSoupClient.Parse(truncatedTag).Text();

            Assert.AreEqual("", text);
        }
        /// <summary>
        /// Checks that a slash ending an unquoted attribute value is kept as the value
        /// rather than treated as a self-closing marker.
        /// </summary>
        public void handlesSolidusAtAttributeEnd()
        {
            // [<a href=/>link</a>] must come out as [<a href="/">link</a>], not [<a href="" /><a>link</a>].
            const string anchor   = "<a href=/>link</a>";
            const string expected = "<a href=\"/\">link</a>";

            string actual = NSoupClient.Parse(anchor).Body.Html();

            Assert.AreEqual(expected, actual);
        }
        /// <summary>
        /// Checks that repeated closing body/html tags still leave three paragraphs,
        /// all of them direct children of the body.
        /// </summary>
        public void handlesMultiClosingBody()
        {
            const string repeatedClosers = "<body><p>Hello</body><p>there</p></body></body></html><p>now";

            Document parsed = NSoupClient.Parse(repeatedClosers);

            int paragraphCount = parsed.Select("p").Count;
            Assert.AreEqual(3, paragraphCount);

            int bodyChildCount = parsed.Body.Children.Count;
            Assert.AreEqual(3, bodyChildCount);
        }
Exemple #10
0
        /// <summary>
        /// Checks that tags with unknown namespace prefixes round-trip through the parser.
        /// </summary>
        public void handlesUnknownNamespaceTags()
        {
            // Strictly, the first foo:bar should not be allowed to self-close when parsed in html mode.
            const string namespaced = "<foo:bar id='1' /><abc:def id=2>Foo<p>Hello</p></abc:def><foo:bar>There</foo:bar>";
            const string expected   = "<foo:bar id=\"1\" /><abc:def id=\"2\">Foo<p>Hello</p></abc:def><foo:bar>There</foo:bar>";

            Document parsed = NSoupClient.Parse(namespaced);
            string   actual = TextUtil.StripNewLines(parsed.Body.Html());

            Assert.AreEqual(expected, actual);
        }
        /// <summary>
        /// Traverses the first div with a <c>TestNodeVisitor</c> and checks the text it accumulated.
        /// </summary>
        public void traverse()
        {
            const string expected = "<div><p><#text></#text></p></div>";

            Document      parsed   = NSoupClient.Parse("<div><p>Hello</p></div><div>There</div>");
            StringBuilder visited  = new StringBuilder();
            Element       firstDiv = parsed.Select("div").First;

            firstDiv.Traverse(new TestNodeVisitor(visited));

            Assert.AreEqual(expected, visited.ToString());
        }
Exemple #12
0
        /// <summary>
        /// Checks that a textarea's content is exposed both as its text and as its value.
        /// </summary>
        public void handlesTextArea()
        {
            const string content = "Hello";

            Elements textareas = NSoupClient.Parse("<textarea>Hello</textarea>").Select("textarea");

            Assert.AreEqual(content, textareas.Text);
            Assert.AreEqual(content, textareas.Val());
        }
Exemple #13
0
        /// <summary>
        /// Parses a bold element closed while an italic inside it is still open, and
        /// checks the serialized body.
        /// </summary>
        public void handlesMisnestedTagsBI()
        {
            // whatwg case: <b><i></b></i>
            const string misnested = "<p>1<b>2<i>3</b>4</i>5</p>";
            const string expected  = "<p>1<b>2<i>3</i></b><i>4</i>5</p>";

            string actual = NSoupClient.Parse(misnested).Body.Html();

            // Adoption agency runs on </b>; formatters are reconstructed at the 4.
            Assert.AreEqual(expected, actual);
        }
Exemple #14
0
        /// <summary>
        /// Checks that a base tag without an href leaves the base URI passed to the
        /// parser in effect when resolving <c>abs:href</c>.
        /// </summary>
        public void handlesBaseWithoutHref()
        {
            const string markup  = "<head><base target='_blank'></head><body><a href=/foo>Test</a></body>";
            const string baseUri = "http://example.com/";

            Element anchor = NSoupClient.Parse(markup, baseUri).Select("a").First;

            Assert.AreEqual("/foo", anchor.Attr("href"));
            Assert.AreEqual("http://example.com/foo", anchor.Attr("abs:href"));
        }
Exemple #15
0
        /// <summary>
        /// Parses a frameset document and checks the serialized output, which contains no body element.
        /// </summary>
        public void handlesFrames()
        {
            const string framesetPage = "<html><head><script></script><noscript></noscript></head><frameset><frame src=foo></frame><frame src=foo></frameset></html>";
            const string expected     = "<html><head><script></script><noscript></noscript></head><frameset><frame src=\"foo\" /><frame src=\"foo\" /></frameset></html>";

            Document parsed = NSoupClient.Parse(framesetPage);
            string   actual = TextUtil.StripNewLines(parsed.Html());

            // The body must not be auto-vivified.
            Assert.AreEqual(expected, actual);
        }
Exemple #16
0
        /// <summary>
        /// Checks that table cells appearing outside any table are dropped, leaving
        /// their content as plain text and paragraphs.
        /// </summary>
        public void discardsNakedTds()
        {
            // NSoupClient used to turn this into an implicit table, but browsers make it a text run.
            const string nakedCells = "<td>Hello<td><p>There<p>now";
            const string expected   = "Hello<p>There</p><p>now</p>";

            Document parsed = NSoupClient.Parse(nakedCells);
            string   actual = TextUtil.StripNewLines(parsed.Body.Html());

            // <tbody> is introduced if no implicitly creating table, but allows tr to be directly under table
            Assert.AreEqual(expected, actual);
        }
Exemple #17
0
        /// <summary>
        /// Checks that cloning a document keeps its doctype: the clone serializes the
        /// same as the source and matches the expected markup.
        /// </summary>
        public void testClonesDeclarations()
        {
            const string expected = "<!DOCTYPE html><html><head><title>Doctype test</title></head><body></body></html>";

            Document source = NSoupClient.Parse("<!DOCTYPE html><html><head><title>Doctype test");
            Document copy   = (Document)source.Clone();

            Assert.AreEqual(source.Html(), copy.Html());

            string copyHtml = TextUtil.StripNewLines(copy.Html());
            Assert.AreEqual(expected, copyHtml);
        }
Exemple #18
0
        /// <summary>
        /// Checks that a doctype split over several lines is serialized on a single
        /// line with single spaces between its parts.
        /// </summary>
        public void handlesWhitespaceInoDocType()
        {
            const string multiLineDoctype =
                "<!DOCTYPE html\r\n" +
                "      PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\r\n" +
                "      \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">";
            const string expected =
                "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">";

            Document parsed = NSoupClient.Parse(multiLineDoctype);

            // The doctype is the first child node of the document.
            string actual = parsed.ChildNodes[0].OuterHtml();

            Assert.AreEqual(expected, actual);
        }
Exemple #19
0
        /// <summary>
        /// Checks that a bare body snippet parses into a document whose text keeps the original word order.
        /// </summary>
        public void createsStructureFromBodySnippet()
        {
            // "bar baz" naturally lands in the body, but "foo" lands in the root; normalisation
            // has to move it to the start of the body.
            const string snippet = "foo <b>bar</b> baz";

            string text = NSoupClient.Parse(snippet).Text();

            Assert.AreEqual("foo bar baz", text);
        }
 /// <summary>
 /// Parses the xml-test.xml resource stream with an explicitly supplied XML parser
 /// and checks the serialized document.
 /// </summary>
 public void testSupplyParserToDataStream()
 {
     const string expected = "<doc><val>One<val>Two</val>Three</val></doc>";

     using (Stream xmlStream = getFile("Test.htmltests.xml-test.xml"))
     {
         // Charset argument is null; the base URI and the XML parser are passed explicitly.
         Document parsed = NSoupClient.Parse(xmlStream, null, "http://foo.com", NSoup.Parse.Parser.XmlParser());
         string   actual = TextUtil.StripNewLines(parsed.Html());

         Assert.AreEqual(expected, actual);
     }
 }
Exemple #21
0
        /// <summary>
        /// Checks that base entities are recognised without a trailing semicolon while
        /// extended entities are recognised only with one.
        /// </summary>
        public void relaxedBaseEntityMatchAndStrictExtendedMatch()
        {
            // Extended entities need a ; at the end to match; base entities do not.
            const string entities = "&amp &quot &reg &icy &hopf &icy; &hopf;";

            Document parsed = NSoupClient.Parse(entities);

            // Only changes how output is escaped, to make the expectation easier to read.
            parsed.OutputSettings().EscapeMode = Entities.EscapeMode.Extended;

            string expected = StringUtil.NormaliseWhitespace("&amp; &quot; &reg; &amp;icy &amp;hopf &icy; &hopf;");
            string actual   = parsed.Body.Html();

            Assert.AreEqual(expected, actual);
        }
Exemple #22
0
        /// <summary>
        /// Checks that an unknown entity name that merely starts with a known one
        /// (<c>&amp;clubsuite;</c>) is left as literal text, while the exact name
        /// (<c>&amp;clubsuit;</c>) is decoded to the club-suit character.
        /// </summary>
        public void doesNotFindShortestMatchingEntity()
        {
            // previous behaviour was to identify a possible entity, then chomp down the string until a match was found.
            // (as defined in html5.) However in practice that led to spurious matches against the author's intent.
            string   html = "One &clubsuite; &clubsuit;";
            Document doc  = NSoupClient.Parse(html);

            // \u2663 is the BLACK CLUB SUIT character that &clubsuit; decodes to. It is written as an
            // escape because the literal had been corrupted by a wrong source-file encoding.
            Assert.AreEqual(StringUtil.NormaliseWhitespace("One &amp;clubsuite; \u2663"), doc.Body.Html());
        }
Exemple #23
0
        /// <summary>
        /// Checks that entity names mixing letters and digits are kept as entities in
        /// the paragraph's HTML and decoded to characters in its text.
        /// </summary>
        public void letterDigitEntities()
        {
            const string entityRun = "&sup1;&sup2;&sup3;&frac14;&frac12;&frac34;";

            Element paragraph = NSoupClient.Parse("<p>&sup1;&sup2;&sup3;&frac14;&frac12;&frac34;</p>").Select("p").First;

            Assert.AreEqual(entityRun, paragraph.Html());
            Assert.AreEqual("¹²³¼½¾", paragraph.Text());
        }
Exemple #24
0
        /// <summary>
        /// Parses a table containing a stray bold element and stray text, and checks
        /// where each ends up in the serialized body.
        /// </summary>
        public void handlesUnexpectedMarkupInTables()
        {
            // whatwg: exercises markers in the active formatting list (without them the <b> would land
            // inside the table) and foster parenting.
            const string strayMarkup = "<table><b><tr><td>aaa</td></tr>bbb</table>ccc";
            const string expected    = "<b></b><b>bbb</b><table><tbody><tr><td>aaa</td></tr></tbody></table><b>ccc</b>";

            Document parsed = NSoupClient.Parse(strayMarkup);
            string   actual = TextUtil.StripNewLines(parsed.Body.Html());

            Assert.AreEqual(expected, actual);
        }
Exemple #25
0
        /// <summary>
        /// Checks that a span wrapped around table rows inside a tbody ends up with no
        /// child elements, and that the document still contains exactly one table.
        /// </summary>
        public void handlesSpanInTBody()
        {
            // test for bug 64
            string   h   = "<table><tbody><span class='1'><tr><td>One</td></tr><tr><td>Two</td></tr></span></tbody></table>";
            Document doc = NSoupClient.Parse(h);

            // Arguments are (expected, actual) so a failure message reports the values the right way round.
            Assert.AreEqual(0, doc.Select("span").First.Children.Count); // the span gets closed
            Assert.AreEqual(1, doc.Select("table").Count);               // only one table
        }
Exemple #26
0
        /// <summary>
        /// Checks that <c>&amp;amp;</c> is decoded to an ampersand in both an attribute value and element text.
        /// </summary>
        public void handlesEscapedData()
        {
            const string escaped = "<div title='Surf &amp; Turf'>Reef &amp; Beef</div>";

            Element firstDiv = NSoupClient.Parse(escaped).GetElementsByTag("div")[0];

            Assert.AreEqual("Surf & Turf", firstDiv.Attr("title"));
            Assert.AreEqual("Reef & Beef", firstDiv.Text());
        }
Exemple #27
0
        /// <summary>
        /// Wraps every paragraph in a div and checks that each paragraph gets its own wrapper.
        /// </summary>
        public void wrapDiv()
        {
            const string paragraphs = "<p><b>This</b> is <b>jsoup</b>.</p> <p>How do you like it?</p>";
            const string expected   = "<div><p><b>This</b> is <b>jsoup</b>.</p></div> <div><p>How do you like it?</p></div>";

            Document parsed = NSoupClient.Parse(paragraphs);
            parsed.Select("p").Wrap("<div></div>");

            string actual = TextUtil.StripNewLines(parsed.Body.Html());

            Assert.AreEqual(expected, actual);
        }
        /// <summary>
        /// Fetches the HTML for the item at <paramref name="index"/> through
        /// <c>WebHelper.GetHtmlCodeByWebClientWithGzip</c> and returns the <c>_href</c>
        /// attribute of the first element matching <c>a#fn_pc_download</c>.
        /// </summary>
        /// <param name="index">Index into <c>Items</c> of the entry whose <c>Url</c> is fetched.</param>
        /// <returns>The value of the <c>_href</c> attribute on the first matching anchor.</returns>
        public String Down(int index)
        {
            String pageHtml = WebHelper.GetHtmlCodeByWebClientWithGzip(Items[index].Url, "utf-8");

            // NOTE(review): there is no guard for a page without the anchor; confirm what First
            // yields on an empty match before relying on this for arbitrary pages.
            Element downloadLink = NSoupClient.Parse(pageHtml).Select("a#fn_pc_download").First;

            return downloadLink.Attr("_href");
        }
Exemple #29
0
        /// <summary>
        /// Checks attribute parsing when CR/LF pairs separate the tag name, attribute
        /// names, the equals sign and values.
        /// </summary>
        public void handlesNewLinesAndReturns()
        {
            const string multiLineTag = "<a\r\nfoo='bar\r\nqux'\r\nbar\r\n=\r\ntwo>One</a>";

            Element anchor = NSoupClient.Parse(multiLineTag).Select("a").First;

            Assert.AreEqual(2, anchor.Attributes.Count);

            // Newlines inside quoted attribute values are currently preserved. todo: confirm whether they should be.
            Assert.AreEqual("bar\r\nqux", anchor.Attr("foo"));
            Assert.AreEqual("two", anchor.Attr("bar"));
        }
Exemple #30
0
        /// <summary>
        /// Checks that an unterminated textarea ends at the next paragraph tag instead
        /// of swallowing the rest of the input.
        /// </summary>
        public void parsesUnterminatedTextarea()
        {
            // The textarea must not run to the end of input; it breaks at the <p>.
            Document parsed   = NSoupClient.Parse("<body><p><textarea>one<p>two");
            Element  textarea = parsed.Select("textarea").First;

            Assert.AreEqual("one", textarea.Text());

            Element secondParagraph = parsed.Select("p")[1];
            Assert.AreEqual("two", secondParagraph.Text());
        }