Пример #1
0
        /// <summary>
        /// Loads the same regression document under the en-US and tr-TR thread cultures and
        /// checks that the selected heading has the same inner HTML in both cases and that the
        /// second parse reports no errors.
        /// </summary>
        public void CompareLowerCulture()
        {
            const string headingXPath = "//div[@id='mainContents']/h2";
            string html = File.ReadAllText(Path.Combine(_contentDirectory, "regression.html"));

            // The current culture is per-thread state that outlives this method; remember it so
            // the test does not leave tr-TR active for whatever runs next on this thread.
            CultureInfo originalCulture = Thread.CurrentThread.CurrentCulture;

            try
            {
                // Pass 1: parse under en-US.
                Thread.CurrentThread.CurrentCulture = CultureInfo.CreateSpecificCulture("en-US");
                Candy.HtmlDocument doc1 = new Candy.HtmlDocument();
                doc1.LoadHtml(html);
                HtmlNode node1 = doc1.DocumentNode.SelectSingleNode(headingXPath);

                // Pass 2: parse under tr-TR.
                Thread.CurrentThread.CurrentCulture = CultureInfo.CreateSpecificCulture("tr-TR");
                Candy.HtmlDocument doc2 = new Candy.HtmlDocument();
                doc2.LoadHtml(html);

                // The value is not inspected; the getter call is kept from the original test.
                // NOTE(review): presumably this exercises serialization under tr-TR - confirm.
                var s = doc2.DocumentNode.OuterHtml;

                HtmlNode node2 = doc2.DocumentNode.SelectSingleNode(headingXPath);

                // Asserted unconditionally: guarding this with "if (a == b)" would make the
                // comparison impossible to fail.
                Assert.AreEqual(node1?.InnerHtml, node2?.InnerHtml);
                Assert.AreEqual(0, doc2.DocumentNode.OwnerDocument.ParseErrors.Count());
            }
            finally
            {
                Thread.CurrentThread.CurrentCulture = originalCulture;
            }
        }
Пример #2
0
        /// <summary>
        /// Parses a document containing well-formed and malformed comment markers and checks
        /// that exactly one h1 element and four comment nodes are selected.
        /// </summary>
        public void TestCommentNode()
        {
            var html =
                @"<!DOCTYPE html>
<html>
<body>
<!--title='Title' >
<!--title='Title'--!>
<!--title = 'Title'-->
<!--title='Title'--!>
<h1>This is <b>bold</b> headddding</h1>
	<p>This is <u>underlinyed</u> paragraph</p>
	<h2>This is <i>italic</i> heading</h2>
</body>
</html> ";

            var htmlDoc = new Candy.HtmlDocument();

            htmlDoc.LoadHtml(html);

            var h1       = htmlDoc.DocumentNode.SelectNodes("//h1");
            var comments = htmlDoc.DocumentNode.SelectNodes("//comment()");

            // Expected value goes first so a failure message reports the values the right way round.
            Assert.AreEqual(1, h1.Count);
            Assert.AreEqual(4, comments.Count);
        }
Пример #3
0
        /// <summary>
        /// With <c>HtmlDocument.DisableBehaviorTagP</c> set to false, checks that a div nested
        /// inside a p element stays a child of that p: the document has a single p root with
        /// three children ("Begin", "Inner", "End").
        /// </summary>
        public void ChangesToPHandlingFalse()
        {
            var input = "<p>Begin<div>Inner</div>End</p>";

            HtmlDocument.DisableBehaviorTagP = false;

            // Everything that runs while the static flag is flipped lives inside the try, so the
            // finally block resets it even when parsing (not only an assertion) throws.
            try
            {
                var doc = new Candy.HtmlDocument();
                doc.LoadHtml(input);
                var docNode = doc.DocumentNode;

                // Check tree is correct
                Assert.AreEqual(1, docNode.ChildNodes.Count);
                Assert.AreEqual("p", docNode.ChildNodes[0].Name);

                var pRootNode = docNode.ChildNodes[0];
                Assert.AreEqual(3, pRootNode.ChildNodes.Count);
                Assert.AreEqual("Begin", pRootNode.ChildNodes[0].InnerText);
                Assert.AreEqual("Inner", pRootNode.ChildNodes[1].InnerText);
                Assert.AreEqual("End", pRootNode.ChildNodes[2].InnerText);
            }
            finally
            {
                // Same post-condition as before: the flag is left set to true.
                HtmlDocument.DisableBehaviorTagP = true;
            }
        }
Пример #4
0
        /// <summary>
        /// Tries to assign an Id to every child of the "foo" div (a span followed by text) and
        /// checks that exactly one assignment succeeds while at least one throws.
        /// </summary>
        public void checkAttributForTextComment()
        {
            var doc = new Candy.HtmlDocument();

            doc.LoadHtml(@"<html><body><div id='foo'><span> some</span> text</div></body></html>");
            var       div       = doc.GetElementbyId("foo");
            int       count     = 0;
            Exception exception = null;

            foreach (var child in div.ChildNodes)
            {
                try
                {
                    child.Id = "1";
                    count++;
                }
                catch (Exception e)
                {
                    // Deliberately captured rather than rethrown: the test asserts below that
                    // a failure happened for at least one child.
                    exception = e;
                }
            }

            // Expected value goes first so a failure message reports the values the right way round.
            Assert.AreEqual(1, count);
            Assert.IsNotNull(exception);
        }
Пример #5
0
        /// <summary>
        /// Checks the serialized form of two img tags whose attributes are separated by '/'
        /// characters: the expected output separates the attributes with spaces instead.
        /// </summary>
        public void TestEmptyTag_Single()
        {
            const string expectedOuterHtml = @"<img src=""x"" onerror=""alert('onerror1')""><img src=""x"" onerror=""alert('onerror2')"">";

            var document = new Candy.HtmlDocument();
            document.LoadHtml("<img src=\"x\"/onerror=\"alert('onerror1')\"><img/src=\"x\"/onerror=\"alert('onerror2')\">");

            var actualOuterHtml = document.DocumentNode.OuterHtml;

            Assert.AreEqual(expectedOuterHtml, actualOuterHtml);
        }
Пример #6
0
        /// <summary>
        /// Checks the InnerText reported for head, script, style, body and html nodes of an
        /// XHTML document, first with a default document and then with
        /// <c>BackwardCompatibility</c> switched off.
        /// </summary>
        public void ScriptingText()
        {
            var html = @"<?xml version=""1.0"" encoding=""UTF-8"" ?>
<html xmlns=""http://www.w3.org/1999/xhtml"">
<head>
    <title>SEE title</title>
	<script>SEE script </script>
	<style>SEE style</style>
</head>
<body>
<script>NOTSEE script</script>
<div>222<script>NOTSEE script</script>
<style>NOTSEE style</style></div>
</body>
</html>";

            // Pass 1: document left with its default settings.
            var defaultDoc = new Candy.HtmlDocument();
            defaultDoc.LoadHtml(html);
            var defaultRoot = defaultDoc.DocumentNode;

            var headText       = defaultRoot.SelectSingleNode("//head").InnerText;
            var scriptText     = defaultRoot.SelectSingleNode("//script").InnerText;
            var styleText      = defaultRoot.SelectSingleNode("//style").InnerText;
            var bodyText       = defaultRoot.SelectSingleNode("//body").InnerText;
            var htmlText       = defaultRoot.SelectSingleNode("//html").InnerText;
            var bodyScriptText = defaultRoot.SelectSingleNode("//body/script").InnerText;

            Assert.AreEqual("\r\n    SEE title\r\n\tSEE script \r\n\tSEE style\r\n", headText);
            Assert.AreEqual("SEE script ", scriptText);
            Assert.AreEqual("SEE style", styleText);
            Assert.AreEqual("\r\n\r\n222\r\n\r\n", bodyText);
            Assert.AreEqual("\r\n\r\n    SEE title\r\n\t\r\n\t\r\n\r\n\r\n\r\n222\r\n\r\n\r\n", htmlText);
            Assert.AreEqual("NOTSEE script", bodyScriptText);

            // Pass 2: same markup, BackwardCompatibility disabled before loading.
            var strictDoc = new Candy.HtmlDocument { BackwardCompatibility = false };
            strictDoc.LoadHtml(html);
            var strictRoot = strictDoc.DocumentNode;

            var strictHeadText       = strictRoot.SelectSingleNode("//head").InnerText;
            var strictScriptText     = strictRoot.SelectSingleNode("//script").InnerText;
            var strictStyleText      = strictRoot.SelectSingleNode("//style").InnerText;
            var strictBodyText       = strictRoot.SelectSingleNode("//body").InnerText;
            var strictHtmlText       = strictRoot.SelectSingleNode("//html").InnerText;
            var strictBodyScriptText = strictRoot.SelectSingleNode("//body/script").InnerText;

            Assert.AreEqual("    SEE titleSEE script SEE style", strictHeadText);
            Assert.AreEqual("SEE script ", strictScriptText);
            Assert.AreEqual("SEE style", strictStyleText);
            Assert.AreEqual("222", strictBodyText);
            Assert.AreEqual("    SEE title222", strictHtmlText);
            Assert.AreEqual("NOTSEE script", strictBodyScriptText);
        }
Пример #7
0
        /// <summary>
        /// Checks that div markup written inside script and TEXTAREA elements is not selectable
        /// as div nodes, while the textarea's InnerHtml still contains the text "div".
        /// </summary>
        public void testTEXTAREA()
        {
            var document = new Candy.HtmlDocument();
            document.LoadHtml(@"<script><div>hello</div></script><TEXTAREA>Text in the <div>hello</div>area</TEXTAREA>");

            // SelectNodes is expected to return null (not an empty collection) when nothing matches.
            HtmlNodeCollection divNodes = document.DocumentNode.SelectNodes("//div");
            Assert.IsNull(divNodes);

            HtmlNode textArea = document.DocumentNode.SelectSingleNode("//textarea");
            var textAreaMarkup = textArea.InnerHtml;
            Assert.IsTrue(textAreaMarkup.Contains("div"));
        }
Пример #8
0
        /// <summary>
        /// Loads "overflow.html" from the content directory and checks that the fifth top-level
        /// node has 15 children and that parsing reported no errors.
        /// </summary>
        public void OverFlowNotEndTag()
        {
            string html = File.ReadAllText(Path.Combine(_contentDirectory, "overflow.html"));

            Candy.HtmlDocument doc1 = new Candy.HtmlDocument();
            doc1.LoadHtml(html);

            Assert.AreEqual(15, doc1.DocumentNode.ChildNodes[4].ChildNodes.Count);

            Assert.AreEqual(0, doc1.DocumentNode.OwnerDocument.ParseErrors.Count());
        }
Пример #9
0
        /// <summary>
        /// Checks how a div nested inside a p element is parsed here: the document node ends up
        /// with three siblings - p ("Begin"), div ("Inner") and a text node ("End").
        /// </summary>
        public void ChangesToPHandling()
        {
            var document = new Candy.HtmlDocument();
            document.LoadHtml("<p>Begin<div>Inner</div>End</p>");

            var root = document.DocumentNode;

            // Shape of the tree: three top-level nodes.
            Assert.AreEqual(3, root.ChildNodes.Count);

            // Node names, in document order.
            Assert.AreEqual("p", root.ChildNodes[0].Name);
            Assert.AreEqual("div", root.ChildNodes[1].Name);
            Assert.AreEqual("#text", root.ChildNodes[2].Name);

            // Text carried by each node.
            Assert.AreEqual("Begin", root.ChildNodes[0].InnerText);
            Assert.AreEqual("Inner", root.ChildNodes[1].InnerText);
            Assert.AreEqual("End", root.ChildNodes[2].InnerText);
        }
Пример #10
0
        /// <summary>
        /// Checks the XML output for an element whose name contains a colon: the expected
        /// output writes "value:element" as "_value3a_element" with an explicit closing tag,
        /// preceded by an XML declaration.
        /// </summary>
        public void SanitizeXmlElementNameWithColon()
        {
            var source = @"<RootElement xmlns:MyNamespace=""value"">
  <value:element />
</RootElement>";

            var document = new Candy.HtmlDocument();
            document.LoadHtml(source);

            // Output options are applied after loading, before serialization.
            document.OptionDefaultStreamEncoding = System.Text.Encoding.UTF8;
            document.OptionOutputAsXml           = true;
            document.OptionOutputOriginalCase    = true;

            var actualXml = document.DocumentNode.WriteTo();

            var expectedXml = @"<?xml version=""1.0"" encoding=""utf-8""?>" +
                              @"<RootElement xmlns:MyNamespace=""value"">
  <_value3a_element></_value3a_element>
</RootElement>";

            Assert.AreEqual(expectedXml, actualXml);
        }
Пример #11
0
        /// <summary>
        /// Checks StreamPosition, Line and LinePosition of the text inside a script element
        /// against the same values for an unknown "scrapt" element, both when the element
        /// starts the input and when it follows a leading line break.
        /// </summary>
        public void TextInsideScriptTagShouldHaveCorrectStreamPosition()
        {
            // Case 1: unknown element at the very start of the input.
            var plainDoc = new HtmlDocument();
            plainDoc.LoadHtml(@"<scrapt>foo</scrapt>");
            var plainText = plainDoc.DocumentNode.FirstChild.FirstChild;
            Assert.AreEqual(8, plainText.StreamPosition);
            Assert.AreEqual(1, plainText.Line);
            Assert.AreEqual(8, plainText.LinePosition);

            // Case 2: script element at the very start of the input.
            var scriptDoc = new HtmlDocument();
            scriptDoc.LoadHtml(@"<script>foo</script>");
            var scriptText = scriptDoc.DocumentNode.FirstChild.FirstChild;
            Assert.AreEqual(8, scriptText.StreamPosition);
            Assert.AreEqual(1, scriptText.Line);
            Assert.AreEqual(8, scriptText.LinePosition);

            // Case 3: unknown element on the second line; the element is the last child
            // because the leading line break comes before it.
            var plainDocSecondLine = new Candy.HtmlDocument();
            plainDocSecondLine.LoadHtml(@"
<scrapt>foo</scrapt>");
            var plainTextSecondLine = plainDocSecondLine.DocumentNode.LastChild.FirstChild;
            Assert.AreEqual(10, plainTextSecondLine.StreamPosition);
            Assert.AreEqual(2, plainTextSecondLine.Line);
            Assert.AreEqual(8, plainTextSecondLine.LinePosition);

            // Case 4: script element on the second line.
            var scriptDocSecondLine = new Candy.HtmlDocument();
            scriptDocSecondLine.LoadHtml(@"
<script>foo</script>");
            var scriptTextSecondLine = scriptDocSecondLine.DocumentNode.LastChild.FirstChild;
            Assert.AreEqual(10, scriptTextSecondLine.StreamPosition);
            Assert.AreEqual(2, scriptTextSecondLine.Line);
            Assert.AreEqual(8, scriptTextSecondLine.LinePosition);
        }
Пример #12
0
        /// <summary>
        /// Checks that an attribute added to a div keeps the exact value it was given and that
        /// the serialized markup matches the expected text, for values containing raw quotes and
        /// already-encoded quotes, with <c>BackwardCompatibility</c> both on and off.
        /// </summary>
        public void AttributeValue()
        {
            // Baseline: how the framework itself encodes the two quote characters.
            Assert.AreEqual("&quot;&#39;", WebUtility.HtmlEncode("\"'"));

            // The same five scenarios are run first with BackwardCompatibility = true and then
            // with false; the expected results are identical for both settings.
            foreach (var backwardCompatibility in new[] { true, false })
            {
                AssertAttributeValueRoundTrip(backwardCompatibility, "value1value2", false, "<div name=\"value1value2\">z</div>");
                AssertAttributeValueRoundTrip(backwardCompatibility, "value1\"value2", false, "<div name=\"value1&quot;value2\">z</div>");
                AssertAttributeValueRoundTrip(backwardCompatibility, "value1&quot;value2", false, "<div name=\"value1&quot;value2\">z</div>");
                AssertAttributeValueRoundTrip(backwardCompatibility, "value1'value2", true, "<div name='value1&#39;value2'>z</div>");
                AssertAttributeValueRoundTrip(backwardCompatibility, "value1&#39;value2", true, "<div name='value1&#39;value2'>z</div>");
            }
        }

        // Loads "<div>z</div>", adds a "name" attribute with the given value (optionally switching
        // it to single quotes), then asserts the stored value and the serialized inner HTML.
        private static void AssertAttributeValueRoundTrip(bool backwardCompatibility, string attributeValue, bool useSingleQuote, string expectedInnerHtml)
        {
            var input = "<div>z</div>";
            var doc   = new Candy.HtmlDocument();
            doc.BackwardCompatibility = backwardCompatibility;
            doc.LoadHtml(input);
            var divNode = doc.DocumentNode.ChildNodes[0];

            divNode.Attributes.Add("name", attributeValue);
            if (useSingleQuote)
            {
                divNode.Attributes[0].QuoteType = AttributeValueQuote.SingleQuote;
            }

            // The value must come back exactly as supplied; only the serialized form is encoded.
            Assert.AreEqual(attributeValue, divNode.Attributes[0].Value);
            Assert.AreEqual(expectedInnerHtml, doc.DocumentNode.InnerHtml);
        }