// Parses regression.html once with the thread culture set to en-US and once with tr-TR,
// then checks that the same <h2> is selected with identical inner HTML under both
// cultures and that the tr-TR parse reports no parse errors.
public void CompareLowerCulture()
{
    string html = File.ReadAllText(Path.Combine(_contentDirectory, "regression.html"));
    const string headingXPath = "//div[@id='mainContents']/h2";

    // Remember the ambient culture so this test cannot leak tr-TR into whatever
    // runs next on the same thread.
    CultureInfo originalCulture = Thread.CurrentThread.CurrentCulture;
    try
    {
        // Pass 1: en-US.
        Thread.CurrentThread.CurrentCulture = CultureInfo.CreateSpecificCulture("en-US");
        Candy.HtmlDocument doc1 = new Candy.HtmlDocument();
        doc1.LoadHtml(html);
        HtmlNode node1 = doc1.DocumentNode.SelectSingleNode(headingXPath);

        // Pass 2: tr-TR.
        Thread.CurrentThread.CurrentCulture = CultureInfo.CreateSpecificCulture("tr-TR");
        Candy.HtmlDocument doc2 = new Candy.HtmlDocument();
        doc2.LoadHtml(html);

        // The original test serialized the whole tree at this point; keep doing so
        // so that serialization under tr-TR is still exercised. The value itself
        // is not inspected.
        string serialized = doc2.DocumentNode.OuterHtml;

        HtmlNode node2 = doc2.DocumentNode.SelectSingleNode(headingXPath);

        // Previously this assertion was wrapped in "if (a == b)", which made it
        // impossible to fail. Assert unconditionally so a culture-dependent
        // difference is actually reported.
        Assert.AreEqual(node1?.InnerHtml, node2?.InnerHtml);
        Assert.AreEqual(0, doc2.DocumentNode.OwnerDocument.ParseErrors.Count());
    }
    finally
    {
        Thread.CurrentThread.CurrentCulture = originalCulture;
    }
}
// Loads a page whose body starts with four "<!--" constructs (closed in different ways,
// one of them not closed with "--" at all) followed by ordinary markup, and checks that
// the parser yields exactly four comment nodes and still finds the single <h1>.
public void TestCommentNode()
{
    var html = @"<!DOCTYPE html> <html> <body> <!--title='Title' > <!--title='Title'--!> <!--title = 'Title'--> <!--title='Title'--!> <h1>This is <b>bold</b> headddding</h1> <p>This is <u>underlinyed</u> paragraph</p> <h2>This is <i>italic</i> heading</h2> </body> </html> ";

    var htmlDoc = new Candy.HtmlDocument();
    htmlDoc.LoadHtml(html);

    var h1 = htmlDoc.DocumentNode.SelectNodes("//h1");
    var comments = htmlDoc.DocumentNode.SelectNodes("//comment()");

    // SelectNodes yields null (not an empty collection) when nothing matches, so guard
    // before touching Count to get an assertion failure instead of a NullReferenceException.
    Assert.IsNotNull(h1);
    Assert.IsNotNull(comments);

    // Expected value first, actual second, so failure messages read correctly.
    Assert.AreEqual(1, h1.Count);
    Assert.AreEqual(4, comments.Count);
}
// With the static HtmlDocument.DisableBehaviorTagP switch turned off, checks that
// "<p>Begin<div>Inner</div>End</p>" parses to a single <p> root holding three children
// ("Begin", the <div>, "End"). The switch is put back to true afterwards.
public void ChangesToPHandlingFalse()
{
    var input = "<p>Begin<div>Inner</div>End</p>";

    HtmlDocument.DisableBehaviorTagP = false;
    try
    {
        // Parsing happens inside the try so the static switch is restored even if
        // LoadHtml itself throws.
        var doc = new Candy.HtmlDocument();
        doc.LoadHtml(input);
        var docNode = doc.DocumentNode;

        // Check tree is correct
        Assert.AreEqual(1, docNode.ChildNodes.Count);
        Assert.AreEqual("p", docNode.ChildNodes[0].Name);

        var pRootNode = docNode.ChildNodes[0];
        Assert.AreEqual(3, pRootNode.ChildNodes.Count);
        Assert.AreEqual("Begin", pRootNode.ChildNodes[0].InnerText);
        Assert.AreEqual("Inner", pRootNode.ChildNodes[1].InnerText);
        Assert.AreEqual("End", pRootNode.ChildNodes[2].InnerText);
    }
    finally
    {
        // The switch is static, so it must not stay false for tests that run later.
        HtmlDocument.DisableBehaviorTagP = true;
    }
}
// Tries to assign an Id to every direct child of <div id='foo'> (a <span> and a text
// node in this markup) and checks that the assignment succeeds for exactly one child
// and throws for at least one other.
public void checkAttributForTextComment()
{
    var doc = new Candy.HtmlDocument();
    doc.LoadHtml(@"<html><body><div id='foo'><span> some</span> text</div></body></html>");

    var div = doc.GetElementbyId("foo");
    // Fail with a clear assertion rather than a NullReferenceException if the lookup misses.
    Assert.IsNotNull(div);

    int assignedCount = 0;
    Exception lastFailure = null;

    foreach (var child in div.ChildNodes)
    {
        try
        {
            child.Id = "1";
            assignedCount++;
        }
        catch (Exception e)
        {
            // Deliberately broad: the test only cares that some child rejects the Id.
            lastFailure = e;
        }
    }

    // Expected value first, actual second, so failure messages read correctly.
    Assert.AreEqual(1, assignedCount);
    Assert.IsNotNull(lastFailure);
}
// Feeds two <img> tags that use "/" as a separator between the tag name and its
// attributes and checks the markup the document serializes back out.
public void TestEmptyTag_Single()
{
    const string expectedMarkup = @"<img src=""x"" onerror=""alert('onerror1')""><img src=""x"" onerror=""alert('onerror2')"">";

    Candy.HtmlDocument document = new Candy.HtmlDocument();
    document.LoadHtml("<img src=\"x\"/onerror=\"alert('onerror1')\"><img/src=\"x\"/onerror=\"alert('onerror2')\">");

    string actualMarkup = document.DocumentNode.OuterHtml;

    Assert.AreEqual(expectedMarkup, actualMarkup);
}
// Reads InnerText from <head>, the first <script>, the first <style>, <body>, <html> and
// the <script> directly under <body>, first with a default document and then with
// BackwardCompatibility switched off, and compares each value with a fixed expectation.
//
// NOTE(review): the first group of expectations contains "\r\n" and "\t", but the markup
// literal below holds no line breaks or tabs. The literal looks as if its original
// whitespace was collapsed - confirm against version control before relying on this test.
public void ScriptingText()
{
    var html = @"<?xml version=""1.0"" encoding=""UTF-8"" ?> <html xmlns=""http://www.w3.org/1999/xhtml""> <head> <title>SEE title</title> <script>SEE script </script> <style>SEE style</style> </head> <body> <script>NOTSEE script</script> <div>222<script>NOTSEE script</script> <style>NOTSEE style</style></div> </body> </html>";

    // Default document settings.
    Candy.HtmlDocument compatDoc = new Candy.HtmlDocument();
    compatDoc.LoadHtml(html);

    var compatRoot = compatDoc.DocumentNode;
    var compatHead = compatRoot.SelectSingleNode("//head").InnerText;
    var compatScript = compatRoot.SelectSingleNode("//script").InnerText;
    var compatStyle = compatRoot.SelectSingleNode("//style").InnerText;
    var compatBody = compatRoot.SelectSingleNode("//body").InnerText;
    var compatHtml = compatRoot.SelectSingleNode("//html").InnerText;
    var compatBodyScript = compatRoot.SelectSingleNode("//body/script").InnerText;

    Assert.AreEqual("\r\n SEE title\r\n\tSEE script \r\n\tSEE style\r\n", compatHead);
    Assert.AreEqual("SEE script ", compatScript);
    Assert.AreEqual("SEE style", compatStyle);
    Assert.AreEqual("\r\n\r\n222\r\n\r\n", compatBody);
    Assert.AreEqual("\r\n\r\n SEE title\r\n\t\r\n\t\r\n\r\n\r\n\r\n222\r\n\r\n\r\n", compatHtml);
    Assert.AreEqual("NOTSEE script", compatBodyScript);

    // Same reads with BackwardCompatibility turned off before loading.
    Candy.HtmlDocument nonCompatDoc = new Candy.HtmlDocument();
    nonCompatDoc.BackwardCompatibility = false;
    nonCompatDoc.LoadHtml(html);

    var nonCompatRoot = nonCompatDoc.DocumentNode;
    var nonCompatHead = nonCompatRoot.SelectSingleNode("//head").InnerText;
    var nonCompatScript = nonCompatRoot.SelectSingleNode("//script").InnerText;
    var nonCompatStyle = nonCompatRoot.SelectSingleNode("//style").InnerText;
    var nonCompatBody = nonCompatRoot.SelectSingleNode("//body").InnerText;
    var nonCompatHtml = nonCompatRoot.SelectSingleNode("//html").InnerText;
    var nonCompatBodyScript = nonCompatRoot.SelectSingleNode("//body/script").InnerText;

    Assert.AreEqual(" SEE titleSEE script SEE style", nonCompatHead);
    Assert.AreEqual("SEE script ", nonCompatScript);
    Assert.AreEqual("SEE style", nonCompatStyle);
    Assert.AreEqual("222", nonCompatBody);
    Assert.AreEqual(" SEE title222", nonCompatHtml);
    Assert.AreEqual("NOTSEE script", nonCompatBodyScript);
}
// Checks that <div> markup placed inside <script> and <TEXTAREA> is not turned into
// element nodes (no //div match at all), while the textarea's InnerHtml still carries
// the literal "div" text.
public void testTEXTAREA()
{
    Candy.HtmlDocument document = new Candy.HtmlDocument();
    document.LoadHtml(@"<script><div>hello</div></script><TEXTAREA>Text in the <div>hello</div>area</TEXTAREA>");

    // A null result means the XPath matched nothing.
    HtmlNodeCollection divNodes = document.DocumentNode.SelectNodes("//div");
    Assert.IsNull(divNodes);

    HtmlNode textArea = document.DocumentNode.SelectSingleNode("//textarea");
    string textAreaMarkup = textArea.InnerHtml;
    Assert.IsTrue(textAreaMarkup.Contains("div"));
}
// Loads overflow.html from the test content directory and checks that the fifth
// top-level node has 15 children and that parsing reported no errors.
public void OverFlowNotEndTag()
{
    string html = File.ReadAllText(Path.Combine(_contentDirectory, "overflow.html"));

    Candy.HtmlDocument doc1 = new Candy.HtmlDocument();
    doc1.LoadHtml(html);

    Assert.AreEqual(15, doc1.DocumentNode.ChildNodes[4].ChildNodes.Count);
    Assert.AreEqual(0, doc1.DocumentNode.OwnerDocument.ParseErrors.Count());
}
// Without touching any parser switches, checks that "<p>Begin<div>Inner</div>End</p>"
// is split into three top-level nodes: the <p>, the <div>, and a trailing text node.
public void ChangesToPHandling()
{
    Candy.HtmlDocument document = new Candy.HtmlDocument();
    document.LoadHtml("<p>Begin<div>Inner</div>End</p>");

    var topLevel = document.DocumentNode.ChildNodes;

    // Check tree is correct
    Assert.AreEqual(3, topLevel.Count);

    var paragraph = topLevel[0];
    var division = topLevel[1];
    var trailingText = topLevel[2];

    Assert.AreEqual("p", paragraph.Name);
    Assert.AreEqual("div", division.Name);
    Assert.AreEqual("#text", trailingText.Name);

    Assert.AreEqual("Begin", paragraph.InnerText);
    Assert.AreEqual("Inner", division.InnerText);
    Assert.AreEqual("End", trailingText.InnerText);
}
// Writes a document containing a "value:element" tag as XML and checks that the
// colon in the element name comes out as "_value3a_element".
public void SanitizeXmlElementNameWithColon()
{
    const string markup = @"<RootElement xmlns:MyNamespace=""value""> <value:element /> </RootElement>";
    const string expectedXml = @"<?xml version=""1.0"" encoding=""utf-8""?>" + @"<RootElement xmlns:MyNamespace=""value""> <_value3a_element></_value3a_element> </RootElement>";

    Candy.HtmlDocument document = new Candy.HtmlDocument();
    document.LoadHtml(markup);

    // Output options are applied after loading, exactly as the scenario requires.
    document.OptionDefaultStreamEncoding = System.Text.Encoding.UTF8;
    document.OptionOutputAsXml = true;
    document.OptionOutputOriginalCase = true;

    string actualXml = document.DocumentNode.WriteTo();

    Assert.AreEqual(expectedXml, actualXml);
}
// Checks StreamPosition / Line / LinePosition of the text node inside an unknown tag
// (<scrapt>) and inside <script>, first with the tag at the very start of the input
// and then with the tag preceded by one CR/LF line break.
public void TextInsideScriptTagShouldHaveCorrectStreamPosition()
{
    {
        var document = new HtmlDocument();
        document.LoadHtml(@"<scrapt>foo</scrapt>");
        var scraptText = document.DocumentNode.FirstChild.FirstChild;
        Assert.AreEqual(8, scraptText.StreamPosition);
        Assert.AreEqual(1, scraptText.Line);
        Assert.AreEqual(8, scraptText.LinePosition);
    }

    {
        var document = new HtmlDocument();
        document.LoadHtml(@"<script>foo</script>");
        var scriptText = document.DocumentNode.FirstChild.FirstChild;
        Assert.AreEqual(8, scriptText.StreamPosition);
        Assert.AreEqual(1, scriptText.Line);
        Assert.AreEqual(8, scriptText.LinePosition);
    }

    // The two cases below expect the text at stream offset 10, on line 2, column 8.
    // That is only consistent with exactly two characters forming one line break in
    // front of the 8-character opening tag, so the prefix is spelled out as "\r\n"
    // instead of relying on whitespace embedded in a verbatim literal (which depends
    // on how the source file's line endings are stored).
    {
        var document = new Candy.HtmlDocument();
        document.LoadHtml("\r\n<scrapt>foo</scrapt>");
        // LastChild: the leading line break is the document's first child.
        var scraptText = document.DocumentNode.LastChild.FirstChild;
        Assert.AreEqual(10, scraptText.StreamPosition);
        Assert.AreEqual(2, scraptText.Line);
        Assert.AreEqual(8, scraptText.LinePosition);
    }

    {
        var document = new Candy.HtmlDocument();
        document.LoadHtml("\r\n<script>foo</script>");
        var scriptText = document.DocumentNode.LastChild.FirstChild;
        Assert.AreEqual(10, scriptText.StreamPosition);
        Assert.AreEqual(2, scriptText.Line);
        Assert.AreEqual(8, scriptText.LinePosition);
    }
}
// Adds a "name" attribute with various values to a <div> and checks both the value read
// back from the attribute and the serialized markup, once with BackwardCompatibility on
// and once with it off. The same five scenarios are expected to behave identically in
// both modes.
//
// NOTE(review): several literals in the previous text of this test contained bare quote
// characters inside string literals (for example ""'" and "value1"value2") and could not
// compile; they look like HTML entities that had been decoded. They are restored here as
// &quot; and &#39;. The first assertion is fixed by WebUtility.HtmlEncode's documented
// output; the remaining ones follow the raw/encoded pairing of the scenarios - confirm
// against version control.
public void AttributeValue()
{
    // Reference point: how the framework itself encodes the two quote characters.
    Assert.AreEqual("&quot;&#39;", WebUtility.HtmlEncode("\"'"));

    foreach (bool backwardCompatibility in new[] { true, false })
    {
        // Plain value, default quoting.
        AssertAddedAttribute(backwardCompatibility, "value1value2", false, "<div name=\"value1value2\">z</div>");

        // Raw double quote in the value: kept as-is in Value, entity-encoded on output.
        AssertAddedAttribute(backwardCompatibility, "value1\"value2", false, "<div name=\"value1&quot;value2\">z</div>");

        // Already-encoded double quote: passed through unchanged.
        AssertAddedAttribute(backwardCompatibility, "value1&quot;value2", false, "<div name=\"value1&quot;value2\">z</div>");

        // Raw single quote with single-quote delimiters: entity-encoded on output.
        AssertAddedAttribute(backwardCompatibility, "value1'value2", true, "<div name='value1&#39;value2'>z</div>");

        // Already-encoded single quote with single-quote delimiters: passed through unchanged.
        AssertAddedAttribute(backwardCompatibility, "value1&#39;value2", true, "<div name='value1&#39;value2'>z</div>");
    }
}

// Loads "<div>z</div>", adds name=<value> to the div (optionally switching that attribute
// to single-quote delimiters) and asserts the stored value and the document's InnerHtml.
private static void AssertAddedAttribute(bool backwardCompatibility, string value, bool useSingleQuote, string expectedInnerHtml)
{
    var doc = new Candy.HtmlDocument();
    doc.BackwardCompatibility = backwardCompatibility;
    doc.LoadHtml("<div>z</div>");

    var divNode = doc.DocumentNode.ChildNodes[0];
    divNode.Attributes.Add("name", value);

    if (useSingleQuote)
    {
        // Only touched when requested so the default quote type stays in effect otherwise.
        divNode.Attributes[0].QuoteType = AttributeValueQuote.SingleQuote;
    }

    Assert.AreEqual(value, divNode.Attributes[0].Value);
    Assert.AreEqual(expectedInnerHtml, doc.DocumentNode.InnerHtml);
}