// Checks that UriUtils.CanonicalizeUrl resolves relative references against a base URL,
// and that HtmlSelectable exposes links/anchors as expected when a base URL is supplied.
public void FixRelativeUrl()
{
    const string baseUrl = "http://www.dianping.com/sh/ss/com";
    const string baseUrlWithTrailingSlash = "http://www.dianping.com/sh/ss/com/";

    // Query-only reference: appended to the base path.
    Assert.Equal("http://www.dianping.com/sh/ss/com?aa", UriUtils.CanonicalizeUrl("?aa", baseUrl));

    // A single "../" climbs one directory above the base document.
    Assert.Equal("http://www.dianping.com/sh/aa", UriUtils.CanonicalizeUrl("../aa", baseUrl));

    // "..aa" is an ordinary file name, not a parent-directory reference.
    Assert.Equal("http://www.dianping.com/sh/ss/..aa", UriUtils.CanonicalizeUrl("..aa", baseUrl));

    // With a trailing slash the last segment counts as a directory, so "../../" stops at /sh/.
    Assert.Equal("http://www.dianping.com/sh/aa", UriUtils.CanonicalizeUrl("../../aa", baseUrlWithTrailingSlash));

    // Without the trailing slash the same reference reaches the site root.
    Assert.Equal("http://www.dianping.com/aa", UriUtils.CanonicalizeUrl("../../aa", baseUrl));

    // Only relative paths need to be completed.
    var relativePage = new HtmlSelectable("<div><a href=\"/a/b\">aaaaaaab</a></div>", "http://www.b.com");
    var completedLink = relativePage.Links().First();
    Assert.Equal("http://www.b.com/a/b", completedLink);

    // Absolute paths do not need to be completed.
    // NOTE(review): the third constructor argument (false) presumably disables outbound-link removal — confirm.
    var absolutePage = new HtmlSelectable("<div><a href=\"http://www.aaaa.com\">aaaaaaab</a></div>", "http://www.b.com", false);
    var firstAnchor = absolutePage.SelectList(Selectors.XPath(".//a")).First();
    Assert.Equal("aaaaaaab", firstAnchor.Value);
}
// Builds an HtmlSelectable from markup only (no base URL) whose anchor has a relative href,
// and checks that the first anchor selected via XPath still yields its text "aaaaaaab".
public void DoNotFixAllRelativeHrefs()
{
    var page = new HtmlSelectable("<div><a href=\"aaaa.com\">aaaaaaab</a></div>");

    var anchors = page.SelectList(Selectors.XPath(".//a")).ToArray();
    var firstAnchor = anchors.First();

    Assert.Equal("aaaaaaab", firstAnchor.Value);
}
// The only anchor points at a different host (www.aaaa.com) than the base URL (www.b.com).
// The test expects selecting anchors from such a document to return null.
public void RemoveOutboundLinks()
{
    var page = new HtmlSelectable("<div><a href=\"http://www.aaaa.com\">aaaaaaab</a></div>", "http://www.b.com");

    Assert.Null(page.SelectList(Selectors.XPath(".//a")));
}
// Selects href attributes from the shared html2 fixture (declared elsewhere in this class)
// and checks that both the first node's Value and the selection's own Value give the same URL.
public void Selectable()
{
    const string hrefXPath = ".//a/@href";
    const string expectedHref = "http://whatever.com/aaa";

    var page = new HtmlSelectable(html2);

    // Through the node list: the first node carries the expected URL.
    var hrefNodes = page.XPath(hrefXPath).Nodes();
    Assert.Equal(expectedHref, hrefNodes.First().Value);

    // Through the selection's Value property directly.
    var hrefValue = page.XPath(hrefXPath).Value;
    Assert.Equal(expectedHref, hrefValue);
}
// Checks link extraction on the shared html2 fixture (declared elsewhere in this class):
// two links for the whole document, and exactly one link in each of the first two divs.
public void XPath2()
{
    var page = new HtmlSelectable(html2);
    var allLinks = page.Links();

    var divs = page.SelectList(Selectors.XPath("//div")).ToList();
    var firstDivLinks = divs[0].Links().ToList();
    var secondDivLinks = divs[1].Links().ToList();

    // Document-wide count first, then the per-div counts.
    Assert.Equal(2, allLinks.Count());
    Assert.Single(firstDivLinks);
    Assert.Single(secondDivLinks);

    // Each div contributes its own link.
    Assert.Equal("http://whatever.com/aaa", firstDivLinks[0]);
    Assert.Equal("http://whatever.com/bbb", secondDivLinks[0]);
}
// Runs the CSS selector "ul li a" over a three-item list and checks that Select(...).Value
// yields the text of the first matching anchor ("aaaa").
public void PseudoFirstTest()
{
    var markup = @"<ul> <li class=""top""><span class=""date"" style=""display: block;"">x</span><span class=""title""><a target=""_blank"" href=""https://www.aaa.com/html/it/343752.htm"">aaaa</a></span></li> <li class=""new""><span class=""date"" style=""display: block;"">y</span><span class=""title""><a target=""_blank"" href=""https://www.aaa.com/html/digi/346221.htm"">bbbb</a></span></li> <li class=""new""><span class=""date"" style=""display: block;"">z</span><span class=""title""><a target=""_blank"" href=""https://www.aaa.com/html/it/346264.htm"">cccc</a></span></li></ul>";
    ISelectable root = new HtmlSelectable(markup);

    var anchorSelector = new CssSelector("ul li a");
    var firstMatch = root.Select(anchorSelector);

    Assert.Equal("aaaa", firstMatch.Value);

    // NOTE(review): a disabled follow-up check that expected "cccc" from the same selector
    // was dropped as dead code; reinstate it as a real assertion if that case should be covered.
}