// Per-test setup that issues a REAL HTTP request (no mocking): the request goes to
// "http://localhost.fiddler:1111/" — the ".fiddler" host suffix routes it through a local
// Fiddler proxy, so this test run depends on that proxy/server being up. NOTE(review):
// network-dependent setup makes these tests environment-sensitive — confirm intended.
// The resulting CrawledPage is then re-labelled (ParentUri/HttpWebRequest) so downstream
// assertions see it as if it had come from the fake `_uri`.
// Creates the class under test via GetInstance(false, false, null, false, false) —
// presumably all optional parser behaviors disabled; verify against GetInstance's signature.
public void Setup() { _crawledPage = new PageRequester(new CrawlConfiguration()).MakeRequest(new Uri("http://localhost.fiddler:1111/")); //Make the real request above look like it came from the fake uri _crawledPage.ParentUri = _uri; _crawledPage.HttpWebRequest = (HttpWebRequest)WebRequest.Create(_uri); _unitUnderTest = GetInstance(false, false, null, false, false); }
// Verifies that the clean-url delegate passed to GetInstance is applied to every
// extracted link: each 'a' becomes 'x' and each 'b' becomes 'y' in the final URIs.
public void GetLinks_CleanUrlDelegateSet_ReturnsCleanLinks()
{
    // Arrange
    _unitUnderTest = GetInstance(false, false, (x) => x.Replace("a", "x").Replace("b", "y"));
    _crawledPage.Content.Text = "<a href=\"/aaa/a.html\" ></a><a href=\"/bbb/b.html\" /></a>";

    // Act
    IEnumerable<Uri> result = _unitUnderTest.GetLinks(_crawledPage);

    // Assert
    Assert.IsNotNull(result);
    // Materialize once: the original enumerated the (possibly deferred) sequence
    // three times — via Count() and two ElementAt() calls — re-running extraction
    // on each pass. A single ToList() makes the assertions cheap and stable.
    List<Uri> links = result.ToList();
    Assert.AreEqual(2, links.Count);
    Assert.AreEqual("http://a.com/xxx/x.html", links[0].AbsoluteUri);
    Assert.AreEqual("http://a.com/yyy/y.html", links[1].AbsoluteUri);
}
// Per-test setup: builds a fresh in-memory CrawledPage for `_uri` (no network
// request is made here) and a parser instance with all optional behaviors off.
public void Setup()
{
    var page = new CrawledPage(_uri);
    page.HttpWebRequest = (HttpWebRequest)WebRequest.Create(_uri);
    _crawledPage = page;
    _unitUnderTest = GetInstance(false, false);
}
// With rel="nofollow" handling enabled (second GetInstance flag), anchors marked
// rel="nofollow" must be excluded — both links here carry it, so none come back.
public void GetLinks_RelNoFollow_NotReturned()
{
    // Arrange
    _unitUnderTest = GetInstance(false, true);
    _crawledPage.Content.Text = "<a href=\"/aaa/a.html\" rel=\"nofollow\"></a><a href=\"/bbb/b.html\" rel=\"nofollow\" /></a>";

    // Act
    IEnumerable<Uri> links = _unitUnderTest.GetLinks(_crawledPage);

    // Assert
    Assert.IsNotNull(links);
    Assert.AreEqual(0, links.Count());
}
// A meta robots "noindex" (without "nofollow") must NOT suppress link extraction:
// both anchors are still returned even with meta-robots handling enabled.
public void GetLinks_MetaNoIndex_ReturnsLinks()
{
    // Arrange
    _unitUnderTest = GetInstance(true, false);
    _crawledPage.Content.Text = "<meta name=\"robots\" content=\"noindex\" /><a href=\"/aaa/a.html\" ></a><a href=\"/bbb/b.html\" /></a>";

    // Act
    IEnumerable<Uri> links = _unitUnderTest.GetLinks(_crawledPage);

    // Assert
    Assert.IsNotNull(links);
    Assert.AreEqual(2, links.Count());
}
// The meta robots directive must be matched case-insensitively: an all-uppercase
// "NOINDEX, NOFOLLOW" still suppresses every link on the page.
public void GetLinks_MetaNoIndexNoFollowUpperCase_ReturnsEmptyList()
{
    // Arrange
    _unitUnderTest = GetInstance(true, false);
    _crawledPage.Content.Text = "<META NAME=\"ROBOTS\" CONTENT=\"NOINDEX, NOFOLLOW\" /><a href=\"/aaa/a.html\" ></a><a href=\"/bbb/b.html\" /></a>";

    // Act
    IEnumerable<Uri> links = _unitUnderTest.GetLinks(_crawledPage);

    // Assert
    Assert.IsNotNull(links);
    Assert.AreEqual(0, links.Count());
}
// With named-anchor/hashbang support enabled (fourth GetInstance flag), the fragment
// part of each href must be preserved, so all four variants come back as distinct links.
public void GetLinks_NamedAnchorsOrHashbangs_Enabled_ReturnsLinks()
{
    // Arrange
    _unitUnderTest = GetInstance(false, false, null, true);
    _crawledPage.Content.Text = "<a href=\"/aaa/a.html\" ></a><a href=\"/aaa/a.html#top\" ></a><a href=\"/aaa/a.html#bottom\" /></a><a href=\"/aaa/a.html/#someaction/someid\" /></a>";

    // Act
    IEnumerable<Uri> result = _unitUnderTest.GetLinks(_crawledPage);

    // Assert
    // Consistency fix: every sibling GetLinks_* test guards against a null result
    // before using it; this one was missing that guard.
    Assert.IsNotNull(result);
    // Materialize once: the original enumerated the (possibly deferred) sequence
    // five times — Count() plus four ElementAt() calls — re-running extraction
    // on each pass. A single ToList() makes the assertions cheap and stable.
    List<Uri> links = result.ToList();
    Assert.AreEqual(4, links.Count);
    Assert.AreEqual("http://a.com/aaa/a.html", links[0].AbsoluteUri);
    Assert.AreEqual("http://a.com/aaa/a.html#top", links[1].AbsoluteUri);
    Assert.AreEqual("http://a.com/aaa/a.html#bottom", links[2].AbsoluteUri);
    Assert.AreEqual("http://a.com/aaa/a.html/#someaction/someid", links[3].AbsoluteUri);
}
// An X-Robots-Tag response header of "noindex" (without "nofollow") must NOT
// suppress link extraction, even with X-Robots-Tag handling enabled (fifth flag).
public void GetLinks_HttpXRobotsTagHeaderNoIndex_ReturnsLinks()
{
    // Arrange
    var extraHeaders = new NameValueCollection { { "X-Robots-Tag", "noindex" } };
    _crawledPage.HttpWebResponse.Headers.Add(extraHeaders);
    _unitUnderTest = GetInstance(false, false, null, false, true);
    _crawledPage.Content.Text = "<a href=\"/aaa/a.html\" ></a><a href=\"/bbb/b.html\" /></a>";

    // Act
    IEnumerable<Uri> links = _unitUnderTest.GetLinks(_crawledPage);

    // Assert
    Assert.IsNotNull(links);
    Assert.AreEqual(2, links.Count());
}