예제 #1
0
        // Test fixture setup: fetches a page with a real PageRequester call and then
        // re-labels it so that it appears to have been requested from _uri.
        public void Setup()
        {
            var requester = new PageRequester(new CrawlConfiguration());
            var liveEndpoint = new Uri("http://localhost.fiddler:1111/");
            _crawledPage = requester.MakeRequest(liveEndpoint);

            // Disguise the real response as one that came from the fake uri.
            _crawledPage.ParentUri = _uri;
            HttpWebRequest fakeRequest = (HttpWebRequest)WebRequest.Create(_uri);
            _crawledPage.HttpWebRequest = fakeRequest;

            // All optional behaviours switched off / unset for the default instance.
            _unitUnderTest = GetInstance(false, false, null, false, false);
        }
예제 #2
0
        // Verifies that a URL-cleaning delegate passed to GetInstance affects the links
        // returned by GetLinks. The delegate rewrites "a" -> "x" and "b" -> "y"; the
        // expected URIs keep the host unchanged while the path segments are rewritten.
        public void GetLinks_CleanUrlDelegateSet_ReturnsCleanLinks()
        {
            _unitUnderTest = GetInstance(false, false, (x) => x.Replace("a", "x").Replace("b", "y"));
            _crawledPage.Content.Text = "<a href=\"/aaa/a.html\" ></a><a href=\"/bbb/b.html\" /></a>";

            IEnumerable<Uri> result = _unitUnderTest.GetLinks(_crawledPage);

            Assert.IsNotNull(result);

            // Materialize once after the null check: the returned sequence may be lazily
            // evaluated, and Count()/ElementAt() would otherwise each enumerate it again.
            List<Uri> links = result.ToList();
            Assert.AreEqual(2, links.Count);
            Assert.AreEqual("http://a.com/xxx/x.html", links[0].AbsoluteUri);
            Assert.AreEqual("http://a.com/yyy/y.html", links[1].AbsoluteUri);
        }
예제 #3
0
 // Test fixture setup: builds an in-memory CrawledPage for _uri (no network request
 // is issued here) and creates the unit under test with both flags disabled.
 public void Setup()
 {
     var page = new CrawledPage(_uri);
     page.HttpWebRequest = (HttpWebRequest)WebRequest.Create(_uri);
     _crawledPage = page;

     _unitUnderTest = GetInstance(false, false);
 }
예제 #4
0
        // With the second GetInstance flag enabled, anchors marked rel="nofollow" are
        // expected to be left out of the result entirely.
        public void GetLinks_RelNoFollow_NotReturned()
        {
            // Two anchors, both carrying rel="nofollow".
            const string html = "<a href=\"/aaa/a.html\" rel=\"nofollow\"></a><a href=\"/bbb/b.html\" rel=\"nofollow\" /></a>";

            _unitUnderTest = GetInstance(false, true);
            _crawledPage.Content.Text = html;

            IEnumerable<Uri> links = _unitUnderTest.GetLinks(_crawledPage);

            Assert.IsNotNull(links);
            int linkCount = links.Count();
            Assert.AreEqual(0, linkCount);
        }
예제 #5
0
        // A robots meta tag whose content is only "noindex" (no "nofollow") must not
        // suppress link extraction: both anchors are expected in the result.
        public void GetLinks_MetaNoIndex_ReturnsLinks()
        {
            // Robots meta tag followed by two ordinary anchors.
            const string html = "<meta name=\"robots\" content=\"noindex\" /><a href=\"/aaa/a.html\" ></a><a href=\"/bbb/b.html\" /></a>";

            _unitUnderTest = GetInstance(true, false);
            _crawledPage.Content.Text = html;

            IEnumerable<Uri> links = _unitUnderTest.GetLinks(_crawledPage);

            Assert.IsNotNull(links);
            int linkCount = links.Count();
            Assert.AreEqual(2, linkCount);
        }
예제 #6
0
        // An all-upper-case robots meta tag containing NOFOLLOW is expected to be
        // honoured just like a lower-case one: no links should come back.
        public void GetLinks_MetaNoIndexNoFollowUpperCase_ReturnsEmptyList()
        {
            // Upper-case META/ROBOTS/NOFOLLOW markup followed by two ordinary anchors.
            const string html = "<META NAME=\"ROBOTS\" CONTENT=\"NOINDEX, NOFOLLOW\" /><a href=\"/aaa/a.html\" ></a><a href=\"/bbb/b.html\" /></a>";

            _unitUnderTest = GetInstance(true, false);
            _crawledPage.Content.Text = html;

            IEnumerable<Uri> links = _unitUnderTest.GetLinks(_crawledPage);

            Assert.IsNotNull(links);
            int linkCount = links.Count();
            Assert.AreEqual(0, linkCount);
        }
예제 #7
0
        // With the fourth GetInstance flag enabled, hrefs that differ only by a
        // "#fragment" (named anchors / hashbang-style routes) are expected to be
        // returned as four distinct links with their fragments preserved.
        public void GetLinks_NamedAnchorsOrHashbangs_Enabled_ReturnsLinks()
        {
            _unitUnderTest = GetInstance(false, false, null, true);
            _crawledPage.Content.Text = "<a href=\"/aaa/a.html\" ></a><a href=\"/aaa/a.html#top\" ></a><a href=\"/aaa/a.html#bottom\" /></a><a href=\"/aaa/a.html/#someaction/someid\" /></a>";

            IEnumerable<Uri> result = _unitUnderTest.GetLinks(_crawledPage);

            // Same null guard the sibling GetLinks tests use, so a null result fails
            // with a clear assertion instead of an exception from LINQ.
            Assert.IsNotNull(result);

            // Materialize once: the returned sequence may be lazily evaluated, and
            // Count()/ElementAt() would otherwise each enumerate it again.
            List<Uri> links = result.ToList();
            Assert.AreEqual(4, links.Count);
            Assert.AreEqual("http://a.com/aaa/a.html", links[0].AbsoluteUri);
            Assert.AreEqual("http://a.com/aaa/a.html#top", links[1].AbsoluteUri);
            Assert.AreEqual("http://a.com/aaa/a.html#bottom", links[2].AbsoluteUri);
            Assert.AreEqual("http://a.com/aaa/a.html/#someaction/someid", links[3].AbsoluteUri);
        }
예제 #8
0
        // An "X-Robots-Tag: noindex" response header (without "nofollow") must not
        // suppress link extraction when the fifth GetInstance flag is enabled: both
        // anchors are expected in the result.
        public void GetLinks_HttpXRobotsTagHeaderNoIndex_ReturnsLinks()
        {
            // Attach the robots header to the page's response before building the instance.
            var robotsHeader = new NameValueCollection();
            robotsHeader.Add("X-Robots-Tag", "noindex");
            _crawledPage.HttpWebResponse.Headers.Add(robotsHeader);

            _unitUnderTest = GetInstance(false, false, null, false, true);

            const string html = "<a href=\"/aaa/a.html\" ></a><a href=\"/bbb/b.html\" /></a>";
            _crawledPage.Content.Text = html;

            IEnumerable<Uri> links = _unitUnderTest.GetLinks(_crawledPage);

            Assert.IsNotNull(links);
            int linkCount = links.Count();
            Assert.AreEqual(2, linkCount);
        }