Example #1
0
 public void Constructor_NullContent()
 {
     // Build the unit under test with a null robots content argument. The cast keeps
     // the argument statically typed as string.
     // NOTE(review): no assertion is visible in this method; presumably the expected
     // outcome is declared by a test attribute outside this block - confirm.
     _unitUnderTest = new RobotsDotText(_rootUri, (string)null);
 }
Example #2
0
 public void SetUp()
 {
     // Create a fresh RobotsDotText from the shared root uri and robots content
     // fields, then publish it as the unit under test.
     var freshRobots = new RobotsDotText(_rootUri, _robotsContent);
     _unitUnderTest = freshRobots;
 }
Example #3
0
 public void Constructor_NullRootUri()
 {
     // Build the unit under test with a null root uri and the shared robots content.
     // NOTE(review): no assertion is visible in this method; presumably the expected
     // outcome is declared by a test attribute outside this block - confirm.
     var robotsWithoutRoot = new RobotsDotText(null, _robotsContent);
     _unitUnderTest = robotsWithoutRoot;
 }
Example #4
0
        public void IsUrlAllowed_QuerystringMatch_NotSupported_ReturnsTrue()
        {
            // If this test starts failing, that is a good thing: it means the robots
            // implementation now supports querystring rules.
            string agent = _userAgentString;
            string querystringRules = @"User-Agent: *
Disallow: /?category=whatever
Disallow: /?category=another&color=red";
            _unitUnderTest = new RobotsDotText(_rootUri, querystringRules);

            // Both urls carry a querystring that overlaps a Disallow rule, yet both are
            // asserted to be allowed.
            var exactRuleAllowed = _unitUnderTest.IsUrlAllowed(_rootUri.AbsoluteUri + "?category=whatever", agent);
            Assert.IsTrue(exactRuleAllowed);

            var partialRuleAllowed = _unitUnderTest.IsUrlAllowed(_rootUri.AbsoluteUri + "?category=another&blah=blah", agent);
            Assert.IsTrue(partialRuleAllowed);
        }
Example #5
0
        public void IsUrlAllowed_WildCardAgentWithWhiteSpaceDisallow_ReturnsTrue()
        {
            // The wildcard agent section has a Disallow line whose value is only a
            // single space; both the root url and a page under it are asserted allowed.
            string agent = _userAgentString;
            string whitespaceDisallowRules = @"User-agent: *
Disallow: ";
            _unitUnderTest = new RobotsDotText(_rootUri, whitespaceDisallowRules);

            var rootAllowed = _unitUnderTest.IsUrlAllowed(_rootUri.AbsoluteUri, agent);
            Assert.IsTrue(rootAllowed);

            var pageAllowed = _unitUnderTest.IsUrlAllowed(_rootUri.AbsoluteUri + "aa.html", agent);
            Assert.IsTrue(pageAllowed);
        }
Example #6
0
        // Currently ignored because of a known bug; the reason is passed to the Ignore
        // attribute so it is carried by the attribute itself rather than a comment.
        // With "/?/" and "/category/" disallowed for every agent, the bare root url
        // is still expected to be allowed.
        [Test, Ignore("This is a bug and needs to be fixed")]
        public void IsUrlAllowed_QuerystringOnRoot2_ReturnsTrue()
        {
            string userAgentString = _userAgentString;
            _unitUnderTest = new RobotsDotText(_rootUri, @"User-Agent: *
Disallow: /?/
Disallow: /category/");

            Assert.IsTrue(_unitUnderTest.IsUrlAllowed(_rootUri.AbsoluteUri, userAgentString));
        }
Example #7
0
        public void IsUrlAllowed_QuerystringOnRoot_ReturnsTrue()
        {
            // Every Disallow rule here targets the root path plus a querystring; the
            // root url without any querystring is asserted to be allowed.
            string agent = _userAgentString;
            string querystringRules = @"User-Agent: *
Disallow: /?category=whatever
Disallow: /?category=another&color=red";
            _unitUnderTest = new RobotsDotText(_rootUri, querystringRules);

            var rootAllowed = _unitUnderTest.IsUrlAllowed(_rootUri.AbsoluteUri, agent);
            Assert.IsTrue(rootAllowed);
        }
Example #8
0
        public void IsUserAgentAllowed_WildCardUserAgent_ReturnsFalse()
        {
            // The wildcard agent section disallows "/"; an arbitrary agent name is then
            // asserted to be not allowed.
            _unitUnderTest = new RobotsDotText(_rootUri, @"
User-Agent: *
Disallow: /");

            var agentAllowed = _unitUnderTest.IsUserAgentAllowed("aaaaaaaaaaaa");
            Assert.IsFalse(agentAllowed);
        }
Example #9
0
        public void IsUrlAllowed_EmptyRobotsContent_ReturnsTrue()
        {
            // With empty robots content, every url checked below is asserted to be
            // allowed, for the shared default agent and for a named agent alike.
            _unitUnderTest = new RobotsDotText(_rootUri, "");

            string[] allowedPaths =
            {
                "allowedfolder/aa.html",
                "allowedfolder/bb.html",
                "allowedfile2"
            };
            string[] disallowedLookingPaths =
            {
                "disallowedfile.txt",
                "disallowedfolder",
                "disallowedfolder/",
                "disallowedfolder/subfolder",
                "disallowedfolder/subfolder/"
            };

            // Round 1: the shared default agent against the root and the "allowed" paths.
            string agent = _userAgentString;
            Assert.IsTrue(_unitUnderTest.IsUrlAllowed(_rootUri.AbsoluteUri, agent));
            foreach (string relativePath in allowedPaths)
            {
                Assert.IsTrue(_unitUnderTest.IsUrlAllowed(_rootUri.AbsoluteUri + relativePath, agent));
            }

            // Round 2: a named agent against the root and paths that merely look
            // disallowed; nothing is disallowed by empty content.
            agent = "userAgentCrawlDelayIs1";
            Assert.IsTrue(_unitUnderTest.IsUrlAllowed(_rootUri.AbsoluteUri, agent));
            foreach (string relativePath in disallowedLookingPaths)
            {
                Assert.IsTrue(_unitUnderTest.IsUrlAllowed(_rootUri.AbsoluteUri + relativePath, agent));
            }

            // Round 3: the same named agent against the root and the "allowed" paths.
            agent = "userAgentCrawlDelayIs1";
            Assert.IsTrue(_unitUnderTest.IsUrlAllowed(_rootUri.AbsoluteUri, agent));
            foreach (string relativePath in allowedPaths)
            {
                Assert.IsTrue(_unitUnderTest.IsUrlAllowed(_rootUri.AbsoluteUri + relativePath, agent));
            }
        }
Example #10
0
 public void SetUp()
 {
     // Create the unit under test from the shared root uri and robots content fields.
     _unitUnderTest = new RobotsDotText(_rootUri, _robotsContent);

     // Ask a PageRequester configured with user agent "aaa" for the page at
     // http://localhost:1111/ and keep the result in _realPage.
     // NOTE(review): presumably this needs something listening on localhost:1111
     // while the tests run - confirm.
     var crawlConfig = new CrawlConfiguration();
     crawlConfig.UserAgentString = "aaa";
     var requester = new PageRequester(crawlConfig);
     _realPage = requester.MakeRequest(new Uri("http://localhost:1111/"));
 }