Exemple #1
0
        // A wildcard user-agent group whose only Disallow value is whitespace is expected to
        // leave both the root url and a page beneath it allowed.
        public void IsUrlAllowed_WildCardAgentWithWhiteSpaceDisallow_ReturnsTrue()
        {
            string agent = _userAgentString;

            // The Disallow value below is a single trailing space; keep it intact when editing.
            _unitUnderTest = new RobotsDotText(_rootUri, @"User-agent: *
Disallow: ");

            string rootUrl = _rootUri.AbsoluteUri;
            Assert.IsTrue(_unitUnderTest.IsUrlAllowed(rootUrl, agent));
            Assert.IsTrue(_unitUnderTest.IsUrlAllowed(rootUrl + "aa.html", agent));
        }
Exemple #2
0
        // Pins the current behavior for Disallow rules that contain a querystring: urls whose
        // querystring matches (fully or partially) such a rule are still reported as allowed.
        public void IsUrlAllowed_QuerystringMatch_NotSupported_ReturnsTrue()
        {
            string agent = _userAgentString;
            _unitUnderTest = new RobotsDotText(_rootUri, @"User-Agent: *
Disallow: /?category=whatever
Disallow: /?category=another&color=red");

            // If either assertion below starts failing, that is a good thing: it means the
            // robots implementation now supports querystrings and this test should be updated.
            string rootUrl = _rootUri.AbsoluteUri;
            Assert.IsTrue(_unitUnderTest.IsUrlAllowed(rootUrl + "?category=whatever", agent));
            Assert.IsTrue(_unitUnderTest.IsUrlAllowed(rootUrl + "?category=another&blah=blah", agent));
        }
Exemple #3
0
        // Disallow rules that target the root path plus a querystring are expected to leave the
        // bare root url (no querystring) allowed.
        public void IsUrlAllowed_QuerystringOnRoot_ReturnsTrue()
        {
            string agent = _userAgentString;
            _unitUnderTest = new RobotsDotText(_rootUri, @"User-Agent: *
Disallow: /?category=whatever
Disallow: /?category=another&color=red");

            bool rootIsAllowed = _unitUnderTest.IsUrlAllowed(_rootUri.AbsoluteUri, agent);

            Assert.IsTrue(rootIsAllowed);
        }
Exemple #4
0
        // Expected behavior: with "Disallow: /?/" and "Disallow: /category/" in effect, the bare
        // root url should still be allowed. The test is ignored because of a known bug that
        // needs to be fixed (presumably in the robots implementation - TODO confirm). The reason
        // is passed to the attribute so it appears in the test report, not only in source.
        [Test, Ignore("This is a bug and needs to be fixed")]
        public void IsUrlAllowed_QuerystringOnRoot2_ReturnsTrue()
        {
            string userAgentString = _userAgentString;
            _unitUnderTest = new RobotsDotText(_rootUri, @"User-Agent: *
Disallow: /?/
Disallow: /category/");

            Assert.IsTrue(_unitUnderTest.IsUrlAllowed(_rootUri.AbsoluteUri, userAgentString));
        }
Exemple #5
0
        // With empty robots.txt content there are no rules at all, so every url checked here is
        // expected to be allowed, both for the fixture's user agent and for a named agent.
        public void IsUrlAllowed_EmptyRobotsContent_ReturnsTrue()
        {
            _unitUnderTest = new RobotsDotText(_rootUri, "");

            string fixtureAgent = _userAgentString;
            const string namedAgent = "userAgentCrawlDelayIs1";
            string rootUrl = _rootUri.AbsoluteUri;

            // Fixture agent: should fall back to the "*" user agent by default.
            Assert.IsTrue(_unitUnderTest.IsUrlAllowed(rootUrl, fixtureAgent));
            Assert.IsTrue(_unitUnderTest.IsUrlAllowed(rootUrl + "allowedfolder/aa.html", fixtureAgent));
            Assert.IsTrue(_unitUnderTest.IsUrlAllowed(rootUrl + "allowedfolder/bb.html", fixtureAgent));
            Assert.IsTrue(_unitUnderTest.IsUrlAllowed(rootUrl + "allowedfile2", fixtureAgent));

            // Named agent, "disallowed"-style paths: nothing is disallowed for this agent, so
            // all of them should be allowed ("*" is not inherited).
            Assert.IsTrue(_unitUnderTest.IsUrlAllowed(rootUrl, namedAgent));
            Assert.IsTrue(_unitUnderTest.IsUrlAllowed(rootUrl + "disallowedfile.txt", namedAgent));
            Assert.IsTrue(_unitUnderTest.IsUrlAllowed(rootUrl + "disallowedfolder", namedAgent));
            Assert.IsTrue(_unitUnderTest.IsUrlAllowed(rootUrl + "disallowedfolder/", namedAgent));
            Assert.IsTrue(_unitUnderTest.IsUrlAllowed(rootUrl + "disallowedfolder/subfolder", namedAgent));
            Assert.IsTrue(_unitUnderTest.IsUrlAllowed(rootUrl + "disallowedfolder/subfolder/", namedAgent));

            // Named agent, "allowed"-style paths: still allowed, since no allow or disallow
            // rule is specified for this agent.
            Assert.IsTrue(_unitUnderTest.IsUrlAllowed(rootUrl, namedAgent));
            Assert.IsTrue(_unitUnderTest.IsUrlAllowed(rootUrl + "allowedfolder/aa.html", namedAgent));
            Assert.IsTrue(_unitUnderTest.IsUrlAllowed(rootUrl + "allowedfolder/bb.html", namedAgent));
            Assert.IsTrue(_unitUnderTest.IsUrlAllowed(rootUrl + "allowedfile2", namedAgent));
        }