Ejemplo n.º 1
0
        /// <summary>
        /// Builds a robots file whose wildcard agent has a Disallow directive followed only by
        /// whitespace, then expects both the root URL and a page below it to be allowed.
        /// </summary>
        public void IsUrlAllowed_WildCardAgentWithWhiteSpaceDisallow_ReturnsTrue()
        {
            // Arrange: the Disallow value is a single trailing space.
            string robotsContent = @"User-agent: *
Disallow: ";
            _unitUnderTest = new RobotsDotText(_rootUri, robotsContent);
            string agent = _userAgentString;

            // Act
            bool rootAllowed = _unitUnderTest.IsUrlAllowed(_rootUri.AbsoluteUri, agent);
            Assert.IsTrue(rootAllowed);

            bool pageAllowed = _unitUnderTest.IsUrlAllowed(_rootUri.AbsoluteUri + "aa.html", agent);
            Assert.IsTrue(pageAllowed);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// With two querystring Disallow rules on the wildcard agent, expects the URL whose
        /// querystring equals the first rule to be rejected, and a URL that only shares the
        /// "category=another" prefix of the second rule to be allowed.
        /// </summary>
        public void IsUrlAllowed_QuerystringMatch_NotSupported_ReturnsFalse()
        {
            // NOTE(review): the method name says "NotSupported", yet the first assertion
            // expects the querystring rule to block the URL - confirm the intended naming.
            string robotsContent = @"User-Agent: *
Disallow: /?category=whatever
Disallow: /?category=another&color=red";
            _unitUnderTest = new RobotsDotText(_rootUri, robotsContent);
            string agent = _userAgentString;

            bool exactMatchAllowed = _unitUnderTest.IsUrlAllowed(_rootUri.AbsoluteUri + "?category=whatever", agent);
            Assert.IsFalse(exactMatchAllowed);

            bool partialMatchAllowed = _unitUnderTest.IsUrlAllowed(_rootUri.AbsoluteUri + "?category=another&blah=blah", agent);
            Assert.IsTrue(partialMatchAllowed);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// With two querystring Disallow rules on the wildcard agent, expects both a URL whose
        /// querystring equals the first rule and a URL that partially overlaps the second rule
        /// to be reported as allowed.
        /// </summary>
        public void IsUrlAllowed_QuerystringMatch_NotSupported_ReturnsTrue()
        {
            // A failure here is good news: it would mean the robots implementation
            // has started honoring querystring rules.
            string robotsContent = @"User-Agent: *
Disallow: /?category=whatever
Disallow: /?category=another&color=red";
            _unitUnderTest = new RobotsDotText(_rootUri, robotsContent);
            string agent = _userAgentString;

            bool exactMatchAllowed = _unitUnderTest.IsUrlAllowed(_rootUri.AbsoluteUri + "?category=whatever", agent);
            Assert.IsTrue(exactMatchAllowed);

            bool partialMatchAllowed = _unitUnderTest.IsUrlAllowed(_rootUri.AbsoluteUri + "?category=another&blah=blah", agent);
            Assert.IsTrue(partialMatchAllowed);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Expects the site root to remain allowed when the wildcard agent disallows
        /// "/?/" and "/category/".
        /// </summary>
        /// <remarks>
        /// Currently ignored: the original author marked this as a bug that still needs fixing.
        /// The reason is passed to the Ignore attribute so it appears in test reports and so the
        /// attribute compiles on NUnit versions that require a reason string.
        /// </remarks>
        [Test, Ignore("This is a bug and needs to be fixed")]
        public void IsUrlAllowed_QuerystringOnRoot2_ReturnsTrue()
        {
            string userAgentString = _userAgentString;

            _unitUnderTest = new RobotsDotText(_rootUri, @"User-Agent: *
Disallow: /?/
Disallow: /category/");

            Assert.IsTrue(_unitUnderTest.IsUrlAllowed(_rootUri.AbsoluteUri, userAgentString));
        }
Ejemplo n.º 5
0
        /// <summary>
        /// With only querystring Disallow rules defined for the wildcard agent, expects the
        /// bare root URL (no querystring) to be allowed.
        /// </summary>
        public void IsUrlAllowed_QuerystringOnRoot_ReturnsTrue()
        {
            string robotsContent = @"User-Agent: *
Disallow: /?category=whatever
Disallow: /?category=another&color=red";
            _unitUnderTest = new RobotsDotText(_rootUri, robotsContent);
            string agent = _userAgentString;

            bool rootAllowed = _unitUnderTest.IsUrlAllowed(_rootUri.AbsoluteUri, agent);

            Assert.IsTrue(rootAllowed);
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Checks, against the fixture's existing robots instance, that a set of "allowed"
        /// paths is permitted for the default user agent, and that both the "disallowed"-named
        /// paths and the "allowed" paths are permitted for the "userAgentCrawlDelayIs1" agent.
        /// </summary>
        public void IsUrlAllowed_AllowedPages_ReturnsTrue()
        {
            // Relative paths appended to the root; the empty entry stands for the root itself.
            string[] allowedPaths =
            {
                "",
                "allowedfolder/aa.html",
                "allowedfolder/bb.html",
                "allowedfile2"
            };
            string[] disallowedNamedPaths =
            {
                "",
                "disallowedfile.txt",
                "disallowedfolder",
                "disallowedfolder/",
                "disallowedfolder/subfolder",
                "disallowedfolder/subfolder/"
            };

            string agent = _userAgentString;
            foreach (string path in allowedPaths)
            {
                Assert.IsTrue(_unitUnderTest.IsUrlAllowed(_rootUri.AbsoluteUri + path, agent));
            }

            // "userAgentCrawlDelayIs1" declares nothing to disallow, so everything should be
            // allowed for it (the "*" rules are not inherited).
            agent = "userAgentCrawlDelayIs1";
            foreach (string path in disallowedNamedPaths)
            {
                Assert.IsTrue(_unitUnderTest.IsUrlAllowed(_rootUri.AbsoluteUri + path, agent));
            }

            // Same agent again: with no allow/disallow entries of its own, the regular
            // pages are allowed too.
            foreach (string path in allowedPaths)
            {
                Assert.IsTrue(_unitUnderTest.IsUrlAllowed(_rootUri.AbsoluteUri + path, agent));
            }
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Builds a robots instance from empty content and expects every probed URL to be
        /// allowed, both for the default user agent and for "userAgentCrawlDelayIs1".
        /// </summary>
        public void IsUrlAllowed_EmptyRobotsContent_ReturnsTrue()
        {
            _unitUnderTest = new RobotsDotText(_rootUri, "");

            // Relative paths appended to the root; the empty entry stands for the root itself.
            string[] allowedPaths =
            {
                "",
                "allowedfolder/aa.html",
                "allowedfolder/bb.html",
                "allowedfile2"
            };
            string[] disallowedNamedPaths =
            {
                "",
                "disallowedfile.txt",
                "disallowedfolder",
                "disallowedfolder/",
                "disallowedfolder/subfolder",
                "disallowedfolder/subfolder/"
            };

            // The "*" user agent should be used by default.
            string agent = _userAgentString;
            foreach (string path in allowedPaths)
            {
                Assert.IsTrue(_unitUnderTest.IsUrlAllowed(_rootUri.AbsoluteUri + path, agent));
            }

            // "userAgentCrawlDelayIs1" declares nothing to disallow, so everything should be
            // allowed for it (the "*" rules are not inherited).
            agent = "userAgentCrawlDelayIs1";
            foreach (string path in disallowedNamedPaths)
            {
                Assert.IsTrue(_unitUnderTest.IsUrlAllowed(_rootUri.AbsoluteUri + path, agent));
            }

            // Same agent again: with no allow/disallow entries of its own, the regular
            // pages are allowed too.
            foreach (string path in allowedPaths)
            {
                Assert.IsTrue(_unitUnderTest.IsUrlAllowed(_rootUri.AbsoluteUri + path, agent));
            }
        }