// Builds a RobotsDotText whose wildcard user agent has a blank Disallow value and checks that
// both the root URL and a page beneath it are reported as allowed.
// NOTE(review): the robots content literal appears on a single line in this view; robots.txt
// directives are normally written one per line - confirm the literal's original line breaks.
public void IsUrlAllowed_WildCardAgentWithWhiteSpaceDisallow_ReturnsTrue()
{
    string agent = _userAgentString;
    _unitUnderTest = new RobotsDotText(_rootUri, @"User-agent: * Disallow: ");

    string rootUrl = _rootUri.AbsoluteUri;

    bool rootAllowed = _unitUnderTest.IsUrlAllowed(rootUrl, agent);
    Assert.IsTrue(rootAllowed);

    bool pageAllowed = _unitUnderTest.IsUrlAllowed(rootUrl + "aa.html", agent);
    Assert.IsTrue(pageAllowed);
}
// Expects a URL whose querystring exactly matches a Disallow rule to be rejected, while a URL
// that matches only part of another rule's querystring stays allowed.
// NOTE(review): IsUrlAllowed_QuerystringMatch_NotSupported_ReturnsTrue asserts the opposite
// result for the same "?category=whatever" input; presumably only one of the two is meant to
// run at a time - confirm which attributes govern these tests.
public void IsUrlAllowed_QuerystringMatch_NotSupported_ReturnsFalse()
{
    string agent = _userAgentString;
    _unitUnderTest = new RobotsDotText(_rootUri, @"User-Agent: * Disallow: /?category=whatever Disallow: /?category=another&color=red");

    string rootUrl = _rootUri.AbsoluteUri;

    // Exact match against the first Disallow rule.
    bool exactMatchAllowed = _unitUnderTest.IsUrlAllowed(rootUrl + "?category=whatever", agent);
    Assert.IsFalse(exactMatchAllowed);

    // Shares "category=another" with the second rule but not "color=red".
    bool partialMatchAllowed = _unitUnderTest.IsUrlAllowed(rootUrl + "?category=another&blah=blah", agent);
    Assert.IsTrue(partialMatchAllowed);
}
// Pins the current behavior in which querystring Disallow rules are not honored: both
// querystring URLs are expected to be allowed.
// If this test starts failing that is good news - it means the robots implementation now
// supports querystring matching.
public void IsUrlAllowed_QuerystringMatch_NotSupported_ReturnsTrue()
{
    string agent = _userAgentString;
    _unitUnderTest = new RobotsDotText(_rootUri, @"User-Agent: * Disallow: /?category=whatever Disallow: /?category=another&color=red");

    string rootUrl = _rootUri.AbsoluteUri;

    bool firstAllowed = _unitUnderTest.IsUrlAllowed(rootUrl + "?category=whatever", agent);
    Assert.IsTrue(firstAllowed);

    bool secondAllowed = _unitUnderTest.IsUrlAllowed(rootUrl + "?category=another&blah=blah", agent);
    Assert.IsTrue(secondAllowed);
}
// Checks that the root URL is still allowed when the robots content disallows "/?/" and
// "/category/". The test is currently ignored because it exposes a known bug; the reason is
// passed to the Ignore attribute (rather than left in a trailing comment) so that it shows up
// in test reports and the attribute also compiles on NUnit versions that require a reason.
[Test, Ignore("This is a bug and needs to be fixed")]
public void IsUrlAllowed_QuerystringOnRoot2_ReturnsTrue()
{
    string userAgentString = _userAgentString;
    _unitUnderTest = new RobotsDotText(_rootUri, @"User-Agent: * Disallow: /?/ Disallow: /category/");

    Assert.IsTrue(_unitUnderTest.IsUrlAllowed(_rootUri.AbsoluteUri, userAgentString));
}
// Checks that the bare root URL is allowed when the only Disallow rules target querystring
// variants of the root path.
public void IsUrlAllowed_QuerystringOnRoot_ReturnsTrue()
{
    string agent = _userAgentString;
    _unitUnderTest = new RobotsDotText(_rootUri, @"User-Agent: * Disallow: /?category=whatever Disallow: /?category=another&color=red");

    bool rootAllowed = _unitUnderTest.IsUrlAllowed(_rootUri.AbsoluteUri, agent);

    Assert.IsTrue(rootAllowed);
}
// Checks a series of URLs that are expected to be allowed, first for the fixture's default
// user agent and then for the "userAgentCrawlDelayIs1" agent.
// NOTE(review): this test does not build its own RobotsDotText; it relies on whatever
// _unitUnderTest currently holds (presumably created in the fixture setup, which is not
// visible here).
public void IsUrlAllowed_AllowedPages_ReturnsTrue()
{
    string rootUrl = _rootUri.AbsoluteUri;
    string[] allowedPaths = new[] { "", "allowedfolder/aa.html", "allowedfolder/bb.html", "allowedfile2" };

    string agent = _userAgentString;
    foreach (string path in allowedPaths)
    {
        Assert.IsTrue(_unitUnderTest.IsUrlAllowed(rootUrl + path, agent));
    }

    //User agent "userAgentCrawlDelayIs1" doesn't specify anything to disallow so should allow all ("*" is not inherited)
    agent = "userAgentCrawlDelayIs1";
    string[] otherwiseDisallowedPaths = new[]
    {
        "",
        "disallowedfile.txt",
        "disallowedfolder",
        "disallowedfolder/",
        "disallowedfolder/subfolder",
        "disallowedfolder/subfolder/"
    };
    foreach (string path in otherwiseDisallowedPaths)
    {
        Assert.IsTrue(_unitUnderTest.IsUrlAllowed(rootUrl + path, agent));
    }

    //Allows all since "userAgentCrawlDelayIs1" does not specify allow or disallow
    foreach (string path in allowedPaths)
    {
        Assert.IsTrue(_unitUnderTest.IsUrlAllowed(rootUrl + path, agent));
    }
}
// Builds a RobotsDotText from empty robots content and checks that every probed URL is
// allowed, both for the fixture's default user agent and for "userAgentCrawlDelayIs1".
public void IsUrlAllowed_EmptyRobotsContent_ReturnsTrue()
{
    _unitUnderTest = new RobotsDotText(_rootUri, "");

    string rootUrl = _rootUri.AbsoluteUri;
    string[] allowedPaths = new[] { "", "allowedfolder/aa.html", "allowedfolder/bb.html", "allowedfile2" };

    //Should use "*" user agent by default
    string agent = _userAgentString;
    foreach (string path in allowedPaths)
    {
        Assert.IsTrue(_unitUnderTest.IsUrlAllowed(rootUrl + path, agent));
    }

    //User agent "userAgentCrawlDelayIs1" doesn't specify anything to disallow so should allow all ("*" is not inherited)
    agent = "userAgentCrawlDelayIs1";
    string[] otherwiseDisallowedPaths = new[]
    {
        "",
        "disallowedfile.txt",
        "disallowedfolder",
        "disallowedfolder/",
        "disallowedfolder/subfolder",
        "disallowedfolder/subfolder/"
    };
    foreach (string path in otherwiseDisallowedPaths)
    {
        Assert.IsTrue(_unitUnderTest.IsUrlAllowed(rootUrl + path, agent));
    }

    //Allows all since "userAgentCrawlDelayIs1" does not specify allow or disallow
    //(the agent is still "userAgentCrawlDelayIs1" at this point)
    foreach (string path in allowedPaths)
    {
        Assert.IsTrue(_unitUnderTest.IsUrlAllowed(rootUrl + path, agent));
    }
}