/// <summary>
/// Parses a robots.txt whose <c>Disallow: /news/0</c> rule is separated from
/// its <c>User-agent: *</c> line by a blank line, and checks that two URIs
/// whose paths do not start with <c>/news/0</c> are not reported as disallowed.
/// </summary>
public void R_UnusualRobotsRule()
{
    const string txtfile = "User-agent: *\n\nDisallow: /news/0\n";
    Uri uri = new Uri("https://www.example.com/news/world-asia-40360168");
    Uri uri2 = new Uri("https://www.example.com/040360168");
    var buffer = System.Text.Encoding.UTF8.GetBytes(txtfile);

    // RobotsFile is disposable (the file-backed tests wrap it in using),
    // so release it deterministically here as well.
    using (var robots = new RobotsFile(RobotsFetcher.MakeRobotsUri(uri), buffer))
    {
        // Dump the parsed rules so a failing run shows what was actually parsed.
        Console.WriteLine(robots.ToString());

        Assert.IsFalse(robots.IsDisallowed(uri));
        Assert.IsFalse(robots.IsDisallowed(uri2));
    }
}
/// <summary>
/// Loads the saved GitHub robots.txt fixture and checks the same README URI
/// against two user agents: the test's configured <c>userAgent</c> must be
/// disallowed, while "Googlebot" must not be.
/// </summary>
public void GithubTest()
{
    var path = Path.Combine(testdataPath, "www.github.com-robots.txt");
    var readme = new Uri("https://github.com/nullabork/fetcho/blob/master/README.md");

    // File.OpenRead asks for read-only, read-shared access. File.Open(path, FileMode.Open)
    // asks for read/write with no sharing, which fails on read-only fixtures and when
    // another test has the same file open.
    // The stream is disposed here because its ownership by RobotsFile is not visible
    // from this test; disposing a stream twice is harmless.
    using (var stream = File.OpenRead(path))
    using (var robots = new RobotsFile(new Uri("https://github.com/robots.txt"), stream))
    {
        // test different useragents
        Assert.IsTrue(robots.IsDisallowed(readme, userAgent));
        Assert.IsTrue(robots.IsNotDisallowed(readme, "Googlebot"));
    }
}
/// <summary>
/// Coarse performance guard: runs one million iterations of five lookups each
/// against the saved Wikipedia robots.txt fixture and fails if the loop takes
/// 14 seconds or more. Loading the fixture is excluded from the measurement;
/// constructing the <see cref="Uri"/> instances inside the loop is included.
/// </summary>
public void SpeedTest()
{
    const int iterations = 1000000;
    const double maxSeconds = 14;

    var path = Path.Combine(testdataPath, "en.wikipedia.org-robots.txt");

    // File.OpenRead asks for read-only, read-shared access, so this test can run
    // alongside others that open the same fixture. The stream is disposed here
    // because its ownership by RobotsFile is not visible from this test.
    using (var stream = File.OpenRead(path))
    using (var robots = new RobotsFile(new Uri("https://en.wikipedia.org/robots.txt"), stream))
    {
        // Stopwatch is monotonic; DateTime.Now can jump if the wall clock is
        // adjusted (DST, NTP) while the loop runs.
        var stopwatch = System.Diagnostics.Stopwatch.StartNew();

        for (int i = 0; i < iterations; i++)
        {
            Assert.IsFalse(robots.IsDisallowed(new Uri("https://en.wikipedia.org/wiki/Main_Page")));
            Assert.IsFalse(robots.IsDisallowed(new Uri("https://en.wikipedia.org/wiki/Event_Horizon_Telescope")));
            Assert.IsFalse(robots.IsDisallowed(new Uri("https://en.wikipedia.org/wiki/Talk:Event_Horizon_Telescope")));
            Assert.IsTrue(robots.IsDisallowed(new Uri("https://en.wikipedia.org/w/index.php?title=Talk:Event_Horizon_Telescope&action=edit")));
            Assert.IsTrue(robots.IsDisallowed(new Uri("https://en.wikipedia.org/wiki/Special:Random")));
        }

        stopwatch.Stop();
        var time = stopwatch.Elapsed;

        // The elapsed time is the failure message so a slow run reports how slow it was.
        Assert.IsTrue(time.TotalSeconds < maxSeconds, time.ToString());
    }
}
/// <summary>
/// Exercises four kinds of disallow rule against an in-memory robots.txt:
/// a trailing wildcard (<c>/data/*</c>), an end anchor (<c>/daylight/$</c>),
/// a plain prefix (<c>/jerk</c>), and a wildcard combined with an end anchor
/// (<c>/h*ray.html$</c>).
/// </summary>
public void DisallowedTest()
{
    var txt = "user-agent: *\n\ndisallow: /data/*\ndisallow: /daylight/$\ndisallow: /jerk\ndisallow: /h*ray.html$";
    var buffer = System.Text.Encoding.UTF8.GetBytes(txt);

    // NOTE(review): the robots.txt URI is on www.example.com while the URIs checked
    // below are on rofflo.org; the assertions presumably rely on IsDisallowed matching
    // on path only. Confirm against RobotsFile.
    //
    // RobotsFile is disposable (the file-backed tests wrap it in using),
    // so release it deterministically here as well.
    using (var robots = new RobotsFile(new Uri("https://www.example.com/robots.txt"), buffer))
    {
        // Paths expected to match one of the rules.
        Assert.IsTrue(robots.IsDisallowed(new Uri("http://rofflo.org/jerk")));
        Assert.IsTrue(robots.IsDisallowed(new Uri("http://rofflo.org/data/hooray.html")));
        Assert.IsTrue(robots.IsDisallowed(new Uri("http://rofflo.org/hooray.html")));

        // "/daylight/$" is expected to block only the directory itself, not files under it.
        Assert.IsFalse(robots.IsDisallowed(new Uri("http://rofflo.org/daylight/loafo.html")));
        Assert.IsTrue(robots.IsDisallowed(new Uri("http://rofflo.org/daylight/")));

        // Paths expected to match no rule.
        Assert.IsFalse(robots.IsDisallowed(new Uri("http://rofflo.org/index.html")));
        Assert.IsFalse(robots.IsDisallowed(new Uri("http://rofflo.org/")));
    }
}
/// <summary>
/// Loads the saved Wikipedia robots.txt fixture and checks that ordinary
/// article and talk pages are not disallowed, while <c>/w/index.php</c> edit
/// URLs and <c>Special:Random</c> are.
/// </summary>
public void WikipediaTest()
{
    var path = Path.Combine(testdataPath, "en.wikipedia.org-robots.txt");

    // File.OpenRead asks for read-only, read-shared access, so this test can run
    // alongside others that open the same fixture. The stream is disposed here
    // because its ownership by RobotsFile is not visible from this test.
    using (var stream = File.OpenRead(path))
    using (var robots = new RobotsFile(new Uri("https://en.wikipedia.org/robots.txt"), stream))
    {
        Assert.IsFalse(robots.IsDisallowed(new Uri("https://en.wikipedia.org/wiki/Main_Page")));
        Assert.IsFalse(robots.IsDisallowed(new Uri("https://en.wikipedia.org/wiki/Event_Horizon_Telescope")));
        Assert.IsFalse(robots.IsDisallowed(new Uri("https://en.wikipedia.org/wiki/Talk:Event_Horizon_Telescope")));

        Assert.IsTrue(robots.IsDisallowed(new Uri("https://en.wikipedia.org/w/index.php?title=Talk:Event_Horizon_Telescope&action=edit")));
        Assert.IsTrue(robots.IsDisallowed(new Uri("https://en.wikipedia.org/wiki/Special:Random")));

        // The query previously read "edit§ion=34": "&sect" had been decoded as the
        // HTML entity for the section sign. Restored to the intended "&section=34".
        Assert.IsTrue(robots.IsDisallowed(new Uri("https://en.wikipedia.org/w/index.php?title=Ahmet_Davutoglu&action=edit&section=34")));
    }
}