Example #1
0
        /// <summary>
        /// Verifies that a "Disallow: /news/0" rule does not block
        /// "/news/world-asia-40360168" or "/040360168": neither path starts
        /// with the literal prefix "/news/0".
        /// </summary>
        public void R_UnusualRobotsRule()
        {
            const string txtfile = "User-agent: *\n\nDisallow: /news/0\n";
            Uri          uri     = new Uri("https://www.example.com/news/world-asia-40360168");
            Uri          uri2    = new Uri("https://www.example.com/040360168");
            var          buffer  = System.Text.Encoding.UTF8.GetBytes(txtfile);

            // RobotsFile is disposable, so release it deterministically rather
            // than leaving it for the finalizer.
            using (var robots = new RobotsFile(RobotsFetcher.MakeRobotsUri(uri), buffer))
            {
                // Dump the parsed rules to the test output to aid debugging on failure.
                Console.WriteLine(robots.ToString());

                Assert.IsFalse(robots.IsDisallowed(uri));
                Assert.IsFalse(robots.IsDisallowed(uri2));
            }
        }
Example #2
0
        /// <summary>
        /// Loads the saved github.com robots.txt fixture and checks that the same
        /// URI is disallowed for the class-level <c>userAgent</c> value but is
        /// reported as not disallowed for "Googlebot".
        /// </summary>
        public void GithubTest()
        {
            var path = Path.Combine(testdataPath, "www.github.com-robots.txt");

            // File.OpenRead asks for read-only access and allows other readers.
            // File.Open(path, FileMode.Open) would request read/write access with
            // no sharing, which fails on read-only fixtures or when another test
            // has the same file open.
            using (var stream = File.OpenRead(path))
            using (var robots = new RobotsFile(new Uri("https://github.com/robots.txt"), stream))
            {
                var readme = new Uri("https://github.com/nullabork/fetcho/blob/master/README.md");

                // test different useragents
                Assert.IsTrue(robots.IsDisallowed(readme, userAgent));
                Assert.IsTrue(robots.IsNotDisallowed(readme, "Googlebot"));
            }
        }
Example #3
0
        /// <summary>
        /// Coarse performance guard: runs one million iterations of five
        /// <c>IsDisallowed</c> checks (five million checks in total, each with a
        /// freshly constructed <see cref="Uri"/>) against the saved Wikipedia
        /// robots.txt fixture and fails if the loop takes 14 seconds or longer.
        /// Loading the fixture is not part of the timed region.
        /// </summary>
        public void SpeedTest()
        {
            const int    iterations    = 1000000;
            const double budgetSeconds = 14;

            var path = Path.Combine(testdataPath, "en.wikipedia.org-robots.txt");

            // Read-only, shareable handle so the fixture can be opened by other
            // tests at the same time and does not need to be writable.
            using (var stream = File.OpenRead(path))
            using (var robots = new RobotsFile(new Uri("https://en.wikipedia.org/robots.txt"), stream))
            {
                // Stopwatch is monotonic and high resolution; DateTime.Now can jump
                // with clock adjustments and is too coarse for timing.
                var timer = System.Diagnostics.Stopwatch.StartNew();
                for (int i = 0; i < iterations; i++)
                {
                    // Uri construction intentionally stays inside the loop so the
                    // measured workload matches what the time budget was set against.
                    Assert.IsFalse(robots.IsDisallowed(new Uri("https://en.wikipedia.org/wiki/Main_Page")));
                    Assert.IsFalse(robots.IsDisallowed(new Uri("https://en.wikipedia.org/wiki/Event_Horizon_Telescope")));
                    Assert.IsFalse(robots.IsDisallowed(new Uri("https://en.wikipedia.org/wiki/Talk:Event_Horizon_Telescope")));
                    Assert.IsTrue(robots.IsDisallowed(new Uri("https://en.wikipedia.org/w/index.php?title=Talk:Event_Horizon_Telescope&action=edit")));
                    Assert.IsTrue(robots.IsDisallowed(new Uri("https://en.wikipedia.org/wiki/Special:Random")));
                }
                timer.Stop();

                var time = timer.Elapsed;
                Assert.IsTrue(time.TotalSeconds < budgetSeconds, time.ToString());
            }
        }
Example #4
0
        /// <summary>
        /// Exercises disallow rules that use a trailing wildcard ("/data/*"), an
        /// end-of-path anchor ("/daylight/$"), a plain prefix ("/jerk") and a
        /// wildcard combined with an anchor ("/h*ray.html$"), asserting which
        /// sample URIs are and are not reported as disallowed.
        /// </summary>
        public void DisallowedTest()
        {
            var txt    = "user-agent: *\n\ndisallow: /data/*\ndisallow: /daylight/$\ndisallow: /jerk\ndisallow: /h*ray.html$";
            var buffer = System.Text.Encoding.UTF8.GetBytes(txt);

            // NOTE(review): the robots file is created for www.example.com but the
            // URIs checked below are on rofflo.org; presumably only the path is
            // matched - confirm against RobotsFile.IsDisallowed.
            // RobotsFile is disposable, so release it when the test finishes.
            using (var robots = new RobotsFile(new Uri("https://www.example.com/robots.txt"), buffer))
            {
                // Paths expected to be blocked by one of the rules.
                Assert.IsTrue(robots.IsDisallowed(new Uri("http://rofflo.org/jerk")));
                Assert.IsTrue(robots.IsDisallowed(new Uri("http://rofflo.org/data/hooray.html")));
                Assert.IsTrue(robots.IsDisallowed(new Uri("http://rofflo.org/hooray.html")));
                Assert.IsTrue(robots.IsDisallowed(new Uri("http://rofflo.org/daylight/")));

                // "/daylight/$" is expected to cover only the directory itself,
                // not the pages beneath it.
                Assert.IsFalse(robots.IsDisallowed(new Uri("http://rofflo.org/daylight/loafo.html")));

                // Paths that no rule is expected to block.
                Assert.IsFalse(robots.IsDisallowed(new Uri("http://rofflo.org/index.html")));
                Assert.IsFalse(robots.IsDisallowed(new Uri("http://rofflo.org/")));
            }
        }
Example #5
0
        /// <summary>
        /// Loads the saved en.wikipedia.org robots.txt fixture and checks that
        /// ordinary article and talk pages are not disallowed, while edit-action
        /// URLs and "Special:Random" are.
        /// </summary>
        public void WikipediaTest()
        {
            var path = Path.Combine(testdataPath, "en.wikipedia.org-robots.txt");

            // File.OpenRead asks for read-only access and allows other readers.
            // File.Open(path, FileMode.Open) would take an exclusive read/write
            // handle on a fixture that other tests also open.
            using (var stream = File.OpenRead(path))
            using (var robots = new RobotsFile(new Uri("https://en.wikipedia.org/robots.txt"), stream))
            {
                // Regular content pages must remain crawlable.
                Assert.IsFalse(robots.IsDisallowed(new Uri("https://en.wikipedia.org/wiki/Main_Page")));
                Assert.IsFalse(robots.IsDisallowed(new Uri("https://en.wikipedia.org/wiki/Event_Horizon_Telescope")));
                Assert.IsFalse(robots.IsDisallowed(new Uri("https://en.wikipedia.org/wiki/Talk:Event_Horizon_Telescope")));

                // Edit-action URLs and special pages must be blocked.
                Assert.IsTrue(robots.IsDisallowed(new Uri("https://en.wikipedia.org/w/index.php?title=Talk:Event_Horizon_Telescope&action=edit")));
                Assert.IsTrue(robots.IsDisallowed(new Uri("https://en.wikipedia.org/wiki/Special:Random")));
                Assert.IsTrue(robots.IsDisallowed(new Uri("https://en.wikipedia.org/w/index.php?title=Ahmet_Davutoglu&action=edit&section=34")));
            }
        }