Exemple #1
0
        // Drives the limiter with `max` requests against the same host while
        // moving the test clock forward by `millStep` milliseconds per request.
        //
        // windowSeconds - length of the rolling window handed to the limiter, in seconds.
        // max           - second constructor argument of the limiter; also the number of requests issued.
        // millStep      - milliseconds the test clock advances between consecutive requests.
        //
        // NOTE(review): no assertion is made here; the method only verifies that
        // every awaited HoldIfRequired call completes without throwing. The
        // behaviour suggested by the method name is not checked in the visible code.
        public async Task Holds_Until_Oldest_Passed_Window_If_Max_Reached(int windowSeconds, int max, int millStep)
        {
            var epoch       = new DateTime(2020, 01, 01);
            var rolling     = TimeSpan.FromSeconds(windowSeconds);
            var clock       = new TestNowProvider(epoch);
            var rateLimiter = new RollingWindowRateLimiter(rolling, max, clock);

            var requestIndex = 0;

            while (requestIndex < max)
            {
                // Move the fake clock before issuing the request so the limiter
                // sees the n-th request at epoch + n * millStep.
                var elapsedMs = requestIndex * millStep;
                clock.Update(epoch.AddMilliseconds(elapsedMs));

                // Each request uses a distinct path on the same host.
                var target = new Uri("http://domain.com/" + requestIndex);
                await rateLimiter.HoldIfRequired(target);

                requestIndex++;
            }
        }
Exemple #2
0
        // Issues two requests for different paths on the same host through a
        // limiter constructed with a 1000 ms window and a limit argument of 1,
        // and asserts that the second call took at least 1000 ms of wall-clock
        // time to complete.
        public async Task Holds_For_All_URIs_With_Same_Domain()
        {
            var clock       = new NowProvider(new DateTime(2020, 01, 01));
            var rateLimiter = new RollingWindowRateLimiter(TimeSpan.FromMilliseconds(1000), 1, clock);

            // Both URIs are built up front so that only the second
            // HoldIfRequired call falls inside the timed section.
            var initialRequest  = new Uri("http://domain.com/something");
            var followUpRequest = new Uri("http://domain.com/something-else");

            await rateLimiter.HoldIfRequired(initialRequest);

            // Timing starts only after the first request has gone through.
            var timer = Stopwatch.StartNew();
            await rateLimiter.HoldIfRequired(followUpRequest);

            var waitedAtLeastOneSecond = timer.ElapsedMilliseconds >= 1000;
            Assert.IsTrue(waitedAtLeastOneSecond);
        }
Exemple #3
0
        // Checks that the very first HoldIfRequired call for a URI completes
        // within one second. The limiter is built with a TimeSpan.MaxValue
        // window and a limit argument of 1.
        //
        // Fails with "Took too long" when the call has not completed after
        // 1000 ms; otherwise awaits the call so that any exception it produced
        // surfaces as a test failure.
        public async Task Does_Not_Hold_If_Domain_New()
        {
            var window      = TimeSpan.MaxValue;
            var nowProvider = new NowProvider(new DateTime(2020, 01, 01));
            var limiter     = new RollingWindowRateLimiter(window, 1, nowProvider);

            var uri = new Uri("http://domain.com/something");

            var waitTask = limiter.HoldIfRequired(uri);

            // Race the call against a one-second timer instead of spinning on
            // IsCompleted: a busy loop burns a core and, on a single-threaded
            // synchronization context, can prevent the awaited continuation
            // from ever running.
            var timeout  = Task.Delay(TimeSpan.FromMilliseconds(1000));
            var finished = await Task.WhenAny(waitTask, timeout);

            if (finished != waitTask)
            {
                Assert.Fail("Took too long");
            }

            // Observe the result so a faulted task fails the test.
            await waitTask;
        }
Exemple #4
0
        // Wires a rate limiter, proxy service and web agent into a Crawler,
        // runs one crawl job against https://reddit.com/ and writes the result
        // counters plus the second tuple element of every result entry to the
        // console. No assertions are made.
        //
        // NOTE(review): presumably performs live HTTP requests, which would make
        // this a manual/integration run rather than a unit test - confirm.
        public async Task Test()
        {
            var rateLimiter = new RollingWindowRateLimiter(10000, TimeSpan.FromMinutes(1));
            var proxies     = new DefaultProxyService();
            var webAgent    = new WebAgent(rateLimiter, proxies);

            var crawlJob = new CrawlJob
            {
                Domain = new Uri("https://reddit.com/"),

                // Completion conditions handed to the job: a page-count limit,
                // a time limit and a result-count limit.
                CompletionConditions = new List<ICrawlCompletionCondition>
                {
                    new MaxPagesCrawledCondition(100),
                    new MaxTimeCondition(TimeSpan.FromMinutes(3)),
                    new MaxResultsFoundCondition(2000)
                },

                ThreadAllowance = 10,

                Cookies = new List<Cookie>
                {
                    new Cookie("over18", "1", "/", "reddit.com")
                },

                // Captures the src attribute of <img> tags in the named group "image".
                Regex = "<img.+?src=\"(?<image>.+?)\""
            };

            using (var engine = new Crawler(webAgent))
            {
                var outcome = await engine.Crawl(crawlJob);

                // Summary counters first, then one line per result entry.
                Console.WriteLine(outcome.CrawlCount);
                Console.WriteLine(outcome.QueueSize);
                Console.WriteLine(outcome.ResultsCount);

                foreach (var entry in outcome.Data)
                {
                    Console.WriteLine(entry.Item2);
                }
            }
        }