Esempio n. 1
0
        /// <summary>
        /// Runs a crawl configured by <paramref name="opts"/>, prints the summary and
        /// maps the outcome to an exit code.
        /// </summary>
        /// <param name="opts">Options supplying the start URLs, limits and flags read below.</param>
        /// <returns>
        /// 2 when fewer than <c>opts.MinUrls</c> URLs were crawled; otherwise 1 when the
        /// observer recorded at least one error; otherwise 0.
        /// </returns>
        private static int Run(Options opts)
        {
            var cts = new CancellationTokenSource();
            var crawlObserver = new CrawlerObserver(cts, opts.MaxErrors, opts.MaxUrls);
            var crawler = CreateCrawler(opts, crawlObserver);

            crawler.Enqueue(opts.StartUrls.Select(url => new Uri(url)));

            if (opts.NoRobots == false)
            {
                // One robots.txt per distinct scheme + authority among the start URLs.
                var robotsTxtUrls = opts.StartUrls
                    .Select(url => new Uri(url).GetLeftPart(UriPartial.Authority))
                    .Select(root => new Uri(new Uri(root), "/robots.txt"))
                    .Distinct();

                crawler.Enqueue(robotsTxtUrls);
            }

            // Ctrl+C requests cancellation of the crawl; setting Cancel keeps the
            // process alive so the code after the crawl still runs.
            Console.CancelKeyPress += (sender, eventArgs) =>
            {
                eventArgs.Cancel = true;
                cts.Cancel();
            };

            // Blocks the calling thread until the crawl task completes.
            var crawlTask = crawler.Crawl(cts.Token);
            var result = crawlTask.Result;

            WriteSummary(result, crawlObserver, opts.FullSummary);

            var crawledCount = crawlObserver.CrawledUrls.Count;

            if (crawledCount < opts.MinUrls)
            {
                Console.WriteLine($"\nExpected at least {opts.MinUrls} urls but crawled only {crawledCount}.");
                return 2;
            }

            if (crawlObserver.Errors.Count > 0)
            {
                return 1;
            }

            return 0;
        }
Esempio n. 2
0
        /// <summary>
        /// Writes the crawl report to the console: overall counters, then the recorded
        /// warnings and errors (each followed by its referrers), and optionally one
        /// entry per URL that has a status.
        /// </summary>
        /// <param name="result">Crawl result keyed by URL.</param>
        /// <param name="observer">Observer holding the crawled URLs, warnings and errors.</param>
        /// <param name="fullSummary">When true, also lists every URL with a non-null status, ordered by its string form.</param>
        private static void WriteSummary(IReadOnlyDictionary <Uri, CrawledUrlProperties> result, CrawlerObserver observer, bool fullSummary)
        {
            Console.WriteLine($"\nDiscovered urls: {result.Count}\nCrawled urls: {observer.CrawledUrls.Count}\nCrawl warnings: {observer.Warnings.Count}\nCrawl errors: {observer.Errors.Count}");

            if (observer.Warnings.Count > 0)
            {
                Console.WriteLine("\nCrawl warnings:\n");
                foreach (var warning in observer.Warnings)
                {
                    Console.WriteLine($"{warning.Status}: {warning.Url}");
                    WriteReferrers(result, warning);
                }
            }

            if (observer.Errors.Count > 0)
            {
                Console.WriteLine("\nCrawl errors:\n");
                foreach (var error in observer.Errors)
                {
                    // Prefer the exception text when there is one; fall back to the status.
                    Console.WriteLine($"{error.Exception?.FlattenInnerMessages() ?? error.Status.ToString()}: {error.Url}");
                    WriteReferrers(result, error);
                }
            }

            if (fullSummary)
            {
                // Separator banner (the trailing "33" in the old literal was an unshifted "##").
                Console.WriteLine("#############################################################################################");

                // Iterate the pairs directly instead of re-indexing the dictionary per key.
                foreach (var pair in result.Where(x => x.Value.Status != null).OrderBy(x => x.Key.ToString()))
                {
                    var entry = pair.Value;
                    Console.WriteLine($"[{entry.Status}] {pair.Key}: \nReferrers:\n  {string.Join("\n  ", entry.Referrers)}\n");
                }
            }
        }