/// <summary>
/// Runs a crawl described by <paramref name="opts"/>, prints the summary and
/// returns the process exit code.
/// </summary>
/// <param name="opts">
/// Options read here: <c>StartUrls</c>, <c>NoRobots</c>, <c>MaxErrors</c>,
/// <c>MaxUrls</c>, <c>MinUrls</c> and <c>FullSummary</c>.
/// </param>
/// <returns>
/// <c>2</c> when fewer than <c>opts.MinUrls</c> urls were crawled,
/// <c>1</c> when the observer recorded at least one error, otherwise <c>0</c>.
/// </returns>
private static int Run(Options opts)
{
    // The token source is disposable; scope it so it is released when the run ends.
    using (var cancellationTokenSource = new CancellationTokenSource())
    {
        // The observer is handed the token source together with the error/url limits
        // (presumably so it can stop the crawl when a limit is hit - confirm in CrawlerObserver).
        var observer = new CrawlerObserver(
            cancellationTokenSource,
            opts.MaxErrors,
            opts.MaxUrls);

        var crawler = CreateCrawler(opts, observer);

        // Parse every start url exactly once and reuse the result below.
        var startUris = opts.StartUrls.Select(x => new Uri(x)).ToList();
        crawler.Enqueue(startUris);

        if (!opts.NoRobots)
        {
            // One robots.txt per distinct scheme+authority among the start urls.
            var robotsTxtUrls = startUris
                .Select(uri => new Uri(uri.GetLeftPart(UriPartial.Authority)))
                .Select(root => new Uri(root, "/robots.txt"))
                .Distinct();
            crawler.Enqueue(robotsTxtUrls);
        }

        // Ctrl+C requests cancellation of the crawl instead of terminating the process.
        ConsoleCancelEventHandler onCancelKeyPress = (sender, eventArgs) =>
        {
            eventArgs.Cancel = true;
            cancellationTokenSource.Cancel();
        };

        Console.CancelKeyPress += onCancelKeyPress;
        try
        {
            // Blocks synchronously until the crawl has finished.
            var result = crawler.Crawl(cancellationTokenSource.Token).Result;

            WriteSummary(result, observer, opts.FullSummary);

            var crawledUrlsCount = observer.CrawledUrls.Count;
            if (crawledUrlsCount < opts.MinUrls)
            {
                Console.WriteLine($"\nExpected at least {opts.MinUrls} urls but crawled only {crawledUrlsCount}.");
                return 2;
            }

            return observer.Errors.Count > 0 ? 1 : 0;
        }
        finally
        {
            // Detach before the token source is disposed so a later Ctrl+C can never
            // call Cancel() on a disposed source, and the static event does not keep
            // this run's objects alive.
            Console.CancelKeyPress -= onCancelKeyPress;
        }
    }
}
/// <summary>
/// Writes the crawl report to the console: the headline counters, then the
/// warnings and errors (each followed by its referrers), and optionally a
/// listing of every url that has a status.
/// </summary>
/// <param name="result">Crawl result keyed by url.</param>
/// <param name="observer">Source of the crawled urls, warnings and errors.</param>
/// <param name="fullSummary">
/// When <c>true</c>, additionally prints every entry of <paramref name="result"/>
/// whose status is not null, ordered by the url's string form.
/// </param>
private static void WriteSummary(IReadOnlyDictionary<Uri, CrawledUrlProperties> result, CrawlerObserver observer, bool fullSummary)
{
    // Emitted as one string so the embedded "\n" separators are written unchanged.
    var headline =
        $"\nDiscovered urls: {result.Count}" +
        $"\nCrawled urls: {observer.CrawledUrls.Count}" +
        $"\nCrawl warnings: {observer.Warnings.Count}" +
        $"\nCrawl errors: {observer.Errors.Count}";
    Console.WriteLine(headline);

    if (observer.Warnings.Count > 0)
    {
        Console.WriteLine("\nCrawl warnings:\n");
        foreach (var warning in observer.Warnings)
        {
            Console.WriteLine($"{warning.Status}: {warning.Url}");
            WriteReferrers(result, warning);
        }
    }

    if (observer.Errors.Count > 0)
    {
        Console.WriteLine("\nCrawl errors:\n");
        foreach (var failure in observer.Errors)
        {
            // Prefer the flattened exception text; fall back to the status.
            var reason = failure.Exception?.FlattenInnerMessages() ?? failure.Status.ToString();
            Console.WriteLine($"{reason}: {failure.Url}");
            WriteReferrers(result, failure);
        }
    }

    if (!fullSummary)
    {
        return;
    }

    Console.WriteLine("###########################################################################################33");

    var entriesWithStatus = result
        .Where(pair => pair.Value.Status != null)
        .OrderBy(pair => pair.Key.ToString());

    foreach (var pair in entriesWithStatus)
    {
        var referrers = string.Join("\n ", pair.Value.Referrers);
        Console.WriteLine($"[{pair.Value.Status}] {pair.Key}: \nReferrers:\n {referrers}\n");
    }
}