Пример #1
0
 /// <summary>
 /// Builds a crawl runner: stores the supplied collaborators, hands the root
 /// address to <c>AddUrlToCrawl</c>, and adds a "user-agent" default request
 /// header to the supplied client.
 /// </summary>
 /// <param name="root">First address passed to <c>AddUrlToCrawl</c>.</param>
 /// <param name="client">HTTP client kept for later use; its default request headers are modified here.</param>
 /// <param name="parser">HTML parser kept for later use.</param>
 /// <param name="sanitiser">URL sanitiser kept for later use.</param>
 public CrawlRunner(Uri root, HttpClient client, HtmlParser parser, UrlSanitiser sanitiser)
 {
     // Store every collaborator first; AddUrlToCrawl is defined elsewhere and
     // may rely on these fields already being populated.
     _parser    = parser;
     _client    = client;
     _sanitiser = sanitiser;

     AddUrlToCrawl(root);

     // Note: this mutates the caller's HttpClient, so the header applies to
     // every request sent through that instance.
     var defaultHeaders = client.DefaultRequestHeaders;
     defaultHeaders.Add("user-agent", "Coding Sample Crawler");
 }
Пример #2
0
        /// <summary>
        /// Console entry point: crawls a fixed domain, writes the report for the
        /// crawl result, then waits for the user to press Enter.
        /// </summary>
        /// <param name="args">Command-line arguments; currently unused.</param>
        static void Main(string[] args)
        {
            //Obvious future enhancement, get domain via args
            var domain = new Uri("https://hirespace.com/");

            //HttpClient is IDisposable, so release it once the crawl and the report are finished
            using (var client = new HttpClient())
            {
                var parser = new HtmlParser();

                //Whether the sanitiser should be owned by the crawler or not is questionable for a larger solution:
                //you may want differing sanitiser behaviours regarding e.g. casing or trailing slashes.
                //In a larger project where that is the case you would ideally unit test the sanitiser separately
                //and only test that the crawler calls it.
                var sanitiser = new UrlSanitiser(domain);

                var runner = new CrawlRunner(domain, client, parser, sanitiser);

                var runnerTask = Task.Run(() => runner.Crawl());

                //Block once for the result. Unlike Wait()/Result, GetAwaiter().GetResult() rethrows the
                //original exception instead of wrapping it in an AggregateException.
                var crawlResult = runnerTask.GetAwaiter().GetResult();

                new CrawlOutputer(crawlResult).CreateReport();
            }

            Console.WriteLine("Crawl complete, press enter to close");
            Console.ReadLine();
        }