Exemplo n.º 1
0
        /// <summary>
        /// Downloads the page at <paramref name="input"/>, extracts its links, then downloads
        /// each of those links once and extracts their links as well. Links found on the
        /// linked pages are recorded but not downloaded.
        /// </summary>
        /// <param name="input">
        /// The starting URL. It is used to build the <c>UrlStandardiser</c> and is also
        /// standardised to form the first key of the result.
        /// </param>
        /// <returns>
        /// A dictionary keyed by URL whose value is the array of links extracted from that
        /// page. A URL for which no HTML could be obtained maps to an empty array.
        /// </returns>
        public static async Task <Dictionary <string, string[]> > Scrape(string input)
        {
            var urlStandardiser = new UrlStandardiser(input);
            var linkExtractor   = new SameDomainLinkExtractor(urlStandardiser);

            var scrape = new Dictionary <string, string[]>();

            var url = urlStandardiser.Standardise(input);

            var html = await HtmlDownloader.GetHtml(url);

            // GetHtml may yield null (the per-link branch below guards against it), so the
            // start page gets the same treatment instead of failing on html.Text.
            if (html == null)
            {
                Console.WriteLine("Not valid HTML: " + url);
                scrape[url] = Array.Empty <string>();
                return(scrape);
            }

            var links = linkExtractor.Extract(html.Text);

            scrape[url] = links;

            foreach (var link in links)
            {
                // A link may repeat on the page or point back at the start URL;
                // each URL is downloaded at most once.
                if (scrape.ContainsKey(link))
                {
                    Console.WriteLine("URL already scraped: " + link);
                    continue;
                }

                var html2 = await HtmlDownloader.GetHtml(link);

                if (html2 == null)
                {
                    Console.WriteLine("Not valid HTML: " + link);
                    scrape[link] = Array.Empty <string>();
                    continue;
                }

                var links2 = linkExtractor.Extract(html2.Text);
                scrape[link] = links2;
                Console.WriteLine("scrape[" + link + "]: " + string.Join(',', links2));
            }

            return(scrape);
        }
 /// <summary>
 /// Creates the extractor and stores the supplied standardiser in the
 /// <c>UrlStandardiser</c> member.
 /// </summary>
 /// <param name="urlStandardiser">The standardiser instance to keep.</param>
 public SameDomainLinkExtractor(UrlStandardiser urlStandardiser)
     => UrlStandardiser = urlStandardiser;