Exemplo n.º 1
0
        /// <summary>
        /// Collects links for <paramref name="url"/> with a <c>LinkScraper</c>, adds
        /// <paramref name="url"/> itself to that set, and then runs an <c>EmailScraper</c>
        /// over every collected URI whose authority is <c>www.southhills.edu</c>.
        /// Each scraped page is recorded in <c>_sitesScraped</c> and its results are
        /// merged into <c>_results</c>.
        /// </summary>
        /// <param name="url">Absolute URL of the page to start from.</param>
        /// <remarks>
        /// This method is best-effort: no exception is propagated to the caller. A failure
        /// is written to <see cref="System.Diagnostics.Trace"/> and the method returns,
        /// keeping whatever had been gathered up to that point.
        /// </remarks>
        public void Scrape(string url)
        {
            // Only pages served from this authority are scanned for emails.
            const string allowedAuthority = "www.southhills.edu";

            // WebClient is IDisposable; the using block releases it on every exit path.
            using (WebClient client = new WebClient())
            {
                try
                {
                    // The downloaded text is not used here. The call is kept because it
                    // throws when the URL cannot be fetched, which aborts the scrape early.
                    client.DownloadString(url);

                    //search for URLs in urlContent
                    LinkScraper ls = new LinkScraper();
                    ls.Scrape(url);
                    _sitesToScrape = ls.Results;
                    _sitesToScrape.Add(new Uri(url));

                    //store found emails in the results
                    foreach (Uri uri in _sitesToScrape)
                    {
                        if (uri.Authority != allowedAuthority)
                        {
                            continue;
                        }

                        // Create the scraper only for pages that pass the host filter.
                        EmailScraper es = new EmailScraper();
                        es.Scrape(uri.AbsoluteUri);
                        _sitesScraped.Add(new Uri(uri.AbsoluteUri));
                        _results.UnionWith(es.Results);
                    }
                }
                catch (Exception ex)
                {
                    // Keep the original "never throw" contract, but leave a trace of what
                    // went wrong instead of discarding the failure silently.
                    System.Diagnostics.Trace.TraceWarning("Scrape failed for '{0}': {1}", url, ex);
                }
            }
        }
        //Public Methods
        /// <summary>
        /// Scrapes <paramref name="url"/> in four steps: fetch the page, collect its links
        /// with a <c>LinkScraper</c>, add <paramref name="url"/> itself to the collected set,
        /// and run an <c>EmailScraper</c> over every collected URI whose authority is
        /// <c>www.southhills.edu</c>. Scraped pages are recorded in <c>_sitesScraped</c>
        /// and their results are merged into <c>_results</c>.
        /// </summary>
        /// <param name="url">Absolute URL of the page to start from.</param>
        /// <remarks>
        /// This method is best-effort: no exception is propagated to the caller. A failure
        /// is written to <see cref="System.Diagnostics.Trace"/> and the method returns,
        /// keeping whatever had been gathered up to that point.
        /// </remarks>
        public void Scrape(string url)
        {
            // Only pages served from this authority are scanned for emails.
            const string allowedAuthority = "www.southhills.edu";

            // WebClient is IDisposable; the using block releases it on every exit path.
            using (WebClient client = new WebClient())
            {
                try
                {
                    //Step 1
                    // The downloaded text is not used here. The call is kept because it
                    // throws when the URL cannot be fetched, which aborts the scrape early.
                    client.DownloadString(url);

                    //Step 2
                    LinkScraper ls = new LinkScraper();
                    ls.Scrape(url);
                    _sitesToScrape = ls.Results;

                    //Step 3
                    _sitesToScrape.Add(new Uri(url));

                    //Step 4
                    foreach (Uri uri in _sitesToScrape)
                    {
                        if (uri.Authority != allowedAuthority)
                        {
                            continue;
                        }

                        // Create the scraper only for pages that pass the host filter.
                        EmailScraper es = new EmailScraper();
                        es.Scrape(uri.AbsoluteUri);
                        _sitesScraped.Add(new Uri(uri.AbsoluteUri));
                        _results.UnionWith(es.Results);
                    }
                }
                catch (Exception ex)
                {
                    // Keep the original "never throw" contract, but leave a trace of what
                    // went wrong instead of discarding the failure silently.
                    System.Diagnostics.Trace.TraceWarning("Scrape failed for '{0}': {1}", url, ex);
                }
            }
        }