/// <summary>
/// Sends <paramref name="text"/> to the form's output. If that call throws for any
/// reason, the message is written to the console instead, prefixed with the current
/// local date and time.
/// </summary>
/// <param name="form">Form whose <c>OutputText</c> method receives the message.</param>
/// <param name="text">Message to display.</param>
private void WriteLine(ScraperForm form, string text)
{
    try
    {
        form.OutputText(text);
    }
    catch (Exception)
    {
        // Best-effort fallback: the form could not take the message, so log to the console.
        Console.WriteLine("[{0}] - {1}", DateTime.Now, text);
    }
}
/// <summary>
/// Queues a thread-pool work item that downloads one plugin listing page for
/// <paramref name="tag"/>, collects the plugin page links found on it, and passes
/// each link to <c>ScrapeDownloadLocation</c>.
/// </summary>
/// <param name="form">Form that receives progress messages through <c>WriteLine</c>.</param>
/// <param name="tag">Plugin tag, inserted verbatim into the listing URL.</param>
/// <param name="page">Page index; the requested URL uses <c>page + 1</c>, the log messages use <c>page</c>.</param>
/// <param name="dir">Directory passed through to <c>ScrapeDownloadLocation</c>.</param>
private void FindDownloadPages(ScraperForm form, string tag, uint page, string dir)
{
    string page_url = "http://wordpress.org/extend/plugins/tags/" + tag + "/page/" + Convert.ToString(page + 1) + "/";

    ThreadPool.QueueUserWorkItem(new WaitCallback((object state) =>
    {
        try
        {
            List<String> nodelist = new List<String>();

            using (WebClient client = new WebClient())
            {
                WriteLine(form, "Downloading plugin pages from " + page_url);
                client.Headers[HttpRequestHeader.UserAgent] = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1468.0 Safari/537.36";

                HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
                doc.LoadHtml(client.DownloadString(page_url));

                // SelectNodes yields null (not an empty collection) when the XPath matches
                // nothing, so an empty listing page must be handled explicitly.
                var nodes = doc.DocumentNode.SelectNodes("//div[@class='plugin-block']/h3[1]/a[1]");
                if (nodes == null)
                {
                    WriteLine(form, "No plugins found in page " + page);
                    return;
                }

                foreach (HtmlNode node in nodes)
                {
                    // Skip anchors without a usable href instead of dereferencing null.
                    var href = node.Attributes["href"];
                    if (href == null || String.IsNullOrEmpty(href.Value))
                    {
                        continue;
                    }

                    nodelist.Add(href.Value);
                    WriteLine(form, "Found " + href.Value + " in page " + page);
                }
            }

            foreach (string link in nodelist)
            {
                try
                {
                    ScrapeDownloadLocation(form, link, dir);
                }
                catch (Exception ex)
                {
                    // One broken plugin page should not abort the remaining links.
                    WriteLine(form, "Failed to scrape " + link + ": " + ex.Message);
                }
            }
        }
        catch (Exception ex)
        {
            // Top of a thread-pool work item: an escaping exception would take down the
            // process, so report the cause and notify the user instead.
            WriteLine(form, "Error while scraping " + page_url + ": " + ex.Message);
            MessageBox.Show("An error occured in scraping.", "[Error]");
        }
    }));
}
/// <summary>
/// Downloads the plugin page at <paramref name="url"/>, finds its download links, and
/// starts an asynchronous download of each one into <paramref name="dir"/> via
/// <c>DownloadFile</c>.
/// </summary>
/// <param name="form">Form that receives progress messages.</param>
/// <param name="url">Address of the plugin page to scrape.</param>
/// <param name="dir">Directory in which downloaded files are placed.</param>
private void ScrapeDownloadLocation(ScraperForm form, string url, string dir)
{
    const string userAgent = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1468.0 Safari/537.36";

    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();

    // This client is used synchronously only, so it is safe to dispose right away.
    using (WebClient pageClient = new WebClient())
    {
        pageClient.Headers[HttpRequestHeader.UserAgent] = userAgent;
        doc.LoadHtml(pageClient.DownloadString(url));
    }

    // SelectNodes yields null (not an empty collection) when nothing matches.
    var nodes = doc.DocumentNode.SelectNodes("//p[@class='button']/a[@itemprop='downloadUrl']");
    if (nodes == null)
    {
        WriteLine(form, "No download link found on " + url);
        return;
    }

    foreach (HtmlNode node in nodes)
    {
        var href = node.Attributes["href"];
        if (href == null || String.IsNullOrEmpty(href.Value))
        {
            continue;
        }

        string link = href.Value;

        // Take the text after the last '/', without the slash itself; a link with no
        // '/' yields the whole string instead of throwing.
        string fileName = link.Substring(link.LastIndexOf('/') + 1);
        if (fileName.Length == 0)
        {
            WriteLine(form, "Skipping " + link + ": no file name in link");
            continue;
        }

        // Each async download gets its own client: WebClient rejects a second async
        // operation while one is in flight, and the client must outlive this method
        // until the download completes.
        WebClient downloader = new WebClient();
        downloader.Headers[HttpRequestHeader.UserAgent] = userAgent;
        downloader.DownloadFileCompleted += (sender, e) => downloader.Dispose();

        try
        {
            DownloadFile(form, link, System.IO.Path.Combine(dir, fileName), downloader);
        }
        catch
        {
            // The download never started, so the completion handler will not run.
            downloader.Dispose();
            throw;
        }
    }
}
/// <summary>
/// Starts an asynchronous download of <paramref name="location"/> to the file
/// <paramref name="dlloc"/> and reports the outcome through <c>WriteLine</c> when the
/// download completes, fails, or is cancelled.
/// </summary>
/// <param name="form">Form that receives the outcome message.</param>
/// <param name="location">Absolute URI of the file to download.</param>
/// <param name="dlloc">Local file path the download is written to.</param>
/// <param name="wClient">Client used to perform the download.</param>
private void DownloadFile(ScraperForm form, string location, string dlloc, WebClient wClient)
{
    AsyncCompletedEventHandler handler = null;
    handler = (object o, AsyncCompletedEventArgs e) =>
    {
        // Detach so a reused client does not report this location again on later downloads.
        wClient.DownloadFileCompleted -= handler;

        // The completed event also fires on failure and cancellation, so check before
        // claiming success.
        if (e.Cancelled)
        {
            WriteLine(form, "Download of " + location + " was cancelled.");
        }
        else if (e.Error != null)
        {
            WriteLine(form, "Failed to download " + location + ": " + e.Error.Message);
        }
        else
        {
            WriteLine(form, "Successfully downloaded " + location + "!");
        }
    };

    wClient.DownloadFileCompleted += handler;

    try
    {
        wClient.DownloadFileAsync(new Uri(location), dlloc);
    }
    catch
    {
        // The download never started; do not leave a stale handler on the client.
        wClient.DownloadFileCompleted -= handler;
        throw;
    }
}