// Requires: System.Net, System.Text, System.IO, System.Text.RegularExpressions, System.Threading
public void Download(url_info url)
{
    try
    {
        WebClient webClient = new WebClient();
        webClient.Encoding = Encoding.UTF8;
        string html = webClient.DownloadString(url.url);

        // Save the page under the current value of the shared download counter.
        string fileName = count.ToString();
        File.WriteAllText(fileName, html, Encoding.UTF8);

        url.html = html;
        crawlerstopped?.Invoke(this, url); // notify listeners that this URL is done
        Parsse(html, url.url); // parse the page and add any newly found links
    }
    catch (Exception ex)
    {
        // An empty catch hides every failure; at least record what went wrong.
        Console.WriteLine($"Download failed for {url.url}: {ex.Message}");
    }
}
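// On .NET 5+ WebClient is marked obsolete, so the same step could be written
// against HttpClient instead. A minimal sketch, assuming the url_info type and
// count field above; the method name DownloadAsync is hypothetical. Requires
// System.Net.Http and System.Threading.Tasks. In real use the HttpClient
// instance should be reused rather than created per request.
public async Task DownloadAsync(url_info url)
{
    using (var client = new HttpClient())
    {
        try
        {
            string html = await client.GetStringAsync(url.url);
            File.WriteAllText(count.ToString(), html, Encoding.UTF8);
            url.html = html;
        }
        catch (HttpRequestException ex)
        {
            Console.WriteLine($"Request failed for {url.url}: {ex.Message}");
        }
    }
}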
// Event raised after a download completes; reports the finished URL back to the window.
public event crawlevent crawer;

public void Crawl()
{
    url_info url_Info = new url_info() { url = urlstart, processing = false, html = "" };
    urls1.Add(url_Info);

    // Reduce the start URL to its host part, e.g. "www.example.com/".
    string str = @"(www\.){0,1}.*?\..*?/";
    Regex r = new Regex(str);
    Match m = r.Match(urlstart);
    urlstart = m.Value;

    // Stop once 20 pages have been dispatched; the original while (true)
    // never exited, because its break only left the inner foreach.
    while (count <= 20)
    {
        foreach (var url in urls1)
        {
            if (url.processing)
            {
                continue;
            }
            if (count > 20)
            {
                break;
            }

            url.processing = true;

            // Copy into a local before capturing: the original lambda captured
            // a variable shared across iterations, so threads could all end up
            // downloading whichever URL was assigned last.
            var current = url;
            var t = new Thread(() => Download(current));
            t.Start();
            count++;
        }
        // Caveat: Parsse runs on worker threads and appends to urls1 while this
        // foreach iterates it, which List<T> does not allow; see the sketch below.
    }
}
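// A minimal sketch of a thread-safe variant, assuming Parsse is changed to
// enqueue discovered links into the same queue. The names pending, downloaded,
// and CrawlSafe are hypothetical; the 20-page limit mirrors the code above.
// Requires System.Collections.Concurrent and System.Threading. ConcurrentQueue
// tolerates concurrent producers, and Interlocked/Volatile replace the unsynchronized
// count++ shared across threads.
private readonly ConcurrentQueue<url_info> pending = new ConcurrentQueue<url_info>();
private int downloaded = 0;

public void CrawlSafe()
{
    pending.Enqueue(new url_info() { url = urlstart, processing = false, html = "" });

    while (Volatile.Read(ref downloaded) <= 20)
    {
        url_info next;
        if (!pending.TryDequeue(out next))
        {
            Thread.Sleep(50); // queue drained for now; wait for parsers to add links
            continue;
        }

        var current = next;
        new Thread(() =>
        {
            Download(current);
            Interlocked.Increment(ref downloaded); // safe shared counter update
        }).Start();
    }
}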