/// <summary>
/// Downloads the page at <paramref name="url"/>, writes the HTML to a file named after the
/// current value of <c>Num</c>, stores the HTML on <paramref name="url"/>, raises
/// <c>PageDownloaded</c> and finally hands the HTML to <c>Parse</c>.
/// </summary>
/// <param name="url">The entry whose <c>Url</c> is downloaded and whose <c>Html</c> is filled in.</param>
/// <remarks>
/// Any exception is caught and only its message is written to the console, so a failed
/// download never propagates to the caller.
/// </remarks>
public void Process(Urls url)
{
    try
    {
        string html;

        // WebClient is IDisposable; release it as soon as the download is done.
        using (WebClient webClient = new WebClient())
        {
            webClient.Encoding = Encoding.UTF8;
            html = webClient.DownloadString(url.Url);
        }

        // NOTE(review): Num is read here without synchronization while Crawl increments it;
        // two concurrent calls may end up with the same file name - confirm this is acceptable.
        string fileName = Num.ToString();
        File.WriteAllText(fileName, html, Encoding.UTF8);

        url.Html = html;

        // NOTE(review): if PageDownloaded is an event without subscribers this call throws,
        // the catch below swallows it and Parse is skipped - confirm a handler is always attached.
        PageDownloaded(this, url);

        // Parse the page and add the newly discovered links.
        Parse(html, url.Url);
    }
    catch (Exception ex)
    {
        // Best effort: report the failure and give up on this page.
        Console.WriteLine(ex.Message);
    }
}
/// <summary>
/// Adds <c>StartUrl</c> to <c>MyUrls</c>, stores the first match of the host pattern against
/// <c>StartUrl</c> in <c>StartWith</c>, and then repeatedly scans <c>MyUrls</c>, starting one
/// thread running <c>Process</c> for every entry not yet marked as processed.
/// Returns once <c>Num</c> exceeds 20.
/// </summary>
public void Crawl()
{
    Urls surl = new Urls() { Url = StartUrl, Pro = false, Html = "" };
    MyUrls.Add(surl);

    string str = @"(www\.){0,1}.*?\..*?/";
    Regex r = new Regex(str);
    Match m = r.Match(StartUrl);
    StartWith = m.Value;

    // Bound the outer loop by the same limit as the inner one; previously the inner
    // "break" only left the foreach and the outer loop kept spinning forever.
    while (Num <= 20)
    {
        // NOTE(review): worker threads presumably add entries to MyUrls through Parse while
        // this enumeration is running - confirm the collection tolerates concurrent modification.
        foreach (Urls url in MyUrls)
        {
            if (url.Pro)
            {
                continue;
            }

            if (Num > 20)
            {
                break;
            }

            // A fresh local per iteration, so each thread is bound to its own entry instead
            // of a shared variable that the next iteration would overwrite.
            Urls current = url;
            current.Pro = true;

            var t = new Thread(() => Process(current));
            t.Start();
            Num++;
        }
    }
}