public void Crawl()
{
    Init();
    Console.WriteLine("Spider Initialized");

    // Seed the frontier with the root URL at depth 0.
    UrlsMng.AddUrl(new KeyValuePair<string, int>(rootUrl, 0));

    // Start every worker thread, passing each its index so it can
    // mark itself idle later.
    for (int i = 0; i < threadsNum; i++)
    {
        threads[i].Start(i);
        idleThreads[i] = false;
        Console.WriteLine("Thread " + (i + 1) + " started");
    }
}
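Crawl() depends on an Init() whose body is not shown here: before Start(i) is called, it has to size the threads and idleThreads arrays and bind each thread to CrawlProc. A minimal sketch of what that wiring might look like, assuming Thread[] threads, bool[] idleThreads, and int threadsNum are fields of the spider class (the field names are taken from the calls above; everything else is illustrative):

// Sketch of the setup Crawl() relies on; the real Init() is not shown.
// Requires: using System.Threading;
void Init()
{
    threads = new Thread[threadsNum];
    idleThreads = new bool[threadsNum];
    for (int i = 0; i < threadsNum; i++)
    {
        // Each worker runs CrawlProc; Crawl() passes the index via Start(i).
        threads[i] = new Thread(CrawlProc) { IsBackground = true };
    }
}

Marking the workers as background threads is optional; it just keeps them from holding the process open if the crawl is aborted.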
void CrawlProc(object threadIndex)
{
    var currentIndex = (int)threadIndex;
    while (true)
    {
        if (!UrlsMng.HasNewUrl)
        {
            // No work left: mark this thread idle. If every thread is idle,
            // no one can produce new URLs, so the crawl is finished.
            idleThreads[currentIndex] = true;
            if (idleThreads.All(t => t))
            {
                Console.WriteLine("Thread " + (currentIndex + 1) + " exiting");
                break;
            }
            Thread.Sleep(2000);
            continue;
        }
        idleThreads[currentIndex] = false;

        // Dequeue the next URL under the lock; another thread may have
        // taken it between the unsynchronized check above and here.
        KeyValuePair<string, int> curntURL;
        lock (UrlsMng)
        {
            if (UrlsMng.HasNewUrl)
            {
                curntURL = UrlsMng.GetUrl();
            }
            else
            {
                continue;
            }
        }

        // Download the page, extract its links, and enqueue them one level deeper.
        var html = Downloader.Download(curntURL.Key);
        var parseResult = Parser.ParseURLS(html, curntURL.Value);
        lock (UrlsMng)
        {
            foreach (var url in parseResult.Keys)
            {
                UrlsMng.AddUrl(new KeyValuePair<string, int>(url, curntURL.Value + 1));
            }
        }

        // Fall back to the raw HTML if filtering yields nothing.
        var filteredContent = Parser.ParseHTML(html) ?? html;
        lock (DataHdler)
        {
            DataHdler.CollectData(curntURL.Key, curntURL.Value, html, filteredContent);
        }
    }
}
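Both methods treat UrlsMng as a shared frontier with three members: HasNewUrl, AddUrl, and GetUrl. Its implementation is not part of this excerpt; one plausible sketch pairs a queue of pending (url, depth) entries with a seen-set for deduplication. Only the member names come from the code above, the internals here are an assumption:

// Illustrative URL manager matching the calls in Crawl()/CrawlProc().
using System.Collections.Generic;

class UrlManager
{
    private readonly Queue<KeyValuePair<string, int>> pending =
        new Queue<KeyValuePair<string, int>>();
    private readonly HashSet<string> seen = new HashSet<string>();

    public bool HasNewUrl
    {
        get { return pending.Count > 0; }
    }

    // Enqueue a (url, depth) pair, skipping URLs already seen.
    public void AddUrl(KeyValuePair<string, int> url)
    {
        if (seen.Add(url.Key))
        {
            pending.Enqueue(url);
        }
    }

    public KeyValuePair<string, int> GetUrl()
    {
        return pending.Dequeue();
    }
}

Note that this class does no locking of its own: CrawlProc already serializes access by locking on the UrlsMng instance, and the unsynchronized HasNewUrl check is only a hint that is re-checked under the lock before dequeuing.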