/// <summary>
/// Crawls pages breadth-first (FIFO queue), starting from <c>firstUrl</c>.
/// </summary>
/// <remarks>
/// The loop runs until <c>stopFlag</c> is set, <c>count</c> reaches <c>maxCount</c>,
/// or no pending URLs remain. Each downloaded page is passed to <c>Download</c> with
/// the target path <c>folderpath + "/" + (count + 1) + ".html"</c>.
/// A page that fails to download raises <c>CrawlPageFailed</c> and is skipped, so a
/// single bad link does not abort the crawl; <c>CrawlTaskEnded</c> is always raised
/// once the loop finishes.
/// </remarks>
public void BfsCraw()
{
    var urlQueue = new Queue<string>();

    // Every URL that has ever been enqueued during this run. Checking only
    // hasCrawedUrl is not enough: a link discovered on several pages before it is
    // crawled would otherwise be queued (and downloaded) more than once.
    var queuedUrls = new HashSet<string>();

    urlQueue.Enqueue(firstUrl);
    queuedUrls.Add(firstUrl);

    while (!stopFlag && count < maxCount && urlQueue.Count > 0)
    {
        string url = urlQueue.Dequeue();
        CrawlPageStarted?.Invoke(url); // crawl of this page is starting

        string filepath = folderpath + "/" + (count + 1) + ".html";
        string html;
        try
        {
            html = Download(url, filepath);
        }
        catch (Exception e)
        {
            CrawlPageFailed?.Invoke(url, e.Message); // crawl of this page failed
            // Skip this page instead of returning: returning here would abort the
            // whole crawl and never raise CrawlTaskEnded.
            continue;
        }

        CrawlPageSucceeded?.Invoke(url, html); // crawl of this page succeeded
        hasCrawedUrl.Add(url);
        count++;

        foreach (var nxtUrl in UrlsInHtml(html, url))
        {
            // HashSet.Add returns false when the URL is already queued, so each
            // URL enters the queue at most once.
            if (!hasCrawedUrl.Contains(nxtUrl) && queuedUrls.Add(nxtUrl))
            {
                urlQueue.Enqueue(nxtUrl);
            }
        }
    }

    CrawlTaskEnded?.Invoke();
}
/// <summary>
/// Crawls pages depth-first: delegates the traversal to <c>Dfs</c>, starting at
/// <c>firstUrl</c>, then raises <c>CrawlTaskEnded</c> if it has any subscribers.
/// </summary>
public void DfsCraw()
{
    Dfs(firstUrl);

    // Capture the delegate once, then invoke it only when someone is subscribed.
    var taskEndedHandler = CrawlTaskEnded;
    if (taskEndedHandler != null)
    {
        taskEndedHandler();
    }
}