示例#1
0
        /// <summary>
        /// Crawls pages breadth-first, starting from <c>firstUrl</c>.
        /// </summary>
        /// <remarks>
        /// The loop runs while <c>stopFlag</c> is not set, <c>count</c> is below
        /// <c>maxCount</c>, and there are still queued URLs. A page whose download
        /// throws is reported through <c>CrawlPageFailed</c> and skipped; the crawl
        /// then continues with the next queued URL. <c>CrawlTaskEnded</c> is raised
        /// once the loop finishes.
        /// </remarks>
        public void BfsCraw()
        {
            var urlQueue = new Queue<string>();
            // URLs already queued during this run, so a link that appears on
            // several pages is enqueued (and therefore downloaded) only once.
            var queuedUrls = new HashSet<string>();

            urlQueue.Enqueue(firstUrl);
            queuedUrls.Add(firstUrl);
            while (!stopFlag && count < maxCount && urlQueue.Count > 0)
            {
                string url = urlQueue.Dequeue();
                CrawlPageStarted?.Invoke(url); // crawl of this page is starting
                string filepath = folderpath + "/" + (count + 1) + ".html";
                string html;
                try
                {
                    html = Download(url, filepath);
                }
                catch (Exception e)
                {
                    CrawlPageFailed?.Invoke(url, e.Message); // crawl of this page failed
                    // Skip only this page; returning here would abandon the rest of
                    // the queue and never raise CrawlTaskEnded.
                    continue;
                }
                CrawlPageSucceeded?.Invoke(url, html); // crawl of this page succeeded
                hasCrawedUrl.Add(url);
                count++;

                foreach (var nxtUrl in UrlsInHtml(html, url))
                {
                    // Enqueue only links that are neither crawled nor already waiting.
                    if (!hasCrawedUrl.Contains(nxtUrl) && queuedUrls.Add(nxtUrl))
                    {
                        urlQueue.Enqueue(nxtUrl);
                    }
                }
            }
            CrawlTaskEnded?.Invoke();
        }
示例#2
0
 /// <summary>
 /// Crawls depth-first by calling <c>Dfs</c> on <c>firstUrl</c>, then raises
 /// <c>CrawlTaskEnded</c> if it has any subscribers.
 /// </summary>
 public void DfsCraw()
 {
     Dfs(firstUrl);

     // Read the handler once so the null check and the call see the same value.
     var taskEnded = CrawlTaskEnded;
     if (taskEnded != null)
     {
         taskEnded.Invoke();
     }
 }