/// <summary>
/// Crawls pages one at a time, beginning with <c>StartURL</c>, until <c>MaxPage</c> pages
/// have been recorded in <c>DownloadedPages</c> or no URLs remain queued, then calls
/// <c>CrawlerStopped</c>.
/// </summary>
/// <remarks>
/// Each URL is reported through <c>PageDownloaded</c>: with <c>"success"</c> after a
/// successful download, or with <c>" Error:"</c> followed by the exception message if
/// downloading, reporting or parsing throws. A failure never aborts the crawl.
/// </remarks>
/// <returns>A task that completes once the crawl loop has finished.</returns>
public async Task Start()
{
    DownloadedPages.Clear();

    // Drain anything left in the queue from a previous run.
    while (pending.TryDequeue(out string _))
    {
    }

    pending.Enqueue(StartURL);

    while (DownloadedPages.Count < MaxPage && pending.Count > 0)
    {
        // There is deliberately no MaxParallel throttle here. Every download is awaited
        // before the next one starts, so at most one is ever in flight. The previous
        // check (DownloadedPages.Count > MaxParallel) compared the number of *completed*
        // downloads with the concurrency cap; nothing in this loop lowers that number,
        // so once it was exceeded the loop waited on Task.Delay forever.

        // Never hand a null URL to DownLoad: if the dequeue fails (e.g. the queue was
        // emptied between the Count check and this call), re-evaluate the loop condition.
        if (!pending.TryDequeue(out string url))
        {
            continue;
        }

        try
        {
            string html = await DownLoad(url); // download
            DownloadedPages[url] = true;
            PageDownloaded(this, url, "success");
            Parse(html, url); // parse the page and queue the newly found links
        }
        catch (Exception ex)
        {
            PageDownloaded(this, url, " Error:" + ex.Message);
        }
    }

    CrawlerStopped(this);
}
/// <summary>
/// Resets the crawl state, then dispatches every queued URL to <c>RunTask</c> on the
/// thread pool until <c>MaxPage</c> pages are recorded in <c>DownloadedPages</c> or there
/// is neither queued nor running work left, and finally calls <c>CrawlerStopped</c>.
/// </summary>
/// <remarks>
/// The started tasks are not awaited: when the loop ends because the page limit was
/// reached, <c>CrawlerStopped</c> is called without waiting for tasks still in flight.
/// </remarks>
public void Start()
{
    // How long to sleep while the queue is empty but tasks are still running.
    const int IdlePollMilliseconds = 10;

    DownloadedPages.Clear();
    pending.Clear();
    pending.Enqueue(StartURL);
    TaskRunning = 0;

    while (DownloadedPages.Count < MaxPage)
    {
        // Sample the running-task counter BEFORE looking at the queue. If no task was
        // running at this point, nothing can add URLs afterwards, so an empty queue
        // really means the crawl is finished. Reading the queue first could miss URLs
        // enqueued by a task that completes between the two reads.
        // NOTE(review): presumably RunTask updates TaskRunning under lock (this) as
        // well; the lock target is kept unchanged so both sides stay mutually
        // exclusive - confirm against RunTask.
        bool noTaskRunning;
        lock (this)
        {
            noTaskRunning = TaskRunning == 0;
        }

        if (pending.Count == 0)
        {
            if (noTaskRunning)
            {
                break;
            }

            // Work is still in flight and may enqueue more URLs: wait briefly instead
            // of spinning at full CPU.
            System.Threading.Thread.Sleep(IdlePollMilliseconds);
            continue;
        }

        string url = pending.Dequeue();

        // Count the task as running before it is scheduled so the stop check above
        // cannot observe zero while this URL is being processed.
        lock (this)
        {
            TaskRunning++;
        }

        Task.Run(() => RunTask(url));
    }

    CrawlerStopped(this);
}
/// <summary>
/// Clears <c>DownloadedPages</c>, replaces <c>pending</c> with a new queue holding only
/// <c>StartURL</c>, runs <c>Add</c> through <c>Parallel.Invoke</c>, and then calls
/// <c>CrawlerStopped</c>.
/// </summary>
public void Start()
{
    DownloadedPages.Clear();

    // Start over with a brand-new queue seeded with the start URL.
    pending = new Queue<string>();
    pending.Enqueue(StartURL);

    // Parallel.Invoke returns only after the supplied action has completed.
    Action crawl = () => Add();
    Parallel.Invoke(crawl);

    CrawlerStopped(this);
}
/// <summary>
/// Crawls synchronously from <c>StartURL</c>: dequeues one URL at a time, downloads it,
/// records it in <c>DownloadedPages</c>, reports the outcome through
/// <c>PageDownloaded</c> and parses the page, until <c>MaxPage</c> pages are recorded or
/// the queue is empty. Calls <c>CrawlerStopped</c> when the loop ends.
/// </summary>
public void Start()
{
    DownloadedPages.Clear();
    pending.Clear();
    pending.Enqueue(StartURL);

    while (true)
    {
        // Stop once the page limit is reached or there is nothing left to visit.
        bool hasWork = DownloadedPages.Count < MaxPage && pending.Count > 0;
        if (!hasWork)
        {
            break;
        }

        string url = pending.Dequeue();
        try
        {
            // Download the page.
            string html = DownLoad(url);
            DownloadedPages[url] = true;
            PageDownloaded(this, url, "success");

            // Parse the page and queue the newly found links.
            Parse(html, url);
        }
        catch (Exception ex)
        {
            // Report the failure and carry on with the next queued URL.
            string status = " Error:" + ex.Message;
            PageDownloaded(this, url, status);
        }
    }

    CrawlerStopped(this);
}
/// <summary>
/// Resets the crawl state: empties <c>DownloadedPages</c> and the <c>pending</c> queue,
/// then queues <c>StartURL</c> as the only URL to visit.
/// </summary>
public void init()
{
    // Forget the pages recorded so far.
    DownloadedPages.Clear();

    // Empty the work queue and seed it with the start URL.
    pending.Clear();
    pending.Enqueue(StartURL);
}