/// <summary>
/// Pauses the calling thread when there is no URL to process, so that an empty
/// queue is not polled in a tight loop that would hog the CPU.
/// </summary>
/// <param name="linkerThread">
/// Worker whose current URL is cleared and whose status callback is invoked
/// (only when it reports <c>IsComplete</c>) before the pause.
/// </param>
private static void SleepWhenQueueIsEmpty(LinkerThread linkerThread)
{
    // Length of the idle pause, in milliseconds (10 seconds).
    const int idleMilliseconds = 10 * 1000;

    // The URL is reset before the callback runs, so the callback observes an empty URL.
    linkerThread.Url = string.Empty;

    if (linkerThread.IsComplete)
    {
        linkerThread.LinkerThreadStatusChanged(linkerThread);
    }

    Thread.Sleep(idleMilliseconds);
}
/// <summary>
/// Starts crawling: allocates <c>ThreadCount</c> worker slots, then creates, wires up,
/// starts and stores one <c>LinkerThread</c> per slot.
/// </summary>
public void Start()
{
    // Allocate one slot per configured worker.
    _linkerThreads = new LinkerThread[ThreadCount];

    int index = 0;
    while (index < ThreadCount)
    {
        LinkerThread worker = new LinkerThread(this);
        worker.Name = index.ToString();

        // Subscribe the status callback before the worker is started.
        worker.LinkerThreadStatusChanged += LinkerThreadCallbacked;

        worker.Start();
        LinkerThreads[index] = worker;

        index++;
    }
}
/// <summary>
/// Worker loop. Repeatedly takes a URL from the owning linker's queue, downloads the page
/// as UTF-8 and passes it to the e-mail collector. Whenever no URL is available the worker
/// idles through <c>SleepWhenQueueIsEmpty</c>. The loop has no exit condition of its own.
/// </summary>
/// <param name="data">The <c>LinkerThread</c> driving this loop, passed as <c>object</c>.</param>
public static void DoWork(object data)
{
    try
    {
        LinkerThread worker = (LinkerThread)data;

        // The linker that owns this worker.
        Linker owner = worker.Linker;

        // Queue of URLs that are still waiting to be visited.
        UrlQueue pending = owner.urlQueue;

        for (;;)
        {
            if (pending.Count <= 0)
            {
                SleepWhenQueueIsEmpty(worker);
                continue;
            }

            try
            {
                // Take the next URL off the queue and record it on the worker.
                string nextUrl = (string)pending.Dequeue();
                worker.Url = nextUrl;

                if (worker.IsComplete)
                {
                    worker.LinkerThreadStatusChanged(owner);
                }

                // Download the page and collect e-mail addresses from it.
                string pageHtml = HtmlHelper.GetHtml(nextUrl, "UTF-8");
                LinkHelper.CollectEmail(nextUrl, pageHtml);

                if (worker.IsComplete)
                {
                    worker.LinkerThreadStatusChanged(owner);
                }
            }
            catch (InvalidOperationException)
            {
                // NOTE(review): presumably thrown by Dequeue when the queue was emptied between
                // the Count check and the dequeue - confirm. This handler also covers the
                // download and collect calls above, since they sit inside the same try.
                SleepWhenQueueIsEmpty(worker);
            }
        }
    }
    catch (ThreadAbortException)
    {
        // The thread was aborted.
    }
}