/// <summary>
/// Raises <c>SpiderClosingEvent</c> and then tears down the spider's components:
/// every pipeline, the scheduler (after resetting its duplicate check when it is a
/// <c>DuplicateRemovedScheduler</c>), the page processor and the downloader.
/// </summary>
protected void OnClose()
{
    // Snapshot the delegate once, then invoke it only if someone subscribed.
    var closingHandlers = SpiderClosingEvent;
    if (closingHandlers != null)
    {
        closingHandlers();
    }

    foreach (var registeredPipeline in Pipelines)
    {
        SafeDestroy(registeredPipeline);
    }

    // Only duplicate-removing schedulers expose a duplicate check to reset.
    var deduplicatingScheduler = Scheduler as DuplicateRemovedScheduler;
    if (deduplicatingScheduler != null)
    {
        deduplicatingScheduler.ResetDuplicateCheck(this);
    }

    // Destruction order is kept as-is: scheduler, page processor, downloader.
    SafeDestroy(Scheduler);
    SafeDestroy(PageProcessor);
    SafeDestroy(Downloader);
}
/// <summary>
/// Switches the spider to <c>Status.Exited</c>, logs a warning that the task
/// identified by <c>Identity</c> is exiting, and raises <c>SpiderClosingEvent</c>.
/// </summary>
public void Exit()
{
    Stat = Status.Exited;

    string exitNotice = string.Concat("退出任务中 ", Identity, "...");
    Logger.Warn(exitNotice);

    // Snapshot the delegate once, then invoke it only if someone subscribed.
    var closingHandlers = SpiderClosingEvent;
    if (closingHandlers != null)
    {
        closingHandlers();
    }
}
/// <summary>
/// Runs the spider on the calling thread and blocks until every worker has left its loop.
/// </summary>
/// <remarks>
/// <para>
/// After <c>CheckIfRunning</c> and <c>InitComponent</c>, <c>ThreadNum</c> workers are started
/// through <c>Parallel.For</c>. Each worker clones the downloader and, while <c>Stat</c> is
/// <c>Status.Running</c>, polls the scheduler for a request. When there is none it waits via
/// <c>WaitNewUrl</c>; once <c>waitCount</c> exceeds <c>_waitCountLimit</c> and
/// <c>IsExitWhenComplete</c> is set, it marks the spider <c>Status.Finished</c> and leaves.
/// Otherwise the request is processed, followed by <c>OnSuccess</c>, or by <c>OnError</c> when
/// an exception was thrown.
/// </para>
/// <para>
/// When the workers are done the method records <c>FinishedTime</c>, destroys the pipelines,
/// calls <c>OnClose</c> if the spider finished, raises <c>SpiderClosingEvent</c>, waits for the
/// log to drain and finally sets <c>IsExited</c>.
/// </para>
/// </remarks>
/// <exception cref="InvalidCastException">
/// The configured scheduler does not implement <c>IMonitorableScheduler</c>.
/// </exception>
public void Run()
{
    CheckIfRunning();

    Stat = Status.Running;
    IsExited = false;

#if !NET_CORE
    // Enable multi-threading support by raising the default connection limit.
    System.Net.ServicePointManager.DefaultConnectionLimit = 1000;
#endif

    InitComponent();

    // Direct cast: the scheduler must implement IMonitorableScheduler (used for the progress log below).
    IMonitorableScheduler monitor = (IMonitorableScheduler)Scheduler;

    // Keep an already recorded start time; only stamp it when it is still unset.
    if (StartTime == DateTime.MinValue)
    {
        StartTime = DateTime.Now;
    }

    Parallel.For(0, ThreadNum, new ParallelOptions { MaxDegreeOfParallelism = ThreadNum }, i =>
    {
        int waitCount = 0;
        bool firstTaskDone = false;
        var downloader = Downloader.Clone();

        while (Stat == Status.Running)
        {
            Request request = Scheduler.Poll(this);

            if (request == null)
            {
                if (waitCount > _waitCountLimit && IsExitWhenComplete)
                {
                    Stat = Status.Finished;
                    break;
                }

                // Wait until a new url is added.
                WaitNewUrl(ref waitCount);
            }
            else
            {
                Log.WriteLine($"Left: {monitor.GetLeftRequestsCount(this)} Total: {monitor.GetTotalRequestsCount(this)} Thread: {ThreadNum}");
                waitCount = 0;

                try
                {
                    ProcessRequest(request, downloader);
                    Thread.Sleep(Site.SleepTime);
#if TEST
                    System.Diagnostics.Stopwatch sw = new System.Diagnostics.Stopwatch();
                    sw.Reset();
                    sw.Start();
#endif
                    OnSuccess(request);
#if TEST
                    sw.Stop();
                    Console.WriteLine("OnSuccess:" + (sw.ElapsedMilliseconds).ToString());
#endif
                }
                catch (Exception e)
                {
                    OnError(request);
                    Logger.Error("采集失败: " + request.Url + ".", e);
                }
                finally
                {
#if !NET_CORE
                    if (Site.HttpProxyPoolEnable && request.GetExtra(Request.Proxy) != null)
                    {
                        // The status code extra can be absent while a proxy extra is present.
                        // Unboxing null here would throw from inside this finally block, skip the
                        // page counter below and abort the whole Parallel.For, so guard it.
                        var statusCode = request.GetExtra(Request.StatusCode);
                        if (statusCode != null)
                        {
                            Site.ReturnHttpProxyToPool((HttpHost)request.GetExtra(Request.Proxy), (int)statusCode);
                        }
                        else
                        {
                            // NOTE(review): the proxy is not handed back in this case because there is no
                            // status code to report; confirm whether the pool expects a sentinel value instead.
                            Logger.Warn("No status code recorded for " + request.Url + "; proxy was not returned to the pool.");
                        }
                    }
#endif
                    FinishedPageCount.Inc();
                }

                // Each worker pauses once, for 3 seconds, after its first processed request.
                if (!firstTaskDone)
                {
                    Thread.Sleep(3000);
                    firstTaskDone = true;
                }
            }
        }
    });

    FinishedTime = DateTime.Now;

    foreach (IPipeline pipeline in Pipelines)
    {
        SafeDestroy(pipeline);
    }

    // NOTE(review): OnClose also destroys the pipelines and raises SpiderClosingEvent, so on a
    // normal finish both happen twice. Left unchanged here; confirm whether that is intended.
    if (Stat == Status.Finished)
    {
        OnClose();
        Logger.Info($"任务 {Identity} 结束.");
    }

    if (Stat == Status.Stopped)
    {
        Logger.Info("任务 " + Identity + " 停止成功!");
    }

    SpiderClosingEvent?.Invoke();

    Log.WaitForExit();

    if (Stat == Status.Exited)
    {
        Logger.Info("任务 " + Identity + " 退出成功!");
    }

    IsExited = true;
}