/// <summary>
/// Runs the spider on the calling thread until it stops being in the
/// <c>Status.Running</c> state.
/// </summary>
/// <remarks>
/// Marks the spider as running, initializes its components and then starts
/// <c>ThreadNum</c> parallel workers. Each worker polls the scheduler and
/// processes requests with its own downloader clone. Once all workers have
/// returned, the finish time is recorded, every pipeline is destroyed, the
/// closing event is raised and <c>IsExited</c> is set.
/// </remarks>
public void Run()
{
    CheckIfRunning();

    Stat = Status.Running;
    IsExited = false;

#if !NET_CORE
    // Enable multi-threaded downloading by raising the connection limit.
    System.Net.ServicePointManager.DefaultConnectionLimit = 1000;
#endif

    InitComponent();

    // The scheduler is also used for the progress counters written to the log.
    IMonitorableScheduler progress = (IMonitorableScheduler)Scheduler;

    if (StartTime == DateTime.MinValue)
    {
        StartTime = DateTime.Now;
    }

    var parallelOptions = new ParallelOptions { MaxDegreeOfParallelism = ThreadNum };

    Parallel.For(0, ThreadNum, parallelOptions, workerIndex =>
    {
        var workerDownloader = Downloader.Clone();
        bool pausedAfterFirstTask = false;
        int idleRounds = 0;

        while (Stat == Status.Running)
        {
            Request current = Scheduler.Poll(this);

            if (current == null)
            {
                // Nothing queued: finish once this worker has waited past the
                // limit and the spider is configured to exit when complete.
                bool shouldFinish = idleRounds > _waitCountLimit && IsExitWhenComplete;
                if (shouldFinish)
                {
                    Stat = Status.Finished;
                    break;
                }

                // Wait until a new url is added.
                WaitNewUrl(ref idleRounds);
                continue;
            }

            idleRounds = 0;
            Log.WriteLine($"Left: {progress.GetLeftRequestsCount(this)} Total: {progress.GetTotalRequestsCount(this)} Thread: {ThreadNum}");

            try
            {
                ProcessRequest(current, workerDownloader);

                // Pause between requests for the interval configured on the site.
                Thread.Sleep(Site.SleepTime);
#if TEST
                var timer = new System.Diagnostics.Stopwatch();
                timer.Reset();
                timer.Start();
#endif
                OnSuccess(current);
#if TEST
                timer.Stop();
                Console.WriteLine("OnSuccess:" + timer.ElapsedMilliseconds.ToString());
#endif
            }
            catch (Exception ex)
            {
                OnError(current);
                Logger.Error("采集失败: " + current.Url + ".", ex);
            }
            finally
            {
#if !NET_CORE
                // Hand the proxy back to the pool, but only if one was attached to this request.
                if (Site.HttpProxyPoolEnable && current.GetExtra(Request.Proxy) != null)
                {
                    Site.ReturnHttpProxyToPool(
                        (HttpHost)current.GetExtra(Request.Proxy),
                        (int)current.GetExtra(Request.StatusCode));
                }
#endif
                FinishedPageCount.Inc();
            }

            // One-off 3 second pause after this worker's first request.
            // NOTE(review): the reason for the pause is not visible here - confirm.
            if (!pausedAfterFirstTask)
            {
                Thread.Sleep(3000);
                pausedAfterFirstTask = true;
            }
        }
    });

    FinishedTime = DateTime.Now;

    foreach (IPipeline item in Pipelines)
    {
        SafeDestroy(item);
    }

    // Stat is deliberately re-read for every check below.
    if (Stat == Status.Finished)
    {
        OnClose();
        Logger.Info($"任务 {Identity} 结束.");
    }

    if (Stat == Status.Stopped)
    {
        Logger.Info("任务 " + Identity + " 停止成功!");
    }

    SpiderClosingEvent?.Invoke();
    Log.WaitForExit();

    if (Stat == Status.Exited)
    {
        Logger.Info("任务 " + Identity + " 退出成功!");
    }

    IsExited = true;
}
/// <summary>
/// Runs the spider on the calling thread: polls the scheduler for requests
/// and hands each one to the spider's thread pool until the spider leaves the
/// running state or runs out of work.
/// </summary>
/// <remarks>
/// The polling loop ends when the state is no longer <c>StatRunning</c>, when
/// the queue is empty with no worker alive and <c>ExitWhenComplete</c> set, or
/// when the wait counter exceeds <c>_waitCountLimit</c>. Afterwards the method
/// waits for the pool to drain, optionally closes the spider, records the end
/// time, calls <c>OnClose</c> and sets the state to <c>StatFinished</c>.
/// </remarks>
public void Run()
{
    // Lift the default cap on concurrent connections so it does not limit the workers.
    System.Net.ServicePointManager.DefaultConnectionLimit = int.MaxValue;

    CheckRunningStat();

    Logger.Info("Spider " + Identify + " InitComponent...");
    InitComponent();

    Logger.Info("Spider " + Identify + " Started!");

    bool firstTask = false;

    while (Stat.Value == StatRunning)
    {
        Request request = Scheduler.Poll(this);

        if (request == null)
        {
            // Queue is empty and no worker is alive that could add more urls.
            if (ThreadPool.GetThreadAlive() == 0 && ExitWhenComplete)
            {
                break;
            }

            // Waited for new urls longer than allowed.
            if (_waitCount > _waitCountLimit)
            {
                break;
            }

            // wait until new url added
            WaitNewUrl();
        }
        else
        {
            // The start time is stamped when the first request is dispatched.
            if (_startTime == DateTime.MinValue)
            {
                _startTime = DateTime.Now;
            }

            _waitCount = 0;

            // Worker result: 1 = success, -1 = failure, 0 = argument was not a Request.
            ThreadPool.Execute((obj, cts) =>
            {
                var request1 = obj as Request;
                if (request1 != null)
                {
                    try
                    {
                        ProcessRequest(request1, cts);
                        OnSuccess(request1);

                        Uri uri = new Uri(request1.Url);
                        Logger.Info($"Request: { HttpUtility.HtmlDecode(HttpUtility.UrlDecode(uri.Query))} Sucess.");
                        return (1);
                    }
                    catch (Exception e)
                    {
                        OnError(request1);
                        Logger.Error("Request " + request1.Url + " failed.", e);
                        return (-1);
                    }
                    finally
                    {
                        // Only return a proxy that was actually attached to this request.
                        // Without the null check, a request that never got a proxy
                        // would throw from this finally block and skip the page counter.
                        if (_site.GetHttpProxyPool().Enable && request1.GetExtra(Request.Proxy) != null)
                        {
                            _site.ReturnHttpProxyToPool(
                                (HttpHost)request1.GetExtra(Request.Proxy),
                                (int)request1.GetExtra(Request.StatusCode));
                        }

                        _pageCount.Inc();
                    }
                }

                return (0);
            }, request);

            // One-off 3 second pause after the first request has been dispatched.
            // NOTE(review): the reason for the pause is not visible here - confirm.
            if (!firstTask)
            {
                Thread.Sleep(3000);
                firstTask = true;
            }
        }
    }

    ThreadPool.WaitToEnd();

    // release some resources
    if (DestroyWhenExit)
    {
        Close();
    }

    _endTime = DateTime.Now;

    OnClose();

    Stat.Set(StatFinished);
}
/// <summary>
/// Runs the spider on the calling thread: polls the scheduler for requests
/// and pushes each one to the spider's thread pool for as long as the spider
/// is in the <c>Status.Running</c> state.
/// </summary>
/// <remarks>
/// When the queue is empty, no worker is alive, the wait counter has passed
/// <c>_waitCountLimit</c> and <c>IsExitWhenComplete</c> is set, the state
/// becomes <c>Status.Finished</c> and the loop ends. Afterwards the method
/// waits for the pool to exit, records the finish time, destroys every
/// pipeline and sets <c>_runningExit</c>.
/// </remarks>
public void Run()
{
    CheckIfRunning();

    Stat = Status.Running;
    _runningExit = false;

#if !NET_CORE
    // Lift the default cap on concurrent connections so it does not limit the workers.
    System.Net.ServicePointManager.DefaultConnectionLimit = int.MaxValue;
#endif

    Logger.Info("Spider " + Identity + " InitComponent...");
    InitComponent();

    Logger.Info("Spider " + Identity + " Started!");

    bool firstTask = false;

    while (Stat == Status.Running)
    {
        Request request = Scheduler.Poll(this);

        if (request == null)
        {
            // Only consider finishing while no worker is alive.
            if (ThreadPool.ThreadAlive == 0)
            {
                if (_waitCount > _waitCountLimit && IsExitWhenComplete)
                {
                    Stat = Status.Finished;
                    break;
                }
            }

            // wait until new url added
            WaitNewUrl();
        }
        else
        {
            // The start time is stamped when the first request is dispatched.
            if (StartTime == DateTime.MinValue)
            {
                StartTime = DateTime.Now;
            }

            _waitCount = 0;

            // Worker result: true when a Request was handled, false when the
            // argument was not a Request.
            ThreadPool.Push(obj =>
            {
                var request1 = obj as Request;
                if (request1 != null)
                {
                    try
                    {
                        ProcessRequest(request1);

                        // Pause between requests for the interval configured on the site.
                        Thread.Sleep(Site.SleepTime);
                        OnSuccess(request1);
                    }
                    catch (Exception e)
                    {
                        OnError(request1);
                        Logger.Error("Request " + request1.Url + " failed.", e);
                    }
                    finally
                    {
#if !NET_CORE
                        // Only return a proxy that was actually attached to this request.
                        // Without the null check, a request that never got a proxy
                        // would throw from this finally block and skip the page counter.
                        if (Site.HttpProxyPoolEnable && request1.GetExtra(Request.Proxy) != null)
                        {
                            Site.ReturnHttpProxyToPool(
                                (HttpHost)request1.GetExtra(Request.Proxy),
                                (int)request1.GetExtra(Request.StatusCode));
                        }
#endif
                        FinishedPageCount.Inc();
                    }

                    return (true);
                }

                return (false);
            }, request);

            // One-off 3 second pause after the first request has been dispatched.
            // NOTE(review): the reason for the pause is not visible here - confirm.
            if (!firstTask)
            {
                Thread.Sleep(3000);
                firstTask = true;
            }
        }
    }

    _waitingToExit = true;
    ThreadPool.WaitToExit();

    FinishedTime = DateTime.Now;

    // Pipelines may hold cached data; it has to be cleaned up or saved
    // before the spider can safely exit or pause.
    foreach (IPipeline pipeline in Pipelines)
    {
        SafeDestroy(pipeline);
    }

    if (Stat == Status.Finished)
    {
        OnClose();
    }

    if (Stat == Status.Stopped)
    {
        Logger.Info("Spider " + Identity + " stop success!");
    }

    _runningExit = true;
}