/// <summary>
/// Runs the crawl on the calling thread: initializes components, starts
/// <c>ThreadNum</c> parallel workers that poll the scheduler and process requests,
/// and, once every worker has left its loop, destroys the pipelines and records
/// the final state. <c>Parallel.For</c> blocks, so this method does not return
/// until the workers are done.
/// </summary>
public void Run()
{
    CheckIfRunning();

    Stat = Status.Running;
    IsExited = false;

#if !NET_CORE
    // Raise the connection limit so the worker threads can download concurrently.
    System.Net.ServicePointManager.DefaultConnectionLimit = 1000;
#endif

    InitComponent();

    // Progress counters are optional: a scheduler that is not monitorable must not
    // abort the run with an InvalidCastException, so use a safe cast (InitComponent
    // treats monitorability as optional in the same way).
    IMonitorableScheduler monitor = Scheduler as IMonitorableScheduler;

    // Keep an already-set start time; only stamp it when it still has the default value.
    if (StartTime == DateTime.MinValue)
    {
        StartTime = DateTime.Now;
    }

    Parallel.For(0, ThreadNum, new ParallelOptions { MaxDegreeOfParallelism = ThreadNum }, i =>
    {
        int waitCount = 0;
        bool firstTaskDone = false;

        // Each worker operates on its own clone of the configured downloader.
        var downloader = Downloader.Clone();

        while (Stat == Status.Running)
        {
            Request request = Scheduler.Poll(this);

            if (request == null)
            {
                // Nothing queued: finish once the wait limit is exceeded and the
                // spider is configured to exit when complete.
                if (waitCount > _waitCountLimit && IsExitWhenComplete)
                {
                    Stat = Status.Finished;
                    break;
                }

                // Wait until a new url is added (presumably WaitNewUrl advances waitCount — confirm).
                WaitNewUrl(ref waitCount);
            }
            else
            {
                if (monitor != null)
                {
                    Log.WriteLine($"Left: {monitor.GetLeftRequestsCount(this)} Total: {monitor.GetTotalRequestsCount(this)} Thread: {ThreadNum}");
                }

                waitCount = 0;

                try
                {
                    ProcessRequest(request, downloader);
                    Thread.Sleep(Site.SleepTime);
#if TEST
                    System.Diagnostics.Stopwatch sw = new System.Diagnostics.Stopwatch();
                    sw.Reset();
                    sw.Start();
#endif
                    // OnSuccess runs inside the try, so an exception it throws is
                    // reported through OnError as well.
                    OnSuccess(request);
#if TEST
                    sw.Stop();
                    Console.WriteLine("OnSuccess:" + (sw.ElapsedMilliseconds).ToString());
#endif
                }
                catch (Exception e)
                {
                    OnError(request);
                    Logger.Error("采集失败: " + request.Url + ".", e);
                }
                finally
                {
#if !NET_CORE
                    // Hand the proxy used by this request back to the pool together with the status code.
                    // NOTE(review): the StatusCode extra is cast to int unconditionally; if it can be
                    // missing (e.g. the request failed before a status was recorded) this cast would
                    // throw from the finally block — confirm.
                    if (Site.HttpProxyPoolEnable && request.GetExtra(Request.Proxy) != null)
                    {
                        Site.ReturnHttpProxyToPool((HttpHost)request.GetExtra(Request.Proxy), (int)request.GetExtra(Request.StatusCode));
                    }
#endif
                    FinishedPageCount.Inc();
                }

                // Each worker pauses 3 seconds once, right after its first processed request
                // (presumably to let newly discovered requests reach the scheduler — confirm).
                if (!firstTaskDone)
                {
                    Thread.Sleep(3000);
                    firstTaskDone = true;
                }
            }
        }
    });

    FinishedTime = DateTime.Now;

    foreach (IPipeline pipeline in Pipelines)
    {
        SafeDestroy(pipeline);
    }

    if (Stat == Status.Finished)
    {
        OnClose();
        Logger.Info($"任务 {Identity} 结束.");
    }

    if (Stat == Status.Stopped)
    {
        Logger.Info("任务 " + Identity + " 停止成功!");
    }

    SpiderClosingEvent?.Invoke();

    Log.WaitForExit();

    if (Stat == Status.Exited)
    {
        Logger.Info("任务 " + Identity + " 退出成功!");
    }

    IsExited = true;
}
/// <summary>
/// One-time component setup: hooks Ctrl+C handling, initializes the scheduler,
/// applies the thread count to the downloader, supplies a default pipeline and
/// thread pool when none are configured, pushes the start requests into the
/// scheduler, and starts the optional console status reporter.
/// Calls made after the first successful one only log a message and return.
/// </summary>
public void InitComponent()
{
    if (_init)
    {
#if NET_CORE
        Logger.Info($"Component already init.", true);
#else
        Logger.Info("Component already init.");
#endif
        return;
    }

    Console.CancelKeyPress += ConsoleCancelKeyPress;

    Scheduler.Init(this);

    // NOTE(review): no default downloader is created here (the fallback assignment
    // was commented out in the original), so a null Downloader fails on the next line.
    Downloader.ThreadNum = ThreadNum;

    if (Pipelines.Count == 0)
    {
        Pipelines.Add(new FilePipeline());
    }

    if (ThreadPool == null)
    {
        ThreadPool = new CountableThreadPool(ThreadNum);
    }

    if (StartRequests != null)
    {
        if (StartRequests.Count > 0)
        {
            // Push a clone of every start request, then clear the start list.
            Parallel.ForEach(StartRequests, new ParallelOptions() { MaxDegreeOfParallelism = 100 }, request =>
            {
                Scheduler.Push((Request)request.Clone(), this);
            });

            ClearStartRequests();
#if NET_CORE
            Logger.Info("Push Request to Scheduler success.", true);
#else
            Logger.Info("Push Request to Scheduler success.");
#endif
        }
        else
        {
            // Both build flavours report that nothing was pushed; the non-NET_CORE
            // branch used to log the success message here by mistake.
#if NET_CORE
            Logger.Info("Push Zero Request to Scheduler.", true);
#else
            Logger.Info("Push Zero Request to Scheduler.");
#endif
        }
    }

    // The console reporter only makes sense when it is enabled and the scheduler
    // exposes request counters.
    IMonitorableScheduler monitor = ShowConsoleStatus ? Scheduler as IMonitorableScheduler : null;
    if (monitor != null)
    {
        // The reporter loops forever with a 2-second sleep, so it is started as a
        // long-running task instead of occupying an ordinary thread-pool worker.
        Task.Factory.StartNew(() =>
        {
            while (true)
            {
                try
                {
                    if (Stat == Status.Running && !_waitingToExit)
                    {
                        Console.WriteLine(
                            $"Left: {monitor.GetLeftRequestsCount(this)} Total: {monitor.GetTotalRequestsCount(this)} AliveThread: {ThreadPool.ThreadAlive} ThreadNum: {ThreadPool.ThreadNum}");
                    }
                }
                catch
                {
                    // Best effort: a failed status read must not stop the reporter.
                }

                Thread.Sleep(2000);
            }
        }, TaskCreationOptions.LongRunning);
    }

    _init = true;
}