/// <summary>
/// Runs the spider: validates state and settings, marks the spider as running,
/// initialises components and then drains the scheduler on <c>ThreadNum</c>
/// parallel workers until <c>Stat</c> leaves <c>Status.Running</c>.
/// Afterwards it destroys every pipeline, raises <c>SpiderClosing</c>, logs a
/// summary matching the final status and sets <c>IsExited</c> to true.
/// </summary>
/// <param name="arguments">Not read by this method body.</param>
public virtual void Run(params string[] arguments)
{
    CheckIfRunning();
    CheckIfSettingsCorrect();

    Stat = Status.Running;
    IsExited = false;

#if !NET_CORE
    // Enable multi-threading support by raising the default connection limit.
    ServicePointManager.DefaultConnectionLimit = 1000;
#endif

    InitComponent();

    // Only stamp the start time on the first run; a later run keeps the original value.
    if (StartTime == DateTime.MinValue)
    {
        StartTime = DateTime.Now;
    }

    var workerOptions = new ParallelOptions { MaxDegreeOfParallelism = ThreadNum };

    Parallel.For(0, ThreadNum, workerOptions, workerIndex =>
    {
        int idleRounds = 0;
        bool pausedAfterFirstRequest = false;
        var workerDownloader = Downloader.Clone();

        while (Stat == Status.Running)
        {
            Request request = Scheduler.Poll();

            if (request == null)
            {
                // Nothing queued: finish once the idle limit is exceeded and
                // the spider is configured to exit on completion.
                if (idleRounds > _waitCountLimit && IsExitWhenComplete)
                {
                    Stat = Status.Finished;
                    break;
                }

                // wait until new url added
                WaitNewUrl(ref idleRounds);
                continue;
            }

            idleRounds = 0;

            try
            {
                ProcessRequest(request, workerDownloader);

                // Sleep for a random duration between the site's min and max sleep time.
                Thread.Sleep(_random.Next(Site.MinSleepTime, Site.MaxSleepTime));

#if TEST
                var successTimer = System.Diagnostics.Stopwatch.StartNew();
#endif

                _OnSuccess(request);

#if TEST
                successTimer.Stop();
                Console.WriteLine("OnSuccess:" + successTimer.ElapsedMilliseconds.ToString());
#endif
            }
            catch (Exception e)
            {
                OnError(request);

                var failure = LogInfo.Create($"采集失败: {request.Url}.", Logger.Name, this, LogLevel.Error, e);
                Logger.SaveLog(failure);
            }
            finally
            {
                // Hand the proxy back to the site together with the status code
                // stored on the request, or Found when none was stored.
                if (request.GetExtra(Request.Proxy) != null)
                {
                    var statusCode = request.GetExtra(Request.StatusCode);

                    HttpStatusCode reportedStatus;
                    if (statusCode == null)
                    {
                        reportedStatus = HttpStatusCode.Found;
                    }
                    else
                    {
                        reportedStatus = (HttpStatusCode)statusCode;
                    }

                    Site.ReturnHttpProxy(request.GetExtra(Request.Proxy), reportedStatus);
                }
            }

            // Each worker pauses three seconds once, right after its first request.
            if (!pausedAfterFirstRequest)
            {
                Thread.Sleep(3000);
                pausedAfterFirstRequest = true;
            }
        }
    });

    FinishedTime = DateTime.Now;

    foreach (IPipeline pipeline in Pipelines)
    {
        SafeDestroy(pipeline);
    }

    var closingHandler = SpiderClosing;
    if (closingHandler != null)
    {
        closingHandler();
    }

    if (Stat == Status.Finished)
    {
        OnClose();

        var finishedEntry = LogInfo.Create($"采集结束, 运行时间: {(FinishedTime - StartTime).TotalSeconds} 秒.", Logger.Name, this, LogLevel.Info);
        Logger.SaveLog(finishedEntry);
    }

    if (Stat == Status.Stopped)
    {
        var stoppedEntry = LogInfo.Create($"采集暂停, 运行时间: {(FinishedTime - StartTime).TotalSeconds} 秒.", Logger.Name, this, LogLevel.Info);
        Logger.SaveLog(stoppedEntry);
    }

    if (Stat == Status.Exited)
    {
        var exitedEntry = LogInfo.Create($"采集退出, 运行时间: {(FinishedTime - StartTime).TotalSeconds} 秒.", Logger.Name, this, LogLevel.Info);
        Logger.SaveLog(exitedEntry);
    }

    IsExited = true;
}
/// <summary>
/// Sets the status to <c>Status.Exited</c>, saves a warning log entry and
/// raises <c>SpiderClosing</c> when it has subscribers.
/// </summary>
public void Exit()
{
    Stat = Status.Exited;

    var exitNotice = LogInfo.Create($"退出任务中...", Logger.Name, this, LogLevel.Warn);
    Logger.SaveLog(exitNotice);

    var closingHandler = SpiderClosing;
    if (closingHandler != null)
    {
        closingHandler();
    }
}
/// <summary>
/// Sets the status to <c>Status.Exited</c>, logs a warning and raises
/// <c>SpiderClosing</c> when it has subscribers.
/// </summary>
public void Exit()
{
    Stat = Status.Exited;

    this.Log("退出任务中...", LogLevel.Warn);

    var closingHandler = SpiderClosing;
    if (closingHandler != null)
    {
        closingHandler();
    }
}
/// <summary>
/// Runs the spider: validates state and settings, marks the spider as running,
/// initialises components and then drains the scheduler on <c>ThreadNum</c>
/// parallel workers until <c>Stat</c> leaves <c>Status.Running</c>.
/// Afterwards it destroys every pipeline, raises <c>SpiderClosing</c>, flags
/// the scheduler as exited, waits for the monitor task, disposes the
/// scheduler and logs a summary matching the final status.
/// </summary>
/// <param name="arguments">Not read by this method body.</param>
public virtual void Run(params string[] arguments)
{
    CheckIfRunning();
    CheckIfSettingsCorrect();

    Stat = Status.Running;
    _scheduler.IsExited = false;

#if !NET_CORE
    // Enable multi-threading support by raising the default connection limit.
    ServicePointManager.DefaultConnectionLimit = 1000;
#endif

    InitComponent();

    // Only stamp the start time on the first run; a later run keeps the original value.
    if (StartTime == DateTime.MinValue)
    {
        StartTime = DateTime.Now;
    }

    var workerOptions = new ParallelOptions { MaxDegreeOfParallelism = ThreadNum };

    Parallel.For(0, ThreadNum, workerOptions, workerIndex =>
    {
        int idleRounds = 0;
        bool pausedAfterFirstRequest = false;
        var workerDownloader = Downloader.Clone();

        while (Stat == Status.Running)
        {
            Request request = Scheduler.Poll();

            if (request == null)
            {
                // Nothing queued: finish once the idle limit is exceeded and
                // the spider is configured to exit on completion.
                if (idleRounds > _waitCountLimit && ExitWhenComplete)
                {
                    Stat = Status.Finished;
                    break;
                }

                // wait until new url added
                WaitNewUrl(ref idleRounds);
                continue;
            }

            idleRounds = 0;

            try
            {
                var requestTimer = new Stopwatch();
                ProcessRequest(requestTimer, request, workerDownloader);

                // Sleep for a random duration between the site's min and max sleep time.
                Thread.Sleep(_random.Next(Site.MinSleepTime, Site.MaxSleepTime));

                _OnSuccess(request);
            }
            catch (Exception e)
            {
                OnError(request);
                this.Log($"采集失败: {request.Url}.", LogLevel.Error, e);
            }
            finally
            {
                // Hand the proxy back to the site together with the status code
                // stored on the request, or Found when none was stored.
                if (request.GetExtra(Request.Proxy) != null)
                {
                    var statusCode = request.GetExtra(Request.StatusCode);

                    HttpStatusCode reportedStatus;
                    if (statusCode == null)
                    {
                        reportedStatus = HttpStatusCode.Found;
                    }
                    else
                    {
                        reportedStatus = (HttpStatusCode)statusCode;
                    }

                    Site.ReturnHttpProxy(request.GetExtra(Request.Proxy) as UseSpecifiedUriWebProxy, reportedStatus);
                }
            }

            // Each worker pauses three seconds once, right after its first request.
            if (!pausedAfterFirstRequest)
            {
                Thread.Sleep(3000);
                pausedAfterFirstRequest = true;
            }
        }
    });

    FinishedTime = DateTime.Now;

    foreach (IPipeline pipeline in Pipelines)
    {
        SafeDestroy(pipeline);
    }

    var closingHandler = SpiderClosing;
    if (closingHandler != null)
    {
        closingHandler();
    }

    if (!_scheduler.IsExited)
    {
        _scheduler.IsExited = true;
    }

    this.Log($"等待监控进程退出.", LogLevel.Info);
    _monitorTask.Wait();

    Scheduler.Dispose();

    if (Stat == Status.Finished)
    {
        OnClose();
        this.Log($"结束采集, 运行时间: {(FinishedTime - StartTime).TotalSeconds} 秒.", LogLevel.Info);
    }

    if (Stat == Status.Stopped)
    {
        this.Log($"暂停采集, 运行时间: {(FinishedTime - StartTime).TotalSeconds} 秒.", LogLevel.Info);
    }

    if (Stat == Status.Exited)
    {
        this.Log($"退出采集, 运行时间: {(FinishedTime - StartTime).TotalSeconds} 秒.", LogLevel.Info);
    }
}
/// <summary>
/// Runs the spider unless it is already running. Validates settings,
/// initialises components with <paramref name="arguments"/>, then drains the
/// scheduler on <c>ThreadNum</c> parallel workers. Workers idle while the
/// status is <c>Status.Stopped</c> and leave their loop on any status other
/// than Running or Stopped. Afterwards it calls <c>OnClose</c>, waits for the
/// monitor task, raises <c>SpiderClosing</c> and logs the elapsed time.
/// </summary>
/// <param name="arguments">
/// Passed to <c>InitComponent</c>. When it contains <c>"running-test"</c> the
/// method flags the scheduler as exited and returns before any crawling.
/// </param>
public virtual void Run(params string[] arguments)
{
    if (Stat == Status.Running)
    {
        this.Log("任务运行中...", LogLevel.Warn);
        return;
    }

    CheckIfSettingsCorrect();

#if !NET_CORE
    // Enable multi-threading support by raising the default connection limit.
    ServicePointManager.DefaultConnectionLimit = 1000;
#endif

    InitComponent(arguments);

    Monitorable.IsExited = false;

    if (arguments.Contains("running-test"))
    {
        _scheduler.IsExited = true;
        return;
    }

    // Only stamp the start time on the first run; a later run keeps the original value.
    if (StartTime == DateTime.MinValue)
    {
        StartTime = DateTime.Now;
    }

    Stat = Status.Running;

    var workerOptions = new ParallelOptions { MaxDegreeOfParallelism = ThreadNum };

    Parallel.For(0, ThreadNum, workerOptions, workerIndex =>
    {
        int idleRounds = 0;
        bool pausedAfterFirstRequest = false;
        var workerDownloader = Downloader.Clone();

        while (Stat == Status.Running || Stat == Status.Stopped)
        {
            // While stopped, keep the worker alive and re-check the status every 50 ms.
            if (Stat != Status.Running)
            {
                Thread.Sleep(50);
                continue;
            }

            Request request = Scheduler.Poll();

            if (request == null)
            {
                // Nothing queued: finish once the idle limit is exceeded and
                // the spider is configured to exit on completion.
                if (idleRounds > _waitCountLimit && ExitWhenComplete)
                {
                    Stat = Status.Finished;
                    break;
                }

                // wait until new url added
                WaitNewUrl(ref idleRounds);
                continue;
            }

            idleRounds = 0;

            try
            {
                var requestTimer = new Stopwatch();
                ProcessRequest(requestTimer, request, workerDownloader);

                Thread.Sleep(Site.SleepTime);

                _OnSuccess(request);
            }
            catch (Exception e)
            {
                OnError(request);
                this.Log($"采集失败: {request.Url}.", LogLevel.Error, e);
            }
            finally
            {
                // Hand the proxy back to the site together with the status code
                // stored on the request, or Found when none was stored.
                if (request.GetExtra(Request.Proxy) != null)
                {
                    var statusCode = request.GetExtra(Request.StatusCode);

                    HttpStatusCode reportedStatus;
                    if (statusCode == null)
                    {
                        reportedStatus = HttpStatusCode.Found;
                    }
                    else
                    {
                        reportedStatus = (HttpStatusCode)statusCode;
                    }

                    Site.ReturnHttpProxy(request.GetExtra(Request.Proxy) as UseSpecifiedUriWebProxy, reportedStatus);
                }
            }

            // Each worker pauses three seconds once, right after its first request.
            if (!pausedAfterFirstRequest)
            {
                Thread.Sleep(3000);
                pausedAfterFirstRequest = true;
            }
        }
    });

    FinishedTime = DateTime.Now;

    OnClose();

    this.Log($"等待监控进程退出.", LogLevel.Info);
    _monitorTask.Wait();

    var closingHandler = SpiderClosing;
    if (closingHandler != null)
    {
        closingHandler();
    }

    // Any status other than Finished is reported as an exit.
    string msg;
    if (Stat == Status.Finished)
    {
        msg = "结束采集";
    }
    else
    {
        msg = "退出采集";
    }

    this.Log($"{msg}, 运行时间: {(FinishedTime - StartTime).TotalSeconds} 秒.", LogLevel.Info);
}
/// <summary>
/// Sets the status to <c>Status.Exited</c>, logs a warning that includes the
/// spider's <c>Identity</c> and raises <c>SpiderClosing</c> when it has
/// subscribers.
/// </summary>
public void Exit()
{
    Stat = Status.Exited;

    string exitNotice = "退出任务中 " + Identity + "...";
    Logger.Warn(exitNotice);

    var closingHandler = SpiderClosing;
    if (closingHandler != null)
    {
        closingHandler();
    }
}
/// <summary>
/// Runs the spider: checks it is not already running, marks it as running,
/// initialises components and then drains the scheduler on <c>ThreadNum</c>
/// parallel workers until <c>Stat</c> leaves <c>Status.Running</c>.
/// Afterwards it raises <c>SpiderClosing</c>, destroys every pipeline, logs a
/// summary matching the final status, disposes the logger and sets
/// <c>IsExited</c> to true.
/// </summary>
public void Run()
{
    CheckIfRunning();

    Stat = Status.Running;
    IsExited = false;

#if !NET_CORE
    // Enable multi-threading support by raising the default connection limit.
    System.Net.ServicePointManager.DefaultConnectionLimit = 1000;
#endif

    InitComponent();

    // Only stamp the start time on the first run; a later run keeps the original value.
    if (StartTime == DateTime.MinValue)
    {
        StartTime = DateTime.Now;
    }

    Parallel.For(0, ThreadNum, new ParallelOptions
    {
        MaxDegreeOfParallelism = ThreadNum
    }, i =>
    {
        int waitCount = 0;
        bool firstTask = false;

        var downloader = Downloader.Clone();

        while (Stat == Status.Running)
        {
            Request request = Scheduler.Poll();

            if (request == null)
            {
                // Nothing queued: finish once the idle limit is exceeded and
                // the spider is configured to exit on completion.
                if (waitCount > _waitCountLimit && IsExitWhenComplete)
                {
                    Stat = Status.Finished;
                    break;
                }

                // wait until new url added
                WaitNewUrl(ref waitCount);
            }
            else
            {
                waitCount = 0;

                try
                {
                    ProcessRequest(request, downloader);

                    // Sleep for a random duration between the site's min and max sleep time.
                    Thread.Sleep(_random.Next(Site.MinSleepTime, Site.MaxSleepTime));

#if TEST
                    System.Diagnostics.Stopwatch sw = new System.Diagnostics.Stopwatch();
                    sw.Reset();
                    sw.Start();
#endif

                    _OnSuccess(request);

#if TEST
                    sw.Stop();
                    Console.WriteLine("OnSuccess:" + (sw.ElapsedMilliseconds).ToString());
#endif
                }
                catch (Exception e)
                {
                    OnError(request);
                    Logger.Error("采集失败: " + request.Url + ".", e);
                }
                finally
                {
#if !NET_CORE
                    if (Site.HttpProxyPoolEnable && request.GetExtra(Request.Proxy) != null)
                    {
                        // The status-code extra may be missing (presumably when the
                        // download failed before a status was recorded - confirm).
                        // Casting null to int here would throw from inside this
                        // finally block, escape the worker and abort Parallel.For.
                        // Fall back to Found, the same default the
                        // Run(params string[]) version uses.
                        var statusCode = request.GetExtra(Request.StatusCode);
                        Site.ReturnHttpProxyToPool(
                            (HttpHost)request.GetExtra(Request.Proxy),
                            statusCode == null ? (int)System.Net.HttpStatusCode.Found : (int)statusCode);
                    }
#endif
                }

                // Each worker pauses three seconds once, right after its first request.
                if (!firstTask)
                {
                    Thread.Sleep(3000);
                    firstTask = true;
                }
            }
        }
    });

    FinishedTime = DateTime.Now;

    SpiderClosing?.Invoke();

    foreach (IPipeline pipeline in Pipelines)
    {
        SafeDestroy(pipeline);
    }

    if (Stat == Status.Finished)
    {
        OnClose();
        Logger.Info($"任务 {Identity} 结束, 运行时间: " + (FinishedTime - StartTime).TotalSeconds + " 秒.");
    }

    if (Stat == Status.Stopped)
    {
        Logger.Info("任务 " + Identity + " 停止成功, 运行时间: " + (FinishedTime - StartTime).TotalSeconds + " 秒.");
    }

    if (Stat == Status.Exited)
    {
        Logger.Info("任务 " + Identity + " 退出成功, 运行时间: " + (FinishedTime - StartTime).TotalSeconds + " 秒.");
    }

    Logger.Dispose();

    IsExited = true;
}