Exemplo n.º 1
0
        /// <summary>
        /// Runs the crawl on the calling thread and returns once every worker has left its loop.
        /// Starts <c>ThreadNum</c> parallel workers; each worker polls <c>Scheduler</c> for requests
        /// and processes them with its own <c>Downloader.Clone()</c> while <c>Stat</c> is
        /// <c>Status.Running</c>. Afterwards the pipelines are destroyed, <c>SpiderClosing</c> is
        /// raised, the outcome is logged and <c>IsExited</c> is set to <c>true</c>.
        /// </summary>
        /// <param name="arguments">Not read by this implementation.</param>
        public virtual void Run(params string[] arguments)
        {
            CheckIfRunning();

            CheckIfSettingsCorrect();

            Stat     = Status.Running;
            IsExited = false;

#if !NET_CORE
            // Enable multi-threaded downloading by raising the default connection limit.
            ServicePointManager.DefaultConnectionLimit = 1000;
#endif

            InitComponent();

            // Keep the original start time when it has already been set.
            if (StartTime == DateTime.MinValue)
            {
                StartTime = DateTime.Now;
            }

            Parallel.For(0, ThreadNum, new ParallelOptions
            {
                MaxDegreeOfParallelism = ThreadNum
            }, i =>
            {
                int waitCount  = 0;
                bool firstTask = false;

                var downloader = Downloader.Clone();

                while (Stat == Status.Running)
                {
                    Request request = Scheduler.Poll();

                    if (request == null)
                    {
                        // The queue has stayed empty for more than _waitCountLimit waits:
                        // finish the crawl when configured to exit on completion.
                        if (waitCount > _waitCountLimit && IsExitWhenComplete)
                        {
                            Stat = Status.Finished;
                            break;
                        }

                        // wait until new url added
                        WaitNewUrl(ref waitCount);
                    }
                    else
                    {
                        waitCount = 0;

                        try
                        {
                            ProcessRequest(request, downloader);

                            // _random is shared by all parallel workers. System.Random is not
                            // thread-safe, so the call is serialized; the sleep itself happens
                            // outside the lock so workers do not block each other.
                            // NOTE(review): assumes _random is a System.Random instance - confirm.
                            int sleepTime;
                            lock (_random)
                            {
                                sleepTime = _random.Next(Site.MinSleepTime, Site.MaxSleepTime);
                            }

                            Thread.Sleep(sleepTime);
#if TEST
                            System.Diagnostics.Stopwatch sw = new System.Diagnostics.Stopwatch();
                            sw.Reset();
                            sw.Start();
#endif

                            _OnSuccess(request);
#if TEST
                            sw.Stop();
                            Console.WriteLine("OnSuccess:" + (sw.ElapsedMilliseconds).ToString());
#endif
                        }
                        catch (Exception e)
                        {
                            OnError(request);
                            Logger.SaveLog(LogInfo.Create($"采集失败: {request.Url}.", Logger.Name, this, LogLevel.Error, e));
                        }
                        finally
                        {
                            // Hand the proxy back together with the recorded status code;
                            // HttpStatusCode.Found is used when no status code was recorded.
                            if (request.GetExtra(Request.Proxy) != null)
                            {
                                var statusCode = request.GetExtra(Request.StatusCode);
                                Site.ReturnHttpProxy(request.GetExtra(Request.Proxy), statusCode == null ? HttpStatusCode.Found : (HttpStatusCode)statusCode);
                            }
                        }

                        // Each worker pauses for three seconds once, after its first request.
                        if (!firstTask)
                        {
                            Thread.Sleep(3000);
                            firstTask = true;
                        }
                    }
                }
            });

            FinishedTime = DateTime.Now;

            foreach (IPipeline pipeline in Pipelines)
            {
                SafeDestroy(pipeline);
            }

            SpiderClosing?.Invoke();

            // OnClose is only called when the crawl finished on its own.
            if (Stat == Status.Finished)
            {
                OnClose();
                Logger.SaveLog(LogInfo.Create($"采集结束, 运行时间: {(FinishedTime - StartTime).TotalSeconds} 秒.", Logger.Name, this, LogLevel.Info));
            }

            if (Stat == Status.Stopped)
            {
                Logger.SaveLog(LogInfo.Create($"采集暂停, 运行时间: {(FinishedTime - StartTime).TotalSeconds} 秒.", Logger.Name, this, LogLevel.Info));
            }

            if (Stat == Status.Exited)
            {
                Logger.SaveLog(LogInfo.Create($"采集退出, 运行时间: {(FinishedTime - StartTime).TotalSeconds} 秒.", Logger.Name, this, LogLevel.Info));
            }

            IsExited = true;
        }
Exemplo n.º 2
0
 /// <summary>
 /// Switches <c>Stat</c> to <c>Status.Exited</c>, saves a warning-level log entry
 /// and raises <c>SpiderClosing</c> when it has subscribers.
 /// </summary>
 public void Exit()
 {
     Stat = Status.Exited;

     var exitingEntry = LogInfo.Create($"退出任务中...", Logger.Name, this, LogLevel.Warn);
     Logger.SaveLog(exitingEntry);

     var closingHandler = SpiderClosing;
     if (closingHandler != null)
     {
         closingHandler();
     }
 }
Exemplo n.º 3
0
 /// <summary>
 /// Switches <c>Stat</c> to <c>Status.Exited</c>, logs a warning
 /// and raises <c>SpiderClosing</c> when it has subscribers.
 /// </summary>
 public void Exit()
 {
     Stat = Status.Exited;

     const string exitingMessage = "退出任务中...";
     this.Log(exitingMessage, LogLevel.Warn);

     var closingHandler = SpiderClosing;
     if (closingHandler != null)
     {
         closingHandler();
     }
 }
Exemplo n.º 4
0
        /// <summary>
        /// Runs the crawl on the calling thread and returns once every worker has left its loop.
        /// Starts <c>ThreadNum</c> parallel workers; each worker polls <c>Scheduler</c> for requests
        /// and processes them with its own <c>Downloader.Clone()</c> while <c>Stat</c> is
        /// <c>Status.Running</c>. Afterwards the pipelines are destroyed, <c>SpiderClosing</c> is
        /// raised, the monitor task is awaited, the scheduler is disposed and the outcome is logged.
        /// </summary>
        /// <param name="arguments">Not read by this implementation.</param>
        public virtual void Run(params string[] arguments)
        {
            CheckIfRunning();

            CheckIfSettingsCorrect();

            Stat = Status.Running;
            _scheduler.IsExited = false;

#if !NET_CORE
            // Enable multi-threaded downloading by raising the default connection limit.
            ServicePointManager.DefaultConnectionLimit = 1000;
#endif

            InitComponent();

            // Keep the original start time when it has already been set.
            if (StartTime == DateTime.MinValue)
            {
                StartTime = DateTime.Now;
            }

            Parallel.For(0, ThreadNum, new ParallelOptions
            {
                MaxDegreeOfParallelism = ThreadNum
            }, i =>
            {
                int waitCount  = 0;
                bool firstTask = false;

                var downloader = Downloader.Clone();

                while (Stat == Status.Running)
                {
                    Request request = Scheduler.Poll();

                    if (request == null)
                    {
                        // The queue has stayed empty for more than _waitCountLimit waits:
                        // finish the crawl when configured to exit on completion.
                        if (waitCount > _waitCountLimit && ExitWhenComplete)
                        {
                            Stat = Status.Finished;
                            break;
                        }

                        // wait until new url added
                        WaitNewUrl(ref waitCount);
                    }
                    else
                    {
                        waitCount = 0;

                        try
                        {
                            Stopwatch sw = new Stopwatch();
                            ProcessRequest(sw, request, downloader);

                            // _random is shared by all parallel workers. System.Random is not
                            // thread-safe, so the call is serialized; the sleep itself happens
                            // outside the lock so workers do not block each other.
                            // NOTE(review): assumes _random is a System.Random instance - confirm.
                            int sleepTime;
                            lock (_random)
                            {
                                sleepTime = _random.Next(Site.MinSleepTime, Site.MaxSleepTime);
                            }

                            Thread.Sleep(sleepTime);
                            _OnSuccess(request);
                        }
                        catch (Exception e)
                        {
                            OnError(request);
                            this.Log($"采集失败: {request.Url}.", LogLevel.Error, e);
                        }
                        finally
                        {
                            // Hand the proxy back together with the recorded status code;
                            // HttpStatusCode.Found is used when no status code was recorded.
                            if (request.GetExtra(Request.Proxy) != null)
                            {
                                var statusCode = request.GetExtra(Request.StatusCode);
                                Site.ReturnHttpProxy(request.GetExtra(Request.Proxy) as UseSpecifiedUriWebProxy, statusCode == null ? HttpStatusCode.Found : (HttpStatusCode)statusCode);
                            }
                        }

                        // Each worker pauses for three seconds once, after its first request.
                        if (!firstTask)
                        {
                            Thread.Sleep(3000);
                            firstTask = true;
                        }
                    }
                }
            });

            FinishedTime = DateTime.Now;

            foreach (IPipeline pipeline in Pipelines)
            {
                SafeDestroy(pipeline);
            }

            SpiderClosing?.Invoke();

            // Flag the scheduler as exited before waiting on the monitor task.
            // NOTE(review): presumably the monitor task watches this flag - confirm.
            if (!_scheduler.IsExited)
            {
                _scheduler.IsExited = true;
            }

            this.Log($"等待监控进程退出.", LogLevel.Info);
            _monitorTask.Wait();

            Scheduler.Dispose();

            // OnClose is only called when the crawl finished on its own.
            if (Stat == Status.Finished)
            {
                OnClose();
                this.Log($"结束采集, 运行时间: {(FinishedTime - StartTime).TotalSeconds} 秒.", LogLevel.Info);
            }

            if (Stat == Status.Stopped)
            {
                this.Log($"暂停采集, 运行时间: {(FinishedTime - StartTime).TotalSeconds} 秒.", LogLevel.Info);
            }

            if (Stat == Status.Exited)
            {
                this.Log($"退出采集, 运行时间: {(FinishedTime - StartTime).TotalSeconds} 秒.", LogLevel.Info);
            }
        }
Exemplo n.º 5
0
        /// <summary>
        /// Runs the crawl on the calling thread. Returns immediately (after a warning) when the
        /// spider is already running, and returns right after component initialisation when
        /// <paramref name="arguments"/> contains "running-test". Otherwise starts <c>ThreadNum</c>
        /// parallel workers that poll <c>Scheduler</c> while <c>Stat</c> is <c>Status.Running</c>
        /// and idle while it is <c>Status.Stopped</c>; once they end it calls <c>OnClose</c>,
        /// waits for the monitor task, raises <c>SpiderClosing</c> and logs the elapsed time.
        /// </summary>
        /// <param name="arguments">
        /// Passed to <c>InitComponent</c>; the value "running-test" makes the method return
        /// before any request is processed.
        /// </param>
        public virtual void Run(params string[] arguments)
        {
            if (Stat == Status.Running)
            {
                this.Log("任务运行中...", LogLevel.Warn);
                return;
            }

            CheckIfSettingsCorrect();

#if !NET_CORE
            // Enable multi-threaded downloading by raising the default connection limit.
            ServicePointManager.DefaultConnectionLimit = 1000;
#endif

            InitComponent(arguments);

            Monitorable.IsExited = false;

            bool isRunningTest = arguments.Contains("running-test");
            if (isRunningTest)
            {
                _scheduler.IsExited = true;
                return;
            }

            // Keep the original start time when it has already been set.
            if (StartTime == DateTime.MinValue)
            {
                StartTime = DateTime.Now;
            }

            Stat = Status.Running;

            Parallel.For(0, ThreadNum, new ParallelOptions
            {
                MaxDegreeOfParallelism = ThreadNum
            }, workerIndex =>
            {
                int idlePolls     = 0;
                bool pausedOnce   = false;
                var workerLoader  = Downloader.Clone();

                while (Stat == Status.Running || Stat == Status.Stopped)
                {
                    // Not running (stopped): idle briefly and re-check the state.
                    if (Stat != Status.Running)
                    {
                        Thread.Sleep(50);
                        continue;
                    }

                    Request next = Scheduler.Poll();

                    if (next == null)
                    {
                        // The queue has stayed empty for more than _waitCountLimit waits:
                        // finish the crawl when configured to exit on completion.
                        if (idlePolls > _waitCountLimit && ExitWhenComplete)
                        {
                            Stat = Status.Finished;
                            break;
                        }

                        // wait until new url added
                        WaitNewUrl(ref idlePolls);
                        continue;
                    }

                    idlePolls = 0;

                    try
                    {
                        ProcessRequest(new Stopwatch(), next, workerLoader);
                        Thread.Sleep(Site.SleepTime);
                        _OnSuccess(next);
                    }
                    catch (Exception failure)
                    {
                        OnError(next);
                        this.Log($"采集失败: {next.Url}.", LogLevel.Error, failure);
                    }
                    finally
                    {
                        // Hand the proxy back together with the recorded status code;
                        // HttpStatusCode.Found is used when no status code was recorded.
                        var usedProxy = next.GetExtra(Request.Proxy);
                        if (usedProxy != null)
                        {
                            var recordedCode = next.GetExtra(Request.StatusCode);
                            HttpStatusCode proxyResult;
                            if (recordedCode == null)
                            {
                                proxyResult = HttpStatusCode.Found;
                            }
                            else
                            {
                                proxyResult = (HttpStatusCode)recordedCode;
                            }

                            Site.ReturnHttpProxy(usedProxy as UseSpecifiedUriWebProxy, proxyResult);
                        }
                    }

                    // Each worker pauses for three seconds once, after its first request.
                    if (pausedOnce)
                    {
                        continue;
                    }

                    Thread.Sleep(3000);
                    pausedOnce = true;
                }
            });

            FinishedTime = DateTime.Now;

            OnClose();

            this.Log($"等待监控进程退出.", LogLevel.Info);
            _monitorTask.Wait();

            var closingHandler = SpiderClosing;
            if (closingHandler != null)
            {
                closingHandler();
            }

            // Anything other than a normal finish is reported as an exit.
            string outcome;
            if (Stat == Status.Finished)
            {
                outcome = "结束采集";
            }
            else
            {
                outcome = "退出采集";
            }

            this.Log($"{outcome}, 运行时间: {(FinishedTime - StartTime).TotalSeconds} 秒.", LogLevel.Info);
        }
Exemplo n.º 6
0
 /// <summary>
 /// Switches <c>Stat</c> to <c>Status.Exited</c>, logs a warning that includes
 /// <c>Identity</c> and raises <c>SpiderClosing</c> when it has subscribers.
 /// </summary>
 public void Exit()
 {
     Stat = Status.Exited;

     var exitingMessage = string.Concat("退出任务中 ", Identity, "...");
     Logger.Warn(exitingMessage);

     var closingHandler = SpiderClosing;
     if (closingHandler != null)
     {
         closingHandler();
     }
 }
Exemplo n.º 7
0
        /// <summary>
        /// Runs the crawl on the calling thread and returns once every worker has left its loop.
        /// Starts <c>ThreadNum</c> parallel workers; each worker polls <c>Scheduler</c> for requests
        /// and processes them with its own <c>Downloader.Clone()</c> while <c>Stat</c> is
        /// <c>Status.Running</c>. Afterwards <c>SpiderClosing</c> is raised, the pipelines are
        /// destroyed, the outcome is logged, the logger is disposed and <c>IsExited</c> is set
        /// to <c>true</c>.
        /// </summary>
        public void Run()
        {
            CheckIfRunning();

            Stat     = Status.Running;
            IsExited = false;

#if !NET_CORE
            // Enable multi-threaded downloading by raising the default connection limit.
            System.Net.ServicePointManager.DefaultConnectionLimit = 1000;
#endif

            InitComponent();

            // Keep the original start time when it has already been set.
            if (StartTime == DateTime.MinValue)
            {
                StartTime = DateTime.Now;
            }

            Parallel.For(0, ThreadNum, new ParallelOptions
            {
                MaxDegreeOfParallelism = ThreadNum
            }, i =>
            {
                int waitCount  = 0;
                bool firstTask = false;

                var downloader = Downloader.Clone();

                while (Stat == Status.Running)
                {
                    Request request = Scheduler.Poll();

                    if (request == null)
                    {
                        // The queue has stayed empty for more than _waitCountLimit waits:
                        // finish the crawl when configured to exit on completion.
                        if (waitCount > _waitCountLimit && IsExitWhenComplete)
                        {
                            Stat = Status.Finished;
                            break;
                        }

                        // wait until new url added
                        WaitNewUrl(ref waitCount);
                    }
                    else
                    {
                        waitCount = 0;

                        try
                        {
                            ProcessRequest(request, downloader);

                            // _random is shared by all parallel workers. System.Random is not
                            // thread-safe, so the call is serialized; the sleep itself happens
                            // outside the lock so workers do not block each other.
                            // NOTE(review): assumes _random is a System.Random instance - confirm.
                            int sleepTime;
                            lock (_random)
                            {
                                sleepTime = _random.Next(Site.MinSleepTime, Site.MaxSleepTime);
                            }

                            Thread.Sleep(sleepTime);
#if TEST
                            System.Diagnostics.Stopwatch sw = new System.Diagnostics.Stopwatch();
                            sw.Reset();
                            sw.Start();
#endif

                            _OnSuccess(request);
#if TEST
                            sw.Stop();
                            Console.WriteLine("OnSuccess:" + (sw.ElapsedMilliseconds).ToString());
#endif
                        }
                        catch (Exception e)
                        {
                            OnError(request);
                            Logger.Error("采集失败: " + request.Url + ".", e);
                        }
                        finally
                        {
#if !NET_CORE
                            if (Site.HttpProxyPoolEnable && request.GetExtra(Request.Proxy) != null)
                            {
                                // Converting a missing status code straight to int throws from
                                // this finally block, which would escape the worker and abort the
                                // whole Parallel.For. Fall back to HttpStatusCode.Found instead.
                                // NOTE(review): presumably the status code is absent when no
                                // response was received - confirm.
                                var statusCode = request.GetExtra(Request.StatusCode);
                                Site.ReturnHttpProxyToPool((HttpHost)request.GetExtra(Request.Proxy), statusCode == null ? (int)System.Net.HttpStatusCode.Found : (int)statusCode);
                            }
#endif
                        }

                        // Each worker pauses for three seconds once, after its first request.
                        if (!firstTask)
                        {
                            Thread.Sleep(3000);
                            firstTask = true;
                        }
                    }
                }
            });

            FinishedTime = DateTime.Now;

            SpiderClosing?.Invoke();

            foreach (IPipeline pipeline in Pipelines)
            {
                SafeDestroy(pipeline);
            }

            // OnClose is only called when the crawl finished on its own.
            if (Stat == Status.Finished)
            {
                OnClose();
                Logger.Info($"任务 {Identity} 结束, 运行时间: " + (FinishedTime - StartTime).TotalSeconds + " 秒.");
            }

            if (Stat == Status.Stopped)
            {
                Logger.Info("任务 " + Identity + " 停止成功, 运行时间: " + (FinishedTime - StartTime).TotalSeconds + " 秒.");
            }

            if (Stat == Status.Exited)
            {
                Logger.Info("任务 " + Identity + " 退出成功, 运行时间: " + (FinishedTime - StartTime).TotalSeconds + " 秒.");
            }
            Logger.Dispose();
            IsExited = true;
        }