Exemplo n.º 1
0
        /// <summary>
        /// Runs the spider synchronously: starts <c>ThreadNum</c> parallel workers that poll the
        /// scheduler and process requests while the status is <c>Status.Running</c>, then destroys
        /// the pipelines, waits for the monitor task, disposes the scheduler and logs the outcome.
        /// </summary>
        /// <param name="arguments">Optional arguments; this implementation does not read them.</param>
        public virtual void Run(params string[] arguments)
        {
            CheckIfRunning();

            CheckIfSettingsCorrect();

            Stat = Status.Running;
            _scheduler.IsExited = false;

#if !NET_CORE
            // Raise the default connection limit so the parallel workers can download concurrently.
            ServicePointManager.DefaultConnectionLimit = 1000;
#endif

            InitComponent();

            // Only stamp the start time on the first run, so a later run keeps the original value.
            if (StartTime == DateTime.MinValue)
            {
                StartTime = DateTime.Now;
            }

            Parallel.For(0, ThreadNum, new ParallelOptions
            {
                MaxDegreeOfParallelism = ThreadNum
            }, i =>
            {
                // Per-worker state: counter passed to WaitNewUrl while the scheduler is empty,
                // and a flag recording whether the one-off pause after the first request was taken.
                int waitCount  = 0;
                bool firstTask = false;

                // Each worker operates on its own clone of the downloader.
                var downloader = Downloader.Clone();

                while (Stat == Status.Running)
                {
                    Request request = Scheduler.Poll();

                    if (request == null)
                    {
                        // Nothing to do for longer than the configured limit: finish the crawl.
                        if (waitCount > _waitCountLimit && ExitWhenComplete)
                        {
                            Stat = Status.Finished;
                            break;
                        }

                        // wait until new url added
                        WaitNewUrl(ref waitCount);
                    }
                    else
                    {
                        waitCount = 0;

                        try
                        {
                            Stopwatch sw = new Stopwatch();
                            ProcessRequest(sw, request, downloader);

                            // _random is shared by every worker of this Parallel.For loop and
                            // System.Random instances are not thread-safe (unsynchronized use can
                            // corrupt the generator so that it keeps returning 0, which would
                            // silently remove the delay). Serialize access; sleep outside the lock.
                            int sleepTime;
                            lock (_random)
                            {
                                sleepTime = _random.Next(Site.MinSleepTime, Site.MaxSleepTime);
                            }

                            Thread.Sleep(sleepTime);
                            _OnSuccess(request);
                        }
                        catch (Exception e)
                        {
                            OnError(request);
                            this.Log($"采集失败: {request.Url}.", LogLevel.Error, e);
                        }
                        finally
                        {
                            // Hand the proxy back whether or not the request succeeded;
                            // HttpStatusCode.Found is reported when no status code was recorded.
                            if (request.GetExtra(Request.Proxy) != null)
                            {
                                var statusCode = request.GetExtra(Request.StatusCode);
                                Site.ReturnHttpProxy(request.GetExtra(Request.Proxy) as UseSpecifiedUriWebProxy, statusCode == null ? HttpStatusCode.Found : (HttpStatusCode)statusCode);
                            }
                        }

                        // One-off 3 second pause after a worker's first request.
                        // NOTE(review): presumably gives the first page time to seed the scheduler - confirm.
                        if (!firstTask)
                        {
                            Thread.Sleep(3000);
                            firstTask = true;
                        }
                    }
                }
            });

            FinishedTime = DateTime.Now;

            foreach (IPipeline pipeline in Pipelines)
            {
                SafeDestroy(pipeline);
            }

            SpiderClosing?.Invoke();

            // Flag the scheduler as exited before waiting for the monitor task.
            if (!_scheduler.IsExited)
            {
                _scheduler.IsExited = true;
            }

            this.Log($"等待监控进程退出.", LogLevel.Info);
            _monitorTask.Wait();

            Scheduler.Dispose();

            // Report the outcome; OnClose is only invoked when the crawl ran to completion.
            if (Stat == Status.Finished)
            {
                OnClose();
                this.Log($"结束采集, 运行时间: {(FinishedTime - StartTime).TotalSeconds} 秒.", LogLevel.Info);
            }

            if (Stat == Status.Stopped)
            {
                this.Log($"暂停采集, 运行时间: {(FinishedTime - StartTime).TotalSeconds} 秒.", LogLevel.Info);
            }

            if (Stat == Status.Exited)
            {
                this.Log($"退出采集, 运行时间: {(FinishedTime - StartTime).TotalSeconds} 秒.", LogLevel.Info);
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Runs the spider synchronously: starts <c>ThreadNum</c> parallel workers that poll the
        /// scheduler and process requests while the status is <c>Status.Running</c>, then destroys
        /// the pipelines, raises <c>SpiderClosing</c>, logs the outcome and sets <c>IsExited</c>.
        /// </summary>
        /// <param name="arguments">Optional arguments; this implementation does not read them.</param>
        public virtual void Run(params string[] arguments)
        {
            CheckIfRunning();

            CheckIfSettingsCorrect();

            Stat     = Status.Running;
            IsExited = false;

#if !NET_CORE
            // Raise the default connection limit so the parallel workers can download concurrently.
            ServicePointManager.DefaultConnectionLimit = 1000;
#endif

            InitComponent();

            // Only stamp the start time on the first run, so a later run keeps the original value.
            if (StartTime == DateTime.MinValue)
            {
                StartTime = DateTime.Now;
            }

            Parallel.For(0, ThreadNum, new ParallelOptions
            {
                MaxDegreeOfParallelism = ThreadNum
            }, i =>
            {
                // Per-worker state: counter passed to WaitNewUrl while the scheduler is empty,
                // and a flag recording whether the one-off pause after the first request was taken.
                int waitCount  = 0;
                bool firstTask = false;

                // Each worker operates on its own clone of the downloader.
                var downloader = Downloader.Clone();

                while (Stat == Status.Running)
                {
                    Request request = Scheduler.Poll();

                    if (request == null)
                    {
                        // Nothing to do for longer than the configured limit: finish the crawl.
                        if (waitCount > _waitCountLimit && IsExitWhenComplete)
                        {
                            Stat = Status.Finished;
                            break;
                        }

                        // wait until new url added
                        WaitNewUrl(ref waitCount);
                    }
                    else
                    {
                        waitCount = 0;

                        try
                        {
                            ProcessRequest(request, downloader);

                            // _random is shared by every worker of this Parallel.For loop and
                            // System.Random instances are not thread-safe (unsynchronized use can
                            // corrupt the generator so that it keeps returning 0, which would
                            // silently remove the delay). Serialize access; sleep outside the lock.
                            int sleepTime;
                            lock (_random)
                            {
                                sleepTime = _random.Next(Site.MinSleepTime, Site.MaxSleepTime);
                            }

                            Thread.Sleep(sleepTime);
#if TEST
                            // Test builds time the success callback and print the result.
                            System.Diagnostics.Stopwatch sw = new System.Diagnostics.Stopwatch();
                            sw.Reset();
                            sw.Start();
#endif

                            _OnSuccess(request);
#if TEST
                            sw.Stop();
                            Console.WriteLine("OnSuccess:" + (sw.ElapsedMilliseconds).ToString());
#endif
                        }
                        catch (Exception e)
                        {
                            OnError(request);
                            Logger.SaveLog(LogInfo.Create($"采集失败: {request.Url}.", Logger.Name, this, LogLevel.Error, e));
                        }
                        finally
                        {
                            // Hand the proxy back whether or not the request succeeded;
                            // HttpStatusCode.Found is reported when no status code was recorded.
                            if (request.GetExtra(Request.Proxy) != null)
                            {
                                var statusCode = request.GetExtra(Request.StatusCode);
                                Site.ReturnHttpProxy(request.GetExtra(Request.Proxy), statusCode == null ? HttpStatusCode.Found : (HttpStatusCode)statusCode);
                            }
                        }

                        // One-off 3 second pause after a worker's first request.
                        // NOTE(review): presumably gives the first page time to seed the scheduler - confirm.
                        if (!firstTask)
                        {
                            Thread.Sleep(3000);
                            firstTask = true;
                        }
                    }
                }
            });

            FinishedTime = DateTime.Now;

            foreach (IPipeline pipeline in Pipelines)
            {
                SafeDestroy(pipeline);
            }

            SpiderClosing?.Invoke();

            // Report the outcome; OnClose is only invoked when the crawl ran to completion.
            if (Stat == Status.Finished)
            {
                OnClose();
                Logger.SaveLog(LogInfo.Create($"采集结束, 运行时间: {(FinishedTime - StartTime).TotalSeconds} 秒.", Logger.Name, this, LogLevel.Info));
            }

            if (Stat == Status.Stopped)
            {
                Logger.SaveLog(LogInfo.Create($"采集暂停, 运行时间: {(FinishedTime - StartTime).TotalSeconds} 秒.", Logger.Name, this, LogLevel.Info));
            }

            if (Stat == Status.Exited)
            {
                Logger.SaveLog(LogInfo.Create($"采集退出, 运行时间: {(FinishedTime - StartTime).TotalSeconds} 秒.", Logger.Name, this, LogLevel.Info));
            }

            IsExited = true;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Runs the spider synchronously. While the status is <c>Status.Running</c>,
        /// <c>ThreadNum</c> parallel workers poll the scheduler and process requests; while it is
        /// <c>Status.Stopped</c> the method idles and re-checks every 50 ms. Once the status is
        /// anything else, the method closes the spider, waits for the monitor task and logs the
        /// total run time.
        /// </summary>
        /// <param name="arguments">
        /// Forwarded to <c>InitComponent</c>. If it contains <c>"running-test"</c>, the method
        /// returns right after initialization without crawling.
        /// </param>
        public virtual void Run(params string[] arguments)
        {
            // Refuse to start a second run on top of an active one.
            if (Stat == Status.Running)
            {
                this.Log("任务运行中...", LogLevel.Warn);
                return;
            }

            CheckIfSettingsCorrect();

#if !NET_CORE
            // Raise the default connection limit so the parallel workers can download concurrently.
            ServicePointManager.DefaultConnectionLimit = 1000;
#endif

            InitComponent(arguments);

            Monitorable.IsExited = false;

            // Dry-run mode: components are initialized, the scheduler is flagged as exited, no crawl.
            if (arguments.Contains("running-test"))
            {
                _scheduler.IsExited = true;
                return;
            }

            // Only stamp the start time on the first run.
            if (StartTime == DateTime.MinValue)
            {
                StartTime = DateTime.Now;
            }

            Stat = Status.Running;
            _realStat = Status.Running;

            while (Stat == Status.Running || Stat == Status.Stopped)
            {
                if (Stat == Status.Stopped)
                {
                    // Paused: publish the paused state and check again shortly.
                    _realStat = Status.Stopped;
                    Thread.Sleep(50);
                }
                else
                {
                    var parallelOptions = new ParallelOptions
                    {
                        MaxDegreeOfParallelism = ThreadNum
                    };

                    // Body executed by each parallel worker.
                    Action<int> worker = workerIndex =>
                    {
                        int idleRounds = 0;
                        bool pausedAfterFirst = false;

                        // Each worker operates on its own clone of the downloader.
                        var workerDownloader = Downloader.Clone();

                        while (Stat == Status.Running)
                        {
                            Request pending = Scheduler.Poll();

                            if (pending != null)
                            {
                                idleRounds = 0;

                                try
                                {
                                    Stopwatch timer = new Stopwatch();
                                    ProcessRequest(timer, pending, workerDownloader);
                                    Thread.Sleep(Site.SleepTime);
                                    _OnSuccess(pending);
                                }
                                catch (Exception ex)
                                {
                                    OnError(pending);
                                    this.Log($"采集失败: {pending.Url}.", LogLevel.Error, ex);
                                }
                                finally
                                {
                                    // Hand the proxy back whether or not the request succeeded;
                                    // HttpStatusCode.Found is reported when no status code was recorded.
                                    if (pending.GetExtra(Request.Proxy) != null)
                                    {
                                        var rawStatus = pending.GetExtra(Request.StatusCode);

                                        HttpStatusCode reported;
                                        if (rawStatus == null)
                                        {
                                            reported = HttpStatusCode.Found;
                                        }
                                        else
                                        {
                                            reported = (HttpStatusCode)rawStatus;
                                        }

                                        Site.ReturnHttpProxy(pending.GetExtra(Request.Proxy) as UseSpecifiedUriWebProxy, reported);
                                    }
                                }

                                // One-off 3 second pause after this worker's first request.
                                if (!pausedAfterFirst)
                                {
                                    Thread.Sleep(3000);
                                    pausedAfterFirst = true;
                                }

                                continue;
                            }

                            // Scheduler is empty: finish once the wait limit is exceeded (if allowed).
                            if (idleRounds > _waitCountLimit && ExitWhenComplete)
                            {
                                Stat = Status.Finished;
                                _realStat = Status.Finished;
                                _OnComplete();
                                OnComplete();
                                break;
                            }

                            // Otherwise wait for new URLs to be added.
                            WaitNewUrl(ref idleRounds);
                        }
                    };

                    Parallel.For(0, ThreadNum, parallelOptions, worker);
                }
            }

            FinishedTime = DateTime.Now;
            _realStat = Status.Exited;

            OnClose();

            this.Log($"等待监控进程退出.", LogLevel.Info);
            _monitorTask.Wait();

            OnClosing?.Invoke();

            // Pick the log prefix depending on whether the crawl completed or was exited early.
            string outcome;
            if (Stat == Status.Finished)
            {
                outcome = "结束采集";
            }
            else
            {
                outcome = "退出采集";
            }

            this.Log($"{outcome}, 运行时间: {(FinishedTime - StartTime).TotalSeconds} 秒.", LogLevel.Info);
        }