Example #1
0
        /// <summary>
        /// Raises <c>SpiderClosingEvent</c> and then tears down the spider's components:
        /// every pipeline, the scheduler (after resetting its duplicate check when it is a
        /// <c>DuplicateRemovedScheduler</c>), the page processor and the downloader.
        /// </summary>
        protected void OnClose()
        {
            // Subscribers are notified before any component is destroyed.
            SpiderClosingEvent?.Invoke();

            foreach (var item in Pipelines)
            {
                SafeDestroy(item);
            }

            // Only a DuplicateRemovedScheduler exposes a duplicate check to reset;
            // any other scheduler type skips this step.
            var deduplicating = Scheduler as DuplicateRemovedScheduler;
            if (deduplicating != null)
            {
                deduplicating.ResetDuplicateCheck(this);
            }

            // Remaining components are destroyed in a fixed order.
            SafeDestroy(Scheduler);
            SafeDestroy(PageProcessor);
            SafeDestroy(Downloader);
        }
Example #2
0
 /// <summary>
 /// Marks the spider as exited, logs a warning that includes its identity, and
 /// raises <c>SpiderClosingEvent</c>.
 /// </summary>
 public void Exit()
 {
     // The status is switched before the log entry and the event are emitted.
     Stat = Status.Exited;

     string message = $"退出任务中 {Identity}...";
     Logger.Warn(message);

     SpiderClosingEvent?.Invoke();
 }
Example #3
0
        /// <summary>
        /// Runs the crawl: marks the spider as running, initializes its components, and
        /// starts <c>ThreadNum</c> parallel workers that poll the scheduler for requests
        /// and process them for as long as <c>Stat</c> stays <c>Status.Running</c>.
        /// Once all workers have returned it records the finish time, destroys the
        /// pipelines, performs status-dependent close/log steps, raises
        /// <c>SpiderClosingEvent</c>, calls <c>Log.WaitForExit()</c> and sets
        /// <c>IsExited</c> to true.
        /// </summary>
        /// <exception cref="InvalidCastException">
        /// Thrown when <c>Scheduler</c> does not implement <c>IMonitorableScheduler</c>
        /// (direct cast below).
        /// </exception>
        public void Run()
        {
            // Presumably rejects a second concurrent Run() — verify against CheckIfRunning.
            CheckIfRunning();

            Stat     = Status.Running;
            IsExited = false;

#if !NET_CORE
            // Enable multi-threading support: raise the default connection limit to 1000.
            System.Net.ServicePointManager.DefaultConnectionLimit = 1000;
#endif

            InitComponent();

            // Direct cast: the scheduler must be monitorable, because the per-request
            // progress log below reads its left/total request counts.
            IMonitorableScheduler monitor = (IMonitorableScheduler)Scheduler;

            // Keep an already-assigned StartTime; only set it when it still holds the
            // DateTime.MinValue default.
            if (StartTime == DateTime.MinValue)
            {
                StartTime = DateTime.Now;
            }

            // One loop iteration per worker; each iteration runs its own polling loop.
            // Parallel.For returns only after every worker has left its loop.
            Parallel.For(0, ThreadNum, new ParallelOptions
            {
                MaxDegreeOfParallelism = ThreadNum
            }, i =>
            {
                // Consecutive empty polls seen by this worker (passed by ref to WaitNewUrl).
                int waitCount  = 0;
                // Despite its name, this becomes true once the one-time pause after the
                // worker's first processed request has been taken.
                bool firstTask = false;

                // Each worker uses its own downloader instance obtained via Clone().
                var downloader = Downloader.Clone();

                // Stat is shared by all workers: any worker (or an external caller)
                // changing it away from Running ends every worker's loop.
                while (Stat == Status.Running)
                {
                    Request request = Scheduler.Poll(this);

                    if (request == null)
                    {
                        // Nothing to do: finish the whole spider once the wait limit is
                        // exceeded, but only when it is configured to exit on completion.
                        if (waitCount > _waitCountLimit && IsExitWhenComplete)
                        {
                            Stat = Status.Finished;
                            break;
                        }

                        // wait until new url added
                        // (presumably WaitNewUrl also advances waitCount — verify)
                        WaitNewUrl(ref waitCount);
                    }
                    else
                    {
                        // Progress line emitted for every polled request.
                        Log.WriteLine($"Left: {monitor.GetLeftRequestsCount(this)} Total: {monitor.GetTotalRequestsCount(this)} Thread: {ThreadNum}");

                        // A request arrived, so the empty-poll streak restarts.
                        waitCount = 0;

                        try
                        {
                            ProcessRequest(request, downloader);
                            // Per-request delay configured on the Site.
                            Thread.Sleep(Site.SleepTime);
#if TEST
                            // TEST builds only: time the OnSuccess callback.
                            System.Diagnostics.Stopwatch sw = new System.Diagnostics.Stopwatch();
                            sw.Reset();
                            sw.Start();
#endif

                            OnSuccess(request);
#if TEST
                            sw.Stop();
                            Console.WriteLine("OnSuccess:" + (sw.ElapsedMilliseconds).ToString());
#endif
                        }
                        catch (Exception e)
                        {
                            // Any failure while processing (or in OnSuccess) is reported
                            // through OnError and logged; the worker keeps running.
                            OnError(request);
                            Logger.Error("采集失败: " + request.Url + ".", e);
                        }
                        finally
                        {
#if !NET_CORE
                            // Hand the proxy used for this request back to the pool,
                            // together with the status code stored on the request.
                            // NOTE(review): assuming GetExtra returns object, the (int)
                            // unboxing throws if no status code was stored (e.g. the
                            // request failed before a response arrived); that would escape
                            // this finally block and skip the counter below — confirm.
                            if (Site.HttpProxyPoolEnable && request.GetExtra(Request.Proxy) != null)
                            {
                                Site.ReturnHttpProxyToPool((HttpHost)request.GetExtra(Request.Proxy), (int)request.GetExtra(Request.StatusCode));
                            }
#endif
                            // Counted whether the request succeeded or failed.
                            FinishedPageCount.Inc();
                        }

                        // One-time 3-second pause after this worker's first request;
                        // the reason is not visible in this method.
                        if (!firstTask)
                        {
                            Thread.Sleep(3000);
                            firstTask = true;
                        }
                    }
                }
            });

            FinishedTime = DateTime.Now;

            // Pipelines are destroyed regardless of the final status.
            foreach (IPipeline pipeline in Pipelines)
            {
                SafeDestroy(pipeline);
            }

            // NOTE(review): if OnClose() itself destroys the pipelines and raises
            // SpiderClosingEvent, both happen twice on the Finished path (see the
            // loop above and the unconditional Invoke below) — confirm this is harmless.
            if (Stat == Status.Finished)
            {
                OnClose();

                Logger.Info($"任务 {Identity} 结束.");
            }

            if (Stat == Status.Stopped)
            {
                Logger.Info("任务 " + Identity + " 停止成功!");
            }

            // Raised for every final status, not only Finished.
            SpiderClosingEvent?.Invoke();

            Log.WaitForExit();

            if (Stat == Status.Exited)
            {
                Logger.Info("任务 " + Identity + " 退出成功!");
            }

            // Signals that Run() has fully returned from its shutdown sequence.
            IsExited = true;
        }