Esempio n. 1
0
        /// <summary>
        ///     Validates the spider's configuration before a crawl starts, raises the
        ///     default HTTP connection limit and then calls <c>AfterInit</c> on every
        ///     registered listener.
        /// </summary>
        /// <exception cref="ArgumentNullException">
        ///     <c>Scheduler</c>, <c>DownLoader</c> or <c>PageProcessor</c> is null, or
        ///     <c>ThreadCount</c> is not positive (the exception type is kept for
        ///     backward compatibility with existing callers).
        /// </exception>
        /// <exception cref="SpiderExceptoin"><c>Site.MinSleepTime</c> is below 500.</exception>
        private void InitSpider()
        {
            // NOTE(review): presumably rejects initialization while a crawl is already running - confirm.
            CheckRunning();

            // ArgumentNullException(string) interprets its argument as the parameter
            // name, so the two-argument overload is used to carry a real message.
            if (Scheduler == null)
            {
                throw new ArgumentNullException(nameof(Scheduler), "Scheduler must not be null");
            }

            if (DownLoader == null)
            {
                throw new ArgumentNullException(nameof(DownLoader), "DownLoader must not be null");
            }

            if (PageProcessor == null)
            {
                throw new ArgumentNullException(nameof(PageProcessor), "PageProcessor must not be null");
            }

            if (Site.MinSleepTime < 500)
            {
                throw new SpiderExceptoin("Sleep time should be at least 500");
            }

            if (ThreadCount <= 0)
            {
                throw new ArgumentNullException(nameof(ThreadCount), "ThreadCount should be at least one!");
            }

            // Limit on concurrent HTTP connections: never below 1024, and at least ThreadCount.
            ServicePointManager.DefaultConnectionLimit = Math.Max(ThreadCount, 1024);

            SpiderListening.ForEach(item => item.AfterInit(this));
        }
Esempio n. 2
0
        /// <summary>
        ///     Runs the spider: initializes it, marks it as running and then starts
        ///     parallel workers (at most <c>ThreadCount</c>) that keep pulling requests
        ///     from the scheduler.
        /// </summary>
        /// <remarks>
        ///     A worker stops when the scheduler returns null or when <c>Status</c> is no
        ///     longer <c>Running</c>. The call returns once <c>Parallel.For</c> has
        ///     completed. An exception raised while handling a request is passed to every
        ///     listener's <c>ErrorHandler</c> and the worker moves on to the next request.
        /// </remarks>
        public void Run()
        {
            InitSpider();
            Status = SpiderStatusEnum.Running;

            var parallelOptions = new ParallelOptions
            {
                MaxDegreeOfParallelism = ThreadCount
            };

            Parallel.For(0, ThreadCount, parallelOptions, workerIndex =>
            {
                while (Status == SpiderStatusEnum.Running)
                {
                    Request request = null;
                    try
                    {
                        request = Scheduler.GetRequest();
                        if (request == null)
                        {
                            break;
                        }

                        ProcessRequest(request, DownLoader);

                        // Random.Next(min, max) throws when min > max; clamp the upper
                        // bound so a misconfigured MaxSleepTime cannot turn a successful
                        // request into a reported error and skip the delay.
                        int minSleep = Site.MinSleepTime;
                        int maxSleep = Math.Max(minSleep, Site.MaxSleepTime);

                        // System.Random is not thread-safe and every worker uses the same
                        // _random field, so draws are serialized.
                        // NOTE(review): assumes _random is a plain System.Random instance - confirm.
                        int delay;
                        lock (_random)
                        {
                            delay = _random.Next(minSleep, maxSleep);
                        }

                        Thread.Sleep(delay);
                    }
                    catch (Exception e)
                    {
                        // request is still null here if Scheduler.GetRequest itself threw.
                        SpiderListening.ForEach(item => item.ErrorHandler(request, e));
                    }
                }
            });
        }
Esempio n. 3
0
        /// <summary>
        ///     Handles a single request end to end: downloads its page, runs the page
        ///     processor, records the request as finished, notifies listeners, feeds
        ///     the pipelines when the page is flagged for saving, and queues a new
        ///     request for every item returned by <c>GetPageUrl</c>.
        /// </summary>
        /// <param name="request">The request to handle.</param>
        /// <param name="downLoader">The downloader used to fetch the page for <paramref name="request" />.</param>
        private void ProcessRequest(Request request, IDownLoader downLoader)
        {
            var downloadedPage = downLoader.DownLoader(request, this);
            PageProcessor.Process(downloadedPage);

            // The request is recorded as finished before listeners are notified.
            Scheduler.AddFinishRequest(request);
            foreach (var listener in SpiderListening)
            {
                listener.AfterSuccess(request);
            }

            // Results reach the pipelines only for pages whose IsSave flag is set.
            if (downloadedPage.IsSave)
            {
                foreach (var pipeline in Pipelines)
                {
                    pipeline.Process(downloadedPage.PageResult);
                }
            }

            // Queue a follow-up request for each item extracted from the page.
            foreach (var url in GetPageUrl(downloadedPage))
            {
                Scheduler.AddWaitRequest(new Request(url));
            }
        }