Пример #1
0
        /// <summary>
        /// Builds an <c>EntityGeneralSpider</c> from the current <c>SpiderContext</c>.
        /// </summary>
        /// <remarks>
        /// Every entity in the context is registered on a single <c>EntityProcessor</c> and
        /// then receives its own <c>EntityPipeline</c>, which wraps one pipeline per entry of
        /// <c>SpiderContext.Pipelines</c>. Cache size, empty sleep time, thread count, depth,
        /// downloader and the skip-when-empty flag are copied onto the spider afterwards,
        /// followed by the optional page handlers and the optional target-URL handler.
        /// </remarks>
        /// <param name="scheduler">Scheduler passed to the spider's constructor.</param>
        /// <returns>The configured spider.</returns>
        protected virtual Core.Spider GenerateSpider(IScheduler scheduler)
        {
            var processor = new EntityProcessor(SpiderContext)
            {
                // Stays null when the context declares no target URL extract infos.
                TargetUrlExtractInfos = SpiderContext.TargetUrlExtractInfos?.Select(info => info.GetTargetUrlExtractInfo()).ToList()
            };

            // All entities are registered on the processor before the spider is constructed.
            foreach (var registeredEntity in SpiderContext.Entities)
            {
                processor.AddEntity(registeredEntity);
            }

            var spider = new EntityGeneralSpider(
                SpiderContext.Site,
                Name,
                SpiderContext.UserId,
                SpiderContext.TaskGroup,
                processor,
                scheduler);

            // One EntityPipeline per entity, keyed by the entity's identity.
            foreach (var entity in SpiderContext.Entities)
            {
                string entityName = entity.Identity;
                var entitySchema = entity.Schema;
                var entityPipelines = new List<IEntityPipeline>();

                foreach (var pipelineConfig in SpiderContext.Pipelines)
                {
                    entityPipelines.Add(pipelineConfig.GetPipeline(entitySchema, entity));
                }

                spider.AddPipeline(new EntityPipeline(entityName, entityPipelines));
            }

            // Runtime settings copied straight from the context.
            spider.SetCachedSize(SpiderContext.CachedSize);
            spider.SetEmptySleepTime(SpiderContext.EmptySleepTime);
            spider.SetThreadNum(SpiderContext.ThreadNum);
            spider.Deep = SpiderContext.Deep;
            spider.SetDownloader(SpiderContext.Downloader.GetDownloader());
            spider.SkipWhenResultIsEmpty = SpiderContext.SkipWhenResultIsEmpty;

            // Page handlers are optional; the spider's list is only replaced when some are configured.
            if (SpiderContext.PageHandlers != null)
            {
                spider.PageHandlers = new List<Action<Page>>();
                foreach (var handler in SpiderContext.PageHandlers)
                {
                    spider.PageHandlers.Add(handler.Customize);
                }
            }

            // The target-URL handler is optional as well.
            if (SpiderContext.TargetUrlsHandler != null)
            {
                spider.SetCustomizeTargetUrls(SpiderContext.TargetUrlsHandler.Handle);
            }

            return spider;
        }
Пример #2
0
        /// <summary>
        /// Builds an <c>EntityGeneralSpider</c> from the token-based <c>_spiderContext</c>.
        /// </summary>
        /// <remarks>
        /// Each entity is registered on an <c>EntityProcessor</c> created for the context's site.
        /// For every entity the pipeline configuration's <c>$.Type</c> token selects which
        /// pipeline configuration class (MongoDb, MySql or MySqlFile) is materialised; any
        /// other type adds no pipeline for that entity. The optional <c>CustomizePage</c> and
        /// <c>CustomizeTargetUrls</c> sections are dispatched on their own <c>$.Type</c> token
        /// in the same way.
        /// </remarks>
        /// <param name="scheduler">Scheduler passed to the spider's constructor.</param>
        /// <returns>The configured spider.</returns>
        private Core.Spider GenerateSpider(IScheduler scheduler)
        {
            Site crawlSite = _spiderContext.Site;
            var processor = new EntityProcessor(crawlSite);

            // All entities are registered on the processor before the spider is constructed.
            foreach (var entityToken in _spiderContext.Entities)
            {
                processor.AddEntity(entityToken);
            }

            var spider = new EntityGeneralSpider(_spiderContext.SpiderName, processor, scheduler);

            foreach (var entityToken in _spiderContext.Entities)
            {
                // Identity and schema are read null-tolerantly; the pipeline type is not.
                string entityName = entityToken.SelectToken("$.Identity")?.ToString();
                var pipelineKind = _spiderContext.Pipeline.SelectToken("$.Type").ToObject<Configuration.Pipeline.Types>();
                var entitySchema = entityToken.SelectToken("$.Schema")?.ToObject<Schema>();

                if (pipelineKind == Configuration.Pipeline.Types.MongoDb)
                {
                    var mongoConfig = _spiderContext.Pipeline.ToObject<MongoDbPipeline>();
                    spider.AddPipeline(new EntityPipeline(entityName, mongoConfig.GetPipeline(entitySchema, entityToken)));
                }
                else if (pipelineKind == Configuration.Pipeline.Types.MySql)
                {
                    var mysqlConfig = _spiderContext.Pipeline.ToObject<MysqlPipeline>();
                    spider.AddPipeline(new EntityPipeline(entityName, mysqlConfig.GetPipeline(entitySchema, entityToken)));
                }
                else if (pipelineKind == Configuration.Pipeline.Types.MySqlFile)
                {
                    var mysqlFileConfig = _spiderContext.Pipeline.ToObject<MysqlFilePipeline>();
                    spider.AddPipeline(new EntityPipeline(entityName, mysqlFileConfig.GetPipeline(entitySchema, entityToken)));
                }
                // Any other pipeline type adds nothing for this entity.
            }

            // Runtime settings copied straight from the context.
            spider.SetCachedSize(_spiderContext.CachedSize);
            spider.SetEmptySleepTime(_spiderContext.EmptySleepTime);
            spider.SetThreadNum(_spiderContext.ThreadNum);
            spider.Deep = _spiderContext.Deep;
            spider.SetDownloader(GenerateDownloader());

            // Optional page customisation; only the Sub type is recognised here.
            if (_spiderContext.CustomizePage != null)
            {
                var pageCustomizerKind = _spiderContext.CustomizePage.SelectToken("$.Type").ToObject<CustomizePage.Types>();
                if (pageCustomizerKind == CustomizePage.Types.Sub)
                {
                    var subCustomizer = _spiderContext.CustomizePage.ToObject<SubCustomizePage>();
                    spider.CustomizePage = subCustomizer.Customize;
                }
            }

            // Optional target-URL customisation; only IncreasePageNumber is recognised here.
            if (_spiderContext.CustomizeTargetUrls != null)
            {
                var targetUrlsKind = _spiderContext.CustomizeTargetUrls.SelectToken("$.Type").ToObject<CustomizeTargetUrls.Types>();
                if (targetUrlsKind == CustomizeTargetUrls.Types.IncreasePageNumber)
                {
                    var pageNumberCustomizer = _spiderContext.CustomizeTargetUrls.ToObject<IncreasePageNumberCustomizeTargetUrls>();
                    spider.SetCustomizeTargetUrls(pageNumberCustomizer.Customize);
                }
            }

            return spider;
        }