/// <summary>
/// Creates an <see cref="EntityGeneralSpider"/> and configures it from <c>SpiderContext</c>:
/// entities, per-entity pipelines, cache/sleep/thread/depth values, the downloader, and the
/// optional page handlers and target-URL handler.
/// </summary>
/// <param name="scheduler">Scheduler passed straight to the spider's constructor.</param>
/// <returns>The configured spider instance.</returns>
protected virtual Core.Spider GenerateSpider(IScheduler scheduler)
{
    // TargetUrlExtractInfos stays null when the context does not define any.
    var entityProcessor = new EntityProcessor(SpiderContext)
    {
        TargetUrlExtractInfos = SpiderContext.TargetUrlExtractInfos?.Select(t => t.GetTargetUrlExtractInfo()).ToList()
    };

    // Every entity is registered with the processor before the spider is constructed around it.
    foreach (var registered in SpiderContext.Entities)
    {
        entityProcessor.AddEntity(registered);
    }

    var result = new EntityGeneralSpider(
        SpiderContext.Site,
        Name,
        SpiderContext.UserId,
        SpiderContext.TaskGroup,
        entityProcessor,
        scheduler);

    // One EntityPipeline per entity, wrapping a pipeline built from each configured pipeline entry.
    foreach (var current in SpiderContext.Entities)
    {
        string identity = current.Identity;
        var entitySchema = current.Schema;

        var entityPipelines = new List<IEntityPipeline>();
        foreach (var pipelineConfig in SpiderContext.Pipelines)
        {
            entityPipelines.Add(pipelineConfig.GetPipeline(entitySchema, current));
        }

        result.AddPipeline(new EntityPipeline(identity, entityPipelines));
    }

    result.SetCachedSize(SpiderContext.CachedSize);
    result.SetEmptySleepTime(SpiderContext.EmptySleepTime);
    result.SetThreadNum(SpiderContext.ThreadNum);
    result.Deep = SpiderContext.Deep;
    result.SetDownloader(SpiderContext.Downloader.GetDownloader());
    result.SkipWhenResultIsEmpty = SpiderContext.SkipWhenResultIsEmpty;

    // Page handlers are optional; when present each handler's Customize method is attached.
    if (SpiderContext.PageHandlers != null)
    {
        result.PageHandlers = new List<Action<Page>>();
        foreach (var handler in SpiderContext.PageHandlers)
        {
            result.PageHandlers.Add(new Action<Page>(handler.Customize));
        }
    }

    // The target-URL handler is optional as well.
    if (SpiderContext.TargetUrlsHandler != null)
    {
        result.SetCustomizeTargetUrls(SpiderContext.TargetUrlsHandler.Handle);
    }

    return result;
}
/// <summary>
/// Creates an <see cref="EntityGeneralSpider"/> from the JSON-backed <c>_spiderContext</c>:
/// registers the entities, adds one pipeline per entity according to the configured pipeline
/// type, applies cache/sleep/thread/depth values and the downloader, and wires the optional
/// page and target-URL customizers.
/// </summary>
/// <param name="scheduler">Scheduler passed straight to the spider's constructor.</param>
/// <returns>The configured spider instance.</returns>
private Core.Spider GenerateSpider(IScheduler scheduler)
{
    Site targetSite = _spiderContext.Site;
    var entityProcessor = new EntityProcessor(targetSite);

    // Every entity is registered with the processor before the spider is constructed around it.
    foreach (var registered in _spiderContext.Entities)
    {
        entityProcessor.AddEntity(registered);
    }

    var result = new EntityGeneralSpider(_spiderContext.SpiderName, entityProcessor, scheduler);

    foreach (var current in _spiderContext.Entities)
    {
        // Identity and Schema are read with "?." and may therefore be null when the token is absent.
        string identity = current.SelectToken("$.Identity")?.ToString();
        var kind = _spiderContext.Pipeline.SelectToken("$.Type").ToObject<Configuration.Pipeline.Types>();
        var entitySchema = current.SelectToken("$.Schema")?.ToObject<Schema>();

        // The pipeline token is deserialized into the configuration class matching its type.
        // Types other than the three handled here add no pipeline for the entity.
        if (kind == Configuration.Pipeline.Types.MongoDb)
        {
            result.AddPipeline(new EntityPipeline(
                identity,
                _spiderContext.Pipeline.ToObject<MongoDbPipeline>().GetPipeline(entitySchema, current)));
        }
        else if (kind == Configuration.Pipeline.Types.MySql)
        {
            result.AddPipeline(new EntityPipeline(
                identity,
                _spiderContext.Pipeline.ToObject<MysqlPipeline>().GetPipeline(entitySchema, current)));
        }
        else if (kind == Configuration.Pipeline.Types.MySqlFile)
        {
            result.AddPipeline(new EntityPipeline(
                identity,
                _spiderContext.Pipeline.ToObject<MysqlFilePipeline>().GetPipeline(entitySchema, current)));
        }
    }

    result.SetCachedSize(_spiderContext.CachedSize);
    result.SetEmptySleepTime(_spiderContext.EmptySleepTime);
    result.SetThreadNum(_spiderContext.ThreadNum);
    result.Deep = _spiderContext.Deep;
    result.SetDownloader(GenerateDownloader());

    // Optional page customizer; only the Sub type is wired up, any other type is ignored.
    if (_spiderContext.CustomizePage != null)
    {
        var pageKind = _spiderContext.CustomizePage.SelectToken("$.Type").ToObject<CustomizePage.Types>();
        if (pageKind == CustomizePage.Types.Sub)
        {
            result.CustomizePage = _spiderContext.CustomizePage.ToObject<SubCustomizePage>().Customize;
        }
    }

    // Optional target-URL customizer; only IncreasePageNumber is wired up, any other type is ignored.
    if (_spiderContext.CustomizeTargetUrls != null)
    {
        var urlsKind = _spiderContext.CustomizeTargetUrls.SelectToken("$.Type").ToObject<CustomizeTargetUrls.Types>();
        if (urlsKind == CustomizeTargetUrls.Types.IncreasePageNumber)
        {
            result.SetCustomizeTargetUrls(
                _spiderContext.CustomizeTargetUrls.ToObject<IncreasePageNumberCustomizeTargetUrls>().Customize);
        }
    }

    return result;
}