/// <summary>
/// Creates and configures the <see cref="EntitySpider"/> used by this class.
/// The spider is given a single start URL (http://www.cas.cn/kx/kpwz/index.shtml),
/// the <c>ArticleSummary</c> entity type, and a <c>CollectEntityPipeline</c>.
/// </summary>
/// <returns>The configured spider instance.</returns>
protected override EntitySpider GetEntitySpider()
{
    var spider = new EntitySpider(new Site());

    // NOTE(review): a second Site is supplied right after construction; this looks
    // redundant with the constructor argument - confirm before removing.
    spider.SetSite(new Site());

    // NOTE(review): SetThreadNum(2) is immediately followed by ThreadNum = 1. Both
    // calls are kept in their original order; presumably the later value is the one
    // left in effect - confirm.
    spider.SetThreadNum(2);
    spider.ThreadNum = 1;

    spider.RetryWhenResultIsEmpty = false;
    spider.Deep = 100;

    // The empty-sleep time is set through both the property and the setter method,
    // each time with the same value (5000).
    spider.EmptySleepTime = 5000;
    spider.SetEmptySleepTime(5000);

    spider.ExitWhenComplete = true;
    spider.CachedSize = 1;

    // Infrastructure: downloader first, then scheduler.
    spider.SetDownloader(new HttpClientDownloader());
    spider.SetScheduler(new QueueDuplicateRemovedScheduler());

    spider.SkipWhenResultIsEmpty = true;
    spider.SpawnUrl = true;

    // Pipeline, start URL and entity type, in that order.
    spider.AddPipeline(new CollectEntityPipeline());
    spider.AddStartUrl("http://www.cas.cn/kx/kpwz/index.shtml");
    spider.AddEntityType(typeof(ArticleSummary));

    // Name and Batch are not declared in this method; they are assigned here with a
    // fixed name and a timestamp taken at the moment of the call.
    Name = "qidian";
    Batch = DateTime.Now.ToString("yyyy_MM_dd_HHmmss");

    return spider;
}
/// <summary>
/// Creates and configures the <see cref="EntitySpider"/> used by this class.
/// The spider gets a timestamped identity, a <c>MySqlEntityPipeline</c>, one JD list
/// page as start URL (with extra "name"/"cat3" values), the <c>Product</c> entity
/// type, and a <c>WebDriverDownloader</c> created with <c>Browser.Chrome</c>.
/// </summary>
/// <returns>The configured spider instance.</returns>
protected override EntitySpider GetEntitySpider()
{
    // NOTE(review): the connection string embeds database credentials in source;
    // consider loading it from configuration instead.
    const string connectionString =
        "Database='mysql';Data Source=localhost;User ID=root;Password=1qazZAQ!;Port=3306";
    const string startUrl =
        "http://list.jd.com/list.html?cat=9987,653,655&page=2&JL=6_0_0&ms=5#J_main";

    var spider = new EntitySpider(new Site());

    // The identity carries the time of the call (24-hour clock).
    spider.SetIdentity("JD sku/store test " + DateTime.Now.ToString("yyyy-MM-dd HHmmss"));

    spider.AddPipeline(new MySqlEntityPipeline(connectionString));

    // Extra key/value pairs passed along with the start URL.
    var startUrlExtras = new Dictionary<string, object>
    {
        { "name", "手机" },
        { "cat3", "655" }
    };
    spider.AddStartUrl(startUrl, startUrlExtras);

    spider.AddEntityType(typeof(Product));
    spider.SetDownloader(new WebDriverDownloader(Browser.Chrome));

    return spider;
}
/// <summary>
/// Creates and configures the <see cref="EntitySpider"/> used by this class.
/// The spider runs with a thread number of 1, a timestamped identity, an
/// <c>HttpClientDownloader</c>, a <c>MySqlEntityPipeline</c>, one JD list page as
/// start URL (with extra "name"/"cat3" values), and the <c>Product</c> entity type.
/// </summary>
/// <returns>The configured spider instance.</returns>
protected override EntitySpider GetEntitySpider()
{
    EntitySpider context = new EntitySpider(new Site
    {
        // Proxy support is currently disabled. To enable it, set:
        // HttpProxyPool = new HttpProxyPool(new KuaidailiProxySupplier("<Kuaidaili API>"))
    });

    context.SetThreadNum(1);

    // Use the 24-hour "HH" specifier: with "hh" (12-hour clock) and no AM/PM
    // designator, two runs exactly twelve hours apart would get the same identity.
    context.SetIdentity("JD_sku_store_test_" + DateTime.Now.ToString("yyyy_MM_dd_HHmmss"));

    // Download HTML with the HTTP client.
    context.SetDownloader(new HttpClientDownloader());

    // Save data to MySQL.
    // NOTE(review): the connection string embeds database credentials in source;
    // consider loading it from configuration instead.
    context.AddPipeline(new MySqlEntityPipeline("Database='test';Data Source=localhost;User ID=root;Password=1qazZAQ!;Port=3306"));

    // Extra key/value pairs are passed along with the start URL.
    context.AddStartUrl(
        "http://list.jd.com/list.html?cat=9987,653,655&page=2&JL=6_0_0&ms=5#J_main",
        new Dictionary<string, object>
        {
            { "name", "手机" },
            { "cat3", "655" }
        });

    context.AddEntityType(typeof(Product));

    return context;
}
/// <summary>
/// Creates and configures the <see cref="EntitySpider"/> used by this class.
/// The spider gets a timestamped identity, a <c>MySqlEntityPipeline</c>, one JD list
/// page as start URL (with extra "name"/"cat3" values), the <c>Product</c> entity
/// type paired with a <c>TargetUrlExtractor</c>, and a <c>WebDriverDownloader</c>
/// created with <c>Browser.Chrome</c>.
/// </summary>
/// <returns>The configured spider instance.</returns>
protected override EntitySpider GetEntitySpider()
{
    // NOTE(review): the connection string embeds database credentials in source;
    // consider loading it from configuration instead.
    const string connectionString =
        "Database='mysql';Data Source=localhost;User ID=root;Password=1qazZAQ!;Port=3306";
    const string startUrl =
        "http://list.jd.com/list.html?cat=9987,653,655&page=2&JL=6_0_0&ms=5#J_main";

    var spider = new EntitySpider(new Site());

    // The identity carries the time of the call (24-hour clock).
    spider.SetIdentity("JD sku/store test " + DateTime.Now.ToString("yyyy-MM-dd HHmmss"));

    spider.AddEntityPipeline(new MySqlEntityPipeline(connectionString));

    // Extra key/value pairs passed along with the start URL.
    var startUrlExtras = new Dictionary<string, object>
    {
        { "name", "手机" },
        { "cat3", "655" }
    };
    spider.AddStartUrl(startUrl, startUrlExtras);

    // Target-URL extraction is configured with an XPath region (the "p-num" span)
    // and a single pattern matching a "&page=<digits>&" fragment.
    var targetUrls = new TargetUrlExtractor
    {
        Region = new Selector
        {
            Type = SelectorType.XPath,
            Expression = "//span[@class=\"p-num\"]"
        },
        Patterns = new List<string> { @"&page=[0-9]+&" }
    };
    spider.AddEntityType(typeof(Product), targetUrls);

    spider.SetDownloader(new WebDriverDownloader(Browser.Chrome));

    return spider;
}