/// <summary>
/// Builds the <see cref="SpiderContext"/> for the "JD sku/store test" task group:
/// one fixed JD list-page start URL, a MySQL pipeline, a Redis scheduler and the
/// <c>Product</c> entity type.
/// </summary>
/// <returns>The fully configured spider context.</returns>
protected override SpiderContext GetSpiderContext()
{
    // NOTE(review): host names and credentials below are hard-coded in source;
    // consider loading them from configuration or environment instead.
    SpiderContext context = new SpiderContext();
    context.SetTaskGroup("JD sku/store test");

    // Format the timestamp with the invariant culture so the spider name has the
    // same shape regardless of the culture of the machine running the spider.
    string startedAt = DateTime.Now.ToString("yyyy-MM-dd HHmmss", System.Globalization.CultureInfo.InvariantCulture);
    context.SetSpiderName("JD sku/store test " + startedAt);

    // Target URL extractor: XPath region selector plus a "&page=<digits>&" pattern.
    context.AddTargetUrlExtractor(new Extension.Configuration.TargetUrlExtractor
    {
        Region = new Extension.Configuration.Selector
        {
            Type = ExtractType.XPath,
            Expression = "//span[@class=\"p-num\"]"
        },
        Patterns = new List<string> { @"&page=[0-9]+&" }
    });

    context.AddPipeline(new MysqlPipeline
    {
        ConnectString = "Database='test';Data Source=ooodata.com;User ID=root;Password=1qazZAQ!123456;Port=4306"
    });

    // NOTE(review): the Redis password literal looks like a redacted placeholder - confirm.
    context.SetScheduler(new RedisScheduler
    {
        Host = "ooodata.com",
        Password = "******",
        Port = 6379
    });

    // The dictionary is passed alongside the start URL as extra key/value data.
    context.AddStartUrl(
        "http://list.jd.com/list.html?cat=9987,653,655&page=2&JL=6_0_0&ms=5#J_main",
        new Dictionary<string, object>
        {
            { "name", "手机" },
            { "cat3", "655" }
        });

    context.AddEntityType(typeof(Product));
    return context;
}
/// <summary>
/// Builds the <see cref="SpiderContext"/> for the "JD sku/store test" task group:
/// one fixed JD list-page start URL plus start URLs prepared from the
/// <c>jd.category</c> MySQL table, a MySQL pipeline, a Redis scheduler and the
/// <c>Product</c> entity type.
/// </summary>
/// <returns>The fully configured spider context.</returns>
protected override SpiderContext GetSpiderContext()
{
    // One shared connection string for both the pipeline and the start-URL
    // preparer, so the two copies cannot drift apart.
    // NOTE(review): host names and credentials are hard-coded in source;
    // consider loading them from configuration or environment instead.
    const string connectString = "Database='test';Data Source=ooodata.com;User ID=root;Password=1qazZAQ!123456;Port=4306";

    SpiderContext context = new SpiderContext();
    context.SetTaskGroup("JD sku/store test");

    // Format the timestamp with the invariant culture so the spider name has the
    // same shape regardless of the culture of the machine running the spider.
    string startedAt = DateTime.Now.ToString("yyyy-MM-dd HHmmss", System.Globalization.CultureInfo.InvariantCulture);
    context.SetSpiderName("JD sku/store test " + startedAt);

    // Target URL extractor: XPath region selector plus a "&page=<digits>&" pattern.
    context.AddTargetUrlExtractor(new Extension.Configuration.TargetUrlExtractor
    {
        Region = new Extension.Configuration.Selector
        {
            Type = ExtractType.XPath,
            Expression = "//span[@class=\"p-num\"]"
        },
        Patterns = new List<string> { @"&page=[0-9]+&" }
    });

    context.AddPipeline(new MysqlPipeline { ConnectString = connectString });

    // NOTE(review): the Redis password literal looks like a redacted placeholder - confirm.
    context.SetScheduler(new RedisScheduler
    {
        Host = "ooodata.com",
        Password = "******",
        Port = 6379
    });

    // The dictionary is passed alongside the start URL as extra key/value data.
    context.AddStartUrl(
        "http://list.jd.com/list.html?cat=9987,653,655&page=1",
        new Dictionary<string, object>
        {
            { "name", "手机" },
            { "cat3", "655" }
        });

    // Additional start URLs come from the "url" column of jd.category: a
    // ReplaceFormatter swaps ".html" for "" and the value is then applied to
    // the "{0}&page=1&JL=6_0_0" format string.
    context.AddPrepareStartUrls(new ConfigurableDbPrepareStartUrls()
    {
        Source = DataSource.MySql,
        ConnectString = connectString,
        TableName = "jd.category",
        Columns = new List<BaseDbPrepareStartUrls.Column>
        {
            new BaseDbPrepareStartUrls.Column
            {
                Name = "url",
                Formatters = new List<Formatter>
                {
                    new ReplaceFormatter { OldValue = ".html", NewValue = "" }
                }
            }
        },
        FormateStrings = new List<string> { "{0}&page=1&JL=6_0_0" }
    });

    context.AddEntityType(typeof(Product));
    return context;
}