/// <summary>
/// Builds the spider context for the JD sku/store test: two threads, a timestamped
/// spider name, one target-URL extractor, a MySQL pipeline, a single start URL with
/// extra data, and the Product entity type.
/// </summary>
/// <returns>The configured context.</returns>
protected override SpiderContext GetSpiderContext()
{
    SpiderContext context = new SpiderContext();
    context.SetThreadNum(2);

    // Format the timestamp with the invariant culture so the generated name is the same
    // on machines whose default calendar is not Gregorian.
    context.SetSpiderName("JD_sku_store_test_" + DateTime.Now.ToString("yyyy_MM_dd_HHmmss", System.Globalization.CultureInfo.InvariantCulture));

    // Target-URL extractor: an XPath region plus one page-number pattern.
    context.AddTargetUrlExtractor(new Extension.Configuration.TargetUrlExtractor
    {
        Region = new Extension.Configuration.Selector
        {
            Type = ExtractType.XPath,
            Expression = "//span[@class=\"p-num\"]"
        },
        Patterns = new List<string> { @"&page=[0-9]+&" }
    });

    // NOTE(review): database credentials are hard-coded in source; consider moving them to configuration.
    context.AddPipeline(new MysqlPipeline
    {
        ConnectString = "Database='test';Data Source=mysqlserver;User ID=root;Password=1qazZAQ!;Port=4306"
    });

    // The dictionary is passed alongside the start URL as extra data.
    context.AddStartUrl(
        "http://list.jd.com/list.html?cat=9987,653,655&page=2&JL=6_0_0&ms=5#J_main",
        new Dictionary<string, object> { { "name", "手机" }, { "cat3", "655" } });

    context.AddEntityType(typeof(Product));
    return context;
}
/// <summary>
/// Builds the spider context for the cnblogs news crawl: site sleep times, task group,
/// a timestamped spider name, one start URL, a MySQL pipeline, a pager-based target-URL
/// extractor, five threads, and the Cnblogs entity type.
/// </summary>
/// <returns>The configured context.</returns>
protected override SpiderContext GetSpiderContext()
{
    SpiderContext context = new SpiderContext();
    context.Site = new Site { MaxSleepTime = 1, MinSleepTime = 1 };
    context.SetTaskGroup("cnblogs homepage");

    // Format the timestamp with the invariant culture so the generated name is the same
    // on machines whose default calendar is not Gregorian.
    context.SetSpiderName("cnblogs homepage " + DateTime.Now.ToString("yyyy-MM-dd HHmmss", System.Globalization.CultureInfo.InvariantCulture));

    context.AddStartUrl("http://news.cnblogs.com/n/page/1/");

    // NOTE(review): database credentials are hard-coded in source; consider moving them to configuration.
    context.AddPipeline(new MysqlPipeline
    {
        ConnectString = "Database='taobao';Data Source= 86research.imwork.net;User ID=root;Password=1qazZAQ!;Port=4306"
    });

    // Target-URL extractor limited to the element with id 'pager'; no URL patterns are set.
    context.AddTargetUrlExtractor(new Extension.Configuration.TargetUrlExtractor
    {
        Region = new Extension.Configuration.Selector
        {
            Expression = "//*[@id='pager']",
            Type = ExtractType.XPath
        }
    });

    context.ThreadNum = 5;
    context.AddEntityType(typeof(Cnblogs));
    return context;
}
/// <summary>
/// Builds the spider context for the JD sku/store test that downloads through a
/// Chrome WebDriver: a timestamped spider name, one target-URL extractor, a MySQL
/// pipeline, a single start URL with extra data, and the Product entity type.
/// </summary>
/// <returns>The configured context.</returns>
protected override SpiderContext GetSpiderContext()
{
    SpiderContext context = new SpiderContext();

    // Format the timestamp with the invariant culture so the generated name is the same
    // on machines whose default calendar is not Gregorian.
    context.SetSpiderName("JD sku/store test " + DateTime.Now.ToString("yyyy-MM-dd HHmmss", System.Globalization.CultureInfo.InvariantCulture));

    // Target-URL extractor: an XPath region plus one page-number pattern.
    context.AddTargetUrlExtractor(new Extension.Configuration.TargetUrlExtractor
    {
        Region = new Extension.Configuration.Selector
        {
            Type = ExtractType.XPath,
            Expression = "//span[@class=\"p-num\"]"
        },
        Patterns = new List<string> { @"&page=[0-9]+&" }
    });

    // NOTE(review): database credentials are hard-coded in source; consider moving them to configuration.
    context.AddPipeline(new MysqlPipeline
    {
        ConnectString = "Database='mysql';Data Source=192.168.199.211;User ID=root;Password=1qazZAQ!;Port=3306"
    });

    // The dictionary is passed alongside the start URL as extra data.
    context.AddStartUrl(
        "http://list.jd.com/list.html?cat=9987,653,655&page=2&JL=6_0_0&ms=5#J_main",
        new Dictionary<string, object> { { "name", "手机" }, { "cat3", "655" } });

    context.AddEntityType(typeof(Product));

    // Replace the downloader with a WebDriver-based one configured for Chrome.
    context.SetDownloader(new WebDriverDownloader
    {
        Browser = Extension.Downloader.WebDriver.Browser.Chrome
    });

    return context;
}
/// <summary>
/// Builds the spider context for the ddeng.com crawl: a timestamped spider name, a site
/// definition carrying a cookie, extra headers, user agent and accept value, a MySQL
/// pipeline, one start URL, and the Corp entity type.
/// </summary>
/// <returns>The configured context.</returns>
protected override SpiderContext GetSpiderContext()
{
    SpiderContext context = new SpiderContext();

    // Format the timestamp with the invariant culture so the generated name is the same
    // on machines whose default calendar is not Gregorian.
    context.SetSpiderName("ddeng.com " + DateTime.Now.ToString("yyyy-MM-dd HHmmss", System.Globalization.CultureInfo.InvariantCulture));

    // NOTE(review): the cookie below is a captured session value hard-coded in source;
    // it presumably expires and should come from configuration - confirm.
    context.SetSite(new Site
    {
        Cookie = "sid=dea284fc36c24e8cbcd447343d7b8a4e; sn=DD962248; ctid=000000; ctnm=%E5%8F%A4%E9%95%87%E7%81%AF%E9%A5%B0%E6%89%B9%E5%8F%91; ctpv=%E5%B9%BF%E4%B8%9C; JSESSIONID=acbBqFfOD4I63d9PziDvv; DDENG=c4fc08ae2e3ba3efeddbc667c2f45e615a85e80009169501dc244a03e87908aa61146548b97ed9c7dc07af23bfd80bff5008f8c8867a9165d4bd2732aca0db7dedae2e042d3968fcad1150f36be242e8a32a3f59db2a0b39216a59f1628508c5799644532a9d99925f9841b3c13a1f97; userId=10003379; previousUser=%E5%A4%95%E7%8E%89; Hm_lvt_9e33f153f28be198970d205d90a24f28=1466146335; Hm_lpvt_9e33f153f28be198970d205d90a24f28=1466146392; Hm_lvt_54b4cb498afd05463ab4611b38a6f289=1466146335; Hm_lpvt_54b4cb498afd05463ab4611b38a6f289=1466146392; CNZZDATA1256982382=395301521-1466143554-%7C1466143554",
        Headers = new Dictionary<string, string>
        {
            { "Cache-Control", "max-age=0" },
            { "Upgrade-Insecure-Requests", "1" }
        },
        UserAgent = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36",
        Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"
    });

    // The connection string has an empty Data Source and Password; it looks like a
    // placeholder to be filled in before running - confirm.
    context.AddPipeline(new MysqlPipeline
    {
        ConnectString = "Database='test';Data Source=;User ID=root;Password=;Port=4306"
    });

    context.AddStartUrl("http://www.ddeng.com/product/967659");
    context.AddEntityType(typeof(Corp));
    return context;
}
/// <summary>
/// Builds the spider context for the JD sku/store test backed by a Redis scheduler:
/// task group, a timestamped spider name, one target-URL extractor, a MySQL pipeline,
/// the scheduler, a single start URL with extra data, and the Product entity type.
/// </summary>
/// <returns>The configured context.</returns>
protected override SpiderContext GetSpiderContext()
{
    SpiderContext context = new SpiderContext();
    context.SetTaskGroup("JD sku/store test");

    // Format the timestamp with the invariant culture so the generated name is the same
    // on machines whose default calendar is not Gregorian.
    context.SetSpiderName("JD sku/store test " + DateTime.Now.ToString("yyyy-MM-dd HHmmss", System.Globalization.CultureInfo.InvariantCulture));

    // Target-URL extractor: an XPath region plus one page-number pattern.
    context.AddTargetUrlExtractor(new Extension.Configuration.TargetUrlExtractor
    {
        Region = new Extension.Configuration.Selector
        {
            Type = ExtractType.XPath,
            Expression = "//span[@class=\"p-num\"]"
        },
        Patterns = new List<string> { @"&page=[0-9]+&" }
    });

    // NOTE(review): database credentials are hard-coded in source; consider moving them to configuration.
    context.AddPipeline(new MysqlPipeline
    {
        ConnectString = "Database='test';Data Source=ooodata.com;User ID=root;Password=1qazZAQ!123456;Port=4306"
    });

    // Use a Redis-backed scheduler; the password literal is masked in this file.
    context.SetScheduler(new RedisScheduler
    {
        Host = "ooodata.com",
        Password = "******",
        Port = 6379
    });

    // The dictionary is passed alongside the start URL as extra data.
    context.AddStartUrl(
        "http://list.jd.com/list.html?cat=9987,653,655&page=2&JL=6_0_0&ms=5#J_main",
        new Dictionary<string, object> { { "name", "手机" }, { "cat3", "655" } });

    context.AddEntityType(typeof(Product));
    return context;
}
/// <summary>
/// Builds the spider context for the cnblogs homepage crawl: task group, a timestamped
/// spider name, one start URL, a console pipeline, and the HomePage entity type.
/// </summary>
/// <returns>The configured context.</returns>
protected override SpiderContext GetSpiderContext()
{
    SpiderContext context = new SpiderContext();
    context.SetTaskGroup("cnblogs homepage");

    // Format the timestamp with the invariant culture so the generated name is the same
    // on machines whose default calendar is not Gregorian.
    context.SetSpiderName("cnblogs homepage " + DateTime.Now.ToString("yyyy-MM-dd HHmmss", System.Globalization.CultureInfo.InvariantCulture));

    context.AddStartUrl("http://www.cnblogs.com");

    // "ConslePipeline" is the type name as declared by the project (spelling kept as-is).
    context.AddPipeline(new ConslePipeline());

    context.AddEntityType(typeof(HomePage));
    return context;
}
/// <summary>
/// Creates the context used to crawl the cnblogs homepage. It sets the task group and a
/// timestamped spider name, adds the start URL, attaches a console pipeline, and
/// registers the HomePage entity type.
/// </summary>
/// <returns>The configured context.</returns>
protected override SpiderContext GetSpiderContext()
{
    SpiderContext context = new SpiderContext();
    context.SetTaskGroup("cnblogs homepage");

    // The invariant culture keeps the timestamp in the Gregorian calendar regardless of
    // the machine's regional settings, so the spider name has a stable shape.
    context.SetSpiderName("cnblogs homepage " + DateTime.Now.ToString("yyyy-MM-dd HHmmss", System.Globalization.CultureInfo.InvariantCulture));

    context.AddStartUrl("http://www.cnblogs.com");

    // "ConslePipeline" is the type name as declared by the project (spelling kept as-is).
    context.AddPipeline(new ConslePipeline());

    context.AddEntityType(typeof(HomePage));
    return context;
}
/// <summary>
/// Builds the spider context for the JD sku/store test that also prepares start URLs
/// from a database table: task group, a timestamped spider name, one target-URL
/// extractor, a MySQL pipeline, a Redis scheduler, one explicit start URL with extra
/// data, a database-backed start-URL preparer, and the Product entity type.
/// </summary>
/// <returns>The configured context.</returns>
protected override SpiderContext GetSpiderContext()
{
    SpiderContext context = new SpiderContext();
    context.SetTaskGroup("JD sku/store test");

    // Format the timestamp with the invariant culture so the generated name is the same
    // on machines whose default calendar is not Gregorian.
    context.SetSpiderName("JD sku/store test " + DateTime.Now.ToString("yyyy-MM-dd HHmmss", System.Globalization.CultureInfo.InvariantCulture));

    // Target-URL extractor: an XPath region plus one page-number pattern.
    context.AddTargetUrlExtractor(new Extension.Configuration.TargetUrlExtractor
    {
        Region = new Extension.Configuration.Selector
        {
            Type = ExtractType.XPath,
            Expression = "//span[@class=\"p-num\"]"
        },
        Patterns = new List<string> { @"&page=[0-9]+&" }
    });

    // NOTE(review): database credentials are hard-coded in source (here and in the
    // preparer below); consider moving them to configuration.
    context.AddPipeline(new MysqlPipeline
    {
        ConnectString = "Database='test';Data Source=ooodata.com;User ID=root;Password=1qazZAQ!123456;Port=4306"
    });

    // Use a Redis-backed scheduler; the password literal is masked in this file.
    context.SetScheduler(new RedisScheduler
    {
        Host = "ooodata.com",
        Password = "******",
        Port = 6379
    });

    // The dictionary is passed alongside the start URL as extra data.
    context.AddStartUrl(
        "http://list.jd.com/list.html?cat=9987,653,655&page=1",
        new Dictionary<string, object> { { "name", "手机" }, { "cat3", "655" } });

    // Start-URL preparer configured against the MySQL table "jd.category": the "url"
    // column goes through a formatter replacing ".html" with an empty string, and one
    // format string is supplied. Presumably each row yields a first-page listing URL -
    // confirm against ConfigurableDbPrepareStartUrls.
    context.AddPrepareStartUrls(new ConfigurableDbPrepareStartUrls()
    {
        Source = DataSource.MySql,
        ConnectString = "Database='test';Data Source= ooodata.com;User ID=root;Password=1qazZAQ!123456;Port=4306",
        TableName = "jd.category",
        Columns = new List<BaseDbPrepareStartUrls.Column>
        {
            new BaseDbPrepareStartUrls.Column
            {
                Name = "url",
                Formatters = new List<Formatter>
                {
                    new ReplaceFormatter { OldValue = ".html", NewValue = "" }
                }
            }
        },
        FormateStrings = new List<string> { "{0}&page=1&JL=6_0_0" }
    });

    context.AddEntityType(typeof(Product));
    return context;
}
/// <summary>
/// Builds the spider context for the JD sku/store test: a timestamped spider name, one
/// target-URL extractor, a MySQL pipeline, a single start URL with extra data, and the
/// Product entity type.
/// </summary>
/// <returns>The configured context.</returns>
protected override SpiderContext GetSpiderContext()
{
    SpiderContext context = new SpiderContext();

    // Format the timestamp with the invariant culture so the generated name is the same
    // on machines whose default calendar is not Gregorian.
    context.SetSpiderName("JD sku/store test " + DateTime.Now.ToString("yyyy-MM-dd HHmmss", System.Globalization.CultureInfo.InvariantCulture));

    // Target-URL extractor: an XPath region plus one page-number pattern.
    context.AddTargetUrlExtractor(new Extension.Configuration.TargetUrlExtractor
    {
        Region = new Extension.Configuration.Selector
        {
            Type = ExtractType.XPath,
            Expression = "//span[@class=\"p-num\"]"
        },
        Patterns = new List<string> { @"&page=[0-9]+&" }
    });

    // The connection string has an empty Data Source and Password; it looks like a
    // placeholder to be filled in before running - confirm.
    context.AddPipeline(new MysqlPipeline
    {
        ConnectString = "Database='test';Data Source=;User ID=root;Password=;Port=4306"
    });

    // The dictionary is passed alongside the start URL as extra data.
    context.AddStartUrl(
        "http://list.jd.com/list.html?cat=9987,653,655&page=2&JL=6_0_0&ms=5#J_main",
        new Dictionary<string, object> { { "name", "手机" }, { "cat3", "655" } });

    context.AddEntityType(typeof(Product));
    return context;
}
/// <summary>
/// Creates the context used to crawl ddeng.com. It sets a timestamped spider name,
/// installs a site definition (cookie, two extra headers, user agent, accept value),
/// attaches a MySQL pipeline, adds one start URL, and registers the Corp entity type.
/// </summary>
/// <returns>The configured context.</returns>
protected override SpiderContext GetSpiderContext()
{
    SpiderContext context = new SpiderContext();

    // The invariant culture keeps the timestamp in the Gregorian calendar regardless of
    // the machine's regional settings, so the spider name has a stable shape.
    context.SetSpiderName("ddeng.com " + DateTime.Now.ToString("yyyy-MM-dd HHmmss", System.Globalization.CultureInfo.InvariantCulture));

    // NOTE(review): the cookie below is a captured session value hard-coded in source;
    // it presumably expires and should come from configuration - confirm.
    context.SetSite(new Site
    {
        Cookie = "sid=dea284fc36c24e8cbcd447343d7b8a4e; sn=DD962248; ctid=000000; ctnm=%E5%8F%A4%E9%95%87%E7%81%AF%E9%A5%B0%E6%89%B9%E5%8F%91; ctpv=%E5%B9%BF%E4%B8%9C; JSESSIONID=acbBqFfOD4I63d9PziDvv; DDENG=c4fc08ae2e3ba3efeddbc667c2f45e615a85e80009169501dc244a03e87908aa61146548b97ed9c7dc07af23bfd80bff5008f8c8867a9165d4bd2732aca0db7dedae2e042d3968fcad1150f36be242e8a32a3f59db2a0b39216a59f1628508c5799644532a9d99925f9841b3c13a1f97; userId=10003379; previousUser=%E5%A4%95%E7%8E%89; Hm_lvt_9e33f153f28be198970d205d90a24f28=1466146335; Hm_lpvt_9e33f153f28be198970d205d90a24f28=1466146392; Hm_lvt_54b4cb498afd05463ab4611b38a6f289=1466146335; Hm_lpvt_54b4cb498afd05463ab4611b38a6f289=1466146392; CNZZDATA1256982382=395301521-1466143554-%7C1466143554",
        Headers = new Dictionary<string, string>
        {
            { "Cache-Control", "max-age=0" },
            { "Upgrade-Insecure-Requests", "1" }
        },
        UserAgent = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36",
        Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"
    });

    // The connection string has an empty Data Source and Password; it looks like a
    // placeholder to be filled in before running - confirm.
    context.AddPipeline(new MysqlPipeline
    {
        ConnectString = "Database='test';Data Source=;User ID=root;Password=;Port=4306"
    });

    context.AddStartUrl("http://www.ddeng.com/product/967659");
    context.AddEntityType(typeof(Corp));
    return context;
}