Example #1
0
        /// <summary>
        /// Builds the <see cref="SpiderContext"/> for this spider: two threads, a timestamped
        /// spider name, one XPath-based target-URL extractor, a MySQL pipeline, a single
        /// list.jd.com start URL with two extra values, and the <c>Product</c> entity type.
        /// </summary>
        /// <returns>The configured context.</returns>
        protected override SpiderContext GetSpiderContext()
        {
            SpiderContext context = new SpiderContext();

            context.SetThreadNum(2);

            // Format with the invariant culture so the timestamp in the spider name is always
            // Gregorian, regardless of the host's current culture/calendar.
            string timestamp = DateTime.Now.ToString("yyyy_MM_dd_HHmmss", System.Globalization.CultureInfo.InvariantCulture);
            context.SetSpiderName("JD_sku_store_test_" + timestamp);

            context.AddTargetUrlExtractor(new Extension.Configuration.TargetUrlExtractor
            {
                Region = new Extension.Configuration.Selector
                {
                    Type = ExtractType.XPath,
                    Expression = "//span[@class=\"p-num\"]"
                },
                Patterns = new List<string>
                {
                    @"&page=[0-9]+&"
                }
            });

            // NOTE(review): database credentials are hard-coded in this connection string;
            // consider moving them to configuration.
            context.AddPipeline(new MysqlPipeline
            {
                ConnectString = "Database='test';Data Source=mysqlserver;User ID=root;Password=1qazZAQ!;Port=4306"
            });

            // The dictionary carries extra values ("name", "cat3") alongside the start URL.
            context.AddStartUrl("http://list.jd.com/list.html?cat=9987,653,655&page=2&JL=6_0_0&ms=5#J_main", new Dictionary<string, object>
            {
                { "name", "手机" },
                { "cat3", "655" }
            });
            context.AddEntityType(typeof(Product));

            return context;
        }
Example #2
0
        /// <summary>
        /// Builds the <see cref="SpiderContext"/> for the "cnblogs homepage" task group: a site
        /// with min/max sleep time of 1, a timestamped spider name, the news.cnblogs.com start
        /// URL, a MySQL pipeline, an XPath region for target URLs, five threads and the
        /// <c>Cnblogs</c> entity type.
        /// </summary>
        /// <returns>The configured context.</returns>
        protected override SpiderContext GetSpiderContext()
        {
            SpiderContext context = new SpiderContext();

            context.Site = new Site
            {
                MaxSleepTime = 1,
                MinSleepTime = 1
            };
            context.SetTaskGroup("cnblogs homepage");

            // Format with the invariant culture so the timestamp in the spider name is always
            // Gregorian, regardless of the host's current culture/calendar.
            string timestamp = DateTime.Now.ToString("yyyy-MM-dd HHmmss", System.Globalization.CultureInfo.InvariantCulture);
            context.SetSpiderName("cnblogs homepage " + timestamp);

            context.AddStartUrl("http://news.cnblogs.com/n/page/1/");

            // NOTE(review): database credentials are hard-coded in this connection string;
            // consider moving them to configuration.
            context.AddPipeline(new MysqlPipeline
            {
                ConnectString = "Database='taobao';Data Source= 86research.imwork.net;User ID=root;Password=1qazZAQ!;Port=4306"
            });

            // Only a region is configured here; no URL patterns are supplied.
            context.AddTargetUrlExtractor(new Extension.Configuration.TargetUrlExtractor
            {
                Region = new Extension.Configuration.Selector
                {
                    Expression = "//*[@id='pager']",
                    Type = ExtractType.XPath
                }
            });
            context.ThreadNum = 5;
            context.AddEntityType(typeof(Cnblogs));

            return context;
        }
        /// <summary>
        /// Builds the <see cref="SpiderContext"/> for this spider: a timestamped spider name,
        /// one XPath-based target-URL extractor, a MySQL pipeline, a single list.jd.com start
        /// URL with two extra values, the <c>Product</c> entity type, and a
        /// <c>WebDriverDownloader</c> configured for Chrome.
        /// </summary>
        /// <returns>The configured context.</returns>
        protected override SpiderContext GetSpiderContext()
        {
            SpiderContext context = new SpiderContext();

            // Format with the invariant culture so the timestamp in the spider name is always
            // Gregorian, regardless of the host's current culture/calendar.
            string timestamp = DateTime.Now.ToString("yyyy-MM-dd HHmmss", System.Globalization.CultureInfo.InvariantCulture);
            context.SetSpiderName("JD sku/store test " + timestamp);

            context.AddTargetUrlExtractor(new Extension.Configuration.TargetUrlExtractor
            {
                Region = new Extension.Configuration.Selector
                {
                    Type = ExtractType.XPath,
                    Expression = "//span[@class=\"p-num\"]"
                },
                Patterns = new List<string>
                {
                    @"&page=[0-9]+&"
                }
            });

            // NOTE(review): database credentials are hard-coded in this connection string;
            // consider moving them to configuration.
            context.AddPipeline(new MysqlPipeline
            {
                ConnectString = "Database='mysql';Data Source=192.168.199.211;User ID=root;Password=1qazZAQ!;Port=3306"
            });

            // The dictionary carries extra values ("name", "cat3") alongside the start URL.
            context.AddStartUrl("http://list.jd.com/list.html?cat=9987,653,655&page=2&JL=6_0_0&ms=5#J_main", new Dictionary<string, object>
            {
                { "name", "手机" },
                { "cat3", "655" }
            });
            context.AddEntityType(typeof(Product));
            context.SetDownloader(new WebDriverDownloader
            {
                Browser = Extension.Downloader.WebDriver.Browser.Chrome
            });

            return context;
        }
Example #4
0
        /// <summary>
        /// Builds the <see cref="SpiderContext"/> for the ddeng.com spider: a timestamped spider
        /// name, a <c>Site</c> with a fixed cookie, two extra headers, user agent and accept
        /// value, a MySQL pipeline, one product-page start URL and the <c>Corp</c> entity type.
        /// </summary>
        /// <returns>The configured context.</returns>
        protected override SpiderContext GetSpiderContext()
        {
            SpiderContext context = new SpiderContext();

            // Format with the invariant culture so the timestamp in the spider name is always
            // Gregorian, regardless of the host's current culture/calendar.
            string timestamp = DateTime.Now.ToString("yyyy-MM-dd HHmmss", System.Globalization.CultureInfo.InvariantCulture);
            context.SetSpiderName("ddeng.com " + timestamp);

            // NOTE(review): the cookie below is a captured session value hard-coded in source;
            // presumably it expires and must be refreshed - confirm.
            context.SetSite(new Site
            {
                Cookie = "sid=dea284fc36c24e8cbcd447343d7b8a4e; sn=DD962248; ctid=000000; ctnm=%E5%8F%A4%E9%95%87%E7%81%AF%E9%A5%B0%E6%89%B9%E5%8F%91; ctpv=%E5%B9%BF%E4%B8%9C; JSESSIONID=acbBqFfOD4I63d9PziDvv; DDENG=c4fc08ae2e3ba3efeddbc667c2f45e615a85e80009169501dc244a03e87908aa61146548b97ed9c7dc07af23bfd80bff5008f8c8867a9165d4bd2732aca0db7dedae2e042d3968fcad1150f36be242e8a32a3f59db2a0b39216a59f1628508c5799644532a9d99925f9841b3c13a1f97; userId=10003379; previousUser=%E5%A4%95%E7%8E%89; Hm_lvt_9e33f153f28be198970d205d90a24f28=1466146335; Hm_lpvt_9e33f153f28be198970d205d90a24f28=1466146392; Hm_lvt_54b4cb498afd05463ab4611b38a6f289=1466146335; Hm_lpvt_54b4cb498afd05463ab4611b38a6f289=1466146392; CNZZDATA1256982382=395301521-1466143554-%7C1466143554",
                Headers = new Dictionary<string, string>
                {
                    { "Cache-Control", "max-age=0" },
                    { "Upgrade-Insecure-Requests", "1" }
                },
                UserAgent = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36",
                Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"
            });

            // Data Source and Password are empty in this connection string as written.
            context.AddPipeline(new MysqlPipeline
            {
                ConnectString = "Database='test';Data Source=;User ID=root;Password=;Port=4306"
            });
            context.AddStartUrl("http://www.ddeng.com/product/967659");
            context.AddEntityType(typeof(Corp));

            return context;
        }
Example #5
0
        /// <summary>
        /// Builds the <see cref="SpiderContext"/> for the "JD sku/store test" task group: a
        /// timestamped spider name, one XPath-based target-URL extractor, a MySQL pipeline, a
        /// <c>RedisScheduler</c>, a single list.jd.com start URL with two extra values, and the
        /// <c>Product</c> entity type.
        /// </summary>
        /// <returns>The configured context.</returns>
        protected override SpiderContext GetSpiderContext()
        {
            SpiderContext context = new SpiderContext();

            context.SetTaskGroup("JD sku/store test");

            // Format with the invariant culture so the timestamp in the spider name is always
            // Gregorian, regardless of the host's current culture/calendar.
            string timestamp = DateTime.Now.ToString("yyyy-MM-dd HHmmss", System.Globalization.CultureInfo.InvariantCulture);
            context.SetSpiderName("JD sku/store test " + timestamp);

            context.AddTargetUrlExtractor(new Extension.Configuration.TargetUrlExtractor
            {
                Region = new Extension.Configuration.Selector
                {
                    Type = ExtractType.XPath,
                    Expression = "//span[@class=\"p-num\"]"
                },
                Patterns = new List<string>
                {
                    @"&page=[0-9]+&"
                }
            });

            // NOTE(review): database credentials are hard-coded in this connection string;
            // consider moving them to configuration.
            context.AddPipeline(new MysqlPipeline
            {
                ConnectString = "Database='test';Data Source=ooodata.com;User ID=root;Password=1qazZAQ!123456;Port=4306"
            });
            context.SetScheduler(new RedisScheduler
            {
                Host = "ooodata.com",
                Password = "******",
                Port = 6379
            });

            // The dictionary carries extra values ("name", "cat3") alongside the start URL.
            context.AddStartUrl("http://list.jd.com/list.html?cat=9987,653,655&page=2&JL=6_0_0&ms=5#J_main", new Dictionary<string, object>
            {
                { "name", "手机" },
                { "cat3", "655" }
            });
            context.AddEntityType(typeof(Product));

            return context;
        }
 /// <summary>
 /// Builds the <see cref="SpiderContext"/> for the "cnblogs homepage" task group: a
 /// timestamped spider name, the www.cnblogs.com start URL, a <c>ConslePipeline</c>
 /// and the <c>HomePage</c> entity type.
 /// </summary>
 /// <returns>The configured context.</returns>
 protected override SpiderContext GetSpiderContext()
 {
     SpiderContext context = new SpiderContext();
     context.SetTaskGroup("cnblogs homepage");

     // Format with the invariant culture so the timestamp in the spider name is always
     // Gregorian, regardless of the host's current culture/calendar.
     string timestamp = DateTime.Now.ToString("yyyy-MM-dd HHmmss", System.Globalization.CultureInfo.InvariantCulture);
     context.SetSpiderName("cnblogs homepage " + timestamp);

     context.AddStartUrl("http://www.cnblogs.com");
     context.AddPipeline(new ConslePipeline());
     context.AddEntityType(typeof(HomePage));
     return context;
 }
        /// <summary>
        /// Builds the <see cref="SpiderContext"/> for the "cnblogs homepage" task group: a
        /// timestamped spider name, the www.cnblogs.com start URL, a <c>ConslePipeline</c>
        /// and the <c>HomePage</c> entity type.
        /// </summary>
        /// <returns>The configured context.</returns>
        protected override SpiderContext GetSpiderContext()
        {
            SpiderContext context = new SpiderContext();

            context.SetTaskGroup("cnblogs homepage");

            // Format with the invariant culture so the timestamp in the spider name is always
            // Gregorian, regardless of the host's current culture/calendar.
            string timestamp = DateTime.Now.ToString("yyyy-MM-dd HHmmss", System.Globalization.CultureInfo.InvariantCulture);
            context.SetSpiderName("cnblogs homepage " + timestamp);

            context.AddStartUrl("http://www.cnblogs.com");
            context.AddPipeline(new ConslePipeline());
            context.AddEntityType(typeof(HomePage));

            return context;
        }
Example #8
0
        /// <summary>
        /// Builds the <see cref="SpiderContext"/> for the "JD sku/store test" task group: a
        /// timestamped spider name, one XPath-based target-URL extractor, a MySQL pipeline, a
        /// <c>RedisScheduler</c>, one explicit list.jd.com start URL, a
        /// <c>ConfigurableDbPrepareStartUrls</c> reading the <c>jd.category</c> table, and the
        /// <c>Product</c> entity type.
        /// </summary>
        /// <returns>The configured context.</returns>
        protected override SpiderContext GetSpiderContext()
        {
            SpiderContext context = new SpiderContext();

            context.SetTaskGroup("JD sku/store test");

            // Format with the invariant culture so the timestamp in the spider name is always
            // Gregorian, regardless of the host's current culture/calendar.
            string timestamp = DateTime.Now.ToString("yyyy-MM-dd HHmmss", System.Globalization.CultureInfo.InvariantCulture);
            context.SetSpiderName("JD sku/store test " + timestamp);

            context.AddTargetUrlExtractor(new Extension.Configuration.TargetUrlExtractor
            {
                Region = new Extension.Configuration.Selector
                {
                    Type = ExtractType.XPath,
                    Expression = "//span[@class=\"p-num\"]"
                },
                Patterns = new List<string>
                {
                    @"&page=[0-9]+&"
                }
            });

            // NOTE(review): database credentials are hard-coded in the connection strings
            // below; consider moving them to configuration.
            context.AddPipeline(new MysqlPipeline
            {
                ConnectString = "Database='test';Data Source=ooodata.com;User ID=root;Password=1qazZAQ!123456;Port=4306"
            });
            context.SetScheduler(new RedisScheduler
            {
                Host = "ooodata.com",
                Password = "******",
                Port = 6379
            });

            // The dictionary carries extra values ("name", "cat3") alongside the start URL.
            context.AddStartUrl("http://list.jd.com/list.html?cat=9987,653,655&page=1", new Dictionary<string, object>
            {
                { "name", "手机" },
                { "cat3", "655" }
            });

            // Presumably additional start URLs are generated from the "url" column (with ".html"
            // replaced by an empty string) substituted into the format string - confirm against
            // ConfigurableDbPrepareStartUrls.
            context.AddPrepareStartUrls(new ConfigurableDbPrepareStartUrls()
            {
                Source = DataSource.MySql,
                ConnectString = "Database='test';Data Source= ooodata.com;User ID=root;Password=1qazZAQ!123456;Port=4306",
                TableName = "jd.category",
                Columns = new List<BaseDbPrepareStartUrls.Column>
                {
                    new BaseDbPrepareStartUrls.Column
                    {
                        Name = "url",
                        Formatters = new List<Formatter>
                        {
                            new ReplaceFormatter
                            {
                                OldValue = ".html",
                                NewValue = ""
                            }
                        }
                    }
                },
                FormateStrings = new List<string>
                {
                    "{0}&page=1&JL=6_0_0"
                }
            });
            context.AddEntityType(typeof(Product));

            return context;
        }
        /// <summary>
        /// Builds the <see cref="SpiderContext"/> for this spider: a timestamped spider name,
        /// one XPath-based target-URL extractor, a MySQL pipeline, a single list.jd.com start
        /// URL with two extra values, and the <c>Product</c> entity type.
        /// </summary>
        /// <returns>The configured context.</returns>
        protected override SpiderContext GetSpiderContext()
        {
            SpiderContext context = new SpiderContext();

            // Format with the invariant culture so the timestamp in the spider name is always
            // Gregorian, regardless of the host's current culture/calendar.
            string timestamp = DateTime.Now.ToString("yyyy-MM-dd HHmmss", System.Globalization.CultureInfo.InvariantCulture);
            context.SetSpiderName("JD sku/store test " + timestamp);

            context.AddTargetUrlExtractor(new Extension.Configuration.TargetUrlExtractor
            {
                Region = new Extension.Configuration.Selector
                {
                    Type = ExtractType.XPath,
                    Expression = "//span[@class=\"p-num\"]"
                },
                Patterns = new List<string>
                {
                    @"&page=[0-9]+&"
                }
            });

            // Data Source and Password are empty in this connection string as written.
            context.AddPipeline(new MysqlPipeline
            {
                ConnectString = "Database='test';Data Source=;User ID=root;Password=;Port=4306"
            });

            // The dictionary carries extra values ("name", "cat3") alongside the start URL.
            context.AddStartUrl("http://list.jd.com/list.html?cat=9987,653,655&page=2&JL=6_0_0&ms=5#J_main", new Dictionary<string, object>
            {
                { "name", "手机" },
                { "cat3", "655" }
            });
            context.AddEntityType(typeof(Product));

            return context;
        }
        /// <summary>
        /// Builds the <see cref="SpiderContext"/> for the ddeng.com spider: a timestamped spider
        /// name, a <c>Site</c> with a fixed cookie, two extra headers, user agent and accept
        /// value, a MySQL pipeline, one product-page start URL and the <c>Corp</c> entity type.
        /// </summary>
        /// <returns>The configured context.</returns>
        protected override SpiderContext GetSpiderContext()
        {
            SpiderContext context = new SpiderContext();

            // Format with the invariant culture so the timestamp in the spider name is always
            // Gregorian, regardless of the host's current culture/calendar.
            string timestamp = DateTime.Now.ToString("yyyy-MM-dd HHmmss", System.Globalization.CultureInfo.InvariantCulture);
            context.SetSpiderName("ddeng.com " + timestamp);

            // NOTE(review): the cookie below is a captured session value hard-coded in source;
            // presumably it expires and must be refreshed - confirm.
            context.SetSite(new Site
            {
                Cookie = "sid=dea284fc36c24e8cbcd447343d7b8a4e; sn=DD962248; ctid=000000; ctnm=%E5%8F%A4%E9%95%87%E7%81%AF%E9%A5%B0%E6%89%B9%E5%8F%91; ctpv=%E5%B9%BF%E4%B8%9C; JSESSIONID=acbBqFfOD4I63d9PziDvv; DDENG=c4fc08ae2e3ba3efeddbc667c2f45e615a85e80009169501dc244a03e87908aa61146548b97ed9c7dc07af23bfd80bff5008f8c8867a9165d4bd2732aca0db7dedae2e042d3968fcad1150f36be242e8a32a3f59db2a0b39216a59f1628508c5799644532a9d99925f9841b3c13a1f97; userId=10003379; previousUser=%E5%A4%95%E7%8E%89; Hm_lvt_9e33f153f28be198970d205d90a24f28=1466146335; Hm_lpvt_9e33f153f28be198970d205d90a24f28=1466146392; Hm_lvt_54b4cb498afd05463ab4611b38a6f289=1466146335; Hm_lpvt_54b4cb498afd05463ab4611b38a6f289=1466146392; CNZZDATA1256982382=395301521-1466143554-%7C1466143554",
                Headers = new Dictionary<string, string>
                {
                    { "Cache-Control", "max-age=0" },
                    { "Upgrade-Insecure-Requests", "1" }
                },
                UserAgent = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36",
                Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"
            });

            // Data Source and Password are empty in this connection string as written.
            context.AddPipeline(new MysqlPipeline
            {
                ConnectString = "Database='test';Data Source=;User ID=root;Password=;Port=4306"
            });
            context.AddStartUrl("http://www.ddeng.com/product/967659");
            context.AddEntityType(typeof(Corp));

            return context;
        }