Exemplo n.º 1
0
        /// <summary>
        /// Builds the spider configuration for the "cnblogs homepage" task group, starting from
        /// <c>http://news.cnblogs.com/n/page/1/</c>.
        /// </summary>
        /// <returns>
        /// A new <see cref="SpiderContext"/> whose site has MaxSleepTime and MinSleepTime set to 1,
        /// with one start URL, a <c>MysqlPipeline</c>, a target-URL extractor whose region is the
        /// XPath <c>//*[@id='pager']</c>, ThreadNum set to 5 and the <c>Cnblogs</c> entity type added.
        /// </returns>
        protected override SpiderContext GetSpiderContext()
        {
            var context = new SpiderContext();

            context.Site = new Site { MaxSleepTime = 1, MinSleepTime = 1 };
            context.SetTaskGroup("cnblogs homepage");

            // Format with the invariant culture so the generated name is always a Gregorian
            // timestamp, independent of the current culture of the machine running the spider.
            string timestamp = DateTime.Now.ToString(
                "yyyy-MM-dd HHmmss",
                System.Globalization.CultureInfo.InvariantCulture);
            context.SetSpiderName("cnblogs homepage " + timestamp);

            context.AddStartUrl("http://news.cnblogs.com/n/page/1/");

            // NOTE(review): this connection string hard-codes a host and a root password in source;
            // it should be supplied from configuration instead.
            var pipeline = new MysqlPipeline
            {
                ConnectString = "Database='taobao';Data Source= 86research.imwork.net;User ID=root;Password=1qazZAQ!;Port=4306"
            };
            context.AddPipeline(pipeline);

            var pagerExtractor = new Extension.Configuration.TargetUrlExtractor
            {
                Region = new Extension.Configuration.Selector
                {
                    Expression = "//*[@id='pager']",
                    Type = ExtractType.XPath
                }
            };
            context.AddTargetUrlExtractor(pagerExtractor);

            context.ThreadNum = 5;
            context.AddEntityType(typeof(Cnblogs));
            return context;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Builds the spider configuration for the JD sku/store test that runs with two threads
        /// and starts from a single JD list page.
        /// </summary>
        /// <returns>
        /// A new <see cref="SpiderContext"/> with thread number 2, a timestamped spider name,
        /// a target-URL extractor (XPath region <c>//span[@class="p-num"]</c>, pattern
        /// <c>&amp;page=[0-9]+&amp;</c>), a <c>MysqlPipeline</c>, one start URL carrying the
        /// <c>name</c>/<c>cat3</c> values, and the <c>Product</c> entity type added.
        /// </returns>
        protected override SpiderContext GetSpiderContext()
        {
            var context = new SpiderContext();

            context.SetThreadNum(2);

            // Format with the invariant culture so the generated name is always a Gregorian
            // timestamp, independent of the current culture of the machine running the spider.
            string timestamp = DateTime.Now.ToString(
                "yyyy_MM_dd_HHmmss",
                System.Globalization.CultureInfo.InvariantCulture);
            context.SetSpiderName("JD_sku_store_test_" + timestamp);

            var pagingExtractor = new Extension.Configuration.TargetUrlExtractor
            {
                Region = new Extension.Configuration.Selector
                {
                    Type = ExtractType.XPath,
                    Expression = "//span[@class=\"p-num\"]"
                },
                Patterns = new List<string> { @"&page=[0-9]+&" }
            };
            context.AddTargetUrlExtractor(pagingExtractor);

            // NOTE(review): this connection string hard-codes a root password in source;
            // it should be supplied from configuration instead.
            var pipeline = new MysqlPipeline
            {
                ConnectString = "Database='test';Data Source=mysqlserver;User ID=root;Password=1qazZAQ!;Port=4306"
            };
            context.AddPipeline(pipeline);

            // Extra values passed alongside the start URL.
            var startUrlData = new Dictionary<string, object>
            {
                { "name", "手机" },
                { "cat3", "655" }
            };
            context.AddStartUrl(
                "http://list.jd.com/list.html?cat=9987,653,655&page=2&JL=6_0_0&ms=5#J_main",
                startUrlData);

            context.AddEntityType(typeof(Product));

            return context;
        }
        /// <summary>
        /// Builds the spider configuration for the JD sku/store test that downloads pages through
        /// a <c>WebDriverDownloader</c> configured with the Chrome browser.
        /// </summary>
        /// <returns>
        /// A new <see cref="SpiderContext"/> with a timestamped spider name, a target-URL extractor
        /// (XPath region <c>//span[@class="p-num"]</c>, pattern <c>&amp;page=[0-9]+&amp;</c>),
        /// a <c>MysqlPipeline</c>, one start URL carrying the <c>name</c>/<c>cat3</c> values,
        /// the <c>Product</c> entity type and the WebDriver downloader set.
        /// </returns>
        protected override SpiderContext GetSpiderContext()
        {
            var context = new SpiderContext();

            // Format with the invariant culture so the generated name is always a Gregorian
            // timestamp, independent of the current culture of the machine running the spider.
            string timestamp = DateTime.Now.ToString(
                "yyyy-MM-dd HHmmss",
                System.Globalization.CultureInfo.InvariantCulture);
            context.SetSpiderName("JD sku/store test " + timestamp);

            var pagingExtractor = new Extension.Configuration.TargetUrlExtractor
            {
                Region = new Extension.Configuration.Selector
                {
                    Type = ExtractType.XPath,
                    Expression = "//span[@class=\"p-num\"]"
                },
                Patterns = new List<string> { @"&page=[0-9]+&" }
            };
            context.AddTargetUrlExtractor(pagingExtractor);

            // NOTE(review): this connection string hard-codes a host address and a root password
            // in source; it should be supplied from configuration instead.
            var pipeline = new MysqlPipeline
            {
                ConnectString = "Database='mysql';Data Source=192.168.199.211;User ID=root;Password=1qazZAQ!;Port=3306"
            };
            context.AddPipeline(pipeline);

            // Extra values passed alongside the start URL.
            var startUrlData = new Dictionary<string, object>
            {
                { "name", "手机" },
                { "cat3", "655" }
            };
            context.AddStartUrl(
                "http://list.jd.com/list.html?cat=9987,653,655&page=2&JL=6_0_0&ms=5#J_main",
                startUrlData);

            context.AddEntityType(typeof(Product));

            var downloader = new WebDriverDownloader
            {
                Browser = Extension.Downloader.WebDriver.Browser.Chrome
            };
            context.SetDownloader(downloader);

            return context;
        }
Exemplo n.º 4
0
        /// <summary>
        /// Builds the spider configuration for the "JD sku/store test" task group, using a
        /// <c>RedisScheduler</c> and a <c>MysqlPipeline</c>.
        /// </summary>
        /// <returns>
        /// A new <see cref="SpiderContext"/> with the task group and a timestamped spider name set,
        /// a target-URL extractor (XPath region <c>//span[@class="p-num"]</c>, pattern
        /// <c>&amp;page=[0-9]+&amp;</c>), the pipeline, the scheduler, one start URL carrying the
        /// <c>name</c>/<c>cat3</c> values, and the <c>Product</c> entity type added.
        /// </returns>
        protected override SpiderContext GetSpiderContext()
        {
            var context = new SpiderContext();

            context.SetTaskGroup("JD sku/store test");

            // Format with the invariant culture so the generated name is always a Gregorian
            // timestamp, independent of the current culture of the machine running the spider.
            string timestamp = DateTime.Now.ToString(
                "yyyy-MM-dd HHmmss",
                System.Globalization.CultureInfo.InvariantCulture);
            context.SetSpiderName("JD sku/store test " + timestamp);

            var pagingExtractor = new Extension.Configuration.TargetUrlExtractor
            {
                Region = new Extension.Configuration.Selector
                {
                    Type = ExtractType.XPath,
                    Expression = "//span[@class=\"p-num\"]"
                },
                Patterns = new List<string> { @"&page=[0-9]+&" }
            };
            context.AddTargetUrlExtractor(pagingExtractor);

            // NOTE(review): this connection string hard-codes a host and a root password in source;
            // it should be supplied from configuration instead.
            var pipeline = new MysqlPipeline
            {
                ConnectString = "Database='test';Data Source=ooodata.com;User ID=root;Password=1qazZAQ!123456;Port=4306"
            };
            context.AddPipeline(pipeline);

            // NOTE(review): the Redis password below looks like a masked placeholder — confirm
            // the real value is provided elsewhere before running.
            var scheduler = new RedisScheduler
            {
                Host = "ooodata.com",
                Password = "******",
                Port = 6379
            };
            context.SetScheduler(scheduler);

            // Extra values passed alongside the start URL.
            var startUrlData = new Dictionary<string, object>
            {
                { "name", "手机" },
                { "cat3", "655" }
            };
            context.AddStartUrl(
                "http://list.jd.com/list.html?cat=9987,653,655&page=2&JL=6_0_0&ms=5#J_main",
                startUrlData);

            context.AddEntityType(typeof(Product));
            return context;
        }
Exemplo n.º 5
0
        /// <summary>
        /// Builds the spider configuration for the "JD sku/store test" task group, using a
        /// <c>RedisScheduler</c>, a <c>MysqlPipeline</c> and a <c>ConfigurableDbPrepareStartUrls</c>
        /// configured against the <c>jd.category</c> table.
        /// </summary>
        /// <returns>
        /// A new <see cref="SpiderContext"/> with the task group and a timestamped spider name set,
        /// a target-URL extractor (XPath region <c>//span[@class="p-num"]</c>, pattern
        /// <c>&amp;page=[0-9]+&amp;</c>), the pipeline, the scheduler, one explicit start URL,
        /// the prepare-start-URLs configuration and the <c>Product</c> entity type added.
        /// </returns>
        protected override SpiderContext GetSpiderContext()
        {
            var context = new SpiderContext();

            context.SetTaskGroup("JD sku/store test");

            // Format with the invariant culture so the generated name is always a Gregorian
            // timestamp, independent of the current culture of the machine running the spider.
            string timestamp = DateTime.Now.ToString(
                "yyyy-MM-dd HHmmss",
                System.Globalization.CultureInfo.InvariantCulture);
            context.SetSpiderName("JD sku/store test " + timestamp);

            var pagingExtractor = new Extension.Configuration.TargetUrlExtractor
            {
                Region = new Extension.Configuration.Selector
                {
                    Type = ExtractType.XPath,
                    Expression = "//span[@class=\"p-num\"]"
                },
                Patterns = new List<string> { @"&page=[0-9]+&" }
            };
            context.AddTargetUrlExtractor(pagingExtractor);

            // NOTE(review): the connection strings in this method hard-code a host and a root
            // password in source; they should be supplied from configuration instead.
            var pipeline = new MysqlPipeline
            {
                ConnectString = "Database='test';Data Source=ooodata.com;User ID=root;Password=1qazZAQ!123456;Port=4306"
            };
            context.AddPipeline(pipeline);

            // NOTE(review): the Redis password below looks like a masked placeholder — confirm
            // the real value is provided elsewhere before running.
            var scheduler = new RedisScheduler
            {
                Host = "ooodata.com",
                Password = "******",
                Port = 6379
            };
            context.SetScheduler(scheduler);

            // Extra values passed alongside the start URL.
            var startUrlData = new Dictionary<string, object>
            {
                { "name", "手机" },
                { "cat3", "655" }
            };
            context.AddStartUrl("http://list.jd.com/list.html?cat=9987,653,655&page=1", startUrlData);

            // Presumably produces further start URLs from the "url" column of jd.category, with
            // ".html" replaced by "" and the result substituted into the format string — confirm
            // against ConfigurableDbPrepareStartUrls.
            var preparedStartUrls = new ConfigurableDbPrepareStartUrls()
            {
                Source = DataSource.MySql,
                ConnectString = "Database='test';Data Source= ooodata.com;User ID=root;Password=1qazZAQ!123456;Port=4306",
                TableName = "jd.category",
                Columns = new List<BaseDbPrepareStartUrls.Column>
                {
                    new BaseDbPrepareStartUrls.Column
                    {
                        Name = "url",
                        Formatters = new List<Formatter>
                        {
                            new ReplaceFormatter { OldValue = ".html", NewValue = "" }
                        }
                    }
                },
                FormateStrings = new List<string> { "{0}&page=1&JL=6_0_0" }
            };
            context.AddPrepareStartUrls(preparedStartUrls);

            context.AddEntityType(typeof(Product));
            return context;
        }
Exemplo n.º 6
0
        /// <summary>
        /// Builds the spider configuration for the JD sku/store test starting from a single
        /// JD list page and writing through a <c>MysqlPipeline</c>.
        /// </summary>
        /// <returns>
        /// A new <see cref="SpiderContext"/> with a timestamped spider name, a target-URL extractor
        /// (XPath region <c>//span[@class="p-num"]</c>, pattern <c>&amp;page=[0-9]+&amp;</c>),
        /// the pipeline, one start URL carrying the <c>name</c>/<c>cat3</c> values, and the
        /// <c>Product</c> entity type added.
        /// </returns>
        protected override SpiderContext GetSpiderContext()
        {
            var context = new SpiderContext();

            // Format with the invariant culture so the generated name is always a Gregorian
            // timestamp, independent of the current culture of the machine running the spider.
            string timestamp = DateTime.Now.ToString(
                "yyyy-MM-dd HHmmss",
                System.Globalization.CultureInfo.InvariantCulture);
            context.SetSpiderName("JD sku/store test " + timestamp);

            var pagingExtractor = new Extension.Configuration.TargetUrlExtractor
            {
                Region = new Extension.Configuration.Selector
                {
                    Type = ExtractType.XPath,
                    Expression = "//span[@class=\"p-num\"]"
                },
                Patterns = new List<string> { @"&page=[0-9]+&" }
            };
            context.AddTargetUrlExtractor(pagingExtractor);

            // NOTE(review): Data Source and Password are blank in this connection string —
            // presumably placeholders that must be filled in from configuration; confirm.
            var pipeline = new MysqlPipeline
            {
                ConnectString = "Database='test';Data Source=;User ID=root;Password=;Port=4306"
            };
            context.AddPipeline(pipeline);

            // Extra values passed alongside the start URL.
            var startUrlData = new Dictionary<string, object>
            {
                { "name", "手机" },
                { "cat3", "655" }
            };
            context.AddStartUrl(
                "http://list.jd.com/list.html?cat=9987,653,655&page=2&JL=6_0_0&ms=5#J_main",
                startUrlData);

            context.AddEntityType(typeof(Product));

            return context;
        }