Пример #1
0
        /// <summary>
        /// Builds the spider configuration for the "JD sku/store test" crawl.
        /// </summary>
        /// <remarks>
        /// The context is populated with: a task group and a timestamped spider name,
        /// one target-URL extractor, a MySQL pipeline, a Redis scheduler, one literal
        /// start URL, a database-backed start-URL source, and the <c>Product</c> entity type.
        /// </remarks>
        /// <returns>The populated <see cref="SpiderContext"/>.</returns>
        protected override SpiderContext GetSpiderContext()
        {
            // Single definition of the MySQL connection string; it is used by both the
            // pipeline and the start-URL source, which previously carried two copies that
            // differed only by a stray space after "Data Source=".
            // NOTE(review): credentials are hard-coded in source; they should be moved to
            // configuration / a secret store and rotated.
            const string mysqlConnectString =
                "Database='test';Data Source=ooodata.com;User ID=root;Password=1qazZAQ!123456;Port=4306";

            // Format the timestamp with the invariant culture so the spider name does not
            // change shape (e.g. a non-Gregorian year) depending on the host's culture.
            string timestamp = DateTime.Now.ToString(
                "yyyy-MM-dd HHmmss",
                System.Globalization.CultureInfo.InvariantCulture);

            SpiderContext context = new SpiderContext();

            context.SetTaskGroup("JD sku/store test");
            context.SetSpiderName("JD sku/store test " + timestamp);

            // Target URLs are taken from the <span class="p-num"> region and must match
            // the "&page=<digits>&" pattern (presumably the pagination links; confirm
            // against the page markup).
            context.AddTargetUrlExtractor(new Extension.Configuration.TargetUrlExtractor
            {
                Region = new Extension.Configuration.Selector
                {
                    Type = ExtractType.XPath,
                    Expression = "//span[@class=\"p-num\"]"
                },
                Patterns = new List<string>
                {
                    @"&page=[0-9]+&"
                }
            });

            context.AddPipeline(new MysqlPipeline
            {
                ConnectString = mysqlConnectString
            });

            // NOTE(review): the Redis password below looks like a masked placeholder;
            // confirm the real value is supplied before running.
            context.SetScheduler(new RedisScheduler
            {
                Host = "ooodata.com",
                Password = "******",
                Port = 6379
            });

            // Literal seed URL with its extra data ("name" and "cat3").
            context.AddStartUrl(
                "http://list.jd.com/list.html?cat=9987,653,655&page=1",
                new Dictionary<string, object>
                {
                    { "name", "手机" },
                    { "cat3", "655" }
                });

            // Additional start URLs are prepared from the "url" column of jd.category:
            // ".html" is replaced with "" and the result is substituted into the format
            // string below.
            context.AddPrepareStartUrls(new ConfigurableDbPrepareStartUrls()
            {
                Source = DataSource.MySql,
                ConnectString = mysqlConnectString,
                TableName = "jd.category",
                Columns = new List<BaseDbPrepareStartUrls.Column>
                {
                    new BaseDbPrepareStartUrls.Column
                    {
                        Name = "url",
                        Formatters = new List<Formatter>
                        {
                            new ReplaceFormatter
                            {
                                OldValue = ".html",
                                NewValue = ""
                            }
                        }
                    }
                },
                FormateStrings = new List<string>
                {
                    "{0}&page=1&JL=6_0_0"
                }
            });

            context.AddEntityType(typeof(Product));

            return context;
        }