コード例 #1
0
            /// <summary>
            /// Builds the <see cref="EntitySpider"/> used by this spider: an HTTP-client
            /// downloader, a duplicate-removing queue scheduler, a collecting pipeline,
            /// one start URL and the <c>ArticleSummary</c> entity type. Also assigns
            /// <c>Name</c> and <c>Batch</c> on the enclosing instance.
            /// </summary>
            /// <returns>The configured spider.</returns>
            protected override EntitySpider GetEntitySpider()
            {
                EntitySpider context = new EntitySpider(new Site());

                // NOTE(review): the site, thread count and empty-sleep time are each set
                // twice below (setter method plus property/constructor). The calls are kept
                // in their original order because it is not visible from here whether both
                // API forms are being exercised deliberately - confirm before removing any.
                context.SetSite(new Site());
                context.SetThreadNum(2);
                context.ThreadNum = 1;
                context.RetryWhenResultIsEmpty = false;
                context.Deep = 100;
                context.EmptySleepTime = 5000;
                context.SetEmptySleepTime(5000);
                context.ExitWhenComplete = true;
                context.CachedSize = 1;
                context.SetDownloader(new HttpClientDownloader());
                context.SetScheduler(new QueueDuplicateRemovedScheduler());

                context.SkipWhenResultIsEmpty = true;
                context.SpawnUrl = true;
                context.AddPipeline(new CollectEntityPipeline());
                context.AddStartUrl("http://www.cas.cn/kx/kpwz/index.shtml");
                context.AddEntityType(typeof(ArticleSummary));

                Name = "qidian";

                // Format with the invariant culture so the batch stamp always uses the
                // Gregorian calendar, regardless of the machine's current culture.
                Batch = DateTime.Now.ToString(
                    "yyyy_MM_dd_HHmmss",
                    System.Globalization.CultureInfo.InvariantCulture);
                return context;
            }
コード例 #2
0
        /// <summary>
        /// Builds an <see cref="EntitySpider"/> that starts from one JD list page,
        /// extracts <c>Product</c> entities, stores them through a
        /// <c>MySqlEntityPipeline</c> and downloads pages with a Chrome
        /// <c>WebDriverDownloader</c>.
        /// </summary>
        /// <returns>The configured spider.</returns>
        protected override EntitySpider GetEntitySpider()
        {
            EntitySpider context = new EntitySpider(new Site());

            // Invariant culture keeps the timestamp on the Gregorian calendar no matter
            // which culture the process runs under.
            string timestamp = DateTime.Now.ToString(
                "yyyy-MM-dd HHmmss",
                System.Globalization.CultureInfo.InvariantCulture);
            context.SetIdentity("JD sku/store test " + timestamp);

            // NOTE(review): the connection string embeds database credentials in source;
            // consider loading it from configuration instead.
            context.AddPipeline(new MySqlEntityPipeline("Database='mysql';Data Source=localhost;User ID=root;Password=1qazZAQ!;Port=3306"));

            // Extra values passed along with the start URL.
            Dictionary<string, object> startUrlData = new Dictionary<string, object>
            {
                { "name", "手机" },
                { "cat3", "655" }
            };
            context.AddStartUrl("http://list.jd.com/list.html?cat=9987,653,655&page=2&JL=6_0_0&ms=5#J_main", startUrlData);

            context.AddEntityType(typeof(Product));
            context.SetDownloader(new WebDriverDownloader(Browser.Chrome));
            return context;
        }
コード例 #3
0
        /// <summary>
        /// Builds a single-threaded <see cref="EntitySpider"/> that starts from one JD
        /// list page, downloads it with <c>HttpClientDownloader</c>, extracts
        /// <c>Product</c> entities and stores them through a <c>MySqlEntityPipeline</c>.
        /// </summary>
        /// <returns>The configured spider.</returns>
        protected override EntitySpider GetEntitySpider()
        {
            EntitySpider context = new EntitySpider(new Site
            {
                // Optional proxy pool (disabled):
                // HttpProxyPool = new HttpProxyPool(new KuaidailiProxySupplier("<Kuaidaili API>"))
            });

            context.SetThreadNum(1);

            // "HH" (24-hour clock) instead of "hh": with the 12-hour form and no AM/PM
            // designator, two runs twelve hours apart would get the same identity.
            // Invariant culture keeps the stamp on the Gregorian calendar.
            string timestamp = DateTime.Now.ToString(
                "yyyy_MM_dd_HHmmss",
                System.Globalization.CultureInfo.InvariantCulture);
            context.SetIdentity("JD_sku_store_test_" + timestamp);

            // Download HTML with the HTTP client.
            context.SetDownloader(new HttpClientDownloader());

            // Save data to MySQL.
            // NOTE(review): the connection string embeds database credentials in source;
            // consider loading it from configuration instead.
            context.AddPipeline(new MySqlEntityPipeline("Database='test';Data Source=localhost;User ID=root;Password=1qazZAQ!;Port=3306"));

            // Extra values passed along with the start URL.
            Dictionary<string, object> startUrlData = new Dictionary<string, object>
            {
                { "name", "手机" },
                { "cat3", "655" }
            };
            context.AddStartUrl("http://list.jd.com/list.html?cat=9987,653,655&page=2&JL=6_0_0&ms=5#J_main", startUrlData);

            context.AddEntityType(typeof(Product));
            return context;
        }
コード例 #4
0
        /// <summary>
        /// Builds an <see cref="EntitySpider"/> that starts from one JD list page,
        /// extracts <c>Product</c> entities with a <c>TargetUrlExtractor</c>, stores
        /// them through a <c>MySqlEntityPipeline</c> and downloads pages with a Chrome
        /// <c>WebDriverDownloader</c>.
        /// </summary>
        /// <returns>The configured spider.</returns>
        protected override EntitySpider GetEntitySpider()
        {
            EntitySpider context = new EntitySpider(new Site());

            // Invariant culture keeps the timestamp on the Gregorian calendar no matter
            // which culture the process runs under.
            string timestamp = DateTime.Now.ToString(
                "yyyy-MM-dd HHmmss",
                System.Globalization.CultureInfo.InvariantCulture);
            context.SetIdentity("JD sku/store test " + timestamp);

            // NOTE(review): the connection string embeds database credentials in source;
            // consider loading it from configuration instead.
            context.AddEntityPipeline(new MySqlEntityPipeline("Database='mysql';Data Source=localhost;User ID=root;Password=1qazZAQ!;Port=3306"));

            // Extra values passed along with the start URL.
            Dictionary<string, object> startUrlData = new Dictionary<string, object>
            {
                { "name", "手机" },
                { "cat3", "655" }
            };
            context.AddStartUrl("http://list.jd.com/list.html?cat=9987,653,655&page=2&JL=6_0_0&ms=5#J_main", startUrlData);

            // The extractor is limited to the span with class "p-num" and to URLs
            // matching the page-number pattern.
            // NOTE(review): presumably this is what lets the spider follow pagination
            // links - confirm against TargetUrlExtractor.
            TargetUrlExtractor pagingExtractor = new TargetUrlExtractor
            {
                Region = new Selector
                {
                    Type = SelectorType.XPath,
                    Expression = "//span[@class=\"p-num\"]"
                },
                Patterns = new List<string> { @"&page=[0-9]+&" }
            };
            context.AddEntityType(typeof(Product), pagingExtractor);

            context.SetDownloader(new WebDriverDownloader(Browser.Chrome));
            return context;
        }