Пример #1
0
        /// <summary>
        /// Builds an entity model for the Youku show listing and crawls its first
        /// listing pages, handing extracted entities to a <c>ConsoleEntityPipeline</c>.
        /// </summary>
        /// <remarks>
        /// All start URLs are registered on the shared <c>Site</c> before a single
        /// spider is created and run. Creating and running a spider inside the loop
        /// would make every later spider re-crawl the start URLs added by earlier
        /// iterations, because the URLs accumulate on the same <c>Site</c> instance
        /// while each spider gets its own fresh scheduler.
        /// </remarks>
        public static void Run()
        {
            // Listing pages 1..LastPage (inclusive) are used as start URLs.
            const int LastPage = 4;

            var table    = new TableInfo("youku", "show", TableNamePostfix.Today);
            var selector = new Selector("//div[@class='yk-pack pack-film']");
            var fields   = new[]
            {
                new Field(".//img[@class='quic']/@alt", "name"),
                new Field("index", "index", SelectorType.Enviroment, DataType.Int),
                new Field("", "id", SelectorType.Enviroment, DataType.Int)
                {
                    IsPrimary = true
                },
            };
            var targetUrlsSelector = new TargetUrlsSelector("//ul[@class='yk-pages']");
            var model = new ModelDefine(selector, fields, table, targetUrlsSelector);

            // Define the Site object to crawl; encoding, headers, cookies, proxy etc. are configured here.
            var site = new Site {
                EncodingName = "UTF-8"
            };

            // Add start/feed urls: one per listing page.
            for (int page = 1; page <= LastPage; ++page)
            {
                site.AddStartUrl($"http://list.youku.com/category/show/c_96_s_1_d_1_p_{page}.html");
            }

            // One spider handles every start URL, so no page is requested more than
            // once by redundant spider instances.
            Spider spider = Spider.Create(site,
                                          new QueueDuplicateRemovedScheduler(),
                                          new ModelProcessor(model))
                            .AddPipeline(new ConsoleEntityPipeline());

            // Start crawler.
            spider.Run();
        }
Пример #2
0
 /// <summary>
 /// Convenience overload for the common case of a single target-URL selector.
 /// </summary>
 /// <remarks>
 /// Wraps <paramref name="targetUrlsSelector"/> in a one-element array and
 /// forwards every argument unchanged to the constructor overload that accepts
 /// a set of target-URL selectors.
 /// </remarks>
 /// <param name="selector">Selector forwarded as the first constructor argument.</param>
 /// <param name="fields">Field definitions forwarded as the second constructor argument.</param>
 /// <param name="table">Table information forwarded as the third constructor argument.</param>
 /// <param name="targetUrlsSelector">The single target-URL selector to wrap and forward.</param>
 public ModelDefine(
     Selector selector,
     IEnumerable<Field> fields,
     TableInfo table,
     TargetUrlsSelector targetUrlsSelector)
     : this(selector, fields, table, new TargetUrlsSelector[] { targetUrlsSelector })
 {
 }