Ejemplo n.º 1
0
        /// <summary>
        /// Queues one eastmoney F9 page per stock read from <c>StockLearningEntities.Stocks</c>
        /// and runs a single-threaded spider over those pages.
        /// </summary>
        /// <remarks>
        /// Pages are handled by <c>StockJJRPageProcessor</c> and results are passed to
        /// <c>StockJJRPipeline</c>. The call returns when <c>spider.Run()</c> returns.
        /// </remarks>
        public void Run()
        {
            // Site carries encoding, header, cookie and proxy settings; the defaults are used here.
            var site = new Site();

            // Add start/feed URLs, one per stock.
            // NOTE(review): the using statement assumes StockLearningEntities is IDisposable
            // (it is used like an Entity Framework context) - confirm.
            using (var context = new StockLearningEntities())
            {
                // ToList() materializes the query so the loop does not hold an open reader.
                foreach (var stock in context.Stocks.ToList())
                {
                    // Ids starting with "0" or "3" get the "sz" prefix, all others "sh".
                    // Ordinal comparison: stock ids are identifiers, not linguistic text.
                    bool useSz = stock.StockId.StartsWith("0", System.StringComparison.Ordinal)
                                 || stock.StockId.StartsWith("3", System.StringComparison.Ordinal);
                    string prefix = useSz ? "sz" : "sh";

                    site.AddStartUrl($"http://f9.eastmoney.com/{prefix}{stock.StockId}.html");
                }
            }

            DotnetSpider.Core.Spider spider = DotnetSpider.Core.Spider.Create(site,
                                              // Use the in-memory queue scheduler.
                                                                              new QueueDuplicateRemovedScheduler(),
                                              // Use the customized page processor.
                                                                              new StockJJRPageProcessor())
                                              // Use the customized pipeline.
                                              .AddPipeline(new StockJJRPipeline());
            spider.Downloader     = new HttpClientDownloader();
            spider.ThreadNum      = 1;
            spider.EmptySleepTime = 3000;

            // Start the crawler.
            spider.Run();
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Crawls the URL currently entered in <c>tb_url</c> with a single-threaded spider,
        /// using <c>TouTiaoPageProcessor</c> as processor and <c>TotiaoPipeline</c> as pipeline.
        /// </summary>
        private void CustmizeProcessorAndPipeline()
        {
            // Site settings: UTF-8 encoding, outbound links removed.
            var site = new Site();
            site.EncodingName        = "UTF-8";
            site.RemoveOutboundLinks = true;

            // The only start URL is whatever the text box holds.
            site.AddStartUrl(this.tb_url.Text);

            var scheduler = new QueueDuplicateRemovedScheduler();
            var processor = new TouTiaoPageProcessor();

            DotnetSpider.Core.Spider spider = DotnetSpider.Core.Spider.Create(site, scheduler, processor);
            spider.AddPipeline(new TotiaoPipeline());

            spider.Downloader     = new HttpClientDownloader();
            spider.ThreadNum      = 1;
            spider.EmptySleepTime = 3000;

            // Start the crawler.
            spider.Run();
        }
Ejemplo n.º 3
0
 /// <summary>
 /// Runs a spider over Youku category list pages 1 through 4, using
 /// <c>YoukuPageProcessor</c> as processor and <c>YoukuPipeline</c> as pipeline.
 /// </summary>
 public static void Run()
 {
     // In-memory queue scheduler plus the Youku-specific processor.
     var scheduler = new QueueDuplicateRemovedScheduler();
     var processor = new YoukuPageProcessor();

     DotnetSpider.Core.Spider spider = DotnetSpider.Core.Spider
         .Create(scheduler, processor)
         // Youku-specific pipeline.
         .AddPipeline(new YoukuPipeline());

     spider.EncodingName = "UTF-8";

     // Add start/feed URLs: one request per list page.
     for (int page = 1; page <= 4; page++)
     {
         spider.AddRequests($"http://list.youku.com/category/show/c_96_s_1_d_1_p_{page}.html");
     }

     // Start the crawler.
     spider.Run();
 }
        /// <summary>
        /// Crawls the eastmoney stock list page with a single-threaded spider, using
        /// <c>StockListPageProcessor</c> as processor and <c>StockListPipeline</c> as pipeline.
        /// </summary>
        public void Run()
        {
            // Site carries encoding, header, cookie and proxy settings; the defaults are used here.
            var site = new Site();

            // Single start/feed URL.
            site.AddStartUrl("http://quote.eastmoney.com/stocklist.html");

            // In-memory queue scheduler plus the customized page processor.
            var scheduler = new QueueDuplicateRemovedScheduler();
            var processor = new StockListPageProcessor();

            DotnetSpider.Core.Spider spider = DotnetSpider.Core.Spider
                .Create(site, scheduler, processor)
                // Customized pipeline.
                .AddPipeline(new StockListPipeline());

            spider.Downloader     = new HttpClientDownloader();
            spider.ThreadNum      = 1;
            spider.EmptySleepTime = 3000;

            // Start the crawler.
            spider.Run();
        }