/// <summary>
/// Seeds the crawler with one f9.eastmoney.com page per stock read from
/// <c>StockLearningEntities</c>, then runs a spider that uses
/// <c>StockJJRPageProcessor</c> as processor and <c>StockJJRPipeline</c> as pipeline.
/// </summary>
public void Run()
{
    // Site holds the crawl-wide configuration (encoding, headers, cookies, proxy, ...);
    // the defaults are used here.
    var site = new Site();

    // Read every stock up front and release the context before the crawl starts,
    // so it is not kept alive for the whole run.
    // NOTE(review): assumes StockLearningEntities is an Entity Framework context
    // (and therefore IDisposable) — confirm.
    using (var context = new StockLearningEntities())
    {
        foreach (var stock in context.Stocks.ToList())
        {
            // Ids starting with "0" or "3" are addressed with the "sz" prefix, all others with "sh".
            // Ordinal comparison keeps the prefix test independent of the current culture.
            bool useSz = stock.StockId.StartsWith("0", System.StringComparison.Ordinal)
                || stock.StockId.StartsWith("3", System.StringComparison.Ordinal);
            string range = useSz ? "sz" : "sh";

            // Add the start/feed URL for this stock.
            site.AddStartUrl($"http://f9.eastmoney.com/{range}{stock.StockId}.html");
        }
    }

    DotnetSpider.Core.Spider spider = DotnetSpider.Core.Spider.Create(
            site,
            // In-memory queue scheduler.
            new QueueDuplicateRemovedScheduler(),
            // Custom page processor.
            new StockJJRPageProcessor())
        // Custom pipeline.
        .AddPipeline(new StockJJRPipeline());

    spider.Downloader = new HttpClientDownloader();
    spider.ThreadNum = 1;
    spider.EmptySleepTime = 3000;

    // Start the crawler.
    spider.Run();
}
/// <summary>
/// Crawls the URL currently entered in <c>tb_url</c>, using
/// <c>TouTiaoPageProcessor</c> as processor and <c>TotiaoPipeline</c> as pipeline.
/// </summary>
private void CustmizeProcessorAndPipeline()
{
    // Crawl-wide settings: UTF-8 encoding, outbound links removed.
    var site = new Site();
    site.EncodingName = "UTF-8";
    site.RemoveOutboundLinks = true;

    // The single start URL comes from the text box.
    site.AddStartUrl(this.tb_url.Text);

    var scheduler = new QueueDuplicateRemovedScheduler();
    var pageProcessor = new TouTiaoPageProcessor();
    DotnetSpider.Core.Spider spider = DotnetSpider.Core.Spider.Create(site, scheduler, pageProcessor);

    var pipeline = new TotiaoPipeline();
    spider.AddPipeline(pipeline);

    spider.Downloader = new HttpClientDownloader();
    spider.ThreadNum = 1;
    spider.EmptySleepTime = 3000;

    // Start the crawler.
    spider.Run();
}
/// <summary>
/// Crawls pages 1 through 4 of the Youku category listing, using
/// <c>YoukuPageProcessor</c> as processor and <c>YoukuPipeline</c> as pipeline.
/// </summary>
public static void Run()
{
    // In-memory queue scheduler.
    var scheduler = new QueueDuplicateRemovedScheduler();
    // Custom page processor for Youku.
    var pageProcessor = new YoukuPageProcessor();

    DotnetSpider.Core.Spider spider = DotnetSpider.Core.Spider
        .Create(scheduler, pageProcessor)
        // Custom pipeline for Youku.
        .AddPipeline(new YoukuPipeline());

    spider.EncodingName = "UTF-8";

    // Add the start/feed URLs: listing pages 1..4.
    for (int page = 1; page <= 4; page++)
    {
        spider.AddRequests($"http://list.youku.com/category/show/c_96_s_1_d_1_p_{page}.html");
    }

    // Start the crawler.
    spider.Run();
}
/// <summary>
/// Crawls the eastmoney stock list page, using <c>StockListPageProcessor</c>
/// as processor and <c>StockListPipeline</c> as pipeline.
/// </summary>
public void Run()
{
    // Site holds the crawl-wide configuration (encoding, headers, cookies, proxy, ...);
    // the defaults are used here.
    var site = new Site();

    // Single start/feed URL.
    site.AddStartUrl("http://quote.eastmoney.com/stocklist.html");

    // In-memory queue scheduler.
    var scheduler = new QueueDuplicateRemovedScheduler();
    // Custom page processor.
    var pageProcessor = new StockListPageProcessor();

    DotnetSpider.Core.Spider spider = DotnetSpider.Core.Spider
        .Create(site, scheduler, pageProcessor)
        // Custom pipeline.
        .AddPipeline(new StockListPipeline());

    spider.Downloader = new HttpClientDownloader();
    spider.ThreadNum = 1;
    spider.EmptySleepTime = 3000;

    // Start the crawler.
    spider.Run();
}