// Keeps the status timer reachable after RunSpider returns. A System.Threading.Timer
// that is no longer referenced is eligible for garbage collection, which would
// silently stop the periodic status output.
private static Timer _statusTimer;

/// <summary>
/// Builds a <see cref="Spider"/> on a <see cref="SequenceScheduler"/> backed by an
/// in-memory bloom filter, registers the downloader, page analyzer and console
/// result module, starts it on the usashopcn home page, and begins printing the
/// spider's run status to the console every two seconds.
/// </summary>
static void RunSpider()
{
    var bloomFilter = new MemoryBloomFilter<string>(1000 * 10, 1000 * 10 * 20);
    _spider = new Spider(new SequenceScheduler(bloomFilter));

    var downloaders = new List<IDownloader> { new HttpClientDownloader(4) };
    _spider.RegisterDownloader(downloaders);
    _spider.RegisterPageAnalyzer<UsashopcnPageAnalyzer>(UsashopcnPageAnalyzer.SiteId);
    _spider.RegisterResultPipeModule(new ConsoleModule(0, 20, 400, 500, true, true));

    _spider.Start(TopicType.StaticHtml, SiteIndex.Usashopcn, "http://www.usashopcn.com/");

    // Stop a timer left over from an earlier call so it does not keep reporting
    // on a spider instance that has since been replaced.
    if (_statusTimer != null)
    {
        _statusTimer.Dispose();
    }

    // dueTime 0 => the first status line is printed immediately, then every 2000 ms.
    _statusTimer = new Timer(
        state => { Console.WriteLine(((Spider) state).RunStatusInfo()); },
        _spider,
        0,
        2000);
}
// Keeps the status timer reachable after RunSpider returns. A System.Threading.Timer
// that is no longer referenced is eligible for garbage collection, which would
// silently stop the periodic status output. The timer therefore lives as long as
// this instance does.
private Timer _statusReportTimer;

/// <summary>
/// Builds a <see cref="Spider"/> on a <see cref="SequenceScheduler"/> backed by an
/// in-memory bloom filter, registers the downloader, page analyzer and console
/// result module, starts it with the URLs returned by <c>GetUrls()</c>, and begins
/// printing queue/task/consume/result counters to the console every two seconds.
/// </summary>
public void RunSpider()
{
    var bloomFilter = new MemoryBloomFilter<string>(1000 * 10, 1000 * 10 * 20);
    _spider = new Spider(new SequenceScheduler(bloomFilter));

    // Alternative downloader kept from the original source (presumably for runs
    // without real HTTP traffic - confirm before enabling):
    //var downloaders = new List<IDownloader> { new FakeDownloader(4) };
    var downloaders = new List<IDownloader> { new HttpClientDownloader(4) };
    _spider.RegisterDownloader(downloaders);
    _spider.RegisterPageAnalyzer<UsashopcnPageAnalyzer>(UsashopcnPageAnalyzer.SiteId);
    _spider.RegisterResultPipeModule(new ConsoleModule(500, 0, 400, 500, true, true));

    _spider.Start(TopicType.StaticHtml, SiteIndex.Usashopcn, GetUrls());

    // Stop a timer left over from an earlier call so it does not keep reporting
    // on a spider instance that has since been replaced.
    if (_statusReportTimer != null)
    {
        _statusReportTimer.Dispose();
    }

    // dueTime 2000 => the first status line appears after two seconds, then every 2000 ms.
    _statusReportTimer = new Timer(
        state =>
        {
            var status = ((Spider) state).RunStatusInfo();
            Console.WriteLine(String.Format(
                "QueueCount:{0}, TaskCount={1}, ConsumeTotal:{2}, ResultTotal:{3}",
                status.QueueCount,
                status.TaskCount,
                status.ConsumeTotal,
                status.ResultTotal));
        },
        _spider,
        2000,
        2000);
}