/// <summary>
/// Configures the crawl: registers a MySQL pipeline, queues the Baidu News
/// search URL for the keyword, and attaches a page processor whose target-URL
/// extractor is given the paging element's XPath and a <c>pn=</c> pattern.
/// </summary>
/// <param name="arguments">Start-up arguments; not read by this method.</param>
protected override void MyInit(params string[] arguments)
{
    const string keyword = "可乐|雪碧";

    // NOTE(review): "baidu" / "mysql_baidu_search" are presumably the target
    // database and table names — confirm against DefaultMySqlPipeline.
    var storage = new DefaultMySqlPipeline(Core.Environment.DataConnectionString, "baidu", "mysql_baidu_search");
    AddPipeline(storage);

    // The keyword is substituted into the query string as-is and is also
    // passed alongside the start URL under the "Keyword" key.
    var startUrl = string.Format("http://news.baidu.com/ns?word={0}&tn=news&from=news&cl=2&pn=0&rn=20&ct=1", keyword);
    var extras = new Dictionary<string, dynamic>();
    extras.Add("Keyword", keyword);
    AddStartUrl(startUrl, extras);

    // XPath selects the <p id="page"> element; the second argument is a
    // pattern matching an "&pn=<digits>&" query fragment.
    var pagingProcessor = new DefaultPageProcessor();
    pagingProcessor.AddTargetUrlExtractor("//p[@id=\"page\"]", "&pn=[0-9]+&");
    AddPageProcessor(pagingProcessor);
}
/// <summary>
/// Verifies that a <c>Spider</c> built from a default <c>Site</c> and
/// <c>DefaultPageProcessor</c> exposes non-null collaborators and reports
/// the <c>Init</c> status immediately after construction.
/// </summary>
public void SpiderTest_Success()
{
    // Arrange
    var site = new Site();
    var processor = new DefaultPageProcessor();

    // Act
    var spider = new Spider(site, processor);

    // Assert
    Assert.IsNotNull(spider);
    Assert.IsNotNull(spider.Scheduler);
    Assert.IsNotNull(spider.DownLoader);
    Assert.IsNotNull(spider.Site);
    Assert.IsNotNull(spider.PageProcessor);

    // AreEqual reports the expected and actual status on failure, which
    // IsTrue(a == b) cannot do.
    Assert.AreEqual(SpiderStatusEnum.Init, spider.Status);

    // Use the dedicated null assertion, consistent with the checks above.
    Assert.IsNotNull(spider.Pipelines);
    Assert.IsNotNull(spider.SpiderListening);
}
/// <summary>
/// Configures the crawl: registers a MySQL pipeline, queues the Baidu News
/// search request for the keyword, and attaches a page processor that
/// extracts requests from the paging element and filters them by a
/// <c>pn=</c> pattern.
/// </summary>
/// <param name="arguments">Start-up arguments; not read by this method.</param>
protected override void OnInit(params string[] arguments)
{
    const string keyword = "可乐|雪碧";

    // NOTE(review): "baidu" / "mysql_baidu_search" are presumably the target
    // database and table names — confirm against DefaultMySqlPipeline.
    var storage = new DefaultMySqlPipeline(Env.DataConnectionString, "baidu", "mysql_baidu_search");
    AddPipeline(storage);

    // The keyword is substituted into the query string as-is and is also
    // attached to the request under the "Keyword" key.
    var searchUrl = string.Format("http://news.baidu.com/ns?word={0}&tn=news&from=news&cl=2&pn=0&rn=20&ct=1", keyword);
    var extras = new Dictionary<string, dynamic>();
    extras.Add("Keyword", keyword);
    AddRequest(searchUrl, extras);

    // Extractor is pointed at the <p id="page"> element; the filter is given
    // a single pattern matching an "&pn=<digits>&" query fragment.
    var pagingProcessor = new DefaultPageProcessor
    {
        RequestExtractor = new XPathRequestExtractor("//p[@id=\"page\"]"),
        Filter = new PatternFilter(new string[] { "&pn=[0-9]+&" })
    };
    AddPageProcessors(pagingProcessor);
}
/// <summary>
/// Builds a <c>Spider</c> with a 30-thread <c>Site</c>, two pipelines and a
/// console listener, seeds it with a single URL, and runs it. The method
/// contains no assertions; it completes if <c>Run</c> returns without throwing.
/// </summary>
public void RunTest_Success()
{
    var crawlSite = new Site();
    crawlSite.ThreadCount = 30;

    // Collaborators are created in the same order as before, ahead of the spider.
    var processor = new DefaultPageProcessor();
    var htmlOutput = new HtmlFilePiepline();
    var fileOutput = new FilePipeline();
    var consoleListener = new ConsoleSpiderListening();

    var crawler = new Spider(crawlSite, processor);

    // Fluent configuration followed by the run itself.
    crawler
        .AddSeedUrl("http://sh.lianjia.com/")
        .AddPiepline(htmlOutput)
        .AddPiepline(fileOutput)
        .AddListening(consoleListener)
        .Run();
}