/// <summary>
/// Crawls a fixed article URL, extracts the "title", "date" and "content" metas plus
/// paging links with a RuiJi expression, and, when paging links and a "content" meta
/// are both present, hands the result to <c>PagingExtractor.MergeContent</c>.
/// </summary>
/// <remarks>
/// Performs a live HTTP request. The assertions only pin the preconditions the rest of
/// the test dereferences, so a failed crawl is reported as an assertion failure rather
/// than a NullReferenceException.
/// </remarks>
public void TestPaging2()
{
    var crawler = new RuiJiCrawler();
    var request = new Request("https://3w.huanqiu.com/a/4e2d56fd7f51/7DHitRASkPC?p=1&agt=8");

    var response = crawler.Request(request);
    Assert.NotNull(response);
    Assert.NotNull(response.Data);

    var content = response.Data.ToString();

    // Expression literal is reproduced exactly as it appears in the source.
    var exp = @" [meta] #title css h1.a-title #date_dt css .time:text #content css .a-con:ohtml [paging] css .a-page css a[href]";
    var block = RuiJiBlockParser.ParserBlock(exp);

    var result = RuiJiExtractor.Extract(content, block);
    Assert.NotNull(result);

    var hasPaging = result.Paging != null && result.Paging.Count > 0;
    var hasContentMeta = result.Metas != null && result.Metas.ContainsKey("content");

    if (hasPaging && hasContentMeta)
    {
        // The merged result is not inspected further; this only checks the call completes.
        result = PagingExtractor.MergeContent(request.Uri, result, block);
    }
}
/// <summary>
/// Crawls the rule's URL, runs the rule's RuiJi expression against the response body
/// and returns the extraction result that carries the most metas.
/// </summary>
/// <param name="rule">
/// Rule under test; supplies the URL, HTTP method, RunJS flag, feature and RuiJi expression.
/// </param>
/// <param name="debug">
/// When <c>false</c>, the result is passed through <c>CrawlTaskFunc.ClearContent</c>
/// before being returned; when <c>true</c>, it is returned untouched.
/// </param>
/// <returns>
/// The selected extraction result (possibly <c>null</c> when nothing was extracted),
/// or an empty anonymous object when the crawl produced no response data.
/// </returns>
public object TestRule([FromBody] RuleModel rule, bool debug = false)
{
    var request = new Request(rule.Url);
    request.Method = rule.Method;
    request.RunJS = (rule.RunJS == Status.ON);

    // NOTE(review): the original executed `request.WaitDom = request.WaitDom` when RunJS
    // was on, which is a self-assignment with no effect and has been dropped. Presumably
    // the wait selector was meant to come from the rule - confirm and wire it up.

    var response = Crawler.Request(request);
    if (response == null || response.Data == null)
    {
        return new { };
    }

    var content = response.Data.ToString();
    var block = RuiJiBlockParser.ParserBlock(rule.RuiJiExpression);

    var extractRequest = new ExtractRequest();
    extractRequest.Content = content;
    extractRequest.Blocks = new List<ExtractFeatureBlock>
    {
        new ExtractFeatureBlock(block, rule.Feature)
    };

    var results = Extractor.Extract(extractRequest);

    // Rank by meta count; a result without metas counts as zero instead of throwing,
    // since the paging check below already treats a null Metas as possible.
    var result = results == null
        ? null
        : results
            .OrderByDescending(m => (m != null && m.Metas != null) ? m.Metas.Count : 0)
            .FirstOrDefault();

    if (result != null
        && result.Paging != null && result.Paging.Count > 0
        && result.Metas != null && result.Metas.ContainsKey("content"))
    {
        result = PagingExtractor.MergeContent(new Uri(rule.Url), result, block);
    }

    // Only clear when there is something to clear; the result may be null here.
    if (!debug && result != null)
    {
        CrawlTaskFunc.ClearContent(result);
    }

    return result;
}
/// <summary>
/// Crawls a fixed proxy-list URL, extracts table-row tiles ("ip" and "port" metas) plus
/// paging links with a RuiJi expression, and, when paging links and tiles are present,
/// calls <c>PagingExtractor.CrawlPage</c> with a callback that stores each page's tiles,
/// serialized as JSON, through a <c>FileStorage</c>.
/// </summary>
/// <remarks>
/// Performs live HTTP requests and writes under "www/download" beneath the application
/// base directory. The assertions only pin the preconditions the rest of the test
/// dereferences, so a failed crawl is reported as an assertion failure rather than a
/// NullReferenceException.
/// </remarks>
public void TestPaging()
{
    var crawler = new RuiJiCrawler();
    var request = new Request("https://www.kuaidaili.com/free/inha/10");

    var response = crawler.Request(request);
    Assert.NotNull(response);
    Assert.NotNull(response.Data);

    var content = response.Data.ToString();

    // Expression literal is reproduced exactly as it appears in the source.
    var exp = @" [tile] css table.table-bordered tr:gt(0):ohtml [meta] #ip css td[data-title='IP']:text #port css td[data-title='PORT']:text [paging] css #listnav a[href]";
    var block = RuiJiBlockParser.ParserBlock(exp);

    var result = RuiJiExtractor.Extract(content, block);
    Assert.NotNull(result);

    var hasPaging = result.Paging != null && result.Paging.Count > 0;
    if (hasPaging && result.Tiles != null)
    {
        var downloadDir = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "www", "download");
        var storage = new FileStorage(downloadDir);

        // NOTE(review): the trailing int.MaxValue is presumably a page cap, which would
        // make the crawl effectively unbounded - confirm against CrawlPage's signature.
        PagingExtractor.CrawlPage(request.Uri, result, block, (u, res) =>
        {
            var c = new DownloadContentModel();
            c.Url = u.AbsolutePath.Trim();
            c.IsRaw = false;
            c.Data = JsonConvert.SerializeObject(res.Tiles);

            storage.Insert(c);
        }, int.MaxValue);
    }
}