/// <summary>
/// Runs a feed once for testing: expands <c>feed.Address</c> with <see cref="UrlCompile"/>,
/// executes a <see cref="FeedJob"/> for every resulting address and extracts the snapshot
/// content with the feed's RuiJi expression.
/// </summary>
/// <param name="feed">Feed definition to test. Its <c>Address</c> is overwritten with each expanded address while iterating.</param>
/// <param name="down">When true, every absolute URL found (one per line) in the extracted content is requested and stored under <c>www/download</c> below the application base directory.</param>
/// <param name="debug">When false, <c>CrawlTaskFunc.ClearContent</c> is applied to each extraction result before it is returned.</param>
/// <returns>
/// A list with one <see cref="ExtractResult"/> per expanded address (an empty result when the feed has
/// no RuiJi expression), or the caught <see cref="Exception"/> instance if anything throws.
/// </returns>
public object TestFeed(FeedModel feed, [FromUri] bool down, [FromUri] bool debug = false)
{
    try
    {
        var compile = new UrlCompile();
        var addrs = compile.GetResult(feed.Address);
        var results = new List<ExtractResult>();

        foreach (var addr in addrs)
        {
            feed.Address = addr.ToString();

            var job = new FeedJob();
            var snap = job.DoTask(feed, false);

            // Without an expression there is nothing to extract; keep a placeholder so the
            // result list stays aligned with the expanded addresses.
            if (string.IsNullOrEmpty(feed.RuiJiExpression))
            {
                results.Add(new ExtractResult());
                continue;
            }

            var block = RuiJiBlockParser.ParserBlock(feed.RuiJiExpression);
            var result = RuiJiExtractor.Extract(snap.Content, block);

            // NOTE(review): ClearContent runs before the download step below reads result.Content;
            // confirm it does not remove the URLs the download step depends on.
            if (!debug)
            {
                CrawlTaskFunc.ClearContent(result);
            }

            // A result without content has no URLs to download; skip instead of throwing.
            if (down && result.Content != null)
            {
                var storage = new FileStorage(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "www", "download"));
                var lines = result.Content.ToString().Replace("\r\n", "\n").Split('\n');

                foreach (var line in lines)
                {
                    // Trim before validating so the URL that is checked, requested and stored
                    // is the same string (padding or a stray '\r' would otherwise fail the check).
                    var url = line.Trim();
                    if (string.IsNullOrEmpty(url) || !Uri.IsWellFormedUriString(url, UriKind.Absolute))
                    {
                        continue;
                    }

                    var res = Crawler.Request(url);
                    if (res == null)
                    {
                        // No response for this URL; nothing to store.
                        continue;
                    }

                    var c = new DownloadContentModel();
                    c.Url = url;
                    c.IsRaw = res.IsRaw;
                    c.Data = res.Data;
                    storage.Insert(c);
                }
            }

            results.Add(result);
        }

        return results;
    }
    catch (Exception ex)
    {
        // The exception object itself is returned to the caller rather than rethrown.
        return ex;
    }
}
/// <summary>
/// Tests an extraction rule: requests <c>rule.Url</c> through the crawler, extracts the response
/// with the rule's RuiJi expression and feature, and returns the extraction result that has the
/// most metas.
/// </summary>
/// <param name="rule">Rule to test; supplies the URL, HTTP method, RunJS flag, RuiJi expression and feature.</param>
/// <param name="debug">When false, <c>CrawlTaskFunc.ClearContent</c> is applied to the result before it is returned.</param>
/// <returns>
/// The selected extraction result (merged across pages when it has paging entries and a
/// <c>"content"</c> meta), <c>null</c> when extraction produced no result, or an empty anonymous
/// object when the crawler returned no response data.
/// </returns>
public object TestRule([FromBody] RuleModel rule, bool debug = false)
{
    var request = new Request(rule.Url);
    request.Method = rule.Method;
    request.RunJS = (rule.RunJS == Status.ON);
    if (request.RunJS)
    {
        // NOTE(review): this assigns the property to itself and has no visible effect; it was
        // presumably meant to copy a wait-selector from the rule. Confirm the intended source.
        request.WaitDom = request.WaitDom;
    }

    var response = Crawler.Request(request);
    if (response == null || response.Data == null)
    {
        return new { };
    }

    var content = response.Data.ToString();
    var block = RuiJiBlockParser.ParserBlock(rule.RuiJiExpression);

    var r = new ExtractRequest();
    r.Content = content;
    r.Blocks = new List<ExtractFeatureBlock>
    {
        new ExtractFeatureBlock(block, rule.Feature)
    };

    var results = Extractor.Extract(r);

    // Metas may be null (it is null-checked below), so rank such results as having zero metas
    // instead of dereferencing them inside the sort key.
    var result = results
        .OrderByDescending(m => m.Metas == null ? 0 : m.Metas.Count)
        .FirstOrDefault();

    if (result == null)
    {
        // Nothing was extracted; avoid handing a null result to ClearContent.
        return null;
    }

    var hasPaging = result.Paging != null && result.Paging.Count > 0;
    var hasContentMeta = result.Metas != null && result.Metas.ContainsKey("content");
    if (hasPaging && hasContentMeta)
    {
        result = PagingExtractor.MergeContent(new Uri(rule.Url), result, block);
    }

    if (!debug)
    {
        CrawlTaskFunc.ClearContent(result);
    }

    return result;
}