// Smoke test: registers the "url" and "proc" compilers (each backed by a LiteDb
// provider and a file provider rooted at "funcs/js"), then feeds a URL template
// containing a "{# page(146,707) #}" placeholder to the "url" compiler.
// NOTE(review): the closing assertion is unconditional, so the test can only fail
// if one of the calls above throws - consider asserting on the returned addresses.
public void TestMethod1()
{
    const string template = "http://www.onezh.com/hall/show_{# page(146,707) #}.html";

    var urlProviders = new List<ICodeProvider>
    {
        new LiteDbCodeProvider(Node.Feed.Db.FuncType.URLFUNCTION),
        new FileCodeProvider("funcs/js", "fun")
    };
    CodeCompilerManager.Create("url", urlProviders);

    var processorProviders = new List<ICodeProvider>
    {
        new LiteDbCodeProvider(Node.Feed.Db.FuncType.SELECTORPROCESSOR),
        new FileCodeProvider("funcs/js", "pro")
    };
    CodeCompilerManager.Create("proc", processorProviders);

    // The result is intentionally not inspected (see the review note above).
    var expanded = CodeCompilerManager.GetResult("url", template);

    Assert.True(true);
}
/// <summary>
/// Runs the "proc" compiler function named by the selector over the content of the
/// previous result and wraps the outcome in a new <see cref="ProcessResult"/>.
/// </summary>
/// <param name="selector">function selector; its <c>Name</c> is passed to the "proc" compiler</param>
/// <param name="result">result of the previous processing step; its <c>Content</c> is the input</param>
/// <returns>
/// a new result with exactly one match: the first compiler output converted to a
/// string, or the unchanged input content when the compiler returned nothing
/// </returns>
public override ProcessResult ProcessNeed(FunctionSelector selector, ProcessResult result)
{
    var processed = new ProcessResult();
    var outputs = CodeCompilerManager.GetResult("proc", selector.Name, result.Content);

    // No output: pass the incoming content through untouched.
    if (outputs.Length <= 0)
    {
        processed.Matches.Add(result.Content);
        return processed;
    }

    // Only the first output is kept; any further outputs are ignored.
    var firstOutput = outputs.First();
    processed.Matches.Add(firstOutput.ToString());
    return processed;
}
/// <summary>
/// Scheduler job entry point. Reads "baseUrl" and "request" from the job data map,
/// expands the request URI through the "url" compiler and queues one crawl action
/// per resulting address. Each action clones the request, points the clone at the
/// address, runs <c>DoTask</c> and hands the response to <c>Save</c>.
/// </summary>
/// <param name="context">execution context carrying the job data map</param>
/// <returns>
/// a task for the queuing step only; nothing is awaited here, so it does not
/// represent completion of the queued crawl actions
/// </returns>
/// <exception cref="InvalidOperationException">
/// the job data map has no "request" entry of type <c>FeedRequest</c>
/// </exception>
public async Task Execute(IJobExecutionContext context)
{
    var jobData = context.JobDetail.JobDataMap;

    baseUrl = jobData.Get("baseUrl").ToString();
    var feedRequest = jobData.Get("request") as FeedRequest;

    Logger.GetLogger(baseUrl).Info(" feed job " + context.JobDetail.Key + " add to feed crawl queue");

    // Fail with a descriptive error instead of an anonymous NullReferenceException
    // when the entry is missing or has an unexpected type.
    if (feedRequest == null)
    {
        throw new InvalidOperationException(
            "feed job " + context.JobDetail.Key + " has no \"request\" entry of type FeedRequest in its job data map");
    }

    var addrs = CodeCompilerManager.GetResult("url", feedRequest.Request.Uri.ToString());

    foreach (var addr in addrs)
    {
        queuePool.QueueAction(() =>
        {
            Logger.GetLogger(baseUrl).Info(" feed job " + addr.ToString() + " starting");

            // NOTE(review): every queued action mutates the same feedRequest instance.
            // If queuePool runs actions concurrently, one action can overwrite Request
            // between another action's assignment and its DoTask call - consider giving
            // each action its own FeedRequest copy.
            feedRequest.Request = feedRequest.Request.Clone() as Request;
            feedRequest.Request.Uri = new Uri(addr.ToString());

            var response = DoTask(feedRequest);
            Save(feedRequest, response);
        });
    }
}
/// <summary>
/// Test-runs a feed definition: expands <c>feed.Address</c> through the "url"
/// compiler, fetches every resulting address with a <c>FeedJob</c> and, when a
/// RuiJi expression is present, extracts a result from the response.
/// </summary>
/// <param name="feed">feed definition; its <c>Address</c> is overwritten with each expanded address</param>
/// <param name="down">
/// when true, each line of the extracted content that is a well-formed absolute URI
/// is requested and stored under "wwwroot/download" in the application base directory
/// </param>
/// <param name="debug">when false, <c>CrawlTaskFunc.ClearContent</c> is applied to each extracted result</param>
/// <returns>
/// the list of extract results; the raw response data of the first response whose
/// status is not OK; or the caught exception object if anything throws
/// </returns>
public object TestFeed([FromBody] FeedModel feed, bool down, bool debug = false)
{
    try
    {
        var addrs = CodeCompilerManager.GetResult("url", feed.Address);
        var results = new List<ExtractResult>();

        foreach (var addr in addrs)
        {
            feed.Address = addr.ToString();

            var job = new FeedJob();
            var response = job.DoTask(feed);

            // Abort on the first failed download and hand back whatever was received.
            if (response.StatusCode != System.Net.HttpStatusCode.OK)
            {
                return response.Data;
            }

            // Without an expression there is nothing to extract; keep a placeholder
            // so the output still has one entry per address.
            if (string.IsNullOrEmpty(feed.RuiJiExpression))
            {
                results.Add(new ExtractResult());
                continue;
            }

            var block = RuiJiBlockParser.ParserBlock(feed.RuiJiExpression);
            var result = RuiJiExtractor.Extract(response.Data.ToString(), block);

            if (!debug)
            {
                CrawlTaskFunc.ClearContent(result);
            }

            if (down)
            {
                // NOTE(review): when debug is false, ClearContent has already run on
                // result - confirm it leaves result.Content usable for the download step.
                var storage = new FileStorage(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "wwwroot", "download"));
                var lines = result.Content.ToString().Replace("\r\n", "\n").Split('\n');

                foreach (var line in lines)
                {
                    // Trim first so that validation, the request and the stored Url
                    // all use the same value (previously only the stored Url was trimmed).
                    var fileUrl = line.Trim();
                    if (string.IsNullOrEmpty(fileUrl) || !Uri.IsWellFormedUriString(fileUrl, UriKind.Absolute))
                    {
                        continue;
                    }

                    var download = Crawler.Request(fileUrl);
                    var content = new DownloadContentModel();
                    content.Url = fileUrl;
                    content.IsRaw = download.IsRaw;
                    content.Data = download.Data;
                    storage.Insert(content);
                }
            }

            results.Add(result);
        }

        return results;
    }
    catch (Exception ex)
    {
        // The exception object itself is returned to the caller rather than rethrown.
        return ex;
    }
}
/// <summary>
/// Executes a crawl task: loads the feed identified by the task model, expands its
/// address through the "url" compiler and, for every resulting address, downloads
/// the feed, extracts a feed result, extracts the content addresses from a snapshot
/// of the response and collects a <c>ContentModel</c> for each address that yields
/// a result. Progress messages are reported through <c>task.Progress</c>.
/// </summary>
/// <param name="t">the task model; must be a <c>CrawlTaskModel</c></param>
/// <param name="task">the running task; its <c>Progress</c> is used as an <c>IProgress&lt;string&gt;</c></param>
/// <returns>
/// a list holding, per address, the feed extraction result followed by the collected
/// content models; <c>ClearContent</c> is applied to every entry when
/// <c>IncludeContent</c> is false
/// </returns>
/// <exception cref="ArgumentException"><paramref name="t"/> is not a <c>CrawlTaskModel</c></exception>
public object Run(object t, ParallelTask task)
{
    var model = t as CrawlTaskModel;
    if (model == null)
    {
        throw new ArgumentException("task model must be a CrawlTaskModel", "t");
    }

    var results = new List<object>();
    var reporter = task.Progress as IProgress<string>;

    reporter.Report("正在读取Feed记录");
    var feed = FeedLiteDb.GetFeed(model.FeedId);

    reporter.Report("正在下载 Feed");
    var addrs = CodeCompilerManager.GetResult("url", feed.Address);

    foreach (var addr in addrs)
    {
        feed.Address = addr.ToString();

        var job = new FeedJob();
        var response = job.DoTask(feed);
        reporter.Report("Feed 下载完成");

        var block = RuiJiBlockParser.ParserBlock(feed.RuiJiExpression);
        var feedResult = RuiJiExtractor.Extract(response.Data.ToString(), block);
        results.Add(feedResult);

        var snap = new FeedSnapshot
        {
            Url = feed.Address,
            Content = response.Data.ToString(),
            Type = feed.Type,
            RuiJiExpression = feed.RuiJiExpression
        };

        reporter.Report("正在提取Feed地址");
        var extractJob = new FeedExtractJob();
        var urls = extractJob.ExtractAddress(snap);
        reporter.Report("Feed地址提取完成");

        // The extracted addresses are only followed when the feed has an expression.
        if (!string.IsNullOrEmpty(snap.RuiJiExpression))
        {
            foreach (var url in urls)
            {
                reporter.Report("正在提取地址 " + url);

                var result = Cooperater.GetResult(url);
                if (result == null)
                {
                    continue;
                }

                results.Add(new ContentModel
                {
                    Id = model.FeedId,
                    Url = url,
                    Metas = result.Metas,
                    CDate = DateTime.Now
                });
            }
        }

        reporter.Report("计算完成");
    }

    // Clear content once, after all addresses are processed. The original did this
    // inside the loop, re-clearing every accumulated result on each iteration.
    // NOTE(review): this assumes ClearContent is idempotent - confirm.
    if (!model.IncludeContent)
    {
        foreach (var item in results)
        {
            ClearContent(item);
        }
    }

    return results;
}