/// <summary>
/// Checks that <c>Cooperater.GetResult</c> yields a result for a known article URL.
/// </summary>
/// <remarks>
/// NOTE(review): this appears to depend on the remote page being reachable — confirm
/// whether it should be treated as an integration test.
/// </remarks>
public void TestUrlExtract()
{
    var result = Cooperater.GetResult("http://www.cannews.com.cn/2018/0606/177699.shtml");

    // The previous assertion was Assert.True(true), which could never fail and never
    // looked at the result; assert on the actual outcome instead.
    Assert.True(result != null);
}
/// <summary>
/// Runs a crawl task: loads the feed identified by the task model, downloads every
/// address the feed's address pattern compiles to, extracts the feed-level result and
/// the linked addresses from each snapshot, and collects everything in a single list.
/// </summary>
/// <param name="t">The task payload; must be a <c>CrawlTaskModel</c>.</param>
/// <param name="task">
/// The running task. Its <c>Progress</c> is used for status messages when it implements
/// <see cref="IProgress{T}"/> of <see cref="string"/>; otherwise messages are dropped.
/// </param>
/// <returns>
/// A <c>List&lt;object&gt;</c> holding, per compiled address, the feed extraction result
/// followed by one <c>ContentModel</c> for every linked URL that produced a result.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="task"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="t"/> is not a <c>CrawlTaskModel</c>.</exception>
/// <exception cref="InvalidOperationException">No feed was returned for the model's feed id.</exception>
public object Run(object t, ParallelTask task)
{
    if (task == null)
    {
        throw new ArgumentNullException("task");
    }

    var model = t as CrawlTaskModel;
    if (model == null)
    {
        // Fail with a descriptive error instead of a NullReferenceException further down.
        throw new ArgumentException("Expected a non-null CrawlTaskModel.", "t");
    }

    // Fall back to a handler-less Progress<string> so reporting is a no-op when the task
    // does not expose a string progress sink.
    IProgress<string> reporter = (task.Progress as IProgress<string>) ?? new Progress<string>();
    var results = new List<object>();

    reporter.Report("正在读取Feed记录");
    var feed = FeedLiteDb.GetFeed(model.FeedId);
    if (feed == null)
    {
        // Defensive: guards against a lookup miss — presumably GetFeed returns null for
        // an unknown id; verify against FeedLiteDb.
        throw new InvalidOperationException("Feed not found for id " + model.FeedId);
    }

    reporter.Report("正在下载 Feed");
    var compile = new UrlCompile();
    var addrs = compile.GetResult(feed.Address);

    foreach (var addr in addrs)
    {
        // The feed object is reused for every compiled address; only its Address changes.
        feed.Address = addr.ToString();

        var job = new FeedJob();
        var snap = job.DoTask(feed, false);
        reporter.Report("Feed 下载完成");

        var block = RuiJiBlockParser.ParserBlock(feed.RuiJiExpression);
        var feedResult = RuiJiExtractor.Extract(snap.Content, block);
        results.Add(feedResult);

        reporter.Report("正在提取Feed地址");
        var extractJob = new FeedExtractJob();
        var urls = extractJob.ExtractAddress(snap);
        reporter.Report("Feed地址提取完成");

        // Linked pages are only fetched when the snapshot carries an expression.
        if (!string.IsNullOrEmpty(snap.RuiJiExpression))
        {
            foreach (var url in urls)
            {
                reporter.Report("正在提取地址 " + url);

                var result = Cooperater.GetResult(url);
                if (result != null)
                {
                    var cm = new ContentModel();
                    cm.Id = model.FeedId;
                    cm.Url = url;
                    cm.Metas = result.Metas;
                    cm.CDate = DateTime.Now;
                    results.Add(cm);
                }
            }
        }

        reporter.Report("计算完成");
    }

    // Clear once after all addresses are processed. This sweep used to sit inside the
    // loop, re-clearing every earlier result on each iteration.
    // NOTE(review): assumes ClearContent is idempotent — confirm.
    if (!model.IncludeContent)
    {
        foreach (var item in results)
        {
            ClearContent(item);
        }
    }

    return results;
}