/// <summary>
/// Persists the response of a feed request to disk as an indented JSON <see cref="FeedSnapshot"/>.
/// </summary>
/// <param name="feedRequest">Feed request supplying the request URI, the expression and the setting (id, delay).</param>
/// <param name="response">Response to store. When null, an error is logged and nothing is written.</param>
protected void Save(FeedRequest feedRequest, Response response)
{
    if (response == null)
    {
        Logger.GetLogger(baseUrl).Error(feedRequest.Request.Uri + " response save response is null.");
        return;
    }

    var origin = feedRequest.Request;

    // Hand the raw body to Convert together with the encoding named by the
    // response's charset (source) and UTF-8 (target).
    var rawBody = response.Data.ToString();
    var declaredEncoding = Encoding.GetEncoding(response.Charset);
    var utf8Content = Convert(rawBody, declaredEncoding, Encoding.UTF8);

    var snapshot = new FeedSnapshot();
    snapshot.Url = origin.Uri.ToString();
    snapshot.Content = utf8Content;
    snapshot.RuiJiExpression = feedRequest.Expression;

    var serialized = JsonConvert.SerializeObject(snapshot, Formatting.Indented);

    // Default target: <snapshotPath>/<id>_<current ticks>.json
    var setting = feedRequest.Setting;
    var targetPath = Path.Combine(snapshotPath, setting.Id + "_" + DateTime.Now.Ticks + ".json");

    // A positive delay redirects the file to delayPath, stamped with the
    // time that lies Delay minutes in the future.
    if (setting.Delay > 0)
    {
        var dueAt = DateTime.Now.AddMinutes(setting.Delay);
        targetPath = Path.Combine(delayPath, setting.Id + "_" + dueAt.Ticks + ".json");
    }

    Logger.GetLogger(baseUrl).Info(origin.Uri + " response save to " + targetPath);
    File.WriteAllText(targetPath, serialized, Encoding.UTF8);
}
/// <summary>
/// Requests the feed address and, when the response is HTTP 200, wraps its content in a
/// <see cref="FeedSnapshot"/>.
/// </summary>
/// <param name="feed">Feed definition supplying the address, method, headers, post parameters, type and expressions.</param>
/// <param name="persistence">When true, the snapshot is also written to disk as indented JSON.</param>
/// <returns>
/// The snapshot, or null when the response is null, its status is not 200 OK, or any exception occurred.
/// </returns>
public FeedSnapshot DoTask(FeedModel feed, bool persistence = false)
{
    try
    {
        var request = new Request(feed.Address);
        if (feed.Headers != null)
        {
            request.Headers = feed.Headers;
        }

        // Supply a default Referer only when none is present; adding it
        // unconditionally duplicated a Referer that came with feed.Headers.
        if (!request.Headers.Any(m => m.Name == "Referer"))
        {
            request.Headers.Add(new WebHeader("Referer", request.Uri.AbsoluteUri));
        }

        request.Method = feed.Method;
        if (feed.Method == "POST" && !string.IsNullOrEmpty(feed.PostParam))
        {
            request.PostParam = feed.PostParam;
        }

        var response = new RuiJi.Net.NodeVisitor.Crawler().Request(request);
        if (response == null || response.StatusCode != HttpStatusCode.OK)
        {
            return null;
        }

        // Pass the body through Convert with the response's declared charset as source and UTF-8 as target.
        var content = Convert(response.Data.ToString(), Encoding.GetEncoding(response.Charset), Encoding.UTF8);

        var snap = new FeedSnapshot
        {
            Url = feed.Address,
            Content = content,
            Type = feed.Type,
            BlockExpression = feed.BlockExpression,
            RuiJiExpression = feed.RuiJiExpression
        };

        if (persistence)
        {
            var json = JsonConvert.SerializeObject(snap, Formatting.Indented);

            // Feeds with a positive delay are written to the "delay" folder and stamped
            // with the time Delay minutes from now; all others go to "snapshot".
            var fileName = baseDir + @"snapshot\" + feed.Id + "_" + DateTime.Now.Ticks + ".json";
            if (feed.Delay > 0)
            {
                fileName = baseDir + @"delay\" + feed.Id + "_" + DateTime.Now.AddMinutes(feed.Delay).Ticks + ".json";
            }

            File.WriteAllText(fileName, json, Encoding.UTF8);
        }

        return snap;
    }
    catch (Exception ex)
    {
        // Keep the best-effort contract (callers get null), but report the failure
        // instead of discarding it. Nothing is dereferenced here so the handler
        // itself cannot throw.
        System.Diagnostics.Trace.TraceError("DoTask failed: " + ex);
    }

    return null;
}
/// <summary>
/// Extracts the distinct link addresses found in a feed snapshot's content.
/// </summary>
/// <param name="feed">
/// Snapshot supplying the content to extract from, the URL used to resolve relative links,
/// and the optional block / RuiJi expression that overrides the default selector.
/// </param>
/// <returns>
/// Distinct addresses with any "#fragment" removed and relative links resolved against
/// <c>feed.Url</c>. Tiles without content and links that are empty once the fragment is
/// removed are skipped.
/// </returns>
public List<string> ExtractAddress(FeedSnapshot feed)
{
    // Default block: a single CSS selector built from ("a", "href") —
    // presumably the href attribute of anchor elements.
    var block = new ExtractBlock();
    block.TileSelector.Selectors.Add(new CssSelector("a", "href"));

    if (feed.UseBlock)
    {
        // A serialized block expression replaces the default block entirely.
        if (!string.IsNullOrEmpty(feed.BlockExpression))
        {
            block = JsonConvert.DeserializeObject<ExtractBlock>(feed.BlockExpression);
        }
    }
    else if (!string.IsNullOrEmpty(feed.RuiJiExpression))
    {
        // A RuiJi expression replaces only the tile selectors of the default block.
        block.TileSelector.Selectors.Clear();
        block.TileSelector.Selectors.AddRange(RuiJiBlockParser.ParserBase(feed.RuiJiExpression).Selectors);
    }

    var result = RuiJiExtractor.Extract(feed.Content, block);
    var results = new List<string>();
    if (result.Tiles == null)
    {
        return results;
    }

    // Built on first use so a snapshot without relative links never parses feed.Url.
    Uri baseUri = null;

    foreach (var item in result.Tiles)
    {
        if (item.Content == null)
        {
            continue;
        }

        var href = item.Content.ToString();

        // Drop the fragment so "page#a" and "page#b" collapse to one address.
        var fragmentAt = href.IndexOf('#');
        if (fragmentAt >= 0)
        {
            href = href.Substring(0, fragmentAt);
        }

        // Fragment-only links such as "#" carry no address of their own.
        if (string.IsNullOrEmpty(href))
        {
            continue;
        }

        if (Uri.IsWellFormedUriString(href, UriKind.Relative))
        {
            if (baseUri == null)
            {
                baseUri = new Uri(feed.Url);
            }

            href = new Uri(baseUri, href).AbsoluteUri;
        }

        results.Add(href);
    }

    return results.Distinct().ToList();
}
/// <summary>
/// Requests the feed address and, when the response is HTTP 200, wraps its content in a
/// <see cref="FeedSnapshot"/>.
/// </summary>
/// <param name="feed">Feed definition supplying the address, method, headers, data, RunJS flag, type and expressions.</param>
/// <param name="persistence">When true, the snapshot is also written to disk as indented JSON.</param>
/// <returns>
/// The snapshot, or null when the response is null, its status is not 200 OK, or any exception occurred
/// (the exception is logged, not rethrown).
/// </returns>
public FeedSnapshot DoTask(FeedModel feed, bool persistence = false)
{
    try
    {
        Logger.GetLogger(baseUrl).Info("do task -> request address " + feed.Address);

        var request = new Request(feed.Address);
        request.RunJS = (feed.RunJS == Status.ON);

        if (feed.Headers != null)
        {
            // NOTE(review): this looks like it aliases feed.Headers, so headers added
            // below would also land in the feed's own list — confirm callers do not
            // reuse the FeedModel across calls.
            request.Headers = feed.Headers;

            // Add a default Referer only when the feed did not configure one.
            if (!request.Headers.Any(m => m.Name == "Referer"))
            {
                request.Headers.Add(new WebHeader("Referer", request.Uri.AbsoluteUri));
            }
        }

        request.Method = feed.Method;
        if (feed.Method == "POST" && !string.IsNullOrEmpty(feed.Data))
        {
            request.Data = feed.Data;
        }

        // Attach the User-Agent returned by UALiteDb.GetOne(), when it is non-empty.
        var ua = UALiteDb.GetOne();
        if (!string.IsNullOrEmpty(ua))
        {
            request.Headers.Add(new WebHeader("User-Agent", ua));
        }

        var response = NodeVisitor.Crawler.Request(request);
        if (response == null)
        {
            Logger.GetLogger(baseUrl).Error("request " + feed.Address + " response is null");
            return null;
        }

        Logger.GetLogger(baseUrl).Info("request " + feed.Address + " response code is " + response.StatusCode);
        if (response.StatusCode != HttpStatusCode.OK)
        {
            return null;
        }

        // Pass the body through Convert with the response's declared charset as source and UTF-8 as target.
        var content = Convert(response.Data.ToString(), Encoding.GetEncoding(response.Charset), Encoding.UTF8);

        var snap = new FeedSnapshot
        {
            Url = feed.Address,
            Content = content,
            Type = feed.Type,
            BlockExpression = feed.BlockExpression,
            RuiJiExpression = feed.RuiJiExpression
        };

        if (persistence)
        {
            var json = JsonConvert.SerializeObject(snap, Formatting.Indented);

            // Feeds with a positive delay are written to the "delay" folder and stamped
            // with the time Delay minutes from now; all others go to "snapshot".
            var fileName = baseDir + @"snapshot\" + feed.Id + "_" + DateTime.Now.Ticks + ".json";
            if (feed.Delay > 0)
            {
                fileName = baseDir + @"delay\" + feed.Id + "_" + DateTime.Now.AddMinutes(feed.Delay).Ticks + ".json";
            }

            Logger.GetLogger(baseUrl).Info(feed.Address + " response save to " + fileName);
            File.WriteAllText(fileName, json, Encoding.UTF8);
        }

        return snap;
    }
    catch (Exception ex)
    {
        // A failed task is an error, not routine information; log the whole exception
        // (type, message, stack trace) rather than only its message.
        Logger.GetLogger(baseUrl).Error("do task -> request address failed " + ex);
    }

    return null;
}