/// <summary>
/// Handles one xin.baidu.com search-result page: prints the company name of every result
/// item, then requests the search page for the next house number on the same road.
/// </summary>
/// <remarks>
/// Uses three entries of <c>response.meta</c>:
/// "road" (road name), "num" (current house number) and "empty" (number of consecutive
/// result pages that contained no items). "empty" is reset to 0 by any non-empty page;
/// once it exceeds <c>this.threshold</c> the walk along this road stops.
/// </remarks>
/// <param name="response">The response for the current search page together with its meta data.</param>
private void ParseCompanyName(HttpContentModel response)
{
    var doc = response.response.GetDocument();
    var lists = doc.QuerySelectorAll("div.zx-list-wrap div.zx-list-item");
    foreach (var item in lists)
    {
        // A result item without an "h3 a" link would otherwise cause a null dereference.
        var nameLink = item.QuerySelector("h3 a");
        if (nameLink == null)
        {
            continue;
        }

        Console.WriteLine(nameLink.TextContent);
    }

    var meta = response.meta;
    if (lists.Length == 0)
    {
        meta["empty"] = (int)meta["empty"] + 1;
    }
    else
    {
        meta["empty"] = 0;
    }

    // Too many consecutive empty pages: stop probing further house numbers on this road.
    if ((int)meta["empty"] > this.threshold)
    {
        return;
    }

    meta["num"] = (int)meta["num"] + 1;
    string road = meta["road"].ToString();
    string name = road + meta["num"].ToString() + "号";
    string url = $"https://xin.baidu.com/s?q={System.Web.HttpUtility.UrlEncode(name)}&t=0";

    // The same meta object is handed on so the counters carry over to the next page.
    this.http.Get(url, this.ParseCompanyName, meta);
}
/// <summary>
/// Parses a listing page and, for every company link found in the first column of the
/// table, requests the Baidu open-data API with that company name as the "iname" parameter.
/// </summary>
/// <param name="response">The response of the listing page.</param>
public void ParseListpage(HttpContentModel response)
{
    const string endpoint = "https://sp0.baidu.com/8aQDcjqpAAV3otqbppnN2DJv/api.php";

    var html = response.response.GetHtml();
    var document = parser.ParseDocument(html);
    var companyLinks = document.QuerySelectorAll("div.tableWrap tr td:nth-child(1) a");

    foreach (var link in companyLinks)
    {
        string companyName = link.TextContent;

        // Query-string parameters for the API call; "cb" is the JSONP callback name
        // that ParseCompanyPage strips from the response.
        var query = new Dictionary<string, string>
        {
            { "resource_id", "6899" },
            { "query", "失信被执行人名单" },
            { "cardNum", "" },
            { "iname", companyName },
            { "areaName", "" },
            { "ie", "utf-8" },
            { "oe", "utf-8" },
            { "format", "json" },
            { "t", DateHelper.GetTimestamp(DateTime.Now) },
            { "cb", "jQuery110208514957267839198_1561021902645" },
            { "_", DateHelper.GetTimestamp(DateTime.Now) },
        };

        string requestUrl = endpoint + "?" + UrlHelper.ConvertUrlParams(query);

        var requestMeta = new MetaModel();
        requestMeta.Add("companyName", companyName);

        this.http.Get(requestUrl, ParseCompanyPage, requestMeta);
    }
}
/// <summary>
/// Parses an app detail page into an <c>IosAppItem</c>. The fields are read from two JSON
/// blobs embedded in the HTML: the "application/ld+json" script and the
/// "shoebox-ember-data-store" script.
/// </summary>
/// <remarks>
/// Every JSON lookup is null-tolerant: if a blob is not found in the page or a property is
/// absent, the corresponding item fields are left null instead of throwing.
/// </remarks>
/// <param name="response">The detail-page response; its meta must contain "appName".</param>
private void ParseAppDetail(HttpContentModel response)
{
    var meta = response.meta as MetaModel;
    Console.WriteLine(meta["appName"]);

    IosAppItem appItem = new IosAppItem();
    appItem.AppName = meta["appName"].ToString();
    appItem.AppUrl = response.request.Url.ToString();

    string html = response.response.GetHtml();
    string json1 = RegexHelper.RegexOne(html, "class=\"ember-view\" type=\"application/ld\\+json\"\\>([\\s\\S]*?)\\<\\/script\\>");
    string json2 = RegexHelper.RegexOne(html, "script type=\"fastboot/shoebox\" id=\"shoebox-ember-data-store\"\\>([\\s\\S]*?)\\<\\/script\\>");

    // DeserializeObject throws on a null string, so only parse when the blob was found.
    JObject jObj1 = string.IsNullOrWhiteSpace(json1) ? null : JsonConvert.DeserializeObject<JObject>(json1);
    JObject jObj2 = string.IsNullOrWhiteSpace(json2) ? null : JsonConvert.DeserializeObject<JObject>(json2);

    appItem.Description = jObj1?["description"]?.ToString();
    appItem.AppType = jObj1?["applicationCategory"]?.ToString();
    appItem.FirstReleaseDate = jObj1?["datePublished"]?.ToString();
    appItem.Producer = jObj1?["author"]?["name"]?.ToString();
    appItem.ImgUrl = jObj1?["image"]?.ToString();
    appItem.Price = jObj1?["offers"]?["price"]?.ToString();

    var rating = jObj1?["aggregateRating"];
    appItem.Star = rating?["ratingValue"]?.ToString();
    appItem.FeedbackRate = appItem.Star;
    appItem.FeedbackCount = rating?["reviewCount"]?.ToString();
    appItem.CommentCount = appItem.FeedbackCount;

    var attributes = jObj2?["data"]?["attributes"];
    appItem.Size = attributes?["size"]?.ToString();

    // Indexing [0] on an empty array throws, so check the count before taking the first entry.
    var history = attributes?["versionHistory"] as JArray;
    var latestVersion = (history != null && history.Count > 0) ? history[0] : null;
    appItem.ReleaseDate = latestVersion?["releaseDate"]?.ToString();
    appItem.VersionNumber = latestVersion?["versionString"]?.ToString();

    // TODO: write appItem to the database
}
/// <summary>
/// Requests the sh.city8.com road index page for each letter A through Z, with
/// <c>ParseRoadName</c> as the callback.
/// </summary>
/// <param name="response">Not read by this method.</param>
private void ParseRoadIndex(HttpContentModel response)
{
    for (char letter = 'A'; letter <= 'Z'; letter++)
    {
        string url = $"http://sh.city8.com/road/{letter}/";
        this.http.Get(url, ParseRoadName);
    }
}
/// <summary>
/// Handles the JSONP response of the company query: prints the company name from the meta
/// data, unwraps the JSONP callback and iterates over <c>data[0].result</c>.
/// </summary>
/// <remarks>
/// Returns without iterating when the body is empty, when "data" is not a non-empty array,
/// or when its first element has no "result" property.
/// </remarks>
/// <param name="response">The API response; its meta must contain "companyName".</param>
public void ParseCompanyPage(HttpContentModel response)
{
    Console.WriteLine((response.meta)["companyName"]);

    string content = response.response.GetHtml();
    string jsonStr = StripJsonpWrapper(content);
    if (string.IsNullOrWhiteSpace(jsonStr))
    {
        return;
    }

    JObject jsonObj = JsonConvert.DeserializeObject<JObject>(jsonStr);

    // foreach over a null sequence throws, and [0] on an empty array throws,
    // so resolve the result list step by step.
    var data = jsonObj?["data"] as JArray;
    if (data == null || data.Count == 0)
    {
        return;
    }

    var results = (data[0] as JObject)?["result"];
    if (results == null)
    {
        return;
    }

    foreach (var item in results)
    {
        // Parse each result entry here as needed.
    }
}

/// <summary>
/// Removes a JSONP wrapper of the form <c>callback( ... );</c> and returns the payload
/// between the first '(' and the last ')'. Text that already starts with '{' or '[' , or
/// that contains no such parenthesis pair, is returned trimmed but otherwise unchanged.
/// </summary>
private static string StripJsonpWrapper(string content)
{
    if (string.IsNullOrWhiteSpace(content))
    {
        return content;
    }

    string trimmed = content.Trim();
    if (trimmed[0] == '{' || trimmed[0] == '[')
    {
        return trimmed;
    }

    // Only the outermost parentheses are removed, so ");" sequences inside the
    // payload itself are preserved.
    int open = trimmed.IndexOf('(');
    int close = trimmed.LastIndexOf(')');
    if (open < 0 || close <= open)
    {
        return trimmed;
    }

    return trimmed.Substring(open + 1, close - open - 1);
}
/// <summary>
/// Walks every link matched by "#selectedcontent div a" and requests its href with
/// <c>ParseAppDetail</c> as the callback. Each request gets its own copy of the current
/// meta data with "appName" set to the link text.
/// </summary>
/// <param name="response">The app list page response.</param>
private void ParseAppList(HttpContentModel response)
{
    var page = response.response.GetDocument();
    var appLinks = page.QuerySelectorAll("#selectedcontent div a");

    foreach (var link in appLinks)
    {
        var title = link.TextContent;
        var detailUrl = link.GetAttribute("href");

        // Copy so that requests issued from this page do not share one meta object.
        var detailMeta = response.meta.Copy();
        detailMeta["appName"] = title;

        this.http.Get(detailUrl, ParseAppDetail, detailMeta);
    }
}
/// <summary>
/// Reads the road names from a road index page and, for each road, requests the
/// xin.baidu.com search for house number 1 on that road with <c>ParseCompanyName</c>
/// as the callback.
/// </summary>
/// <remarks>
/// The meta data of each request starts with "num" = 1, "empty" = 0 and "road" set to the
/// trimmed link text.
/// </remarks>
/// <param name="response">The road index page response.</param>
private void ParseRoadName(HttpContentModel response)
{
    var page = response.response.GetDocument();
    var roadLinks = page.QuerySelectorAll("div.road_sahngjia.road_zm_list a");

    foreach (var link in roadLinks)
    {
        string road = link.TextContent.Trim();

        MetaModel state = new MetaModel();
        state.Add("num", 1);
        state.Add("empty", 0);
        state.Add("road", road);

        string query = $"{road}1号";
        string searchUrl = $"https://xin.baidu.com/s?q={System.Web.HttpUtility.UrlEncode(query)}&t=0";

        this.http.Get(searchUrl, this.ParseCompanyName, state);
    }
}
/// <summary>
/// Sends the request carried by <paramref name="model"/> through <c>SendAsync</c>.
/// </summary>
/// <param name="model">The content model whose <c>request</c> is sent.</param>
/// <returns>The awaited result of <c>SendAsync</c>.</returns>
public override async Task<ResponseEntity> CreateHttpRequestSend(HttpContentModel model)
{
    var result = await SendAsync(model.request);
    return result;
}
/// <summary>
/// Performs the request described by the given <c>HttpContentModel</c> and produces its
/// response. Abstract: the implementation is supplied by derived classes.
/// </summary>
/// <param name="model">The content model describing the request to perform.</param>
/// <returns>A task that yields the <c>ResponseEntity</c> for the request.</returns>
public abstract Task <ResponseEntity> CreateHttpRequestSend(HttpContentModel model);