///// <summary>
///// GetWebContent
///// </summary>
///// <param name="url"></param>
///// <param name="cookies"></param>
///// <returns></returns>
//private string GetWebContent(string url,ref string cookies)
//{
//    MyHttpHelper myHttpHelper = new MyHttpHelper();
//    HttpItem httpItem = new HttpItem
//    {
//        URL = url,
//        MaximumAutomaticRedirections = 10,
//        Timeout = 60000,
//        Allowautoredirect = false,
//        UserAgent = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36"
//    };
//    if (!string.IsNullOrEmpty(Cookies))
//        httpItem.Cookie = cookies;
//    HttpResult httpResult = myHttpHelper.GetHtml(httpItem);
//    if (string.IsNullOrEmpty(cookies))
//    {
//        cookies = httpResult.Cookie;
//        CookieCollection cookie = httpResult.CookieCollection;
//    }
//    return httpResult.Html;
//}

/// <summary>
/// Requests <paramref name="nextUrl"/> through <c>WebRequestCtrl.GetWebContent</c> and keeps
/// repeating the request for as long as it fails with a timeout message.
/// </summary>
/// <param name="nextUrl">Address to request.</param>
/// <param name="postData">Request body forwarded to the request helper.</param>
/// <param name="cookies">Cookie string, passed by reference; a null value is replaced with an empty string before the first request.</param>
/// <param name="currentUrl">Value assigned to the <c>Refere</c> request setting.</param>
/// <returns>
/// The fetched content, or an empty string when the request threw an exception
/// whose text does not contain one of the timeout markers.
/// </returns>
protected override string GetMainWebContent(string nextUrl, byte[] postData, ref string cookies, string currentUrl)
{
    WebRequestCtrl.GetWebContentParam requestParam = WebRequestCtrl.GetWebContentParam.Default;
    requestParam.Refere = currentUrl;
    requestParam.MaxRedirect = 10;
    requestParam.TimeOut = 60000;

    if (cookies == null)
    {
        cookies = string.Empty;
    }

    // There is no retry limit: only a successful response or a non-timeout failure leaves the loop.
    while (true)
    {
        try
        {
            return WebRequestCtrl.GetWebContent(nextUrl, postData, ref cookies, 1, requestParam);
        }
        catch (Exception ex)
        {
            var details = ex.ToString();
            Console.WriteLine(details);

            var timedOut = details.Contains("操作超时") || details.Contains("操作已超时");
            if (!timedOut)
            {
                // Any other failure is logged above and reported to the caller as empty content.
                return string.Empty;
            }
        }
    }
}
/// <summary>
/// Builds and sends the first search request (for example case type: criminal case,
/// court region: Beijing, judgment year: 2016) and initialises <c>_pageInfo</c> from the response.
/// </summary>
/// <param name="param">
/// Search parameters; <c>Reason</c> is used as the case type (criminal, civil, administrative,
/// compensation or enforcement case), <c>Court</c> as the court region and <c>Year</c> as the judgment year.
/// </param>
/// <returns><c>HomePage</c>.</returns>
protected override string InitFirstUrl(FaYuanParameter param)
{
    IDictionary<string, string> dic = new Dictionary<string, string>();
    dic.Add("Param", $"案件类型:{param.Reason},法院地域:{param.Court},裁判年份:{param.Year}");
    dic.Add("Page", "20");
    dic.Add("Order", "裁判日期");
    dic.Add("Index", "1");
    dic.Add("Direction", "asc");

    var postData = WebRequestCtrl.BuildPostDatas(dic, Encoding.UTF8);
    var cookies = string.Empty;
    var htmlString = base.GetMainWebContent(HomePage, postData, ref cookies, HomePage);
    //var htmlString = GetHtmlFromPost(HomePage, Encoding.UTF8,$"Param=案件类型:{param.Keyword}&Page=20&Order=法院层级&Index=1&Direction=asc");

    _pageInfo = new PageInfo
    {
        Index = 1,
        CaseType = param.Reason,
        Area = param.Court,
        Year = param.Year,
        Page = 20,
        TotalPages = GetTotalPages(htmlString)
    };

    // Round up so that a trailing partial page is still counted; plain integer division
    // dropped it and produced 0 whenever the total was smaller than the page size.
    _pageInfo.TotalIndex = (_pageInfo.TotalPages / _pageInfo.Page)
                           + (_pageInfo.TotalPages % _pageInfo.Page == 0 ? 0 : 1);
    Console.WriteLine($"总共页数:{_pageInfo.TotalIndex}");

    return HomePage;
}
/// <summary>
/// Fetches <paramref name="nextUrl"/> through <c>WebRequestCtrl.GetWebContent</c> with
/// <paramref name="currentUrl"/> as the <c>Refere</c> setting and <c>MaxRedirect</c> set to 20.
/// </summary>
/// <param name="nextUrl">Address to request.</param>
/// <param name="postData">Request body forwarded to the request helper.</param>
/// <param name="cookies">Cookie string, passed by reference; a null value is replaced with an empty string.</param>
/// <param name="currentUrl">Value assigned to the <c>Refere</c> request setting.</param>
/// <returns>The content returned by the request helper.</returns>
protected override string GetMainWebContent(string nextUrl, byte[] postData, ref string cookies, string currentUrl)
{
    var requestParam = WebRequestCtrl.GetWebContentParam.Default;
    requestParam.Refere = currentUrl;
    requestParam.MaxRedirect = 20;

    if (cookies == null)
    {
        cookies = string.Empty;
    }

    var content = WebRequestCtrl.GetWebContent(nextUrl, postData, ref cookies, 1, requestParam);
    return content;
}
/// <summary>
/// Enqueues one post-data string per page into <c>_postDataQueue</c>, for pages 1 through <c>_totalPage</c>.
/// </summary>
/// <param name="infoDic">
/// Form fields used to build each request; its <c>"page"</c> entry is overwritten on every
/// iteration, so the dictionary is left holding the last page number.
/// </param>
private void InitPostDataQueue(Dictionary<string, string> infoDic)
{
    for (var i = 0; i < _totalPage; i++)
    {
        // The arithmetic has to happen inside the interpolation hole: "{i} + 1" produced
        // the literal text "0 + 1", "1 + 1", ... instead of the 1-based page number.
        infoDic["page"] = $"{i + 1}";

        var postDataCur = WebRequestCtrl.BuildPostDatas(infoDic, Encoding.UTF8);

        // Decode with the same encoding the bytes were built with so non-ASCII data round-trips.
        _postDataQueue.Enqueue(Encoding.UTF8.GetString(postDataCur));
    }
}
/* /// <summary>
 * /// Gets user data
 * /// </summary>
 * /// <param name="shopId">The shop identifier.</param>
 * /// <returns></returns>
 * private IResut GetCompanyInfo(string shopId)
 * {
 *     IResut resut;
 *     companyInfoDictionary.TryGetValue(shopId, out resut);
 *     if (resut != null)
 *     {
 *         return resut;
 *     }
 *
 *     resut = this.GetCompanyInfoOnline(shopId);
 *     companyInfoDictionary[shopId] = resut;
 *
 *     return resut;
 * }*/

/* /// <summary>
 * /// Fetches the user's company information online
 * /// </summary>
 * /// <param name="shopId">The shop identifier.</param>
 * /// <returns></returns>
 * private IResut GetCompanyInfoOnline(string shopId)
 * {
 *     var url = $"http://mall.jd.com/shopLevel-{shopId}.html";
 *     var cookies = this.Cookies ?? string.Empty;
 *     var webContent = WebRequestCtrl.GetWebContent(url, null, ref cookies, 2);
 *
 *     var parser = new CompanyInfoParser();
 *     return parser.Parse(webContent);
 * }*/

/* /// <summary>
 * /// Gets company information
 * /// </summary>
 * /// <param name="shopIds">The shop ids.</param>
 * /// <returns></returns>
 * private IDictionary<string, IResut> GetCompanyInfos(string[] shopIds)
 * {
 *     IDictionary<string, IResut> resultDictionary = new Dictionary<string, IResut>();
 *     foreach (var shopId in shopIds)
 *     {
 *         var resut = this.GetCompanyInfo(shopId);
 *         resultDictionary[shopId] = resut;
 *     }
 *
 *     return resultDictionary;
 * }*/

/// <summary>
/// Collects shop information for the shop ids parsed out of <c>HtmlSource</c>: requests the
/// shop details as JSON in a single call and converts every array entry that carries a
/// <c>shop_id</c> into a result keyed by that id.
/// </summary>
/// <returns>
/// Results keyed by shop id; empty when no shop ids were parsed from the page.
/// </returns>
private IDictionary<string, IResut> GetListShopItemInfo()
{
    IDictionary<string, IResut> shops = new Dictionary<string, IResut>();

    var ids = this.ParseShopIds(this.HtmlSource);
    if (ids.Length == 0)
    {
        return shops;
    }

    // http://search.jd.com/shop_new.php?ids=1000004042,1000000922,1000015485,1000015427,1000010664
    var query = Url.CreateQueryCollection();
    query.Add(@"ids", string.Join(",", ids));
    var requestUrl = Url.CombinUrl(@"http://search.jd.com/shop_new.php", query);

    var cookies = this.Cookies ?? string.Empty;
    var requestParam = WebRequestCtrl.GetWebContentParam.Default;
    requestParam.Refere = this.CurrentUrl;
    var json = WebRequestCtrl.GetWebContent(requestUrl, null, ref cookies, 2, requestParam);

    this.TryUpdateHasMoreFlag(json);

    var searchKeyword = this.InnerParam.Keyword;
    var pageIndex = this.CurrentPage;

    var entries = JArray.Parse(json);
    for (var position = 0; position < entries.Count; position++)
    {
        var entry = entries[position];

        // The rank is the 1-based position in the response, so entries without an id still use up a rank.
        var rank = position + 1;

        var shopId = entry[@"shop_id"]?.Value<string>();
        if (shopId != null)
        {
            var shop = this.ConvertToResult((JObject)entry);
            shop[@"ShopUrl"] = $"http://mall.jd.com/index-{shopId}.html";
            shop[@"SearchKeyword"] = searchKeyword;
            this.SetResultSearchPageRank(shop, rank);
            this.SetResultSearchPageIndex(shop, pageIndex);

            shops[shopId] = shop;
        }
    }

    return shops;
}
/// <summary>
/// Fetches the result page described by <c>_pageInfo</c>: builds the search form from the
/// case type plus the optional reason, court and year filters, posts it to <c>HomePage</c>,
/// and repeats the request once after captcha handling when the response is the <c>"remind"</c> marker.
/// </summary>
/// <param name="nextUrl">Not used; the request always goes to <c>HomePage</c>.</param>
/// <param name="postData">Not used; the post data is rebuilt from <c>_pageInfo</c>.</param>
/// <param name="cookies">Cookie string, passed by reference to the base implementation.</param>
/// <param name="currentUrl">Not used; <c>HomePage</c> is passed as the current URL.</param>
/// <returns>The page content returned by the base implementation.</returns>
protected override string GetMainWebContent(string nextUrl, byte[] postData, ref string cookies, string currentUrl)
{
    IDictionary<string, string> dic = new Dictionary<string, string>();

    var paramString = $"案件类型:{_pageInfo.CaseType}";
    if (!string.IsNullOrEmpty(_pageInfo.ReasonLevel) && !string.IsNullOrEmpty(_pageInfo.ReasonKey))
    {
        paramString = $"{paramString},{_pageInfo.ReasonLevel}:{_pageInfo.ReasonKey}";
    }

    if (!string.IsNullOrEmpty(_pageInfo.CourtLevel) && !string.IsNullOrEmpty(_pageInfo.CourtKey))
    {
        paramString = $"{paramString},{_pageInfo.CourtLevel}:{_pageInfo.CourtKey}";
    }

    if (!string.IsNullOrEmpty(_pageInfo.Year))
    {
        paramString = $"{paramString},裁判年份:{_pageInfo.Year}";
    }

    // Static Equals tolerates a null CourtLevel, which the guard above explicitly allows;
    // the instance call threw NullReferenceException in that case.
    if (string.Equals(_pageInfo.CourtLevel, "法院地域"))
    {
        paramString = $"{paramString},法院层级:高级法院";
    }

    dic.Add("Param", paramString);
    dic.Add("Page", "20");
    dic.Add("Order", "裁判日期");

    // Indexes up to MaxIndex are requested in ascending order; anything beyond that is
    // requested as (Index - MaxIndex) in descending order.
    if (_pageInfo.Index <= _pageInfo.MaxIndex)
    {
        dic.Add("Index", $"{_pageInfo.Index}");
        dic.Add("Direction", "asc");
    }
    else
    {
        dic.Add("Index", $"{_pageInfo.Index - _pageInfo.MaxIndex}");
        dic.Add("Direction", "desc");
    }

    var postDataCur = WebRequestCtrl.BuildPostDatas(dic, Encoding.UTF8);
    var htmlString = base.GetMainWebContent(HomePage, postDataCur, ref cookies, HomePage);

    const string shielded = "\"remind\"";
    if (!string.Equals(htmlString, shielded))
    {
        return htmlString;
    }

    // The response equals the marker string, so handle the captcha and request the page again.
    this.LoopHandleValidateCode();
    return base.GetMainWebContent(HomePage, postDataCur, ref cookies, HomePage);
}
/// <summary>
/// Submits a captcha code to the verification endpoint and reports whether the
/// <c>message</c> field of the response is <c>true</c>.
/// </summary>
/// <param name="code">Captcha text, posted as <c>verifyCode</c>.</param>
/// <param name="name">Value posted as <c>name</c>.</param>
/// <returns><c>true</c> when the extracted <c>message</c> value equals <c>"true"</c>; otherwise <c>false</c>.</returns>
private bool VerificationCode(string code, string name)
{
    IDictionary<string, string> form = new Dictionary<string, string>
    {
        { "verifyCode", code },
        { "name", name }
    };

    var body = WebRequestCtrl.BuildPostDatas(form, Encoding.UTF8);
    var response = base.GetWebContent(
        "http://gsxt.zjaic.gov.cn/search/doValidatorVerifyCode.do",
        body,
        ref _cookies,
        "http://gsxt.zjaic.gov.cn/search/doEnGeneralQueryPage.do",
        true);

    // Extract the text between "message":" and the next double quote.
    var messageMatch = Regex.Match(response, "(?<=\"message\":\").*?(?=\")");
    return messageMatch.Value == "true";
}
/// <summary>
/// Posts the search form and stores in <c>_corpid</c> the <c>corpid</c> query value of the
/// first <c>doViewAppBasicInfoByLog.do</c> link found in the response.
/// </summary>
/// <param name="code">Captcha text, posted as <c>verifyCode</c>.</param>
/// <param name="name">Value posted as <c>name</c>.</param>
/// <remarks><c>_corpid</c> is set to an empty string when the response contains no such link.</remarks>
private void GetCorpid(string code, string name)
{
    IDictionary<string, string> dic = new Dictionary<string, string>();
    dic.Add("clickType", "1");
    dic.Add("verifyCode", code);
    dic.Add("name", name);

    var postData = WebRequestCtrl.BuildPostDatas(dic, Encoding.UTF8);
    var htmlString = base.GetWebContent(
        "http://gsxt.zjaic.gov.cn/search/doGetAppSearchResult.do",
        postData,
        ref _cookies,
        "http://gsxt.zjaic.gov.cn/search/doEnGeneralQueryPage.do",
        false);

    // Lazy quantifier: stop at the quote that closes the href value. The greedy ".*" ran to
    // the last quote on the line and could drag trailing attributes into the id.
    _corpid = Regex.Match(
        htmlString,
        @"(?<=href=""/appbasicinfo/doViewAppBasicInfoByLog\.do\?corpid=).*?(?="")").Value;
}
/// <summary>
/// Requests <paramref name="nextUrl"/> through <c>WebRequestCtrl.GetResponse</c> and returns the
/// response content as a string.
/// </summary>
/// <param name="nextUrl">Address to request; it must be parseable by <see cref="System.Uri"/>.</param>
/// <param name="postData">Request body passed to the request setting.</param>
/// <param name="cookies">Cookie string passed to the request setting; this method does not assign a new value to it.</param>
/// <param name="currentUrl">Passed to the request setting after the cookies (presumably the referer; confirm against <c>WebRequestCtrlSetting</c>).</param>
/// <returns>The response content string.</returns>
/// <remarks>When the response reports an error, that exception is rethrown to the caller.</remarks>
protected override string GetMainWebContent(string nextUrl, byte[] postData, ref string cookies, string currentUrl)
{
    Uri uri = new Uri(nextUrl);

    // The last argument is 5 * 60000; presumably a timeout in milliseconds (confirm against WebRequestCtrlSetting).
    WebRequestCtrl.WebRequestCtrlSetting setting =
        new WebRequestCtrl.WebRequestCtrlSetting(uri, postData, true, cookies, currentUrl, 5 * 60000);

    using (var args = WebRequestCtrl.GetResponse(setting))
    {
        var exception = args.Error;
        if (exception != null)
        {
            // Rethrow without resetting the stack trace recorded where the error occurred;
            // "throw exception;" would replace it with this frame.
            System.Runtime.ExceptionServices.ExceptionDispatchInfo.Capture(exception).Throw();
        }

        return args.GetWebContentString();
    }
}
/// <summary>
/// Returns the page content of the given URL.
/// </summary>
/// <param name="url">The URL to request.</param>
/// <param name="postData">The post data sent with the request; may be null.</param>
/// <param name="cookies">The cookies, passed by reference; a null value is replaced with an empty string.</param>
/// <param name="refere">Value assigned to the <c>Refere</c> request setting.</param>
/// <param name="isAjax">Whether the request is an ajax request; assigned to the <c>IsAjax</c> request setting.</param>
/// <returns>The content returned by <c>WebRequestCtrl.GetWebContent</c>.</returns>
protected string GetWebContent(
    string url,
    byte[] postData,
    ref string cookies,
    string refere = "",
    bool isAjax = false)
{
    var param = WebRequestCtrl.GetWebContentParam.Default;
    param.Refere = refere;
    param.IsAjax = isAjax;

    cookies = cookies ?? string.Empty;

    // Forward the caller's post data; passing null here silently discarded every form
    // that callers built for this method.
    var webContent = WebRequestCtrl.GetWebContent(url, postData, ref cookies, 1, param);
    return webContent;
}
/// <summary>
/// Fetches the result page for the current <c>_pageInfo</c> by posting a search form
/// (case type, court region, judgment year, page index) to <c>HomePage</c>.
/// </summary>
/// <param name="nextUrl">Not used; the request always goes to <c>HomePage</c>.</param>
/// <param name="postData">Not used; the post data is rebuilt from <c>_pageInfo</c>.</param>
/// <param name="cookies">Cookie string, passed by reference to the base implementation.</param>
/// <param name="currentUrl">Not used; <c>HomePage</c> is passed as the current URL.</param>
/// <returns>The page content returned by the base implementation.</returns>
protected override string GetMainWebContent(
    string nextUrl,
    byte[] postData,
    ref string cookies,
    string currentUrl)
{
    IDictionary<string, string> form = new Dictionary<string, string>
    {
        { "Param", $"案件类型:{_pageInfo.CaseType},法院地域:{_pageInfo.Area},裁判年份:{_pageInfo.Year}" },
        { "Page", "20" },
        { "Order", "法院层级" },
        { "Index", $"{_pageInfo.Index}" },
        { "Direction", "asc" }
    };

    var body = WebRequestCtrl.BuildPostDatas(form, Encoding.UTF8);
    var content = base.GetMainWebContent(HomePage, body, ref cookies, HomePage);
    return content;
    //return GetHtmlFromPost(HomePage, Encoding.UTF8, $"Param=案件类型:{_pageInfo.Reason}&Page=20&Order=法院层级&Index={_pageInfo.Index}&Direction=asc");
}
/// <summary>
/// Solves the captcha for the requested site, posts the query form built from <c>_name</c>
/// and <c>_identifier</c>, and returns the resulting page.
/// </summary>
/// <param name="urlType">
/// <c>UrlType.ZhiXing</c> selects the ZhiXing captcha and validation URLs; any other value
/// selects the ShiXin ones.
/// </param>
/// <returns>
/// The returned page content. On the ShiXin path, <c>_failInfo</c> is returned instead when no
/// cookies/captcha code or no page content could be obtained within three attempts.
/// </returns>
private string GetHtmlByUrl(UrlType urlType)
{
    TesseractDemo tesseractDemo = new TesseractDemo();
    var dic = new Dictionary<string, string>();
    string cookies;
    string code;

    if (urlType == UrlType.ZhiXing)
    {
        code = tesseractDemo.GetValidateCodeByUrlExtend(_zhixingCaptchaUrl);

        // This part of the cookie must be removed for the correct content to be displayed.
        // Null cookies are treated as empty instead of throwing NullReferenceException.
        cookies = (tesseractDemo.Cookies ?? string.Empty).Replace(";Path=/search", "");

        dic.Add("selectCourtId", "1");
        dic.Add("selectCourtArrange", "1");
        dic.Add("searchCourtName", "全国法院(包含地方各级法院)");
        dic.Add("pname", _name);
        dic.Add("cardNum", _identifier);
        dic.Add("j_captcha", code ?? string.Empty);

        var zhixingPostData = WebRequestCtrl.BuildPostDatas(dic, Encoding.UTF8);
        return base.GetWebContent(_zhixingValidateUrl, zhixingPostData, ref cookies);
    }

    // Build the dated captcha URL in a local. Writing it back to _shixinCaptchaUrl appended
    // another "?date=..." suffix to the field on every call.
    var time = DateTime.Now;
    var captchaUrl = $"{_shixinCaptchaUrl}?date={time}";

    code = tesseractDemo.GetValidateCodeByUrlExtend(captchaUrl);
    cookies = tesseractDemo.Cookies;

    var times = 1;
    while (string.IsNullOrEmpty(cookies) || string.IsNullOrEmpty(code))
    {
        times++;
        if (times > 3)
        {
            // Three requests yielded no cookies, or parsing the captcha failed.
            return _failInfo;
        }

        code = tesseractDemo.GetValidateCodeByUrl(captchaUrl);
        cookies = tesseractDemo.Cookies;
    }

    cookies = cookies.Replace("; Path=/", "");

    dic.Add("pProvince", "0");
    dic.Add("pName", _name);
    dic.Add("pCode", code);
    dic.Add("pCardNum", _identifier);

    var postData = WebRequestCtrl.BuildPostDatas(dic, Encoding.UTF8);
    var html = GetHtmlFaildReturnEmpty(_shixinValidateUrl, postData, cookies);

    times = 1;
    while (string.IsNullOrEmpty(html))
    {
        times++;
        if (times > 3)
        {
            // Three requests yielded no page content.
            return _failInfo;
        }

        html = GetHtmlFaildReturnEmpty(_shixinValidateUrl, postData, cookies);
    }

    return html;
}
/// <summary>
/// Builds and sends the first search request (for example case type: criminal case,
/// court region: Beijing, judgment year: 2016), handles the captcha when the response is the
/// <c>"remind"</c> marker, and initialises <c>_pageInfo</c> from the response.
/// </summary>
/// <param name="param">Search parameters, converted to a key/value lookup by <c>GetParam</c>.</param>
/// <returns><c>HomePage</c>.</returns>
protected override string InitFirstUrl(FaYuanParameter param)
{
    var paramDic = GetParam(param);

    // Example: case type: civil case, level-4 cause: will inheritance dispute,
    // intermediate court: Beijing No.1 Intermediate People's Court, judgment year: 2016
    IDictionary<string, string> dic = new Dictionary<string, string>();

    var paramString = $"案件类型:{paramDic["CaseType"]}";
    if (!string.IsNullOrEmpty(paramDic["ReasonLevel"]) && !string.IsNullOrEmpty(paramDic["ReasonKey"]))
    {
        paramString = $"{paramString},{paramDic["ReasonLevel"]}:{paramDic["ReasonKey"]}";
    }

    if (!string.IsNullOrEmpty(paramDic["CourtLevel"]) && !string.IsNullOrEmpty(paramDic["CourtKey"]))
    {
        paramString = $"{paramString},{paramDic["CourtLevel"]}:{paramDic["CourtKey"]}";
    }

    if (!string.IsNullOrEmpty(paramDic["Year"]))
    {
        paramString = $"{paramString},裁判年份:{paramDic["Year"]}";
    }

    // Static Equals tolerates a null court level, which the guard above explicitly allows;
    // the instance call threw NullReferenceException in that case.
    if (string.Equals(paramDic["CourtLevel"], "法院地域"))
    {
        paramString = $"{paramString},法院层级:高级法院";
    }

    dic.Add("Param", paramString);
    dic.Add("Page", "20");
    dic.Add("Order", "裁判日期");
    dic.Add("Index", "1");
    dic.Add("Direction", "asc");

    var postData = WebRequestCtrl.BuildPostDatas(dic, Encoding.UTF8);
    var cookies = string.Empty;
    var htmlString = base.GetMainWebContent(HomePage, postData, ref cookies, HomePage);

    // Captcha handling: the marker response means the request was blocked.
    const string shielded = "\"remind\"";
    if (string.Equals(htmlString, shielded))
    {
        // Per the original note, this loops until the captcha is accepted.
        this.LoopHandleValidateCode();

        // Repeat the request now that the captcha has been handled.
        htmlString = base.GetMainWebContent(HomePage, postData, ref cookies, HomePage);
    }

    // Record the search parameters and paging state.
    _pageInfo = new PageInfo
    {
        Index = 1,
        CaseType = paramDic["CaseType"],
        ReasonLevel = paramDic["ReasonLevel"],
        ReasonKey = paramDic["ReasonKey"],
        CourtLevel = paramDic["CourtLevel"],
        CourtKey = paramDic["CourtKey"],
        Year = paramDic["Year"],
        Page = 20,
        TotalPages = GetTotalPages(htmlString),
    };

    // Round up so a trailing partial page is counted.
    _pageInfo.TotalIndex = (_pageInfo.TotalPages / _pageInfo.Page)
                           + (_pageInfo.TotalPages % _pageInfo.Page == 0 ? 0 : 1);

    // The number of indexes actually visited is capped at twice MaxIndex.
    _pageInfo.ActuallyIndex = _pageInfo.TotalIndex > 2 * _pageInfo.MaxIndex
        ? 2 * _pageInfo.MaxIndex
        : _pageInfo.TotalIndex;

    return HomePage;
}