///// <summary>
        ///// GetWebContent
        ///// </summary>
        ///// <param name="url"></param>
        ///// <param name="cookies"></param>
        ///// <returns></returns>
        //private string GetWebContent(string url,ref string cookies)
        //{
        //    MyHttpHelper myHttpHelper = new MyHttpHelper();
        //    HttpItem httpItem = new HttpItem
        //    {
        //        URL = url,
        //        MaximumAutomaticRedirections = 10,
        //        Timeout = 60000,
        //        Allowautoredirect = false,
        //        UserAgent = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36"
        //    };
        //    if (!string.IsNullOrEmpty(Cookies))
        //        httpItem.Cookie = cookies;
        //    HttpResult httpResult = myHttpHelper.GetHtml(httpItem);
        //    if (string.IsNullOrEmpty(cookies))
        //    {
        //        cookies = httpResult.Cookie;
        //        CookieCollection cookie = httpResult.CookieCollection;
        //    }
        //    return httpResult.Html;
        //}

        /// <summary>
        /// Requests <paramref name="nextUrl"/> through <c>WebRequestCtrl.GetWebContent</c> and
        /// repeats the request for as long as it fails with a timeout.
        /// </summary>
        /// <param name="nextUrl">Address to request.</param>
        /// <param name="postData">Request body forwarded unchanged to <c>WebRequestCtrl.GetWebContent</c>.</param>
        /// <param name="cookies">Cookie string passed by reference; a null value is replaced with an empty string first.</param>
        /// <param name="currentUrl">Value assigned to the <c>Refere</c> property of the request parameters.</param>
        /// <returns>
        /// The downloaded content, or an empty string when the request throws an exception
        /// that is not recognised as a timeout.
        /// </returns>
        protected override string GetMainWebContent(string nextUrl, byte[] postData, ref string cookies, string currentUrl)
        {
            var requestParam = WebRequestCtrl.GetWebContentParam.Default;
            requestParam.Refere      = currentUrl;
            requestParam.MaxRedirect = 10;
            requestParam.TimeOut     = 60000;

            if (cookies == null)
            {
                cookies = string.Empty;
            }

            // Loop until the request either succeeds or fails for a reason other than a timeout.
            // There is deliberately no retry limit here: the original behaviour is kept as-is.
            for (;;)
            {
                try
                {
                    return WebRequestCtrl.GetWebContent(nextUrl, postData, ref cookies, 1, requestParam);
                }
                catch (Exception error)
                {
                    var detail = error.ToString();
                    Console.WriteLine(detail);

                    // Timeouts are detected by message text; the two literals differ by one
                    // character and neither contains the other, so both are checked.
                    var timedOut = detail.Contains("操作超时") || detail.Contains("操作已超时");
                    if (!timedOut)
                    {
                        // Any other failure is logged above and reported as empty content.
                        return string.Empty;
                    }
                }
            }
        }
Пример #2
0
        /// <summary>
        /// Posts the first-page query (case type, court region and judgment year taken from
        /// <paramref name="param"/>) to <c>HomePage</c>, initialises <c>_pageInfo</c> from the
        /// response and returns the first URL to crawl.
        /// Example query: case type "刑事案件", court region "北京市", judgment year 2016.
        /// </summary>
        /// <param name="param">
        /// Search parameters; <c>Reason</c> is used as the case type (one of 刑事案件, 民事案件,
        /// 行政案件, 赔偿案件, 执行案件 according to the original documentation), <c>Court</c> as
        /// the court region and <c>Year</c> as the judgment year.
        /// </param>
        /// <returns><c>HomePage</c>.</returns>
        protected override string InitFirstUrl(FaYuanParameter param)
        {
            IDictionary <string, string> dic = new Dictionary <string, string>();

            dic.Add("Param", $"案件类型:{param.Reason},法院地域:{param.Court},裁判年份:{param.Year}");
            dic.Add("Page", "20");
            dic.Add("Order", "裁判日期");
            dic.Add("Index", "1");
            dic.Add("Direction", "asc");

            var postData = WebRequestCtrl.BuildPostDatas(dic, Encoding.UTF8);

            var cookies    = string.Empty;
            var htmlString = base.GetMainWebContent(HomePage, postData, ref cookies, HomePage);

            _pageInfo = new PageInfo
            {
                Index      = 1,
                CaseType   = param.Reason,
                Area       = param.Court,
                Year       = param.Year,
                Page       = 20,
                TotalPages = GetTotalPages(htmlString)
            };

            // Round up: a plain integer division would drop the final, partially filled page
            // (e.g. 45 / 20 == 2, although three requests are needed).
            // NOTE(review): this treats TotalPages as an item count and Page as the page size,
            // which is what the division implies - confirm against GetTotalPages.
            _pageInfo.TotalIndex = (_pageInfo.TotalPages / _pageInfo.Page)
                                   + (_pageInfo.TotalPages % _pageInfo.Page == 0 ? 0 : 1);

            Console.WriteLine($"总共页数:{_pageInfo.TotalIndex}");

            return(HomePage);
        }
Пример #3
0
 /// <summary>
 /// Requests <paramref name="nextUrl"/> through <c>WebRequestCtrl.GetWebContent</c> using the
 /// default request parameters with <c>Refere</c> and <c>MaxRedirect</c> overridden.
 /// </summary>
 /// <param name="nextUrl">Address to request.</param>
 /// <param name="postData">Request body forwarded unchanged.</param>
 /// <param name="cookies">Cookie string passed by reference; a null value is replaced with an empty string first.</param>
 /// <param name="currentUrl">Value assigned to the <c>Refere</c> property of the request parameters.</param>
 /// <returns>The content returned by <c>WebRequestCtrl.GetWebContent</c>.</returns>
 protected override string GetMainWebContent(string nextUrl, byte[] postData, ref string cookies, string currentUrl)
 {
     var requestParam = WebRequestCtrl.GetWebContentParam.Default;
     requestParam.Refere      = currentUrl;
     requestParam.MaxRedirect = 20;

     if (cookies == null)
     {
         cookies = string.Empty;
     }

     var content = WebRequestCtrl.GetWebContent(nextUrl, postData, ref cookies, 1, requestParam);
     return content;
 }
 /// <summary>
 /// Fills <c>_postDataQueue</c> with one encoded POST body per page, for pages
 /// 1 through <c>_totalPage</c>.
 /// </summary>
 /// <param name="infoDic">
 /// Form fields shared by every request; its <c>"page"</c> entry is overwritten for each
 /// page, so the dictionary is left holding the last page number.
 /// </param>
 private void InitPostDataQueue(Dictionary <string, string> infoDic)
 {
     for (var page = 1; page <= _totalPage; page++)
     {
         // The addition must happen inside the interpolation hole; writing it outside
         // produced the literal text "0 + 1", "1 + 1", ... instead of the page number.
         infoDic["page"] = $"{page}";
         var postDataCur = WebRequestCtrl.BuildPostDatas(infoDic, Encoding.UTF8);

         // NOTE(review): the bytes are built with UTF-8 but decoded with Encoding.Default;
         // this is only lossless if BuildPostDatas emits ASCII (e.g. URL-encoded) - confirm.
         _postDataQueue.Enqueue(Encoding.Default.GetString(postDataCur));
     }
 }
Пример #5
0
        /*  /// <summary>
         * ///     获取用户数据
         * /// </summary>
         * /// <param name="shopId">The shop identifier.</param>
         * /// <returns></returns>
         * private IResut GetCompanyInfo(string shopId)
         * {
         *  IResut resut;
         *  companyInfoDictionary.TryGetValue(shopId, out resut);
         *  if (resut != null)
         *  {
         *      return resut;
         *  }
         *
         *  resut = this.GetCompanyInfoOnline(shopId);
         *  companyInfoDictionary[shopId] = resut;
         *
         *  return resut;
         * }*/

        /*        /// <summary>
         * ///     在线获取用户公司信息
         * /// </summary>
         * /// <param name="shopId">The shop identifier.</param>
         * /// <returns></returns>
         * private IResut GetCompanyInfoOnline(string shopId)
         * {
         *  var url = $"http://mall.jd.com/shopLevel-{shopId}.html";
         *  var cookies = this.Cookies ?? string.Empty;
         *  var webContent = WebRequestCtrl.GetWebContent(url, null, ref cookies, 2);
         *
         *  var parser = new CompanyInfoParser();
         *  return parser.Parse(webContent);
         * }*/

        /*    /// <summary>
         * ///     获取公司信息
         * /// </summary>
         * /// <param name="shopIds">The shop ids.</param>
         * /// <returns></returns>
         * private IDictionary<string, IResut> GetCompanyInfos(string[] shopIds)
         * {
         *  IDictionary<string, IResut> resultDictionary = new Dictionary<string, IResut>();
         *  foreach (var shopId in shopIds)
         *  {
         *      var resut = this.GetCompanyInfo(shopId);
         *      resultDictionary[shopId] = resut;
         *  }
         *
         *  return resultDictionary;
         * }*/

        /// <summary>
        /// Collects shop information for the shop ids found in the current page source by
        /// querying <c>http://search.jd.com/shop_new.php</c> and converting each returned
        /// JSON entry into a result.
        /// </summary>
        /// <returns>
        /// Results keyed by shop id; empty when no shop ids were parsed from the page.
        /// </returns>
        private IDictionary <string, IResut> GetListShopItemInfo()
        {
            var pageSource = this.HtmlSource;
            IDictionary <string, IResut> shops = new Dictionary <string, IResut>();

            var ids = this.ParseShopIds(pageSource);
            if (ids.Length == 0)
            {
                return shops;
            }

            // Example request:
            // http://search.jd.com/shop_new.php?ids=1000004042,1000000922,1000015485,1000015427,1000010664
            var query = Url.CreateQueryCollection();
            query.Add("ids", string.Join(",", ids));
            var requestUrl = Url.CombinUrl("http://search.jd.com/shop_new.php", query);

            // The cookie string is a local copy; whatever the request writes into it is not stored back.
            var cookieText = this.Cookies ?? string.Empty;

            var requestParam = WebRequestCtrl.GetWebContentParam.Default;
            requestParam.Refere = this.CurrentUrl;

            var json = WebRequestCtrl.GetWebContent(requestUrl, null, ref cookieText, 2, requestParam);
            this.TryUpdateHasMoreFlag(json);

            var searchKeyword = this.InnerParam.Keyword;
            var pageIndex     = this.CurrentPage;

            var entries = JArray.Parse(json);
            for (var position = 0; position < entries.Count; position++)
            {
                var entry = entries[position];

                // Rank is the 1-based position in the response, counted even for skipped entries.
                var rank = position + 1;

                var shopId = entry["shop_id"]?.Value <string>();
                if (shopId != null)
                {
                    var result = this.ConvertToResult((JObject)entry);
                    result["ShopUrl"]       = $"http://mall.jd.com/index-{shopId}.html";
                    result["SearchKeyword"] = searchKeyword;
                    this.SetResultSearchPageRank(result, rank);
                    this.SetResultSearchPageIndex(result, pageIndex);

                    shops[shopId] = result;
                }
            }

            return shops;
        }
Пример #6
0
        /// <summary>
        /// Builds the search form for the page described by <c>_pageInfo</c>, posts it to
        /// <c>HomePage</c> through the base implementation and returns the response. When the
        /// response is exactly <c>"remind"</c> (including the quotes) the captcha is handled
        /// and the request is sent once more.
        /// </summary>
        /// <param name="nextUrl">Not used; the request always targets <c>HomePage</c>.</param>
        /// <param name="postData">Not used; the form body is rebuilt from <c>_pageInfo</c>.</param>
        /// <param name="cookies">Cookie string passed by reference to the base implementation.</param>
        /// <param name="currentUrl">Not used; <c>HomePage</c> is passed in its place.</param>
        /// <returns>The response of the first request, or of the second one when a captcha was handled.</returns>
        protected override string GetMainWebContent(string nextUrl, byte[] postData, ref string cookies, string currentUrl)
        {
            IDictionary <string, string> dic = new Dictionary <string, string>();

            // The query is a comma-separated list of "name:value" filters; optional filters
            // are appended only when both their name and value are present.
            var paramString = $"案件类型:{_pageInfo.CaseType}";

            if (!string.IsNullOrEmpty(_pageInfo.ReasonLevel) && !string.IsNullOrEmpty(_pageInfo.ReasonKey))
            {
                paramString = $"{paramString},{_pageInfo.ReasonLevel}:{_pageInfo.ReasonKey}";
            }
            if (!string.IsNullOrEmpty(_pageInfo.CourtLevel) && !string.IsNullOrEmpty(_pageInfo.CourtKey))
            {
                paramString = $"{paramString},{_pageInfo.CourtLevel}:{_pageInfo.CourtKey}";
            }
            if (!string.IsNullOrEmpty(_pageInfo.Year))
            {
                paramString = $"{paramString},裁判年份:{_pageInfo.Year}";
            }

            // Static Equals is null-safe: CourtLevel may legitimately be null or empty (the
            // guard above allows for it), and an instance call would then throw
            // NullReferenceException. The comparison stays ordinal, as before.
            if (string.Equals(_pageInfo.CourtLevel, "法院地域"))
            {
                paramString = $"{paramString},法院层级:高级法院";
            }

            dic.Add("Param", paramString);
            dic.Add("Page", "20");
            dic.Add("Order", "裁判日期");

            // Indexes up to MaxIndex are requested in ascending order; anything beyond is
            // requested in descending order with the index counted from the other end.
            if (_pageInfo.Index <= _pageInfo.MaxIndex)
            {
                dic.Add("Index", $"{_pageInfo.Index}");
                dic.Add("Direction", "asc");
            }
            else
            {
                dic.Add("Index", $"{_pageInfo.Index - _pageInfo.MaxIndex}");
                dic.Add("Direction", "desc");
            }

            var postDataCur = WebRequestCtrl.BuildPostDatas(dic, Encoding.UTF8);

            var htmlString = base.GetMainWebContent(HomePage, postDataCur, ref cookies, HomePage);

            // Response body that signals the captcha challenge.
            const string shielded = "\"remind\"";

            if (!htmlString.Equals(shielded))
            {
                return(htmlString);
            }

            // The request was blocked: handle the captcha, then repeat the same request once.
            this.LoopHandleValidateCode();

            return(base.GetMainWebContent(HomePage, postDataCur, ref cookies, HomePage));
        }
Пример #7
0
        /// <summary>
        /// Submits a captcha answer to the <c>doValidatorVerifyCode.do</c> endpoint and reports
        /// whether the response's <c>message</c> field is <c>true</c>.
        /// </summary>
        /// <param name="code">Value sent as the <c>verifyCode</c> form field.</param>
        /// <param name="name">Value sent as the <c>name</c> form field.</param>
        /// <returns><c>true</c> when the extracted <c>message</c> value equals "true"; otherwise <c>false</c>.</returns>
        private bool VerificationCode(string code, string name)
        {
            IDictionary <string, string> form = new Dictionary <string, string>
            {
                { "verifyCode", code },
                { "name", name }
            };
            var body = WebRequestCtrl.BuildPostDatas(form, Encoding.UTF8);

            var response = base.GetWebContent(
                "http://gsxt.zjaic.gov.cn/search/doValidatorVerifyCode.do",
                body,
                ref _cookies,
                "http://gsxt.zjaic.gov.cn/search/doEnGeneralQueryPage.do",
                true);

            // Pull the text between the quotes of "message":"..."; no match yields an empty string.
            var message = Regex.Match(response, "(?<=\"message\":\").*?(?=\")").Value;

            return message == "true";
        }
Пример #8
0
        /// <summary>
        /// Posts the search form to the <c>doGetAppSearchResult.do</c> endpoint and stores the
        /// <c>corpid</c> query value of the first <c>doViewAppBasicInfoByLog.do</c> link found
        /// in the response in <c>_corpid</c>.
        /// </summary>
        /// <param name="code">Value sent as the <c>verifyCode</c> form field.</param>
        /// <param name="name">Value sent as the <c>name</c> form field.</param>
        private void GetCorpid(string code, string name)
        {
            IDictionary <string, string> dic = new Dictionary <string, string>();

            dic.Add("clickType", "1");
            dic.Add("verifyCode", code);
            dic.Add("name", name);

            var postData = WebRequestCtrl.BuildPostDatas(dic, Encoding.UTF8);

            var htmlString = base.GetWebContent("http://gsxt.zjaic.gov.cn/search/doGetAppSearchResult.do", postData,
                                                ref _cookies, "http://gsxt.zjaic.gov.cn/search/doEnGeneralQueryPage.do", false);

            // Match only up to the quote that closes the href attribute. The previous greedy
            // ".*" ran to the LAST quote on the line, so any attribute following href ended
            // up inside the id. When nothing matches, _corpid becomes an empty string.
            _corpid = Regex.Match(htmlString, @"(?<=href=""/appbasicinfo/doViewAppBasicInfoByLog\.do\?corpid=)[^""]*(?="")").Value;
        }
Пример #9
0
        /// <summary>
        /// Requests <paramref name="nextUrl"/> through <c>WebRequestCtrl.GetResponse</c> and
        /// returns the response body as a string.
        /// </summary>
        /// <param name="nextUrl">Address to request; must be a valid URI string.</param>
        /// <param name="postData">Request body passed to the request settings.</param>
        /// <param name="cookies">
        /// Cookie string passed to the request settings. This method never assigns it, so
        /// the caller's value is left unchanged.
        /// </param>
        /// <param name="currentUrl">Passed to the request settings after the cookies (presumably the referer - confirm).</param>
        /// <returns>The response content returned by <c>GetWebContentString</c>.</returns>
        /// <exception cref="Exception">The error recorded on the response, rethrown with its original stack trace.</exception>
        protected override string GetMainWebContent(string nextUrl, byte[] postData, ref string cookies, string currentUrl)
        {
            Uri uri = new Uri(nextUrl);

            // NOTE(review): the last argument looks like a timeout of five minutes in
            // milliseconds; the meaning of the boolean flag is not visible here - confirm.
            WebRequestCtrl.WebRequestCtrlSetting setting = new WebRequestCtrl.WebRequestCtrlSetting(uri, postData, true, cookies, currentUrl, 5 * 60000);

            using (var args = WebRequestCtrl.GetResponse(setting))
            {
                var exception = args.Error;
                if (exception != null)
                {
                    // "throw exception;" would overwrite the stack trace captured where the
                    // request actually failed; rethrow the same instance with it preserved.
                    System.Runtime.ExceptionServices.ExceptionDispatchInfo.Capture(exception).Throw();
                }

                return(args.GetWebContentString());
            }
        }
Пример #10
0
        /// <summary>
        /// Returns the content of the page at <paramref name="url"/>.
        /// </summary>
        /// <param name="url">Address to request.</param>
        /// <param name="postData">Request body to send; pass <c>null</c> when there is none.</param>
        /// <param name="cookies">Cookie string passed by reference; a null value is replaced with an empty string first.</param>
        /// <param name="refere">Value assigned to the <c>Refere</c> property of the request parameters.</param>
        /// <param name="isAjax">Whether the request is flagged as an AJAX request.</param>
        /// <returns>The content returned by <c>WebRequestCtrl.GetWebContent</c>.</returns>
        protected string GetWebContent(
            string url,
            byte[] postData,
            ref string cookies,
            string refere = "",
            bool isAjax   = false)
        {
            var param = WebRequestCtrl.GetWebContentParam.Default;

            param.Refere = refere;
            param.IsAjax = isAjax;

            cookies = cookies ?? string.Empty;

            // Forward the caller's body. It used to be replaced by null, which silently
            // dropped the form data that callers build and pass in.
            var webContent = WebRequestCtrl.GetWebContent(url, postData, ref cookies, 1, param);

            return(webContent);
        }
Пример #11
0
        /// <summary>
        /// Posts the query for the page currently described by <c>_pageInfo</c> to
        /// <c>HomePage</c> through the base implementation and returns the response.
        /// </summary>
        /// <param name="nextUrl">Not used; the request always targets <c>HomePage</c>.</param>
        /// <param name="postData">Not used; the form body is rebuilt from <c>_pageInfo</c>.</param>
        /// <param name="cookies">Cookie string passed by reference to the base implementation.</param>
        /// <param name="currentUrl">Not used; <c>HomePage</c> is passed in its place.</param>
        /// <returns>The content returned by the base implementation.</returns>
        protected override string GetMainWebContent(
            string nextUrl,
            byte[] postData,
            ref string cookies,
            string currentUrl)
        {
            // Earlier variant kept for reference:
            //return GetHtmlFromPost(HomePage, Encoding.UTF8, $"Param=案件类型:{_pageInfo.Reason}&Page=20&Order=法院层级&Index={_pageInfo.Index}&Direction=asc");
            IDictionary <string, string> form = new Dictionary <string, string>
            {
                { "Param", $"案件类型:{_pageInfo.CaseType},法院地域:{_pageInfo.Area},裁判年份:{_pageInfo.Year}" },
                { "Page", "20" },
                { "Order", "法院层级" },
                { "Index", $"{_pageInfo.Index}" },
                { "Direction", "asc" }
            };

            var body = WebRequestCtrl.BuildPostDatas(form, Encoding.UTF8);
            var html = base.GetMainWebContent(HomePage, body, ref cookies, HomePage);

            return html;
        }
Пример #12
0
        /// <summary>
        /// Solves the captcha for the requested query type, posts the search form together
        /// with the recognised code and returns the resulting page.
        /// </summary>
        /// <param name="urlType">
        /// <c>UrlType.ZhiXing</c> selects the zhixing endpoints; any other value selects the
        /// shixin endpoints.
        /// </param>
        /// <returns>
        /// The response content. For the shixin flow, <c>_failInfo</c> is returned when no
        /// cookies/captcha code or no page content could be obtained after three attempts.
        /// </returns>
        private string GetHtmlByUrl(UrlType urlType)
        {
            TesseractDemo tesseractDemo = new TesseractDemo();
            string        cookies;
            string        code;

            var    dic = new Dictionary <string, string>();
            string html;

            if (urlType == UrlType.ZhiXing)
            {
                // NOTE(review): unlike the shixin flow below, this branch does not retry and
                // does not guard against missing cookies or a missing code - confirm intent.
                code    = tesseractDemo.GetValidateCodeByUrlExtend(_zhixingCaptchaUrl);
                cookies = tesseractDemo.Cookies;
                // This part of the cookie has to be removed for the correct content to be shown.
                cookies = cookies.Replace(";Path=/search", "");
                dic.Add("selectCourtId", "1");
                dic.Add("selectCourtArrange", "1");
                dic.Add("searchCourtName", "全国法院(包含地方各级法院)");
                dic.Add("pname", _name);
                dic.Add("cardNum", _identifier);
                dic.Add("j_captcha", code.ToString());
                var postData = WebRequestCtrl.BuildPostDatas(dic, Encoding.UTF8);
                html = base.GetWebContent(_zhixingValidateUrl, postData, ref cookies);
            }
            else
            {
                // Build the timestamped captcha URL in a local. Writing it back into
                // _shixinCaptchaUrl made every call append another "?date=..." to the
                // already modified field.
                var time       = DateTime.Now;
                var captchaUrl = $"{_shixinCaptchaUrl}?date={time}";

                code    = tesseractDemo.GetValidateCodeByUrlExtend(captchaUrl);
                cookies = tesseractDemo.Cookies;
                var times = 1;
                while (string.IsNullOrEmpty(cookies) || string.IsNullOrEmpty(code))
                {
                    times++;
                    if (times > 3)
                    {
                        // Three attempts without cookies or a recognised code: give up.
                        return(_failInfo);
                    }
                    // NOTE(review): retries use GetValidateCodeByUrl rather than the
                    // ...Extend variant used for the first attempt - confirm this is intended.
                    code    = tesseractDemo.GetValidateCodeByUrl(captchaUrl);
                    cookies = tesseractDemo.Cookies;
                }
                cookies = cookies.Replace("; Path=/", "");
                dic.Add("pProvince", "0");
                dic.Add("pName", _name);
                dic.Add("pCode", code.ToString());
                dic.Add("pCardNum", _identifier);
                var postData = WebRequestCtrl.BuildPostDatas(dic, Encoding.UTF8);

                html  = GetHtmlFaildReturnEmpty(_shixinValidateUrl, postData, cookies);
                times = 1;
                while (string.IsNullOrEmpty(html))
                {
                    times++;
                    if (times > 3)
                    {
                        // Three attempts without any page content: give up.
                        return(_failInfo);
                    }
                    html = GetHtmlFaildReturnEmpty(_shixinValidateUrl, postData, cookies);
                }
            }

            return(html);
        }
Пример #13
0
        /// <summary>
        /// Posts the first-page query built from <paramref name="param"/> to <c>HomePage</c>,
        /// handles the captcha challenge if one is returned, initialises <c>_pageInfo</c> from
        /// the response and returns the first URL to crawl.
        /// Example query: 案件类型:民事案件,四级案由:遗嘱继承纠纷,中级法院:北京市第一中级人民法院,裁判年份:2016
        /// </summary>
        /// <param name="param">Search parameters, converted by <c>GetParam</c> into the named values used below.</param>
        /// <returns><c>HomePage</c>.</returns>
        protected override string InitFirstUrl(FaYuanParameter param)
        {
            var paramDic = GetParam(param);

            // Read every value once up front; they are used both for the query string and
            // for _pageInfo. A missing key therefore fails before any request is sent.
            var caseType    = paramDic["CaseType"];
            var reasonLevel = paramDic["ReasonLevel"];
            var reasonKey   = paramDic["ReasonKey"];
            var courtLevel  = paramDic["CourtLevel"];
            var courtKey    = paramDic["CourtKey"];
            var year        = paramDic["Year"];

            // The query is a comma-separated list of "name:value" filters; optional filters
            // are appended only when both their name and value are present.
            IDictionary <string, string> dic = new Dictionary <string, string>();
            var paramString = $"案件类型:{caseType}";

            if (!string.IsNullOrEmpty(reasonLevel) && !string.IsNullOrEmpty(reasonKey))
            {
                paramString = $"{paramString},{reasonLevel}:{reasonKey}";
            }
            if (!string.IsNullOrEmpty(courtLevel) && !string.IsNullOrEmpty(courtKey))
            {
                paramString = $"{paramString},{courtLevel}:{courtKey}";
            }
            if (!string.IsNullOrEmpty(year))
            {
                paramString = $"{paramString},裁判年份:{year}";
            }

            // Static Equals is null-safe: the court level may be null or empty (the guard
            // above allows for it), and an instance call would then throw
            // NullReferenceException. The comparison stays ordinal, as before.
            if (string.Equals(courtLevel, "法院地域"))
            {
                paramString = $"{paramString},法院层级:高级法院";
            }

            dic.Add("Param", paramString);
            dic.Add("Page", "20");
            dic.Add("Order", "裁判日期");
            dic.Add("Index", "1");
            dic.Add("Direction", "asc");

            var postData = WebRequestCtrl.BuildPostDatas(dic, Encoding.UTF8);

            var cookies    = string.Empty;
            var htmlString = base.GetMainWebContent(HomePage, postData, ref cookies, HomePage);

            // Response body that signals the captcha challenge.
            const string shielded = "\"remind\"";

            if (htmlString.Equals(shielded))
            {
                // Keep handling the captcha until it is accepted, then repeat the request once.
                this.LoopHandleValidateCode();
                htmlString = base.GetMainWebContent(HomePage, postData, ref cookies, HomePage);
            }

            // Record the query and paging state for the following page requests.
            _pageInfo = new PageInfo
            {
                Index       = 1,
                CaseType    = caseType,
                ReasonLevel = reasonLevel,
                ReasonKey   = reasonKey,
                CourtLevel  = courtLevel,
                CourtKey    = courtKey,
                Year        = year,
                Page        = 20,
                TotalPages  = GetTotalPages(htmlString),
            };

            // Number of requests needed, rounded up so a partially filled last page is included.
            _pageInfo.TotalIndex = (_pageInfo.TotalPages / _pageInfo.Page) + (_pageInfo.TotalPages % _pageInfo.Page == 0 ? 0 : 1);

            // No more than 2 * MaxIndex indexes are actually crawled.
            _pageInfo.ActuallyIndex = _pageInfo.TotalIndex > 2 * _pageInfo.MaxIndex ? 2 * _pageInfo.MaxIndex : _pageInfo.TotalIndex;

            return(HomePage);
        }