/// <summary>
/// Checks the value read back from <c>Timeout</c> for several combinations of
/// explicitly set and omitted <c>Timeout</c> / <c>DownloadTimeout</c> values.
/// </summary>
public void TestMethod12()
{
    // Both set, DownloadTimeout is the smaller one: Timeout is expected to read back as 20000.
    var lowerDownload = new HttpRequestParam { Timeout = 30000, DownloadTimeout = 20000 };
    bool lowerDownloadOk = lowerDownload.Timeout == 20000 && lowerDownload.Timeout <= lowerDownload.DownloadTimeout;

    // Both set, DownloadTimeout is the larger one: Timeout is expected to stay at 30000.
    var higherDownload = new HttpRequestParam { Timeout = 30000, DownloadTimeout = 40000 };
    bool higherDownloadOk = higherDownload.Timeout == 30000 && higherDownload.Timeout <= higherDownload.DownloadTimeout;

    // Timeout set, DownloadTimeout explicitly null.
    var nullDownload = new HttpRequestParam { Timeout = 30000, DownloadTimeout = null };
    bool nullDownloadOk = nullDownload.Timeout == 30000;

    // Timeout left untouched, DownloadTimeout explicitly null: Timeout is expected to be 30000.
    var onlyNullDownload = new HttpRequestParam { DownloadTimeout = null };
    bool onlyNullDownloadOk = onlyNullDownload.Timeout == 30000;

    // Nothing set at all: Timeout is expected to be 30000.
    var untouched = new HttpRequestParam();
    bool untouchedOk = untouched.Timeout == 30000;

    // Timeout left untouched, DownloadTimeout set to 20000: Timeout is expected to read back as 20000.
    var onlyDownload = new HttpRequestParam { DownloadTimeout = 20000 };
    bool onlyDownloadOk = onlyDownload.Timeout == 20000 && onlyDownload.Timeout <= onlyDownload.DownloadTimeout;

    Assert.IsTrue(lowerDownloadOk && higherDownloadOk && nullDownloadOk && onlyNullDownloadOk && untouchedOk && onlyDownloadOk);
}
/// <summary>
/// Downloads the page at the configured URL and parses draw results out of its history table.
/// </summary>
/// <returns>
/// One <see cref="OpenResult"/> per parsed table row; rows for which
/// <c>GetOpenData</c> yields an empty string are skipped.
/// </returns>
/// <exception cref="Exception">
/// Thrown when the download reports an error, when the history table cannot be
/// located in the page, or when no result rows are found inside that table.
/// </exception>
public List<OpenResult> Pick()
{
    HttpRequestParam param = new HttpRequestParam { Url = this._url };
    string errorInfo = string.Empty;
    string html = HttpHelper.GetHtml(param, ref errorInfo);
    if (!string.IsNullOrEmpty(errorInfo))
    {
        throw new Exception($"从彩世界采集{this._lotteryType}出错。错误信息:{errorInfo},抓取地址:{this._url}");
    }

    // A null page would make Regex.Match throw ArgumentNullException; treating it as
    // empty routes it through the regular "table not found" error below instead.
    html = html ?? string.Empty;

    // The accepted class names are wrapped in a non-capturing group so that '|' only
    // alternates between the names (ungrouped it splits the whole pattern), and the
    // "html" capture group is closed before </table> (unclosed it makes the Regex
    // constructor throw).
    Regex regex = new Regex(@"<table[^><]*class=""(?:history|tbHistory|dataContainer)""[^><]*>(?<html>[\S\s]*?)</table>", RegexOptions.IgnoreCase);
    Match match = regex.Match(html);
    if (!match.Success)
    {
        throw new Exception($"从彩世界采集{this._lotteryType}出错。抓取地址:{this._url},源代码:{html}");
    }

    // From here on only the table body is searched.
    html = match.Groups["html"].Value;
    regex = new Regex(@"<tr>\s*<td>\s*<i[^><]*class=""font_gray666"">(?<issueNo>[^><]+)</i>\s*<i[^><]*class=""font_gray999"">(?<time>[^><]+)</i>\s*</td>\s*<td>\s*<div[^><]*>(?<num>[\s\S]*?)</div>", RegexOptions.IgnoreCase);
    MatchCollection matches = regex.Matches(html);
    if (matches.Count == 0)
    {
        throw new Exception($"从彩世界采集{this._lotteryType}出错。抓取地址:{this._url},源代码:{html}");
    }

    List<OpenResult> resultList = new List<OpenResult>();
    foreach (Match m in matches)
    {
        OpenResult result = new OpenResult
        {
            create_time = DateTime.Now,
            lottery_code = this._lotteryType,
            data_source = DataSourceEnum.CSJ
        };

        // The issue number is stored as a number, so any '-' separators are stripped first.
        result.issue_number = Convert.ToInt64(m.Groups["issueNo"].Value.Replace("-", ""));
        result.open_time = Convert.ToDateTime(m.Groups["time"].Value);
        result.open_data = this.GetOpenData(m.Groups["num"].Value);
        if (string.IsNullOrEmpty(result.open_data))
        {
            continue;
        }

        resultList.Add(result);
    }

    return resultList;
}
/// <summary>
/// Requests the draw results for the configured lottery code and maps the
/// response entries to <see cref="OpenResult"/> records.
/// </summary>
/// <returns>
/// The mapped results ordered by ascending issue number; an empty list when the
/// response code is below 1 or the response carries no entries.
/// </returns>
/// <exception cref="Exception">
/// Thrown when the request reports an error, when the body is empty, or when the
/// body contains the text "robots".
/// </exception>
public List<OpenResult> Pick()
{
    HttpRequestParam param = this.GetParam(this._lotteryCode);
    string errorInfo = string.Empty;
    string html = HttpHelper.GetHtml(param, ref errorInfo);

    if (!string.IsNullOrEmpty(errorInfo))
    {
        throw new Exception($"从快彩在线采集{this._lotteryType}出错。错误信息:{errorInfo} 抓取地址:{param.Url}");
    }

    if (string.IsNullOrEmpty(html))
    {
        throw new Exception($"从快彩在线采集{this._lotteryType}出错。抓取地址:{param.Url}");
    }

    // A body mentioning "robots" is treated as an anti-crawler page rather than data.
    bool blockedByRobots = html.ToLower().Contains("robots");
    if (blockedByRobots)
    {
        throw new Exception($"从快彩在线采集{this._lotteryType}出错。返回的html错误(被反爬虫截断请求)。HTML:{html}");
    }

    KCResponse response = html.ToEntity<KCResponse>();

    // Nothing to report: non-positive response code, or no entries in the payload.
    if (response.Code < 1 || response.BackData == null || response.BackData.Count == 0)
    {
        return new List<OpenResult>();
    }

    return response.BackData
        .Select(entry => new OpenResult
        {
            create_time = DateTime.Now,
            open_time = DateTime.Parse(entry.OpenTime),
            lottery_code = _lotteryType,
            issue_number = Convert.ToInt64(entry.IssueNo),
            open_data = entry.LotteryOpen,
            data_source = DataSourceEnum.KC
        })
        .OrderBy(item => item.issue_number)
        .ToList();
}
/// <summary>
/// Builds the POST request parameters for the given lottery code.
/// </summary>
/// <param name="lotteryCode">Lottery code to query; the POST body is only filled in when it is greater than zero.</param>
/// <returns>The request parameters, with or without a POST body.</returns>
private HttpRequestParam GetParam(int lotteryCode)
{
    HttpRequestParam request = new HttpRequestParam();
    request.Origin = "http://www.cpkk7.com";
    request.Methond = "POST";
    request.Url = "http://www.cpkk7.com/tools/ssc_ajax.ashx?A=GetLotteryOpen&S=kczx&U=dw9527";

    // Only a positive lottery code gets a POST body.
    if (lotteryCode > 0)
    {
        request.PostData = string.Concat(
            "Action=GetLotteryOpen&LotteryCode=",
            lotteryCode.ToString(),
            "&IssueNo=0&DataNum=5&SourceName=PC");
    }

    return request;
}
/// <summary>
/// Returns the request parameters collected from the query string and, when the
/// request has a form content type, from the form fields. The result is stored
/// in <c>_requestData</c> and returned as-is on later calls.
/// </summary>
/// <returns>The stored or newly collected parameter set.</returns>
private HttpRequestParam getHttpRequestParam()
{
    // Already collected: hand back the stored instance.
    if (_requestData != null)
    {
        return _requestData;
    }

    var collected = new HttpRequestParam();

    var queryValues = this.Request.Query;
    foreach (var pair in queryValues)
    {
        collected[pair.Key] = pair.Value;
    }

    // Form fields are copied after the query values, so a form field overwrites
    // a query value written under the same key.
    if (this.Request.HasFormContentType)
    {
        var formValues = this.Request.Form;
        foreach (var field in formValues)
        {
            collected[field.Key] = field.Value;
        }
    }

    _requestData = collected;
    return collected;
}
/// <summary>
/// Performs an HTTP request by delegating to a newly created <see cref="HttpInstance"/>.
/// </summary>
/// <param name="param">The request parameters, passed through unchanged.</param>
/// <returns>The <see cref="HttpResult"/> returned by the instance.</returns>
public static HttpResult GetHttpRequestData(HttpRequestParam param)
{
    // A new HttpInstance is created for every call.
    return new HttpInstance().GetHttpRequestData(param);
}
/// <summary>
/// Crawls the Baidu index age/sex distribution for every keyword listed in
/// <c>F:\amekeyword.txt</c> and appends the extracted values to <c>F:\baidu.txt</c>.
/// </summary>
/// <remarks>
/// Progress and failures are reported through <c>MessagePipe</c>. A failure on a
/// single keyword is reported and the loop moves on to the next keyword; a failure
/// outside the loop (for example the keyword file cannot be read) is reported and
/// ends the run. No exception escapes this method.
/// </remarks>
public void ExceuteCrawIndex()
{
    try
    {
        List<string> keywordsList = new List<string>();

        // Read the keywords, one per line, from the GB2312-encoded text file.
        using (System.IO.StreamReader sr = new System.IO.StreamReader("F:\\amekeyword.txt", Encoding.GetEncoding("GB2312")))
        {
            string str;
            while ((str = sr.ReadLine()) != null)
            {
                keywordsList.Add(str);
            }
        }

        MessagePipe.ExcuteWriteMessageEvent("取到关键词" + keywordsList.Count + "条", 0);

        // Process the keywords one by one.
        foreach (string keyword in keywordsList)
        {
            try
            {
                MessagePipe.ExcuteWriteMessageEvent("开始处理关键词【" + keyword + "】", 0);

                // First request: load the crowd page for the keyword to obtain the "ppt" token.
                HttpRequestParam param = new HttpRequestParam();
                string tempword = System.Web.HttpUtility.UrlEncode(keyword, System.Text.Encoding.GetEncoding("GB2312"));
                tempword = tempword.ToUpper();
                param.URL = string.Format("http://index.baidu.com/?tpl=crowd&word={0}", tempword);
                param.Method = "get";
                param.AllowAutoRedirect = false;
                param.IsIEProxy = true;
                param.Cookie = this.cookie;
                param.Timeout = 7 * 1000;
                param.UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36";
                param.ResultType = ResultType.String;
                HttpResult result = HttpHelper.GetHttpRequestData(param);

                // Strip all whitespace so the token regex does not have to cope with line breaks.
                String temphtml = result.Html;
                temphtml = Regex.Replace(temphtml, @"\r|\n|\s|\t", string.Empty, RegexOptions.IgnoreCase);
                Regex regextemp = new Regex("PPval.ppt=\'(?<str>.*?)\',");
                Match matchresult = regextemp.Match(temphtml);
                string tempstr = matchresult.Groups["str"].Value;
                tempstr += "&res2=";

                // Second request: query the social interface with the token, using the
                // first page as the referer.
                string temprefer = param.URL;
                param.Referer = temprefer;
                param.URL = string.Format("http://index.baidu.com/Interface/Social/getSocial/?res={0}", tempstr);
                param.Header.Add("X-Requested-With", "XMLHttpRequest");
                result = HttpHelper.GetHttpRequestData(param);

                // Quotes are removed so the age and sex sections can be matched with a plain regex.
                string jsonstr = result.Html;
                jsonstr = jsonstr.Replace("\"", string.Empty);
                regextemp = new Regex("str_age:\\{(?<age>.*?)\\},str_sex:\\{(?<sex>.*?)\\}");
                matchresult = regextemp.Match(jsonstr);
                string ageregion = matchresult.Groups["age"].Value;
                string sexstr = matchresult.Groups["sex"].Value;
                List<string> agelist = ageregion.Split(',').ToList();
                List<string> sexlist = sexstr.Split(',').ToList();

                // Each entry is "key:value"; only the value part is kept. An entry without
                // ':' (e.g. when the regex did not match) throws here and is reported by the
                // per-keyword catch below.
                string content = string.Empty;
                foreach (string tempage in agelist)
                {
                    List<string> tempageList = tempage.Split(':').ToList();
                    content += tempageList[1] + " ";
                }

                foreach (string tempsex in sexlist)
                {
                    List<string> tempsexlist = tempsex.Split(':').ToList();
                    content += tempsexlist[1] + " ";
                }

                // Append to the output file. The previous literal "F:\\\baidu.txt" contained
                // the \b (backspace) escape and therefore never named F:\baidu.txt.
                WriteTxt.WriteAppendTxt("F:\\baidu.txt", content);
                MessagePipe.ExcuteWriteMessageEvent("关键词【" + keyword + "】指数数据添加" + content, 0);
            }
            catch (Exception ex)
            {
                MessagePipe.ExcuteWriteMessageEvent("处理关键词【" + keyword + "】发生异常:" + ex.Message, 1);
            }
        }
    }
    catch (Exception ex)
    {
        // Previously swallowed silently; report it the same way per-keyword failures
        // are reported so a missing or unreadable keyword file is visible.
        MessagePipe.ExcuteWriteMessageEvent("抓取指数发生异常:" + ex.Message, 1);
    }
}