/// <summary>
/// Starts a background task that crawls the list pages numbered from
/// <c>_startIndex</c> to <c>_endIndex</c> (inclusive), follows every template
/// link found on each page, and inserts templates whose name is not yet stored.
/// Progress messages are reported through <c>_action</c>.
/// </summary>
/// <remarks>
/// The started task is neither returned nor awaited, so an exception thrown
/// inside it is not observed by the caller of this method.
/// </remarks>
public void Start()
{
    Task.Factory.StartNew(() =>
    {
        // The patterns do not depend on the page being processed, so build them once per run.
        Regex listEntryPattern = new Regex("<a.target=._blank..href=.(?<url>.*?)..alt=.(?<title>.*?).><img.src2=.(?<img>.*?)..alt=.*?></a>", RegexOptions.IgnoreCase);
        // The dot before "rar" is escaped so that only real ".rar" links match
        // (the unescaped form also matched any "rar" substring, e.g. inside "library").
        Regex downloadPattern = new Regex("http.*?\\.rar", RegexOptions.IgnoreCase);
        Regex categoryPattern = new Regex("<span>类别.*?>(?<category>.*?)</a></span>", RegexOptions.IgnoreCase);
        Regex bigImagePattern = new Regex("<a href=.(?<bigimg>.*?)..title.*?class=\"image_gall\"><img", RegexOptions.IgnoreCase);
        Regex createTimePattern = new Regex("<em>(?<datetime>.*?)</em>", RegexOptions.IgnoreCase);

        _action(string.Format("开始{0}to{1}....", _startIndex, _endIndex));

        for (int i = _startIndex; i <= _endIndex; i++)
        {
            HttpLibSyncRequestItem listRequest = new HttpLibSyncRequestItem();
            listRequest.Encoding = "gb2312";
            listRequest.Url = string.Format(_url, i);
            if (i == 1)
            {
                // The first list page has no "_1" suffix in its file name.
                listRequest.Url = listRequest.Url.Replace("_1.html", ".html");
            }

            string listHtml = HttpLibSyncRequest.Get(listRequest);

            foreach (Match entry in listEntryPattern.Matches(listHtml))
            {
                HtmlTemplate tpl = new HtmlTemplate();
                tpl.HtmlTemplateThumbnailUrl = entry.Groups["img"].Value;
                tpl.HtmlTemplateName = entry.Groups["title"].Value;

                HttpLibSyncRequestItem detailRequest = new HttpLibSyncRequestItem();
                detailRequest.Encoding = "gb2312";
                detailRequest.Url = "http://sc.chinaz.com" + entry.Groups["url"].Value;
                string detailHtml = HttpLibSyncRequest.Get(detailRequest);

                List<string> durls = new List<string>();
                foreach (Match durl in downloadPattern.Matches(detailHtml))
                {
                    durls.Add(durl.Value);
                }

                if (durls.Count == 0)
                {
                    // Without a download link there is nothing to derive the alias from;
                    // indexing durls[0] here would throw and end the whole crawl.
                    _action(string.Format("跳过[{0}]：未找到下载地址", tpl.HtmlTemplateName));
                    continue;
                }

                tpl.HtmlTemplateDownloadUrls = string.Join("|", durls);
                tpl.HtmlTemplateAlias = durls[0].GetFileNameWithoutExtension();

                tpl.HtmlTemplateCategoryName = categoryPattern.Match(detailHtml).Groups["category"].Value;
                tpl.HtmlTemplateCategoryAlias = SpellCodeHelper.GetFirstPYLetter(tpl.HtmlTemplateCategoryName);
                tpl.HtmlTemplateImageUrl = bigImagePattern.Match(detailHtml).Groups["bigimg"].Value;
                tpl.HtmlTemplateCreateTime = createTimePattern.Match(detailHtml).Groups["datetime"].Value.ToDataTime();

                // NOTE(review): this relies on Single() returning null when no row matches.
                // LINQ's own Single() throws on an empty sequence instead - confirm DMContext's semantics.
                if (DMContext.Query<HtmlTemplate>().Where(p => p.HtmlTemplateName == tpl.HtmlTemplateName).Single() == null)
                {
                    DMContext.Insert(tpl);
                    _action(string.Format("成功采集[{0}]{1}", tpl.HtmlTemplateCategoryName, tpl.HtmlTemplateName));
                }
                else
                {
                    _action(string.Format("已经存在[{0}]{1}", tpl.HtmlTemplateCategoryName, tpl.HtmlTemplateName));
                }
            }

            _action("ok");
        }

        _action(string.Format("完成{0}to{1}....", _startIndex, _endIndex));
    });
}
/// <summary>
/// Sends the HTTP request described by <paramref name="objhttpItem"/> synchronously
/// and returns the response body as a string.
/// </summary>
/// <param name="objhttpItem">
/// Request description (URL, method, headers, proxy, cookies, post data, encoding name).
/// On return its <c>Request</c>, <c>Response</c>, <c>Reader</c>, <c>Cookie</c> and
/// <c>CookieCollection</c> members have been updated from the exchange.
/// </param>
/// <returns>
/// The decoded response body, lower-cased when <c>IsToLower</c> is set, or the literal
/// <c>"String Error"</c> when <see cref="WebException"/> is thrown while reading the response.
/// </returns>
public static string Get(HttpLibSyncRequestItem objhttpItem)
{
    // Encoding used to decode the response; replaced below when the item names one.
    Encoding encoding = Encoding.UTF8;
    StreamReader reader = null;
    HttpWebRequest request = null;
    HttpWebResponse response = null;
    // Value returned when the response cannot be read.
    string returnData = "String Error";

    // ---- Client certificate ----
    if (!string.IsNullOrEmpty(objhttpItem.CerPath))
    {
        // Must be installed before the request is created.
        // NOTE(review): this callback is static on ServicePointManager, so it applies to every
        // later request in the process; what CheckValidationResult accepts is not visible here - confirm.
        ServicePointManager.ServerCertificateValidationCallback = new System.Net.Security.RemoteCertificateValidationCallback(CheckValidationResult);
        request = (HttpWebRequest)WebRequest.Create(GetUrl(objhttpItem.Url));
        X509Certificate objx509 = new X509Certificate(objhttpItem.CerPath);
        request.ClientCertificates.Add(objx509);
    }
    else
    {
        request = (HttpWebRequest)WebRequest.Create(GetUrl(objhttpItem.Url));
    }

    // ---- Proxy ----
    bool proxyConfigured = !(string.IsNullOrEmpty(objhttpItem.ProxyUserName)
        && string.IsNullOrEmpty(objhttpItem.ProxyPwd)
        && string.IsNullOrEmpty(objhttpItem.ProxyIp));
    if (proxyConfigured)
    {
        WebProxy myProxy = new WebProxy(objhttpItem.ProxyIp, false);
        myProxy.Credentials = new NetworkCredential(objhttpItem.ProxyUserName, objhttpItem.ProxyPwd);
        request.Proxy = myProxy;
        request.Credentials = CredentialCache.DefaultNetworkCredentials;
    }

    // ---- Basic request settings ----
    request.Method = objhttpItem.Method;
    request.Timeout = objhttpItem.Timeout;
    request.ReadWriteTimeout = objhttpItem.ReadWriteTimeout;
    request.Accept = objhttpItem.Accept;
    request.ContentType = objhttpItem.ContentType;
    request.UserAgent = objhttpItem.UserAgent;

    // ---- Encoding ----
    // An empty name or the literal "null" keeps UTF-8; anything else is looked up by name.
    if (string.IsNullOrEmpty(objhttpItem.Encoding)
        || string.Equals(objhttpItem.Encoding.Trim(), "null", StringComparison.OrdinalIgnoreCase))
    {
        encoding = Encoding.UTF8;
    }
    else
    {
        encoding = System.Text.Encoding.GetEncoding(objhttpItem.Encoding);
    }

    // ---- Cookies ----
    if (!string.IsNullOrEmpty(objhttpItem.Cookie))
    {
        request.Headers[HttpRequestHeader.Cookie] = objhttpItem.Cookie;
    }

    if (objhttpItem.CookieCollection != null)
    {
        // HttpWebRequest.CookieContainer is null until assigned, so reading its Count
        // on a freshly created request would throw; always attach a new container.
        request.CookieContainer = new CookieContainer();
        request.CookieContainer.Add(objhttpItem.CookieCollection);
    }

    request.Referer = objhttpItem.Referer;
    request.AllowAutoRedirect = objhttpItem.Allowautoredirect;

    // ---- Request body ----
    // Only written when post data is present and the method name contains "post".
    if (!string.IsNullOrEmpty(objhttpItem.Postdata)
        && request.Method.IndexOf("post", StringComparison.OrdinalIgnoreCase) >= 0)
    {
        byte[] buffer = encoding.GetBytes(objhttpItem.Postdata);
        request.ContentLength = buffer.Length;
        // Dispose the request stream so the connection is released back to the service point.
        using (Stream requestStream = request.GetRequestStream())
        {
            requestStream.Write(buffer, 0, buffer.Length);
        }
    }

    if (objhttpItem.Connectionlimit > 0)
    {
        request.ServicePoint.ConnectionLimit = objhttpItem.Connectionlimit;
    }

    // ---- Read the response ----
    try
    {
        using (response = (HttpWebResponse)request.GetResponse())
        {
            if (response.Cookies != null)
            {
                objhttpItem.CookieCollection = response.Cookies;
            }

            if (response.Headers["set-cookie"] != null)
            {
                objhttpItem.Cookie = response.Headers["set-cookie"];
            }

            objhttpItem.Response = response;
            objhttpItem.Request = request;

            bool isGzip = response.ContentEncoding != null
                && response.ContentEncoding.Equals("gzip", StringComparison.InvariantCultureIgnoreCase);

            if (encoding == null)
            {
                // Charset auto-detection path. As written above, encoding is always assigned
                // (UTF-8 or the result of GetEncoding), so this branch is not reached unless
                // the encoding setup is changed to leave it null.
                objhttpItem.Reader = reader;
                MemoryStream stream = GetMemoryStream(response.GetResponseStream());
                byte[] RawResponse = stream.ToArray();
                string temp = Encoding.Default.GetString(RawResponse, 0, RawResponse.Length);

                // Look for a charset declared in a <meta> tag first.
                Match meta = Regex.Match(temp, "<meta([^<]*)charset=([^<]*)[\"']", RegexOptions.IgnoreCase | RegexOptions.Multiline);
                string charter = (meta.Groups.Count > 2) ? meta.Groups[2].Value : string.Empty;
                charter = charter.Replace("\"", string.Empty).Replace("'", string.Empty).Replace(";", string.Empty);

                if (charter.Length > 0)
                {
                    // A declared iso-8859-1 is decoded as gbk.
                    charter = charter.ToLower().Replace("iso-8859-1", "gbk");
                    encoding = Encoding.GetEncoding(charter);
                }
                else if (response.CharacterSet.ToLower().Trim() == "iso-8859-1")
                {
                    encoding = Encoding.GetEncoding("gbk");
                }
                else if (string.IsNullOrEmpty(response.CharacterSet.Trim()))
                {
                    encoding = Encoding.UTF8;
                }
                else
                {
                    encoding = Encoding.GetEncoding(response.CharacterSet);
                }

                returnData = encoding.GetString(RawResponse);
            }
            else if (isGzip)
            {
                // Decompress gzip-encoded bodies while decoding.
                using (reader = new StreamReader(new GZipStream(response.GetResponseStream(), CompressionMode.Decompress), encoding))
                {
                    objhttpItem.Reader = reader;
                    returnData = reader.ReadToEnd();
                }
            }
            else
            {
                using (reader = new StreamReader(response.GetResponseStream(), encoding))
                {
                    objhttpItem.Reader = reader;
                    returnData = reader.ReadToEnd();
                }
            }
        }
    }
    catch (WebException ex)
    {
        // Report the failure through the sentinel string and expose the error response to the caller.
        returnData = "String Error";
        response = (HttpWebResponse)ex.Response;
        objhttpItem.Response = response;
    }

    if (objhttpItem.IsToLower)
    {
        returnData = returnData.ToLower();
    }

    return returnData;
}
/// <summary>
/// Requests an access token from the endpoint built from <c>_config.ApiUrl</c>,
/// <c>_config.AppId</c> and <c>_config.AppSecret</c>, then stores the returned
/// token and its expiry time (now plus the returned "expires_in" seconds) in <c>_config</c>.
/// </summary>
private void GetAccessToken()
{
    string tokenUrl = string.Format(
        "{0}token?grant_type=client_credential&appid={1}&secret={2}",
        _config.ApiUrl,
        _config.AppId,
        _config.AppSecret);

    var tokenRequest = new HttpLibSyncRequestItem { Url = tokenUrl };
    var tokenData = new WeChatData(HttpLibSyncRequest.Get(tokenRequest));

    // Store the token before reading the lifetime, as two separate steps.
    _config.AccessToken = tokenData["access_token"];

    var lifetimeSeconds = tokenData.Get<int>("expires_in");
    _config.ExpiresIn = DateTime.Now.AddSeconds(lifetimeSeconds);
}