Example #1
0
        /// <summary>
        /// Starts a background task that fetches the list pages numbered from <c>_startIndex</c> to
        /// <c>_endIndex</c> (inclusive), follows every template link matched on each page, extracts the
        /// template details from the detail page with regular expressions and inserts the template
        /// when no stored template has the same name. Progress messages are passed to <c>_action</c>.
        /// </summary>
        /// <remarks>
        /// The task returned by <c>Task.Factory.StartNew</c> is not kept, so an exception thrown inside
        /// the crawl loop is not observed by the caller.
        /// </remarks>
        public void Start()
        {
            Task.Factory.StartNew(() =>
            {
                _action(string.Format("开始{0}to{1}....", _startIndex, _endIndex));

                // The patterns are constant, so build them once instead of once per page / per template.
                Regex listEntryRegex = new Regex("<a.target=._blank..href=.(?<url>.*?)..alt=.(?<title>.*?).><img.src2=.(?<img>.*?)..alt=.*?></a>", RegexOptions.IgnoreCase);
                Regex downloadRegex = new Regex("http.*?.rar", RegexOptions.IgnoreCase);
                Regex categoryRegex = new Regex("<span>类别.*?>(?<category>.*?)</a></span>", RegexOptions.IgnoreCase);
                Regex bigImageRegex = new Regex("<a href=.(?<bigimg>.*?)..title.*?class=\"image_gall\"><img", RegexOptions.IgnoreCase);
                Regex createTimeRegex = new Regex("<em>(?<datetime>.*?)</em>", RegexOptions.IgnoreCase);

                for (int i = _startIndex; i <= _endIndex; i++)
                {
                    HttpLibSyncRequestItem listRequest = new HttpLibSyncRequestItem();
                    listRequest.Encoding = "gb2312";
                    listRequest.Url = string.Format(_url, i);
                    // Page 1 is requested as "....html" rather than "..._1.html".
                    if (i == 1) { listRequest.Url = listRequest.Url.Replace("_1.html", ".html"); }
                    string listHtml = HttpLibSyncRequest.Get(listRequest);

                    foreach (Match entry in listEntryRegex.Matches(listHtml))
                    {
                        HtmlTemplate tpl = new HtmlTemplate();
                        tpl.HtmlTemplateThumbnailUrl = entry.Groups["img"].ToString();
                        tpl.HtmlTemplateName = entry.Groups["title"].ToString();

                        HttpLibSyncRequestItem detailRequest = new HttpLibSyncRequestItem();
                        detailRequest.Encoding = "gb2312";
                        detailRequest.Url = "http://sc.chinaz.com" + entry.Groups["url"].ToString();
                        string detailHtml = HttpLibSyncRequest.Get(detailRequest);

                        List<string> durls = new List<string>();
                        foreach (Match durl in downloadRegex.Matches(detailHtml))
                        {
                            durls.Add(durl.ToString());
                        }

                        // Without a download link there is nothing to derive the alias from
                        // (durls[0] would throw and abort the whole task), so skip this entry.
                        if (durls.Count == 0)
                        {
                            _action(string.Format("未找到下载地址，跳过{0}", tpl.HtmlTemplateName));
                            continue;
                        }

                        tpl.HtmlTemplateDownloadUrls = string.Join("|", durls);
                        tpl.HtmlTemplateAlias = durls[0].GetFileNameWithoutExtension();

                        Match category = categoryRegex.Match(detailHtml);
                        tpl.HtmlTemplateCategoryName = category.Groups["category"].ToString();
                        tpl.HtmlTemplateCategoryAlias = SpellCodeHelper.GetFirstPYLetter(tpl.HtmlTemplateCategoryName);

                        Match bimg = bigImageRegex.Match(detailHtml);
                        tpl.HtmlTemplateImageUrl = bimg.Groups["bigimg"].ToString();

                        Match dt = createTimeRegex.Match(detailHtml);
                        tpl.HtmlTemplateCreateTime = dt.Groups["datetime"].ToString().ToDataTime();

                        // NOTE(review): relies on DMContext's Single() returning null when no row matches — confirm.
                        if (DMContext.Query<HtmlTemplate>().Where(p => p.HtmlTemplateName == tpl.HtmlTemplateName).Single() == null)
                        {
                            DMContext.Insert(tpl);
                            _action(string.Format("成功采集[{0}]{1}", tpl.HtmlTemplateCategoryName, tpl.HtmlTemplateName));
                        }
                        else
                        {
                            _action(string.Format("已经存在[{0}]{1}", tpl.HtmlTemplateCategoryName, tpl.HtmlTemplateName));
                        }
                    }
                    _action("ok");
                }
                _action(string.Format("完成{0}to{1}....", _startIndex, _endIndex));
            });
        }
Example #2
0
        /// <summary>
        /// Sends the HTTP request described by <paramref name="objhttpItem"/> synchronously and
        /// returns the response body decoded as text.
        /// </summary>
        /// <param name="objhttpItem">
        /// Request settings (URL, method, timeouts, headers, proxy, client certificate, cookies, POST data).
        /// Its <c>Request</c>, <c>Response</c> and <c>Reader</c> members are overwritten with the objects
        /// used for this exchange; <c>CookieCollection</c> and <c>Cookie</c> are overwritten when the
        /// response carries cookies / a <c>set-cookie</c> header.
        /// </param>
        /// <returns>
        /// The response body, lower-cased when <c>IsToLower</c> is set. When a <see cref="WebException"/>
        /// is thrown while obtaining or reading the response, the literal <c>"String Error"</c> is returned
        /// and <c>objhttpItem.Response</c> holds the error response (may be null).
        /// </returns>
        public static string Get(HttpLibSyncRequestItem objhttpItem)
        {
            // Value returned when the response cannot be obtained.
            string returnData = "String Error";

            // ---- Client certificate ----
            bool useClientCertificate = !string.IsNullOrEmpty(objhttpItem.CerPath);
            if (useClientCertificate)
            {
                // The validation callback has to be installed before the connection is created.
                // NOTE(review): this is a static, process-wide setting, so it also affects every other
                // request made afterwards; confirm that CheckValidationResult does not accept all certificates.
                ServicePointManager.ServerCertificateValidationCallback =
                    new System.Net.Security.RemoteCertificateValidationCallback(CheckValidationResult);
            }

            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(GetUrl(objhttpItem.Url));

            if (useClientCertificate)
            {
                request.ClientCertificates.Add(new X509Certificate(objhttpItem.CerPath));
            }

            // ---- Proxy ----
            bool proxyConfigured = !(string.IsNullOrEmpty(objhttpItem.ProxyUserName)
                                     && string.IsNullOrEmpty(objhttpItem.ProxyPwd)
                                     && string.IsNullOrEmpty(objhttpItem.ProxyIp));
            if (proxyConfigured)
            {
                WebProxy myProxy = new WebProxy(objhttpItem.ProxyIp, false);
                myProxy.Credentials = new NetworkCredential(objhttpItem.ProxyUserName, objhttpItem.ProxyPwd);
                request.Proxy = myProxy;
                request.Credentials = CredentialCache.DefaultNetworkCredentials;
            }

            // ---- Basic request settings ----
            request.Method = objhttpItem.Method;
            request.Timeout = objhttpItem.Timeout;
            request.ReadWriteTimeout = objhttpItem.ReadWriteTimeout;
            request.Accept = objhttpItem.Accept;
            request.ContentType = objhttpItem.ContentType;
            request.UserAgent = objhttpItem.UserAgent;

            // ---- Text encoding ----
            // An empty value or the literal text "null" selects UTF-8; anything else is looked up by name.
            // The encoding is therefore never null, and no charset sniffing of the body is performed.
            Encoding encoding;
            if (string.IsNullOrEmpty(objhttpItem.Encoding)
                || string.Equals(objhttpItem.Encoding.Trim(), "null", StringComparison.OrdinalIgnoreCase))
            {
                encoding = Encoding.UTF8;
            }
            else
            {
                encoding = Encoding.GetEncoding(objhttpItem.Encoding);
            }

            // ---- Cookies ----
            if (!string.IsNullOrEmpty(objhttpItem.Cookie))
            {
                request.Headers[HttpRequestHeader.Cookie] = objhttpItem.Cookie;
            }

            if (objhttpItem.CookieCollection != null)
            {
                // HttpWebRequest.CookieContainer is null until one is assigned, so a container
                // must be created before the cookies can be added.
                request.CookieContainer = new CookieContainer();
                request.CookieContainer.Add(objhttpItem.CookieCollection);
            }

            request.Referer = objhttpItem.Referer;
            request.AllowAutoRedirect = objhttpItem.Allowautoredirect;

            // ---- POST body ----
            if (!string.IsNullOrEmpty(objhttpItem.Postdata)
                && request.Method.Trim().IndexOf("post", StringComparison.OrdinalIgnoreCase) >= 0)
            {
                byte[] buffer = encoding.GetBytes(objhttpItem.Postdata);
                request.ContentLength = buffer.Length;
                // The request stream must be closed, otherwise the connection is not released.
                using (Stream requestStream = request.GetRequestStream())
                {
                    requestStream.Write(buffer, 0, buffer.Length);
                }
            }

            if (objhttpItem.Connectionlimit > 0)
            {
                request.ServicePoint.ConnectionLimit = objhttpItem.Connectionlimit;
            }

            // ---- Read the response ----
            try
            {
                // NOTE: the response and reader stored on objhttpItem are disposed when these
                // using blocks end, i.e. before this method returns.
                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                {
                    if (response.Cookies != null)
                    {
                        objhttpItem.CookieCollection = response.Cookies;
                    }

                    string setCookie = response.Headers["set-cookie"];
                    if (setCookie != null)
                    {
                        objhttpItem.Cookie = setCookie;
                    }

                    objhttpItem.Response = response;
                    objhttpItem.Request = request;

                    Stream body = response.GetResponseStream();
                    if (response.ContentEncoding != null
                        && response.ContentEncoding.Equals("gzip", StringComparison.InvariantCultureIgnoreCase))
                    {
                        // The body is gzip-compressed: decompress while reading.
                        body = new GZipStream(body, CompressionMode.Decompress);
                    }

                    using (StreamReader reader = new StreamReader(body, encoding))
                    {
                        objhttpItem.Reader = reader;
                        returnData = reader.ReadToEnd();
                    }
                }
            }
            catch (WebException ex)
            {
                // Report the failure through the sentinel text and expose the error response to the caller.
                returnData = "String Error";
                objhttpItem.Response = (HttpWebResponse)ex.Response;
            }

            if (objhttpItem.IsToLower)
            {
                returnData = returnData.ToLower();
            }
            return returnData;
        }
Example #3
0
 /// <summary>
 /// Requests a token from the endpoint built from <c>_config.ApiUrl</c>, <c>_config.AppId</c> and
 /// <c>_config.AppSecret</c>, then stores the returned <c>access_token</c> in <c>_config.AccessToken</c>
 /// and sets <c>_config.ExpiresIn</c> to the current local time plus the returned <c>expires_in</c> seconds.
 /// </summary>
 private void GetAccessToken()
 {
     var tokenRequest = new HttpLibSyncRequestItem
     {
         Url = string.Format("{0}token?grant_type=client_credential&appid={1}&secret={2}", _config.ApiUrl, _config.AppId, _config.AppSecret)
     };

     // Fetch the response text and wrap it for key-based access.
     var tokenData = new WeChatData(HttpLibSyncRequest.Get(tokenRequest));

     _config.AccessToken = tokenData["access_token"];
     _config.ExpiresIn = DateTime.Now.AddSeconds(tokenData.Get<int>("expires_in"));
 }