/// <summary>
/// Executes the HTTP request described by <paramref name="item"/> and returns the resulting page data.
/// </summary>
/// <param name="item">Request parameter object.</param>
/// <returns>
/// An <c>HttpResult</c>. Failures are reported through the result instead of being thrown:
/// when configuring the request fails, <c>Html</c> and <c>StatusDescription</c> carry the error message;
/// when the request itself fails without a server response, <c>Html</c> carries the error message.
/// </returns>
public HttpResult GetHtml(HttpItem item)
{
    // Result object that is filled in by GetData.
    HttpResult result = new HttpResult();
    try
    {
        // Build and configure the request.
        SetRequest(item);
    }
    catch (Exception ex)
    {
        // Configuring the request failed.
        return new HttpResult() { Cookie = string.Empty, Header = null, Html = ex.Message, StatusDescription = "配置参数时出错:" + ex.Message };
    }

    try
    {
        // Send the request and read the response.
        using (response = (HttpWebResponse)request.GetResponse())
        {
            GetData(item, result);
        }
    }
    catch (WebException ex)
    {
        // WebException.Response is null when no response was received
        // (timeout, DNS failure, connection refused); only read it when present.
        HttpWebResponse errorResponse = ex.Response as HttpWebResponse;
        if (errorResponse == null)
        {
            result.Html = ex.Message;
        }
        else
        {
            try
            {
                // The server answered with an error status: still expose its body and headers.
                using (response = errorResponse)
                {
                    GetData(item, result);
                }
            }
            catch (Exception readError)
            {
                // Same reporting style as the success path: surface the message, do not throw.
                result.Html = readError.Message;
            }
        }
    }
    catch (Exception ex)
    {
        result.Html = ex.Message;
    }

    if (item.IsToLower && result.Html != null)
    {
        result.Html = result.Html.ToLower();
    }
    return result;
}
/// <summary>
/// Configures the proxy of the current request from <paramref name="item"/>.
/// </summary>
/// <param name="item">Request parameter object.</param>
private void SetProxy(HttpItem item)
{
    // No proxy address given: use whatever proxy object the caller supplied.
    if (string.IsNullOrEmpty(item.ProxyIp))
    {
        request.Proxy = item.WebProxy;
        return;
    }

    // "ieproxy" marker: request.Proxy is deliberately left untouched.
    if (item.ProxyIp.ToLower().Contains("ieproxy"))
    {
        return;
    }

    WebProxy proxy;
    if (item.ProxyIp.Contains(":"))
    {
        // "host:port" form.
        string[] hostAndPort = item.ProxyIp.Split(':');
        proxy = new WebProxy(hostAndPort[0].Trim(), Convert.ToInt32(hostAndPort[1].Trim()));
    }
    else
    {
        // Address only.
        proxy = new WebProxy(item.ProxyIp, false);
    }

    // Attach the proxy credentials and hand the proxy to the current request.
    proxy.Credentials = new NetworkCredential(item.ProxyUserName, item.ProxyPwd);
    request.Proxy = proxy;
}
/// <summary>
/// Creates the request object and applies every setting carried by <paramref name="item"/>.
/// </summary>
/// <param name="item">Request parameter object.</param>
private void SetRequest(HttpItem item)
{
    // Creates the request (and attaches client certificates); must run first.
    SetCer(item);

    // Custom headers.
    if (item.Header != null && item.Header.Count > 0)
    {
        foreach (string headerName in item.Header.AllKeys)
        {
            request.Headers.Add(headerName, item.Header[headerName]);
        }
    }

    // Proxy.
    SetProxy(item);

    if (item.ProtocolVersion != null)
    {
        request.ProtocolVersion = item.ProtocolVersion;
    }
    request.ServicePoint.Expect100Continue = item.Expect100Continue;

    // HTTP method (GET or POST) and connection behaviour.
    request.Method = item.Method;
    request.Timeout = item.Timeout;
    request.KeepAlive = item.KeepAlive;
    request.ReadWriteTimeout = item.ReadWriteTimeout;
    if (item.IfModifiedSince != null)
    {
        request.IfModifiedSince = Convert.ToDateTime(item.IfModifiedSince);
    }

    // Accept / Content-Type / User-Agent headers.
    request.Accept = item.Accept;
    request.ContentType = item.ContentType;
    request.UserAgent = item.UserAgent;

    // Encoding used later to decode the response (null means "detect it").
    encoding = item.Encoding;

    // Credentials.
    request.Credentials = item.ICredentials;

    // Cookies.
    SetCookie(item);

    // Referer.
    request.Referer = item.Referer;

    // Redirect handling.
    request.AllowAutoRedirect = item.Allowautoredirect;
    if (item.MaximumAutomaticRedirections > 0)
    {
        request.MaximumAutomaticRedirections = item.MaximumAutomaticRedirections;
    }

    // Request body for POST.
    SetPostData(item);

    // Maximum number of connections.
    if (item.Connectionlimit > 0)
    {
        request.ServicePoint.ConnectionLimit = item.Connectionlimit;
    }
}
/// <summary>
/// Stores the raw bytes on the result when requested and, if no encoding was configured,
/// determines the encoding used to decode the response.
/// </summary>
/// <remarks>
/// Detection order: the charset declared in an HTML <c>meta</c> tag, then the charset reported
/// by the response, then UTF-8. A charset name that cannot be resolved is skipped instead of
/// throwing.
/// </remarks>
/// <param name="item">Request parameter object.</param>
/// <param name="result">Result object being filled in.</param>
/// <param name="ResponseByte">Raw response body.</param>
private void SetEncoding(HttpItem item, HttpResult result, byte[] ResponseByte)
{
    // Hand back the raw bytes when the caller asked for them.
    if (item.ResultType == ResultType.Byte)
    {
        result.ResultByte = ResponseByte;
    }

    // An explicitly configured encoding always wins.
    if (encoding != null)
    {
        return;
    }

    // Look for a charset declaration inside a <meta> tag.
    Match meta = Regex.Match(Encoding.Default.GetString(ResponseByte), "<meta[^<]*charset=([^<]*)[\"']", RegexOptions.IgnoreCase);
    // Invariant lower-casing: charset names are identifiers, not linguistic text.
    string charset = meta.Success ? meta.Groups[1].Value.ToLowerInvariant().Trim() : string.Empty;

    Encoding detected = null;
    if (charset.Length > 2)
    {
        // Strip quoting leftovers; a declared iso-8859-1 is decoded as gbk (kept from the original behaviour).
        string charsetName = charset.Replace("\"", string.Empty).Replace("'", "").Replace(";", "").Replace("iso-8859-1", "gbk").Trim();
        try
        {
            detected = Encoding.GetEncoding(charsetName);
        }
        catch (ArgumentException)
        {
            // Unknown charset name in the page: fall through to the response charset.
        }
        catch (NotSupportedException)
        {
            // Encoding not supported on this platform: fall through as well.
        }
    }

    if (detected == null && !string.IsNullOrEmpty(response.CharacterSet))
    {
        try
        {
            detected = Encoding.GetEncoding(response.CharacterSet);
        }
        catch (ArgumentException)
        {
            // The server reported a charset that cannot be resolved: use UTF-8 below.
        }
        catch (NotSupportedException)
        {
            // Same fallback.
        }
    }

    encoding = detected ?? Encoding.UTF8;
}
/// <summary>
/// Writes the request body when the request method contains "post".
/// </summary>
/// <remarks>
/// The body comes from, in order of precedence: the byte array (<c>PostDataType.Byte</c>),
/// the contents of the file named by <c>Postdata</c> (<c>PostDataType.FilePath</c>),
/// or the <c>Postdata</c> string itself. Nothing is written when no body is available.
/// </remarks>
/// <param name="item">Request parameter object.</param>
private void SetPostData(HttpItem item)
{
    // Only POST-style requests carry a body.
    if (!request.Method.Trim().ToLower().Contains("post"))
    {
        return;
    }

    if (item.PostEncoding != null)
    {
        postencoding = item.PostEncoding;
    }

    byte[] buffer = null;
    if (item.PostDataType == PostDataType.Byte && item.PostdataByte != null && item.PostdataByte.Length > 0)
    {
        // Raw bytes supplied by the caller.
        buffer = item.PostdataByte;
    }
    else if (item.PostDataType == PostDataType.FilePath && !string.IsNullOrEmpty(item.Postdata))
    {
        // Postdata is a file path: send the file's text. The reader is disposed even if reading fails.
        using (StreamReader reader = new StreamReader(item.Postdata, postencoding))
        {
            buffer = postencoding.GetBytes(reader.ReadToEnd());
        }
    }
    else if (!string.IsNullOrEmpty(item.Postdata))
    {
        // Plain string body.
        buffer = postencoding.GetBytes(item.Postdata);
    }

    if (buffer != null)
    {
        request.ContentLength = buffer.Length;
        // The request stream must be closed after writing to release the connection.
        using (Stream requestStream = request.GetRequestStream())
        {
            requestStream.Write(buffer, 0, buffer.Length);
        }
    }
}
/// <summary>
/// Adds every client certificate listed on <paramref name="item"/> to the current request.
/// </summary>
/// <param name="item">Request parameter object.</param>
private void SetCerList(HttpItem item)
{
    // Nothing to add.
    if (item.ClentCertificates == null || item.ClentCertificates.Count <= 0)
    {
        return;
    }

    foreach (X509Certificate certificate in item.ClentCertificates)
    {
        request.ClientCertificates.Add(certificate);
    }
}
/// <summary>
/// Applies the cookie settings of <paramref name="item"/> to the current request.
/// </summary>
/// <param name="item">Request parameter object.</param>
private void SetCookie(HttpItem item)
{
    // Cookie supplied as a raw header string.
    if (!string.IsNullOrEmpty(item.Cookie))
    {
        request.Headers[HttpRequestHeader.Cookie] = item.Cookie;
    }

    // A cookie container is only attached in CookieCollection mode.
    if (item.ResultCookieType != ResultCookieType.CookieCollection)
    {
        return;
    }

    request.CookieContainer = new CookieContainer();
    if (item.CookieCollection != null && item.CookieCollection.Count > 0)
    {
        request.CookieContainer.Add(item.CookieCollection);
    }
}
/// <summary>
/// Creates the request for <c>item.URL</c> and attaches client certificates.
/// </summary>
/// <param name="item">Request parameter object.</param>
private void SetCer(HttpItem item)
{
    bool hasCertificateFile = !string.IsNullOrEmpty(item.CerPath);

    if (hasCertificateFile)
    {
        // Must be installed before the connection is created: server certificates
        // are validated through the CheckValidationResult callback.
        ServicePointManager.ServerCertificateValidationCallback = new System.Net.Security.RemoteCertificateValidationCallback(CheckValidationResult);
    }

    // Create the request object for the target URL.
    request = (HttpWebRequest)WebRequest.Create(item.URL);
    SetCerList(item);

    if (hasCertificateFile)
    {
        // Add the certificate loaded from the configured file.
        request.ClientCertificates.Add(new X509Certificate(item.CerPath));
    }
}
/// <summary>
/// Copies status, headers and cookies from the current response into <paramref name="result"/>
/// and decodes the response body into <c>result.Html</c>.
/// </summary>
/// <param name="item">Request parameter object.</param>
/// <param name="result">Result object to fill in.</param>
private void GetData(HttpItem item, HttpResult result)
{
    // Status line and headers.
    result.StatusCode = response.StatusCode;
    result.StatusDescription = response.StatusDescription;
    result.Header = response.Headers;

    // Cookies, both as a collection and as the raw set-cookie header.
    if (response.Cookies != null)
    {
        result.CookieCollection = response.Cookies;
    }
    if (response.Headers["set-cookie"] != null)
    {
        result.Cookie = response.Headers["set-cookie"];
    }

    // Raw response body.
    byte[] ResponseByte = GetByte();

    // Short-circuit && so that a null buffer is not dereferenced.
    if (ResponseByte != null && ResponseByte.Length > 0)
    {
        // Pick the encoding, then decode the body.
        SetEncoding(item, result, ResponseByte);
        result.Html = encoding.GetString(ResponseByte);
    }
    else
    {
        // The response carried no body.
        result.Html = string.Empty;
    }
}
/// <summary>
/// Crawls the course list pages breadth-first, starting from three root ids, and appends
/// the list URL of every discovered id combination to <c>d://url.txt</c>.
/// </summary>
/// <remarks>
/// An id is a comma-separated path of up to four parts. Each fetched page contributes the ids
/// found in its 2nd, 3rd and 4th "menuList cf" blocks, which are appended to the current id
/// to form deeper combinations. Pages that do not contain four such blocks are logged and skipped.
/// </remarks>
public void LoadAllUrl()
{
    const string listUrlFormat = "http://www.91taoke.com/Taocan/taocan_list/id/{0}";

    // Every id combination seen so far (key and value are the same id).
    Dictionary<string, string> combine = new Dictionary<string, string>();
    // Ids whose page still has to be fetched.
    Queue<string> idqueue = new Queue<string>();

    // Root ids.
    string[] ids = new string[] { "3", "1063", "2075" };
    foreach (string id in ids)
    {
        idqueue.Enqueue(id);
        combine.Add(id, id);
    }

    while (idqueue.Count != 0)
    {
        string nowid = idqueue.Dequeue();
        string url = string.Format(listUrlFormat, nowid);
        HttpItem item = new HttpItem()
        {
            URL = url,
            Method = "GET",
            IsToLower = false,
            Timeout = 100000,
            ReadWriteTimeout = 60000,
            UserAgent = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)",
            ContentType = "text/html",
            Allowautoredirect = false,
            ResultType = ResultType.String
        };
        HttpResult result = http.GetHtml(item);
        Document doc = NSoupClient.Parse(result.Html);
        Elements ems = doc.Body.GetElementsByAttributeValue("class", "menuList cf");
        if (ems.Count < 4)
        {
            // Error page or changed layout: skip this id instead of failing on a missing block.
            WriteToLog("skipped (expected 4 menu blocks, found " + ems.Count + ")----" + nowid + "----" + url);
            continue;
        }

        // Block 0 is not needed: the first id part always comes from the root ids.
        List<string> nianji = ExtractMenuIds(ems[1]);
        List<string> xueke = ExtractMenuIds(ems[2]);
        List<string> version = ExtractMenuIds(ems[3]);

        // Number of parts already present in the current id decides which levels are appended.
        int depth = nowid.Split(',').Length;
        if (depth == 1)
        {
            foreach (string n in nianji)
            {
                string level2 = nowid + "," + n;
                EnqueueIfNew(combine, idqueue, level2);
                foreach (string x in xueke)
                {
                    string level3 = level2 + "," + x;
                    EnqueueIfNew(combine, idqueue, level3);
                    foreach (string v in version)
                    {
                        EnqueueIfNew(combine, idqueue, level3 + "," + v);
                    }
                }
            }
        }
        else if (depth == 2)
        {
            foreach (string x in xueke)
            {
                string level3 = nowid + "," + x;
                // The stored value equals the key (the original stored a double comma here,
                // which produced a malformed URL in the output file).
                EnqueueIfNew(combine, idqueue, level3);
                foreach (string v in version)
                {
                    EnqueueIfNew(combine, idqueue, level3 + "," + v);
                }
            }
        }
        else if (depth == 3)
        {
            foreach (string v in version)
            {
                EnqueueIfNew(combine, idqueue, nowid + "," + v);
            }
        }

        WriteToLog(idqueue.Count + "----" + combine.Count + "----" + nowid + "----" + url);
    }

    // Append all collected URLs; the file is opened once and always closed.
    try
    {
        using (FileStream file = new FileStream("d://url.txt", FileMode.Append))
        using (StreamWriter sw = new StreamWriter(file))
        {
            foreach (KeyValuePair<string, string> entry in combine)
            {
                sw.WriteLine(string.Format(listUrlFormat, entry.Value));
            }
        }
    }
    catch (Exception ex)
    {
        // Best effort, as before, but the failure is no longer silent.
        WriteToLog("failed to write d://url.txt----" + ex.Message);
    }
}

/// <summary>
/// Records <paramref name="id"/> and queues it for crawling unless it was already seen.
/// </summary>
private static void EnqueueIfNew(Dictionary<string, string> seen, Queue<string> pending, string id)
{
    if (seen.ContainsKey(id))
    {
        return;
    }
    seen.Add(id, id);
    pending.Enqueue(id);
}

/// <summary>
/// Extracts one id per anchor inside <paramref name="menu"/>: the part of the href after its
/// first comma, without the href's last character.
/// </summary>
private static List<string> ExtractMenuIds(Element menu)
{
    // NOTE(review): presumably hrefs look like "...(x,ID)" so this yields ID — confirm against the page.
    List<string> ids = new List<string>();
    foreach (Element link in menu.GetElementsByTag("a"))
    {
        string href = link.Attr("href");
        int comma = href.IndexOf(',');
        int length = href.Length - comma - 2;
        if (length < 0)
        {
            // Too short to contain an id; Substring would throw.
            continue;
        }
        ids.Add(href.Substring(comma + 1, length));
    }
    return ids;
}
/// <summary>
/// Downloads the play page for the given ids and returns its HTML.
/// </summary>
/// <param name="id">Value placed in the <c>id</c> query parameter.</param>
/// <param name="nid">Value placed in the <c>nid</c> query parameter.</param>
/// <returns>The <c>Html</c> of the result returned by the HTTP helper.</returns>
public string GetPlayHtml(string id, string nid)
{
    HttpItem playRequest = new HttpItem();
    playRequest.URL = string.Format("http://www.91taoke.com/index.php?m=Taocan&a=taocan_play&id={0}&nid={1}", id, nid);
    playRequest.Method = "GET";
    playRequest.IsToLower = false;
    playRequest.Timeout = 100000;
    playRequest.ReadWriteTimeout = 60000;
    playRequest.UserAgent = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)";
    playRequest.ContentType = "text/html";
    playRequest.Allowautoredirect = false;
    playRequest.ResultType = ResultType.String;

    return http.GetHtml(playRequest).Html;
}
/// <summary>
/// Fetches <paramref name="url"/> and collects the course ids linked from the page.
/// </summary>
/// <param name="url">Address of the course list page.</param>
/// <returns>
/// The last path segment of the first link in each matching list entry. If parsing fails
/// part-way, the ids collected so far are returned.
/// </returns>
public List<string> GetCousreList(string url)
{
    List<string> courseIds = new List<string>();

    HttpItem item = new HttpItem();
    item.URL = url;
    item.Method = "GET";
    item.IsToLower = false;
    item.Timeout = 100000;
    item.ReadWriteTimeout = 60000;
    item.UserAgent = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)";
    item.ContentType = "text/html";
    item.Allowautoredirect = false;
    item.ResultType = ResultType.String;

    HttpResult result = http.GetHtml(item);
    Document doc = NSoupClient.Parse(result.Html);

    try
    {
        Elements containers = doc.GetElementsByClass("xhr_wksp_left");
        if (containers.Count != 0)
        {
            foreach (Element entry in containers[0].GetElementsByTag("li"))
            {
                Elements headers = entry.GetElementsByClass("xhr_wksp_r_h");
                if (headers.Count == 0)
                {
                    // Entry without a title block: nothing to extract.
                    continue;
                }

                // The course id is the last path segment of the first link's href.
                Element link = headers[0].GetElementsByTag("a")[0];
                string[] segments = link.Attr("href").Split('/');
                courseIds.Add(segments[segments.Length - 1]);
            }
        }
    }
    catch (Exception)
    {
        // Best effort: fall through and return whatever was collected before the failure.
    }

    return courseIds;
}