/// <summary>
/// Posts <paramref name="postdata"/> to <paramref name="loginUrl"/> and returns the
/// cookie container that was attached to that request.
/// </summary>
/// <param name="loginUrl">URL the login data is posted to.</param>
/// <param name="postdata">Request body; it is sent as UTF-8 encoded bytes.</param>
/// <param name="header">Header settings; only <c>contentType</c> is read by this method.</param>
/// <returns>
/// The <see cref="CookieContainer"/> assigned to the request. The framework stores
/// cookies returned by the server in the container attached to the request.
/// </returns>
public static CookieContainer GetCooKie(string loginUrl, string postdata, HttpHeader header)
{
    CookieContainer cookies = new CookieContainer();

    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(loginUrl);
    request.Method = "POST";
    request.ContentType = header.contentType;

    byte[] body = Encoding.UTF8.GetBytes(postdata);
    request.ContentLength = body.Length;
    // Redirects are not followed, so the response handled here is the direct
    // answer to the POST.
    request.AllowAutoRedirect = false;
    request.CookieContainer = cookies;
    request.KeepAlive = true;

    // Submit the request body; the stream is released even if the write fails.
    using (Stream requestStream = request.GetRequestStream())
    {
        requestStream.Write(body, 0, body.Length);
    }

    // Receive the response. Its body is not needed, but the response must be
    // disposed so the underlying connection is released.
    using (WebResponse response = request.GetResponse())
    {
    }

    return cookies;
}
/// <summary>
/// Builds the set of request header values used for www.zhihu.com.
/// </summary>
/// <returns>
/// A new <c>HttpHeader</c> with accept, accept-encoding, accept-language, connection,
/// host, user-agent and referer filled in. No other member is assigned here.
/// </returns>
public static HttpHeader GetHeader()
{
    return new HttpHeader
    {
        accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        acceptEncoding = "gzip, deflate, sdch",
        acceptLaguage = "en-US,en;q=0.8,zh-CN;q=0.6,zh;q=0.4,zh-TW;q=0.2",
        connection = "keep-alive",
        host = "www.zhihu.com",
        userAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.130 Safari/537.36",
        referer = "http://www.zhihu.com/"
    };
}
/// <summary>
/// Issues a GET request for <paramref name="getUrl"/> using the supplied cookies and
/// returns the response body as text.
/// </summary>
/// <param name="getUrl">URL to download; it is also sent as the Referer.</param>
/// <param name="cookieContainer">Cookies attached to the request.</param>
/// <param name="header">
/// Header settings; <c>contentType</c>, <c>accept</c> and <c>userAgent</c> are read.
/// </param>
/// <returns>The response body decoded as UTF-8.</returns>
public static string GetHtml(string getUrl, CookieContainer cookieContainer, HttpHeader header)
{
    HttpWebRequest httpWebRequest = (HttpWebRequest)WebRequest.Create(getUrl);
    httpWebRequest.CookieContainer = cookieContainer;
    httpWebRequest.ContentType = header.contentType;
    httpWebRequest.Referer = getUrl;
    httpWebRequest.Accept = header.accept;
    httpWebRequest.UserAgent = header.userAgent;
    httpWebRequest.Method = "GET";

    // Stacked using blocks release the response, its stream and the reader even
    // when reading fails part-way through.
    using (HttpWebResponse httpWebResponse = (HttpWebResponse)httpWebRequest.GetResponse())
    using (Stream responseStream = httpWebResponse.GetResponseStream())
    using (StreamReader streamReader = new StreamReader(responseStream, Encoding.UTF8))
    {
        // The body is always decoded as UTF-8; the charset announced by the
        // server is not consulted.
        return streamReader.ReadToEnd();
    }
}
/// <summary>
/// Downloads a question page and reads the question title and its tags into a
/// <c>QuestionModel</c>.
/// </summary>
/// <remarks>
/// The populated model is currently neither returned nor stored; reading the answers
/// and persisting the result are still to be implemented.
/// </remarks>
/// <param name="getUrl">URL of the question page.</param>
/// <param name="cookieContainer">Cookies passed on to <c>HttpHelper.GetHtml</c>.</param>
/// <param name="header">Header settings passed on to <c>HttpHelper.GetHtml</c>.</param>
public void GetQuesionInfo(string getUrl, CookieContainer cookieContainer, HttpHeader header)
{
    QuestionModel qm = new QuestionModel();
    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(HttpHelper.GetHtml(getUrl, cookieContainer, header));

    // Question title. SelectSingleNode yields null when the XPath matches nothing
    // (e.g. the page layout changed), so the title is only assigned when found.
    var titleNode = doc.DocumentNode.SelectSingleNode("//*[@id='zh-question-title']/h2/text()");
    if (titleNode != null)
    {
        qm.Qname = titleNode.InnerText;
    }

    // Tags. SelectNodes yields null (not an empty collection) when nothing matches.
    // NOTE(review): assumes QuestionModel initialises Qtip to a non-null collection - confirm.
    var tipLists = doc.DocumentNode.SelectNodes("//*[@class='zm-item-tag']");
    if (tipLists != null)
    {
        foreach (var tip in tipLists)
        {
            qm.Qtip.Add(tip.InnerText);
        }
    }

    // TODO: loop over the answers and read them.
    // TODO: store the result.
}
/// <summary>
/// Downloads a user's profile page and writes the nickname, one-line description,
/// location, industry, company and position to the console.
/// </summary>
/// <remarks>
/// The fields are located with absolute XPaths. A field whose node is not present
/// in the page is printed as an empty string.
/// </remarks>
/// <param name="getUrl">URL of the profile page.</param>
/// <param name="cookieContainer">Cookies passed on to <c>HttpHelper.GetHtml</c>.</param>
/// <param name="header">Header settings passed on to <c>HttpHelper.GetHtml</c>.</param>
private void GetMainPage(string getUrl, CookieContainer cookieContainer, HttpHeader header)
{
    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(HttpHelper.GetHtml(getUrl, cookieContainer, header));

    // Nickname
    var name = ReadInnerText(doc, "/html/body/div[3]/div[1]/div/div[1]/div[1]/div[1]/div[2]/span[1]");
    // One-line description
    var title = ReadInnerText(doc, "/html/body/div[3]/div[1]/div/div[1]/div[1]/div[1]/div[2]/span[2]");
    // Location
    var pos = ReadInnerText(doc, "/html/body/div[3]/div[1]/div/div[1]/div[1]/div[2]/div[2]/div/div[1]/div[1]/span[1]/span[1]");
    // Industry
    var industry = ReadInnerText(doc, "/html/body/div[3]/div[1]/div/div[1]/div[1]/div[2]/div[2]/div/div[1]/div[1]/span[1]/span[2]");
    // Company
    var company = ReadInnerText(doc, "/html/body/div[3]/div[1]/div/div[1]/div[1]/div[2]/div[2]/div/div[1]/div[2]/span[1]/span[1]");
    // Position
    var post = ReadInnerText(doc, "/html/body/div[3]/div[1]/div/div[1]/div[1]/div[2]/div[2]/div/div[1]/div[2]/span[1]/span[2]");

    Console.WriteLine($"姓名:{name} 说明:{title} \r\n地址:{pos} 行业:{industry} 公司:{company} 职位:{post}");
}

/// <summary>
/// Returns the inner text of the first node matching <paramref name="xpath"/>, or an
/// empty string when no node matches.
/// </summary>
private static string ReadInnerText(HtmlDocument doc, string xpath)
{
    var node = doc.DocumentNode.SelectSingleNode(xpath);
    return node != null ? node.InnerText : string.Empty;
}