/// <summary>
/// Downloads the flow page for <paramref name="id"/>, follows the first
/// "/girl/.../" link found in it, and passes every image node matched by
/// BeautyFlowImgFilter on that second page to GetPicUrlsFromBeautyPersonalPage.
/// </summary>
/// <param name="id">Numeric id appended to BeautyFlowBaseUrl to build the first URL.</param>
/// <remarks>
/// Best-effort: failures are reported on the console and never propagate to the caller.
/// A 404 response is ignored silently.
/// </remarks>
static void BeautyFlow(int id)
{
    try
    {
        string htmlContent = DownloadBeautyFlowHtml(BeautyFlowBaseUrl + id + "/");
        if (htmlContent.Length == 0)
        {
            Console.WriteLine("得到的HTML为空!");
            return;
        }

        Console.WriteLine("第一个html读取完成!");

        // Locate the first "/girl/<something>/" path; the trailing slash is part of the link.
        const string girlPathPrefix = "/girl/";
        int startIndex = htmlContent.IndexOf(girlPathPrefix, StringComparison.Ordinal);
        int closingSlashIndex = startIndex < 0
            ? -1
            : htmlContent.IndexOf("/", startIndex + girlPathPrefix.Length, StringComparison.Ordinal);
        if (closingSlashIndex < 0)
        {
            // Without this guard Substring would throw on a page that has no such link.
            Console.WriteLine("未找到正妹页面链接! id=" + id);
            return;
        }

        string beautyMorePicturesLink = "http://curator.im"
            + htmlContent.Substring(startIndex, closingSlashIndex + 1 - startIndex);

        // The second page is parsed even when it came back empty; the name check below rejects it.
        string htmlContentTwo = DownloadBeautyFlowHtml(beautyMorePicturesLink);
        Console.WriteLine("第二个html读取完成!");

        Lexer lexer = new Lexer(htmlContentTwo);
        Parser parser = new Parser(lexer);

        // The page is only processed when exactly one node matches the name filter.
        parser.AnalyzePage();
        NodeList nameNodes = parser.ExtractAllNodesThatMatch(BeautyNameFilter);
        if (nameNodes.Count != 1)
        {
            Console.WriteLine("获取正妹名称出错! \nid=" + id);
            // Waits for console input before returning.
            Console.Read();
            return;
        }

        // AnalyzePage is called again before the second extraction, as the original code did.
        parser.AnalyzePage();
        NodeList imageNodes = parser.ExtractAllNodesThatMatch(BeautyFlowImgFilter);
        for (int i = 0; i < imageNodes.Count; i++)
        {
            ImageTag imgNode = (ImageTag)imageNodes[i];
            // NOTE(review): the literal 2 presumably selects the page type - confirm against
            // GetPicUrlsFromBeautyPersonalPage.
            GetPicUrlsFromBeautyPersonalPage(imgNode, i, 2);
        }
    }
    catch (WebException ex)
    {
        // ex.Response is null when no HTTP response was received (timeout, DNS failure, ...).
        using (HttpWebResponse errorResponse = ex.Response as HttpWebResponse)
        {
            if (errorResponse == null)
            {
                Console.WriteLine("访问网页出错!" + ex.Message);
            }
            else if (errorResponse.StatusCode != HttpStatusCode.NotFound)
            {
                Console.WriteLine("访问网页出错!状态码:" + errorResponse.StatusCode);
            }
        }
    }
    catch (Exception ex)
    {
        // Keep the best-effort contract, but make the failure visible.
        Console.WriteLine("处理网页出错! id=" + id + " " + ex.Message);
    }
}

/// <summary>
/// Issues a GET request for <paramref name="url"/> and returns the response body,
/// or an empty string when the status code is not 200 OK.
/// </summary>
/// <param name="url">Absolute URL to download.</param>
/// <returns>The response body as text; "" for a non-OK status.</returns>
/// <exception cref="WebException">Thrown by GetResponse when the request fails.</exception>
private static string DownloadBeautyFlowHtml(string url)
{
    HttpWebRequest request = HttpWebRequest.CreateHttp(url);
    request.Method = "GET";

    // The using blocks release the connection even when reading throws.
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    {
        if (response.StatusCode != HttpStatusCode.OK)
        {
            return "";
        }

        using (StreamReader reader = new StreamReader(
            new BufferedStream(response.GetResponseStream(), 4 * 200 * 1024)))
        {
            return reader.ReadToEnd();
        }
    }
}
/// <summary>
/// Downloads the page at oneDayOneBeautyBaseUrl + <paramref name="date"/> + "/" and passes
/// every image node matched by OneDayOneBeautyImgFilter (or, when that matches nothing,
/// OneDayOneBeautyImgFilter2) to GetPicUrlsFromBeautyPersonalPage.
/// </summary>
/// <param name="date">Date segment appended to the base URL; the format is decided by the caller.</param>
/// <returns>
/// The number of image nodes found, or 0 when the page body is empty or the request
/// fails with a <see cref="WebException"/>.
/// </returns>
static int OneDayOneBeauty(string date)
{
    try
    {
        string htmlContent = "";
        HttpWebRequest request = HttpWebRequest.CreateHttp(oneDayOneBeautyBaseUrl + date + "/");
        request.Method = "GET";

        // The using blocks release the connection on every path, including exceptions.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        {
            if (response.StatusCode == HttpStatusCode.OK)
            {
                using (StreamReader reader = new StreamReader(
                    new BufferedStream(response.GetResponseStream(), 4 * 200 * 1024)))
                {
                    htmlContent = reader.ReadToEnd();
                }
            }
        }

        if (htmlContent.Length == 0)
        {
            Console.WriteLine("得到的HTML为空!");
            return 0;
        }

        Lexer lexer = new Lexer(htmlContent);
        Parser parser = new Parser(lexer);
        parser.AnalyzePage();
        NodeList imageNodes = parser.ExtractAllNodesThatMatch(OneDayOneBeautyImgFilter);
        if (imageNodes.Count == 0)
        {
            // Nothing matched the first filter: re-analyze and try the second one.
            parser.AnalyzePage();
            imageNodes = parser.ExtractAllNodesThatMatch(OneDayOneBeautyImgFilter2);
        }

        for (int i = 0; i < imageNodes.Count; i++)
        {
            ImageTag imgNode = (ImageTag)imageNodes[i];
            // Modified on 2014-05-16 to follow the page structure.
            // NOTE(review): the literal 1 presumably selects the page type - confirm against
            // GetPicUrlsFromBeautyPersonalPage.
            GetPicUrlsFromBeautyPersonalPage(imgNode, i, 1);
        }

        return imageNodes.Count;
    }
    catch (WebException e)
    {
        // e.Response is null when no HTTP response was received (timeout, DNS failure,
        // connection refused), so it must not be dereferenced unconditionally.
        using (HttpWebResponse errorResponse = e.Response as HttpWebResponse)
        {
            if (errorResponse == null)
            {
                Console.WriteLine("访问网页出错!" + e.Message);
            }
            else if (errorResponse.StatusCode == HttpStatusCode.NotFound)
            {
                Console.WriteLine("网页未找到!");
            }
            else
            {
                Console.WriteLine("访问网页出错!状态码:" + errorResponse.StatusCode);
            }
        }

        return 0;
    }
}