예제 #1
0
 /// <summary>
 /// Downloads the page at <c>BeautyFlowBaseUrl + id + "/"</c>, follows the first
 /// "/girl/.../" link found in it (resolved against http://curator.im), and hands
 /// every image node matched by <c>BeautyFlowImgFilter</c> on that second page to
 /// <c>GetPicUrlsFromBeautyPersonalPage</c>.
 /// </summary>
 /// <param name="id">Numeric id appended to <c>BeautyFlowBaseUrl</c> to build the first URL.</param>
 /// <remarks>
 /// The method never throws: failures are reported on the console and the method
 /// returns. A 404 response is treated as an expected outcome and is not reported.
 /// </remarks>
 static void BeautyFlow(int id)
 {
     try
     {
         // --- Step 1: fetch the listing page for this id. ---
         string htmlContent = "";
         string url = BeautyFlowBaseUrl + id + "/";
         HttpWebRequest httpWebRequest = HttpWebRequest.CreateHttp(url);
         httpWebRequest.Method = "GET";
         // 'using' guarantees the response and reader are released even when
         // the status is not OK or reading throws.
         using (HttpWebResponse firstResponse = (HttpWebResponse)httpWebRequest.GetResponse())
         {
             if (firstResponse.StatusCode == HttpStatusCode.OK)
             {
                 using (StreamReader firstReader = new StreamReader(new BufferedStream(firstResponse.GetResponseStream(), 4 * 200 * 1024)))
                 {
                     htmlContent = firstReader.ReadToEnd();
                 }
             }
         }

         if (htmlContent.Equals(""))
         {
             Console.WriteLine("得到的HTML为空!");
             return;
         }
         Console.WriteLine("第一个html读取完成!");

         // --- Step 2: locate the "/girl/<something>/" path inside the first page. ---
         const string linkMarker = "/girl/";
         int startIndex = htmlContent.IndexOf(linkMarker, StringComparison.Ordinal);
         if (startIndex < 0)
         {
             // Without this guard Substring would throw on a negative start index.
             Console.WriteLine("未找到/girl/链接! id=" + id);
             return;
         }
         int closingSlash = htmlContent.IndexOf('/', startIndex + linkMarker.Length);
         if (closingSlash < 0)
         {
             Console.WriteLine("未找到/girl/链接! id=" + id);
             return;
         }
         // Include the trailing slash in the extracted path.
         int endIndex = closingSlash + 1;
         string beautyMorePicturesLink = "http://curator.im" + htmlContent.Substring(startIndex, endIndex - startIndex);

         // --- Step 3: fetch the linked page. ---
         string htmlContentTwo = "";
         httpWebRequest = HttpWebRequest.CreateHttp(beautyMorePicturesLink);
         httpWebRequest.Method = "GET";
         using (HttpWebResponse secondResponse = (HttpWebResponse)httpWebRequest.GetResponse())
         {
             if (secondResponse.StatusCode == HttpStatusCode.OK)
             {
                 using (StreamReader secondReader = new StreamReader(new BufferedStream(secondResponse.GetResponseStream(), 4 * 200 * 1024)))
                 {
                     htmlContentTwo = secondReader.ReadToEnd();
                 }
             }
         }
         Console.WriteLine("第二个html读取完成!");

         // --- Step 4: parse the second page. ---
         Lexer lexer = new Lexer(htmlContentTwo);
         Parser parser = new Parser(lexer);
         parser.AnalyzePage();
         NodeList divList = parser.ExtractAllNodesThatMatch(BeautyNameFilter);
         // Exactly one name node is required before images are collected. The
         // name text itself is not used by this method, so it is no longer
         // extracted (the old Substring on '|' could throw and abort the run).
         if (divList.Count != 1)
         {
             Console.WriteLine("获取正妹名称出错! id=" + id);
             // Waits for a key press so the message can be read, as before.
             Console.Read();
             return;
         }

         // AnalyzePage is called again before the second extraction, exactly as
         // the original did (presumably it resets the parser - TODO confirm).
         parser.AnalyzePage();
         divList = parser.ExtractAllNodesThatMatch(BeautyFlowImgFilter);
         for (int i = 0; i < divList.Count; i++)
         {
             ImageTag imgNode = (ImageTag)divList[i];
             // Third argument 2 is passed through unchanged; its meaning is
             // defined by GetPicUrlsFromBeautyPersonalPage (not visible here).
             GetPicUrlsFromBeautyPersonalPage(imgNode, i, 2);
         }
     }
     catch (WebException ex)
     {
         // ex.Response is null when no HTTP response was received at all
         // (timeout, DNS failure, connection refused).
         HttpWebResponse errorResponse = ex.Response as HttpWebResponse;
         if (errorResponse == null)
         {
             Console.WriteLine("访问网页出错! id=" + id + " " + ex.Message);
             return;
         }
         try
         {
             // 404 is expected for ids that do not exist; stay quiet for it.
             if (errorResponse.StatusCode != HttpStatusCode.NotFound)
             {
                 Console.WriteLine("访问网页出错!状态码:" + errorResponse.StatusCode);
             }
         }
         finally
         {
             errorResponse.Close();
         }
     }
     catch (Exception ex)
     {
         // Keep the original "never throw" contract, but report the failure
         // instead of swallowing it silently.
         Console.WriteLine("处理网页出错! id=" + id + " " + ex.Message);
     }
 }
예제 #2
0
 /// <summary>
 /// Downloads the page at <c>oneDayOneBeautyBaseUrl + date + "/"</c>, extracts the
 /// image nodes matched by <c>OneDayOneBeautyImgFilter</c> (falling back to
 /// <c>OneDayOneBeautyImgFilter2</c> when the first filter matches nothing) and
 /// passes each one to <c>GetPicUrlsFromBeautyPersonalPage</c>.
 /// </summary>
 /// <param name="date">Date fragment appended to the base URL; the expected format is
 /// whatever the site uses (not visible here).</param>
 /// <returns>
 /// The number of image nodes processed, or 0 when the page is empty or the
 /// request fails with a <see cref="WebException"/>.
 /// </returns>
 static int OneDayOneBeauty(string date)
 {
     try
     {
         string htmlContent = "";
         string url = oneDayOneBeautyBaseUrl + date + "/";
         HttpWebRequest httpWebRequest = HttpWebRequest.CreateHttp(url);
         httpWebRequest.Method = "GET";
         // 'using' releases the response and reader on every path, including
         // a non-OK status and exceptions thrown while reading.
         using (HttpWebResponse pageResponse = (HttpWebResponse)httpWebRequest.GetResponse())
         {
             if (pageResponse.StatusCode == HttpStatusCode.OK)
             {
                 using (StreamReader reader = new StreamReader(new BufferedStream(pageResponse.GetResponseStream(), 4 * 200 * 1024)))
                 {
                     htmlContent = reader.ReadToEnd();
                 }
             }
         }

         if (htmlContent.Equals(""))
         {
             Console.WriteLine("得到的HTML为空!");
             return 0;
         }

         Lexer lexer = new Lexer(htmlContent);
         Parser parser = new Parser(lexer);
         parser.AnalyzePage();
         NodeList divList = parser.ExtractAllNodesThatMatch(OneDayOneBeautyImgFilter);
         if (divList.Count == 0)
         {
             // First filter found nothing: re-analyze and try the alternative filter.
             parser.AnalyzePage();
             divList = parser.ExtractAllNodesThatMatch(OneDayOneBeautyImgFilter2);
         }
         for (int i = 0; i < divList.Count; i++)
         {
             ImageTag imgNode = (ImageTag)divList[i];
             // Changed on 2014-05-16 to follow the page structure.
             // Third argument 1 is passed through unchanged; its meaning is
             // defined by GetPicUrlsFromBeautyPersonalPage (not visible here).
             GetPicUrlsFromBeautyPersonalPage(imgNode, i, 1);
         }
         return divList.Count;
     }
     catch (WebException e)
     {
         // e.Response is null when no HTTP response was received (timeout,
         // DNS failure, connection refused); dereferencing it would throw
         // NullReferenceException from inside the handler.
         HttpWebResponse errorResponse = e.Response as HttpWebResponse;
         if (errorResponse == null)
         {
             Console.WriteLine("访问网页出错!" + e.Message);
             return 0;
         }
         try
         {
             if (errorResponse.StatusCode == HttpStatusCode.NotFound)
             {
                 Console.WriteLine("网页未找到!");
             }
             else
             {
                 Console.WriteLine("访问网页出错!状态码:" + errorResponse.StatusCode);
             }
         }
         finally
         {
             errorResponse.Close();
         }
         return 0;
     }
 }