//传递过来一个List对象 //返回的是一个 类 public static Web_Content Test_Ip(Ip ip, string url) { //Console.WriteLine(ip.Port); //尝试代理Ip 接受请求 WebProxy proxyObject = new WebProxy(ip.Address, Convert.ToInt32(ip.Port)); // WebProxy proxyObject = new WebProxy("47.240.100.124"); //设置请求 HttpWebRequest Req = (HttpWebRequest)WebRequest.Create(url); // 61.183.192.5 Req.UserAgent = @"Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0; QQWubi 133; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; CIBA; InfoPath.2)"; Req.Proxy = proxyObject; //设置代理 Req.Method = "GET"; Req.Timeout = 5000; Web_Content result = new Web_Content(); try { HttpWebResponse Resp = (HttpWebResponse)Req.GetResponse(); Encoding code = Encoding.GetEncoding("UTF-8"); using (StreamReader sr = new StreamReader(Resp.GetResponseStream(), code)) { string str = null; str = sr.ReadToEnd(); //获取得到的网址html返回数据,这里就可以使用某些解析html的dll直接使用了,比如htmlpaser if (str != null) { result.Connect = true; result.Content = str; return(result); } } } catch (Exception e) { result.Content = ""; result.Connect = false; } result.Content = ""; result.Connect = false; return(result); }
static void Main(string[] args) { /*获取网页内容*/ try { //传递的字符串 string url = "http://www.xicidaili.com/nn/1"; string Html = Get_Html(url); /* //获取代理*/ #region string regex = @"<td>(\d+\.\d+\.\d+\.\d+)</td>\s+<td>(\d+)</td>"; Match mstr = Regex.Match(Html, regex); //c#高效队列 //ConcurrentQueue<string> proxyIp = new ConcurrentQueue<string>(); List <Ip> proxyIp = new List <Ip>(); //设置dictionary 来存放 while (mstr.Success) { // Response.Write(match.Value + "<br/>"); // match = match.NextMatch(); //} 获取 /* Console.WriteLine(mstr.Groups[1].Value + ":" + mstr.Groups[2].Value);*/ /*存放进高效队列*/ proxyIp.Add(new Ip(mstr.Groups[1].Value.ToString(), mstr.Groups[2].Value.ToString())); mstr = mstr.NextMatch(); } #endregion //输出获得代理ip List中的个数 Console.WriteLine("从代理网站获取的Ip个数" + proxyIp.Count); //处理获取到的Ip 用来测试网页 输入值为List中的Ip,输出值为爬取的网站 #region string Catched_url = "https://baidu.com"; //string Catched_url = @"http://ditu.92cha.com/dizhen.php?dizhen_ly=china&dizhen_zjs=1&dizhen_zje=6&dizhen_riqis=&dizhen_riqie=&ckwz="; int i = 0; foreach (Ip ip in proxyIp) { Web_Content result = Test_Ip(ip, Catched_url); if (result.Connect == false) { Console.WriteLine(ip.Address + "不可以用"); continue; } else { Console.WriteLine(ip.Address + "可以用"); //写入文件 //StreamWriter.Write //在将文本写入文件前,处理文本行 //StreamWriter一个参数默认覆盖 //StreamWriter第二个参数为false覆盖现有文件,为true则把文本追加到文件末尾 //StreamWriter(Stream stream);推荐使用,灵活便于理解多态 //StreamWriter(string path);写死了就是指向硬盘写文件 /* * //C#文件流写文件,默认追加FileMode.Append * Console.WriteLine("你爬取的网站为"+Catched_url+"\n"+"请输入你保存文件的名字"); * string FileName = Console.ReadLine(); * FileName = @"../../"+DateTime.Now.Year.ToString()+"_"+DateTime.Now.Month.ToString()+"_"+DateTime.Now.Day.ToString()+"_"+DateTime.Now.Hour+"_"+DateTime.Now.Minute +"_"+ FileName + ".txt"; * byte[] myByte = System.Text.Encoding.UTF8.GetBytes(result.Content); * using (FileStream fsWrite = new FileStream(FileName, FileMode.Append)) * { * fsWrite.Write(myByte, 0, myByte.Length); * }; * Console.WriteLine("写入完成"); * * */ i++; continue; } } #endregion Console.WriteLine(i + "可用"); } catch (Exception e) { //输出当前异常的信息 Console.WriteLine(e.Message); Console.WriteLine(e.ToString()); } Console.ReadKey(); }