int ExecuteNum = 0; // failure counter used by GetHtmlString to cap download retries
#endregion Fields

#region Methods

/// <summary>
/// Creates the target folder, downloads the page at <paramref name="url"/>, and saves every
/// image found under <c>#read_tpc</c> into that folder.
/// </summary>
/// <param name="url">Address of the page whose images are fetched.</param>
/// <param name="path">Folder the images are saved into.</param>
/// <param name="titleid">Title id passed through to <c>dal.AddTitleDetail</c> for each saved image.</param>
public void CatchImgPutPath(string url, string path, int titleid)
{
    CreateFloder(path);

    MyWebClient client = new MyWebClient();
    ExecuteNum = 0; // start this page with a clean retry counter
    string pageHtml = GetHtmlString(client, url);

    NSoup.Nodes.Document page = NSoup.NSoupClient.Parse(pageHtml);
    Elements images = page.Select("#read_tpc img");

    foreach (var image in images)
    {
        string imageUri = image.Attr("src");
        if (string.IsNullOrEmpty(imageUri))
        {
            continue;
        }

        int attempts = 0;
        string savedName = "";
        string savedPath;

        // Call TestStream once, then keep retrying while it yields no path and the
        // attempt counter is still below 3.
        // NOTE(review): termination relies on TestStream advancing the ref counter on
        // failure - confirm against its implementation.
        do
        {
            savedPath = TestStream(imageUri, path, ref savedName, ref attempts);
        }
        while (string.IsNullOrEmpty(savedPath) && attempts < 3);

        if (string.IsNullOrEmpty(savedPath))
        {
            continue;
        }

        // Record the saved image; 101 is the same category code used elsewhere in this class.
        dal.AddTitleDetail(101, titleid, savedName, savedPath);
    }
}
/// <summary>
/// Downloads the listing page at <paramref name="url"/>, reads the topic links matched by
/// <c>.t_one h3 a</c>, registers a title for the link and saves its images into a folder
/// derived from <paramref name="downpath"/>.
/// </summary>
/// <param name="url">Address of the listing page to crawl.</param>
/// <param name="downpath">Base save path; the created title id is appended to form the image folder.</param>
/// <param name="hosturi">Host prefix prepended to each link's <c>href</c>, for sites that emit relative addresses.</param>
public void CatchUriByPUri(string url, string downpath, string hosturi)
{
    MyWebClient webClient = new MyWebClient();

    // Reset the shared retry counter (as CatchImgPutPath does) so failures left over from
    // earlier downloads do not consume this call's retries.
    ExecuteNum = 0;
    string html = GetHtmlString(webClient, url);

    // Every attempt failed or timed out: give up on this page for now.
    if (string.IsNullOrEmpty(html))
    {
        return;
    }

    NSoup.Nodes.Document doc = NSoup.NSoupClient.Parse(html);
    Elements links = doc.Select(".t_one h3 a");

    foreach (var item in links)
    {
        string uri = hosturi + item.Attr("href"); // page that holds the images
        string text = FilterFloder(item.Text());  // title, filtered for use as a folder name

        // Only register a title once it is known to be usable; creating it first left
        // rows with an empty title and no images in the database.
        if (!string.IsNullOrEmpty(text))
        {
            int titleid = dal.CreateTitle(101, text);
            string path = downpath + titleid; // folder that receives the images
            CatchImgPutPath(uri, path, titleid);
        }

        // NOTE(review): debug leftover - only the first link is processed. Remove this
        // return to crawl every link on the page.
        return;
    }
}
/// <summary>
/// Downloads <paramref name="url"/> and decodes the response as UTF-8, retrying when the
/// download throws. Each failure increments the shared <c>ExecuteNum</c> counter; once it
/// exceeds 3 the method gives up. The counter is not reset here, so callers reset it
/// before starting a new download.
/// </summary>
/// <param name="webClient">Client used to perform the download.</param>
/// <param name="url">Address to download.</param>
/// <returns>The decoded page content, or an empty string when the retry budget is exhausted.</returns>
private string GetHtmlString(MyWebClient webClient, string url)
{
    while (true)
    {
        try
        {
            // Return the first successful download. The previous recursive retry dropped
            // its result, so a successful retry still produced an empty string.
            return Encoding.GetEncoding("utf-8").GetString(webClient.DownloadData(url));
        }
        catch (Exception)
        {
            // Best-effort fetch: any failure (timeout, network error, ...) counts as one
            // attempt against the shared budget.
            ExecuteNum++;
            if (ExecuteNum > 3)
            {
                return "";
            }
        }
    }
}