/// <summary>
/// Fetches one page of a gallery on a background task, extracts the main image URL and the
/// highest page number shown in the page's <c>&lt;span&gt;</c> elements, and passes the image to
/// <c>DownImage</c>.
/// </summary>
/// <param name="Id">Gallery identifier; used in the page URL, as the download sub-folder and in the file name.</param>
/// <param name="pageIndex">Page number inside the gallery; appended to the URL and to the file name.</param>
/// <param name="action">Called from the <c>DownImage</c> callback with the path it reports and the detected page count.</param>
/// <param name="exaction">Called with any exception raised while fetching or parsing the page.</param>
public static void ImagePage(string Id, int pageIndex, Action<string, int> action, Action<Exception> exaction)
{
    Task.Factory.StartNew(() =>
    {
        try
        {
            string pageUrl = "https://www.mzitu.com/" + Id + "/" + pageIndex;
            HTTP http = new HTTP();
            HttpItem httpItem = new HttpItem
            {
                Allowautoredirect = true,
                URL = pageUrl,
                Referer = "https://www.mzitu.com/",
                Host = "www.mzitu.com"
            };
            HttpResult response = http.GetHtml(httpItem);
            string html = response.Html;

            int pageSize = 0;
            string img = "";

            // Expected markup:
            // <div class="main-image"><p><a href="..."><img src="..." alt="..." /></a></p></div>
            foreach (Match block in Regex.Matches(html, "<div class=\"main-image\">[\\s\\S]*?</div>"))
            {
                foreach (Match tag in Regex.Matches(block.Value, "<img[\\s\\S]*?/>"))
                {
                    // Takes the first double-quoted attribute value of the tag; the last tag found wins.
                    img = tag.Value.Split('\"')[1];
                }
            }

            // Pagination numbers are rendered as <span>N</span>; keep the largest one.
            foreach (Match span in Regex.Matches(html, "<span>[\\s\\S]*?</span>"))
            {
                string text = span.Value.Replace("<span>", "").Replace("</span>", "");
                int candidate;

                // TryParse instead of Convert.ToInt32: short non-numeric spans (labels, ellipses)
                // are skipped rather than throwing and failing the whole page.
                if (text.Length < 5 && int.TryParse(text, out candidate) && candidate > pageSize)
                {
                    pageSize = candidate;
                }
            }

            DownImage(Id + "\\", Id + "_" + pageIndex + ".jpg", img, new Action<string>((path) =>
            {
                action(path, pageSize);
            }));
        }
        catch (Exception ex)
        {
            // Catch-all for network and processing errors; reported to the caller's error callback.
            exaction(ex);
        }
    });
}
// http://www.mzitu.com/
/// <summary>
/// Home page: fetches one page of the gallery listing on a background task, parses every
/// <c>&lt;li&gt;</c> entry of the <c>&lt;ul id="pins"&gt;</c> list into an <c>Image</c>, and reports
/// the list together with the highest page number found in <c>/page/N/</c> links.
/// </summary>
/// <param name="pageIndex">Listing page number; inserted into the <c>/page/{pageIndex}/</c> URL.</param>
/// <param name="action">Called with the parsed entries and the detected page count once parsing finishes.</param>
/// <param name="exaction">Called with any exception raised while fetching or parsing the page.</param>
public static void MainPage(int pageIndex, Action<List<Image>, int> action, Action<Exception> exaction)
{
    Task.Factory.StartNew(() =>
    {
        try
        {
            string pageUrl = "https://www.mzitu.com/page/" + pageIndex + "/";
            HTTP http = new HTTP();
            HttpItem httpItem = new HttpItem
            {
                Allowautoredirect = true,
                URL = pageUrl,
                Referer = "https://www.mzitu.com/",
                Host = "www.mzitu.com"
            };
            HttpResult response = http.GetHtml(httpItem);
            string html = response.Html;

            List<Image> list = new List<Image>();
            int pageSize = 0;

            // The gallery entries live inside <ul id="pins"> ... </ul>, one <li> per gallery.
            foreach (Match pins in Regex.Matches(html, "<ul id=\"pins\">[\\s\\S]*?</ul>"))
            {
                foreach (Match entry in Regex.Matches(pins.Value, "<li>[\\s\\S]*?</li>"))
                {
                    Image image = new Image();
                    string itemText = entry.Value;

                    // Title is the text of the second anchor, e.g.
                    // <a href="http://www.mzitu.com/104369" target="_blank">title</a>
                    image.Name = Regex.Matches(itemText, "<a[\\s\\S]*?</a>")[1].Value.Split('>')[1].Split('<')[0];

                    // Link is the first quoted value of the first anchor: <a href="http://www.mzitu.com/104369"
                    image.Href = Regex.Matches(itemText, "<a[\\s\\S]*?target=")[0].Value.Split('"')[1];

                    // Date, e.g. <span class="time">2017-09-30</span>
                    image.Time = Regex.Matches(itemText, "time\"[\\s\\S]*?</span>")[0].Value.Split('>')[1].Split('<')[0];

                    // The gallery id is the last path segment of the link.
                    int lastSlash = image.Href.LastIndexOf('/');
                    image.Id = image.Href.Substring(lastSlash + 1);

                    // Start with the remote thumbnail URL; the DownImage callback replaces it
                    // with the path it reports.
                    image.ImageUrl = Regex.Matches(itemText, "data-original=[\\s\\S]*?/>")[0].Value.Split('\'')[1];
                    DownImage(image.Href.Substring(lastSlash) + ".jpg", image.ImageUrl, new Action<string>((path) =>
                    {
                        image.ImageUrl = path;
                    }));

                    list.Add(image);
                }
            }

            // Pagination links contain /page/N/; keep the largest N.
            foreach (Match link in Regex.Matches(html, "/page/[\\s\\S]*?/"))
            {
                string text = link.Value.Replace("/page/", "").Replace("/", "");
                int candidate;

                // TryParse instead of Convert.ToInt32: a non-numeric or oversized segment is
                // skipped rather than throwing and discarding the parsed list.
                if (int.TryParse(text, out candidate) && candidate > pageSize)
                {
                    pageSize = candidate;
                }
            }

            // Hand the results back to the caller.
            action(list, pageSize);
        }
        catch (Exception ex)
        {
            // Catch-all for network and processing errors; reported to the caller's error callback.
            exaction(ex);
        }
    });
}
/// <summary>
/// Loads http://httpbin.org/html through <c>HTTP.GetHtml</c> and checks that the text of the
/// first <c>h1</c> element is the expected title. Requires network access.
/// </summary>
public void GetHtmlTest()
{
    // Act
    var document = HTTP.GetHtml("http://httpbin.org/html");
    var firstHeading = document.GetElementsByTagName("h1")[0];

    // Assert
    Assert.Equal("Herman Melville - Moby-Dick", firstHeading.TextContent);
}