Beispiel #1
0
        /// <summary>
        /// Fetches a single gallery page on a background task, extracts the image URL from the
        /// <c>main-image</c> container and the largest page number found in bare
        /// <c>&lt;span&gt;</c> elements, then passes the image to <c>DownImage</c>.
        /// </summary>
        /// <param name="Id">Gallery identifier; appended to the site URL and used to build the download folder and file name.</param>
        /// <param name="pageIndex">Page number within the gallery; appended to the URL and the file name.</param>
        /// <param name="action">Invoked from the <c>DownImage</c> callback with the string that callback receives and the largest page number parsed (0 if none was found).</param>
        /// <param name="exaction">Invoked with any exception thrown while fetching or parsing the page.</param>
        public static void ImagePage(string Id, int pageIndex, Action<string, int> action, Action<Exception> exaction)
        {
            Task.Factory.StartNew(() =>
            {
                try
                {
                    string URL = "https://www.mzitu.com/" + Id + "/" + pageIndex;

                    HTTP hTTP         = new HTTP();
                    HttpItem httpItem = new HttpItem
                    {
                        Allowautoredirect = true,
                        URL     = URL,
                        Referer = "https://www.mzitu.com/",
                        Host    = "www.mzitu.com"
                    };
                    HttpResult response = hTTP.GetHtml(httpItem);
                    string msg          = response.Html;

                    int pageSize = 0;
                    string img   = "";

                    // Expected markup:
                    // <div class="main-image"><p><a href="..."><img src="http://.../06c01.jpg" alt="..." /></a></p></div>
                    var matches = Regex.Matches(msg, "<div class=\"main-image\">[\\s\\S]*?</div>");
                    foreach (Match mc in matches)
                    {
                        string allText = mc.Groups[0].Value;

                        var matchesItem = Regex.Matches(allText, "<img[\\s\\S]*?/>");
                        foreach (Match mcItem in matchesItem)
                        {
                            // The first double-quoted value of the tag is taken as the image URL;
                            // when several <img> tags match, the last one wins.
                            img = mcItem.Groups[0].Value.Split('\"')[1];
                        }
                    }

                    // Page numbers are read from attribute-less <span> elements; the largest one is kept.
                    var page = Regex.Matches(msg, "<span>[\\s\\S]*?</span>");
                    foreach (Match mc in page)
                    {
                        string allText = mc.Groups[0].Value.Replace("<span>", "").Replace("</span>", "");
                        if (allText.Length >= 5)
                        {
                            continue;
                        }

                        // Short spans are not guaranteed to be numeric (empty span, arrows, labels).
                        // TryParse skips those instead of throwing and failing the whole page.
                        int temp;
                        if (int.TryParse(allText, out temp) && temp > pageSize)
                        {
                            pageSize = temp;
                        }
                    }

                    DownImage(Id + "\\", Id + "_" + pageIndex + ".jpg", img, new Action<string>((path) =>
                    {
                        action(path, pageSize);
                    }));
                }
                catch (Exception ex) // Catch-all: network errors and parsing errors are reported to the caller.
                {
                    exaction(ex);
                }
            });
        }
Beispiel #2
0
        // http://www.mzitu.com/
        /// <summary>
        /// Fetches one page of the site's front-page listing on a background task, parses every
        /// <c>&lt;li&gt;</c> inside <c>&lt;ul id="pins"&gt;</c> into an <c>Image</c>, starts a
        /// <c>DownImage</c> call for each thumbnail, and reports the list together with the
        /// largest page number found in <c>/page/N/</c> links.
        /// </summary>
        /// <param name="pageIndex">Listing page number; inserted into the request URL.</param>
        /// <param name="action">Invoked with the parsed items and the largest page number found (0 if none was found).</param>
        /// <param name="exaction">Invoked with any exception thrown while fetching or parsing the page.</param>
        public static void MainPage(int pageIndex, Action<List<Image>, int> action, Action<Exception> exaction)
        {
            Task.Factory.StartNew(() =>
            {
                try
                {
                    string URL = "https://www.mzitu.com/page/" + pageIndex + "/";
                    HTTP hTTP  = new HTTP();

                    HttpItem httpItem = new HttpItem
                    {
                        Allowautoredirect = true,
                        URL     = URL,
                        Referer = "https://www.mzitu.com/",
                        Host    = "www.mzitu.com"
                    };
                    HttpResult response = hTTP.GetHtml(httpItem);

                    // msg holds the HTML of the listing page.
                    string msg = response.Html;

                    List<Image> list = new List<Image>();
                    int pageSize     = 0;

                    // The gallery entries live inside <ul id="pins"> ... </ul>.
                    var matches = Regex.Matches(msg, "<ul id=\"pins\">[\\s\\S]*?</ul>");
                    foreach (Match mc in matches)
                    {
                        string allText = mc.Groups[0].Value;

                        // One <li> per gallery entry.
                        var matchesItem = Regex.Matches(allText, "<li>[\\s\\S]*?</li>");
                        foreach (Match mcItem in matchesItem)
                        {
                            Image image     = new Image();
                            string ItemText = mcItem.Groups[0].Value;

                            // Title: text of the second anchor, e.g. <a href="http://www.mzitu.com/104369" target="_blank">title</a>
                            image.Name = Regex.Matches(ItemText, "<a[\\s\\S]*?</a>")[1].Value.Split('>')[1].Split('<')[0];
                            // Link: first double-quoted value of the first anchor, e.g. <a href="http://www.mzitu.com/104369"
                            image.Href = Regex.Matches(ItemText, "<a[\\s\\S]*?target=")[0].Value.Split('"')[1];
                            // Date: e.g. <span class="time">2017-09-30</span>
                            image.Time = Regex.Matches(ItemText, "time\"[\\s\\S]*?</span>")[0].Value.Split('>')[1].Split('<')[0];

                            // The id is the last path segment of the link.
                            image.Id = image.Href.Substring(image.Href.LastIndexOf("/") + 1);

                            // Start with the remote thumbnail URL; the DownImage callback below
                            // replaces it with the string it receives.
                            image.ImageUrl = Regex.Matches(ItemText, "data-original=[\\s\\S]*?/>")[0].Value.Split('\'')[1];

                            DownImage(image.Href.Substring(image.Href.LastIndexOf('/')) + ".jpg", image.ImageUrl, new Action<string>((path) =>
                            {
                                image.ImageUrl = path;
                            }));
                            list.Add(image);
                        }
                    }

                    // Pagination: collect the largest N from every "/page/N/" occurrence.
                    var page = Regex.Matches(msg, "/page/[\\s\\S]*?/");
                    foreach (Match mc in page)
                    {
                        string allText = mc.Groups[0].Value.Replace("/page/", "").Replace("/", "");

                        // The lazy pattern can capture non-numeric or empty text (e.g. "/page//").
                        // TryParse skips those instead of throwing and discarding the parsed list.
                        int temp;
                        if (int.TryParse(allText, out temp) && temp > pageSize)
                        {
                            pageSize = temp;
                        }
                    }

                    // Hand the results back to the caller.
                    action(list, pageSize);
                }
                catch (Exception ex) // Catch-all: network errors and parsing errors are reported to the caller.
                {
                    exaction(ex);
                }
            });
        }
        /// <summary>
        /// Requests http://httpbin.org/html through <c>HTTP.GetHtml</c> and asserts that the
        /// text content of the first <c>h1</c> element is the expected heading.
        /// </summary>
        public void GetHtmlTest()
        {
            const string expectedHeading = "Herman Melville - Moby-Dick";

            var document     = HTTP.GetHtml("http://httpbin.org/html");
            var firstHeading = document.GetElementsByTagName("h1")[0];

            Assert.Equal(expectedHeading, firstHeading.TextContent);
        }