/// <summary>
/// Downloads the image page and builds one <see cref="ImageModel"/> for every
/// <c>img</c> element found inside the page's <c>div.picture</c> container.
/// </summary>
/// <param name="imagePageModel">Page description; its <c>ImagePageUrl</c> is downloaded and parsed.</param>
/// <returns>
/// The images in document order. The first image is named <c>title_page</c>,
/// subsequent ones <c>title_page_index</c> (index starting at 1).
/// </returns>
/// <exception cref="Exception">
/// The page has no <c>div.picture</c> container, or the container holds no <c>img</c> element.
/// </exception>
public async Task<List<ImageModel>> FindImageUrls(ImagePageModel imagePageModel)
{
    string html = await HttpDownloader.DownloadHtmlPage(imagePageModel.ImagePageUrl);
    IBrowsingContext context = BrowsingContext.New();
    IDocument document = await context.OpenAsync(response => response.Content(html));

    // QuerySelector returns null when "div.picture" is absent; the null-conditional
    // call routes that case into the descriptive failure below instead of a
    // NullReferenceException.
    var imageElements = document.QuerySelector("div.picture")?.QuerySelectorAll("img");
    if (imageElements == null || !imageElements.Any())
    {
        Debug.WriteLine("在图片页面hmtl中没有匹配到图片url元素: " + imagePageModel.ImagePageUrl);
        throw new Exception("在图片页面hmtl中没有匹配到图片url元素");
    }

    // Both values are optional: a missing <title> or pager simply yields an empty
    // segment in the generated image name.
    string albumTitle = document.QuerySelector("title")?.InnerHtml;
    string pageNum = document.QuerySelector("div.paging")?.QuerySelector("b")?.InnerHtml;
    string baseName = albumTitle + "_" + pageNum;

    List<ImageModel> imageModels = new List<ImageModel>();
    int index = 0;
    foreach (IElement imgElement in imageElements)
    {
        string url = imgElement.GetAttribute("src");

        // Only images after the first get a numeric suffix so names stay unique per page.
        string imageName = index > 0 ? baseName + "_" + index : baseName;

        ImageModel imageModel = new ImageModel(imagePageModel, imageName, url);
        index++;

        Debug.WriteLine("成功:找到图片" + imageModel.ImageUrl + imageModel.ImageName + imageModel.ImagePage.ImagePageUrl);
        imageModels.Add(imageModel);
    }

    return imageModels;
}
/// <summary>
/// Downloads the image page and extracts its single image. The image is taken from the
/// first <c>img</c> whose class attribute is exactly <c>lazy no_image</c>; if there is
/// none, from the first <c>img</c> inside the first <c>div</c> whose class is exactly
/// <c>image_1</c>.
/// </summary>
/// <param name="imagePageModel">Page description; its <c>ImagePageUrl</c> is downloaded and parsed.</param>
/// <returns>A list containing exactly one <see cref="ImageModel"/>, named after the page's <c>title</c> text.</returns>
/// <exception cref="Exception">No image element with a non-empty <c>src</c> attribute could be located.</exception>
/// <exception cref="InvalidOperationException">The page contains no <c>title</c> element.</exception>
public async Task<List<ImageModel>> FindImageUrls(ImagePageModel imagePageModel)
{
    const string notFoundMessage = "失败:" + "解析失败,没有找到图片链接。";

    string html = await HttpDownloader.DownloadHtmlPage(imagePageModel.ImagePageUrl);
    IConfiguration config = Configuration.Default;
    IBrowsingContext context = BrowsingContext.New(config);
    IDocument document = await context.OpenAsync(response => response.Content(html));

    // FirstOrDefault walks the document once per lookup (the Count()/First() pair walked it twice).
    IElement imageElement =
        document.All.FirstOrDefault(m => m.LocalName == "img" && m.ClassName == "lazy no_image")
        ?? document.All.FirstOrDefault(m => m.LocalName == "div" && m.ClassName == "image_1")?.QuerySelector("img");

    // A missing element or missing/empty src is reported as a parse failure rather
    // than surfacing as a NullReferenceException.
    string src = imageElement?.GetAttribute("src");
    if (string.IsNullOrEmpty(src))
    {
        Debug.WriteLine(notFoundMessage);
        throw new Exception(notFoundMessage);
    }

    // Protocol-relative sources ("//host/path") are forced to https. Any other form is
    // passed through unchanged instead of having its first two characters stripped.
    string imageUrl = src.StartsWith("//", StringComparison.Ordinal) ? "https:" + src : src;

    string imageName = document.All.First(m => m.LocalName == "title").Text();

    return new List<ImageModel> { new ImageModel(imagePageModel, imageName, imageUrl) };
}
/// <summary>
/// Downloads the image page and extracts its single image: the URL comes from the
/// <c>src</c> of the <c>img</c> element with id <c>img</c>, and the name is built as
/// <c>title_number</c> from the <c>h1</c> inside <c>div#i1</c> and the first
/// <c>span</c> under the first <c>div</c> inside the <c>div</c> with class <c>sn</c>.
/// </summary>
/// <param name="imagePageModel">Page description; its <c>ImagePageUrl</c> is downloaded and parsed.</param>
/// <returns>A list containing exactly one <see cref="ImageModel"/>.</returns>
/// <exception cref="InvalidOperationException">
/// One of the three anchor elements (<c>img#img</c>, <c>div.sn</c>, <c>div#i1</c>) is absent.
/// </exception>
public async Task<List<ImageModel>> FindImageUrls(ImagePageModel imagePageModel)
{
    string pageHtml = await HttpDownloader.DownloadHtmlPage(imagePageModel.ImagePageUrl);
    IDocument page = await BrowsingContext
        .New(Configuration.Default)
        .OpenAsync(response => response.Content(pageHtml));

    // Image source: the <img id="img"> element.
    IElement imageTag = page.All.Where(e => e.LocalName == "img" && e.Id == "img").First();
    string source = imageTag.GetAttribute("src");

    // Image number: div.sn > first div > first span.
    IElement numberBox = page.All.Where(e => e.LocalName == "div" && e.ClassName == "sn").First();
    string number = numberBox.QuerySelector("div").QuerySelector("span").Text();

    // Title: the <h1> inside <div id="i1">.
    IElement titleBox = page.All.Where(e => e.LocalName == "div" && e.Id == "i1").First();
    string heading = titleBox.QuerySelector("h1").Text();

    var images = new List<ImageModel>();
    images.Add(new ImageModel(imagePageModel, string.Concat(heading, "_", number), source));
    return images;
}