public async Task <List <string> > findImagePageUrl(string url) { string html = await HttpDownloader.DownloadHtmlPage(url); ParseResult result = await findNextPageLinkAndCurrentImagePages(html); List <string> imagePageList = new List <string>(); imagePageList.AddRange(result.ImagePageList); if (result.NextPage != null) { imagePageList.AddRange(await findImagePageUrl(result.NextPage)); } return(imagePageList); }
//private static string albumTitle; public async Task <List <ImagePageModel> > FindImagePageUrl(TaskItem taskItem) { //string url = taskItem.Url; //if (url[url.Length - 1] != '#') //{ // url += "#"; //} string html = await HttpDownloader.DownloadHtmlPage(taskItem.Url); IConfiguration config = Configuration.Default; IBrowsingContext context = BrowsingContext.New(config); IDocument document = await context.OpenAsync(response => response.Content(html)); //var result = document.All.Where(m => m.LocalName == "div" && m.ClassName == "preview_thumb"); //去页面中寻找Page元素,该元素包含当前漫画的总页数。 var result = document.All.Where(m => m.LocalName == "div" && m.ClassName == "pages").FirstOrDefault(); string pages = result.QuerySelector("h3").TextContent; Match pageNum = Regex.Match(pages, "[0-9]+"); int pageCount = Convert.ToInt32(pageNum.ToString()); //人工根据漫画页数构造每一页的URL List <ImagePageModel> imagePageModels = new List <ImagePageModel>(); string imagePageUrlBase = taskItem.Url.Replace("g", "gallery"); for (int i = 1; i <= pageCount; i++) { string imagePageUrl = Path.Combine(imagePageUrlBase, i.ToString()); imagePageModels.Add(new ImagePageModel(imagePageUrl, taskItem)); } return(imagePageModels); //if (result.Count() == 0) //{ // throw new TargetNotFindException("未找到div.preview_thumb元素"); //} //Debug.WriteLine("成功:" + "找到图片网页" + result.Count() + "个"); //List<ImagePageModel> imagePageModels = new List<ImagePageModel>(); //foreach (var res in result) //{ // string pageUrl = "https://asmhentai.com"+res.QuerySelector("a").GetAttribute("href"); // imagePageModels.Add(new ImagePageModel(pageUrl, taskItem)); //} //return imagePageModels; //return imagePageUrls.Select(link => "https://asmhentai.com" + link).ToList(); }
public async Task <List <ImageModel> > FindImageUrls(ImagePageModel imagePageModel) { string html = await HttpDownloader.DownloadHtmlPage(imagePageModel.ImagePageUrl); IConfiguration config = Configuration.Default; IBrowsingContext context = BrowsingContext.New(config); IDocument document = await context.OpenAsync(response => response.Content(html)); IEnumerable <IElement> urlResult = document.All.Where(m => m.LocalName == "img" && m.ClassName == "lazy no_image"); string imageUrl; if (urlResult.Count() == 0) { urlResult = document.All.Where(m => m.LocalName == "div" && m.ClassName == "image_1"); if (urlResult.Count() == 0) { Debug.WriteLine("失败:" + "解析失败,没有找到图片链接。"); throw new Exception("失败:" + "解析失败,没有找到图片链接。"); } else { imageUrl = "https://" + urlResult.First().QuerySelector("img").GetAttribute("src").Remove(0, 2); } } else { imageUrl = "https://" + urlResult.First().GetAttribute("src").Remove(0, 2); } string imageName = document.All.Where(m => m.LocalName == "title").First().Text(); ImageModel imageModel = new ImageModel(imagePageModel, imageName, imageUrl); return(new List <ImageModel>() { imageModel }); }
public async Task <List <ImageModel> > FindImageUrls(ImagePageModel imagePageModel) { string html = await HttpDownloader.DownloadHtmlPage(imagePageModel.ImagePageUrl); IConfiguration config = Configuration.Default; IBrowsingContext context = BrowsingContext.New(config); IDocument document = await context.OpenAsync(response => response.Content(html)); var urlResult = document.All.Where(m => m.LocalName == "img" && m.Id == "img"); string imageUrl = urlResult.First().GetAttribute("src"); var imageNumResult = document.All.Where(m => m.LocalName == "div" && m.ClassName == "sn"); string imageNum = imageNumResult.First().QuerySelector("div").QuerySelector("span").Text(); var titleResult = document.All.Where(m => m.LocalName == "div" && m.Id == "i1"); string title = titleResult.First().QuerySelector("h1").Text(); string imageName = title + "_" + imageNum; return(new List <ImageModel>() { new ImageModel(imagePageModel, imageName, imageUrl) }); }