예제 #1
0
        /// <summary>
        /// Scrape a chapter page and build the URLs of all of its page images.
        /// </summary>
        /// <remarks>
        /// Only the chapter page itself is downloaded. Page labels are read from the
        /// <c>div.pages-list&gt;a.page</c> links; the directory and file extension are taken
        /// from the image shown on that page (<c>div#image&gt;img</c>). Each returned URL is
        /// <c>baseUrl + directory + "/" + label + "." + extension</c>, which assumes every page
        /// image sits in the same directory and is named after its page label.
        /// </remarks>
        /// <param name="chapterUrl">Address of the chapter page to load.</param>
        /// <returns>
        /// List of image URLs, one per page link found (empty when the page has an image
        /// but no page links).
        /// </returns>
        /// <exception cref="System.InvalidOperationException">
        /// The page contains no <c>div#image&gt;img</c> element, or that element has no
        /// usable <c>src</c> attribute.
        /// </exception>
        public List<string> ScrapeImages(string chapterUrl)
        {
            var web = new HtmlWeb();
            HtmlDocument doc = web.Load(chapterUrl);
            HtmlNodeCollection nodes = doc.DocumentNode.ChildNodes;

            // Get all page numbers (the text of each pager link).
            List<string> ids = nodes.CssSelect("div.pages-list>a.page")
                                    .Select(node => node.InnerText)
                                    .ToList();

            // Get current page image. FirstOrDefault + explicit check keeps the exception
            // type First() would have thrown, but says what was actually missing.
            HtmlNode image = nodes.CssSelect("div#image>img").FirstOrDefault();
            if (image == null)
            {
                throw new System.InvalidOperationException(
                    $"No 'div#image>img' element found on chapter page '{chapterUrl}'.");
            }

            // Without a src there is nothing to derive the directory and extension from;
            // fail loudly instead of returning URLs such as "{baseUrl}/{id}.".
            string imgSrc = image.GetAttributeValue("src", "");
            if (string.IsNullOrWhiteSpace(imgSrc))
            {
                throw new System.InvalidOperationException(
                    $"The image on chapter page '{chapterUrl}' has no 'src' attribute.");
            }

            // Split "dir/sub/file.ext" into its directory part and the extension of the file.
            string[] splitSrc   = imgSrc.Split('/');
            string   baseImgSrc = string.Join("/", splitSrc.SkipLast(1));
            string   extension  = splitSrc.Last().Split('.').Last();

            // One URL per page label, reusing the current image's directory and extension.
            return ids.Select(id => $"{baseUrl}{baseImgSrc}/{id}.{extension}").ToList();
        }
예제 #2
0
        /// <summary>
        /// Collect the chapter links listed on a manga's page.
        /// </summary>
        /// <remarks>
        /// Every anchor matching <c>dl.chapter-list&gt;dd&gt;a</c> contributes one entry:
        /// its <c>href</c> value prefixed with <c>baseUrl</c>. An anchor without an
        /// <c>href</c> contributes <c>baseUrl</c> on its own.
        /// </remarks>
        /// <param name="mangaUrl">Address of the manga page to load.</param>
        /// <returns>List of chapter URLs, in the order the anchors are selected.</returns>
        public List<string> ScrapeChapters(string mangaUrl)
        {
            HtmlDocument page = new HtmlWeb().Load(mangaUrl);
            IEnumerable<HtmlNode> chapterAnchors =
                page.DocumentNode.ChildNodes.CssSelect("dl.chapter-list>dd>a");

            var chapterUrls = new List<string>();
            foreach (HtmlNode anchor in chapterAnchors)
            {
                // The href is appended to the site root as-is.
                chapterUrls.Add(baseUrl + anchor.GetAttributeValue("href", ""));
            }

            return chapterUrls;
        }