/// <summary>
/// Resolves an Imgur URL into either a single image or the list of images in an album.
/// </summary>
/// <param name="inputUrl">
/// Absolute URL to inspect. It is passed to the <see cref="Uri"/> constructor, so a null or
/// malformed value makes that constructor throw.
/// </param>
/// <returns>
/// A tuple of (single image, album images).
/// For album URLs ("/a/...") the first item is null and the second item is the list of
/// images found on the album page (empty when the page could not be fetched or parsed).
/// For every other URL the second item is null and the first item carries the image link;
/// its thumbnail link is only filled in for imgur.com single-image pages and i.imgur.com links.
/// </returns>
public static async Task<Tuple<InternetImage, List<InternetImage>>> Extract(string inputUrl)
{
    var ret = new InternetImage() { ImageLink = inputUrl };
    List<InternetImage> galleryUrls = null;

    var u = new Uri(inputUrl);
    if (u.Host == "m.imgur.com")
    {
        // Turn mobile page into normal page.
        var desktopU = new UriBuilder(u);
        desktopU.Host = "imgur.com";
        u = desktopU.Uri;
    }

    if (u.Host == "imgur.com")
    {
        if (u.AbsolutePath.StartsWith("/a/", StringComparison.Ordinal))
        {
            // Imgur albums: the caller gets a list instead of a single image.
            galleryUrls = new List<InternetImage>();
            ret = null;

            var apArr = u.AbsolutePath.Split(new char[] { '/' }, StringSplitOptions.RemoveEmptyEntries);
            if (apArr.Length < 2)
            {
                // A bare "/a/" path has no album id; indexing apArr[1] would throw.
                Debug.WriteLine("Album URL has no album id: " + inputUrl);
                return Tuple.Create(ret, galleryUrls);
            }

            var albumBlogLayoutUrl = "http://" + u.Host + "/" + apArr[0] + "/" + apArr[1] + "/layout/blog";

            string albumPage = null;
            try
            {
                // Dispose the client so its handler and connections are released.
                using (var hc = new HttpClient())
                {
                    albumPage = await hc.GetStringAsync(albumBlogLayoutUrl);
                }
            }
            catch (Exception e)
            {
                // Best effort: a failed download yields an empty album rather than a fault.
                // Concatenate instead of passing a format argument so the call cannot bind
                // to the (message, category) overload on platforms that have it.
                Debug.WriteLine("Ex: " + e.Message);
            }

            if (albumPage != null)
            {
                var htmlDoc = new HtmlDocument();
                htmlDoc.LoadHtml(albumPage);

                // Extraction is no longer gated on the presence of parse errors: a page that
                // parses cleanly must yield its images just like a page with recoverable errors.
                if (htmlDoc.DocumentNode != null)
                {
                    var imgDivs = htmlDoc.DocumentNode.Descendants()
                        .Where(n => n.Name == "div"
                            && n.Attributes.Any(a => a.Name == "class" && a.Value == "image"));

                    foreach (var iDiv in imgDivs)
                    {
                        var imgNodes = iDiv.Descendants().Where(n => n.Name == "img").ToList();
                        if (imgNodes.Count != 1)
                        {
                            Debug.WriteLine("Error in parsing");
                        }

                        if (imgNodes.Count == 0)
                        {
                            // Nothing to extract from this div; skip it instead of throwing.
                            continue;
                        }

                        var src = imgNodes[0].GetAttributeValue("src", string.Empty);
                        if (string.IsNullOrEmpty(src))
                        {
                            // An <img> without a src cannot produce a usable link.
                            Debug.WriteLine("Error in parsing");
                            continue;
                        }

                        // The src value is prefixed with a scheme to form the full link.
                        var albumImgLink = "http:" + src;
                        galleryUrls.Add(new InternetImage()
                        {
                            ImageLink = albumImgLink,
                            ThumbnailLink = GetThumbnailPathFromUrl(albumImgLink)
                        });
                    }
                }
            }
        }
        else if (u.AbsolutePath.StartsWith("/gallery/", StringComparison.Ordinal))
        {
            // TODO: parse gallery links. Until then the input URL is returned unchanged.
        }
        else
        {
            // Imgur single image page: map it onto the direct-image host.
            var imgLink = "http://i.imgur.com" + u.AbsolutePath;
            if (!u.AbsolutePath.Contains("."))
            {
                // No extension in the path, so ".jpg" is appended.
                imgLink += ".jpg";
            }

            ret.ImageLink = imgLink;
            ret.ThumbnailLink = GetThumbnailPathFromUrl(imgLink);
        }
    }
    else if (u.Host == "i.imgur.com")
    {
        // Direct link to an image.
        ret.ThumbnailLink = GetThumbnailPathFromUrl(ret.ImageLink);
    }

    return Tuple.Create(ret, galleryUrls);
}
/// <summary>
/// Resolves an Imgur URL into either a single image or the list of images in an album.
/// </summary>
/// <param name="inputUrl">
/// Absolute URL to inspect. It is passed to the <see cref="Uri"/> constructor, so a null or
/// malformed value makes that constructor throw.
/// </param>
/// <returns>
/// A tuple of (single image, album images).
/// For album URLs ("/a/...") the first item is null and the second item is the list of
/// images found on the album page (empty when the page could not be fetched or parsed).
/// For every other URL the second item is null and the first item carries the image link;
/// its thumbnail link is only filled in for imgur.com single-image pages and i.imgur.com links.
/// </returns>
public static async Task<Tuple<InternetImage, List<InternetImage>>> Extract(string inputUrl)
{
    var ret = new InternetImage() { ImageLink = inputUrl };
    List<InternetImage> galleryUrls = null;

    var u = new Uri(inputUrl);
    if (u.Host == "m.imgur.com")
    {
        // Turn mobile page into normal page.
        var desktopU = new UriBuilder(u);
        desktopU.Host = "imgur.com";
        u = desktopU.Uri;
    }

    if (u.Host == "imgur.com")
    {
        if (u.AbsolutePath.StartsWith("/a/", StringComparison.Ordinal))
        {
            // Imgur albums: the caller gets a list instead of a single image.
            galleryUrls = new List<InternetImage>();
            ret = null;

            var apArr = u.AbsolutePath.Split(new char[] { '/' }, StringSplitOptions.RemoveEmptyEntries);
            if (apArr.Length < 2)
            {
                // A bare "/a/" path has no album id; indexing apArr[1] would throw.
                Debug.WriteLine("Album URL has no album id: " + inputUrl);
                return Tuple.Create(ret, galleryUrls);
            }

            var albumBlogLayoutUrl = "http://" + u.Host + "/" + apArr[0] + "/" + apArr[1] + "/layout/blog";

            string albumPage = null;
            try
            {
                // Dispose the client so its handler and connections are released.
                using (var hc = new HttpClient())
                {
                    albumPage = await hc.GetStringAsync(albumBlogLayoutUrl);
                }
            }
            catch (Exception e)
            {
                // Best effort: a failed download yields an empty album rather than a fault.
                // Concatenate instead of passing a format argument so the call cannot bind
                // to the (message, category) overload on platforms that have it.
                Debug.WriteLine("Ex: " + e.Message);
            }

            if (albumPage != null)
            {
                var htmlDoc = new HtmlDocument();
                htmlDoc.LoadHtml(albumPage);

                // Extraction is no longer gated on the presence of parse errors: a page that
                // parses cleanly must yield its images just like a page with recoverable errors.
                if (htmlDoc.DocumentNode != null)
                {
                    var imgDivs = htmlDoc.DocumentNode.Descendants()
                        .Where(n => n.Name == "div"
                            && n.Attributes.Any(a => a.Name == "class" && a.Value == "image"));

                    foreach (var iDiv in imgDivs)
                    {
                        var imgNodes = iDiv.Descendants().Where(n => n.Name == "img").ToList();
                        if (imgNodes.Count != 1)
                        {
                            Debug.WriteLine("Error in parsing");
                        }

                        if (imgNodes.Count == 0)
                        {
                            // Nothing to extract from this div; skip it instead of throwing.
                            continue;
                        }

                        var src = imgNodes[0].GetAttributeValue("src", string.Empty);
                        if (string.IsNullOrEmpty(src))
                        {
                            // An <img> without a src cannot produce a usable link.
                            Debug.WriteLine("Error in parsing");
                            continue;
                        }

                        // The src value is prefixed with a scheme to form the full link.
                        var albumImgLink = "http:" + src;
                        galleryUrls.Add(new InternetImage()
                        {
                            ImageLink = albumImgLink,
                            ThumbnailLink = GetThumbnailPathFromUrl(albumImgLink)
                        });
                    }
                }
            }
        }
        else if (u.AbsolutePath.StartsWith("/gallery/", StringComparison.Ordinal))
        {
            // TODO: parse gallery links. Until then the input URL is returned unchanged.
        }
        else
        {
            // Imgur single image page: map it onto the direct-image host.
            var imgLink = "http://i.imgur.com" + u.AbsolutePath;
            if (!u.AbsolutePath.Contains("."))
            {
                // No extension in the path, so ".jpg" is appended.
                imgLink += ".jpg";
            }

            ret.ImageLink = imgLink;
            ret.ThumbnailLink = GetThumbnailPathFromUrl(imgLink);
        }
    }
    else if (u.Host == "i.imgur.com")
    {
        // Direct link to an image.
        ret.ThumbnailLink = GetThumbnailPathFromUrl(ret.ImageLink);
    }

    return Tuple.Create(ret, galleryUrls);
}