Example #1
0
        /// <summary>
        /// Resolves an Imgur URL into either a single image or, for album ("/a/") URLs,
        /// the list of images found on the album's blog-layout page.
        /// </summary>
        /// <param name="inputUrl">Absolute URL to inspect; it is passed to the <see cref="Uri"/> constructor.</param>
        /// <returns>
        /// A tuple whose first item is the resolved image (null for album URLs) and whose second
        /// item is the list of album images (null for non-album URLs; empty when the album page
        /// could not be downloaded or contained no usable images).
        /// </returns>
        public static async Task<Tuple<InternetImage, List<InternetImage>>> Extract(string inputUrl)
        {
            var ret = new InternetImage() { ImageLink = inputUrl };
            List<InternetImage> galleryUrls = null;

            var u = new Uri(inputUrl);
            if (u.Host == "m.imgur.com")
            {
                // Turn mobile page into normal page.
                var desktopU = new UriBuilder(u);
                desktopU.Host = "imgur.com";
                u = desktopU.Uri;
            }

            if (u.Host == "imgur.com")
            {
                if (u.AbsolutePath.StartsWith("/a/", StringComparison.Ordinal))
                {
                    // Imgur albums: the single-image slot is unused, only the gallery list is filled.
                    galleryUrls = new List<InternetImage>();
                    ret = null;

                    var apArr = u.AbsolutePath.Split(new char[] { '/' }, StringSplitOptions.RemoveEmptyEntries);
                    if (apArr.Length < 2)
                    {
                        // "/a/" without an album id: there is no page to download.
                        Debug.WriteLine("Error in parsing");
                        return Tuple.Create(ret, galleryUrls);
                    }

                    var albumBlogLayoutUrl = "http://" + u.Host + "/" + apArr[0] + "/" + apArr[1] + "/layout/blog";

                    string albumPage = null;
                    try
                    {
                        using (var hc = new HttpClient())
                        {
                            albumPage = await hc.GetStringAsync(albumBlogLayoutUrl);
                        }
                    }
                    catch (Exception e)
                    {
                        // Best effort: a failed download leaves the gallery empty.
                        // Concatenate instead of passing a format argument, because
                        // Debug.WriteLine(string, string) treats the second string as a category.
                        Debug.WriteLine("Ex: " + e.Message);
                    }

                    if (albumPage != null)
                    {
                        var htmlDoc = new HtmlDocument();
                        htmlDoc.LoadHtml(albumPage);

                        // Parse errors are not used as a gate: the document is inspected whenever
                        // a document node is available.
                        if (htmlDoc.DocumentNode != null)
                        {
                            var imgDivs = htmlDoc.DocumentNode.Descendants()
                                .Where(n => n.Name == "div" && n.Attributes.Any(a => a.Name == "class" && a.Value == "image"));

                            foreach (var iDiv in imgDivs)
                            {
                                var imgNodes = iDiv.Descendants().Where(n => n.Name == "img").ToList();
                                if (imgNodes.Count != 1)
                                {
                                    Debug.WriteLine("Error in parsing");
                                }
                                if (imgNodes.Count == 0)
                                {
                                    // Nothing to extract from this div.
                                    continue;
                                }

                                var src = imgNodes[0].GetAttributeValue("src", string.Empty);
                                if (string.IsNullOrEmpty(src))
                                {
                                    Debug.WriteLine("Error in parsing");
                                    continue;
                                }

                                // Prefix a scheme unless the src already carries one.
                                var hasScheme = src.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
                                    || src.StartsWith("https://", StringComparison.OrdinalIgnoreCase);
                                var imgLink = hasScheme ? src : "http:" + src;

                                galleryUrls.Add(new InternetImage()
                                {
                                    ImageLink = imgLink,
                                    ThumbnailLink = GetThumbnailPathFromUrl(imgLink)
                                });
                            }
                        }
                    }
                }
                else if (u.AbsolutePath.StartsWith("/gallery/", StringComparison.Ordinal))
                {
                    // TODO: parse gallery links.
                }
                else
                {
                    // Imgur single image page.
                    var imgLink = "http://i.imgur.com" + u.AbsolutePath;
                    if (!u.AbsolutePath.Contains("."))
                    {
                        // No extension in the path: ".jpg" is appended.
                        imgLink += ".jpg";
                    }
                    ret.ImageLink = imgLink;
                    ret.ThumbnailLink = GetThumbnailPathFromUrl(imgLink);
                }
            }
            else if (u.Host == "i.imgur.com")
            {
                // Direct link to an image.
                ret.ThumbnailLink = GetThumbnailPathFromUrl(ret.ImageLink);
            }

            return Tuple.Create(ret, galleryUrls);
        }
Example #2
0
        /// <summary>
        /// Maps an Imgur URL to a single image, or for album ("/a/") URLs to the images
        /// scraped from the album's "layout/blog" page.
        /// </summary>
        /// <param name="inputUrl">Absolute URL to resolve; it is handed to the <see cref="Uri"/> constructor.</param>
        /// <returns>
        /// A tuple: item 1 is the resolved image (null when the URL is an album), item 2 is the
        /// album image list (null when the URL is not an album; empty when the album page could
        /// not be fetched or yielded no usable images).
        /// </returns>
        public static async Task<Tuple<InternetImage, List<InternetImage>>> Extract(string inputUrl)
        {
            var ret = new InternetImage()
            {
                ImageLink = inputUrl
            };
            List<InternetImage> galleryUrls = null;

            var u = new Uri(inputUrl);

            if (u.Host == "m.imgur.com")
            {
                // Turn mobile page into normal page.
                var desktopU = new UriBuilder(u);
                desktopU.Host = "imgur.com";
                u = desktopU.Uri;
            }

            if (u.Host == "imgur.com")
            {
                if (u.AbsolutePath.StartsWith("/a/", StringComparison.Ordinal))
                {
                    // Imgur albums: only the gallery list is populated, the single image is null.
                    galleryUrls = new List<InternetImage>();
                    ret = null;

                    var apArr = u.AbsolutePath.Split(new char[] { '/' }, StringSplitOptions.RemoveEmptyEntries);
                    if (apArr.Length < 2)
                    {
                        // The path has no album id after "/a/", so no album page can be requested.
                        Debug.WriteLine("Error in parsing");
                        return Tuple.Create(ret, galleryUrls);
                    }

                    var albumBlogLayoutUrl = "http://" + u.Host + "/" + apArr[0] + "/" + apArr[1] + "/layout/blog";

                    string albumPage = null;
                    try
                    {
                        using (var hc = new HttpClient())
                        {
                            albumPage = await hc.GetStringAsync(albumBlogLayoutUrl);
                        }
                    }
                    catch (Exception e)
                    {
                        // Download failures are logged and result in an empty gallery.
                        // The message is concatenated because Debug.WriteLine(string, string)
                        // interprets its second argument as a category, not a format value.
                        Debug.WriteLine("Ex: " + e.Message);
                    }

                    if (albumPage != null)
                    {
                        var htmlDoc = new HtmlDocument();
                        htmlDoc.LoadHtml(albumPage);

                        // The page is scanned whether or not the parser reported errors.
                        if (htmlDoc.DocumentNode != null)
                        {
                            var imgDivs = htmlDoc.DocumentNode.Descendants()
                                .Where(n => n.Name == "div" && n.Attributes.Any(a => a.Name == "class" && a.Value == "image"));

                            foreach (var iDiv in imgDivs)
                            {
                                var imgNodes = iDiv.Descendants().Where(n => n.Name == "img").ToList();
                                if (imgNodes.Count != 1)
                                {
                                    Debug.WriteLine("Error in parsing");
                                }
                                if (imgNodes.Count == 0)
                                {
                                    // A div without any img element contributes no entry.
                                    continue;
                                }

                                var src = imgNodes[0].GetAttributeValue("src", string.Empty);
                                if (string.IsNullOrEmpty(src))
                                {
                                    Debug.WriteLine("Error in parsing");
                                    continue;
                                }

                                // Only add the "http:" prefix when src does not already start with a scheme.
                                var hasScheme = src.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
                                    || src.StartsWith("https://", StringComparison.OrdinalIgnoreCase);
                                var imgLink = hasScheme ? src : "http:" + src;

                                galleryUrls.Add(new InternetImage()
                                {
                                    ImageLink = imgLink,
                                    ThumbnailLink = GetThumbnailPathFromUrl(imgLink)
                                });
                            }
                        }
                    }
                }
                else if (u.AbsolutePath.StartsWith("/gallery/", StringComparison.Ordinal))
                {
                    // TODO: parse gallery links.
                }
                else
                {
                    // Imgur single image page.
                    var imgLink = "http://i.imgur.com" + u.AbsolutePath;
                    if (!u.AbsolutePath.Contains("."))
                    {
                        // Paths without a dot get ".jpg" appended.
                        imgLink += ".jpg";
                    }
                    ret.ImageLink = imgLink;
                    ret.ThumbnailLink = GetThumbnailPathFromUrl(imgLink);
                }
            }
            else if (u.Host == "i.imgur.com")
            {
                // Direct link to an image.
                ret.ThumbnailLink = GetThumbnailPathFromUrl(ret.ImageLink);
            }

            return Tuple.Create(ret, galleryUrls);
        }