Exemplo n.º 1
0
        /// <summary>
        /// Scrapes https://cad-comic.com/random and extracts the comic image, title, link and upload date
        /// from the returned page.
        /// </summary>
        /// <returns>
        /// A populated <see cref="CADComic"/>, or <c>null</c> when the rate limiter rejects the call or the
        /// page does not contain the markup this method looks for.
        /// </returns>
        public async Task<CADComic> GetComicAsync()
        {
            if (rateLimiter.IsRatelimited())
            {
                return null;
            }

            // The redirect part of the result is not used by this method.
            var (doc, _) = await HttpWebClient.ScrapeUrlAsync(new Uri("https://cad-comic.com/random")).ConfigureAwait(false);

            // Walk the page one level at a time. Every lookup may come back null if the site layout
            // changes, so each step is null-conditional instead of dereferencing blindly.
            var html = doc?.DocumentNode?.ChildNodes.FirstOrDefault(x => x.Name == "html");
            var body = html?.ChildNodes.FirstOrDefault(x => x.Name == "body");

            // Pick the "container" element that actually holds the main content column.
            var container = body?.ChildNodes
                .Where(x => x.Attributes.Contains("class") && x.Attributes["class"].Value.Contains("container"))
                .FirstOrDefault(z => z.ChildNodes.Any(x => x.Attributes.Contains("class") && x.Attributes["class"].Value.Contains("col-md-8 main-content")));
            var mainContent = container?.ChildNodes
                .FirstOrDefault(x => x.Attributes.Contains("class") && x.Attributes["class"].Value.Contains("col-md-8 main-content"));
            var article = mainContent?.ChildNodes.FirstOrDefault(x => x.Name == "article");

            // Comic image: first child of the last <a> inside the "comicpage" element.
            var comicPage   = article?.ChildNodes.FirstOrDefault(x => x.HasClass("comicpage"));
            var comicHolder = comicPage?.ChildNodes.LastOrDefault(x => x.Name == "a");
            var comicImage  = comicHolder?.FirstChild;

            // Comic metadata: #comicblog > .blog-wrap > .blog-meta-wrap
            var comicBlog = article?.ChildNodes.FirstOrDefault(x => x.Attributes.Contains("id") && x.Attributes["id"].Value == "comicblog");
            var blogWrap  = comicBlog?.ChildNodes.FirstOrDefault(x => x.HasClass("blog-wrap"));
            var metaWrap  = blogWrap?.ChildNodes.FirstOrDefault(x => x.Attributes.Contains("class") && x.Attributes["class"].Value == "blog-meta-wrap");

            var comicLink = metaWrap?.ChildNodes.FirstOrDefault(x => x.Name == "a");
            var dateNode  = metaWrap?.ChildNodes.FirstOrDefault(x => x.Name == "p");

            // Bail out with null (same result as the rate-limited path) rather than throwing a
            // NullReferenceException when any required piece of markup is missing.
            if (comicImage == null || !comicImage.Attributes.Contains("src") ||
                comicLink == null || !comicLink.Attributes.Contains("href") ||
                dateNode == null)
            {
                return null;
            }

            var dateText = dateNode.InnerText.Replace(" by Tim", "");

            // Parse and format with the invariant culture so the result does not depend on the
            // machine's locale or calendar. If the text is not a recognizable date, keep it as-is
            // instead of failing the whole request.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;
            var uploaded = DateTime.TryParse(dateText, invariant, System.Globalization.DateTimeStyles.None, out var parsedDate)
                ? parsedDate.ToString("dd'/'MM'/'yyyy", invariant)
                : dateText.Trim();

            return new CADComic
            {
                ImageURL = comicImage.Attributes["src"].Value,
                Title = comicLink.InnerText,
                Uploaded = uploaded,
                URL = comicLink.Attributes["href"].Value
            };
        }
Exemplo n.º 2
0
        /// <summary>
        /// Scrapes http://explosm.net/comics/random and extracts the comic image, its permalink and the
        /// author's name, profile link and avatar from the returned page.
        /// </summary>
        /// <returns>
        /// A populated <see cref="CAHComic"/>, or <c>null</c> when the rate limiter rejects the call or the
        /// page does not contain the markup this method looks for.
        /// </returns>
        public async Task<CAHComic> GetComicAsync()
        {
            if (rateLimiter.IsRatelimited())
            {
                return null;
            }

            // The redirect part of the result is not used by this method.
            var (doc, _) = await HttpWebClient.ScrapeUrlAsync(new Uri("http://explosm.net/comics/random")).ConfigureAwait(false);

            // Walk the page one level at a time. Every lookup may come back null if the site layout
            // changes, so each step is null-conditional instead of dereferencing blindly.
            var html = doc?.DocumentNode?.ChildNodes.FirstOrDefault(x => x.Name == "html");
            var body = html?.ChildNodes.FirstOrDefault(x => x.Name == "body");

            // .off-canvas-wrap > .inner-wrap > #main-content > #main-left > #comic-area
            var offCanvasWrap = body?.ChildNodes
                .FirstOrDefault(x => x.Attributes.Contains("class") && x.Attributes["class"].Value.Contains("off-canvas-wrap"));
            var innerWrap = offCanvasWrap?.ChildNodes
                .FirstOrDefault(x => x.Attributes.Contains("class") && x.Attributes["class"].Value.Contains("inner-wrap"));
            var mainContent = innerWrap?.ChildNodes
                .FirstOrDefault(x => x.Attributes.Contains("id") && x.Attributes["id"].Value.Contains("main-content"));
            var mainLeft = mainContent?.ChildNodes
                .FirstOrDefault(x => x.Attributes.Contains("id") && x.Attributes["id"].Value.Contains("main-left"));
            var comicArea = mainLeft?.ChildNodes
                .FirstOrDefault(x => x.Attributes.Contains("id") && x.Attributes["id"].Value.Contains("comic-area"));

            // Comic image: #comic-wrap > #main-comic[src]
            var comicWrap = comicArea?.ChildNodes
                .FirstOrDefault(x => x.Attributes.Contains("id") && x.Attributes["id"].Value.Contains("comic-wrap"));
            var comicImage = comicWrap?.ChildNodes
                .FirstOrDefault(x => x.Attributes.Contains("id") &&
                                     x.Attributes["id"].Value.Contains("main-comic") &&
                                     x.Attributes.Contains("src"));

            // Comic info: #comic-under > #comic-info
            var comicUnder = comicArea?.ChildNodes
                .FirstOrDefault(x => x.Attributes.Contains("id") && x.Attributes["id"].Value.Contains("comic-under"));
            var comicInfo = comicUnder?.ChildNodes
                .FirstOrDefault(x => x.Attributes.Contains("id") && x.Attributes["id"].Value.Contains("comic-info"));

            // Author text and social link live under #comic-info-text.
            var authorWrap = comicInfo?.ChildNodes
                .FirstOrDefault(x => x.Attributes.Contains("id") && x.Attributes["id"].Value.Contains("comic-info-text"));
            var authorBlock = authorWrap?.ChildNodes
                .FirstOrDefault(x => x.Attributes.Contains("id") && x.Attributes["id"].Value.Contains("comic-author"));
            var socialBlock = authorWrap?.ChildNodes
                .FirstOrDefault(x => x.Attributes.Contains("id") && x.Attributes["id"].Value.Contains("comic-social"));

            // Avatar block carries the author's profile href and wraps the avatar <img>.
            var avatarBlock = comicInfo?.ChildNodes
                .FirstOrDefault(x => x.Attributes.Contains("id") && x.Attributes["id"].Value.Contains("comic-avatar"));
            var avatar = avatarBlock?.ChildNodes
                .FirstOrDefault(x => x.Name.Contains("img") && x.Attributes.Contains("src"));

            var authorNode = authorBlock?.ChildNodes.FirstOrDefault(x => x.InnerText.Contains("by"));

            var comicLink = socialBlock?.ChildNodes
                .FirstOrDefault(x => x.Attributes.Contains("id") &&
                                     x.Attributes["id"].Value.Contains("comic-social-link") &&
                                     x.Attributes.Contains("href"));

            // Bail out with null (same result as the rate-limited path) rather than throwing a
            // NullReferenceException when any required piece of markup is missing.
            if (comicImage == null || comicLink == null || authorNode == null ||
                avatarBlock == null || !avatarBlock.Attributes.Contains("href") ||
                avatar == null)
            {
                return null;
            }

            // Strip line breaks that surround the author text in the page markup.
            var author = Regex.Replace(authorNode.InnerText, @"\r\n?|\n", "");

            // The page's image sources are prefixed with "http:" and the author link with the site
            // root, exactly as before.
            var comicImageUrl = "http:" + comicImage.Attributes["src"].Value;
            var authorUrl = "http://explosm.net" + avatarBlock.Attributes["href"].Value;
            var avatarUrl = "http:" + avatar.Attributes["src"].Value;

            return new CAHComic
            {
                Author = author,
                ImageURL = comicImageUrl,
                AuthorAvatar = avatarUrl,
                AuthorURL = authorUrl,
                URL = comicLink.Attributes["href"].Value
            };
        }