Ejemplo n.º 1
0
 /// <summary>
 /// Scans the posts of <paramref name="document"/> for webmshare links and queues each
 /// generated media url, plus the post's JSON data, for download.
 /// </summary>
 /// <remarks>
 /// A post is processed only when it passes <c>PostWithinTimeSpan</c>, the tag filter
 /// (no tags configured, or at least one case-insensitive tag match) and
 /// <c>CheckIfDownloadRebloggedPosts</c>. When <c>blog.SkipGif</c> is set, urls ending in
 /// ".gif" or ".gifv" are not queued.
 /// </remarks>
 /// <param name="document">The API response whose posts are scanned.</param>
 private void AddWebmshareUrl(TumblrApiJson document)
 {
     foreach (Post post in document.posts)
     {
         if (!PostWithinTimeSpan(post))
         {
             continue;
         }
         // Tag filter: only applies when tags are configured.
         if (tags.Any() && !post.tags.Any(x => tags.Contains(x, StringComparer.OrdinalIgnoreCase)))
         {
             continue;
         }
         if (!CheckIfDownloadRebloggedPosts(post))
         {
             continue;
         }

         Regex regex = webmshareParser.GetWebmshareUrlRegex();
         foreach (Match match in regex.Matches(InlineSearch(post)))
         {
             string webmshareId = match.Groups[2].Value;
             string imageUrl    = webmshareParser.CreateWebmshareUrl(webmshareId, blog.WebmshareType);

             // Urls are not linguistic text: compare the extension ordinally so the
             // result does not depend on the current culture.
             bool isGif = imageUrl.EndsWith(".gif", StringComparison.Ordinal) ||
                          imageUrl.EndsWith(".gifv", StringComparison.Ordinal);
             if (blog.SkipGif && isGif)
             {
                 continue;
             }

             AddToDownloadList(new ExternalVideoPost(imageUrl, webmshareId,
                                                     post.unix_timestamp.ToString()));
             // The JSON file is named after the last url segment with a ".json" extension.
             AddToJsonQueue(new TumblrCrawlerData <Post>(Path.ChangeExtension(imageUrl.Split('/').Last(), ".json"), post));
         }
     }
 }
Ejemplo n.º 2
0
        /// <summary>
        /// Scans the posts of <paramref name="document"/> for imgur links (directly linked
        /// images and albums) and queues every image url, plus the post's JSON data, for download.
        /// </summary>
        /// <remarks>
        /// A post is processed only when it passes <c>PostWithinTimeSpan</c>, the tag filter
        /// (no tags configured, or at least one case-insensitive tag match) and
        /// <c>CheckIfDownloadRebloggedPosts</c>. When <c>blog.SkipGif</c> is set, urls ending in
        /// ".gif" or ".gifv" are not queued.
        /// </remarks>
        /// <param name="document">The API response whose posts are scanned.</param>
        private async Task AddImgurUrl(TumblrApiJson document)
        {
            foreach (Post post in document.posts)
            {
                if (!PostWithinTimeSpan(post))
                {
                    continue;
                }
                // Tag filter: only applies when tags are configured.
                if (tags.Any() && !post.tags.Any(x => tags.Contains(x, StringComparer.OrdinalIgnoreCase)))
                {
                    continue;
                }
                if (!CheckIfDownloadRebloggedPosts(post))
                {
                    continue;
                }

                // single linked images
                Regex regex = imgurParser.GetImgurImageRegex();
                foreach (Match match in regex.Matches(InlineSearch(post)))
                {
                    string imageUrl = match.Groups[1].Value;
                    string imgurId  = match.Groups[2].Value;

                    // Urls are not linguistic text: compare the extension ordinally so the
                    // result does not depend on the current culture.
                    bool isGif = imageUrl.EndsWith(".gif", StringComparison.Ordinal) ||
                                 imageUrl.EndsWith(".gifv", StringComparison.Ordinal);
                    if (blog.SkipGif && isGif)
                    {
                        continue;
                    }

                    AddToDownloadList(new ExternalPhotoPost(imageUrl, imgurId,
                                                            post.unix_timestamp.ToString()));
                    AddToJsonQueue(new TumblrCrawlerData <Post>(Path.ChangeExtension(imageUrl.Split('/').Last(), ".json"), post));
                }

                // album urls
                regex = imgurParser.GetImgurAlbumRegex();
                foreach (Match match in regex.Matches(InlineSearch(post)))
                {
                    string albumUrl = match.Groups[1].Value;
                    string imgurId  = match.Groups[2].Value;
                    string album    = await imgurParser.RequestImgurAlbumSite(albumUrl);

                    Regex hashRegex = imgurParser.GetImgurAlbumHashRegex();
                    var   hashes    = hashRegex.Matches(album).Cast <Match>().Select(hashMatch => hashMatch.Groups[1].Value).ToList();

                    Regex extRegex = imgurParser.GetImgurAlbumExtRegex();
                    var   exts     = extRegex.Matches(album).Cast <Match>().Select(extMatch => extMatch.Groups[1].Value).ToList();

                    // Pair the n-th hash with the n-th extension; Zip stops at the shorter list.
                    var imageUrls = hashes.Zip(exts, (hash, ext) => "https://i.imgur.com/" + hash + ext);

                    foreach (string imageUrl in imageUrls)
                    {
                        bool isGif = imageUrl.EndsWith(".gif", StringComparison.Ordinal) ||
                                     imageUrl.EndsWith(".gifv", StringComparison.Ordinal);
                        if (blog.SkipGif && isGif)
                        {
                            continue;
                        }

                        AddToDownloadList(new ExternalPhotoPost(imageUrl, imgurId,
                                                                post.unix_timestamp.ToString()));
                        AddToJsonQueue(new TumblrCrawlerData <Post>(Path.ChangeExtension(imageUrl.Split('/').Last(), ".json"), post));
                    }
                }
            }
        }
Ejemplo n.º 3
0
 /// <summary>
 /// Scans the posts of <paramref name="document"/> for gfycat links, resolves each one to a
 /// video url through <c>gfycatParser</c>, and queues the url plus the post's JSON data for download.
 /// </summary>
 /// <remarks>
 /// A post is processed only when it passes <c>PostWithinTimeSpan</c>, the tag filter
 /// (no tags configured, or at least one case-insensitive tag match) and
 /// <c>CheckIfDownloadRebloggedPosts</c>. When <c>blog.SkipGif</c> is set, urls ending in
 /// ".gif" or ".gifv" are not queued.
 /// </remarks>
 /// <param name="document">The API response whose posts are scanned.</param>
 private async Task AddGfycatUrl(TumblrApiJson document)
 {
     foreach (Post post in document.posts)
     {
         if (!PostWithinTimeSpan(post))
         {
             continue;
         }
         // Tag filter: only applies when tags are configured.
         if (tags.Any() && !post.tags.Any(x => tags.Contains(x, StringComparer.OrdinalIgnoreCase)))
         {
             continue;
         }
         if (!CheckIfDownloadRebloggedPosts(post))
         {
             continue;
         }

         Regex regex = gfycatParser.GetGfycatUrlRegex();
         foreach (Match match in regex.Matches(InlineSearch(post)))
         {
             string gfyId    = match.Groups[2].Value;
             string videoUrl = gfycatParser.ParseGfycatCajaxResponse(await gfycatParser.RequestGfycatCajax(gfyId),
                                                                     blog.GfycatType);

             // Urls are not linguistic text: compare the extension ordinally so the
             // result does not depend on the current culture.
             bool isGif = videoUrl.EndsWith(".gif", StringComparison.Ordinal) ||
                          videoUrl.EndsWith(".gifv", StringComparison.Ordinal);
             if (blog.SkipGif && isGif)
             {
                 continue;
             }

             AddToDownloadList(new ExternalVideoPost(videoUrl, gfyId,
                                                     post.unix_timestamp.ToString()));
             AddToJsonQueue(new TumblrCrawlerData <Post>(Path.ChangeExtension(videoUrl.Split('/').Last(), ".json"), post));
         }
     }
 }
Ejemplo n.º 4
0
        /// <summary>
        /// Compares the id of the first post in <paramref name="response"/> with the value
        /// returned by <c>GetLastPostId</c>.
        /// </summary>
        /// <param name="response">The API response to inspect.</param>
        /// <returns>
        /// <c>true</c> when the first post's id is greater than or equal to <c>GetLastPostId()</c>;
        /// <c>false</c> otherwise, including when the response contains no posts. An id that
        /// cannot be parsed is treated as 0.
        /// </returns>
        private bool CheckPostAge(TumblrApiJson response)
        {
            // FirstOrDefault() yields null for an empty post list; dereferencing it
            // unconditionally would throw a NullReferenceException.
            var firstPost = response.Posts.FirstOrDefault();
            if (firstPost == null)
            {
                return(false);
            }

            ulong highestPostId = 0;
            ulong.TryParse(firstPost.Id, out highestPostId);

            return(highestPostId >= GetLastPostId());
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Compares the id of the first post in <paramref name="response"/> with the value
        /// returned by <c>GetLastPostId</c>.
        /// </summary>
        /// <param name="response">The API response to inspect.</param>
        /// <returns>
        /// <c>true</c> when the first post's id is greater than or equal to <c>GetLastPostId()</c>;
        /// <c>false</c> otherwise, including when the response contains no posts. An id that
        /// cannot be parsed is treated as 0.
        /// </returns>
        private bool CheckPostAge(TumblrApiJson response)
        {
            // FirstOrDefault() yields null for an empty post list; dereferencing it
            // unconditionally would throw a NullReferenceException.
            var firstPost = response.posts.FirstOrDefault();
            if (firstPost == null)
            {
                return(false);
            }

            ulong highestPostId = 0;
            ulong.TryParse(firstPost.id, out highestPostId);

            return(highestPostId >= GetLastPostId());
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Compares the id of the first post in <paramref name="response"/> with the value
        /// returned by <c>GetLastPostId</c>.
        /// </summary>
        /// <param name="response">The API response to inspect.</param>
        /// <returns>
        /// <c>false</c> when the response has no posts; otherwise whether the first post's id
        /// (0 if it cannot be parsed) is greater than or equal to <c>GetLastPostId()</c>.
        /// </returns>
        private bool CheckPostAge(TumblrApiJson response)
        {
            var newest = response.Posts.FirstOrDefault();
            if (newest == null)
            {
                return false;
            }

            // A failed parse falls back to 0, the same value TryParse leaves in its out argument.
            ulong newestId = ulong.TryParse(newest.Id, out var parsedId) ? parsedId : 0;

            return newestId >= GetLastPostId();
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Walks the posts of <paramref name="document"/> and, for each post that passes all
        /// filters, runs every per-post "add to download list" step.
        /// </summary>
        /// <remarks>
        /// The loop ends as soon as <c>CheckIfShouldStop</c> returns true. A post is skipped when
        /// its id parses to a value below a non-zero <c>GetLastPostId()</c>, or when it fails
        /// <c>PostWithinTimeSpan</c>, <c>CheckIfContainsTaggedPost</c> or
        /// <c>CheckIfDownloadRebloggedPosts</c>. A <see cref="NullReferenceException"/> raised
        /// while processing a post is logged and does not abort the remaining posts.
        /// </remarks>
        /// <param name="document">The API response whose posts are processed.</param>
        private async Task AddUrlsToDownloadListAsync(TumblrApiJson document)
        {
            var lastKnownId = GetLastPostId();

            foreach (Post post in document.Posts)
            {
                if (CheckIfShouldStop())
                {
                    break;
                }
                CheckIfShouldPause();

                // Short-circuit evaluation keeps the checks in their original order.
                bool belowLastPostId = lastKnownId > 0
                                       && ulong.TryParse(post.Id, out var currentId)
                                       && currentId < lastKnownId;
                if (belowLastPostId
                    || !PostWithinTimeSpan(post)
                    || !CheckIfContainsTaggedPost(post)
                    || !CheckIfDownloadRebloggedPosts(post))
                {
                    continue;
                }

                try
                {
                    AddPhotoUrlToDownloadList(post);
                    AddVideoUrlToDownloadList(post);
                    AddAudioUrlToDownloadList(post);
                    AddTextUrlToDownloadList(post);
                    AddQuoteUrlToDownloadList(post);
                    AddLinkUrlToDownloadList(post);
                    AddConversationUrlToDownloadList(post);
                    AddAnswerUrlToDownloadList(post);
                    AddPhotoMetaUrlToDownloadList(post);
                    AddVideoMetaUrlToDownloadList(post);
                    AddAudioMetaUrlToDownloadList(post);
                    await AddExternalPhotoUrlToDownloadListAsync(post);
                }
                catch (NullReferenceException ex)
                {
                    Logger.Verbose("TumblrBlogCrawler.AddUrlsToDownloadListAsync: {0}", ex);
                }
            }
        }
Ejemplo n.º 8
0
 /// <summary>
 /// Runs the external-host scanners that are enabled on <c>blog</c> against
 /// <paramref name="document"/>, one after another: imgur, then gfycat, then webmshare.
 /// </summary>
 /// <param name="document">The API response handed to each enabled scanner.</param>
 private async Task AddExternalPhotoUrlToDownloadList(TumblrApiJson document)
 {
     // imgur (awaited before the next host is considered)
     if (blog.DownloadImgur)
     {
         await AddImgurUrl(document);
     }

     // gfycat
     if (blog.DownloadGfycat)
     {
         await AddGfycatUrl(document);
     }

     // webmshare is scanned synchronously
     if (blog.DownloadWebmshare)
     {
         AddWebmshareUrl(document);
     }
 }
Ejemplo n.º 9
0
        /// <summary>
        /// When <c>blog.DownloadPhoto</c> is set, queues photo urls from
        /// <paramref name="document"/> in two passes: first the posts of type "photo"
        /// (photo, photo set and, if a caption exists, inline photos), then inline photos of
        /// every other post type.
        /// </summary>
        /// <remarks>
        /// In both passes a post must pass <c>PostWithinTimeSpan</c>, the tag filter
        /// (no tags configured, or at least one case-insensitive tag match) and
        /// <c>CheckIfDownloadRebloggedPosts</c>.
        /// </remarks>
        /// <param name="document">The API response whose posts are scanned.</param>
        private void AddPhotoUrlToDownloadList(TumblrApiJson document)
        {
            if (!blog.DownloadPhoto)
            {
                return;
            }

            // first pass: photo posts
            foreach (Post photoPost in document.posts)
            {
                if (!PostWithinTimeSpan(photoPost))
                {
                    continue;
                }
                if (photoPost.type != "photo")
                {
                    continue;
                }
                if (tags.Any() && !photoPost.tags.Any(t => tags.Contains(t, StringComparer.OrdinalIgnoreCase)))
                {
                    continue;
                }
                if (!CheckIfDownloadRebloggedPosts(photoPost))
                {
                    continue;
                }

                AddPhotoUrl(photoPost);
                AddPhotoSetUrl(photoPost);
                if (photoPost.photo_caption != null)
                {
                    AddInlinePhotoUrl(photoPost);
                }
            }

            // second pass: inline images in non-photo posts
            foreach (Post otherPost in document.posts)
            {
                if (otherPost.type == "photo")
                {
                    continue;
                }
                if (!PostWithinTimeSpan(otherPost))
                {
                    continue;
                }
                if (tags.Any() && !otherPost.tags.Any(t => tags.Contains(t, StringComparer.OrdinalIgnoreCase)))
                {
                    continue;
                }
                if (!CheckIfDownloadRebloggedPosts(otherPost))
                {
                    continue;
                }

                AddInlinePhotoUrl(otherPost);
            }
        }
Ejemplo n.º 10
0
 /// <summary>
 /// When <c>blog.DownloadAudio</c> is set, calls <c>AddAudioUrl</c> for every post of type
 /// "audio" in <paramref name="document"/> that passes the filters.
 /// </summary>
 /// <remarks>
 /// A post must pass <c>PostWithinTimeSpan</c>, the tag filter (no tags configured, or at
 /// least one case-insensitive tag match) and <c>CheckIfDownloadRebloggedPosts</c>.
 /// </remarks>
 /// <param name="document">The API response whose posts are scanned.</param>
 private void AddAudioUrlToDownloadList(TumblrApiJson document)
 {
     if (!blog.DownloadAudio)
     {
         return;
     }

     foreach (Post candidate in document.posts)
     {
         if (!PostWithinTimeSpan(candidate))
         {
             continue;
         }
         if (candidate.type != "audio")
         {
             continue;
         }
         if (tags.Any() && !candidate.tags.Any(t => tags.Contains(t, StringComparer.OrdinalIgnoreCase)))
         {
             continue;
         }
         if (!CheckIfDownloadRebloggedPosts(candidate))
         {
             continue;
         }

         AddAudioUrl(candidate);
     }
 }
Ejemplo n.º 11
0
 /// <summary>
 /// Runs every "add to download list" step against <paramref name="document"/>, in a fixed
 /// order, finishing with the awaited external-photo step.
 /// </summary>
 /// <remarks>
 /// Processing is best-effort: a <see cref="NullReferenceException"/> thrown by any step
 /// ends the remaining steps for this document but is not propagated to the caller. The
 /// exception is written to the trace output so the failure is no longer invisible.
 /// </remarks>
 /// <param name="document">The API response handed to each step.</param>
 private async Task AddUrlsToDownloadList(TumblrApiJson document)
 {
     try
     {
         AddPhotoUrlToDownloadList(document);
         AddVideoUrlToDownloadList(document);
         AddAudioUrlToDownloadList(document);
         AddTextUrlToDownloadList(document);
         AddQuoteUrlToDownloadList(document);
         AddLinkUrlToDownloadList(document);
         AddConversationUrlToDownloadList(document);
         AddAnswerUrlToDownloadList(document);
         AddPhotoMetaUrlToDownloadList(document);
         AddVideoMetaUrlToDownloadList(document);
         AddAudioMetaUrlToDownloadList(document);
         await AddExternalPhotoUrlToDownloadList(document);
     }
     catch (NullReferenceException e)
     {
         // Still swallowed on purpose (callers never saw this exception), but recorded.
         System.Diagnostics.Trace.TraceWarning("AddUrlsToDownloadList: {0}", e);
     }
 }
Ejemplo n.º 12
0
        /// <summary>
        /// Walks the posts of <paramref name="document"/> and, for each post that passes all
        /// filters, runs every per-post "add to download list" step.
        /// </summary>
        /// <remarks>
        /// A post is skipped when it fails <c>PostWithinTimeSpan</c>,
        /// <c>CheckIfContainsTaggedPost</c> or <c>CheckIfDownloadRebloggedPosts</c>.
        /// Processing is best-effort per post: a <see cref="NullReferenceException"/> ends the
        /// remaining steps for that post only, is written to the trace output, and the loop
        /// continues with the next post.
        /// </remarks>
        /// <param name="document">The API response whose posts are processed.</param>
        private async Task AddUrlsToDownloadListAsync(TumblrApiJson document)
        {
            foreach (Post post in document.Posts)
            {
                if (!PostWithinTimeSpan(post))
                {
                    continue;
                }

                if (!CheckIfContainsTaggedPost(post))
                {
                    continue;
                }

                if (!CheckIfDownloadRebloggedPosts(post))
                {
                    continue;
                }

                try
                {
                    AddPhotoUrlToDownloadList(post);
                    AddVideoUrlToDownloadList(post);
                    AddAudioUrlToDownloadList(post);
                    AddTextUrlToDownloadList(post);
                    AddQuoteUrlToDownloadList(post);
                    AddLinkUrlToDownloadList(post);
                    AddConversationUrlToDownloadList(post);
                    AddAnswerUrlToDownloadList(post);
                    AddPhotoMetaUrlToDownloadList(post);
                    AddVideoMetaUrlToDownloadList(post);
                    AddAudioMetaUrlToDownloadList(post);
                    await AddExternalPhotoUrlToDownloadListAsync(post);
                }
                catch (NullReferenceException e)
                {
                    // Still swallowed on purpose (callers never saw this exception), but recorded.
                    System.Diagnostics.Trace.TraceWarning("AddUrlsToDownloadListAsync: {0}", e);
                }
            }
        }
Ejemplo n.º 13
0
 /// <summary>
 /// When <c>blog.CreateAudioMeta</c> is set, queues an <c>AudioMetaPost</c> and the post's
 /// JSON data for every post of type "audio" in <paramref name="document"/> that passes the filters.
 /// </summary>
 /// <remarks>
 /// A post must pass <c>PostWithinTimeSpan</c>, the tag filter (no tags configured, or at
 /// least one case-insensitive tag match) and <c>CheckIfDownloadRebloggedPosts</c>.
 /// </remarks>
 /// <param name="document">The API response whose posts are scanned.</param>
 private void AddAudioMetaUrlToDownloadList(TumblrApiJson document)
 {
     if (!blog.CreateAudioMeta)
     {
         return;
     }

     foreach (Post candidate in document.posts)
     {
         if (!PostWithinTimeSpan(candidate))
         {
             continue;
         }
         if (candidate.type != "audio")
         {
             continue;
         }
         if (tags.Any() && !candidate.tags.Any(t => tags.Contains(t, StringComparer.OrdinalIgnoreCase)))
         {
             continue;
         }
         if (!CheckIfDownloadRebloggedPosts(candidate))
         {
             continue;
         }

         string metaText = tumblrJsonParser.ParseAudioMeta(candidate);
         AddToDownloadList(new AudioMetaPost(metaText, candidate.id));
         // The JSON file is named after the post id with a ".json" extension.
         AddToJsonQueue(new TumblrCrawlerData <Post>(Path.ChangeExtension(candidate.id, ".json"), candidate));
     }
 }