Esempio n. 1
0
 /// <summary>
 /// Scans the <c>post</c> elements of an XML document for links to external hosts
 /// (Imgur, Gfycat, Webmshare) and passes every accepted link to <c>AddToDownloadList</c>.
 /// Each host is scanned in its own pass and only when the matching blog option is enabled.
 /// </summary>
 /// <param name="document">XML container whose <c>post</c> descendants are scanned.</param>
 /// <returns>A task that completes after all enabled passes have finished.</returns>
 private async Task AddExternalPhotoUrlToDownloadList(XContainer document)
 {
     if (blog.DownloadImgur)
     {
         foreach (XElement post in document.Descendants("post"))
         {
             if (!ShouldScanPostForExternalUrls(post))
             {
                 continue;
             }

             Regex regex = imgurParser.GetImgurUrlRegex();
             foreach (Match match in regex.Matches(post.Value))
             {
                 string imageUrl = match.Groups[1].Value;
                 string imgurId  = match.Groups[2].Value;
                 // Ordinal comparison: file extensions are identifiers, not linguistic text.
                 if (blog.SkipGif && (imageUrl.EndsWith(".gif", StringComparison.Ordinal) ||
                                      imageUrl.EndsWith(".gifv", StringComparison.Ordinal)))
                 {
                     continue;
                 }
                 // TODO: postID
                 AddToDownloadList(new TumblrPost(PostTypes.Photo, imageUrl, imgurId,
                                                  post.Attribute("unix-timestamp").Value));
             }
         }
     }

     if (blog.DownloadGfycat)
     {
         foreach (XElement post in document.Descendants("post"))
         {
             if (!ShouldScanPostForExternalUrls(post))
             {
                 continue;
             }

             Regex regex = gfycatParser.GetGfycatUrlRegex();
             foreach (Match match in regex.Matches(post.Value))
             {
                 string gfyId = match.Groups[2].Value;
                 // The downloadable URL is derived from the response requested for this id.
                 string videoUrl = gfycatParser.ParseGfycatCajaxResponse(await gfycatParser.RequestGfycatCajax(gfyId),
                                                                         blog.GfycatType);
                 if (blog.SkipGif && (videoUrl.EndsWith(".gif", StringComparison.Ordinal) ||
                                      videoUrl.EndsWith(".gifv", StringComparison.Ordinal)))
                 {
                     continue;
                 }
                 // TODO: postID
                 AddToDownloadList(new TumblrPost(PostTypes.Video, videoUrl, gfyId,
                                                  post.Attribute("unix-timestamp").Value));
             }
         }
     }

     if (blog.DownloadWebmshare)
     {
         foreach (XElement post in document.Descendants("post"))
         {
             if (!ShouldScanPostForExternalUrls(post))
             {
                 continue;
             }

             Regex regex = webmshareParser.GetWebmshareUrlRegex();
             foreach (Match match in regex.Matches(post.Value))
             {
                 string webmshareId = match.Groups[2].Value;
                 string imageUrl    = webmshareParser.CreateWebmshareUrl(webmshareId, blog.WebmshareType);
                 if (blog.SkipGif && (imageUrl.EndsWith(".gif", StringComparison.Ordinal) ||
                                      imageUrl.EndsWith(".gifv", StringComparison.Ordinal)))
                 {
                     continue;
                 }
                 // TODO: postID
                 // NOTE(review): Webmshare links are registered as PostTypes.Photo here;
                 // confirm that Photo (rather than Video) is the intended post type.
                 AddToDownloadList(new TumblrPost(PostTypes.Photo, imageUrl, webmshareId,
                                                  post.Attribute("unix-timestamp").Value));
             }
         }
     }
 }

 /// <summary>
 /// Decides whether an XML post should be searched for external links: it has to pass
 /// <c>PostWithinTimeSpan</c>, then the tag filter (no tags configured, or at least one
 /// <c>tag</c> descendant matching case-insensitively), then <c>CheckIfDownloadRebloggedPosts</c>.
 /// The checks run in that order and stop at the first failure.
 /// </summary>
 /// <param name="post">The <c>post</c> element to test.</param>
 /// <returns><c>true</c> when all three checks pass.</returns>
 private bool ShouldScanPostForExternalUrls(XElement post)
 {
     if (!PostWithinTimeSpan(post))
     {
         return false;
     }

     bool tagAccepted = !tags.Any() ||
                        post.Descendants("tag").Any(x => tags.Contains(x.Value, StringComparer.OrdinalIgnoreCase));

     return tagAccepted && CheckIfDownloadRebloggedPosts(post);
 }
Esempio n. 2
0
 /// <summary>
 /// Scans the <c>post</c> elements of an XML document for Gfycat links. For every accepted
 /// link the video URL is passed to <c>AddToDownloadList</c> and the post itself is passed to
 /// <c>AddToXmlQueue</c> under the video's file name with an <c>.xml</c> extension.
 /// </summary>
 /// <param name="document">XML container whose <c>post</c> descendants are scanned.</param>
 /// <returns>A task that completes after every post has been processed.</returns>
 private async Task DownloadGfycat(XContainer document)
 {
     foreach (XElement post in document.Descendants("post"))
     {
         if (!PostWithinTimeSpan(post))
         {
             continue;
         }

         // Tag filter: accept everything when no tags are configured, otherwise require a
         // case-insensitive match on at least one of the post's tags.
         bool tagAccepted = !tags.Any() ||
                            post.Descendants("tag").Any(x => tags.Contains(x.Value, StringComparer.OrdinalIgnoreCase));
         if (!tagAccepted || !CheckIfDownloadRebloggedPosts(post))
         {
             continue;
         }

         Regex regex = gfycatParser.GetGfycatUrlRegex();
         foreach (Match match in regex.Matches(post.Value))
         {
             string gfyId    = match.Groups[2].Value;
             string videoUrl = gfycatParser.ParseGfycatCajaxResponse(await gfycatParser.RequestGfycatCajax(gfyId),
                                                                     blog.GfycatType);
             // Ordinal comparison: file extensions are identifiers, not linguistic text.
             if (blog.SkipGif && (videoUrl.EndsWith(".gif", StringComparison.Ordinal) ||
                                  videoUrl.EndsWith(".gifv", StringComparison.Ordinal)))
             {
                 continue;
             }

             AddToDownloadList(new ExternalVideoPost(videoUrl, gfyId,
                                                     post.Attribute("unix-timestamp").Value));

             // The XML entry is named after the last URL segment with its extension swapped to .xml.
             string xmlFileName = Path.ChangeExtension(videoUrl.Split('/').Last(), ".xml");
             AddToXmlQueue(new TumblrCrawlerXmlData(xmlFileName, post));
         }
     }
 }
Esempio n. 3
0
 /// <summary>
 /// Scans the captions of the posts in a JSON response for Gfycat links. For every accepted
 /// link the video URL is passed to <c>AddToDownloadList</c> and the post itself is passed to
 /// <c>AddToJsonQueue</c> under the video's file name with a <c>.json</c> extension.
 /// Posts without a caption are skipped.
 /// </summary>
 /// <param name="document">JSON response whose <c>response.posts</c> are scanned.</param>
 /// <returns>A task that completes after every post has been processed.</returns>
 private async Task DownloadGfycat(TumblrJson document)
 {
     foreach (Post post in document.response.posts)
     {
         if (!PostWithinTimeSpan(post))
         {
             continue;
         }

         // Tag filter: accept everything when no tags are configured, otherwise require a
         // case-insensitive overlap between the post's tags and the configured ones.
         bool tagAccepted = !tags.Any() || post.tags.Intersect(tags, StringComparer.OrdinalIgnoreCase).Any();
         if (!tagAccepted || !CheckIfDownloadRebloggedPosts(post))
         {
             continue;
         }

         // Regex.Matches throws ArgumentNullException on null input, and an empty caption
         // cannot contain a link, so there is nothing to scan in either case.
         string caption = post.caption;
         if (string.IsNullOrEmpty(caption))
         {
             continue;
         }

         Regex regex = gfycatParser.GetGfycatUrlRegex();
         foreach (Match match in regex.Matches(caption))
         {
             string gfyId    = match.Groups[2].Value;
             string videoUrl = gfycatParser.ParseGfycatCajaxResponse(await gfycatParser.RequestGfycatCajax(gfyId),
                                                                     blog.GfycatType);
             // Ordinal comparison: file extensions are identifiers, not linguistic text.
             if (blog.SkipGif && (videoUrl.EndsWith(".gif", StringComparison.Ordinal) ||
                                  videoUrl.EndsWith(".gifv", StringComparison.Ordinal)))
             {
                 continue;
             }

             // TODO: postID
             AddToDownloadList(new VideoPost(videoUrl, gfyId,
                                             post.timestamp.ToString()));

             // The JSON entry is named after the last URL segment with its extension swapped to .json.
             string jsonFileName = Path.ChangeExtension(videoUrl.Split('/').Last(), ".json");
             AddToJsonQueue(new TumblrCrawlerData <Post>(jsonFileName, post));
         }
     }
 }
Esempio n. 4
0
 /// <summary>
 /// Scans the posts of a JSON response for links to external hosts (Imgur, Gfycat, Webmshare)
 /// and passes every accepted link to <c>AddToDownloadList</c>. The text that is searched is
 /// the result of <c>post.ToString()</c>. Each host is scanned in its own pass and only when
 /// the matching blog option is enabled.
 /// </summary>
 /// <param name="document">JSON response whose <c>response.posts</c> are scanned.</param>
 /// <returns>A task that completes after all enabled passes have finished.</returns>
 private async Task AddExternalPhotoUrlToDownloadList(TumblrJson document)
 {
     if (blog.DownloadImgur)
     {
         foreach (Post post in document.response.posts)
         {
             if (!ShouldScanPostForExternalUrls(post))
             {
                 continue;
             }

             Regex regex = imgurParser.GetImgurUrlRegex();
             foreach (Match match in regex.Matches(post.ToString()))
             {
                 string imageUrl = match.Groups[1].Value;
                 // Ordinal comparison: file extensions are identifiers, not linguistic text.
                 if (blog.SkipGif && (imageUrl.EndsWith(".gif", StringComparison.Ordinal) ||
                                      imageUrl.EndsWith(".gifv", StringComparison.Ordinal)))
                 {
                     continue;
                 }
                 // TODO: postID
                 // NOTE(review): a fresh GUID is used as the id although capture group 2 was
                 // previously read into an (unused) imgurId local; confirm which id is intended.
                 AddToDownloadList(new TumblrPost(PostTypes.Photo, imageUrl, Guid.NewGuid().ToString("N"),
                                                  post.timestamp.ToString()));
             }
         }
     }

     if (blog.DownloadGfycat)
     {
         foreach (Post post in document.response.posts)
         {
             if (!ShouldScanPostForExternalUrls(post))
             {
                 continue;
             }

             Regex regex = gfycatParser.GetGfycatUrlRegex();
             foreach (Match match in regex.Matches(post.ToString()))
             {
                 string gfyId = match.Groups[2].Value;
                 // The downloadable URL is derived from the response requested for this id.
                 string videoUrl = gfycatParser.ParseGfycatCajaxResponse(await gfycatParser.RequestGfycatCajax(gfyId),
                                                                         blog.GfycatType);
                 if (blog.SkipGif && (videoUrl.EndsWith(".gif", StringComparison.Ordinal) ||
                                      videoUrl.EndsWith(".gifv", StringComparison.Ordinal)))
                 {
                     continue;
                 }
                 // TODO: postID
                 AddToDownloadList(new TumblrPost(PostTypes.Video, videoUrl, gfyId,
                                                  post.timestamp.ToString()));
             }
         }
     }

     if (blog.DownloadWebmshare)
     {
         foreach (Post post in document.response.posts)
         {
             if (!ShouldScanPostForExternalUrls(post))
             {
                 continue;
             }

             Regex regex = webmshareParser.GetWebmshareUrlRegex();
             foreach (Match match in regex.Matches(post.ToString()))
             {
                 string webmshareId = match.Groups[2].Value;
                 string imageUrl    = webmshareParser.CreateWebmshareUrl(webmshareId, blog.WebmshareType);
                 if (blog.SkipGif && (imageUrl.EndsWith(".gif", StringComparison.Ordinal) ||
                                      imageUrl.EndsWith(".gifv", StringComparison.Ordinal)))
                 {
                     continue;
                 }
                 // TODO: postID
                 AddToDownloadList(new TumblrPost(PostTypes.Video, imageUrl, webmshareId,
                                                  post.timestamp.ToString()));
             }
         }
     }
 }

 /// <summary>
 /// Decides whether a JSON post should be searched for external links: it has to pass
 /// <c>PostWithinTimeSpan</c>, then the tag filter (no tags configured, or a case-insensitive
 /// overlap between the post's tags and the configured ones), then
 /// <c>CheckIfDownloadRebloggedPosts</c>. The checks run in that order and stop at the first failure.
 /// </summary>
 /// <param name="post">The post to test.</param>
 /// <returns><c>true</c> when all three checks pass.</returns>
 private bool ShouldScanPostForExternalUrls(Post post)
 {
     if (!PostWithinTimeSpan(post))
     {
         return false;
     }

     bool tagAccepted = !tags.Any() || post.tags.Intersect(tags, StringComparer.OrdinalIgnoreCase).Any();

     return tagAccepted && CheckIfDownloadRebloggedPosts(post);
 }