/// <summary>
/// Searches every eligible post of <paramref name="document"/> for webmshare links and
/// queues each hit for download together with a JSON record of the post.
/// </summary>
/// <remarks>
/// A post is skipped when <c>PostWithinTimeSpan</c> rejects it, when <c>tags</c> is non-empty and
/// the post carries none of them (case-insensitive), or when
/// <c>CheckIfDownloadRebloggedPosts</c> rejects it.
/// </remarks>
/// <param name="document">Parsed API response whose <c>posts</c> are scanned.</param>
private void AddWebmshareUrl(TumblrApiJson document)
{
    foreach (Post post in document.posts)
    {
        if (!PostWithinTimeSpan(post))
        {
            continue;
        }

        // Tag filter: only applies when at least one tag is configured.
        if (tags.Any() && !post.tags.Any(x => tags.Contains(x, StringComparer.OrdinalIgnoreCase)))
        {
            continue;
        }

        if (!CheckIfDownloadRebloggedPosts(post))
        {
            continue;
        }

        Regex urlRegex = webmshareParser.GetWebmshareUrlRegex();
        foreach (Match hit in urlRegex.Matches(InlineSearch(post)))
        {
            // Capture group 2 holds the id that is handed to CreateWebmshareUrl.
            string id = hit.Groups[2].Value;
            string url = webmshareParser.CreateWebmshareUrl(id, blog.WebmshareType);

            if (blog.SkipGif && (url.EndsWith(".gif") || url.EndsWith(".gifv")))
            {
                continue;
            }

            string timestamp = post.unix_timestamp.ToString();
            AddToDownloadList(new ExternalVideoPost(url, id, timestamp));

            // The JSON record is named after the last URL segment, with a .json extension.
            string jsonName = Path.ChangeExtension(url.Split('/').Last(), ".json");
            AddToJsonQueue(new TumblrCrawlerData<Post>(jsonName, post));
        }
    }
}
/// <summary>
/// Searches every eligible post of <paramref name="document"/> for imgur links and queues the
/// referenced images: first directly linked images, then the images listed on linked album pages.
/// </summary>
/// <remarks>
/// A post is skipped when <c>PostWithinTimeSpan</c> rejects it, when <c>tags</c> is non-empty and
/// the post carries none of them (case-insensitive), or when
/// <c>CheckIfDownloadRebloggedPosts</c> rejects it.
/// </remarks>
/// <param name="document">Parsed API response whose <c>posts</c> are scanned.</param>
/// <returns>A task that completes once all album pages have been requested and processed.</returns>
private async Task AddImgurUrl(TumblrApiJson document)
{
    foreach (Post post in document.posts)
    {
        if (!PostWithinTimeSpan(post))
        {
            continue;
        }

        // Tag filter: only applies when at least one tag is configured.
        if (tags.Any() && !post.tags.Any(x => tags.Contains(x, StringComparer.OrdinalIgnoreCase)))
        {
            continue;
        }

        if (!CheckIfDownloadRebloggedPosts(post))
        {
            continue;
        }

        // Pass 1: single linked images.
        Regex imageRegex = imgurParser.GetImgurImageRegex();
        foreach (Match imageMatch in imageRegex.Matches(InlineSearch(post)))
        {
            string directUrl = imageMatch.Groups[1].Value;
            string imageId = imageMatch.Groups[2].Value;

            if (blog.SkipGif && (directUrl.EndsWith(".gif") || directUrl.EndsWith(".gifv")))
            {
                continue;
            }

            AddToDownloadList(new ExternalPhotoPost(directUrl, imageId, post.unix_timestamp.ToString()));
            AddToJsonQueue(new TumblrCrawlerData<Post>(Path.ChangeExtension(directUrl.Split('/').Last(), ".json"), post));
        }

        // Pass 2: album urls. The album page is fetched and scanned for hashes and extensions.
        Regex albumRegex = imgurParser.GetImgurAlbumRegex();
        foreach (Match albumMatch in albumRegex.Matches(InlineSearch(post)))
        {
            string albumUrl = albumMatch.Groups[1].Value;
            string albumId = albumMatch.Groups[2].Value;
            string albumPage = await imgurParser.RequestImgurAlbumSite(albumUrl);

            var hashes = imgurParser.GetImgurAlbumHashRegex()
                                    .Matches(albumPage)
                                    .Cast<Match>()
                                    .Select(m => m.Groups[1].Value)
                                    .ToList();

            var extensions = imgurParser.GetImgurAlbumExtRegex()
                                        .Matches(albumPage)
                                        .Cast<Match>()
                                        .Select(m => m.Groups[1].Value)
                                        .ToList();

            // Hashes and extensions are paired by position; Zip stops at the shorter list.
            foreach (string albumImageUrl in hashes.Zip(extensions, (hash, ext) => string.Concat("https://i.imgur.com/", hash, ext)))
            {
                if (blog.SkipGif && (albumImageUrl.EndsWith(".gif") || albumImageUrl.EndsWith(".gifv")))
                {
                    continue;
                }

                // Every image of an album is registered under the album's id.
                AddToDownloadList(new ExternalPhotoPost(albumImageUrl, albumId, post.unix_timestamp.ToString()));
                AddToJsonQueue(new TumblrCrawlerData<Post>(Path.ChangeExtension(albumImageUrl.Split('/').Last(), ".json"), post));
            }
        }
    }
}
/// <summary>
/// Searches every eligible post of <paramref name="document"/> for gfycat links, resolves each
/// one to a video url through <c>gfycatParser</c>, and queues it for download together with a
/// JSON record of the post.
/// </summary>
/// <remarks>
/// A post is skipped when <c>PostWithinTimeSpan</c> rejects it, when <c>tags</c> is non-empty and
/// the post carries none of them (case-insensitive), or when
/// <c>CheckIfDownloadRebloggedPosts</c> rejects it.
/// </remarks>
/// <param name="document">Parsed API response whose <c>posts</c> are scanned.</param>
/// <returns>A task that completes once every matched link has been resolved and queued.</returns>
private async Task AddGfycatUrl(TumblrApiJson document)
{
    foreach (Post post in document.posts)
    {
        if (!PostWithinTimeSpan(post))
        {
            continue;
        }

        // Tag filter: only applies when at least one tag is configured.
        if (tags.Any() && !post.tags.Any(x => tags.Contains(x, StringComparer.OrdinalIgnoreCase)))
        {
            continue;
        }

        if (!CheckIfDownloadRebloggedPosts(post))
        {
            continue;
        }

        Regex urlRegex = gfycatParser.GetGfycatUrlRegex();
        foreach (Match hit in urlRegex.Matches(InlineSearch(post)))
        {
            // Capture group 2 holds the id that is sent to RequestGfycatCajax.
            string id = hit.Groups[2].Value;
            var cajaxResponse = await gfycatParser.RequestGfycatCajax(id);
            string resolvedUrl = gfycatParser.ParseGfycatCajaxResponse(cajaxResponse, blog.GfycatType);

            if (blog.SkipGif && (resolvedUrl.EndsWith(".gif") || resolvedUrl.EndsWith(".gifv")))
            {
                continue;
            }

            AddToDownloadList(new ExternalVideoPost(resolvedUrl, id, post.unix_timestamp.ToString()));

            // The JSON record is named after the last URL segment, with a .json extension.
            string jsonName = Path.ChangeExtension(resolvedUrl.Split('/').Last(), ".json");
            AddToJsonQueue(new TumblrCrawlerData<Post>(jsonName, post));
        }
    }
}
/// <summary>
/// Compares the id of the first post in <paramref name="response"/> with the value returned
/// by <c>GetLastPostId</c>.
/// </summary>
/// <param name="response">API response whose <c>Posts</c> collection is inspected.</param>
/// <returns>
/// <c>true</c> when the first post's id is greater than or equal to <c>GetLastPostId()</c>;
/// <c>false</c> when it is lower or when the response contains no posts.
/// </returns>
private bool CheckPostAge(TumblrApiJson response)
{
    var firstPost = response.Posts.FirstOrDefault();
    if (firstPost == null)
    {
        // FirstOrDefault yields null for an empty post list; dereferencing it would throw
        // a NullReferenceException, so an empty response is reported as "not newer".
        return false;
    }

    // An id that cannot be parsed leaves highestPostId at 0.
    ulong.TryParse(firstPost.Id, out ulong highestPostId);
    return highestPostId >= GetLastPostId();
}
/// <summary>
/// Compares the id of the first post in <paramref name="response"/> with the value returned
/// by <c>GetLastPostId</c>.
/// </summary>
/// <param name="response">API response whose <c>posts</c> collection is inspected.</param>
/// <returns>
/// <c>true</c> when the first post's id is greater than or equal to <c>GetLastPostId()</c>;
/// <c>false</c> when it is lower or when the response contains no posts.
/// </returns>
private bool CheckPostAge(TumblrApiJson response)
{
    Post firstPost = response.posts.FirstOrDefault();
    if (firstPost == null)
    {
        // FirstOrDefault yields null for an empty post list; dereferencing it would throw
        // a NullReferenceException, so an empty response is reported as "not newer".
        return false;
    }

    // An id that cannot be parsed leaves highestPostId at 0.
    ulong highestPostId;
    ulong.TryParse(firstPost.id, out highestPostId);

    return highestPostId >= GetLastPostId();
}
/// <summary>
/// Tells whether the first post of <paramref name="response"/> has an id that is at least the
/// value returned by <c>GetLastPostId</c>.
/// </summary>
/// <param name="response">API response whose <c>Posts</c> collection is inspected.</param>
/// <returns>
/// <c>false</c> when there is no first post or its id is below <c>GetLastPostId()</c>;
/// otherwise <c>true</c>.
/// </returns>
private bool CheckPostAge(TumblrApiJson response)
{
    var firstPost = response.Posts.FirstOrDefault();
    if (firstPost != null)
    {
        // The parse result is deliberately ignored: a failed parse leaves parsedId at 0.
        ulong.TryParse(firstPost.Id, out ulong parsedId);
        return parsedId >= GetLastPostId();
    }

    return false;
}
/// <summary>
/// Walks the posts of <paramref name="document"/> and, for every post that passes the filters,
/// runs all per-post "add to download list" steps.
/// </summary>
/// <remarks>
/// Iteration ends as soon as <c>CheckIfShouldStop</c> returns true; <c>CheckIfShouldPause</c> is
/// invoked once per post before any filtering. A <see cref="NullReferenceException"/> raised by
/// any step is logged via <c>Logger.Verbose</c> and processing continues with the next post.
/// </remarks>
/// <param name="document">Parsed API response whose <c>Posts</c> are processed.</param>
/// <returns>A task that completes when the loop has finished or was stopped.</returns>
private async Task AddUrlsToDownloadListAsync(TumblrApiJson document)
{
    var lastId = GetLastPostId();

    foreach (Post current in document.Posts)
    {
        if (CheckIfShouldStop())
        {
            break;
        }

        CheckIfShouldPause();

        // Only evaluated when a last post id is known and the post's id parses as a number.
        bool belowLastId = lastId > 0
                           && ulong.TryParse(current.Id, out var currentId)
                           && currentId < lastId;

        // Short-circuit keeps the original filter order: id, time span, tags, reblog setting.
        if (belowLastId
            || !PostWithinTimeSpan(current)
            || !CheckIfContainsTaggedPost(current)
            || !CheckIfDownloadRebloggedPosts(current))
        {
            continue;
        }

        try
        {
            AddPhotoUrlToDownloadList(current);
            AddVideoUrlToDownloadList(current);
            AddAudioUrlToDownloadList(current);
            AddTextUrlToDownloadList(current);
            AddQuoteUrlToDownloadList(current);
            AddLinkUrlToDownloadList(current);
            AddConversationUrlToDownloadList(current);
            AddAnswerUrlToDownloadList(current);
            AddPhotoMetaUrlToDownloadList(current);
            AddVideoMetaUrlToDownloadList(current);
            AddAudioMetaUrlToDownloadList(current);
            await AddExternalPhotoUrlToDownloadListAsync(current);
        }
        catch (NullReferenceException e)
        {
            Logger.Verbose("TumblrBlogCrawler.AddUrlsToDownloadListAsync: {0}", e);
        }
    }
}
/// <summary>
/// Runs the external-host scanners over <paramref name="document"/>, each one only when the
/// matching blog setting is switched on: imgur, then gfycat, then webmshare.
/// </summary>
/// <param name="document">Parsed API response handed to each enabled scanner.</param>
/// <returns>A task that completes after the enabled scanners have run in sequence.</returns>
private async Task AddExternalPhotoUrlToDownloadList(TumblrApiJson document)
{
    if (blog.DownloadImgur)
    {
        await AddImgurUrl(document);
    }

    if (blog.DownloadGfycat)
    {
        await AddGfycatUrl(document);
    }

    // The webmshare scanner is synchronous, so there is nothing to await.
    if (blog.DownloadWebmshare)
    {
        AddWebmshareUrl(document);
    }
}
/// <summary>
/// When <c>blog.DownloadPhoto</c> is set, queues photo urls from <paramref name="document"/>:
/// the first pass handles posts of type "photo", the second pass looks for inline images in
/// all other posts.
/// </summary>
/// <remarks>
/// In both passes a post is skipped when <c>PostWithinTimeSpan</c> rejects it, when <c>tags</c>
/// is non-empty and the post carries none of them (case-insensitive), or when
/// <c>CheckIfDownloadRebloggedPosts</c> rejects it.
/// </remarks>
/// <param name="document">Parsed API response whose <c>posts</c> are scanned.</param>
private void AddPhotoUrlToDownloadList(TumblrApiJson document)
{
    if (!blog.DownloadPhoto)
    {
        return;
    }

    // Pass 1: photo posts.
    foreach (Post photoPost in document.posts)
    {
        if (!PostWithinTimeSpan(photoPost))
        {
            continue;
        }

        if (photoPost.type != "photo")
        {
            continue;
        }

        if (tags.Any() && !photoPost.tags.Any(x => tags.Contains(x, StringComparer.OrdinalIgnoreCase)))
        {
            continue;
        }

        if (!CheckIfDownloadRebloggedPosts(photoPost))
        {
            continue;
        }

        AddPhotoUrl(photoPost);
        AddPhotoSetUrl(photoPost);

        // Inline images are only searched for when the photo post has a caption.
        if (photoPost.photo_caption != null)
        {
            AddInlinePhotoUrl(photoPost);
        }
    }

    // Pass 2: check for inline images in every non-photo post.
    foreach (Post otherPost in document.posts)
    {
        if (otherPost.type == "photo")
        {
            continue;
        }

        if (!PostWithinTimeSpan(otherPost))
        {
            continue;
        }

        if (tags.Any() && !otherPost.tags.Any(x => tags.Contains(x, StringComparer.OrdinalIgnoreCase)))
        {
            continue;
        }

        if (CheckIfDownloadRebloggedPosts(otherPost))
        {
            AddInlinePhotoUrl(otherPost);
        }
    }
}
/// <summary>
/// When <c>blog.DownloadAudio</c> is set, passes every eligible post of type "audio" in
/// <paramref name="document"/> to <c>AddAudioUrl</c>.
/// </summary>
/// <remarks>
/// A post is skipped when <c>PostWithinTimeSpan</c> rejects it, when its type is not "audio",
/// when <c>tags</c> is non-empty and the post carries none of them (case-insensitive), or when
/// <c>CheckIfDownloadRebloggedPosts</c> rejects it.
/// </remarks>
/// <param name="document">Parsed API response whose <c>posts</c> are scanned.</param>
private void AddAudioUrlToDownloadList(TumblrApiJson document)
{
    if (!blog.DownloadAudio)
    {
        return;
    }

    foreach (Post candidate in document.posts)
    {
        if (!PostWithinTimeSpan(candidate))
        {
            continue;
        }

        if (candidate.type != "audio")
        {
            continue;
        }

        // Tag filter: only applies when at least one tag is configured.
        if (tags.Any() && !candidate.tags.Any(x => tags.Contains(x, StringComparer.OrdinalIgnoreCase)))
        {
            continue;
        }

        if (CheckIfDownloadRebloggedPosts(candidate))
        {
            AddAudioUrl(candidate);
        }
    }
}
/// <summary>
/// Runs every "add to download list" step over <paramref name="document"/> in a fixed order,
/// finishing with the asynchronous external-host step.
/// </summary>
/// <remarks>
/// A <see cref="NullReferenceException"/> raised by any step aborts the remaining steps for this
/// document. It is treated as non-fatal, but it is now logged instead of being silently
/// discarded so that malformed posts can be diagnosed.
/// </remarks>
/// <param name="document">Parsed API response handed to each step.</param>
/// <returns>A task that completes when all steps have run or one of them has failed.</returns>
private async Task AddUrlsToDownloadList(TumblrApiJson document)
{
    try
    {
        AddPhotoUrlToDownloadList(document);
        AddVideoUrlToDownloadList(document);
        AddAudioUrlToDownloadList(document);
        AddTextUrlToDownloadList(document);
        AddQuoteUrlToDownloadList(document);
        AddLinkUrlToDownloadList(document);
        AddConversationUrlToDownloadList(document);
        AddAnswerUrlToDownloadList(document);
        AddPhotoMetaUrlToDownloadList(document);
        AddVideoMetaUrlToDownloadList(document);
        AddAudioMetaUrlToDownloadList(document);
        await AddExternalPhotoUrlToDownloadList(document);
    }
    catch (NullReferenceException e)
    {
        // Still best-effort (no rethrow), but leave a trace of what went wrong.
        Logger.Verbose("TumblrBlogCrawler.AddUrlsToDownloadList: {0}", e);
    }
}
/// <summary>
/// Walks the posts of <paramref name="document"/> and, for every post that passes the filters,
/// runs all per-post "add to download list" steps.
/// </summary>
/// <remarks>
/// A post is skipped when <c>PostWithinTimeSpan</c>, <c>CheckIfContainsTaggedPost</c> or
/// <c>CheckIfDownloadRebloggedPosts</c> rejects it. A <see cref="NullReferenceException"/> raised
/// by any step ends processing of that post only; it is logged via <c>Logger.Verbose</c> rather
/// than silently discarded, and the loop continues with the next post.
/// </remarks>
/// <param name="document">Parsed API response whose <c>Posts</c> are processed.</param>
/// <returns>A task that completes when every post has been processed.</returns>
private async Task AddUrlsToDownloadListAsync(TumblrApiJson document)
{
    foreach (Post post in document.Posts)
    {
        if (!PostWithinTimeSpan(post))
        {
            continue;
        }

        if (!CheckIfContainsTaggedPost(post))
        {
            continue;
        }

        if (!CheckIfDownloadRebloggedPosts(post))
        {
            continue;
        }

        try
        {
            AddPhotoUrlToDownloadList(post);
            AddVideoUrlToDownloadList(post);
            AddAudioUrlToDownloadList(post);
            AddTextUrlToDownloadList(post);
            AddQuoteUrlToDownloadList(post);
            AddLinkUrlToDownloadList(post);
            AddConversationUrlToDownloadList(post);
            AddAnswerUrlToDownloadList(post);
            AddPhotoMetaUrlToDownloadList(post);
            AddVideoMetaUrlToDownloadList(post);
            AddAudioMetaUrlToDownloadList(post);
            await AddExternalPhotoUrlToDownloadListAsync(post);
        }
        catch (NullReferenceException e)
        {
            // Still best-effort (no rethrow), but leave a trace of what went wrong.
            Logger.Verbose("TumblrBlogCrawler.AddUrlsToDownloadListAsync: {0}", e);
        }
    }
}
/// <summary>
/// When <c>blog.CreateAudioMeta</c> is set, builds the audio metadata text for every eligible
/// post of type "audio" in <paramref name="document"/> and queues it, together with a JSON
/// record of the post.
/// </summary>
/// <remarks>
/// A post is skipped when <c>PostWithinTimeSpan</c> rejects it, when its type is not "audio",
/// when <c>tags</c> is non-empty and the post carries none of them (case-insensitive), or when
/// <c>CheckIfDownloadRebloggedPosts</c> rejects it.
/// </remarks>
/// <param name="document">Parsed API response whose <c>posts</c> are scanned.</param>
private void AddAudioMetaUrlToDownloadList(TumblrApiJson document)
{
    if (!blog.CreateAudioMeta)
    {
        return;
    }

    foreach (Post candidate in document.posts)
    {
        if (!PostWithinTimeSpan(candidate))
        {
            continue;
        }

        if (candidate.type != "audio")
        {
            continue;
        }

        // Tag filter: only applies when at least one tag is configured.
        if (tags.Any() && !candidate.tags.Any(x => tags.Contains(x, StringComparer.OrdinalIgnoreCase)))
        {
            continue;
        }

        if (!CheckIfDownloadRebloggedPosts(candidate))
        {
            continue;
        }

        string metaText = tumblrJsonParser.ParseAudioMeta(candidate);
        AddToDownloadList(new AudioMetaPost(metaText, candidate.id));

        // The JSON record is named after the post id, with a .json extension.
        string jsonName = Path.ChangeExtension(candidate.id, ".json");
        AddToJsonQueue(new TumblrCrawlerData<Post>(jsonName, candidate));
    }
}