/// <summary>
/// Scans the caption of every post in <paramref name="document"/> for Webmshare links. For each
/// link that is not skipped, a <c>VideoPost</c> is added to the download list and the post itself
/// is added to the JSON queue under the download's file name with a <c>.json</c> extension.
/// </summary>
/// <param name="document">Tumblr JSON document whose <c>response.posts</c> are scanned.</param>
private void Downloadwebmshare(TumblrJson document)
{
    foreach (Post post in document.response.posts)
    {
        if (!PostWithinTimeSpan(post))
        {
            continue;
        }

        // An empty tag filter accepts every post; otherwise at least one post tag
        // has to match one of the configured tags (case-insensitive).
        if (tags.Any() && !post.tags.Intersect(tags, StringComparer.OrdinalIgnoreCase).Any())
        {
            continue;
        }

        if (!CheckIfDownloadRebloggedPosts(post))
        {
            continue;
        }

        Regex regex = webmshareParser.GetWebmshareUrlRegex();

        // Regex.Matches throws ArgumentNullException for null input, so a post
        // without a caption is treated as containing no links.
        foreach (Match match in regex.Matches(post.caption ?? string.Empty))
        {
            // Group 0 is the whole match; only the part before the first double quote is kept.
            string url = match.Groups[0].Value.Split('\"').First();
            string webmshareId = match.Groups[2].Value;
            string imageUrl = webmshareParser.CreateWebmshareUrl(webmshareId, url, blog.WebmshareType);

            // URL suffixes are compared ordinally; culture rules have no place here.
            bool isGif = imageUrl.EndsWith(".gif", StringComparison.Ordinal)
                         || imageUrl.EndsWith(".gifv", StringComparison.Ordinal);
            if (blog.SkipGif && isGif)
            {
                continue;
            }

            // TODO: postID
            AddToDownloadList(new VideoPost(imageUrl, webmshareId, post.timestamp.ToString()));
            AddToJsonQueue(new TumblrCrawlerData<Post>(Path.ChangeExtension(imageUrl.Split('/').Last(), ".json"), post));
        }
    }
}
/// <summary>
/// Scans the text content of every <c>post</c> element below <paramref name="document"/> for
/// Webmshare links. For each link that is not skipped, an <c>ExternalVideoPost</c> is added to the
/// download list and the post element is added to the XML queue under the download's file name
/// with an <c>.xml</c> extension.
/// </summary>
/// <param name="document">XML container whose descendant <c>post</c> elements are scanned.</param>
private void DownloadWebmshare(XContainer document)
{
    foreach (XElement post in document.Descendants("post"))
    {
        if (!PostWithinTimeSpan(post))
        {
            continue;
        }

        // An empty tag filter accepts every post; otherwise at least one <tag> value
        // has to be contained in the configured tags (case-insensitive).
        if (tags.Any() && !post.Descendants("tag").Any(x => tags.Contains(x.Value, StringComparer.OrdinalIgnoreCase)))
        {
            continue;
        }

        if (!CheckIfDownloadRebloggedPosts(post))
        {
            continue;
        }

        Regex regex = webmshareParser.GetWebmshareUrlRegex();
        foreach (Match match in regex.Matches(post.Value))
        {
            string webmshareId = match.Groups[2].Value;
            string imageUrl = webmshareParser.CreateWebmshareUrl(webmshareId, blog.WebmshareType);

            // URL suffixes are compared ordinally; culture rules have no place here.
            bool isGif = imageUrl.EndsWith(".gif", StringComparison.Ordinal)
                         || imageUrl.EndsWith(".gifv", StringComparison.Ordinal);
            if (blog.SkipGif && isGif)
            {
                continue;
            }

            AddToDownloadList(new ExternalVideoPost(imageUrl, webmshareId, post.Attribute("unix-timestamp").Value));
            AddToXmlQueue(new TumblrCrawlerXmlData(Path.ChangeExtension(imageUrl.Split('/').Last(), ".xml"), post));
        }
    }
}
/// <summary>
/// Scans every post in <paramref name="document"/> for Webmshare links, using the text returned
/// by <c>InlineSearch</c> as the search input. For each link that is not skipped, an
/// <c>ExternalVideoPost</c> is added to the download list and the post is added to the JSON queue
/// under the download's file name with a <c>.json</c> extension.
/// </summary>
/// <param name="document">Tumblr API JSON document whose <c>posts</c> are scanned.</param>
private void AddWebmshareUrl(TumblrApiJson document)
{
    foreach (Post post in document.posts)
    {
        if (!PostWithinTimeSpan(post))
        {
            continue;
        }

        // An empty tag filter accepts every post; otherwise at least one post tag
        // has to be contained in the configured tags (case-insensitive).
        if (tags.Any() && !post.tags.Any(x => tags.Contains(x, StringComparer.OrdinalIgnoreCase)))
        {
            continue;
        }

        if (!CheckIfDownloadRebloggedPosts(post))
        {
            continue;
        }

        Regex regex = webmshareParser.GetWebmshareUrlRegex();
        foreach (Match match in regex.Matches(InlineSearch(post)))
        {
            string webmshareId = match.Groups[2].Value;
            string imageUrl = webmshareParser.CreateWebmshareUrl(webmshareId, blog.WebmshareType);

            // URL suffixes are compared ordinally; culture rules have no place here.
            bool isGif = imageUrl.EndsWith(".gif", StringComparison.Ordinal)
                         || imageUrl.EndsWith(".gifv", StringComparison.Ordinal);
            if (blog.SkipGif && isGif)
            {
                continue;
            }

            AddToDownloadList(new ExternalVideoPost(imageUrl, webmshareId, post.unix_timestamp.ToString()));
            AddToJsonQueue(new TumblrCrawlerData<Post>(Path.ChangeExtension(imageUrl.Split('/').Last(), ".json"), post));
        }
    }
}
/// <summary>
/// Scans the text content of every <c>post</c> element below <paramref name="document"/> for
/// links to external hosts and adds a <c>TumblrPost</c> to the download list for each match.
/// Imgur, Gfycat and Webmshare are handled in three separate passes, each enabled by its own
/// blog setting (<c>DownloadImgur</c>, <c>DownloadGfycat</c>, <c>DownloadWebmshare</c>).
/// </summary>
/// <param name="document">XML container whose descendant <c>post</c> elements are scanned.</param>
/// <returns>A task that completes once all enabled passes have finished.</returns>
private async Task AddExternalPhotoUrlToDownloadList(XContainer document)
{
    if (blog.DownloadImgur)
    {
        foreach (XElement post in document.Descendants("post"))
        {
            if (!PostWithinTimeSpan(post))
            {
                continue;
            }

            // An empty tag filter accepts every post; otherwise at least one <tag> value
            // has to be contained in the configured tags (case-insensitive).
            if (tags.Any() && !post.Descendants("tag").Any(x => tags.Contains(x.Value, StringComparer.OrdinalIgnoreCase)))
            {
                continue;
            }

            if (!CheckIfDownloadRebloggedPosts(post))
            {
                continue;
            }

            Regex regex = imgurParser.GetImgurUrlRegex();
            foreach (Match match in regex.Matches(post.Value))
            {
                string imageUrl = match.Groups[1].Value;
                string imgurId = match.Groups[2].Value;

                // URL suffixes are compared ordinally; culture rules have no place here.
                bool isGif = imageUrl.EndsWith(".gif", StringComparison.Ordinal)
                             || imageUrl.EndsWith(".gifv", StringComparison.Ordinal);
                if (blog.SkipGif && isGif)
                {
                    continue;
                }

                // TODO: postID
                AddToDownloadList(new TumblrPost(PostTypes.Photo, imageUrl, imgurId, post.Attribute("unix-timestamp").Value));
            }
        }
    }

    if (blog.DownloadGfycat)
    {
        foreach (XElement post in document.Descendants("post"))
        {
            if (!PostWithinTimeSpan(post))
            {
                continue;
            }

            if (tags.Any() && !post.Descendants("tag").Any(x => tags.Contains(x.Value, StringComparer.OrdinalIgnoreCase)))
            {
                continue;
            }

            if (!CheckIfDownloadRebloggedPosts(post))
            {
                continue;
            }

            Regex regex = gfycatParser.GetGfycatUrlRegex();
            foreach (Match match in regex.Matches(post.Value))
            {
                string gfyId = match.Groups[2].Value;

                // One request per match, awaited in sequence; the response is parsed into the video URL.
                string videoUrl = gfycatParser.ParseGfycatCajaxResponse(await gfycatParser.RequestGfycatCajax(gfyId), blog.GfycatType);

                bool isGif = videoUrl.EndsWith(".gif", StringComparison.Ordinal)
                             || videoUrl.EndsWith(".gifv", StringComparison.Ordinal);
                if (blog.SkipGif && isGif)
                {
                    continue;
                }

                // TODO: postID
                AddToDownloadList(new TumblrPost(PostTypes.Video, videoUrl, gfyId, post.Attribute("unix-timestamp").Value));
            }
        }
    }

    if (blog.DownloadWebmshare)
    {
        foreach (XElement post in document.Descendants("post"))
        {
            if (!PostWithinTimeSpan(post))
            {
                continue;
            }

            if (tags.Any() && !post.Descendants("tag").Any(x => tags.Contains(x.Value, StringComparer.OrdinalIgnoreCase)))
            {
                continue;
            }

            if (!CheckIfDownloadRebloggedPosts(post))
            {
                continue;
            }

            Regex regex = webmshareParser.GetWebmshareUrlRegex();
            foreach (Match match in regex.Matches(post.Value))
            {
                string webmshareId = match.Groups[2].Value;
                string imageUrl = webmshareParser.CreateWebmshareUrl(webmshareId, blog.WebmshareType);

                bool isGif = imageUrl.EndsWith(".gif", StringComparison.Ordinal)
                             || imageUrl.EndsWith(".gifv", StringComparison.Ordinal);
                if (blog.SkipGif && isGif)
                {
                    continue;
                }

                // Webmshare links are queued as videos, matching the TumblrJson overload of this
                // method and the dedicated Webmshare methods (VideoPost / ExternalVideoPost);
                // this pass previously used PostTypes.Photo.
                // TODO: postID
                AddToDownloadList(new TumblrPost(PostTypes.Video, imageUrl, webmshareId, post.Attribute("unix-timestamp").Value));
            }
        }
    }
}
/// <summary>
/// Scans every post in <paramref name="document"/> for links to external hosts and adds a
/// <c>TumblrPost</c> to the download list for each match. Imgur, Gfycat and Webmshare are handled
/// in three separate passes, each enabled by its own blog setting (<c>DownloadImgur</c>,
/// <c>DownloadGfycat</c>, <c>DownloadWebmshare</c>).
/// </summary>
/// <remarks>
/// NOTE(review): the regexes run against <c>post.ToString()</c>. This only finds links if
/// <c>Post</c> overrides <c>ToString</c> to return the post content; confirm against the
/// <c>Post</c> type.
/// </remarks>
/// <param name="document">Tumblr JSON document whose <c>response.posts</c> are scanned.</param>
/// <returns>A task that completes once all enabled passes have finished.</returns>
private async Task AddExternalPhotoUrlToDownloadList(TumblrJson document)
{
    if (blog.DownloadImgur)
    {
        foreach (Post post in document.response.posts)
        {
            if (!PostWithinTimeSpan(post))
            {
                continue;
            }

            // An empty tag filter accepts every post; otherwise at least one post tag
            // has to match one of the configured tags (case-insensitive).
            if (tags.Any() && !post.tags.Intersect(tags, StringComparer.OrdinalIgnoreCase).Any())
            {
                continue;
            }

            if (!CheckIfDownloadRebloggedPosts(post))
            {
                continue;
            }

            Regex regex = imgurParser.GetImgurUrlRegex();
            foreach (Match match in regex.Matches(post.ToString()))
            {
                string imageUrl = match.Groups[1].Value;

                // URL suffixes are compared ordinally; culture rules have no place here.
                bool isGif = imageUrl.EndsWith(".gif", StringComparison.Ordinal)
                             || imageUrl.EndsWith(".gifv", StringComparison.Ordinal);
                if (blog.SkipGif && isGif)
                {
                    continue;
                }

                // NOTE(review): a fresh GUID is used as the id here, whereas the XContainer
                // overload passes the imgur id from match group 2 - confirm which is intended.
                // TODO: postID
                AddToDownloadList(new TumblrPost(PostTypes.Photo, imageUrl, Guid.NewGuid().ToString("N"), post.timestamp.ToString()));
            }
        }
    }

    if (blog.DownloadGfycat)
    {
        foreach (Post post in document.response.posts)
        {
            if (!PostWithinTimeSpan(post))
            {
                continue;
            }

            if (tags.Any() && !post.tags.Intersect(tags, StringComparer.OrdinalIgnoreCase).Any())
            {
                continue;
            }

            if (!CheckIfDownloadRebloggedPosts(post))
            {
                continue;
            }

            Regex regex = gfycatParser.GetGfycatUrlRegex();
            foreach (Match match in regex.Matches(post.ToString()))
            {
                string gfyId = match.Groups[2].Value;

                // One request per match, awaited in sequence; the response is parsed into the video URL.
                string videoUrl = gfycatParser.ParseGfycatCajaxResponse(await gfycatParser.RequestGfycatCajax(gfyId), blog.GfycatType);

                bool isGif = videoUrl.EndsWith(".gif", StringComparison.Ordinal)
                             || videoUrl.EndsWith(".gifv", StringComparison.Ordinal);
                if (blog.SkipGif && isGif)
                {
                    continue;
                }

                // TODO: postID
                AddToDownloadList(new TumblrPost(PostTypes.Video, videoUrl, gfyId, post.timestamp.ToString()));
            }
        }
    }

    if (blog.DownloadWebmshare)
    {
        foreach (Post post in document.response.posts)
        {
            if (!PostWithinTimeSpan(post))
            {
                continue;
            }

            if (tags.Any() && !post.tags.Intersect(tags, StringComparer.OrdinalIgnoreCase).Any())
            {
                continue;
            }

            if (!CheckIfDownloadRebloggedPosts(post))
            {
                continue;
            }

            var regex = webmshareParser.GetWebmshareUrlRegex();
            foreach (Match match in regex.Matches(post.ToString()))
            {
                string webmshareId = match.Groups[2].Value;
                string imageUrl = webmshareParser.CreateWebmshareUrl(webmshareId, blog.WebmshareType);

                bool isGif = imageUrl.EndsWith(".gif", StringComparison.Ordinal)
                             || imageUrl.EndsWith(".gifv", StringComparison.Ordinal);
                if (blog.SkipGif && isGif)
                {
                    continue;
                }

                // TODO: postID
                AddToDownloadList(new TumblrPost(PostTypes.Video, imageUrl, webmshareId, post.timestamp.ToString()));
            }
        }
    }
}