/// <summary>
/// Downloads every media URL of <paramref name="blog"/> that is not already listed in
/// <c>blog.Links</c>, updating the blog's counters and reporting progress after each file.
/// </summary>
/// <param name="blog">
/// Blog to crawl. Its <c>TotalCount</c>, <c>Links</c>, <c>DownloadedImages</c>, <c>Progress</c>
/// and (when not cancelled) <c>LastCompleteCrawl</c> members are updated, then it is saved.
/// </param>
/// <param name="progress">Receives one message per downloaded file and an empty message at the end.</param>
/// <param name="ct">Once cancellation is requested, remaining iterations exit without downloading.</param>
/// <param name="pt">While paused, each worker blocks before starting its download.</param>
/// <returns>The same <paramref name="blog"/> instance that was passed in.</returns>
private TumblrBlog CrawlCoreTumblrBlog(TumblrBlog blog, IProgress<DataModels.DownloadProgress> progress, CancellationToken ct, PauseToken pt)
{
    Logger.Verbose("ManagerController.CrawlCoreTumblrBlog:Start");

    var crawlResult = GetImageUrls(blog, progress, ct, pt);
    blog.TotalCount = crawlResult.Item1;

    // Only download what is not yet recorded for this blog. The result is materialized up
    // front so the parallel loop never enumerates blog.Links while workers append to it.
    // Null/empty entries are dropped because the file name is derived from the URL below.
    var imageUrls = crawlResult.Item2
        .Except(blog.Links.ToList())
        .Where(candidate => !string.IsNullOrEmpty(candidate))
        .ToList();

    var blogFolder = Path.Combine(shellService.Settings.DownloadLocation, blog.Name);

    // The configured parallelism is shared between all active blogs. Integer division can
    // yield 0 (more active blogs than parallel slots), which ParallelOptions rejects, and an
    // empty selection would divide by zero, so both operands are clamped to at least 1.
    var activeBlogs = Math.Max(1, selectionService.ActiveItems.Count);
    var options = new ParallelOptions
    {
        MaxDegreeOfParallelism = Math.Max(1, shellService.Settings.ParallelImages / activeBlogs)
    };

    // Guards the shared blog state that several workers update.
    var blogLock = new object();

    Parallel.ForEach(
        imageUrls,
        options,
        (currentImageUrl, state) =>
        {
            if (ct.IsCancellationRequested)
            {
                // Break() only prevents later iterations from starting; this one must exit itself.
                state.Break();
                return;
            }

            if (pt.IsPaused)
            {
                pt.WaitWhilePausedWithResponseAsyc().Wait();
            }

            string fileName = currentImageUrl.Split('/').Last();
            string fileLocation = Path.Combine(blogFolder, fileName);

            if (!Download(blog, fileLocation, currentImageUrl))
            {
                return;
            }

            lock (blogLock)
            {
                blog.Links.Add(currentImageUrl);
                blog.DownloadedImages = (uint)blog.Links.Count();
                blog.Progress = (uint)((double)blog.DownloadedImages / (double)blog.TotalCount * 100);
            }

            progress.Report(new DataModels.DownloadProgress
            {
                Progress = string.Format(CultureInfo.CurrentCulture, Resources.ProgressDownloadImage, currentImageUrl)
            });
        });

    // A cancelled run is not a complete crawl, so the timestamp is left untouched.
    if (!ct.IsCancellationRequested)
    {
        blog.LastCompleteCrawl = DateTime.Now;
    }

    SaveBlog(blog);

    // Clear the progress text now that this blog is finished.
    progress.Report(new DataModels.DownloadProgress { Progress = "" });

    return blog;
}
/// <summary>
/// Builds a progress message from a composite format string using the current culture
/// and reports it through the <c>progress</c> member declared outside this method.
/// </summary>
/// <param name="format">Composite format string passed to <see cref="string.Format(IFormatProvider, string, object[])"/>.</param>
/// <param name="args">Values substituted into <paramref name="format"/>.</param>
public void UpdateProgressQueueInformation(string format, params object[] args)
{
    var update = new DataModels.DownloadProgress();
    update.Progress = string.Format(CultureInfo.CurrentCulture, format, args);

    progress.Report(update);
}
/// <summary>
/// Collects the distinct photo and video URLs of <paramref name="blog"/> by requesting its
/// posts page by page (20 posts per request) in parallel.
/// </summary>
/// <param name="blog">
/// Blog to inspect. When <c>blog.Tags</c> is a non-empty comma separated list, only posts
/// carrying at least one of those tags (compared case-insensitively) contribute URLs.
/// </param>
/// <param name="progress">Receives a message after each processed page.</param>
/// <param name="ct">Once cancellation is requested, remaining pages are skipped.</param>
/// <param name="pt">While paused, each worker blocks before requesting its page.</param>
/// <returns>
/// A tuple holding the number of distinct URLs found and the list of those URLs.
/// Pages that fail to load or parse are logged to the console and skipped.
/// </returns>
public Tuple<uint, List<string>> GetImageUrls(TumblrBlog blog, IProgress<DataModels.DownloadProgress> progress, CancellationToken ct, PauseToken pt)
{
    const int PostsPerPage = 20;

    var images = new List<string>();
    var imagesLock = new object();
    int numberOfPostsCrawled = 0;

    // The first request is only used to read the blog's total post count.
    string firstUrl = GetApiUrl(blog.Name, 1);
    string firstAuthHeader = shellService.OAuthManager.GenerateauthHeader(firstUrl, "GET");
    var blogDoc = RequestData(firstUrl, firstAuthHeader);
    int totalPosts = blogDoc.response.blog.total_posts;

    // Generate URL list of Images
    // the api v2 shows 20 posts at max, determine the number of pages to crawl
    int totalPages = (totalPosts / PostsPerPage) + 1;

    // null means "no tag filter"; otherwise a post must carry at least one of these tags.
    HashSet<string> wantedTags = null;
    if (!string.IsNullOrEmpty(blog.Tags))
    {
        wantedTags = new HashSet<string>(
            blog.Tags.Split(',').Select(x => x.Trim()),
            StringComparer.OrdinalIgnoreCase);
    }

    // The configured parallelism is shared between all active blogs. Integer division can
    // yield 0, which ParallelOptions rejects, and an empty selection would divide by zero,
    // so both operands are clamped to at least 1.
    int activeBlogs = Math.Max(1, selectionService.ActiveItems.Count);
    var options = new ParallelOptions
    {
        MaxDegreeOfParallelism = Math.Max(1, shellService.Settings.ParallelImages / activeBlogs)
    };

    Parallel.For(0, totalPages, options, (i, state) =>
    {
        if (ct.IsCancellationRequested)
        {
            // Break() only prevents later iterations from starting; this one must exit itself.
            state.Break();
            return;
        }

        if (pt.IsPaused)
        {
            pt.WaitWhilePausedWithResponseAsyc().Wait();
        }

        // URLs are gathered per page and merged once, so the shared list is locked only briefly
        // and URLs found before a mid-page failure are still kept.
        var pageUrls = new List<string>();

        try
        {
            // get 20 posts per crawl/page
            // These must be per-iteration locals: sharing them across workers would let one
            // worker sign or request another worker's URL.
            string pageUrl = GetApiUrl(blog.Name, PostsPerPage, i * PostsPerPage);
            string pageAuthHeader = shellService.OAuthManager.GenerateauthHeader(pageUrl, "GET");
            DataModels.TumblrJson document = RequestData(pageUrl, pageAuthHeader);

            foreach (DataModels.Post post in document.response.posts)
            {
                if (wantedTags != null
                    && (post.tags == null || !post.tags.Any(tag => wantedTags.Contains(tag))))
                {
                    continue;
                }

                if (shellService.Settings.DownloadImages == true
                    && string.Equals(post.type, "photo", StringComparison.Ordinal))
                {
                    if (post.photos == null)
                    {
                        continue;
                    }

                    foreach (DataModels.Photo photo in post.photos)
                    {
                        if (photo.alt_sizes == null)
                        {
                            continue;
                        }

                        int availableSizes = photo.alt_sizes.Count();
                        if (availableSizes == 0)
                        {
                            continue;
                        }

                        // The configured size is selected by position. A photo may offer fewer
                        // sizes than the setting list, so the index is clamped to the valid range.
                        // NOTE(review): assumes alt_sizes is ordered like Settings.ImageSizes - confirm.
                        int sizeIndex = shellService.Settings.ImageSizes.IndexOf(shellService.Settings.ImageSize.ToString());
                        sizeIndex = Math.Min(Math.Max(sizeIndex, 0), availableSizes - 1);

                        var imageUrl = photo.alt_sizes.ElementAt(sizeIndex).url;
                        if (string.IsNullOrEmpty(imageUrl))
                        {
                            continue;
                        }

                        if (shellService.Settings.SkipGif == true
                            && imageUrl.EndsWith(".gif", StringComparison.OrdinalIgnoreCase))
                        {
                            continue;
                        }

                        pageUrls.Add(imageUrl);
                    }
                }
                else if (shellService.Settings.DownloadVideos == true
                    && string.Equals(post.type, "video", StringComparison.Ordinal))
                {
                    // Skip video posts that carry no video_url; there is nothing to download.
                    if (string.IsNullOrEmpty(post.video_url))
                    {
                        continue;
                    }

                    if (shellService.Settings.VideoSize == 1080)
                    {
                        pageUrls.Add(post.video_url);
                    }
                    else if (shellService.Settings.VideoSize == 480)
                    {
                        // The 480 variant is addressed by inserting "_480" before the extension.
                        int extensionStart = post.video_url.LastIndexOf('.');
                        if (extensionStart >= 0)
                        {
                            pageUrls.Add(post.video_url.Insert(extensionStart, "_480"));
                        }
                    }
                }
            }
        }
        catch (Exception ex)
        {
            // Best effort: a failing page must not abort the whole crawl, but the failure
            // itself (message and stack trace) should be visible.
            Console.WriteLine(ex);
        }

        if (pageUrls.Count > 0)
        {
            lock (imagesLock)
            {
                images.AddRange(pageUrls);
            }
        }

        // Atomic update because several workers report concurrently; capped because the
        // last page usually holds fewer than 20 posts.
        int crawled = Math.Min(Interlocked.Add(ref numberOfPostsCrawled, PostsPerPage), totalPosts);
        progress.Report(new DataModels.DownloadProgress
        {
            Progress = string.Format(CultureInfo.CurrentCulture, Resources.ProgressGetUrl, crawled, totalPosts)
        });
    });

    List<string> distinctImages = images.Distinct().ToList();
    uint totalImages = (uint)distinctImages.Count;

    return Tuple.Create(totalImages, distinctImages);
}