Example #1
0
        /// <summary>
        /// Collects the blog's media URLs via <c>GetImageUrls</c>, downloads in parallel every URL
        /// that is not already contained in <c>blog.Links</c>, updates the blog's counters and
        /// finally saves the blog.
        /// </summary>
        /// <param name="blog">Blog to crawl; its <c>TotalCount</c>, <c>Links</c>, <c>DownloadedImages</c>,
        /// <c>Progress</c> and (unless cancelled) <c>LastCompleteCrawl</c> are updated.</param>
        /// <param name="progress">Receives one message per successful download and an empty message at the end.</param>
        /// <param name="ct">When cancellation is requested, iterations that have not started downloading are skipped.</param>
        /// <param name="pt">Pause token; each iteration blocks while it reports being paused.</param>
        /// <returns>The same <paramref name="blog"/> instance that was passed in.</returns>
        private TumblrBlog CrawlCoreTumblrBlog(TumblrBlog blog, IProgress <DataModels.DownloadProgress> progress, CancellationToken ct, PauseToken pt)
        {
            Logger.Verbose("ManagerController.CrawlCoreTumblrBlog:Start");

            var tuple = GetImageUrls(blog, progress, ct, pt);
            blog.TotalCount = tuple.Item1;

            // Snapshot of the known links: blog.Links is modified while the loop below runs.
            var knownLinks = blog.Links.ToList();
            var imageUrls  = tuple.Item2.Except(knownLinks);

            var blogPath = shellService.Settings.DownloadLocation;

            // The integer division may yield 0 (or divide by zero when nothing is active);
            // ParallelOptions rejects 0, so always allow at least one worker.
            int activeItems    = Math.Max(1, selectionService.ActiveItems.Count);
            int maxParallelism = Math.Max(1, shellService.Settings.ParallelImages / activeItems);

            // Serializes the updates of the shared blog state done by the parallel iterations.
            var blogLock = new object();

            Parallel.ForEach(
                imageUrls,
                new ParallelOptions { MaxDegreeOfParallelism = maxParallelism },
                (currentImageUrl, state) =>
            {
                if (ct.IsCancellationRequested)
                {
                    // Break() only prevents further iterations; leave this one explicitly.
                    state.Break();
                    return;
                }
                if (pt.IsPaused)
                {
                    pt.WaitWhilePausedWithResponseAsyc().Wait();
                }

                string fileName     = currentImageUrl.Split('/').Last();
                string fileLocation = Path.Combine(Path.Combine(blogPath, blog.Name), fileName);

                if (Download(blog, fileLocation, currentImageUrl))
                {
                    lock (blogLock)
                    {
                        blog.Links.Add(currentImageUrl);
                        blog.DownloadedImages = (uint)blog.Links.Count();
                        blog.Progress         = (uint)((double)blog.DownloadedImages / (double)blog.TotalCount * 100);
                    }

                    // Per-iteration instance: a variable shared between iterations could be
                    // replaced by another thread between creation and Report.
                    var downloadProgress = new DataModels.DownloadProgress();
                    downloadProgress.Progress = string.Format(CultureInfo.CurrentCulture, Resources.ProgressDownloadImage, currentImageUrl);
                    progress.Report(downloadProgress);
                }
            });

            if (!ct.IsCancellationRequested)
            {
                blog.LastCompleteCrawl = DateTime.Now;
            }
            SaveBlog(blog);

            // Clear the progress text once the crawl has ended.
            var finalProgress = new DataModels.DownloadProgress();
            finalProgress.Progress = "";
            progress.Report(finalProgress);

            return(blog);
        }
        /// <summary>
        /// Builds a progress message from a composite format string and reports it through
        /// the <c>progress</c> member.
        /// </summary>
        /// <param name="format">Composite format string, formatted with the current culture.</param>
        /// <param name="args">Values substituted into <paramref name="format"/>.</param>
        public void UpdateProgressQueueInformation(string format, params object[] args)
        {
            var update = new DataModels.DownloadProgress();
            update.Progress = string.Format(CultureInfo.CurrentCulture, format, args);
            progress.Report(update);
        }
Example #3
0
        /// <summary>
        /// Requests the blog's posts page by page (20 posts per request) and collects the URLs of
        /// the photos and/or videos selected by the current settings.
        /// </summary>
        /// <param name="blog">Blog to crawl. When <c>blog.Tags</c> is non-empty it is read as a
        /// comma-separated list and only posts carrying at least one of those tags are used.</param>
        /// <param name="progress">Receives a status message after each processed page.</param>
        /// <param name="ct">When cancellation is requested, pages that have not started are skipped.</param>
        /// <param name="pt">Pause token; each page blocks while it reports being paused.</param>
        /// <returns>The number of distinct URLs found together with the list of those URLs.</returns>
        public Tuple <uint, List <string> > GetImageUrls(TumblrBlog blog, IProgress <DataModels.DownloadProgress> progress, CancellationToken ct, PauseToken pt)
        {
            int numberOfPostsCrawled = 0;
            var images               = new List <string>();

            // Guards 'images', which is filled from several pages at once.
            var imagesLock = new object();

            string url        = GetApiUrl(blog.Name, 1);
            string authHeader = shellService.OAuthManager.GenerateauthHeader(url, "GET");

            var blogDoc = RequestData(url, authHeader);

            int totalPosts = blogDoc.response.blog.total_posts;

            // Generate URL list of Images
            // the api v2 shows 20 posts at max, determine the number of pages to crawl
            int totalPages = (totalPosts / 20) + 1;

            // null means "no tag filter"; the list is the same for every page, so build it once.
            List <string> tags = string.IsNullOrEmpty(blog.Tags)
                ? null
                : blog.Tags.Split(',').Select(x => x.Trim()).ToList();

            // The integer division may yield 0 (or divide by zero when nothing is active);
            // ParallelOptions rejects 0, so always allow at least one worker.
            int activeItems    = Math.Max(1, selectionService.ActiveItems.Count);
            int maxParallelism = Math.Max(1, shellService.Settings.ParallelImages / activeItems);

            Parallel.For(0, totalPages,
                         new ParallelOptions { MaxDegreeOfParallelism = maxParallelism },
                         (i, state) =>
            {
                if (ct.IsCancellationRequested)
                {
                    // Break() only prevents further iterations; leave this one explicitly.
                    state.Break();
                    return;
                }
                if (pt.IsPaused)
                {
                    pt.WaitWhilePausedWithResponseAsyc().Wait();
                }
                try
                {
                    // get 20 posts per crawl/page
                    // Page-local variables: sharing url/authHeader between iterations would let
                    // one page send its request with another page's URL or header.
                    string pageUrl        = GetApiUrl(blog.Name, 20, i * 20);
                    string pageAuthHeader = shellService.OAuthManager.GenerateauthHeader(pageUrl, "GET");

                    DataModels.TumblrJson document = RequestData(pageUrl, pageAuthHeader);

                    // Without tags every post qualifies; otherwise a post needs one of the tags.
                    var posts = document.response.posts
                                .Where(post => tags == null || (post.tags != null && post.tags.Any(tag => tags.Contains(tag))))
                                .ToList();

                    if (shellService.Settings.DownloadImages == true)
                    {
                        int sizeIndex = shellService.Settings.ImageSizes.IndexOf(shellService.Settings.ImageSize.ToString());

                        foreach (DataModels.Post post in posts.Where(p => p.type.Equals("photo")))
                        {
                            foreach (DataModels.Photo photo in post.photos ?? new List <DataModels.Photo>())
                            {
                                var imageUrl = photo.alt_sizes.ElementAt(sizeIndex).url;
                                if (shellService.Settings.SkipGif == true && imageUrl.EndsWith(".gif", StringComparison.Ordinal))
                                {
                                    continue;
                                }
                                lock (imagesLock)
                                {
                                    images.Add(imageUrl);
                                }
                            }
                        }
                    }
                    if (shellService.Settings.DownloadVideos == true)
                    {
                        foreach (DataModels.Post post in posts.Where(p => p.type.Equals("video")))
                        {
                            if (shellService.Settings.VideoSize == 1080)
                            {
                                lock (imagesLock)
                                {
                                    images.Add(post.video_url);
                                }
                            }
                            if (shellService.Settings.VideoSize == 480)
                            {
                                // The 480 variant is addressed by "_480" in front of the file extension.
                                var smallVideoUrl = post.video_url.Insert(post.video_url.LastIndexOf('.'), "_480");
                                lock (imagesLock)
                                {
                                    images.Add(smallVideoUrl);
                                }
                            }
                        }
                    }
                }
                catch (Exception ex)
                {
                    // Best effort: a failing page must not abort the whole crawl.
                    // Print the exception itself; ex.Data would only print the dictionary's type name.
                    Console.WriteLine(ex);
                }

                // Atomic update of the shared counter; clamp because the last page is usually partial.
                int crawled     = Math.Min(Interlocked.Add(ref numberOfPostsCrawled, 20), totalPosts);
                var newProgress = new DataModels.DownloadProgress();
                newProgress.Progress = string.Format(CultureInfo.CurrentCulture, Resources.ProgressGetUrl, crawled, totalPosts);
                progress.Report(newProgress);
            }
                         );

            images = images.Distinct().ToList();

            return(Tuple.Create((uint)images.Count, images));
        }