Пример #1
0
        /// <summary>
        /// Runs one crawl of the blog: starts the URL grabber, the downloader and
        /// (when <c>blog.DumpCrawlerData</c> is set) the crawler-data dump, then
        /// recomputes the duplicate counters, awaits the workers and saves the blog.
        /// </summary>
        /// <returns>
        /// A task that completes once the blog has been saved and the progress
        /// text has been reset.
        /// </returns>
        public async Task CrawlAsync()
        {
            Logger.Verbose("TumblrBlogCrawler.Crawl:Start");

            ulong newestPostId = await GetHighestPostIdAsync();

            // All workers are started here, before any of them is awaited.
            Task<bool> urlGrabTask = GetUrlsAsync();

            // FIXME: refactor downloader out of class
            Task<bool> downloadTask = downloader.DownloadBlogAsync();

            Task crawlerDataTask = blog.DumpCrawlerData
                ? crawlerDataDownloader.DownloadCrawlerDataAsync()
                : Task.CompletedTask;

            bool limitReached = await urlGrabTask;

            UpdateProgressQueueInformation(Resources.ProgressUniqueDownloads);

            // Subtract the per-type duplicates from the overall post count.
            blog.DuplicatePhotos = DetermineDuplicates<PhotoPost>();
            blog.DuplicateVideos = DetermineDuplicates<VideoPost>();
            blog.DuplicateAudios = DetermineDuplicates<AudioPost>();
            blog.TotalCount = blog.TotalCount
                              - blog.DuplicatePhotos
                              - blog.DuplicateAudios
                              - blog.DuplicateVideos;

            CleanCollectedBlogStatistics();

            await crawlerDataTask;
            bool downloadCompleted = await downloadTask;

            bool wasCancelled = ct.IsCancellationRequested;

            if (!wasCancelled)
            {
                blog.LastCompleteCrawl = DateTime.Now;
            }

            // LastId only advances when the run was not cancelled, the download
            // finished and the grabber did not report hitting the API limit.
            if (!wasCancelled && downloadCompleted && !limitReached)
            {
                blog.LastId = newestPostId;
            }

            blog.Save();

            UpdateProgressQueueInformation("");
        }
Пример #2
0
        /// <summary>
        /// Runs one crawl of the blog: starts the URL grabber, the downloader and
        /// (when <c>Blog.DumpCrawlerData</c> is set) the crawler-data dump, then
        /// updates the duplicate statistics, awaits the workers and saves the blog.
        /// </summary>
        /// <returns>
        /// A task that completes once the blog has been saved and the progress
        /// text has been reset.
        /// </returns>
        public async Task CrawlAsync()
        {
            Logger.Verbose("TumblrBlogCrawler.Crawl:Start");

            ulong newestPostId = await GetHighestPostIdAsync();

            // All workers are started here, before any of them is awaited.
            Task<bool> urlGrabTask = GetUrlsAsync();

            // FIXME: refactor downloader out of class
            Task<bool> downloadTask = downloader.DownloadBlogAsync();

            Task crawlerDataTask = Blog.DumpCrawlerData
                ? crawlerDataDownloader.DownloadCrawlerDataAsync()
                : Task.CompletedTask;

            bool limitReached = await urlGrabTask;

            UpdateProgressQueueInformation(Resources.ProgressUniqueDownloads);
            UpdateBlogDuplicates();
            CleanCollectedBlogStatistics();

            await crawlerDataTask;
            bool downloadCompleted = await downloadTask;

            bool wasCancelled = Ct.IsCancellationRequested;

            if (!wasCancelled)
            {
                Blog.LastCompleteCrawl = DateTime.Now;
            }

            // LastId only advances when the run was not cancelled, the download
            // finished and the grabber did not report hitting the API limit.
            if (!wasCancelled && downloadCompleted && !limitReached)
            {
                Blog.LastId = newestPostId;
            }

            Blog.Save();

            UpdateProgressQueueInformation(string.Empty);
        }
        /// <summary>
        /// Runs one tag-search crawl: starts the URL grabber, the downloader and
        /// (when <c>Blog.DumpCrawlerData</c> is set) the crawler-data dump, then
        /// recomputes the duplicate counters, awaits the workers and saves the blog.
        /// </summary>
        /// <returns>
        /// A task that completes once the blog has been saved and the progress
        /// text has been reset.
        /// </returns>
        public async Task CrawlAsync()
        {
            Logger.Verbose("TumblrTagSearchCrawler.Crawl:Start");

            // All workers are started here, before any of them is awaited.
            Task urlGrabTask = GetUrlsAsync();
            Task<bool> downloadTask = downloader.DownloadBlogAsync();

            Task crawlerDataTask = Blog.DumpCrawlerData
                ? crawlerDataDownloader.DownloadCrawlerDataAsync()
                : Task.CompletedTask;

            await urlGrabTask;

            UpdateProgressQueueInformation(Resources.ProgressUniqueDownloads);

            // Subtract the per-type duplicates from the overall post count.
            Blog.DuplicatePhotos = DetermineDuplicates<PhotoPost>();
            Blog.DuplicateVideos = DetermineDuplicates<VideoPost>();
            Blog.DuplicateAudios = DetermineDuplicates<AudioPost>();
            Blog.TotalCount = Blog.TotalCount
                              - Blog.DuplicatePhotos
                              - Blog.DuplicateAudios
                              - Blog.DuplicateVideos;

            CleanCollectedBlogStatistics();

            await crawlerDataTask;
            await downloadTask;

            // A cancelled run does not count as a complete crawl.
            bool wasCancelled = Ct.IsCancellationRequested;
            if (!wasCancelled)
            {
                Blog.LastCompleteCrawl = DateTime.Now;
            }

            Blog.Save();

            UpdateProgressQueueInformation(string.Empty);
        }