/// <summary>
/// Runs one crawl pass: fetches the highest post id, starts the url grabber, the blog
/// download and (when <c>blog.DumpCrawlerData</c> is set) the crawler data download,
/// recomputes the duplicate counters, and saves the blog once all started tasks have been awaited.
/// </summary>
/// <returns>A task that completes after the blog was saved and the progress text was reset.</returns>
public async Task CrawlAsync()
{
    Logger.Verbose("TumblrBlogCrawler.Crawl:Start");

    ulong topPostId = await GetHighestPostIdAsync();

    // All workers are started up front and only awaited further below.
    Task<bool> urlCollection = GetUrlsAsync();

    // FIXME: refactor downloader out of class
    Task<bool> blogDownload = downloader.DownloadBlogAsync();

    Task crawlerDataDump = blog.DumpCrawlerData
        ? crawlerDataDownloader.DownloadCrawlerDataAsync()
        : Task.CompletedTask;

    // The grabber's boolean result is used below as an "API limit was hit" flag.
    bool limitWasHit = await urlCollection;

    UpdateProgressQueueInformation(Resources.ProgressUniqueDownloads);

    blog.DuplicatePhotos = DetermineDuplicates<PhotoPost>();
    blog.DuplicateVideos = DetermineDuplicates<VideoPost>();
    blog.DuplicateAudios = DetermineDuplicates<AudioPost>();

    // Duplicates are taken out of the total count.
    blog.TotalCount = blog.TotalCount - blog.DuplicatePhotos - blog.DuplicateAudios - blog.DuplicateVideos;

    CleanCollectedBlogStatistics();

    await crawlerDataDump;
    bool downloadCompleted = await blogDownload;

    if (!ct.IsCancellationRequested)
    {
        blog.LastCompleteCrawl = DateTime.Now;

        // LastId only advances when the download finished and the grabber did not report the limit flag.
        bool mayAdvanceLastId = downloadCompleted && !limitWasHit;
        if (mayAdvanceLastId)
        {
            blog.LastId = topPostId;
        }
    }

    blog.Save();
    UpdateProgressQueueInformation("");
}
/// <summary>
/// Runs one crawl pass: fetches the highest post id, starts the url grabber, the blog
/// download and (when <c>Blog.DumpCrawlerData</c> is set) the crawler data download,
/// updates the duplicate statistics, and saves the blog once all started tasks have been awaited.
/// </summary>
/// <returns>A task that completes after the blog was saved and the progress text was reset.</returns>
public async Task CrawlAsync()
{
    Logger.Verbose("TumblrBlogCrawler.Crawl:Start");

    ulong topPostId = await GetHighestPostIdAsync();

    // All workers are started up front and only awaited further below.
    Task<bool> urlCollection = GetUrlsAsync();

    // FIXME: refactor downloader out of class
    Task<bool> blogDownload = downloader.DownloadBlogAsync();

    Task crawlerDataDump = Blog.DumpCrawlerData
        ? crawlerDataDownloader.DownloadCrawlerDataAsync()
        : Task.CompletedTask;

    // The grabber's boolean result is used below as an "API limit was hit" flag.
    bool limitWasHit = await urlCollection;

    UpdateProgressQueueInformation(Resources.ProgressUniqueDownloads);
    UpdateBlogDuplicates();
    CleanCollectedBlogStatistics();

    await crawlerDataDump;
    bool downloadCompleted = await blogDownload;

    if (!Ct.IsCancellationRequested)
    {
        Blog.LastCompleteCrawl = DateTime.Now;

        // LastId only advances when the download finished and the grabber did not report the limit flag.
        bool mayAdvanceLastId = downloadCompleted && !limitWasHit;
        if (mayAdvanceLastId)
        {
            Blog.LastId = topPostId;
        }
    }

    Blog.Save();
    UpdateProgressQueueInformation(string.Empty);
}
/// <summary>
/// Runs one tag-search crawl pass: starts the url grabber, the blog download and
/// (when <c>Blog.DumpCrawlerData</c> is set) the crawler data download, recomputes the
/// duplicate counters, and saves the blog once all started tasks have been awaited.
/// </summary>
/// <returns>A task that completes after the blog was saved and the progress text was reset.</returns>
public async Task CrawlAsync()
{
    Logger.Verbose("TumblrTagSearchCrawler.Crawl:Start");

    // All workers are started up front and only awaited further below.
    Task urlCollection = GetUrlsAsync();
    Task<bool> blogDownload = downloader.DownloadBlogAsync();

    Task crawlerDataDump = Blog.DumpCrawlerData
        ? crawlerDataDownloader.DownloadCrawlerDataAsync()
        : Task.CompletedTask;

    await urlCollection;

    UpdateProgressQueueInformation(Resources.ProgressUniqueDownloads);

    Blog.DuplicatePhotos = DetermineDuplicates<PhotoPost>();
    Blog.DuplicateVideos = DetermineDuplicates<VideoPost>();
    Blog.DuplicateAudios = DetermineDuplicates<AudioPost>();

    // Duplicates are taken out of the total count.
    Blog.TotalCount = Blog.TotalCount - Blog.DuplicatePhotos - Blog.DuplicateAudios - Blog.DuplicateVideos;

    CleanCollectedBlogStatistics();

    await crawlerDataDump;

    // The download's boolean result is not used by this crawler.
    await blogDownload;

    bool cancelled = Ct.IsCancellationRequested;
    if (!cancelled)
    {
        Blog.LastCompleteCrawl = DateTime.Now;
    }

    Blog.Save();
    UpdateProgressQueueInformation(string.Empty);
}