/// <summary>
/// Sanitizes the configured starting addresses and hands them to
/// <c>crawlerAddressRegistrationService.RegisterNonExploredLinksForExplorationAsync</c>.
/// </summary>
/// <remarks>
/// Every entry of <c>options.StartingAddresses</c> is run through
/// <c>LinkSanitizer.GetSanitizedLinkWithoutQueryAndFragment</c> before it is registered.
/// The current UTC time is read once from <c>dateTimeProvider</c> and passed along with the links.
/// </remarks>
/// <param name="cancellationToken">
/// Cancellation token supplied by the caller. It is not forwarded to the services called here.
/// </param>
/// <returns>A task that completes once the registration call has finished.</returns>
public async Task RegisterStartingAddressesAsync(CancellationToken cancellationToken)
{
    // Capture the timestamp first so the registration uses a single point in time.
    var registrationTime = dateTimeProvider.UtcNow;

    var sanitizedAddresses =
        (from address in options.StartingAddresses
         select LinkSanitizer.GetSanitizedLinkWithoutQueryAndFragment(address))
        .ToList();

    await crawlerAddressRegistrationService.RegisterNonExploredLinksForExplorationAsync(
        registrationTime,
        sanitizedAddresses);
}
/// <summary>
/// Fetches the next page from <c>crawlerPrefetchService</c>, registers the links, apps, bundles
/// and subs found on it, and logs a summary of the work done.
/// </summary>
/// <param name="cancellationToken">
/// Cancellation token supplied by the caller. It is not forwarded to the services called here.
/// </param>
/// <returns>
/// <c>ExplorePageCommandResult.NoMoreItems</c> when the prefetch service returns no page;
/// <c>ExplorePageCommandResult.Success</c> when a page was processed, and also when a
/// <c>SteamPageRemovedException</c> was caught (it is logged as a warning).
/// </returns>
public async Task<ExplorePageCommandResult> ExplorePageAsync(CancellationToken cancellationToken)
{
    const string SummaryTemplate =
        "Processed URI '{@Uri}'. Elapsed millis: {@ElapsedMillis}, " +
        "Found {@NotExploredAppCount} not explored apps, {@NotKnownAppCount} not known apps, " +
        "{@NotExploredSubCount} not explored subs, {@NotKnownSubCount} not known subs and " +
        "{@NotExploredBundleCount} not explored bundles, {@NotKnownBundleCount} not known bundles. " +
        "About {@ExploredItemCount} links are explored and {@RemainingItemCount} links remain.";

    try
    {
        // The timer covers everything from fetching the page up to gathering the statistics.
        var timer = Stopwatch.StartNew();
        var now = dateTimeProvider.UtcNow;

        var page = await crawlerPrefetchService.GetNextPageAsync(now);
        if (page is null)
        {
            // Nothing left in the exploration queue.
            return ExplorePageCommandResult.NoMoreItems;
        }

        var pendingLinks = await crawlerAddressRegistrationService
            .RegisterNonExploredLinksForExplorationAsync(now, page.NormalizedLinks);

        // Registration order is apps, then bundles, then subs.
        var appCounts = await RegisterFoundAppsAsync(page, pendingLinks);
        var bundleCounts = await RegisterFoundBundlesAsync(page, pendingLinks);
        var subCounts = await RegisterFoundSubsAsync(page, pendingLinks);

        // These figures are only approximate: items that were already prefetched but not yet
        // processed are not accounted for. That is good enough for a progress log line.
        var stats = await crawlerAddressRegistrationService.GetExplorationStatisticsAsync(now);

        timer.Stop();

        logger.LogInformation(
            SummaryTemplate,
            page.NormalizedAddress.AbsoluteUri,
            timer.ElapsedMilliseconds,
            appCounts.NotYetExploredCount,
            appCounts.NotYetKnownCount,
            subCounts.NotYetExploredCount,
            subCounts.NotYetKnownCount,
            bundleCounts.NotYetExploredCount,
            bundleCounts.NotYetKnownCount,
            stats.ApproximateExploredCount,
            stats.ApproximateToBeExploredCount);

        return ExplorePageCommandResult.Success;
    }
    catch (SteamPageRemovedException removedPage)
    {
        // A removed page is not treated as a failure: it is logged and reported as success.
        logger.LogWarning("The page located at URL {@Uri} has been removed.", removedPage.Uri);
        return ExplorePageCommandResult.Success;
    }
}