/// <summary>
/// Scrapes every channel in <paramref name="channelIds"/> as a user record and appends each
/// one that loads to the user store.
/// </summary>
/// <param name="channelIds">Ids of the channels to scrape.</param>
/// <param name="log">Logger; re-scoped to this method after the initial "started" message.</param>
/// <param name="cancel">Cancellation token handed to the block runner.</param>
/// <returns>The number of users that were scraped and appended.</returns>
public async Task<int> CollectUserChannels(IReadOnlyCollection<string> channelIds, ILogger log, CancellationToken cancel)
{
    log.Information("started scraping user channels {Total}", channelIds.Count);
    log = log.Scope($"{nameof(YtCollector)}.{nameof(CollectUserChannels)}");

    await using var dbUser = DbStore.Users();
    var start = Stopwatch.StartNew();

    // Each channel yields 1 when a user was stored and 0 when the channel could not be loaded.
    var storedFlags = channelIds.BlockDo(async (channelId, index) =>
    {
        // NOTE(review): Swallow presumably yields null/default after invoking the warning callback — confirm.
        var channelLoad = Scraper.Channel(log, channelId);
        var channel = await channelLoad.Swallow(ex => log.Warning(ex, "error loading channel {Channel}", channelId));
        if (channel == null)
        {
            return 0;
        }

        // Only the first MaxSubscriptionsToSave subscriptions are kept.
        var subscriptionLoad = channel.Subscriptions()
            .Take(Cfg.Collect.MaxSubscriptionsToSave)
            .ToArrayAsync();
        var subscriptions = await subscriptionLoad
            .Swallow(ex => log.Warning(ex, "error loading subscriptions for user {User}", channel.Id));

        await dbUser.Append(new User
        {
            UserId = channel.Id,
            Platform = Platform.YouTube,
            Name = channel.Title,
            ProfileUrl = channel.LogoUrl,
            Updated = DateTime.UtcNow,
            Subscriptions = subscriptions,
            SubscriberCount = channel.Subs,
        });

        // Progress is reported on every 100th index.
        if (index % 100 == 0)
        {
            log.Debug("Collect - scraped users progress {Users}/{Total}", index, channelIds.Count);
        }

        return 1;
    }, RCfg.WebParallel, cancel: cancel);

    var scrapedCount = await storedFlags.SumAsync();

    log.Information("Collect - completed scraping user channels {Success}/{Total} in {Duration}",
        scrapedCount, channelIds.Count, start.Elapsed.HumanizeShort());
    return scrapedCount;
}
/// <summary>
/// Scrapes the given channels as user records in batches, appending each batch of
/// successfully loaded users to the user store.
/// </summary>
/// <param name="channelIds">Ids of the channels to scrape.</param>
/// <param name="log">Logger for progress and warnings.</param>
/// <param name="cancel">Cancellation token handed to both the outer and inner block runners.</param>
/// <returns>The total number of users that were scraped and appended.</returns>
async Task<int> CollectUserChannels(IReadOnlyCollection<string> channelIds, ILogger log, CancellationToken cancel)
{
    log.Information("Collect - started scraping user channels {Total}", channelIds.Count);

    // The reported batch total must be derived from the same size that is used to build the
    // batches, and rounded up so that a trailing partial batch is counted.
    var batchSize = RCfg.ChannelBatchSize;
    var batchTotal = (channelIds.Count + batchSize - 1) / batchSize;
    var start = Stopwatch.StartNew();

    var total = await channelIds.Batch(batchSize).BlockTrans(async (ids, i) =>
        {
            var userChannels = await ids.BlockTrans(async c =>
                {
                    // NOTE(review): Swallow presumably yields null/default after invoking the warning callback — confirm.
                    var u = await Scraper.Channel(log, c).Swallow(ex => log.Warning(ex, "error loading channel {Channel}", c));
                    if (u == null)
                    {
                        return null;
                    }

                    var subs = await u.Subscriptions().SelectManyList()
                        .Swallow(ex => log.Warning(ex, "error loading subscriptions for user {User}", u.Id));
                    return new User
                    {
                        UserId = u.Id,
                        Platform = Platform.YouTube,
                        Name = u.Title,
                        ProfileUrl = u.LogoUrl,
                        Updated = DateTime.UtcNow,
                        Subscriptions = subs
                    };
                }, RCfg.WebParallel, cancel: cancel)
                .NotNull() // channels that failed to load were mapped to null above
                .ToListAsync();

            log.Debug("Collect - scraped {Users} users. Batch {Batch}/{Total}", userChannels.Count, i, batchTotal);
            await DbStore.Users.Append(userChannels);
            return userChannels.Count;
        },
        // mimic parallel settings from channel processing e.g. x4 outer, x6 inner
        RCfg.ParallelChannels, cancel: cancel)
        .SumAsync();

    log.Information("Collect - completed scraping user channels {Success}/{Total} in {Duration}",
        total, channelIds.Count, start.Elapsed.HumanizeShort());
    return total;
}