示例#1
0
    /// <summary>
    /// Scrapes every channel in <paramref name="channelIds"/> and appends one <c>User</c> record to the
    /// user store for each channel that could be loaded.
    /// </summary>
    /// <param name="channelIds">Ids of the channels to scrape.</param>
    /// <param name="log">Logger; it is re-scoped to this method right after the start message is written.</param>
    /// <param name="cancel">Token handed to the parallel block that drives the scraping.</param>
    /// <returns>The number of channels for which a user record was appended.</returns>
    public async Task<int> CollectUserChannels(IReadOnlyCollection<string> channelIds, ILogger log, CancellationToken cancel)
    {
        log.Information("started scraping user channels {Total}", channelIds.Count);
        log = log.Scope($"{nameof(YtCollector)}.{nameof(CollectUserChannels)}");
        await using var dbUser = DbStore.Users();
        var timer = Stopwatch.StartNew();

        // Every channel contributes 1 when a user was appended and 0 when the channel could not be loaded.
        var perChannelCounts = channelIds.BlockDo(async (channelId, index) =>
        {
            // NOTE(review): Swallow presumably logs via the callback and yields null on failure - confirm.
            var channel = await Scraper.Channel(log, channelId)
                .Swallow(ex => log.Warning(ex, "error loading channel {Channel}", channelId));
            if (channel == null)
            {
                return 0;
            }

            // Only the first Cfg.Collect.MaxSubscriptionsToSave subscriptions are kept.
            var cappedSubscriptions = channel.Subscriptions().Take(Cfg.Collect.MaxSubscriptionsToSave);
            var subscriptions = await cappedSubscriptions.ToArrayAsync()
                .Swallow(ex => log.Warning(ex, "error loading subscriptions for user {User}", channel.Id));

            await dbUser.Append(new User
            {
                UserId          = channel.Id,
                Platform        = Platform.YouTube,
                Name            = channel.Title,
                ProfileUrl      = channel.LogoUrl,
                Updated         = DateTime.UtcNow,
                Subscriptions   = subscriptions,
                SubscriberCount = channel.Subs,
            });

            // Progress is reported on every 100th item index to keep the debug log readable.
            if (index % 100 == 0)
            {
                log.Debug("Collect - scraped users progress {Users}/{Total}", index, channelIds.Count);
            }

            return 1;
        }, RCfg.WebParallel, cancel: cancel);

        var usersScraped = await perChannelCounts.SumAsync();

        log.Information(
            "Collect - completed scraping user channels {Success}/{Total} in {Duration}",
            usersScraped,
            channelIds.Count,
            timer.Elapsed.HumanizeShort());
        return usersScraped;
    }
示例#2
0
        /// <summary>
        /// Scrapes the given channels in batches and appends the resulting <c>User</c> records to the user store,
        /// one append per batch.
        /// </summary>
        /// <param name="channelIds">Ids of the channels to scrape.</param>
        /// <param name="log">Logger used for progress messages and for errors that are swallowed.</param>
        /// <param name="cancel">Token passed to both the batch-level and the channel-level parallel blocks.</param>
        /// <returns>The total number of users that were scraped and appended across all batches.</returns>
        async Task<int> CollectUserChannels(IReadOnlyCollection<string> channelIds, ILogger log, CancellationToken cancel)
        {
            log.Information("Collect - started scraping user channels {Total}", channelIds.Count);

            // The batches below are built with RCfg.ChannelBatchSize, so the batch total reported in the
            // progress log must be derived from that same size. Round up so a trailing partial batch is counted.
            var batchSize  = RCfg.ChannelBatchSize;
            var batchTotal = (channelIds.Count + batchSize - 1) / batchSize;
            var start      = Stopwatch.StartNew();
            var total      = await channelIds.Batch(batchSize).BlockTrans(async (ids, i) => {
                var userChannels = await ids.BlockTrans(async c => {
                    // NOTE(review): Swallow presumably logs via the callback and yields null on failure - confirm.
                    var u = await Scraper.Channel(log, c).Swallow(ex => log.Warning(ex, "error loading channel {Channel}", c));
                    if (u == null)
                    {
                        return null;
                    }
                    var subs = await u.Subscriptions().SelectManyList().Swallow(ex => log.Warning(ex, "error loading subscriptions for user {User}", u.Id));
                    return new User {
                        UserId = u.Id,
                        Platform = Platform.YouTube,
                        Name = u.Title,
                        ProfileUrl = u.LogoUrl,
                        Updated = DateTime.UtcNow,
                        Subscriptions = subs
                    };
                }, RCfg.WebParallel, cancel: cancel)
                                   .NotNull().ToListAsync(); // channels that failed to load are dropped here
                log.Debug("Collect - scraped {Users} users. Batch {Batch}/{Total}", userChannels.Count, i, batchTotal);
                await DbStore.Users.Append(userChannels);
                return userChannels.Count;
            }, RCfg.ParallelChannels, cancel: cancel) // mimic parallel settings from channel processing e.g. x4 outer, x6 inner
                             .SumAsync();

            log.Information("Collect - completed scraping user channels {Success}/{Total} in {Duration}",
                            total, channelIds.Count, start.Elapsed.HumanizeShort());
            return total;
        }