Example #1
        public static async Task<IReadOnlyCollection<IContainerGroup>> StartFleet(ILogger log, Cfg cfg, UpdateType optionUpdateType)
        {
            var sheets        = (await ChannelSheets.MainChannels(cfg.App.Sheets, log)).ToList();
            // even out batch sizes: numBatches = ceil(channels / ChannelsPerContainer), then size = ceil(channels / numBatches)
            var evenBatchSize = (int)Math.Ceiling(sheets.Count / Math.Ceiling(sheets.Count / (double)cfg.App.ChannelsPerContainer));

            var batches = sheets.Randomize().Batch(evenBatchSize).Select((b, i) => (batch: b.ToList(), name: $"{cfg.App.Container.Name}-fleet-{i}", i)).ToList();

            var azure = GetAzure(cfg);

            // before starting the fleet, ensure the container groups are not already running
            await batches.BlockAction(async b => await EnsureNotRunning(b.name, azure, cfg.App.ResourceGroup), cfg.App.DefaultParallel);

            var fleet = await batches.BlockTransform(async b => {
                var (batch, fleetName, i) = b;
                var region = Regions[i % Regions.Length];
                var args   = new[] {
                    "update",
                    "-t", optionUpdateType.ToString(),
                    "-c", batch.Join("|", c => c.Id)
                };
                var group = await ContainerGroup(cfg, azure, fleetName, region, args.ToArray());
                return await group.CreateAsync();
            }, cfg.App.DefaultParallel);

            log.Information("Started fleet containers: {Containers}", fleet.Join(", ", f => f.Name));
            return fleet;
        }
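
The example above relies on several extension helpers (Randomize, Batch, Join, BlockAction, BlockTransform) whose implementations are not shown. The sketch below is only an assumption of how such helpers could look; the CollectionSketch class and all bodies are illustrative stand-ins, not the project's actual code, included to make the example self-contained.

using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;

public static class CollectionSketch
{
    static readonly Random Rng = new Random();

    // shuffle a sequence so work is spread differently on each run
    public static IEnumerable<T> Randomize<T>(this IEnumerable<T> items) =>
        items.OrderBy(_ => Rng.Next());

    // split a sequence into consecutive batches of at most `size` items
    public static IEnumerable<IReadOnlyCollection<T>> Batch<T>(this IEnumerable<T> items, int size)
    {
        var batch = new List<T>(size);
        foreach (var item in items)
        {
            batch.Add(item);
            if (batch.Count == size) { yield return batch; batch = new List<T>(size); }
        }
        if (batch.Count > 0) yield return batch;
    }

    // join items into a delimited string, e.g. batch.Join("|", c => c.Id)
    public static string Join<T>(this IEnumerable<T> items, string separator, Func<T, string> selector) =>
        string.Join(separator, items.Select(selector));

    // run an async transform over all items with a bounded degree of parallelism
    public static async Task<IReadOnlyCollection<R>> BlockTransform<T, R>(
        this IEnumerable<T> items, Func<T, Task<R>> transform, int parallel)
    {
        using var throttle = new SemaphoreSlim(parallel);
        var tasks = items.Select(async item =>
        {
            await throttle.WaitAsync();
            try { return await transform(item); }
            finally { throttle.Release(); }
        }).ToList();
        return await Task.WhenAll(tasks);
    }

    // same as BlockTransform, but for side-effecting actions with no result
    public static Task BlockAction<T>(this IEnumerable<T> items, Func<T, Task> action, int parallel) =>
        items.BlockTransform(async i => { await action(i); return true; }, parallel);
}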
Example #2
        async Task<IEnumerable<ChannelStored2>> UpdateAllChannels(bool disableDiscover, string[] limitChannels, ILogger log, CancellationToken cancel)
        {
            var store            = Store.Channels;
            var limitChannelHash = limitChannels.HasItems() ? limitChannels.ToHashSet() : Cfg.LimitedToSeedChannels?.ToHashSet() ?? new HashSet<string>();

            log.Information("Collect - Starting channels update. Limited to ({Included})",
                            limitChannelHash.Any() ? limitChannelHash.Join("|") : "All");

            IKeyedCollection<string, ChannelStored2> channelPev;

            ChannelStored2[] toDiscover;
            using (var db = await Sf.OpenConnection(log)) {
                var channelLimitSql = limitChannelHash.Any() ? $" and v:ChannelId::string in ({limitChannelHash.Join(",", c => $"'{c}'")})" : "";
                // retrieve previous channel state to update with new classification (algos and human) and stats from the API
                channelPev = (await db.Query<string>("channels - previous", $@"select v
from channel_stage
where v:Status::string is null or v:Status::string not in ('ManualRejected', 'AlgoRejected') {channelLimitSql}
qualify row_number() over (partition by v:ChannelId::string order by v:Updated::timestamp_ntz desc) = 1"))
                             .Select(s => s.ToObject <ChannelStored2>(Store.Channels.JCfg)).ToKeyedCollection(c => c.ChannelId);
                toDiscover = disableDiscover ? new ChannelStored2[] { } : await ChannelsToDiscover(db, channelPev, log);
            }

            // human classification of channels. also acts as a manual seed list
            var toUpdate = (await ChannelSheets.Channels(Cfg.Sheets, log))
                           .Where(c => limitChannelHash.IsEmpty() || limitChannelHash.Contains(c.Id))
                           .Select(s => ChannelStored(s, channelPev))
                           .Randomize() // randomize so that early runs (and any failures) are spread across different channels on each run
                           .ToArray();

            // perform a full update on channels that haven't had one in over 30 days (at most 600 at a time because of the quota limit).
            var fullUpdate = toUpdate
                             .Where(c => c.Updated == default || c.Updated - c.LastFullUpdate > 30.Days())
                             .Take(600)
                             .Select(c => c.ChannelId).ToHashSet();

            var (channels, dur) = await toUpdate.Concat(toDiscover)
                                 .BlockFunc(async c => await UpdateChannel(c, fullUpdate.Contains(c.ChannelId), log), Cfg.DefaultParallel, cancel: cancel,
                                            progressUpdate: p => log.Debug("Collect - Reading channels {ChannelCount}/{ChannelTotal}", p.CompletedTotal, toUpdate.Length))
                                 .WithDuration();

            if (cancel.IsCancellationRequested)
            {
                return channels;
            }

            if (channels.Any())
            {
                await store.Append(channels, log);
            }

            log.Information("Collect - Updated stats for {Channels} existing and {Discovered} discovered channels in {Duration}",
                            toUpdate.Length, toDiscover.Length, dur);

            return channels;
        }
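
UpdateAllChannels also uses a few small convenience extensions (HasItems, IsEmpty, 30.Days(), WithDuration) that are defined elsewhere. The sketch below is an assumption of what they plausibly do; the ConvenienceSketch class and all bodies are hypothetical, included only so the method reads on its own.

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Threading.Tasks;

public static class ConvenienceSketch
{
    // null-safe checks used to decide whether a channel limit list was supplied
    public static bool HasItems<T>(this IEnumerable<T> items) => items != null && items.Any();
    public static bool IsEmpty<T>(this IEnumerable<T> items) => items == null || !items.Any();

    // lets 30.Days() read as a TimeSpan literal in the full-update age check
    public static TimeSpan Days(this int days) => TimeSpan.FromDays(days);

    // await a task and also return how long it took, for the duration in the log line
    public static async Task<(T result, TimeSpan duration)> WithDuration<T>(this Task<T> task)
    {
        var sw = Stopwatch.StartNew();
        var result = await task;
        return (result, sw.Elapsed);
    }
}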
        async Task<IReadOnlyCollection<ChannelStored2>> UpdateChannels()
        {
            var store = Store.ChannelStore;

            Log.Information("Starting channels update. Limited to ({Included})",
                            Cfg.LimitedToSeedChannels?.HasItems() == true ? Cfg.LimitedToSeedChannels.Join("|") : "All");

            async Task<ChannelStored2> UpdateChannel(ChannelSheet channel)
            {
                var log = Log.ForContext("Channel", channel.Title).ForContext("ChannelId", channel.Id);

                var channelData = new ChannelData {
                    Id = channel.Id, Title = channel.Title
                };

                try {
                    // use the API (rather than the scraper) to get channel details: better info, faster
                    // if the API returns nothing, record the channel as dead
                    channelData = await Api.ChannelData(channel.Id) ??
                                  new ChannelData { Id = channel.Id, Title = channel.Title, Status = ChannelStatus.Dead };
                    log.Information("{Channel} - read channel details", channelData.Title);
                }
                catch (Exception ex) {
                    channelData.Status = ChannelStatus.Dead;
                    log.Error(ex, "{Channel} - Error when updating details for channel: {Error}", channel.Title, ex.Message);
                }
                var channelStored = new ChannelStored2 {
                    ChannelId     = channel.Id,
                    ChannelTitle  = channelData.Title ?? channel.Title,
                    Status        = channelData.Status,
                    MainChannelId = channel.MainChannelId,
                    Description   = channelData.Description,
                    LogoUrl       = channelData.Thumbnails?.Default__?.Url,
                    Subs          = channelData.Stats?.SubCount,
                    ChannelViews  = channelData.Stats?.ViewCount,
                    Country       = channelData.Country,
                    Updated       = DateTime.UtcNow,
                    Relevance     = channel.Relevance,
                    LR            = channel.LR,
                    HardTags      = channel.HardTags,
                    SoftTags      = channel.SoftTags,
                    UserChannels  = channel.UserChannels
                };

                return channelStored;
            }

            var seeds = await ChannelSheets.Channels(Cfg.Sheets, Log);

            var channels = await seeds.Where(c => Cfg.LimitedToSeedChannels.IsEmpty() || Cfg.LimitedToSeedChannels.Contains(c.Id))
                           .BlockTransform(UpdateChannel, Cfg.DefaultParallel,
                                           progressUpdate: p => Log.Debug("Reading channels {ChannelCount}/{ChannelTotal}", p.CompletedTotal, seeds.Count));

            if (channels.Any())
            {
                await store.Append(channels);
            }

            return channels;
        }
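
Both methods in this example push their per-channel work through a throttled block call: BlockFunc with a cancellation token and a progress callback in UpdateAllChannels, and a BlockTransform overload with progressUpdate in UpdateChannels. Extending the sketch after Example #1, the version below guesses at that richer shape: bounded parallelism, a progress callback, and cooperative cancellation that returns whatever completed (matching the caller checking cancel.IsCancellationRequested afterwards). BlockProgress and every body here are assumptions, not the library's actual API.

using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;

// assumed progress type: the callers above only read CompletedTotal
public class BlockProgress
{
    public int CompletedTotal { get; set; }
}

public static class BlockSketch
{
    // run `func` over the items with at most `parallel` concurrent calls, reporting progress
    // as items complete and returning partial results if cancellation is requested
    public static async Task<IReadOnlyCollection<R>> BlockFunc<T, R>(
        this IEnumerable<T> items, Func<T, Task<R>> func, int parallel,
        CancellationToken cancel = default, Action<BlockProgress> progressUpdate = null)
    {
        var results   = new ConcurrentBag<R>();
        var completed = 0;
        using var throttle = new SemaphoreSlim(parallel);
        var tasks = items.Select(async item =>
        {
            await throttle.WaitAsync(cancel); // throws if cancellation is requested while waiting
            try
            {
                results.Add(await func(item));
                var done = Interlocked.Increment(ref completed);
                progressUpdate?.Invoke(new BlockProgress { CompletedTotal = done });
            }
            finally { throttle.Release(); }
        }).ToList();

        try { await Task.WhenAll(tasks); }
        catch (OperationCanceledException) { /* keep whatever finished before cancellation */ }
        return results.ToArray();
    }
}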