public static async Task<IReadOnlyCollection<IContainerGroup>> StartFleet(ILogger log, Cfg cfg, UpdateType optionUpdateType) {
  var sheets = (await ChannelSheets.MainChannels(cfg.App.Sheets, log)).ToList();
  // split channels into evenly sized batches rather than filling containers to ChannelsPerContainer and leaving a small remainder
  var evenBatchSize = (int) Math.Ceiling(sheets.Count / Math.Ceiling(sheets.Count / (double) cfg.App.ChannelsPerContainer));
  var batches = sheets.Randomize().Batch(evenBatchSize)
    .Select((b, i) => (batch: b.ToList(), name: $"{cfg.App.Container.Name}-fleet-{i}", i)).ToList();

  var azure = GetAzure(cfg);

  // before starting the fleet, ensure none of the containers are already running
  await batches.BlockAction(async b => await EnsureNotRunning(b.name, azure, cfg.App.ResourceGroup), cfg.App.DefaultParallel);

  var fleet = await batches.BlockTransform(async b => {
    var (batch, fleetName, i) = b;
    var region = Regions[i % Regions.Length];
    var args = new[] {"update", "-t", optionUpdateType.ToString(), "-c", batch.Join("|", c => c.Id)};
    var group = await ContainerGroup(cfg, azure, fleetName, region, args.ToArray());
    return await group.CreateAsync();
  }, cfg.App.DefaultParallel);

  log.Information("Started fleet containers: {Containers}", fleet.Join(", ", f => f.Name));
  return fleet;
}
async Task<IEnumerable<ChannelStored2>> UpdateAllChannels(bool disableDiscover, string[] limitChannels, ILogger log, CancellationToken cancel) {
  var store = Store.Channels;
  var limitChannelHash = limitChannels.HasItems()
    ? limitChannels.ToHashSet()
    : Cfg.LimitedToSeedChannels?.ToHashSet() ?? new HashSet<string>();
  log.Information("Collect - Starting channels update. Limited to ({Included})",
    limitChannelHash.Any() ? limitChannelHash.Join("|") : "All");

  IKeyedCollection<string, ChannelStored2> channelPev;
  ChannelStored2[] toDiscover;

  using (var db = await Sf.OpenConnection(log)) {
    var channelLimitSql = limitChannelHash.Any()
      ? $" and v:ChannelId::string in ({limitChannelHash.Join(",", c => $"'{c}'")})"
      : "";
    // retrieve the previous channel state (latest record per channel) to update with new classification (algo and human) and stats from the API
    channelPev = (await db.Query<string>("channels - previous", $@"
select v
from channel_stage
where v:Status::string is null or v:Status::string not in ('ManualRejected', 'AlgoRejected') {channelLimitSql}
qualify row_number() over (partition by v:ChannelId::string order by v:Updated::timestamp_ntz desc) = 1"))
      .Select(s => s.ToObject<ChannelStored2>(Store.Channels.JCfg))
      .ToKeyedCollection(c => c.ChannelId);
    toDiscover = disableDiscover ? new ChannelStored2[] { } : await ChannelsToDiscover(db, channelPev, log);
  }

  // human classification of channels. also acts as a manual seed list
  var toUpdate = (await ChannelSheets.Channels(Cfg.Sheets, log))
    .Where(c => limitChannelHash.IsEmpty() || limitChannelHash.Contains(c.Id))
    .Select(s => ChannelStored(s, channelPev))
    .Randomize() // each run, spread the early runs and possible failures across different channels
    .ToArray();

  // perform a full update on channels older than 30 days (max 600 at a time because of the quota limit)
  var fullUpdate = toUpdate
    .Where(c => c.Updated == default || c.Updated - c.LastFullUpdate > 30.Days())
    .Take(600)
    .Select(c => c.ChannelId).ToHashSet();

  var (channels, dur) = await toUpdate.Concat(toDiscover)
    .BlockFunc(async c => await UpdateChannel(c, fullUpdate.Contains(c.ChannelId), log), Cfg.DefaultParallel, cancel: cancel,
      progressUpdate: p => log.Debug("Collect - Reading channels {ChannelCount}/{ChannelTotal}", p.CompletedTotal, toUpdate.Length))
    .WithDuration();

  if (cancel.IsCancellationRequested)
    return channels;

  if (channels.Any())
    await store.Append(channels, log);

  log.Information("Collect - Updated stats for {Channels} existing and {Discovered} discovered channels in {Duration}",
    toUpdate.Length, toDiscover.Length, dur);

  return channels;
}
async Task<IReadOnlyCollection<ChannelStored2>> UpdateChannels() {
  var store = Store.ChannelStore;
  Log.Information("Starting channels update. Limited to ({Included})",
    Cfg.LimitedToSeedChannels?.HasItems() == true ? Cfg.LimitedToSeedChannels.Join("|") : "All");

  async Task<ChannelStored2> UpdateChannel(ChannelSheet channel) {
    var log = Log.ForContext("Channel", channel.Title).ForContext("ChannelId", channel.Id);
    var channelData = new ChannelData {Id = channel.Id, Title = channel.Title};
    try {
      channelData = await Api.ChannelData(channel.Id) ?? // use the API to get the channel instead of the scraper. We get better info, faster
                    new ChannelData {Id = channel.Id, Title = channel.Title, Status = ChannelStatus.Dead};
      log.Information("{Channel} - read channel details", channelData.Title);
    }
    catch (Exception ex) {
      channelData.Status = ChannelStatus.Dead;
      log.Error(ex, "{Channel} - Error when updating details for channel: {Error}", channel.Title, ex.Message);
    }
    var channelStored = new ChannelStored2 {
      ChannelId = channel.Id,
      ChannelTitle = channelData.Title ?? channel.Title,
      Status = channelData.Status,
      MainChannelId = channel.MainChannelId,
      Description = channelData.Description,
      LogoUrl = channelData.Thumbnails?.Default__?.Url,
      Subs = channelData.Stats?.SubCount,
      ChannelViews = channelData.Stats?.ViewCount,
      Country = channelData.Country,
      Updated = DateTime.UtcNow,
      Relevance = channel.Relevance,
      LR = channel.LR,
      HardTags = channel.HardTags,
      SoftTags = channel.SoftTags,
      UserChannels = channel.UserChannels
    };
    return channelStored;
  }

  var seeds = await ChannelSheets.Channels(Cfg.Sheets, Log);
  var channels = await seeds
    .Where(c => Cfg.LimitedToSeedChannels.IsEmpty() || Cfg.LimitedToSeedChannels.Contains(c.Id))
    .BlockTransform(UpdateChannel, Cfg.DefaultParallel,
      progressUpdate: p => Log.Debug("Reading channels {ChannelCount}/{ChannelTotal}", p.CompletedTotal, seeds.Count));

  if (channels.Any())
    await store.Append(channels);

  return channels;
}