/// <summary>
/// Logs the container image and arguments, builds a container group via <c>ContainerGroup</c>
/// for the configured container name, and creates it.
/// </summary>
/// <param name="log">Logger that receives the "starting container" message.</param>
/// <param name="cfg">Configuration supplying the container name/image and the Azure settings.</param>
/// <param name="args">Arguments forwarded unchanged to <c>ContainerGroup</c>.</param>
/// <returns>The value produced by <c>CreateAsync()</c> on the built group.</returns>
public static async Task<IContainerGroup> Start(ILogger log, Cfg cfg, string[] args) {
    var container = cfg.App.Container;
    log.Information("starting container {Image} {Args}", container.ImageName, args.Join(" "));

    var azure = GetAzure(cfg);
    var definition = await ContainerGroup(cfg, azure, container.Name, GetRegion(), args);
    var created = await definition.CreateAsync();
    return created;
}
/// <summary>
/// Creates a <c>YtStore</c> from a new <c>YtClient</c> (built from <c>cfg.App</c> and <paramref name="log"/>)
/// and the data store rooted at the configured <c>DbPath</c>.
/// </summary>
/// <param name="cfg">Configuration to read the app and storage settings from.</param>
/// <param name="log">Logger handed to the client.</param>
/// <returns>The newly constructed store.</returns>
public static YtStore YtStore(this Cfg cfg, ILogger log) {
    var client = new YtClient(cfg.App, log);
    var dataStore = cfg.DataStore(cfg.App.Storage.DbPath);
    return new YtStore(client, dataStore);
}
/// <summary>
/// Authenticates against the Azure global cloud with the configured service principal
/// and selects the configured subscription.
/// </summary>
/// <param name="cfg">Configuration holding the service principal and subscription id.</param>
/// <returns>The authenticated Azure client bound to <c>cfg.App.SubscriptionId</c>.</returns>
static IAzure GetAzure(Cfg cfg) {
    var principal = cfg.App.ServicePrincipal;
    var credentials = new AzureCredentialsFactory().FromServicePrincipal(
        principal.ClientId,
        principal.Secret,
        principal.TennantId,
        AzureEnvironment.AzureGlobalCloud);

    return Azure.Authenticate(credentials).WithSubscription(cfg.App.SubscriptionId);
}
/// <summary>
/// Writes the current configuration to <c>cfg.json</c> inside the local results sub-directory
/// <paramref name="dir"/> and then saves that file to the store under the same relative directory.
/// </summary>
/// <param name="dir">Directory name, used both under <c>LocalResultsDir</c> and as the relative store path.</param>
async Task SaveCfg(string dir) {
    // Make sure the local target directory exists before anything is written into it.
    var localTarget = LocalResultsDir.Combine(dir);
    localTarget.EnsureDirectoryExists();

    var remoteDir = StringPath.Relative(dir);
    var cfgFile = localTarget.Combine("cfg.json");

    // Serialise locally first; the store upload reads from the local file.
    Cfg.ToJsonFile(cfgFile);
    await Store.Save(remoteDir.Add("cfg.json"), cfgFile);
}
/// <summary>
/// Collects data for every seed channel and saves it as parquet under a directory named after
/// the current UTC date (yyyy-MM-dd). The configuration is saved to the same directory first.
/// Creates the following:
/// Channels - Basic channel info and statistics about recommendations at the granularity of Channel,Date
/// Videos - the video rows collected for the seed channels
/// Recommends - Details about video recommendations at the granularity of From,To,Date ??
/// </summary>
/// <returns>A task that completes once channels, videos and recommendations have been saved.</returns>
public async Task SaveChannelRelationData() {
    // The directory name must not depend on the machine's culture/calendar, so format invariantly.
    var analysisDir = DateTime.UtcNow.ToString("yyyy-MM-dd", System.Globalization.CultureInfo.InvariantCulture);
    await SaveCfg(analysisDir);

    var channelCfg = await Cfg.LoadChannelConfig();
    var seeds = channelCfg.Seeds.ToList();

    // Streams (videos, recommendations) per seed channel through a dataflow block into two row sinks.
    async Task SaveVideosAndRecommends() {
        // The same degree of parallelism is used for the block and for the transforms nested inside it,
        // hence the square root of the budget. Clamp to 1 because a dataflow block rejects a
        // MaxDegreeOfParallelism of 0.
        var par = Math.Max(1, (int)Math.Sqrt(Cfg.ParallelCollect));

        var vrTransform = new TransformBlock<SeedChannel, (IReadOnlyCollection<VideoRow> vids, IReadOnlyCollection<RecommendRow> recs)>(
            async c => {
                var vids = (await (await ChannelVideos(c)).BlockTransform(Video, par)).NotNull().ToReadOnly();
                var recs = (await vids.BlockTransform(Recommends, par)).NotNull().SelectMany(r => r).NotNull().ToReadOnly();
                return (vids, recs);
            },
            new ExecutionDataflowBlockOptions { MaxDegreeOfParallelism = par, EnsureOrdered = false });

        var produceTask = seeds.Produce(vrTransform);

        // NOTE(review): 500_000 is presumably the number of rows buffered per saved file - confirm against RowSink.
        var vidSink = new RowSink<VideoRow>((c, name) => SaveParquet(c, name, analysisDir), "Videos", 500_000);
        var recSink = new RowSink<RecommendRow>((c, name) => SaveParquet(c, name, analysisDir), "Recommends", 500_000);

        while (await vrTransform.OutputAvailableAsync()) {
            var (vids, recs) = await vrTransform.ReceiveAsync();
            await Task.WhenAll(vidSink.Add(vids), recSink.Add(recs));
        }

        // Flush whatever was collected, even if the block stopped early.
        await Task.WhenAll(vidSink.End(), recSink.End());
        await produceTask;

        // OutputAvailableAsync only reports "no more output" when the block faults; awaiting Completion
        // surfaces an exception thrown inside the transform instead of silently saving partial data.
        await vrTransform.Completion;
    }

    // Collects one row per seed channel and saves them in a single parquet write.
    async Task SaveChannels() {
        var channels = await seeds.BlockTransform(Channel, Cfg.Parallel,
            progressUpdate: p => Log.Information("Collecting channels {Channels}/{Total}. {Speed}",
                p.Results.Count, seeds.Count, p.Speed("channels")));
        await SaveParquet(channels.NotNull(), "Channels", analysisDir);
    }

    await Task.WhenAll(SaveVideosAndRecommends(), SaveChannels());
}
/// <summary>
/// Runs <c>UpdateChannel</c> over every seed channel from the channel configuration,
/// logging progress at verbose level and a summary (duration, channel count, video count) at the end.
/// </summary>
public async Task UpdateData() {
    Log.Information("Starting incremental data update");

    var seeds = (await Cfg.LoadChannelConfig()).Seeds;

    // Original author's note on the chosen parallelism: sufficiently parallel inside
    var timedUpdate = await seeds
        .BlockTransform(
            UpdateChannel,
            Cfg.Parallel,
            progressUpdate: progress => Log.Verbose(
                "Channel update progress {Channels}/{Total} {Speed}",
                progress.Results.Count,
                seeds.Count,
                progress.Speed("channels").Humanize()))
        .WithDuration();

    Log.Information(
        "Completed updates successfully in {Duration}. {Channels} channels, {Videos} videos.",
        timedUpdate.Duration.Humanize(),
        timedUpdate.Result.Count,
        timedUpdate.Result.Sum(channel => channel.videos.Count));
}
/// <summary>
/// Deletes any existing container group with the configured container name, then defines and creates
/// a new Linux container group in US West that runs <c>dotnet /app/ytnetworks.dll</c> followed by
/// <paramref name="args"/>, with a restart policy of Never.
/// </summary>
/// <param name="log">Logger that receives the "starting container" message.</param>
/// <param name="cfg">Configuration holding the service principal, subscription, resource group and container settings.</param>
/// <param name="args">Arguments appended after the application dll on the container command line.</param>
/// <returns>The created container group.</returns>
public static async Task<IContainerGroup> Start(ILogger log, Cfg cfg, string[] args) {
    var principal = cfg.App.ServicePrincipal;
    var container = cfg.App.Container;

    var credentials = new AzureCredentialsFactory().FromServicePrincipal(
        principal.ClientId, principal.Secret, principal.TennantId, AzureEnvironment.AzureGlobalCloud);
    var azure = Azure.Authenticate(credentials).WithSubscription(cfg.App.SubscriptionId);

    // The result is not used; the call is retained exactly as the original made it.
    var sub = azure.GetCurrentSubscription();

    var resourceGroup = cfg.App.ResourceGroup;

    // A group with the same name must be removed before it can be defined again.
    var existing = await azure.ContainerGroups.GetByResourceGroupAsync(resourceGroup, container.Name);
    if (existing != null) {
        await azure.ContainerGroups.DeleteByIdAsync(existing.Id);
    }

    var commandArgs = new[] { "/app/ytnetworks.dll" }.Concat(args).ToArray();
    log.Information("starting container {Image} {Args}", container.ImageName, commandArgs.Join(" "));

    // Environment handed to the container: storage connection string and environment name.
    var environment = new Dictionary<string, string> {
        { $"YtNetworks_{nameof(RootCfg.AzureStorageCs)}", cfg.Root.AzureStorageCs },
        { $"YtNetworks_{nameof(RootCfg.Env)}", cfg.Root.Env }
    };

    return await azure.ContainerGroups.Define(container.Name)
        .WithRegion(Region.USWest)
        .WithExistingResourceGroup(resourceGroup)
        .WithLinux()
        .WithPrivateImageRegistry(container.Registry, container.RegistryCreds.Name, container.RegistryCreds.Secret)
        .WithoutVolume()
        .DefineContainerInstance(container.Name)
        .WithImage($"{container.Registry}/{container.ImageName}")
        .WithoutPorts()
        .WithCpuCoreCount(container.Cores)
        .WithMemorySizeInGB(container.Mem)
        .WithEnvironmentVariables(environment)
        .WithStartingCommandLine("dotnet", commandArgs)
        .Attach()
        .WithRestartPolicy(ContainerGroupRestartPolicy.Never)
        .CreateAsync();
}
/// <summary>
/// Collects data for every seed channel and saves it as parquet under a directory named after
/// the current UTC date (yyyy-MM-dd). The configuration is saved to the same directory first.
/// Creates the following:
/// Channels - Basic channel info and statistics about recommendations at the granularity of Channel,Date
/// Videos - the video rows collected for the channel videos
/// Recommends - Details about video recommendations at the granularity of From,To,Date ??
/// </summary>
/// <returns>A task that completes once channels, videos and recommendations have been saved.</returns>
public async Task SaveChannelRelationData() {
    // The directory name must not depend on the machine's culture/calendar, so format invariantly.
    var analysisDir = DateTime.UtcNow.ToString("yyyy-MM-dd", System.Globalization.CultureInfo.InvariantCulture);
    await SaveCfg(analysisDir);

    var channelCfg = await Cfg.LoadChannelConfig();
    var seeds = channelCfg.Seeds;

    // Filled in by LoadChannelVideos; only read after both loaders have completed.
    IReadOnlyCollection<ChannelVideoRow> channelVideos = null;

    async Task LoadChannels() {
        var channels = await seeds.BlockTransform(Channel, Cfg.ParallelCollect,
            progressUpdate: p => Log.Information("Collecting channels {Channels}/{Total}. {Speed}",
                p.Results.Count, seeds.Count, p.Speed("channels")));
        await SaveParquet(channels, "Channels", analysisDir);
    }

    async Task LoadChannelVideos() {
        var cvs = await seeds.BlockTransform(ChannelVideos, Cfg.ParallelCollect,
            progressUpdate: p => Log.Information("Collecting channel videos {Channels}/{Total}. {Speed}",
                p.Results.Count, seeds.Count, p.Speed("channels")));
        channelVideos = cvs.SelectMany(cv => cv).ToList();
    }

    await Task.WhenAll(LoadChannels(), LoadChannelVideos());

    // Materialise once: the rows are both saved and used as the input of the recommendation pass,
    // and the count is needed as the progress total below.
    var videos = (await channelVideos.BlockTransform(Video, Cfg.ParallelCollect,
            progressUpdate: p => Log.Information("Collecting videos {Videos}/{Total}. {Speed}",
                p.Results.Count, channelVideos.Count, p.Speed("videos"))))
        .NotNull()
        .ToList();
    await SaveParquet(videos, "Videos", analysisDir);

    // The total is the number of videos actually being processed (nulls were filtered out above),
    // not the number of channel videos originally requested.
    var recommendsResult = (await videos.BlockTransform(Recommends, Cfg.ParallelCollect,
            progressUpdate: p => Log.Information("Collecting channel video recommendations {Videos}/{Total}. {Speed}",
                p.Results.Count, videos.Count, p.Speed("videos"))))
        .NotNull();
    await SaveParquet(recommendsResult.SelectMany(r => r), "Recommends", analysisDir);
}
/// <summary>
/// For every seed channel (in randomised order) looks up the stored channel videos, re-fetches the
/// videos that have no stored record, and then re-fetches recommendations for the videos that have
/// no stored recommendations. The ids that were fixed and those still broken are logged per channel.
/// </summary>
public async Task RefreshMissingVideos() {
    var channelCfg = await Cfg.LoadChannelConfig();
    var shuffledSeeds = channelCfg.Seeds.Randomize();

    await shuffledSeeds.BlockAction(async c => {
        Log.Information("Started fixing '{Channel}'", c.Title);

        var storedChannel = await Yt.ChannelVideosCollection.Get(c.Id);
        var channelVids = storedChannel?.Vids;
        if (channelVids == null) {
            Log.Error("{Channel}' has not video's stored", c.Title);
            return;
        }

        // Yields the video id when nothing is stored for it, null otherwise; nulls are dropped afterwards.
        var lookups = await channelVids.Select(v => v).NotNull()
            .BlockTransform(async vid => {
                var storedVideo = await Yt.Videos.Get(vid.VideoId);
                return storedVideo == null ? vid.VideoId : null;
            }, Cfg.ParallelCollect);
        var missingVids = lookups.NotNull().ToList();

        if (missingVids.Count > 0) {
            var videosUpdated = await missingVids.BlockTransform(
                async id => (Id: id, Video: await Yt.GetAndUpdateVideo(id)),
                Cfg.Parallel);

            var fixedIds = videosUpdated.Where(u => u.Video.Video != null).Join("|", u => u.Id);
            var brokenIds = videosUpdated.Where(u => u.Video.Video == null).Join("|", u => u.Id);
            Log.Information("'{Channel}' Missing video's fixed [{Fixed}], broken [{Broken}]",
                c.Title, fixedIds, brokenIds);
        }

        // Pair each channel video with its stored recommendations (null when there are none).
        var recommends = await channelVids.BlockTransform(
            async vid => (From: vid, Recs: await Yt.RecommendedVideosCollection.Get(vid.VideoId)),
            Cfg.ParallelCollect);

        var withoutRecs = recommends.Where(r => r.Recs == null);
        var missinRecs = await withoutRecs.BlockTransform(
            async r => (From: r.From, Recs: await Yt.GetAndUpdateRecommendedVideos(r.From)),
            Cfg.Parallel);

        var fixedRecs = missinRecs.Where(r => r.Recs != null).Join("|", r => r.From.VideoId);
        var brokenRecs = missinRecs.Where(r => r.Recs == null).Join("|", r => r.From.VideoId);
        Log.Information("'{Channel}' Missing video's recommendations [{Fixed}], broken [{Broken}]",
            c.Title, fixedRecs, brokenRecs);
    });
}
/// <summary>
/// Creates an Azure blob file store using the configured data storage connection string.
/// </summary>
/// <param name="cfg">Configuration to read the storage settings from.</param>
/// <param name="path">Root path of the store; when null the configured <c>DbPath</c> is used.</param>
/// <returns>A new <c>AzureBlobFileStore</c> rooted at the chosen path.</returns>
public static ISimpleFileStore DataStore(this Cfg cfg, StringPath path = null) {
    var storage = cfg.App.Storage;
    return new AzureBlobFileStore(storage.DataStorageCs, path ?? storage.DbPath);
}
/// <summary>
/// Starts the container without any additional arguments.
/// </summary>
/// <param name="log">Logger passed through to the three-argument overload.</param>
/// <param name="cfg">Configuration passed through to the three-argument overload.</param>
/// <returns>The task returned by the three-argument <c>Start</c> overload.</returns>
public static Task<IContainerGroup> Start(ILogger log, Cfg cfg) {
    var noArgs = new string[0];
    return Start(log, cfg, noArgs);
}
/// <summary>
/// Splits the main channels into evenly sized, randomised batches and starts one container group per
/// batch, each running <c>update -t &lt;type&gt; -c &lt;channel ids joined by '|'&gt;</c>.
/// Groups are named <c>{container name}-fleet-{index}</c> and assigned to <c>Regions</c> in rotation.
/// </summary>
/// <param name="log">Logger used for loading the channel sheets and for the summary message.</param>
/// <param name="cfg">Configuration supplying sheets, batch size, container name, resource group and parallelism.</param>
/// <param name="optionUpdateType">Update type passed to each container via the <c>-t</c> argument.</param>
/// <returns>The created container groups; empty when there are no channels to process.</returns>
public static async Task<IReadOnlyCollection<IContainerGroup>> StartFleet(ILogger log, Cfg cfg, UpdateType optionUpdateType) {
    var sheets = (await ChannelSheets.MainChannels(cfg.App.Sheets, log)).ToList();

    // With no channels the batch-size expression below is 0/0 (NaN), which is not a usable batch size.
    if (sheets.Count == 0) {
        log.Warning("No channels found, no fleet containers started");
        return new IContainerGroup[0];
    }

    // Work out how many containers are needed at ChannelsPerContainer each, then spread the channels
    // evenly over that many containers instead of leaving a small remainder batch.
    var containerCount = Math.Ceiling(sheets.Count / (double)cfg.App.ChannelsPerContainer);
    var evenBatchSize = (int)Math.Ceiling(sheets.Count / containerCount);

    var batches = sheets.Randomize().Batch(evenBatchSize)
        .Select((b, i) => (batch: b.ToList(), name: $"{cfg.App.Container.Name}-fleet-{i}", i))
        .ToList();

    var azure = GetAzure(cfg);

    // Before starting the fleet, ensure none of its containers are already running.
    await batches.BlockAction(async b => await EnsureNotRunning(b.name, azure, cfg.App.ResourceGroup), cfg.App.DefaultParallel);

    var fleet = await batches.BlockTransform(async b => {
        var (batch, fleetName, i) = b;
        var region = Regions[i % Regions.Length];
        var args = new[] { "update", "-t", optionUpdateType.ToString(), "-c", batch.Join("|", c => c.Id) };
        var group = await ContainerGroup(cfg, azure, fleetName, region, args);
        return await group.CreateAsync();
    }, cfg.App.DefaultParallel);

    log.Information("Started fleet containers: {Containers}", fleet.Join(", ", f => f.Name));
    return fleet;
}
/// <summary>
/// Creates a <c>YtStore</c> over the data store rooted at the configured <c>DbPath</c>.
/// </summary>
/// <param name="cfg">Configuration to read the storage settings from.</param>
/// <param name="log">Logger handed to the store.</param>
/// <returns>The newly constructed store.</returns>
public static YtStore YtStore(this Cfg cfg, ILogger log) {
    var dataStore = cfg.DataStore(cfg.App.Storage.DbPath);
    return new YtStore(dataStore, log);
}
/// <summary>
/// Creates a <c>YtClient</c> from the configured API keys.
/// </summary>
/// <param name="cfg">Configuration supplying <c>App.YTApiKeys</c>.</param>
/// <param name="log">Logger handed to the client.</param>
/// <returns>The newly constructed client.</returns>
public static YtClient YtClient(this Cfg cfg, ILogger log) {
    var apiKeys = cfg.App.YTApiKeys;
    return new YtClient(apiKeys, log);
}