Beispiel #1
0
        /// <summary>
        ///   Logs the configured image name and the arguments, builds the container group definition for the
        ///   configured container name and creates it.
        /// </summary>
        /// <param name="log">Logger that receives the "starting container" message.</param>
        /// <param name="cfg">Configuration supplying the container image name and container name.</param>
        /// <param name="args">Arguments passed through to the container group definition.</param>
        /// <returns>The container group returned by <c>CreateAsync</c>.</returns>
        public static async Task <IContainerGroup> Start(ILogger log, Cfg cfg, string[] args)
        {
            var container = cfg.App.Container;
            log.Information("starting container {Image} {Args}", container.ImageName, args.Join(" "));

            var azure      = GetAzure(cfg);
            var definition = await ContainerGroup(cfg, azure, container.Name, GetRegion(), args);
            var created    = await definition.CreateAsync();

            return created;
        }
Beispiel #2
0
        /// <summary>
        ///   Builds a store from a new <c>YtClient</c> (created from <c>cfg.App</c> and <paramref name="log" />) and the
        ///   data store rooted at <c>cfg.App.Storage.DbPath</c>.
        /// </summary>
        /// <param name="cfg">Configuration supplying the app settings and the storage path.</param>
        /// <param name="log">Logger handed to the client.</param>
        /// <returns>The newly constructed store.</returns>
        public static YtStore YtStore(this Cfg cfg, ILogger log) =>
            new YtStore(new YtClient(cfg.App, log), cfg.DataStore(cfg.App.Storage.DbPath));
Beispiel #3
0
        /// <summary>
        ///   Authenticates against the Azure global cloud with the configured service principal and selects the
        ///   configured subscription.
        /// </summary>
        /// <param name="cfg">Configuration supplying the service principal and the subscription id.</param>
        /// <returns>The authenticated client scoped to <c>cfg.App.SubscriptionId</c>.</returns>
        static IAzure GetAzure(Cfg cfg)
        {
            var principal   = cfg.App.ServicePrincipal;
            var credentials = new AzureCredentialsFactory().FromServicePrincipal(
                principal.ClientId, principal.Secret, principal.TennantId, AzureEnvironment.AzureGlobalCloud);

            return Azure.Authenticate(credentials).WithSubscription(cfg.App.SubscriptionId);
        }
        /// <summary>
        ///   Writes the configuration to <c>cfg.json</c> inside the given sub-directory of the local results
        ///   directory, then saves that file to the store under the same relative directory.
        /// </summary>
        /// <param name="dir">Directory name, used both locally and as the relative path in the store.</param>
        async Task SaveCfg(string dir)
        {
            var resultsDir = LocalResultsDir.Combine(dir);
            resultsDir.EnsureDirectoryExists();

            var storeDir = StringPath.Relative(dir);
            var cfgFile  = resultsDir.Combine("cfg.json");

            // Write locally first; the store upload below reads from this file.
            Cfg.ToJsonFile(cfgFile);

            var storePath = storeDir.Add("cfg.json");
            await Store.Save(storePath, cfgFile);
        }
        /// <summary>
        ///   Saves the configuration and then collects data for all seed channels, writing everything under a directory
        ///   named after the current UTC date (yyyy-MM-dd). The following outputs are created:
        ///   Channels.parquet - Basic channel info and statistics about recommendations at the granularity of Channel,Date
        ///   Videos.parquet - Video rows collected for the videos of each seed channel
        ///   Recommends.parquet - Details about video recommendations at the granularity of From,To,Date ??
        ///   NOTE(review): the granularities above are carried over from the original comment and cannot be confirmed
        ///   from this method alone.
        /// </summary>
        /// <returns>A task that completes once both the channel save and the video/recommendation save have finished.</returns>
        public async Task SaveChannelRelationData()
        {
            // Output directory name: today's UTC date
            var analysisDir = DateTime.UtcNow.ToString("yyyy-MM-dd");

            await SaveCfg(analysisDir);

            var channelCfg = await Cfg.LoadChannelConfig();

            var seeds = channelCfg.Seeds.ToList();

            // Streams videos and recommendations per seed channel into two row sinks.
            async Task SaveVideosAndRecommends()
            {
                // The same degree of parallelism is used across channels (block options below) and within each
                // channel (BlockTransform calls), hence the square root of the configured value.
                var par         = (int)Math.Sqrt(Cfg.ParallelCollect);
                var vrTransform =
                    new TransformBlock <SeedChannel, (IReadOnlyCollection <VideoRow> vids, IReadOnlyCollection <RecommendRow> recs)>(
                        async c => {
                    // Channel videos -> video rows (nulls dropped)
                    var vids = (await(await ChannelVideos(c)).BlockTransform(Video, par)).NotNull().ToReadOnly();
                    // Video rows -> recommendation rows, flattened (null results and null rows dropped)
                    var recs = (await vids.BlockTransform(Recommends, par)).NotNull().SelectMany(r => r).NotNull().ToReadOnly();
                    return(vids, recs);
                },
                        // EnsureOrdered = false: results may be emitted in completion order rather than input order
                        new ExecutionDataflowBlockOptions {
                    MaxDegreeOfParallelism = par, EnsureOrdered = false
                }
                        );

                // NOTE(review): Produce presumably posts every seed to the block and then completes it; the loop below
                // only terminates once the block is completed - confirm against the helper.
                var produceTask = seeds.Produce(vrTransform);
                // NOTE(review): 500_000 is presumably the number of rows buffered per saved file - confirm against RowSink.
                var vidSink     = new RowSink <VideoRow>((c, name) => SaveParquet(c, name, analysisDir), "Videos", 500_000);
                var recSink     = new RowSink <RecommendRow>((c, name) => SaveParquet(c, name, analysisDir), "Recommends", 500_000);

                // Drain the block: each received item is one channel's videos and recommendations
                while (await vrTransform.OutputAvailableAsync())
                {
                    var(vids, recs) = await vrTransform.ReceiveAsync();

                    await Task.WhenAll(vidSink.Add(vids), recSink.Add(recs));
                }

                // Signal both sinks that no more rows will arrive
                await Task.WhenAll(vidSink.End(), recSink.End());

                // Awaited last so that any failure of the producer is observed
                await produceTask;
            }

            // Collects all channels in one pass and saves the non-null rows in a single call.
            async Task SaveChannels()
            {
                {
                    var channels = await seeds.BlockTransform(Channel, Cfg.Parallel,
                                                              progressUpdate : p => Log.Information("Collecting channels {Channels}/{Total}. {Speed}", p.Results.Count,
                                                                                                    seeds.Count, p.Speed("channels")));
                    await SaveParquet(channels.NotNull(), "Channels", analysisDir);
                }
            }

            // Both pipelines run concurrently
            await Task.WhenAll(SaveVideosAndRecommends(), SaveChannels());
        }
Beispiel #6
0
        /// <summary>
        ///   Runs <c>UpdateChannel</c> over every configured seed channel and logs how long the update took together
        ///   with the number of channels and videos in the results.
        /// </summary>
        /// <returns>A task that completes when all channels have been updated and the summary has been logged.</returns>
        public async Task UpdateData()
        {
            Log.Information("Starting incremental data update");

            var seeds = (await Cfg.LoadChannelConfig()).Seeds;

            // sufficiently parallel inside
            var updateTask = seeds.BlockTransform(
                UpdateChannel,
                Cfg.Parallel,
                progressUpdate: p => Log.Verbose(
                    "Channel update progress {Channels}/{Total} {Speed}",
                    p.Results.Count, seeds.Count, p.Speed("channels").Humanize()));
            var timed = await updateTask.WithDuration();

            var updated = timed.Result;
            Log.Information(
                "Completed updates successfully in {Duration}. {Channels} channels, {Videos} videos.",
                timed.Duration.Humanize(),
                updated.Count,
                updated.Sum(r => r.videos.Count));
        }
        /// <summary>
        ///   Starts the configured container as an Azure container group. An existing container group with the same
        ///   name in the configured resource group is deleted first; then a new Linux group is created in US West that
        ///   runs <c>dotnet /app/ytnetworks.dll</c> followed by <paramref name="args" />, with restart policy "never".
        /// </summary>
        /// <param name="log">Logger that receives the "starting container" message.</param>
        /// <param name="cfg">
        ///   Configuration supplying the service principal, subscription, resource group, container settings and the
        ///   root values forwarded to the container as environment variables.
        /// </param>
        /// <param name="args">Arguments appended after the application dll on the container command line.</param>
        /// <returns>The created container group.</returns>
        public static async Task <IContainerGroup> Start(ILogger log, Cfg cfg, string[] args)
        {
            var sp        = cfg.App.ServicePrincipal;
            var container = cfg.App.Container;
            var creds     = new AzureCredentialsFactory().FromServicePrincipal(sp.ClientId, sp.Secret, sp.TennantId,
                                                                               AzureEnvironment.AzureGlobalCloud);
            var azure = Azure.Authenticate(creds).WithSubscription(cfg.App.SubscriptionId);

            // The subscription is already selected via WithSubscription; the former synchronous
            // GetCurrentSubscription() call had an unused result and has been removed.

            var rg    = cfg.App.ResourceGroup;
            var group = await azure.ContainerGroups.GetByResourceGroupAsync(rg, container.Name);

            // Remove a previous group of the same name so the definition below can be created afresh
            if (group != null)
            {
                await azure.ContainerGroups.DeleteByIdAsync(group.Id);
            }

            var cArgs = new[] { "/app/ytnetworks.dll" }.Concat(args).ToArray();

            log.Information("starting container {Image} {Args}", container.ImageName, cArgs.Join(" "));
            var containerGroup = await azure.ContainerGroups.Define(container.Name)
                                 .WithRegion(Region.USWest)
                                 .WithExistingResourceGroup(rg)
                                 .WithLinux()
                                 .WithPrivateImageRegistry(container.Registry, container.RegistryCreds.Name, container.RegistryCreds.Secret)
                                 .WithoutVolume()
                                 .DefineContainerInstance(container.Name)
                                 .WithImage($"{container.Registry}/{container.ImageName}")
                                 .WithoutPorts()
                                 .WithCpuCoreCount(container.Cores)
                                 .WithMemorySizeInGB(container.Mem)
                                 // Forward the storage connection string and environment name to the container
                                 .WithEnvironmentVariables(new Dictionary <string, string> {
                { $"YtNetworks_{nameof(RootCfg.AzureStorageCs)}", cfg.Root.AzureStorageCs },
                { $"YtNetworks_{nameof(RootCfg.Env)}", cfg.Root.Env }
            })
                                 .WithStartingCommandLine("dotnet", cArgs)
                                 .Attach()
                                 .WithRestartPolicy(ContainerGroupRestartPolicy.Never)
                                 .CreateAsync();

            return containerGroup;
        }
Beispiel #8
0
        /// <summary>
        ///     Saves the configuration and then collects data for all seed channels, writing everything under a
        ///     directory named after the current UTC date (yyyy-MM-dd). The following outputs are created:
        ///     Channels.parquet - Basic channel info and statistics about recommendations at the granularity of Channel,Date
        ///     Videos.parquet - Video rows for the videos of the seed channels
        ///     Recommends.parquet - Details about video recommendations at the granularity of From,To,Date ??
        ///     NOTE(review): the granularities above are carried over from the original comment and cannot be
        ///     confirmed from this method alone.
        /// </summary>
        /// <returns>A task that completes once channels, videos and recommendations have been saved.</returns>
        public async Task SaveChannelRelationData()
        {
            // Output directory name: today's UTC date
            var analysisDir = DateTime.UtcNow.ToString("yyyy-MM-dd");

            await SaveCfg(analysisDir);

            var channelCfg = await Cfg.LoadChannelConfig();
            var seeds      = channelCfg.Seeds;

            // Collects and saves the channel rows
            async Task LoadChannels()
            {
                var channels = await seeds.BlockTransform(Channel, Cfg.ParallelCollect,
                                                          progressUpdate: p => Log.Information("Collecting channels {Channels}/{Total}. {Speed}", p.Results.Count, seeds.Count, p.Speed("channels")));

                await SaveParquet(channels, "Channels", analysisDir);
            }

            // Collects the video lists of all channels and flattens them into one collection
            async Task <IReadOnlyCollection <ChannelVideoRow> > LoadChannelVideos()
            {
                var cvs = await seeds.BlockTransform(ChannelVideos, Cfg.ParallelCollect,
                                                     progressUpdate: p => Log.Information("Collecting channel videos {Channels}/{Total}. {Speed}", p.Results.Count, seeds.Count, p.Speed("channels")));

                return cvs.SelectMany(cv => cv).ToList();
            }

            // Channels and channel videos are independent, so collect them concurrently
            var channelsTask      = LoadChannels();
            var channelVideosTask = LoadChannelVideos();
            await Task.WhenAll(channelsTask, channelVideosTask);
            var channelVideos = await channelVideosTask;

            // Materialise once: the rows are both saved and used as the input of the recommendation pass
            var videos = (await channelVideos.BlockTransform(Video, Cfg.ParallelCollect,
                                                             progressUpdate: p => Log.Information("Collecting videos {Videos}/{Total}. {Speed}", p.Results.Count, channelVideos.Count, p.Speed("videos"))))
                         .NotNull()
                         .ToList();

            await SaveParquet(videos, "Videos", analysisDir);

            // The total reported here is the number of videos actually processed (null rows were dropped above)
            var recommendsResult = (await videos.BlockTransform(Recommends, Cfg.ParallelCollect,
                                                                progressUpdate: p => Log.Information("Collecting channel video recommendations {Videos}/{Total}. {Speed}", p.Results.Count, videos.Count, p.Speed("videos"))))
                                   .NotNull();

            await SaveParquet(recommendsResult.SelectMany(r => r), "Recommends", analysisDir);
        }
        /// <summary>
        ///   For every seed channel (in random order) looks up the stored channel video list, re-fetches videos that
        ///   have no stored video record, and re-fetches recommendations for videos that have no stored
        ///   recommendations. The ids that were fixed and the ids that are still broken are logged per channel.
        /// </summary>
        /// <returns>A task that completes when all seed channels have been processed.</returns>
        public async Task RefreshMissingVideos()
        {
            var channelCfg = await Cfg.LoadChannelConfig();

            await channelCfg.Seeds.Randomize().BlockAction(async c => {
                Log.Information("Started fixing '{Channel}'", c.Title);

                var storedVids = (await Yt.ChannelVideosCollection.Get(c.Id))?.Vids;
                if (storedVids == null)
                {
                    Log.Error("'{Channel}' has no videos stored", c.Title);
                    return;
                }

                // Drop null entries once, so both passes below work on the same list and neither dereferences null.
                // NOTE(review): the identity Select is kept from the original; presumably it only widens the static
                // type for NotNull - confirm before removing.
                var channelVids = storedVids.Select(v => v).NotNull().ToList();

                // Ids of listed videos that have no stored video record
                var missingVids = (await channelVids
                                   .BlockTransform(async v => await Yt.Videos.Get(v.VideoId) == null ? v.VideoId : null, Cfg.ParallelCollect))
                                  .NotNull().ToList();
                if (missingVids.Count > 0)
                {
                    var videosUpdated = await missingVids.BlockTransform(async v => (Id: v, Video: await Yt.GetAndUpdateVideo(v)), Cfg.Parallel);

                    Log.Information("'{Channel}' Missing video's fixed [{Fixed}], broken [{Broken}]",
                                    c.Title,
                                    videosUpdated.Where(v => v.Video.Video != null).Join("|", v => v.Id),
                                    videosUpdated.Where(v => v.Video.Video == null).Join("|", v => v.Id));
                }

                // Stored recommendations per video; a null Recs means nothing is stored for that video
                var recommends = await channelVids.BlockTransform(async v =>
                                                                  (From: v, Recs: await Yt.RecommendedVideosCollection.Get(v.VideoId)), Cfg.ParallelCollect);
                var missingRecs = await recommends.Where(r => r.Recs == null).BlockTransform(async r =>
                                                                                             (From: r.From, Recs: await Yt.GetAndUpdateRecommendedVideos(r.From)), Cfg.Parallel);
                Log.Information("'{Channel}' Missing video's recommendations [{Fixed}], broken [{Broken}]",
                                c.Title,
                                missingRecs.Where(v => v.Recs != null).Join("|", v => v.From.VideoId),
                                missingRecs.Where(v => v.Recs == null).Join("|", v => v.From.VideoId));
            });
        }
Beispiel #10
0
 /// <summary>
 ///   Creates a blob file store using the configured data storage connection string.
 /// </summary>
 /// <param name="cfg">Configuration supplying the storage settings.</param>
 /// <param name="path">Root path of the store; when null, <c>cfg.App.Storage.DbPath</c> is used.</param>
 /// <returns>A new <c>AzureBlobFileStore</c>.</returns>
 public static ISimpleFileStore DataStore(this Cfg cfg, StringPath path = null)
 {
     var storage = cfg.App.Storage;
     return new AzureBlobFileStore(storage.DataStorageCs, path ?? storage.DbPath);
 }
 /// <summary>
 ///   Starts the container without any additional arguments by delegating to the three-argument overload with an
 ///   empty argument array.
 /// </summary>
 /// <param name="log">Logger passed to the overload.</param>
 /// <param name="cfg">Configuration passed to the overload.</param>
 /// <returns>The task returned by the three-argument overload.</returns>
 public static Task <IContainerGroup> Start(ILogger log, Cfg cfg)
 {
     var noArgs = new string[0];
     return Start(log, cfg, noArgs);
 }
Beispiel #12
0
        /// <summary>
        ///   Splits the main channels into evenly sized random batches and starts one container group per batch.
        ///   Each container runs the "update" command with the given update type and the "|"-joined channel ids of its
        ///   batch. Container groups are named <c>{container name}-fleet-{index}</c> and are assigned regions from
        ///   <c>Regions</c> in rotation by batch index.
        /// </summary>
        /// <param name="log">Logger used while loading the channel sheets and for the summary message.</param>
        /// <param name="cfg">Configuration supplying sheets, container settings, resource group and parallelism.</param>
        /// <param name="optionUpdateType">Update type passed to each container via the <c>-t</c> argument.</param>
        /// <returns>The created container groups; empty when there are no channels.</returns>
        public static async Task <IReadOnlyCollection <IContainerGroup> > StartFleet(ILogger log, Cfg cfg, UpdateType optionUpdateType)
        {
            var sheets = (await ChannelSheets.MainChannels(cfg.App.Sheets, log)).ToList();

            // Without channels the batch size below would be computed from 0/0 (NaN), which is not a valid batch size
            if (sheets.Count == 0)
            {
                log.Information("No channels found. No fleet containers started");
                return Array.Empty <IContainerGroup>();
            }

            // Number of containers needed to stay within ChannelsPerContainer, then spread the channels evenly over them
            var containerCount = Math.Ceiling(sheets.Count / (double)cfg.App.ChannelsPerContainer);
            var evenBatchSize  = (int)Math.Ceiling(sheets.Count / containerCount);

            var batches = sheets.Randomize().Batch(evenBatchSize).Select((b, i) => (batch: b.ToList(), name: $"{cfg.App.Container.Name}-fleet-{i}", i)).ToList();

            var azure = GetAzure(cfg);

            // before starting the fleet, ensure none of its containers are already running
            await batches.BlockAction(async b => await EnsureNotRunning(b.name, azure, cfg.App.ResourceGroup), cfg.App.DefaultParallel);

            var fleet = await batches.BlockTransform(async b => {
                var(batch, fleetName, i) = b;
                // Rotate through the available regions by batch index
                var region = Regions[i % Regions.Length];
                var args   = new[] {
                    "update",
                    "-t", optionUpdateType.ToString(),
                    "-c", batch.Join("|", c => c.Id)
                };
                var group = await ContainerGroup(cfg, azure, fleetName, region, args);
                return await group.CreateAsync();
            }, cfg.App.DefaultParallel);

            log.Information("Started fleet containers: {Containers}", fleet.Join(", ", f => f.Name));
            return fleet;
        }
Beispiel #13
0
        /// <summary>
        ///   Builds a store over the data store rooted at <c>cfg.App.Storage.DbPath</c>.
        /// </summary>
        /// <param name="cfg">Configuration supplying the storage path.</param>
        /// <param name="log">Logger handed to the store.</param>
        /// <returns>The newly constructed store.</returns>
        public static YtStore YtStore(this Cfg cfg, ILogger log) =>
            new YtStore(cfg.DataStore(cfg.App.Storage.DbPath), log);
Beispiel #14
0
 /// <summary>
 ///   Creates a client from the configured API keys.
 /// </summary>
 /// <param name="cfg">Configuration supplying <c>App.YTApiKeys</c>.</param>
 /// <param name="log">Logger handed to the client.</param>
 /// <returns>The newly constructed client.</returns>
 public static YtClient YtClient(this Cfg cfg, ILogger log)
 {
     var apiKeys = cfg.App.YTApiKeys;
     return new YtClient(apiKeys, log);
 }