コード例 #1
0
 /// <summary>
 /// For each storage tier (Premium and Standard), makes sure the tier's blob container exists with
 /// container-level public access and then populates it via <c>PopulateContainer</c>.
 /// </summary>
 /// <param name="state">Branch state forwarded to <c>PopulateContainer</c>.</param>
 /// <param name="paths">Paths forwarded to <c>PopulateContainer</c>.</param>
 /// <param name="log">Logger forwarded to <c>PopulateContainer</c>.</param>
 /// <returns>The task produced by <c>BlockAction</c> over the tiers.</returns>
 Task<long> CreateContainers(BranchState state, string[] paths, ILogger log) =>
 new[] { Premium, Standard }.BlockAction(async tier => {
     var container = Stores.Store(tier: tier).Container;

     // Create-if-missing in one service call instead of Exists + Create + SetAccessPolicy.
     // This removes the check-then-act race (another process creating the container between the
     // existence check and the create) and, as before, leaves an already existing container's
     // access policy untouched.
     // NOTE(review): assumes Container is an Azure.Storage.Blobs BlobContainerClient - confirm.
     await container.CreateIfNotExistsAsync(PublicAccessType.BlobContainer);

     await PopulateContainer(tier, state, paths, log);
 });
コード例 #2
0
 /// <summary>
 /// Wires up the logger and Google configuration, resolves the DbStage store and derives the
 /// local working directory ("recfluence/parler" under the system temp path).
 /// </summary>
 /// <param name="log">Logger kept in <c>Log</c>.</param>
 /// <param name="stores">Store provider used to resolve the DbStage store.</param>
 /// <param name="Cfg">Google configuration kept in <c>Cfg</c>.</param>
 public Parler(ILogger log, BlobStores stores, GoogleCfg Cfg)
 {
     var stageStore = stores.Store(DataStoreType.DbStage);
     var workingDir = Path.GetTempPath().AsPath().Combine("recfluence", "parler");

     Log      = log;
     this.Cfg = Cfg;
     Db       = stageStore;
     Dir      = workingDir;
 }
コード例 #3
0
        /// <summary>
        /// Runs the "upgrade-partitions" command: for each selected directory it lists the files that
        /// sit inside partitions, builds an optimise plan and executes it either locally or through
        /// the pipe context, depending on the plan size.
        /// </summary>
        /// <param name="console">Console that supplies the cancellation token for distributed runs.</param>
        public async ValueTask ExecuteAsync(IConsole console)
        {
            // Dirs is an optional '|' separated filter; no filter means every known directory.
            var requested = Dirs?.UnJoin('|');
            var known     = new[] { "videos", "recs", "captions" };
            var dirs      = requested == null ? known : known.Where(d => requested.Contains(d));
            var store     = Stores.Store(DataStoreType.DbStage);

            foreach (var dir in dirs)
            {
                Log.Information("upgrade-partitions - {Dir} started", dir);

                // only optimise from within partitions (paths with exactly three tokens)
                var listing        = store.Files(dir, allDirectories: true).SelectMany();
                var partitionFiles = await listing.Where(f => f.Path.Tokens.Count == 3).ToListAsync();

                var plan = JsonlStoreExtensions.OptimisePlan(dir, partitionFiles, Cfg.Optimise, Log);

                if (plan.Count >= 10)
                {
                    // large plan: spread the work over many machines
                    await plan.Process(Ctx,
                                       b => Stage.ProcessOptimisePlan(b, store, PipeArg.Inject<ILogger>()),
                                       new() { MaxParallel = 12, MinWorkItems = 1 },
                                       log: Log, cancel: console.GetCancellationToken());
                    continue;
                }

                // small plan: run it in this process
                await store.Optimise(Cfg.Optimise, plan, Log);
            }
        }
コード例 #4
0
        /// <summary>
        /// Builds the scripts used to create or replace the warehouse database for a branch.
        /// The visible part generates a SAS URI for the DbStage container, opens a connection without
        /// db/schema context and prepares the "db copy" (clone) script.
        /// </summary>
        /// <remarks>
        /// NOTE(review): this definition is cut off in the visible source right after the clone branch
        /// (no terminating semicolon or closing brace), so the rest of the method is not documented here.
        /// </remarks>
        /// <param name="state">Branch state; Clone and CloneDb select the clone script.</param>
        /// <param name="log">Logger passed to <c>Sf.Open</c>.</param>
        public async Task CreateOrReplace(BranchState state, ILogger log)
        {
            var sw = Stopwatch.StartNew();

            var schema    = Sf.Cfg.Schema;
            var store     = Stores.Store(DataStoreType.DbStage);
            var container = store.Container;
            // SAS URI with List and Read permissions that expires 100 years from now
            var sasUri    = container.GenerateSasUri(List | Read, DateTimeOffset.UtcNow.AddYears(100));
            // the stage url is the container location without the query; the query is kept separately as the token
            var stageUrl  = $"azure://{sasUri.Host}{sasUri.AbsolutePath}";
            var sasToken  = sasUri.Query;

            using var conn = await Sf.Open(log, "", ""); // connection sans db & schema. If you specify ones that doesn't exist, all queries hang.

            var db = Sf.Cfg.DbName();
            IEnumerable <Script> scripts;
            // JSON comment attached to the database: an expiry two days from now plus the environment email
            var dbComment = new DbComment {
                Expires = DateTime.UtcNow.AddDays(2),
                Email   = EnvCfg.Email
            }.ToJson(JCfg);

            if (state.In(Clone, CloneDb))
            {
                // clone the configured database (Sf.Cfg.Db) into the target database name
                scripts = new[] {
                    new Script("db copy", @$ "create or replace database {db} clone {Sf.Cfg.Db} comment='{dbComment}'")
                }
            }
コード例 #5
0
 /// <summary>
 /// Stores the injected collaborators and wraps the DbStage store (together with the logger)
 /// in <c>DbStore</c>.
 /// </summary>
 /// <param name="stores">Store provider used to resolve the DbStage store.</param>
 /// <param name="cfg">Application configuration kept in <c>Cfg</c>.</param>
 /// <param name="sf">Snowflake connection provider kept in <c>Sf</c>.</param>
 /// <param name="pipeCtx">Pipe context kept in <c>PipeCtx</c>.</param>
 /// <param name="ytWeb">Web client kept in <c>Scraper</c>.</param>
 /// <param name="api">API client kept in <c>Api</c>.</param>
 /// <param name="log">Logger handed to <c>DbStore</c>.</param>
 public YtCollector(BlobStores stores, AppCfg cfg, SnowflakeConnectionProvider sf, IPipeCtx pipeCtx, YtWeb ytWeb,
                    YtClient api, ILogger log)
 {
     var stageStore = stores.Store(DataStoreType.DbStage);

     DbStore = new(stageStore, log);
     Cfg     = cfg;
     Sf      = sf;
     PipeCtx = pipeCtx;
     Scraper = ytWeb;
     Api     = api;
 }
コード例 #6
0
 /// <summary>
 /// Processes the traffic source exports against the Private store, using the configured
 /// scraper and logger.
 /// </summary>
 /// <param name="console">Console of the running command (not used here).</param>
 public async ValueTask ExecuteAsync(IConsole console) =>
     await TrafficSourceExports.Process(Stores.Store(DataStoreType.Private), Scraper, Log);
コード例 #7
0
    /// <summary>
    /// Creates (or clones) the warehouse database and schema, (re)creates the file formats and the
    /// stages for the DbStage and Private stores, and grants the configured admin and read roles
    /// access to them.
    /// </summary>
    /// <param name="mode">
    /// CloneProd clones <c>Sf.Cfg.Db</c> into the target database (replacing it);
    /// CreateSchemaIfNotExists returns without doing anything when the schema already exists;
    /// any other mode creates the database and schema only if they are missing.
    /// </param>
    /// <param name="schema">Schema to set up; when null, <c>Sf.Cfg.Schema</c> is used.</param>
    /// <param name="log">Logger used for the connections and the completion message.</param>
    /// <exception cref="InvalidOperationException">
    /// Thrown when cloning is requested without a configured <c>DbSuffix</c>, or when a store type
    /// has no stage name mapped.
    /// </exception>
    public async Task CreateOrReplace(WarehouseCreateMode mode, [CanBeNull] string schema, ILogger log)
    {
        if (mode == CloneProd && Sf.Cfg.DbSuffix.NullOrEmpty())
        {
            // A specific exception type instead of a bare System.Exception; still caught by catch (Exception).
            throw new InvalidOperationException(
                "DbSuffix needs to be set when cloning a db. Typically you should set warehouse.mode to \"Branch\" in local.appcfg.json to create a dev warehouse");
        }

        var sw = Stopwatch.StartNew();

        schema ??= Sf.Cfg.Schema;

        // One stage per store: a long-lived (100 year) List+Read SAS for the store's container.
        var stages = new[] { DbStage, Private }.Select(type => {
            var store     = Stores.Store(type);
            var container = ((AzureBlobFileStore)store).Container;
            var sasUri    = container.GenerateSasUri(List | Read, DateTimeOffset.UtcNow.AddYears(100));
            var name      = type switch {
                Private => "yt_private",
                DbStage => "yt_data",
                _ => throw new InvalidOperationException($"store type {type} has no stage")
            };
            return new { Uri = $"azure://{sasUri.Host}{sasUri.AbsolutePath}", Sas = sasUri.Query, Name = name };
        }).ToArray();

        if (mode == CreateSchemaIfNotExists)
        {
            using var dbConn = await Sf.Open(log, schema: "");

            // Case-insensitive equality rather than ILIKE: with ILIKE any '_' or '%' in the schema
            // name acts as a wildcard, so a different schema could be reported as existing and the
            // creation below would be skipped.
            var schemaExists = await dbConn.ExecuteScalar<bool>($"{Scope} - schema exists",
                                                                $"select exists(select * from information_schema.schemata where catalog_name = current_database() and lower(schema_name) = lower('{schema}'))");

            if (schemaExists)
            {
                return;
            }
        }

        // use a non-contextual connection for creating databases/schema's
        using var conn = await Sf.Open(log, "", "");

        var db        = Sf.Cfg.DbName();
        var dbComment = new DbComment {
            Expires = DateTime.UtcNow.AddDays(2),
            Email   = EnvCfg.Email
        }.ToJson(JCfg);

        var scripts = (mode == CloneProd
                ? new[] { new Script("db copy", @$"create or replace database {db} clone {Sf.Cfg.Db} comment='{dbComment}'") }
                : new[] {
                    new Script("db create", @$"create database if not exists {db} comment='{dbComment}'"), // don't replace
                    new Script("schema", $"create schema if not exists {db}.{schema}")
                })
            .Concat(new Script("file formats",
                               $"create or replace file format {db}.{schema}.json type = 'json'",
                               $"create or replace file format {db}.{schema}.json_zst type = 'json' compression = ZSTD",
                               $"create or replace file format {db}.{schema}.tsv type = 'csv' field_delimiter = '\t' validate_UTF8 = false  NULL_IF=('')",
                               $"create or replace file format {db}.{schema}.tsv_header type = 'csv' field_delimiter = '\t' validate_UTF8 = false  NULL_IF=('') skip_header=1 field_optionally_enclosed_by ='\"'",
                               $"create or replace file format {db}.{schema}.tsv_header_no_enclose type = 'csv' field_delimiter = '\t' validate_UTF8 = false  NULL_IF=('') skip_header=1"))
            .Concat(stages.Select(stage => new Script($"stage {stage.Name}",
                                                      $"create or replace stage {db}.{schema}.{stage.Name} url='{stage.Uri}' credentials=(azure_sas_token='{stage.Sas}') file_format=(type=json compression=gzip)"
                                                      )))
            .Concat(WhCfg.AdminRoles.Select(r => new Script($"init role {r}", ScriptMode.Parallel,
                                                            $"grant all on database {db} to role {r}",
                                                            $"grant all on schema {db}.{schema} to role {r}",
                                                            $"grant all on all tables in schema {db}.{schema} to role {r}",
                                                            $"grant all on future tables in schema {db}.{schema} to role {r}",
                                                            $"grant all on all views in schema {db}.{schema} to role {r}",
                                                            $"grant all on future views in schema {db}.{schema} to role {r}",
                                                            $"grant all on all stages in database {db} to role {r}",
                                                            $"grant all on all functions in database {db} to role {r}"
                                                            )))
            .Concat(WhCfg.ReadRoles.Select(r => new Script($"init role {r}", ScriptMode.Parallel,
                                                           $"grant usage,monitor on database {db} to role {r}",
                                                           $"grant usage, monitor on all schemas in database {db} to role {r}",
                                                           $"grant select on future tables in schema {db}.{schema} to role {r}",
                                                           $"grant select on future views in schema {db}.{schema} to role {r}",
                                                           $"grant select on all tables in schema {db}.{schema} to role {r}",
                                                           $"grant select on all views in schema {db}.{schema} to role {r}",
                                                           $"grant usage on all stages in schema {db}.{schema} to role {r}",
                                                           $"grant usage on all functions in schema {db}.{schema} to role {r}",
                                                           $"grant usage on all file formats in schema {db}.{schema} to role {r}"
                                                           )));

        // Scripts run one after another; the statements inside a script run with a parallelism of 1
        // for Sequential scripts and WhCfg.MetadataParallel otherwise.
        foreach (var s in scripts)
        {
            await s.Sqls.BlockDo<string>(q => conn.Execute(s.Name, q), s.Mode == Sequential ? 1 : WhCfg.MetadataParallel);
        }

        log.Information("Create Warehouse - {Db} created/updated in {Duration}", db, sw.Elapsed.HumanizeShort());
    }
コード例 #8
0
        /// <summary>
        /// Creates or replaces the warehouse database for a branch: clones or creates the database,
        /// (re)creates the yt_data stage and the file formats, then applies the grants for the
        /// configured admin and read roles.
        /// </summary>
        /// <param name="state">Branch state; Clone and CloneDb clone <c>Sf.Cfg.Db</c>, anything else creates a fresh database.</param>
        /// <param name="log">Logger used for the connection and the completion message.</param>
        public async Task CreateOrReplace(BranchState state, ILogger log)
        {
            var timer = Stopwatch.StartNew();

            var schema   = Sf.Cfg.Schema;
            var sasUri   = Stores.Store(DataStoreType.DbStage).Container
                                 .GenerateSasUri(List | Read, DateTimeOffset.UtcNow.AddYears(100));
            var stageUrl = $"azure://{sasUri.Host}{sasUri.AbsolutePath}";
            var sasToken = sasUri.Query;

            // connection sans db & schema. If you specify ones that don't exist, all queries hang.
            using var conn = await Sf.Open(log, "", "");

            var db        = Sf.Cfg.DbName();
            var dbComment = new DbComment {
                Expires = DateTime.UtcNow.AddDays(2),
                Email   = EnvCfg.Email
            }.ToJson(JCfg);

            // 1. database (and schema): clone the configured database or start from an empty one
            Script[] dbScripts;
            if (state.In(Clone, CloneDb))
            {
                dbScripts = new[] {
                    new Script("db copy", @$"create or replace database {db} clone {Sf.Cfg.Db} comment='{dbComment}'")
                };
            }
            else
            {
                dbScripts = new[] {
                    new Script("db create", @$"create or replace database {db} comment='{dbComment}'"),
                    new Script("schema", $"create schema if not exists {db}.{schema}"),
                };
            }

            // 2. stage + file formats
            var stageScript = new Script("stage",
                                         $"create or replace stage {db}.{schema}.yt_data url='{stageUrl}' credentials=(azure_sas_token='{sasToken}') file_format=(type=json compression=gzip)",
                                         $"create or replace file format {db}.{schema}.json type = 'json'",
                                         $"create or replace file format {db}.{schema}.json_zst type = 'json' compression = ZSTD",
                                         $"create or replace file format {db}.{schema}.tsv type = 'csv' field_delimiter = '\t' validate_UTF8 = false  NULL_IF=('')",
                                         $"create or replace file format {db}.{schema}.tsv_header type = 'csv' field_delimiter = '\t' validate_UTF8 = false  NULL_IF=('') skip_header=1 field_optionally_enclosed_by ='\"'",
                                         $"create or replace file format {db}.{schema}.tsv_header_no_enclose type = 'csv' field_delimiter = '\t' validate_UTF8 = false  NULL_IF=('') skip_header=1");

            // 3. full access for admin roles
            var adminScripts = WhCfg.AdminRoles.Select(role =>
                new Script($"init role {role}", ScriptMode.Parallel,
                           $"grant all on database {db} to role {role}",
                           $"grant all on schema {db}.{schema} to role {role}",
                           $"grant all on all tables in schema {db}.{schema} to role {role}",
                           $"grant all on future tables in schema {db}.{schema} to role {role}",
                           $"grant all on all views in schema {db}.{schema} to role {role}",
                           $"grant all on future views in schema {db}.{schema} to role {role}",
                           $"grant all on all stages in database {db} to role {role}",
                           $"grant all on all functions in database {db} to role {role}"));

            // 4. read-only access for read roles
            var readScripts = WhCfg.ReadRoles.Select(role =>
                new Script($"init role {role}", ScriptMode.Parallel,
                           $"grant usage,monitor on database {db} to role {role}",
                           $"grant usage, monitor on all schemas in database {db} to role {role}",
                           $"grant select on future tables in database {db} to role {role}",
                           $"grant select on future views in database {db} to role {role}",
                           $"grant select on all tables in database {db} to role {role}",
                           $"grant select on all views in database {db} to role {role}",
                           $"grant usage on all stages in database {db} to role {role}",
                           $"grant usage on all functions in database {db} to role {role}"));

            var scripts = dbScripts.Concat(stageScript).Concat(adminScripts).Concat(readScripts);

            // Scripts run in order; statements of a non-sequential script run in parallel.
            foreach (var script in scripts)
            {
                var parallelism = script.Mode == Sequential ? 1 : WhCfg.MetadataParallel;
                await script.Sqls.BlockAction(q => conn.Execute(script.Name, q), parallelism);
            }

            log.Information("Create Warehouse - {Db} created/updated in {Duration}", db, timer.Elapsed.HumanizeShort());
        }
コード例 #9
0
 /// <summary>
 /// Keeps the Snowflake connection provider and builds <c>BlobIndex</c> on top of the Results store.
 /// </summary>
 /// <param name="stores">Store provider used to resolve the Results store.</param>
 /// <param name="sf">Snowflake connection provider kept in <c>Sf</c>.</param>
 public YtIndexResults(BlobStores stores, SnowflakeConnectionProvider sf)
 {
     var resultsStore = stores.Store(DataStoreType.Results);

     Sf        = sf;
     BlobIndex = new(resultsStore);
 }