Esempio n. 1
0
        /// <summary>Loads only the data staged since the last load into <paramref name="table" />.</summary>
        async Task Incremental(LoggedConnection db, string table, StageTableCfg t, DateTime latestTs)
        {
            // Only optimise files written after the previous load before copying them in.
            await DbStore.Optimise(Cfg.Optimise, t.Dir, latestTs.FileSafeTimestamp(), db.Log);

            var(copyRes, elapsed) = await CopyInto(db, table, t).WithDuration();
            var(_, loadedRows, loadedSize) = copyRes;

            db.Log.Information("StageUpdate - {Table} incremental load of {Rows} rows ({Size}) took {Duration}",
                               table, loadedRows, loadedSize.Humanize("#.#"), elapsed.HumanizeShort());
        }
Esempio n. 2
0
        /// <summary>Truncates <paramref name="table" /> and reloads it from the stage in full.</summary>
        async Task FullLoad(LoggedConnection db, string table, StageTableCfg t)
        {
            // A full load re-reads everything, so optimise the whole directory first (native stores only).
            if (t.IsNativeStore)
            {
                await DbStore.Optimise(Cfg.Optimise, t.Dir, null, db.Log);
            }

            // no transaction, stage tables aren't reported on so don't need to be available
            await db.Execute("truncate table", $"truncate table {table}");

            var(copyRes, elapsed) = await CopyInto(db, table, t).WithDuration();
            var(_, loadedRows, loadedSize) = copyRes;

            db.Log.Information("StageUpdate - {Table} full load of {Rows} rows ({Size}) took {Duration}",
                               table, loadedRows, loadedSize.Humanize("#.#"), elapsed.HumanizeShort());
        }
Esempio n. 3
0
 AzureBlobFileStore Store(StageTableCfg t) => Stores.Store(t.StoreType);
Esempio n. 4
0
        /// <summary>
        ///   Copies staged JSON files into <paramref name="table" /> and returns what was loaded.
        /// </summary>
        /// <param name="db">Connection used to run the copy and the follow-up history query.</param>
        /// <param name="table">Destination table name.</param>
        /// <param name="t">Stage table configuration; its store type selects the warehouse stage and path.</param>
        /// <returns>The file names loaded, total row count and total byte size.</returns>
        async Task <(string[] files, long rows, ByteSize size)> CopyInto(ILoggedConnection <IDbConnection> db, string table, StageTableCfg t)
        {
            // capture the server-side start time so copy_history below only returns this load's files
            var startTime = await db.ExecuteScalar <string>("current time", "select current_timestamp()::string");

            var(stage, path) = t.StoreType switch {
                DataStoreType.Db => (Cfg.Stage, StorageCfg.DbPath),
                DataStoreType.Private => (Cfg.Private, null),
                _ => throw new InvalidOperationException($"No warehouse stage for store type {t.StoreType}")
            };

            // join segments with "/" (no surrounding spaces) — a " / " separator would produce an
            // invalid stage path like "@stage / db / dir/" (the sibling overload uses plain slashes)
            var sql = $"copy into {table} from @{new[] {stage, path}.Concat(t.Dir.Tokens).NotNull().Join("/")}/ file_format=(type=json)";
            await db.Execute("copy into", sql);

            // sf should return this info from copy_into (it's in their UI, but not the .net or jdbc drivers).
            // The int that is returned is the # of rows from the first file loaded, so we read the
            // per-file stats from copy_history ourselves.
            var copyResults = await db.Query <(string fileName, long rows, long size)>("copy results",
                                                                                       "select file_name, row_count, file_size " +
                                                                                       $"from table(information_schema.copy_history(table_name=>'{table}', start_time=>'{startTime}'::timestamp_ltz))");

            var res = (copyResults.Select(r => r.fileName).ToArray(), copyResults.Sum(r => r.rows), copyResults.Sum(r => r.size).Bytes());

            return res;
        }
    }
Esempio n. 5
0
        /// <summary>
        ///   Copies the staged JSON files for <paramref name="t" /> into <paramref name="table" />
        ///   and reports the files, rows and bytes that were loaded.
        /// </summary>
        async Task <(string[] files, long rows, ByteSize size)> CopyInto(LoggedConnection db, string table, StageTableCfg t)
        {
            // Remember when the copy began (server clock) so copy_history can be filtered to just this run.
            var startTime = await db.ExecuteScalar <string>("current time", "select current_timestamp()::string");

            await db.Execute("copy into", $"copy into {table} from @{Cfg.Stage}/{StorageCfg.DbPath}/{t.Dir}/ file_format=(type=json)");

            // Snowflake's copy into only returns the row count of the first file loaded (the full detail
            // is in their UI but not the .net/jdbc drivers), so read the per-file stats from copy_history.
            var copyResults = await db.Query <(string fileName, long rows, long size)>("copy results",
                                                                                       "select file_name, row_count, file_size " +
                                                                                       $"from table(information_schema.copy_history(table_name=>'{table}', start_time=>'{startTime}'::timestamp_ltz))");

            var files = copyResults.Select(r => r.fileName).ToArray();
            var totalRows = copyResults.Sum(r => r.rows);
            var totalBytes = copyResults.Sum(r => r.size).Bytes();
            return (files, totalRows, totalBytes);
        }