async Task Incremental(LoggedConnection db, string table, StageTableCfg t, DateTime latestTs) {
  await DbStore.Optimise(Cfg.Optimise, t.Dir, latestTs.FileSafeTimestamp(), db.Log); // optimise files newer than the last load
  var ((_, rows, size), dur) = await CopyInto(db, table, t).WithDuration();
  db.Log.Information("StageUpdate - {Table} incremental load of {Rows} rows ({Size}) took {Duration}",
    table, rows, size.Humanize("#.#"), dur.HumanizeShort());
}
async Task FullLoad(LoggedConnection db, string table, StageTableCfg t) {
  if (t.IsNativeStore)
    await DbStore.Optimise(Cfg.Optimise, t.Dir, null, db.Log); // optimise all files when performing a full load
  await db.Execute("truncate table", $"truncate table {table}"); // no transaction, stage tables aren't reported on so don't need to be available
  var ((_, rows, size), dur) = await CopyInto(db, table, t).WithDuration();
  db.Log.Information("StageUpdate - {Table} full load of {Rows} rows ({Size}) took {Duration}",
    table, rows, size.Humanize("#.#"), dur.HumanizeShort());
}
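// Both loaders above rely on a WithDuration extension to pair a task's result with the time spent
// awaiting it. The actual helper isn't shown in this section; the following is a minimal sketch of
// the assumed behaviour (needs System.Diagnostics), written as a plain static method here because
// extension methods can't be declared inside a nested class:
static async Task<(T result, TimeSpan duration)> WithDurationSketch<T>(Task<T> task) {
  var sw = Stopwatch.StartNew(); // measures from this call, not from the task's creation
  var res = await task;
  return (res, sw.Elapsed);
}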
AzureBlobFileStore Store(StageTableCfg t) => Stores.Store(t.StoreType);
async Task<(string[] files, long rows, ByteSize size)> CopyInto(ILoggedConnection<IDbConnection> db, string table, StageTableCfg t) {
  var startTime = await db.ExecuteScalar<string>("current time", "select current_timestamp()::string");
  var (stage, path) = t.StoreType switch {
    DataStoreType.Db => (Cfg.Stage, StorageCfg.DbPath),
    DataStoreType.Private => (Cfg.Private, null),
    _ => throw new InvalidOperationException($"No warehouse stage for store type {t.StoreType}")
  };
  var sql = $"copy into {table} from @{new[] {stage, path}.Concat(t.Dir.Tokens).NotNull().Join("/")}/ file_format=(type=json)";
  await db.Execute("copy into", sql);
  // sf should return this info from copy_into (it's in their UI, but not in the .net or jdbc drivers).
  // The int that is returned is the # of rows from the first file loaded, so we go get this ourselves.
  var copyResults = await db.Query<(string fileName, long rows, long size)>("copy results",
    "select file_name, row_count, file_size " +
    $"from table(information_schema.copy_history(table_name=>'{table}', start_time=>'{startTime}'::timestamp_ltz))");
  var res = (copyResults.Select(r => r.fileName).ToArray(), copyResults.Sum(r => r.rows), copyResults.Sum(r => r.size).Bytes());
  return res;
}
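// Worked example of the stage-path composition above (all names hypothetical): NotNull() drops the
// null path for the Private store and Join("/") glues the remaining tokens, so Db-store tables
// resolve to @{stage}/{db-path}/{dir}/ while Private tables sit directly under their stage root.
static string ExampleStagePath() {
  var stage = "yt_data";            // stands in for Cfg.Stage
  var path = "db";                  // stands in for StorageCfg.DbPath (null for the Private store)
  var dirTokens = new[] {"videos"}; // stands in for t.Dir.Tokens
  // equivalent of new[] {stage, path}.Concat(t.Dir.Tokens).NotNull().Join("/") using plain LINQ
  return string.Join("/", new[] {stage, path}.Concat(dirTokens).Where(s => s != null)); // "yt_data/db/videos"
}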
async Task<(string[] files, long rows, ByteSize size)> CopyInto(LoggedConnection db, string table, StageTableCfg t) {
  var startTime = await db.ExecuteScalar<string>("current time", "select current_timestamp()::string");
  var sql = $"copy into {table} from @{Cfg.Stage}/{StorageCfg.DbPath}/{t.Dir}/ file_format=(type=json)";
  await db.Execute("copy into", sql);
  // sf should return this info from copy_into (it's in their UI, but not in the .net or jdbc drivers).
  // The int that is returned is the # of rows from the first file loaded, so we go get this ourselves.
  var copyResults = await db.Query<(string fileName, long rows, long size)>("copy results",
    "select file_name, row_count, file_size " +
    $"from table(information_schema.copy_history(table_name=>'{table}', start_time=>'{startTime}'::timestamp_ltz))");
  var res = (copyResults.Select(r => r.fileName).ToArray(), copyResults.Sum(r => r.rows), copyResults.Sum(r => r.size).Bytes());
  return res;
}
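// Sketch of the aggregation applied to the copy_history rows (sample values hypothetical): the
// query returns one row per file loaded by the COPY INTO, and the summed totals are what the
// callers humanize for their log lines.
static (string[] files, long rows, long size) ExampleCopyResults() {
  var copyResults = new[] {
    (fileName: "videos/2020-01-01.0.json.gz", rows: 1_000L, size: 52_428L),
    (fileName: "videos/2020-01-02.0.json.gz", rows: 2_500L, size: 131_072L)
  };
  return (copyResults.Select(r => r.fileName).ToArray(), // every file this load touched
    copyResults.Sum(r => r.rows),                        // 3_500 rows in total
    copyResults.Sum(r => r.size));                       // 183_500 bytes, before the .Bytes() conversion
}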