/// <summary>
/// Syncs the tables listed in <paramref name="cfg"/> from the Snowflake source into the SQL Server destination,
/// running one <c>DbSync.UpdateTable</c> per table and logging the duration of each table and of the whole run.
/// </summary>
/// <param name="cfg">Sync configuration; supplies the table list (<c>Tables</c>) and the <c>Parallel</c> value handed to <c>BlockAction</c>.</param>
/// <param name="log">Root logger. Each table gets a child logger carrying a "Table" context property.</param>
/// <param name="tables">Optional table names to restrict the run to. Null or empty means every configured table is synced.</param>
/// <param name="fullLoad">Passed through unchanged to <c>DbSync.UpdateTable</c>.</param>
/// <param name="optionLimit">Passed through unchanged to <c>DbSync.UpdateTable</c>.</param>
public async Task SyncDb(SyncDbCfg cfg, ILogger log, IReadOnlyCollection<string> tables = null, bool fullLoad = false, int optionLimit = 0)
{
    // No filter (null or empty) selects every configured table; otherwise match on table name.
    var toRun = cfg.Tables.Where(t => tables == null || !tables.Any() || tables.Contains(t.Name)).ToArray();

    var dur = await toRun.BlockAction(async t =>
    {
        var tableLog = log.ForContext("Table", t.Name);
        tableLog.Information("Table Sync {Table} - started", t.Name);

        // Each table sync opens (and disposes) its own pair of connections.
        using var sourceConn = await Snowflake.OpenConnection(log);
        using var destConn = await SqlServerCfg.OpenConnection(tableLog);

        var sync = new DbSync(
            new SnowflakeSourceDb((SnowflakeDbConnection)sourceConn.Conn, Snowflake.Cfg.Schema, tableLog),
            new MsSqlDestDb(destConn, SqlServerCfg.DefaultSchema, t.FullTextCatalog, tableLog));

        var res = await sync.UpdateTable(t, tableLog, fullLoad, optionLimit)
            .WithWrappedException($"sync table '{t.Name}'", tableLog) // log the error and rethrow. Won't interrupt until the other syncs have completed
            .WithDuration();

        // Fixed typo in the message template ("Talbe" -> "Table") so log searches match the other sync messages.
        tableLog.Information("Table sync {Table} - completed in {Duration}", t.Name, res.HumanizeShort());
    }, cfg.Parallel).WithDuration();

    log.Information("Completed loading {Tables} in {Duration}", toRun.Select(t => t.Name), dur.Duration.HumanizeShort());
}
/// <summary>
/// Syncs the built-in table definitions (currently only <c>video_stats</c>) from the Snowflake source into the
/// SQL Server destination, running one <c>DbSync.UpdateTable</c> per table and logging per-table and total durations.
/// </summary>
/// <param name="log">Root logger. Each table gets a child logger carrying a "Table" context property.</param>
/// <param name="cancel">Cancellation token handed to <c>DbSync.UpdateTable</c>.</param>
/// <param name="restrictTables">Optional table names to restrict the run to. Null or empty means every defined table is synced.</param>
/// <param name="fullLoad">Passed through unchanged to <c>DbSync.UpdateTable</c>.</param>
/// <param name="optionLimit">Passed through unchanged to <c>DbSync.UpdateTable</c>.</param>
public async Task SyncDb(ILogger log, CancellationToken cancel, IReadOnlyCollection<string> restrictTables = null, bool fullLoad = false, int optionLimit = 0)
{
    // video_stats is a big table, so its column types are tuned for perf more than normal
    var videoStats = new SyncTableCfg("video_stats",
        new SyncColCfg("video_id") { Id = true, SqlType = "varchar(20)" },
        new SyncColCfg("date") { Id = true, SqlType = "date" },
        new SyncColCfg("channel_id") { SqlType = "varchar(30)" },
        new SyncColCfg("views") { SqlType = "float" },
        new SyncColCfg("watch_hours") { SqlType = "float" },
        new SyncColCfg("tags") { SqlType = "varchar(1000)" },
        new SyncColCfg("lr") { SqlType = "varchar(10)" }
    )
    {
        Sql = @"select d.*, array_to_string(cl.tags, '|') as tags, cl.lr from video_stats_daily d inner join channel_latest cl on d.channel_id = cl.channel_id"
    };
    var definedTables = new[] { videoStats };

    // A null or empty restriction list selects everything; otherwise keep only the named tables.
    var syncEverything = restrictTables == null || !restrictTables.Any();
    var selected = definedTables
        .Where(table => syncEverything || restrictTables.Contains(table.Name))
        .ToArray();

    var timedRun = await selected.BlockAction(async table =>
    {
        var tableLog = log.ForContext("Table", table.Name);
        tableLog.Information("Table Sync {Table} - started", table.Name);

        // Both connections live only for the duration of this table's sync.
        using (var snowflakeConn = await Snowflake.OpenConnection(log))
        using (var sqlConn = await SqlServerCfg.OpenConnection(tableLog))
        {
            var source = new SnowflakeSourceDb(snowflakeConn.Conn, Snowflake.Cfg.Schema, WhCfg.Stage, WhCfg.FileMb.Megabytes(), tableLog);

            // Use the version's prerelease label as the destination schema when it has a value, else the default schema.
            var destSchema = Version.Prerelease.HasValue() ? Version.Prerelease : SqlServerCfg.DefaultSchema;
            var dest = new MsSqlDestDb(sqlConn.Conn, destSchema, tableLog);

            var tableSync = new DbSync(source, dest, Store);

            // log the error and rethrow. Won't interrupt until the other syncs have completed
            var guardedUpdate = tableSync.UpdateTable(table, tableLog, cancel, fullLoad, optionLimit)
                .WithWrappedException($"sync table '{table.Name}'", tableLog);
            var elapsed = await guardedUpdate.WithDuration();

            tableLog.Information("Table sync {Table} - completed in {Duration}", table.Name, elapsed.HumanizeShort());
        }
    }, Cfg.Parallel).WithDuration();

    log.Information("Completed loading {Tables} in {Duration}", selected.Select(table => table.Name), timedRun.Duration.HumanizeShort());
}