/// <summary>Copies the rows selected by <paramref name="sourceSql"/> to blob storage and incrementally
/// loads the resulting files into <paramref name="loadTable"/> as they appear, deleting each file once loaded.</summary>
/// <returns>The total size of the data loaded.</returns>
async Task<ByteSize> LoadBLobData(SyncTableCfg tableCfg, ILogger log, string loadId, string sourceSql, object maxTs, TableId loadTable) {
  var path = StringPath.Relative("sync", tableCfg.Name, loadId);
  var copyTask = Source.CopyTo(path, sourceSql, tableCfg, maxTs);
  var loadedFiles = new KeyedCollection<StringPath, FileListItem>(f => f.Path);
  while (true) { // load as the files are created
    if (copyTask.IsFaulted) break; // stop polling; the fault is re-thrown by the await below
    var toLoad = (await Store.List(path).SelectManyList())
      .Where(f => !loadedFiles.ContainsKey(f.Path)).ToArray();
    if (toLoad.None()) {
      if (copyTask.IsCompleted) break;
      await 5.Seconds().Delay(); // no files yet - wait for the copy to produce more
      continue;
    }
    log.Debug("Sync {Table} - loading: {Files}", tableCfg.Name, toLoad.Join("|", l => l.Path.ToString()));
    await Dest.LoadFrom(toLoad.Select(f => f.Path), loadTable);
    loadedFiles.AddRange(toLoad);
    await toLoad.BlockAction(f => Store.Delete(f.Path, log), parallel: 8);
  }
  // BUG FIX: the copy task was never awaited, so a faulted copy was silently swallowed and this method
  // logged success with a partial result. Awaiting propagates any copy exception to the caller.
  await copyTask;
  var size = loadedFiles.Sum(f => f.Bytes).Bytes();
  log.Information("Sync {Table} - copied {Files} files ({Size})", tableCfg.Name, loadedFiles.Count, size.Humanize("#,#"));
  return size;
}
/// <summary>Opens a reader over the configured columns of <paramref name="table"/>, optionally
/// incremental (rows with ts &gt; <paramref name="tsValue"/>) and optionally limited (0 = no limit).</summary>
public async Task<DbDataReader> Read(TableId table, SyncTableCfg tableCfg, object tsValue = null, int limit = 0) {
  var colList = tableCfg.SelectedCols.Any()
    ? tableCfg.SelectedCols
      .Concat(tableCfg.TsCol, tableCfg.IdCol).NotNull().Distinct() // always include the special cols if they are specified
      .Select(Sql)
    : new[] { "*" };
  var selectSql = $"select {colList.Join(", ")} from {Sql(table)}";
  // BUG FIX: the previous boxed `!=` comparison was reference equality, so any non-null tsValue (even a
  // default value such as 0 or DateTime.MinValue) counted as incremental. Equals compares by value.
  var incremental = tsValue != null && !tsValue.Equals(tsValue.GetType().DefaultForType());
  var whereParts = new List<string>();
  if (incremental)
    whereParts.Add($"{Sql(tableCfg.TsCol ?? throw new InvalidOperationException("tsValue specified without a column"))} > :maxTs");
  if (tableCfg.Filter.HasValue())
    whereParts.Add($"({tableCfg.Filter})");
  var orderBySql = incremental ? $"order by {tableCfg.TsCol} asc" : null;
  var limitSql = limit == 0 ? null : " limit :limit";
  var sql = new[] { selectSql, whereParts.None() ? null : "where " + whereParts.Join(" and \n\t"), orderBySql, limitSql }
    .NotNull().Join("\n");
  return await Connection.ExecuteReader(nameof(Read), sql, new { maxTs = tsValue, limit });
}
/// <summary>Exports the rows selected by <paramref name="selectSql"/> as uncompressed CSV files into the
/// stage under <paramref name="path"/>.</summary>
public async Task CopyTo(string path, string selectSql, SyncTableCfg tableCfg, object tsValue = null, int? limit = null) {
  var exportSql = SelectSql(selectSql, tableCfg, tsValue, limit);
  var stageCopySql = $@"copy into @{StageName}/{path}/ from ({exportSql}) file_format = (TYPE=CSV, COMPRESSION=NONE, FIELD_OPTIONALLY_ENCLOSED_BY ='""', RECORD_DELIMITER ='\r\n', NULL_IF=('')) MAX_FILE_SIZE = {FileSize.Bytes:#} ";
  await Conn.Execute(nameof(CopyTo), stageCopySql);
}
/// <summary>Creates the destination table and any per-column indexes configured for it.</summary>
async Task CreateDestTable(TableId destTable, SyncTableCfg tableCfg, TableSchema destSchema, ILogger log) {
  await Dest.CreateTable(destSchema, destTable, tableCfg.ColStore);
  var colCfgs = destSchema.Columns.Select(c => tableCfg.Cols[c.ColumnName]).ToArray();
  var indexedCols = colCfgs.Where(c => c?.Index == true);
  await indexedCols.BlockAction(c => Dest.CreateIndex(destTable, new[] { c.Name }));
  log.Debug("Sync {Table} - created because it didn't exist", tableCfg.Name);
}
/// <summary>Creates the destination table, plus any configured default and full-text indexes.</summary>
async Task CreateDestTable(TableId destTable, SyncTableCfg tableCfg, TableSchema destSchema, ILogger log) {
  await Dest.CreateTable(destSchema, destTable, tableCfg.ColStore);
  var colCfgs = destSchema.Columns.Select(c => tableCfg.Cols[c.ColumnName]).ToArray();
  var indexedCols = colCfgs.Where(c => c?.Index == true);
  await indexedCols.BlockAction(c => Dest.CreateIndex(destTable, IndexType.Default, new[] { c.Name }));
  var fullTextCols = colCfgs.Where(c => c?.FullText == true).ToArray();
  if (fullTextCols.Any())
    await Dest.CreateIndex(destTable, IndexType.FullText, fullTextCols.Select(t => t.Name).ToArray());
  log.Debug("{Table} - created because it didn't exist", tableCfg.Name);
}
/// <summary>Builds the select statement for a sync read: appends an incremental ts predicate, the
/// configured filter, ordering and an optional limit to the supplied select.</summary>
string SelectSql(string selectSql, SyncTableCfg tableCfg, object tsValue, int? limit) {
  // BUG FIX: the previous boxed `!=` comparison was reference equality, so any non-null tsValue (even a
  // default value such as 0 or DateTime.MinValue) counted as incremental. Equals compares by value.
  var incremental = tsValue != null && !tsValue.Equals(tsValue.GetType().DefaultForType());
  var whereParts = new List<string>();
  if (incremental)
    whereParts.Add($"{Sql(tableCfg.TsCol ?? throw new InvalidOperationException("tsValue specified without a column"))} > :maxTs");
  if (tableCfg.Filter.HasValue())
    whereParts.Add($"({tableCfg.Filter})");
  var orderBySql = incremental ? $"order by {tableCfg.TsCol} asc" : null;
  var limitSql = limit == null ? null : " limit :limit";
  var sql = new[] { selectSql, whereParts.None() ? null : "where " + whereParts.Join(" and \n\t"), orderBySql, limitSql }
    .NotNull().Join("\n");
  return sql;
}
/// <summary>Syncs one table from Source to Dest, either by staging files in blob storage
/// (<see cref="SyncMode.Blob"/>) or by bulk-copying rows directly. When the destination already exists,
/// loads into a tmp table and then merges or switches it in to avoid downtime.</summary>
/// <param name="fullLoad">forces a full (non-incremental) sync</param>
/// <param name="limit">max rows to read (bulk-copy mode only); 0 = unlimited</param>
public async Task UpdateTable(SyncTableCfg tableCfg, ILogger log, CancellationToken cancel, bool fullLoad = false, int limit = 0, SyncMode mode = SyncMode.Blob) {
  var sw = Stopwatch.StartNew();
  await Dest.Init(Store.ContainerUrl);

  // BUG FIX: interpolation braces were missing around tableCfg.Name, so the generated SQL contained the
  // literal text "…DefaultSchema.tableCfg.Name" whenever tableCfg.Sql was not configured
  var sourceSql = tableCfg.Sql ?? $"select * from {Source.DefaultSchema}.{tableCfg.Name}";
  var destTable = new TableId(Dest.DefaultSchema, tableCfg.Name);
  var destSchema = await Dest.Schema(destTable);
  var destExists = destSchema != null;

  // fall back to a full sync when forced, when the dest table is missing, or when it lacks the ts column
  var syncType = fullLoad || destSchema == null ? SyncType.Full : tableCfg.SyncType;
  if (syncType != SyncType.Full &&
      destSchema?.Columns.Any(c => c.ColumnName.Equals(tableCfg.TsCol, StringComparison.InvariantCultureIgnoreCase)) == false)
    syncType = SyncType.Full;
  if (syncType.IsIncremental() && tableCfg.TsCol.NullOrEmpty())
    throw new InvalidOperationException("table configured for incremental, but no ts column was found");

  // high-water mark for incremental loads: the max ts value already present in the destination
  var maxTs = syncType.IsIncremental()
    ? await Dest.Conn.ExecuteScalar<object>(nameof(UpdateTable), $"select max({Dest.Sql(tableCfg.TsCol)}) from {Dest.Sql(destTable)}")
    : null;

  // start reading and get schema. in blob mode read 0 rows - we only need the schema here
  using var reader = await Source.Read(sourceSql, tableCfg, maxTs, mode == SyncMode.Blob ? 0 : limit);
  var querySchema = reader.Schema();
  destSchema ??= querySchema; // if there is no final destination schema, then it should match the source

  // apply configured overrides (sql type, key, nullability) to the dest schema
  destSchema = new TableSchema(destSchema.Columns.Select(c => {
    var cfg = tableCfg.Cols[c.ColumnName];
    return new ColumnSchema(c.ColumnName, c.DataType) { ProviderTypeExpression = cfg?.SqlType, Key = cfg?.Id, AllowDBNull = cfg?.Null };
  }));

  // create table if not exists
  if (!destExists) await CreateDestTable(destTable, tableCfg, destSchema, log);

  // prepare tmp table if required: load into a tmp table when the dest exists so it stays readable
  var tmpTable = destTable.WithTable($"{destTable.Table}_tmp");
  var loadTable = destExists ? tmpTable : destTable;
  if (loadTable == tmpTable) await CreateTmpTable(tmpTable, querySchema);

  // copy data
  var newRows = 0L;
  var newBytes = 0.Bytes();
  var loadId = DateTime.UtcNow.FileSafeTimestamp();
  if (mode == SyncMode.Blob) {
    newBytes += await LoadBLobData(tableCfg, log, loadId, sourceSql, maxTs, loadTable);
  }
  else {
    newRows = await Dest.BulkCopy(reader, loadTable, log, cancel);
    log.Debug("Sync {Table} - loaded {Rows} into {LoadTable} ({SyncType})", tableCfg.Name, newRows, loadTable, syncType);
  }

  // if we loaded in to temp table, work out best way to switch this in without downtime
  if (loadTable == tmpTable) {
    if (newRows == 0 && newBytes == 0.Bytes()) {
      await Dest.DropTable(tmpTable); // no new rows, nothing to do
    }
    else if (syncType.IsIncremental() || tableCfg.ManualSchema) {
      // incremental load, or manual schema. Merge the rows into the destination table
      var cols = destSchema.Columns;
      var mergeRes = await Dest.Merge(destTable, tmpTable, tableCfg.IdCols, cols);
      log.Debug("Sync {Table} - merged {Records} from {TempTable}", tableCfg.Name, mergeRes, tmpTable);
      await Dest.DropTable(tmpTable);
    }
    else {
      // NOTE: intentionally NOT wrapped in a transaction - doing so caused "BeginExecuteNonQuery requires
      // the command to have a transaction..." errors, at the cost of a brief moment where the table
      // doesn't exist between drop and rename
      await Dest.DropTable(destTable);
      await Dest.RenameTable(tmpTable, destTable);
      log.Debug("Sync {Table} - switch out temp table {TempTable}", tableCfg.Name, tmpTable);
    }
  }

  log.Information("Sync {Table} - completed loading {Size} in {Duration}", tableCfg.Name,
    newBytes > 0.Bytes() ? newBytes.Humanize("#,#") : newRows.ToString("#,#"), sw.Elapsed.HumanizeShort());
}
/// <summary>Opens a reader over the given select, with the standard sync predicates applied by
/// <see cref="SelectSql"/> (incremental ts filter, configured filter, ordering, limit).</summary>
public async Task<DbDataReader> Read(string selectSql, SyncTableCfg tableCfg, object tsValue = null, int? limit = null) =>
  await Conn.ExecuteReader(nameof(Read), SelectSql(selectSql, tableCfg, tsValue, limit), new { maxTs = tsValue, limit });
/// <summary>Syncs one table from Source to Dest via bulk copy. When the destination already exists,
/// loads into a tmp table and then merges (incremental) or switches it in (full) to avoid downtime.</summary>
/// <param name="fullLoad">forces a full (non-incremental) sync</param>
/// <param name="limit">max rows to read; 0 = unlimited</param>
public async Task UpdateTable(SyncTableCfg tableCfg, ILogger log, bool fullLoad = false, int limit = 0) {
  var sw = Stopwatch.StartNew();
  var sourceTable = new TableId(Source.DefaultSchema, tableCfg.Name);
  var destTable = new TableId(Dest.DefaultSchema, tableCfg.Name);
  var destSchema = await Dest.Schema(destTable);
  var destExists = destSchema != null;
  // fall back to a full sync when forced, when the dest table is missing, or when it lacks the ts column
  var syncType = fullLoad || destSchema == null ? SyncType.Full : tableCfg.SyncType;
  if (syncType != SyncType.Full && destSchema?.Columns.Any(c => c.ColumnName.Equals(tableCfg.TsCol, StringComparison.InvariantCultureIgnoreCase)) == false) { syncType = SyncType.Full; }
  if (syncType.IsIncremental() && tableCfg.TsCol.NullOrEmpty()) { throw new InvalidOperationException("table configured for incremental, but no ts column was found"); }
  // high-water mark for incremental loads: the max ts value already present in the destination
  var maxTs = syncType.IsIncremental()
    ? await Dest.Connection.ExecuteScalar<object>(nameof(UpdateTable), $"select max({Dest.Sql(tableCfg.TsCol)}) from {Dest.Sql(destTable)}")
    : null;
  // start reading and get schema
  using var reader = await Source.Read(sourceTable, tableCfg, maxTs, limit);
  var querySchema = reader.Schema();
  destSchema ??= querySchema; // if there is no final destination schema, then it should match the source
  // apply overrides (sql type, key, nullability) to dest schema
  destSchema = new TableSchema(destSchema.Columns.Select(c => {
    var cfg = tableCfg.Cols[c.ColumnName];
    return (new ColumnSchema(c.ColumnName, c.DataType) { ProviderTypeExpression = cfg?.TypeOverride, Key = cfg?.Id, AllowDBNull = cfg?.Null });
  }));
  // create table if not exists
  if (!destExists) { await CreateDestTable(destTable, tableCfg, destSchema, log); }
  // prepare tmp table if required: load into a tmp table when the dest exists so it stays readable
  var tmpTable = destTable.WithTable($"{destTable.Table}_tmp");
  var loadTable = destExists ? tmpTable : destTable;
  if (loadTable == tmpTable) { await CreateTmpTable(tmpTable, querySchema); }
  // copy data
  var newRows = await Dest.BulkCopy(reader, loadTable, log);
  log.Debug("{Table} - loaded {Rows} into {LoadTable} ({SyncType})", tableCfg.Name, newRows, loadTable, syncType);
  // if we loaded in to temp table, work out best way to switch this in without downtime
  if (loadTable == tmpTable) {
    if (newRows == 0) {
      await Dest.DropTable(tmpTable); // no new rows, nothing to do
    }
    else if (syncType.IsIncremental() || tableCfg.ManualSchema) { // incremental load, or manual schema. Merge the rows into the destination table
      var cols = destSchema.Columns;
      var mergeRes = await Dest.Merge(destTable, tmpTable, tableCfg.IdCol, cols);
      log.Debug("{Table} - merged {Records} from {TempTable}", tableCfg.Name, mergeRes, tmpTable);
      await Dest.DropTable(tmpTable);
    }
    else {
      // full load: replace the dest table with the freshly loaded tmp table inside a transaction
      using (var trans = Dest.Connection.Conn.BeginTransaction()) {
        await Dest.DropTable(destTable, trans);
        await Dest.RenameTable(tmpTable, destTable, trans);
        await trans.CommitAsync();
        log.Debug("{Table} - switch out temp table {TempTable}", tableCfg.Name, tmpTable);
      }
    }
  }
  log.Information("{Table} - completed loading {Rows} in {Duration}", tableCfg.Name, newRows, sw.Elapsed.HumanizeShort());
}