Exemplo n.º 1
0
        /// <summary>
        ///   Starts a copy of the source query into files under sync/{table}/{loadId}, and while that copy is running
        ///   loads each newly listed file into <paramref name="loadTable" /> and then deletes it from the store.
        /// </summary>
        /// <param name="tableCfg">Table configuration; its Name is used for the path and in log messages</param>
        /// <param name="log">Logger for progress messages</param>
        /// <param name="loadId">Identifier of this load, used as the last segment of the file path</param>
        /// <param name="sourceSql">The source select statement handed to Source.CopyTo</param>
        /// <param name="maxTs">Timestamp value handed to Source.CopyTo</param>
        /// <param name="loadTable">The table the files are loaded into</param>
        /// <returns>The summed size of all files that were loaded</returns>
        /// <remarks>If the copy task fails or is cancelled, its exception is rethrown once the loop has ended.</remarks>
        async Task <ByteSize> LoadBLobData(SyncTableCfg tableCfg, ILogger log, string loadId, string sourceSql, object maxTs, TableId loadTable)
        {
            var path        = StringPath.Relative("sync", tableCfg.Name, loadId);
            var copyTask    = Source.CopyTo(path, sourceSql, tableCfg, maxTs);
            var loadedFiles = new KeyedCollection <StringPath, FileListItem>(f => f.Path);

            while (!copyTask.IsFaulted) // load as the files are created
            {
                // Sample completion BEFORE listing. If we checked afterwards, files written between the listing and the
                // copy finishing would never be seen: the list would be empty and the task complete, so we would stop.
                var copyFinished = copyTask.IsCompleted;

                var toLoad = (await Store.List(path).SelectManyList())
                             .Where(f => !loadedFiles.ContainsKey(f.Path)).ToArray();
                if (toLoad.None())
                {
                    if (copyFinished)
                    {
                        break;
                    }
                    await 5.Seconds().Delay();
                    continue;
                }
                log.Debug("Sync {Table} - loading: {Files}", tableCfg.Name, toLoad.Join("|", l => l.Path.ToString()));
                await Dest.LoadFrom(toLoad.Select(f => f.Path), loadTable);

                loadedFiles.AddRange(toLoad);
                await toLoad.BlockAction(f => Store.Delete(f.Path, log), parallel : 8);
            }

            // Observe the copy task. Without this a failed copy was silently reported as a successful load.
            await copyTask;

            var totalSize = loadedFiles.Sum(f => f.Bytes).Bytes();
            log.Information("Sync {Table} - copied {Files} files ({Size})", tableCfg.Name, loadedFiles.Count, totalSize.Humanize("#,#"));
            return totalSize;
        }
Exemplo n.º 2
0
        /// <summary>
        ///   Builds a select statement for <paramref name="table" /> and opens a reader over it.
        /// </summary>
        /// <param name="table">The table to select from</param>
        /// <param name="tableCfg">Supplies the selected columns, the timestamp/id columns and an optional filter expression</param>
        /// <param name="tsValue">
        ///   When not null and not the default value of its type, only rows with a timestamp column greater than this value
        ///   are selected, ordered by the timestamp column ascending
        /// </param>
        /// <param name="limit">Maximum number of rows; 0 means no limit clause is added</param>
        /// <returns>An open reader for the query</returns>
        /// <exception cref="InvalidOperationException">A usable tsValue was given but tableCfg has no TsCol</exception>
        public async Task <DbDataReader> Read(TableId table, SyncTableCfg tableCfg, object tsValue = null, int limit = 0)
        {
            var colList = tableCfg.SelectedCols.Any()
                ? tableCfg.SelectedCols
                          .Concat(tableCfg.TsCol, tableCfg.IdCol).NotNull().Distinct() // always include the special cols if they are specified
                          .Select(Sql)
                : new[] { "*" };
            var selectSql = $"select {colList.Join(", ")} from {Sql(table)}";

            // Compare by value: `!=` on two objects is a reference comparison, which is always true for boxed values,
            // so a default timestamp would otherwise still be treated as an incremental read.
            var incremental = tsValue != null && !object.Equals(tsValue.GetType().DefaultForType(), tsValue);
            var whereParts  = new List <string>();

            if (incremental)
            {
                whereParts.Add($"{Sql(tableCfg.TsCol ?? throw new InvalidOperationException("tsValue specified without a column"))} > :maxTs");
            }
            if (tableCfg.Filter.HasValue())
            {
                whereParts.Add($"({tableCfg.Filter})");
            }

            // escape the column the same way the where clause does
            var orderBySql = incremental ? $"order by {Sql(tableCfg.TsCol)} asc" : null;
            var limitSql   = limit == 0 ? null : " limit :limit";
            var sql        = new[] {
                selectSql,
                whereParts.None() ? null : "where " + whereParts.Join(" and \n\t"),
                orderBySql,
                limitSql
            }.NotNull().Join("\n");

            return await Connection.ExecuteReader(nameof(Read), sql, new { maxTs = tsValue, limit });
        }
Exemplo n.º 3
0
        /// <summary>
        ///   Wraps the query produced by SelectSql in a "copy into" statement that targets the stage location
        ///   @{StageName}/{path}/ and executes it.
        /// </summary>
        /// <param name="path">Location below the stage that the files are written to</param>
        /// <param name="selectSql">The base select statement</param>
        /// <param name="tableCfg">Table configuration passed through to SelectSql</param>
        /// <param name="tsValue">Timestamp value passed through to SelectSql</param>
        /// <param name="limit">Row limit passed through to SelectSql</param>
        public async Task CopyTo(string path, string selectSql, SyncTableCfg tableCfg, object tsValue = null, int?limit = null)
        {
            var query = SelectSql(selectSql, tableCfg, tsValue, limit);

            // NOTE(review): SelectSql can emit :maxTs / :limit placeholders, but no parameter object is handed to Execute
            // here (Read passes one to ExecuteReader) - confirm the placeholders get bound for incremental/limited copies.
            var statement = $@"copy into @{StageName}/{path}/ from ({query})
file_format = (TYPE=CSV, COMPRESSION=NONE, FIELD_OPTIONALLY_ENCLOSED_BY ='""', RECORD_DELIMITER ='\r\n', NULL_IF=(''))
MAX_FILE_SIZE = {FileSize.Bytes:#}
      ";

            await Conn.Execute(nameof(CopyTo), statement);
        }
Exemplo n.º 4
0
        /// <summary>
        ///   Creates the destination table from <paramref name="destSchema" />, then creates a single-column index for
        ///   every schema column whose configuration has Index set.
        /// </summary>
        /// <param name="destTable">The table to create</param>
        /// <param name="tableCfg">Supplies the column store flag, the per-column configuration and the name used for logging</param>
        /// <param name="destSchema">The schema the table is created with</param>
        /// <param name="log">Logger for the debug message</param>
        async Task CreateDestTable(TableId destTable, SyncTableCfg tableCfg, TableSchema destSchema, ILogger log)
        {
            await Dest.CreateTable(destSchema, destTable, tableCfg.ColStore);

            // the lookup may yield null for a column (the filter below tolerates that)
            var colCfgs     = destSchema.Columns.Select(col => tableCfg.Cols[col.ColumnName]).ToArray();
            var indexedCols = colCfgs.Where(cfg => cfg?.Index == true);

            await indexedCols.BlockAction(cfg => Dest.CreateIndex(destTable, new[] { cfg.Name }));

            log.Debug("Sync {Table} - created because it didn't exist", tableCfg.Name);
        }
Exemplo n.º 5
0
        /// <summary>
        ///   Creates the destination table from <paramref name="destSchema" />, a default index for each column configured
        ///   with Index, and one full-text index spanning all columns configured with FullText (when there are any).
        /// </summary>
        /// <param name="destTable">The table to create</param>
        /// <param name="tableCfg">Supplies the column store flag, the per-column configuration and the name used for logging</param>
        /// <param name="destSchema">The schema the table is created with</param>
        /// <param name="log">Logger for the debug message</param>
        async Task CreateDestTable(TableId destTable, SyncTableCfg tableCfg, TableSchema destSchema, ILogger log)
        {
            await Dest.CreateTable(destSchema, destTable, tableCfg.ColStore);

            // the lookup may yield null for a column (the filters below tolerate that)
            var colCfgs = destSchema.Columns.Select(col => tableCfg.Cols[col.ColumnName]).ToArray();

            // one default index per indexed column
            var indexedCols = colCfgs.Where(cfg => cfg?.Index == true);
            await indexedCols.BlockAction(cfg => Dest.CreateIndex(destTable, IndexType.Default, new[] { cfg.Name }));

            // a single full-text index covering every full-text column
            var fullTextCfgs = colCfgs.Where(cfg => cfg?.FullText == true).ToArray();
            if (fullTextCfgs.Length > 0)
            {
                var fullTextNames = fullTextCfgs.Select(cfg => cfg.Name).ToArray();
                await Dest.CreateIndex(destTable, IndexType.FullText, fullTextNames);
            }

            log.Debug("{Table} - created because it didn't exist", tableCfg.Name);
        }
Exemplo n.º 6
0
        /// <summary>
        ///   Appends where / order by / limit clauses to <paramref name="selectSql" />. The result uses the :maxTs and
        ///   :limit placeholders; the values themselves are not embedded in the returned text.
        /// </summary>
        /// <param name="selectSql">The base select statement</param>
        /// <param name="tableCfg">Supplies the timestamp column and an optional filter expression</param>
        /// <param name="tsValue">
        ///   When not null and not the default value of its type, adds "ts col &gt; :maxTs" and orders by the timestamp column
        /// </param>
        /// <param name="limit">When not null, a " limit :limit" clause is added (this includes 0)</param>
        /// <returns>The combined sql text</returns>
        /// <exception cref="InvalidOperationException">A usable tsValue was given but tableCfg has no TsCol</exception>
        string SelectSql(string selectSql, SyncTableCfg tableCfg, object tsValue, int?limit)
        {
            // Compare by value: `!=` on two objects is a reference comparison, which is always true for boxed values,
            // so a default timestamp would otherwise still be treated as an incremental read.
            var incremental = tsValue != null && !object.Equals(tsValue.GetType().DefaultForType(), tsValue);
            var whereParts  = new List <string>();

            if (incremental)
            {
                whereParts.Add($"{Sql(tableCfg.TsCol ?? throw new InvalidOperationException("tsValue specified without a column"))} > :maxTs");
            }
            if (tableCfg.Filter.HasValue())
            {
                whereParts.Add($"({tableCfg.Filter})");
            }

            // escape the column the same way the where clause does
            var orderBySql = incremental ? $"order by {Sql(tableCfg.TsCol)} asc" : null;
            var limitSql   = limit == null ? null : " limit :limit";

            return new[] {
                selectSql,
                whereParts.None() ? null : "where " + whereParts.Join(" and \n\t"),
                orderBySql,
                limitSql
            }.NotNull().Join("\n");
        }
Exemplo n.º 7
0
        /// <summary>Legacy bulk copy, tmp table switching version</summary>
        /// <remarks>
        ///   Loads the source query into the destination table. When the destination already exists the data is first
        ///   loaded into a "{table}_tmp" table, which is then dropped (nothing loaded), merged into the destination
        ///   (incremental sync or manual schema) or swapped in for the destination (full sync).
        /// </remarks>
        /// <param name="tableCfg">Configuration of the table to sync</param>
        /// <param name="log">Logger for progress messages</param>
        /// <param name="cancel">Cancellation token; passed to the bulk copy only</param>
        /// <param name="fullLoad">Forces a full sync regardless of the configured sync type</param>
        /// <param name="limit">Maximum rows to read in bulk-copy mode, 0 for no limit. Not applied in Blob mode</param>
        /// <param name="mode">Blob loads through files in the store; any other mode bulk copies from the reader</param>
        /// <exception cref="InvalidOperationException">The sync is incremental but no timestamp column is configured</exception>
        public async Task UpdateTable(SyncTableCfg tableCfg, ILogger log, CancellationToken cancel, bool fullLoad = false, int limit = 0,
                                      SyncMode mode = SyncMode.Blob)
        {
            var sw = Stopwatch.StartNew();

            await Dest.Init(Store.ContainerUrl);

            // the table name must be interpolated; previously the literal text "tableCfg.Name" ended up in the sql
            var sourceSql = tableCfg.Sql ?? $"select * from {Source.DefaultSchema}.{tableCfg.Name}";

            var destTable  = new TableId(Dest.DefaultSchema, tableCfg.Name);
            var destSchema = await Dest.Schema(destTable);

            var destExists = destSchema != null;

            var syncType = fullLoad || destSchema == null ? SyncType.Full : tableCfg.SyncType;

            // fall back to a full sync when the existing destination has no timestamp column to compare against
            if (syncType != SyncType.Full &&
                destSchema?.Columns.Any(c => c.ColumnName.Equals(tableCfg.TsCol, StringComparison.InvariantCultureIgnoreCase)) == false)
            {
                syncType = SyncType.Full;
            }
            if (syncType.IsIncremental() && tableCfg.TsCol.NullOrEmpty())
            {
                throw new InvalidOperationException("table configured for incremental, but no ts column was found");
            }
            var maxTs = syncType.IsIncremental()
                ? await Dest.Conn.ExecuteScalar <object>(nameof(UpdateTable), $"select max({Dest.Sql(tableCfg.TsCol)}) from {Dest.Sql(destTable)}")
                : null;

            // Start reading and get the schema.
            // - Blob mode: the rows are loaded through files, so read with limit 0 to get the schema without any rows.
            // - Bulk mode: a limit of 0 means "no limit" for this method, but the reader only omits the limit clause
            //   for null, so translate it (passing 0 through would read no rows at all).
            int? readLimit = mode == SyncMode.Blob ? 0 : limit == 0 ? (int?) null : limit;
            using var reader = await Source.Read(sourceSql, tableCfg, maxTs, readLimit);

            var querySchema = reader.Schema();

            destSchema ??= querySchema; // if there is no final destination schema, then it should match the source
            // apply overrides to dest schema
            destSchema = new TableSchema(destSchema.Columns.Select(c => {
                var cfg = tableCfg.Cols[c.ColumnName];
                return new ColumnSchema(c.ColumnName, c.DataType)
                {
                    ProviderTypeExpression = cfg?.SqlType,
                    Key = cfg?.Id,
                    AllowDBNull = cfg?.Null
                };
            }));

            // create table if not exists
            if (!destExists)
            {
                await CreateDestTable(destTable, tableCfg, destSchema, log);
            }

            // prepare tmp table if required
            var tmpTable  = destTable.WithTable($"{destTable.Table}_tmp");
            var loadTable = destExists ? tmpTable : destTable;

            if (loadTable == tmpTable)
            {
                await CreateTmpTable(tmpTable, querySchema);
            }

            // copy data
            var newRows  = 0L;
            var newBytes = 0.Bytes();
            var loadId   = DateTime.UtcNow.FileSafeTimestamp();

            if (mode == SyncMode.Blob)
            {
                newBytes += await LoadBLobData(tableCfg, log, loadId, sourceSql, maxTs, loadTable);
            }
            else
            {
                newRows = await Dest.BulkCopy(reader, loadTable, log, cancel);

                log.Debug("Sync {Table} - loaded {Rows} into {LoadTable} ({SyncType})", tableCfg.Name, newRows, loadTable, syncType);
            }

            // if we loaded in to temp table, work out best way to switch this in without downtime
            if (loadTable == tmpTable)
            {
                if (newRows == 0 && newBytes == 0.Bytes())
                {
                    await Dest.DropTable(tmpTable);                         // nothing was loaded, nothing to do
                }
                else if (syncType.IsIncremental() || tableCfg.ManualSchema) // incremental load, or manual schema. Move the rows into the destination table
                {
                    var cols     = destSchema.Columns;
                    var mergeRes = await Dest.Merge(destTable, tmpTable, tableCfg.IdCols, cols);

                    log.Debug("Sync {Table} - merged {Records} from {TempTable}", tableCfg.Name, mergeRes, tmpTable);
                    await Dest.DropTable(tmpTable);
                }
                else
                {
                    // The drop + rename is deliberately NOT wrapped in a transaction, so there may be moments where the
                    // table disappears. The transaction was removed to get past this error: "BeginExecuteNonQuery requires
                    // the command to have a transaction when the connection assigned to the command is in a pending local
                    // transaction. The Transaction property of the command has not been initialized."
                    await Dest.DropTable(destTable);

                    await Dest.RenameTable(tmpTable, destTable);

                    log.Debug("Sync {Table} - switch out temp table {TempTable}", tableCfg.Name, tmpTable);
                }
            }

            log.Information("Sync {Table} - completed loading {Size} in {Duration}",
                            tableCfg.Name, newBytes > 0.Bytes() ? newBytes.Humanize("#,#") : newRows.ToString("#,#"), sw.Elapsed.HumanizeShort());
        }
Exemplo n.º 8
0
        /// <summary>
        ///   Combines <paramref name="selectSql" /> with the clauses produced by SelectSql and opens a reader over the
        ///   result, supplying <paramref name="tsValue" /> as maxTs and <paramref name="limit" /> as limit.
        /// </summary>
        /// <param name="selectSql">The base select statement</param>
        /// <param name="tableCfg">Table configuration passed through to SelectSql</param>
        /// <param name="tsValue">Timestamp value passed to SelectSql and bound as maxTs</param>
        /// <param name="limit">Row limit passed to SelectSql and bound as limit</param>
        /// <returns>An open reader for the query</returns>
        public async Task <DbDataReader> Read(string selectSql, SyncTableCfg tableCfg, object tsValue = null, int?limit = null)
        {
            var query      = SelectSql(selectSql, tableCfg, tsValue, limit);
            var parameters = new { maxTs = tsValue, limit };
            var reader     = await Conn.ExecuteReader(nameof(Read), query, parameters);

            return reader;
        }
Exemplo n.º 9
0
        /// <summary>
        ///   Bulk copies the source table into the destination table. When the destination already exists the rows are
        ///   first loaded into a "{table}_tmp" table, which is then dropped (no rows), merged into the destination
        ///   (incremental sync or manual schema) or swapped in for the destination inside a transaction (full sync).
        /// </summary>
        /// <param name="tableCfg">Configuration of the table to sync</param>
        /// <param name="log">Logger for progress messages</param>
        /// <param name="fullLoad">Forces a full sync regardless of the configured sync type</param>
        /// <param name="limit">Row limit handed to the source reader</param>
        /// <exception cref="InvalidOperationException">The sync is incremental but no timestamp column is configured</exception>
        public async Task UpdateTable(SyncTableCfg tableCfg, ILogger log, bool fullLoad = false, int limit = 0)
        {
            var timer  = Stopwatch.StartNew();
            var srcId  = new TableId(Source.DefaultSchema, tableCfg.Name);
            var destId = new TableId(Dest.DefaultSchema, tableCfg.Name);

            var destSchema = await Dest.Schema(destId);
            var destExists = destSchema != null;

            // a missing destination or an explicit request always means a full sync
            var syncType = (fullLoad || !destExists) ? SyncType.Full : tableCfg.SyncType;

            if (syncType != SyncType.Full)
            {
                // without a timestamp column in the destination there is nothing to compare against
                var destHasTsCol = destSchema?.Columns.Any(c => c.ColumnName.Equals(tableCfg.TsCol, StringComparison.InvariantCultureIgnoreCase));
                if (destHasTsCol == false)
                {
                    syncType = SyncType.Full;
                }
            }

            var incremental = syncType.IsIncremental();
            if (incremental && tableCfg.TsCol.NullOrEmpty())
            {
                throw new InvalidOperationException("table configured for incremental, but no ts column was found");
            }

            object maxTs = null;
            if (incremental)
            {
                maxTs = await Dest.Connection.ExecuteScalar <object>(nameof(UpdateTable), $"select max({Dest.Sql(tableCfg.TsCol)}) from {Dest.Sql(destId)}");
            }

            // start reading and get schema
            using var reader = await Source.Read(srcId, tableCfg, maxTs, limit);
            var querySchema  = reader.Schema();

            // if there is no final destination schema, then it should match the source
            if (destSchema == null)
            {
                destSchema = querySchema;
            }

            // apply overrides to dest schema
            var overriddenCols = destSchema.Columns.Select(col => {
                var colCfg = tableCfg.Cols[col.ColumnName];
                var result = new ColumnSchema(col.ColumnName, col.DataType)
                {
                    ProviderTypeExpression = colCfg?.TypeOverride,
                    Key = colCfg?.Id,
                    AllowDBNull = colCfg?.Null
                };
                return result;
            });
            destSchema = new TableSchema(overriddenCols);

            // create table if not exists
            if (!destExists)
            {
                await CreateDestTable(destId, tableCfg, destSchema, log);
            }

            // prepare tmp table if required
            var tmpId         = destId.WithTable($"{destId.Table}_tmp");
            var loadId        = destExists ? tmpId : destId;
            var loadedIntoTmp = loadId == tmpId;

            if (loadedIntoTmp)
            {
                await CreateTmpTable(tmpId, querySchema);
            }

            // copy data
            var newRows = await Dest.BulkCopy(reader, loadId, log);

            log.Debug("{Table} - loaded {Rows} into {LoadTable} ({SyncType})", tableCfg.Name, newRows, loadId, syncType);

            // if we loaded in to temp table, work out best way to switch this in without downtime
            if (loadedIntoTmp)
            {
                if (newRows == 0)
                {
                    // no new rows, nothing to do
                    await Dest.DropTable(tmpId);
                }
                else if (incremental || tableCfg.ManualSchema)
                {
                    // incremental load, or manual schema. Move the rows into the destination table
                    var merged = await Dest.Merge(destId, tmpId, tableCfg.IdCol, destSchema.Columns);

                    log.Debug("{Table} - merged {Records} from {TempTable}", tableCfg.Name, merged, tmpId);
                    await Dest.DropTable(tmpId);
                }
                else
                {
                    // full load: replace the destination with the tmp table inside one transaction
                    using var trans = Dest.Connection.Conn.BeginTransaction();

                    await Dest.DropTable(destId, trans);
                    await Dest.RenameTable(tmpId, destId, trans);
                    await trans.CommitAsync();

                    log.Debug("{Table} - switch out temp table {TempTable}", tableCfg.Name, tmpId);
                }
            }

            log.Information("{Table} - completed loading {Rows} in {Duration}", tableCfg.Name, newRows, timer.Elapsed.HumanizeShort());
        }