public void CanGenerateStagingTables()
{
    // Building the staging schema for an arbitrary schema name must
    // produce at least one table definition.
    var builder = StagingTablesBuilder.Get("dummy");

    Assert.IsTrue(builder.GetTableNames().Any());
}
/// <summary>
/// Verifies that every staging table declares its primary key columns first
/// (immediately after the leading src_timetable_id column). The table-diff
/// algorithm assumes this ordering.
/// </summary>
/// <param name="pkInfoList">Primary key metadata keyed by table name.</param>
/// <exception cref="ApplicationException">
/// Thrown when a staging table has too few columns or its leading columns do
/// not match the source primary key order.
/// </exception>
private static void CheckStagingTablesListPrimaryKeysFirst(Dictionary<string, PrimaryKeyInfo> pkInfoList)
{
    // it's important that all staging tables list the primary keys first because this is assumed
    // by the algorithm that performs table diffs.
    var b = StagingTablesBuilder.Get(StagingSchema.PrimaryStagingSchemaName);
    foreach (var table in b.GetTables(includePseudoRegisterMarkTable: false))
    {
        // CT_CONFIG is not derived from a source table, so it has no PK info to check.
        if (table.Name.Equals("CT_CONFIG", StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        // FIX: the original used the dictionary indexer (which throws
        // KeyNotFoundException for a missing key) followed by a null check that
        // could never be reached for absent tables. TryGetValue makes the
        // evident "skip tables without PK info" intent actually work.
        if (!pkInfoList.TryGetValue(table.Name, out PrimaryKeyInfo pkInfo) || pkInfo == null)
        {
            continue;
        }

        var stagingTableCols = table.Columns;

        // deduct 1 for src_timetable_id
        if (stagingTableCols.Count - 1 < pkInfo.Columns.Count)
        {
            throw new ApplicationException(
                $"Staging table contains fewer columns than primary key of source: {table.Name}");
        }

        // Staging column 0 is src_timetable_id, so source PK column n must
        // appear at staging position n + 1.
        for (int n = 0; n < pkInfo.Columns.Count; ++n)
        {
            if (!pkInfo.Columns[n].ColumnName.Equals(stagingTableCols[n + 1].Name, StringComparison.OrdinalIgnoreCase))
            {
                throw new ApplicationException(
                    $"Incorrect staging table column order: {table.Name} (primary key columns must be defined first)");
            }
        }
    }
}
private void InternalCreateEmptyStagingTables()
{
    // The schema must exist before any staging tables can be created in it.
    EnsureSchemaCreated();

    StagingTablesBuilder
        .Get(_schemaName)
        .Execute(ConnectionString, TimeoutSecs);
}
public void StagingColumnsAreAllNullable()
{
    // in the staging tables we leave all cols as nullable
    var builder = StagingTablesBuilder.Get("dummy");

    foreach (var table in builder.GetTables())
    {
        foreach (var column in table.Columns)
        {
            Assert.IsTrue(
                column.Nullable == ColumnNullable.True,
                "Table = {0}, Col = {1}",
                table.Name,
                column.Name);
        }
    }
}
// is the source timetable schema compatible with CTDS (i.e. with the staging area)?
public SourceCompatibilityReport GetCompatibilityWithStagingSchema()
{
    _log.DebugFormat("Generating compatibility report for source timetable: {0}", _connectionDescription);

    var report = new SourceCompatibilityReport(_connectionDescription);
    var stagingSchema = StagingTablesBuilder.Get(StagingSchema.PrimaryStagingSchemaName);

    // Compare the table/column sets in both directions: anything the stage
    // expects but the source lacks, and anything extra the source carries.
    var tablesInStage = stagingSchema.GetTableNames();
    var tablesInSource = GetTables();
    GetMissingTablesAndColumns(report, stagingSchema, tablesInStage, tablesInSource);
    GetExtraTablesAndColumns(report, stagingSchema, tablesInStage, tablesInSource);

    return report;
}
/// <summary>
/// Updates the consolidation tables for every entity that participates in
/// consolidation, processing entities in parallel.
/// </summary>
/// <param name="fs">Federation schema whose consolidation tables are updated.</param>
private void DoParallelConsolidationProcessing(FederationSchema fs)
{
    // Enum.GetValues returns a non-generic Array, so a single Cast<Entity>()
    // suffices; the original Cast<object>().Cast<Entity>() double cast was redundant.
    List<Entity> entities = Enum.GetValues(typeof(Entity)).Cast<Entity>().ToList();

    var b = StagingTablesBuilder.Get(StagingSchema.PrimaryStagingSchemaName);
    var pOptions = new ParallelOptions { MaxDegreeOfParallelism = _configuration.MaxDegreeOfParallelism };

    Parallel.ForEach(entities, pOptions, (e, loopState) =>
    {
        // Skip remaining work once any parallel iteration has faulted.
        if (!loopState.IsExceptional && EntityUtils.CanParticipateInConsolidation(e))
        {
            var entry = _configuration.Consolidation.Get(e);
            fs.UpdateConsolidationTables(e, b.GetTable(EntityUtils.ToCtTableName(e)), entry);
        }
    });
}
private static void CheckIdentityColumns(IReadOnlyList<SourceTimetableData> timetables, int commandTimeoutSecs)
{
    _log.Debug("Checking primary keys");

    var builder = StagingTablesBuilder.Get(StagingSchema.PrimaryStagingSchemaName);

    // Collect primary-key metadata for every source timetable.
    var pkInfoForAllTimetables = timetables
        .Select(tt => DatabaseUtils.GetPrimaryKeyInfo(tt.ConnectionString, commandTimeoutSecs))
        .ToList();

    if (pkInfoForAllTimetables.Count > 0)
    {
        CheckSamePrimaryKeyColumns(pkInfoForAllTimetables, builder);
        // After the cross-timetable agreement check, the remaining checks only
        // need to inspect the first timetable's metadata.
        CheckStagingTablesListPrimaryKeysFirst(pkInfoForAllTimetables[0]);
        CheckEntitiesHaveSinglePrimaryKeyId(pkInfoForAllTimetables[0]);
    }
}
/// <summary>
/// Updates the standard federation table for every entity that requires
/// federation, processing entities in parallel.
/// </summary>
private void DoParallelFederationProcessing()
{
    var fs = new FederationSchema(
        AdminConnectionString,
        Timeouts.AdminDatabase,
        _configuration.MaxDegreeOfParallelism,
        _configuration.Pipelines);

    // Enum.GetValues returns a non-generic Array, so a single Cast<Entity>()
    // suffices; the original Cast<object>().Cast<Entity>() double cast was redundant.
    List<Entity> entities = Enum.GetValues(typeof(Entity)).Cast<Entity>().ToList();

    var b = StagingTablesBuilder.Get(StagingSchema.PrimaryStagingSchemaName);
    var pOptions = new ParallelOptions { MaxDegreeOfParallelism = _configuration.MaxDegreeOfParallelism };

    Parallel.ForEach(entities, pOptions, (e, loopState) =>
    {
        // Skip remaining work once any parallel iteration has faulted.
        if (!loopState.IsExceptional && EntityUtils.RequiresFederation(e))
        {
            fs.UpdateStdFederationTable(e, b.GetTable(EntityUtils.ToCtTableName(e)));
        }
    });
}
/// <summary>
/// Checks that the column names specified in the configuration, consolidation
/// section are valid for the given entity.
/// </summary>
/// <param name="consolidationParams">Consolidation configuration to validate.</param>
/// <exception cref="ApplicationException">
/// Thrown when an entity is declared more than once, or when a configured
/// consolidation column does not exist in the entity's staging table.
/// </exception>
public static void CheckNaturalKeyColumnsInConfiguration(ConsolidationParams consolidationParams)
{
    _log.Debug("Checking that consolidation key columns are valid in configuration");
    StagingTablesBuilder b = StagingTablesBuilder.Get(StagingSchema.PrimaryStagingSchemaName);

    // Nothing to validate when consolidation is switched off.
    if (!consolidationParams.Enabled)
    {
        return;
    }

    // HashSet gives O(1) duplicate detection; the original List.Contains scan
    // was O(n) per entity (O(n^2) overall).
    var entitiesUsed = new HashSet<Entity>();
    foreach (Entity entity in Enum.GetValues(typeof(Entity)))
    {
        if (!EntityUtils.CanParticipateInConsolidation(entity))
        {
            continue;
        }

        // Add returns false when the entity has already been seen.
        if (!entitiesUsed.Add(entity))
        {
            throw new ApplicationException(string.Format("Entity declared more than once in consolidation configuration: {0}", entity));
        }

        var entry = consolidationParams.Get(entity);
        if (entry != null && !entry.None)
        {
            string stagingTableName = EntityUtils.ToCtTableName(entity);
            var table = b.GetTable(stagingTableName);
            if (!table.ColumnExists(entry.Column))
            {
                throw new ApplicationException(string.Format("The specified consolidation column ({0}) does not exist in the entity: {1}", entry.Column, entity));
            }
        }
    }
}
private bool AllStagingTablesExist()
{
    // Delegate the per-table existence checks to the parallel worker.
    return DoParallelProcessing(StagingTablesBuilder.Get(_schemaName));
}
/// <summary>
/// Extracts every registered source timetable into the given staging schema,
/// running one ETL process per staging table, and accumulates row-count and
/// duration statistics across all of them.
/// </summary>
/// <param name="srcTimetableRecs">Source timetables to extract.</param>
/// <param name="stageSchemaName">Target staging schema name.</param>
/// <returns>Aggregated row counts and durations for the whole extraction.</returns>
/// <exception cref="ApplicationException">
/// Thrown when a timetable registration is missing, a timetable Id is seen
/// twice, or any ETL process reports errors (first error is the inner exception).
/// </exception>
private RowCountAndDuration ExtractTimetablesToStage(IReadOnlyList<SourceTimetableData> srcTimetableRecs, string stageSchemaName)
{
    var stats = new RowCountAndDuration();
    var b = StagingTablesBuilder.Get(stageSchemaName);
    var tables = b.GetTables();
    var cs = new ControlSchema(AdminConnectionString, Timeouts.AdminDatabase, _configuration.MaxDegreeOfParallelism, _configuration.Pipelines);

    // don't use Parallel.ForEach here (no gain)
    var processedTimetables = new HashSet<int>();
    foreach (var tt in srcTimetableRecs)
    {
        _log.DebugFormat("Extracting timetable ({0}) to stage ({1})", tt.Name, stageSchemaName);
        var ttRec = cs.GetSourceTimetableRecord(tt.Identifier);
        if (ttRec == null)
        {
            throw new ApplicationException(string.Format("Could not find source timetable registration: {0}", tt.Name));
        }

        // sanity check... (HashSet.Add returns false for a duplicate, replacing
        // the original Contains-then-Add double lookup)
        if (!processedTimetables.Add(ttRec.Id))
        {
            throw new ApplicationException(string.Format("Already processed a timetable with this Id: {0}", ttRec.Id));
        }

        // don't use Parallel.ForEach here (no gain)
        foreach (var t in tables)
        {
            var stagingTable = (V7StagingTable)t;
            using (var p = new StagingEtlProcess(
                       tt.ConnectionString,
                       AdminConnectionString,
                       stagingTable,
                       stageSchemaName,
                       Timeouts,
                       ttRec.Id,
                       _configuration.Pipelines))
            {
                p.Execute();
                stats += p.Stats;
                var errors = p.GetAllErrors().ToArray();
                if (errors.Any())
                {
                    var msg = $"Errors occurred during execution of staging process: {stagingTable.Name}";
                    _log.Error(msg);
                    // throw the first exception
                    throw new ApplicationException(msg, errors[0]);
                }
            }
        }
    }
    return stats;
}
// Diffs every history table between the primary (new) and secondary (old)
// staging schemas, in parallel, and writes the results to history via ETL.
// Each timetable Id within a table is diffed separately to keep the memory
// footprint low. pkInfo supplies source primary-key metadata per table name;
// logId tags all history writes from this run.
// Throws ApplicationException when PK info is missing for a table or when any
// ETL process reports errors (first error becomes the inner exception).
private void DoParallelProcessing(HistoryTablesBuilder b, Dictionary<string, PrimaryKeyInfo> pkInfo, long logId)
{
    var tables = b.GetTables();
    var sb = StagingTablesBuilder.Get(StagingSchema.PrimaryStagingSchemaName);
    var pOptions = new ParallelOptions { MaxDegreeOfParallelism = MaxDegreeOfParallelism };
    Parallel.ForEach(tables, pOptions, (historyTable, loopState) =>
    {
        // Skip remaining work once any parallel iteration has faulted.
        if (!loopState.IsExceptional)
        {
            string tableName = historyTable.Name;
            // "new" lives in the primary staging schema, "old" in the secondary.
            string tableNew = DatabaseUtils.GetQualifiedTableName(StagingSchema.PrimaryStagingSchemaName, tableName);
            string tableOld = DatabaseUtils.GetQualifiedTableName(StagingSchema.SecondaryStagingSchemaName, tableName);
            _log.DebugFormat("Performing diff on tables {0} and {1}", tableNew, tableOld);
            // Per-timetable row counts on each side drive the fast-path decisions below.
            var statsNew = SourceTimetableAndRowCount.Get(
                ConnectionString, TimeoutSecs, tableName, StagingSchema.PrimaryStagingSchemaName);
            var statsOld = SourceTimetableAndRowCount.Get(
                ConnectionString, TimeoutSecs, tableName, StagingSchema.SecondaryStagingSchemaName);
            HashSet<int> srcTimetableIds = UnionSrcTimetableIds(statsNew, statsOld);
            // we could perform the diff on the whole table but to keep memory footprint lower
            // for large tables we diff by timetable id...
            foreach (var timetableId in srcTimetableIds)
            {
                _log.DebugFormat("Performing diff for timetable Id {0}", timetableId);
                var stagingTableColumnNames = sb.GetColumnNames(tableName);
                EtlProcess etl;
                if (AllNewlyInserted(timetableId, statsNew, statsOld))
                {
                    // optimisation here (no need to diff, just insert all into history)...
                    _log.DebugFormat("All rows are new in table {0} for timetable {1}", tableNew, timetableId);
                    etl = new HistoryEtlProcessBasic(
                        ConnectionString, TimeoutSecs, historyTable, timetableId,
                        StagingSchema.PrimaryStagingSchemaName, HistoryStatusInsert, logId, PipelineOptions);
                }
                else if (AllNewlyDeleted(timetableId, statsNew, statsOld))
                {
                    // optimisation here (no need to diff)...
                    _log.DebugFormat("All rows have been deleted from table {0} for timetable {1}", tableNew, timetableId);
                    etl = new HistoryEtlProcessBasic(
                        ConnectionString, TimeoutSecs, historyTable, timetableId,
                        StagingSchema.SecondaryStagingSchemaName, HistoryStatusDelete, logId, PipelineOptions);
                }
                else
                {
                    // General case: rows exist on both sides, so a real diff is required.
                    var pkInfoForTable = pkInfo[tableName];
                    if (pkInfoForTable == null)
                    {
                        throw new ApplicationException(
                            $"Could not find primary key info for table: {tableName}");
                    }
                    // identityColumnCounts originate from the source timetable tables
                    // so we increment by 1 to account for the src_timetable_id column...
                    var identityColCount = pkInfoForTable.Columns.Count + 1;
                    var td = new StageTableDiffer(
                        ConnectionString, TimeoutSecs, tableOld, tableNew, timetableId, identityColCount);
                    var diff = td.Execute();
                    etl = new HistoryEtlProcessDiff(
                        ConnectionString, TimeoutSecs, historyTable, diff,
                        stagingTableColumnNames, logId, PipelineOptions);
                }
                etl.Execute();
                var errors = etl.GetAllErrors().ToArray();
                if (errors.Any())
                {
                    // Stop scheduling further tables before surfacing the failure.
                    loopState.Stop();
                    string msg = "Errors occurred during execution of history process";
                    _log.Error(msg);
                    // throw the first exception
                    throw new ApplicationException(msg, errors[0]);
                }
            }
        }
    });
}
/// <summary>
/// Checks integrity of stage data. Because we extract a table at a time from the source data
/// it's possible that we may find broken references. This situation is unacceptable so this check
/// will throw an exception allowing us to quit the process whilst still in temp staging.
/// </summary>
public void Execute()
{
    // Build the staging table definitions for this schema, then run the
    // integrity checks across them in parallel.
    var b = StagingTablesBuilder.Get(_schemaName);
    DoParallelProcessing(b);
}