コード例 #1
0
ファイル: SourceTimetable.cs プロジェクト: CELCAT/CTDataStore
        /// <summary>
        /// Records in <paramref name="report"/> every table that exists in the source
        /// timetable but not in the staging schema and, for tables present in both,
        /// every source column that has no counterpart in the staging table.
        /// </summary>
        /// <param name="report">Report that receives the extra tables and columns.</param>
        /// <param name="latestSchema">Staging schema definition used to look up staging columns.</param>
        /// <param name="tablesInStage">Names of the tables defined in the staging schema.</param>
        /// <param name="tablesInSource">Names of the tables found in the source timetable.</param>
        private void GetExtraTablesAndColumns(
            SourceCompatibilityReport report,
            StagingTablesBuilder latestSchema,
            IReadOnlyList <string> tablesInStage,
            IReadOnlyList <string> tablesInSource)
        {
            foreach (var srcTable in tablesInSource)
            {
                if (!tablesInStage.Contains(srcTable, StringComparer.OrdinalIgnoreCase))
                {
                    _log.DebugFormat("Extra table {0}", srcTable);
                    report.AddExtraTable(srcTable);
                    continue;
                }

                // any extra columns in source?
                var srcSchema = GetSchemaForTable(srcTable);

                // Index the staging column names (case-insensitively) so that each
                // source column can be checked against them.
                var stageColumnNames = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
                foreach (var stageCol in latestSchema.GetColumns(srcTable))
                {
                    stageColumnNames.Add(stageCol.Name);
                }

                // An "extra" column is one the source has but staging does not, so the
                // source schema is what must be iterated here. Iterating the staging
                // columns instead would report missing columns as extra ones.
                foreach (var srcCol in srcSchema)
                {
                    if (!stageColumnNames.Contains(srcCol.Name))
                    {
                        _log.DebugFormat("Extra column {0}.{1}", srcTable, srcCol.Name);
                        report.AddExtraColumn(srcTable, srcCol.Name);
                    }
                }
            }
        }
コード例 #2
0
        public void CanGenerateStagingTables()
        {
            // The builder must expose at least one staging table name.
            var tableNames = StagingTablesBuilder.Get("dummy").GetTableNames();

            Assert.IsTrue(tableNames.Any());
        }
コード例 #3
0
        /// <summary>
        /// Verifies that each staging table (other than CT_CONFIG) lists the source
        /// primary key columns immediately after its first column, in the same order.
        /// The table diff algorithm assumes this layout.
        /// </summary>
        /// <param name="pkInfoList">Primary key information keyed by table name.</param>
        /// <exception cref="ApplicationException">
        /// A staging table has too few columns or its leading columns do not match the primary key.
        /// </exception>
        private static void CheckStagingTablesListPrimaryKeysFirst(Dictionary <string, PrimaryKeyInfo> pkInfoList)
        {
            var builder = StagingTablesBuilder.Get(StagingSchema.PrimaryStagingSchemaName);

            foreach (var table in builder.GetTables(includePseudoRegisterMarkTable: false))
            {
                if (table.Name.Equals("CT_CONFIG", StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                PrimaryKeyInfo keyInfo = pkInfoList[table.Name];
                if (keyInfo == null)
                {
                    continue;
                }

                var columns = table.Columns;

                // one staging column (src_timetable_id) is not part of the source key,
                // so it is deducted before comparing the counts
                if (columns.Count - 1 < keyInfo.Columns.Count)
                {
                    throw new ApplicationException(
                              $"Staging table contains fewer columns than primary key of source: {table.Name}");
                }

                for (int keyIndex = 0; keyIndex < keyInfo.Columns.Count; ++keyIndex)
                {
                    // staging columns are offset by one because of src_timetable_id
                    var keyColumnName     = keyInfo.Columns[keyIndex].ColumnName;
                    var stagingColumnName = columns[keyIndex + 1].Name;

                    if (!keyColumnName.Equals(stagingColumnName, StringComparison.OrdinalIgnoreCase))
                    {
                        throw new ApplicationException(
                                  $"Incorrect staging table column order: {table.Name} (primary key columns must be defined first)");
                    }
                }
            }
        }
コード例 #4
0
ファイル: StagingSchema.cs プロジェクト: CELCAT/CTDataStore
        /// <summary>
        /// Ensures the schema exists and then executes the staging tables builder
        /// for this schema against the configured connection.
        /// </summary>
        private void InternalCreateEmptyStagingTables()
        {
            EnsureSchemaCreated();

            StagingTablesBuilder.Get(_schemaName).Execute(ConnectionString, TimeoutSecs);
        }
コード例 #5
0
        public void StagingColumnsAreAllNullable()
        {
            // every column of every staging table is expected to be nullable
            var builder = StagingTablesBuilder.Get("dummy");

            foreach (var table in builder.GetTables())
            {
                foreach (var column in table.Columns)
                {
                    bool isNullable = column.Nullable == ColumnNullable.True;

                    Assert.IsTrue(isNullable, "Table = {0}, Col = {1}", table.Name, column.Name);
                }
            }
        }
コード例 #6
0
ファイル: SourceTimetable.cs プロジェクト: CELCAT/CTDataStore
        /// <summary>
        /// Builds a report describing whether the source timetable schema is compatible
        /// with CTDS (i.e. with the staging area): missing tables/columns are collected
        /// first, then extra tables/columns.
        /// </summary>
        /// <returns>The populated compatibility report.</returns>
        public SourceCompatibilityReport GetCompatibilityWithStagingSchema()
        {
            _log.DebugFormat("Generating compatibility report for source timetable: {0}", _connectionDescription);

            var report         = new SourceCompatibilityReport(_connectionDescription);
            var stagingBuilder = StagingTablesBuilder.Get(StagingSchema.PrimaryStagingSchemaName);

            // gather the table names on both sides before comparing them
            var stageTableNames  = stagingBuilder.GetTableNames();
            var sourceTableNames = GetTables();

            GetMissingTablesAndColumns(report, stagingBuilder, stageTableNames, sourceTableNames);
            GetExtraTablesAndColumns(report, stagingBuilder, stageTableNames, sourceTableNames);

            return report;
        }
コード例 #7
0
ファイル: SourceTimetable.cs プロジェクト: CELCAT/CTDataStore
        /// <summary>
        /// Records in <paramref name="report"/> every staging table that is absent from
        /// the source and, for tables present in both, every staging column that is
        /// absent from the source or whose data type does not match.
        /// </summary>
        /// <param name="report">Report that receives the findings.</param>
        /// <param name="latestSchema">Staging schema definition used to look up staging columns.</param>
        /// <param name="tablesInStage">Names of the tables defined in the staging schema.</param>
        /// <param name="tablesInSource">Names of the tables found in the source timetable.</param>
        private void GetMissingTablesAndColumns(
            SourceCompatibilityReport report,
            StagingTablesBuilder latestSchema,
            IReadOnlyList <string> tablesInStage,
            IReadOnlyList <string> tablesInSource)
        {
            foreach (var stageTable in tablesInStage)
            {
                // the pseudo register mark table is not actually a member of the CT7 schema
                if (stageTable.Equals(StagingTablesBuilder.PseudoRegisterMarkTable, StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                if (!tablesInSource.Contains(stageTable, StringComparer.OrdinalIgnoreCase))
                {
                    _log.ErrorFormat("Missing table {0}", stageTable);
                    report.AddMissingTable(stageTable);
                    continue;
                }

                // the table exists on both sides, so compare it column by column
                var sourceColumns = GetSchemaForTable(stageTable);

                foreach (var stageColumn in latestSchema.GetColumns(stageTable))
                {
                    // these two columns are excluded from the comparison
                    bool excluded =
                        stageColumn.Name.Equals(ColumnConstants.SrcTimetableIdColumnName, StringComparison.OrdinalIgnoreCase) ||
                        stageColumn.Name.Equals(ColumnConstants.RegistersReqResolvedColumnName, StringComparison.OrdinalIgnoreCase);

                    if (excluded)
                    {
                        continue;
                    }

                    var sourceColumn = sourceColumns.FirstOrDefault(
                        x => x.Name.Equals(stageColumn.Name, StringComparison.OrdinalIgnoreCase));

                    if (sourceColumn == null)
                    {
                        _log.ErrorFormat("Missing column {0}.{1}", stageTable, stageColumn.Name);
                        report.AddMissingColumn(stageTable, stageColumn.Name);
                        continue;
                    }

                    bool typesMatch = DbTypeMatching.MatchingDataTypes(
                        sourceColumn.DataType, sourceColumn.CharacterMaxLength, stageColumn.SqlDbType, stageColumn.Length);

                    if (!typesMatch)
                    {
                        _log.ErrorFormat("Incompatible data type for column {0}.{1}", stageTable, stageColumn.Name);
                        report.BadDataType(stageTable, stageColumn.Name);
                    }
                }
            }
        }
コード例 #8
0
ファイル: StagingSchema.cs プロジェクト: CELCAT/CTDataStore
        /// <summary>
        /// Checks, in parallel, whether every table named by <paramref name="b"/> exists
        /// in this schema. The loop is stopped as soon as one table is found missing.
        /// </summary>
        /// <param name="b">Builder supplying the table names to check.</param>
        /// <returns><c>false</c> if a table was found missing; otherwise <c>true</c>.</returns>
        private bool DoParallelProcessing(StagingTablesBuilder b)
        {
            var allTablesExist = true;

            var options = new ParallelOptions();
            options.MaxDegreeOfParallelism = MaxDegreeOfParallelism;

            Parallel.ForEach(b.GetTableNames(), options, (tableName, loopState) =>
            {
                if (loopState.IsExceptional)
                {
                    return;
                }

                if (DatabaseUtils.TableExists(ConnectionString, TimeoutSecs, tableName, _schemaName))
                {
                    return;
                }

                // one missing table is enough to decide the answer
                allTablesExist = false;
                loopState.Stop();
            });

            return allTablesExist;
        }
コード例 #9
0
ファイル: AdminDatabase.cs プロジェクト: CELCAT/CTDataStore
        /// <summary>
        /// Updates the consolidation tables, in parallel, for every entity that can
        /// participate in consolidation.
        /// </summary>
        /// <param name="fs">Federation schema on which the consolidation tables are updated.</param>
        private void DoParallelConsolidationProcessing(FederationSchema fs)
        {
            List<Entity> allEntities = Enum.GetValues(typeof(Entity)).Cast<Entity>().ToList();

            var stagingBuilder = StagingTablesBuilder.Get(StagingSchema.PrimaryStagingSchemaName);

            var options = new ParallelOptions();
            options.MaxDegreeOfParallelism = _configuration.MaxDegreeOfParallelism;

            Parallel.ForEach(allEntities, options, (entity, loopState) =>
            {
                if (loopState.IsExceptional || !EntityUtils.CanParticipateInConsolidation(entity))
                {
                    return;
                }

                var entry        = _configuration.Consolidation.Get(entity);
                var stagingTable = stagingBuilder.GetTable(EntityUtils.ToCtTableName(entity));

                fs.UpdateConsolidationTables(entity, stagingTable, entry);
            });
        }
コード例 #10
0
        /// <summary>
        /// Runs <c>Check</c> on every table supplied by <paramref name="b"/> in parallel.
        /// </summary>
        /// <param name="b">Builder supplying the tables to check.</param>
        /// <exception cref="ApplicationException">
        /// Thrown from the loop body when a table reports one or more issues.
        /// </exception>
        private void DoParallelProcessing(StagingTablesBuilder b)
        {
            var options = new ParallelOptions();
            options.MaxDegreeOfParallelism = _maxDegreeOfParallelism;

            Parallel.ForEach(b.GetTables(), options, (table, loopState) =>
            {
                if (loopState.IsExceptional)
                {
                    return;
                }

                int issuesFound = Check(table);
                if (issuesFound <= 0)
                {
                    return;
                }

                // ask the remaining iterations to stop before reporting the failure
                loopState.Stop();
                throw new ApplicationException($"Found {issuesFound} faulty rows in {table.Name}");
            });
        }
コード例 #11
0
        /// <summary>
        /// Loads the primary key information of every source timetable and, when at
        /// least one timetable was supplied, runs the primary key checks on it.
        /// </summary>
        /// <param name="timetables">Source timetables whose primary keys are inspected.</param>
        /// <param name="commandTimeoutSecs">Timeout passed to the primary key lookup.</param>
        private static void CheckIdentityColumns(IReadOnlyList <SourceTimetableData> timetables, int commandTimeoutSecs)
        {
            _log.Debug("Checking primary keys");

            var b = StagingTablesBuilder.Get(StagingSchema.PrimaryStagingSchemaName);

            var pkInfoPerTimetable = new List<Dictionary<string, PrimaryKeyInfo>>();

            for (int i = 0; i < timetables.Count; ++i)
            {
                pkInfoPerTimetable.Add(
                    DatabaseUtils.GetPrimaryKeyInfo(timetables[i].ConnectionString, commandTimeoutSecs));
            }

            if (pkInfoPerTimetable.Count == 0)
            {
                return;
            }

            CheckSamePrimaryKeyColumns(pkInfoPerTimetable, b);

            // the remaining checks only look at the first timetable's key information
            CheckStagingTablesListPrimaryKeysFirst(pkInfoPerTimetable[0]);
            CheckEntitiesHaveSinglePrimaryKeyId(pkInfoPerTimetable[0]);
        }
コード例 #12
0
ファイル: AdminDatabase.cs プロジェクト: CELCAT/CTDataStore
        /// <summary>
        /// Updates the standard federation table, in parallel, for every entity that
        /// requires federation.
        /// </summary>
        private void DoParallelFederationProcessing()
        {
            var federationSchema = new FederationSchema(
                AdminConnectionString,
                Timeouts.AdminDatabase,
                _configuration.MaxDegreeOfParallelism,
                _configuration.Pipelines);

            List<Entity> allEntities = Enum.GetValues(typeof(Entity)).Cast<Entity>().ToList();

            var stagingBuilder = StagingTablesBuilder.Get(StagingSchema.PrimaryStagingSchemaName);

            var options = new ParallelOptions();
            options.MaxDegreeOfParallelism = _configuration.MaxDegreeOfParallelism;

            Parallel.ForEach(allEntities, options, (entity, loopState) =>
            {
                if (loopState.IsExceptional || !EntityUtils.RequiresFederation(entity))
                {
                    return;
                }

                var stagingTable = stagingBuilder.GetTable(EntityUtils.ToCtTableName(entity));
                federationSchema.UpdateStdFederationTable(entity, stagingTable);
            });
        }
コード例 #13
0
        /// <summary>
        /// Validates the consolidation section of the configuration: for each entity
        /// that can participate in consolidation and has a consolidation entry that is
        /// not marked as none, the configured column must exist in the entity's staging table.
        /// Nothing is validated when consolidation is disabled.
        /// </summary>
        /// <param name="consolidationParams">Consolidation settings to validate.</param>
        /// <exception cref="ApplicationException">
        /// An entity is encountered more than once, or a configured column does not exist.
        /// </exception>
        public static void CheckNaturalKeyColumnsInConfiguration(ConsolidationParams consolidationParams)
        {
            _log.Debug("Checking that consolidation key columns are valid in configuration");

            StagingTablesBuilder builder = StagingTablesBuilder.Get(StagingSchema.PrimaryStagingSchemaName);

            if (!consolidationParams.Enabled)
            {
                return;
            }

            var seenEntities = new HashSet<Entity>();

            foreach (Entity entity in Enum.GetValues(typeof(Entity)))
            {
                if (!EntityUtils.CanParticipateInConsolidation(entity))
                {
                    continue;
                }

                // Add returns false when the entity has already been seen
                if (!seenEntities.Add(entity))
                {
                    throw new ApplicationException(string.Format("Entity declared more than once in consolidation configuration: {0}", entity));
                }

                var entry = consolidationParams.Get(entity);
                if (entry == null || entry.None)
                {
                    continue;
                }

                var table = builder.GetTable(EntityUtils.ToCtTableName(entity));

                if (!table.ColumnExists(entry.Column))
                {
                    throw new ApplicationException(string.Format("The specified consolidation column ({0}) does not exist in the entity: {1}", entry.Column, entity));
                }
            }
        }
コード例 #14
0
        /// <summary>
        /// Checks that all source timetables have identical primary key column
        /// definitions for the tables that are to be extracted, which is an important
        /// consideration when performing the full database diff.
        /// </summary>
        /// <param name="allPkInfo">Primary key information (keyed by table name), one dictionary per source timetable.</param>
        /// <param name="b">Builder supplying the names of the tables to check.</param>
        /// <exception cref="ApplicationException">
        /// A timetable has no primary key information for a table, or two timetables
        /// define different primary key columns for the same table.
        /// </exception>
        private static void CheckSamePrimaryKeyColumns(List <Dictionary <string, PrimaryKeyInfo> > allPkInfo, StagingTablesBuilder b)
        {
            foreach (var tableName in b.GetTableNames(includePseudoRegisterMarkTable: false))
            {
                // primary key info of the first timetable; every other timetable is compared against it
                PrimaryKeyInfo baseInfo = null;

                foreach (var info in allPkInfo)
                {
                    // TryGetValue rather than the indexer: the indexer throws
                    // KeyNotFoundException for a missing table, which would bypass the
                    // descriptive error below.
                    PrimaryKeyInfo pkInfo;
                    if (!info.TryGetValue(tableName, out pkInfo) || pkInfo == null)
                    {
                        throw new ApplicationException($"Could not find primary key data for table: {tableName}");
                    }

                    if (baseInfo == null)
                    {
                        baseInfo = pkInfo;
                        continue;
                    }

                    if (!PrimaryKeyInfo.Identical(baseInfo, pkInfo))
                    {
                        throw new ApplicationException(
                            "There is an incompatibility between source timetables." +
                            $" Primary key column definitions are different for table {tableName}");
                    }
                }
            }
        }
コード例 #15
0
ファイル: StagingSchema.cs プロジェクト: CELCAT/CTDataStore
        /// <summary>
        /// Returns the result of the parallel table check for this schema's staging tables.
        /// </summary>
        private bool AllStagingTablesExist()
        {
            var builder = StagingTablesBuilder.Get(_schemaName);
            bool allExist = DoParallelProcessing(builder);

            return allExist;
        }
コード例 #16
0
        /// <summary>
        /// Processes every history table supplied by <paramref name="b"/> in parallel. For each
        /// table, the primary ("new") and secondary ("old") staging tables are compared one source
        /// timetable id at a time and the resulting ETL process is executed.
        /// </summary>
        /// <param name="b">Builder supplying the history tables to process.</param>
        /// <param name="pkInfo">Primary key information keyed by table name.</param>
        /// <param name="logId">Identifier passed through to each ETL process.</param>
        /// <exception cref="ApplicationException">
        /// Thrown from the loop body when primary key information is null for a table that needs
        /// a diff, or when an ETL process reports errors.
        /// </exception>
        private void DoParallelProcessing(HistoryTablesBuilder b, Dictionary <string, PrimaryKeyInfo> pkInfo, long logId)
        {
            var tables = b.GetTables();

            var sb = StagingTablesBuilder.Get(StagingSchema.PrimaryStagingSchemaName);

            var pOptions = new ParallelOptions {
                MaxDegreeOfParallelism = MaxDegreeOfParallelism
            };

            Parallel.ForEach(tables, pOptions, (historyTable, loopState) =>
            {
                // skip the work if another iteration has already thrown
                if (!loopState.IsExceptional)
                {
                    string tableName = historyTable.Name;

                    // the same table name is qualified with the primary (new) and secondary (old) staging schemas
                    string tableNew = DatabaseUtils.GetQualifiedTableName(StagingSchema.PrimaryStagingSchemaName, tableName);
                    string tableOld = DatabaseUtils.GetQualifiedTableName(StagingSchema.SecondaryStagingSchemaName, tableName);

                    _log.DebugFormat("Performing diff on tables {0} and {1}", tableNew, tableOld);

                    var statsNew = SourceTimetableAndRowCount.Get(
                        ConnectionString, TimeoutSecs, tableName, StagingSchema.PrimaryStagingSchemaName);

                    var statsOld = SourceTimetableAndRowCount.Get(
                        ConnectionString, TimeoutSecs, tableName, StagingSchema.SecondaryStagingSchemaName);

                    HashSet <int> srcTimetableIds = UnionSrcTimetableIds(statsNew, statsOld);

                    // we could perform the diff on the whole table but to keep memory footprint lower
                    // for large tables we diff by timetable id...
                    foreach (var timetableId in srcTimetableIds)
                    {
                        _log.DebugFormat("Performing diff for timetable Id {0}", timetableId);

                        // NOTE(review): fetched on every iteration although it depends only on
                        // tableName and is used only by the diff branch below
                        var stagingTableColumnNames = sb.GetColumnNames(tableName);
                        EtlProcess etl;

                        if (AllNewlyInserted(timetableId, statsNew, statsOld))
                        {
                            // optimisation here (no need to diff, just insert all into history)...
                            _log.DebugFormat("All rows are new in table {0} for timetable {1}", tableNew, timetableId);

                            etl = new HistoryEtlProcessBasic(
                                ConnectionString,
                                TimeoutSecs,
                                historyTable,
                                timetableId,
                                StagingSchema.PrimaryStagingSchemaName,
                                HistoryStatusInsert,
                                logId,
                                PipelineOptions);
                        }
                        else if (AllNewlyDeleted(timetableId, statsNew, statsOld))
                        {
                            // optimisation here (no need to diff)...
                            _log.DebugFormat("All rows have been deleted from table {0} for timetable {1}", tableNew, timetableId);

                            etl = new HistoryEtlProcessBasic(
                                ConnectionString,
                                TimeoutSecs,
                                historyTable,
                                timetableId,
                                StagingSchema.SecondaryStagingSchemaName,
                                HistoryStatusDelete,
                                logId,
                                PipelineOptions);
                        }
                        else
                        {
                            // NOTE(review): pkInfo is a Dictionary, so its indexer throws
                            // KeyNotFoundException for a missing table name; the null check below
                            // only catches an entry whose stored value is null
                            var pkInfoForTable = pkInfo[tableName];
                            if (pkInfoForTable == null)
                            {
                                throw new ApplicationException(
                                    $"Could not find primary key info for table: {tableName}");
                            }

                            // identityColumnCounts originate from the source timetable tables
                            // so we increment by 1 to account for the src_timetable_id column...
                            var identityColCount = pkInfoForTable.Columns.Count + 1;

                            var td = new StageTableDiffer(
                                ConnectionString, TimeoutSecs, tableOld, tableNew, timetableId, identityColCount);

                            var diff = td.Execute();

                            etl = new HistoryEtlProcessDiff(
                                ConnectionString, TimeoutSecs, historyTable, diff, stagingTableColumnNames, logId, PipelineOptions);
                        }

                        etl.Execute();

                        // errors are collected by the ETL process and inspected after it has run
                        var errors = etl.GetAllErrors().ToArray();
                        if (errors.Any())
                        {
                            // ask the remaining iterations to stop before reporting the failure
                            loopState.Stop();

                            string msg = "Errors occurred during execution of history process";
                            _log.Error(msg);

                            // throw the first exception
                            throw new ApplicationException(msg, errors[0]);
                        }
                    }
                }
            });
        }
コード例 #17
0
ファイル: AdminDatabase.cs プロジェクト: CELCAT/CTDataStore
        /// <summary>
        /// Runs a staging ETL process for every staging table of every supplied source
        /// timetable, one after another, and accumulates the statistics of each run.
        /// </summary>
        /// <param name="srcTimetableRecs">Source timetables to extract.</param>
        /// <param name="stageSchemaName">Name of the staging schema to extract into.</param>
        /// <returns>The accumulated statistics of all executed ETL processes.</returns>
        /// <exception cref="ApplicationException">
        /// A timetable has no registration record, a timetable id is encountered twice,
        /// or an ETL process reports errors.
        /// </exception>
        private RowCountAndDuration ExtractTimetablesToStage(IReadOnlyList <SourceTimetableData> srcTimetableRecs, string stageSchemaName)
        {
            var stats = new RowCountAndDuration();

            var tables = StagingTablesBuilder.Get(stageSchemaName).GetTables();

            var controlSchema = new ControlSchema(
                AdminConnectionString,
                Timeouts.AdminDatabase,
                _configuration.MaxDegreeOfParallelism,
                _configuration.Pipelines);

            // don't use Parallel.ForEach here (no gain)
            var seenTimetableIds = new HashSet<int>();

            foreach (var tt in srcTimetableRecs)
            {
                _log.DebugFormat("Extracting timetable ({0}) to stage ({1})", tt.Name, stageSchemaName);

                var ttRec = controlSchema.GetSourceTimetableRecord(tt.Identifier);
                if (ttRec == null)
                {
                    throw new ApplicationException(string.Format("Could not find source timetable registration: {0}", tt.Name));
                }

                // sanity check: Add returns false when the id has been processed before
                if (!seenTimetableIds.Add(ttRec.Id))
                {
                    throw new ApplicationException(string.Format("Already processed a timetable with this Id: {0}", ttRec.Id));
                }

                // don't use Parallel.ForEach here (no gain)
                foreach (var t in tables)
                {
                    var stagingTable = (V7StagingTable)t;

                    using (var process = new StagingEtlProcess(
                               tt.ConnectionString,
                               AdminConnectionString,
                               stagingTable,
                               stageSchemaName,
                               Timeouts,
                               ttRec.Id,
                               _configuration.Pipelines))
                    {
                        process.Execute();
                        stats += process.Stats;

                        var errors = process.GetAllErrors().ToArray();
                        if (errors.Length == 0)
                        {
                            continue;
                        }

                        var msg = $"Errors occurred during execution of staging process: {stagingTable.Name}";
                        _log.Error(msg);

                        // throw the first exception
                        throw new ApplicationException(msg, errors[0]);
                    }
                }
            }

            return stats;
        }
コード例 #18
0
        /// <summary>
        /// Checks the integrity of the stage data. Tables are extracted from the source
        /// one at a time, so broken references may be present. That situation is
        /// unacceptable, so this check throws an exception, allowing the process to be
        /// abandoned whilst still in temp staging.
        /// </summary>
        public void Execute()
        {
            DoParallelProcessing(StagingTablesBuilder.Get(_schemaName));
        }