示例#1
0
        public void CanGenerateStagingTables()
        {
            // Build against a placeholder schema name and confirm that
            // at least one table name is reported.
            var tableNames = StagingTablesBuilder.Get("dummy").GetTableNames();

            Assert.IsTrue(tableNames.Any());
        }
示例#2
0
        /// <summary>
        /// Verifies that every staging table lists the source primary key columns first
        /// (directly after the leading src_timetable_id column) and in primary key order.
        /// The algorithm that performs table diffs assumes this layout.
        /// </summary>
        /// <param name="pkInfoList">Primary key information looked up by table name.</param>
        /// <exception cref="ApplicationException">
        /// A staging table has fewer columns than the source primary key, or its
        /// columns are not defined in primary key order.
        /// </exception>
        private static void CheckStagingTablesListPrimaryKeysFirst(Dictionary <string, PrimaryKeyInfo> pkInfoList)
        {
            var builder = StagingTablesBuilder.Get(StagingSchema.PrimaryStagingSchemaName);

            foreach (var stagingTable in builder.GetTables(includePseudoRegisterMarkTable: false))
            {
                // CT_CONFIG is excluded from this check
                if (stagingTable.Name.Equals("CT_CONFIG", StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                // NOTE: the dictionary indexer throws if the table has no entry at all;
                // only an entry holding null is skipped here.
                PrimaryKeyInfo keyInfo = pkInfoList[stagingTable.Name];
                if (keyInfo == null)
                {
                    continue;
                }

                var columns = stagingTable.Columns;

                // deduct 1 for src_timetable_id
                if (columns.Count - 1 < keyInfo.Columns.Count)
                {
                    throw new ApplicationException(
                              $"Staging table contains fewer columns than primary key of source: {stagingTable.Name}");
                }

                // key column i must match staging column i + 1 (staging column 0 is src_timetable_id)
                for (int i = 0; i < keyInfo.Columns.Count; ++i)
                {
                    string keyColumnName     = keyInfo.Columns[i].ColumnName;
                    bool   inExpectedPosition = keyColumnName.Equals(columns[i + 1].Name, StringComparison.OrdinalIgnoreCase);

                    if (!inExpectedPosition)
                    {
                        throw new ApplicationException(
                                  $"Incorrect staging table column order: {stagingTable.Name} (primary key columns must be defined first)");
                    }
                }
            }
        }
示例#3
0
        /// <summary>
        /// Ensures the schema has been created, then executes the staging tables
        /// builder for <c>_schemaName</c> using the current connection string and timeout.
        /// </summary>
        private void InternalCreateEmptyStagingTables()
        {
            EnsureSchemaCreated();

            StagingTablesBuilder
                .Get(_schemaName)
                .Execute(ConnectionString, TimeoutSecs);
        }
示例#4
0
        public void StagingColumnsAreAllNullable()
        {
            // All columns in the staging tables are left nullable, so every
            // column of every table must report ColumnNullable.True.
            foreach (var table in StagingTablesBuilder.Get("dummy").GetTables())
            {
                foreach (var column in table.Columns)
                {
                    bool isNullable = column.Nullable == ColumnNullable.True;

                    Assert.IsTrue(isNullable, "Table = {0}, Col = {1}", table.Name, column.Name);
                }
            }
        }
示例#5
0
        /// <summary>
        /// Determines whether the source timetable schema is compatible with CTDS
        /// (i.e. with the staging area) by comparing the source tables against the
        /// primary staging schema.
        /// </summary>
        /// <returns>
        /// A report for this source connection, populated by the missing and extra
        /// table/column checks.
        /// </returns>
        public SourceCompatibilityReport GetCompatibilityWithStagingSchema()
        {
            _log.DebugFormat("Generating compatibility report for source timetable: {0}", _connectionDescription);

            var report        = new SourceCompatibilityReport(_connectionDescription);
            var stagingSchema = StagingTablesBuilder.Get(StagingSchema.PrimaryStagingSchemaName);

            // gather both table lists up front; each check below compares them
            var stageTableNames  = stagingSchema.GetTableNames();
            var sourceTableNames = GetTables();

            GetMissingTablesAndColumns(report, stagingSchema, stageTableNames, sourceTableNames);
            GetExtraTablesAndColumns(report, stagingSchema, stageTableNames, sourceTableNames);

            return report;
        }
示例#6
0
        /// <summary>
        /// Updates the consolidation tables, in parallel, for every entity that can
        /// participate in consolidation.
        /// </summary>
        /// <param name="fs">Federation schema on which the consolidation tables are updated.</param>
        private void DoParallelConsolidationProcessing(FederationSchema fs)
        {
            // Enum.GetValues yields an Entity[] at runtime, so a single Cast<Entity>() is
            // sufficient; an intermediate Cast<object>() would only box each value.
            List <Entity> entities = Enum.GetValues(typeof(Entity)).Cast <Entity>().ToList();

            var b = StagingTablesBuilder.Get(StagingSchema.PrimaryStagingSchemaName);

            var pOptions = new ParallelOptions {
                MaxDegreeOfParallelism = _configuration.MaxDegreeOfParallelism
            };

            Parallel.ForEach(entities, pOptions, (e, loopState) =>
            {
                // skip further work once another iteration has thrown
                if (!loopState.IsExceptional && EntityUtils.CanParticipateInConsolidation(e))
                {
                    var entry = _configuration.Consolidation.Get(e);
                    fs.UpdateConsolidationTables(e, b.GetTable(EntityUtils.ToCtTableName(e)), entry);
                }
            });
        }
示例#7
0
        /// <summary>
        /// Collects primary key information from every source timetable and, when at
        /// least one timetable is supplied, runs the primary key consistency checks.
        /// </summary>
        /// <param name="timetables">Source timetables whose connection strings are queried.</param>
        /// <param name="commandTimeoutSecs">Command timeout, in seconds, for the primary key queries.</param>
        private static void CheckIdentityColumns(IReadOnlyList <SourceTimetableData> timetables, int commandTimeoutSecs)
        {
            _log.Debug("Checking primary keys");

            var builder = StagingTablesBuilder.Get(StagingSchema.PrimaryStagingSchemaName);

            // one dictionary per timetable, in the same order as the input list
            List <Dictionary <string, PrimaryKeyInfo> > allPkInfo = timetables
                .Select(tt => DatabaseUtils.GetPrimaryKeyInfo(tt.ConnectionString, commandTimeoutSecs))
                .ToList();

            if (allPkInfo.Count == 0)
            {
                return;
            }

            CheckSamePrimaryKeyColumns(allPkInfo, builder);

            // the remaining checks use only the first timetable's key information
            var firstPkInfo = allPkInfo[0];
            CheckStagingTablesListPrimaryKeysFirst(firstPkInfo);
            CheckEntitiesHaveSinglePrimaryKeyId(firstPkInfo);
        }
示例#8
0
        /// <summary>
        /// Updates the standard federation table, in parallel, for every entity
        /// that requires federation.
        /// </summary>
        private void DoParallelFederationProcessing()
        {
            var fs = new FederationSchema(
                AdminConnectionString,
                Timeouts.AdminDatabase,
                _configuration.MaxDegreeOfParallelism,
                _configuration.Pipelines);

            // Enum.GetValues yields an Entity[] at runtime, so a single Cast<Entity>() is
            // sufficient; an intermediate Cast<object>() would only box each value.
            List <Entity> entities = Enum.GetValues(typeof(Entity)).Cast <Entity>().ToList();

            var b        = StagingTablesBuilder.Get(StagingSchema.PrimaryStagingSchemaName);
            var pOptions = new ParallelOptions {
                MaxDegreeOfParallelism = _configuration.MaxDegreeOfParallelism
            };

            Parallel.ForEach(entities, pOptions, (e, loopState) =>
            {
                // skip further work once another iteration has thrown
                if (!loopState.IsExceptional && EntityUtils.RequiresFederation(e))
                {
                    fs.UpdateStdFederationTable(e, b.GetTable(EntityUtils.ToCtTableName(e)));
                }
            });
        }
        /// <summary>
        /// Validates the consolidation section of the configuration: for each entity
        /// that can participate in consolidation, the configured column must exist
        /// in that entity's staging table. Nothing is validated when consolidation
        /// is disabled.
        /// </summary>
        /// <param name="consolidationParams">Consolidation settings to validate.</param>
        /// <exception cref="ApplicationException">
        /// An entity is encountered more than once, or a configured consolidation
        /// column does not exist in the entity's staging table.
        /// </exception>
        public static void CheckNaturalKeyColumnsInConfiguration(ConsolidationParams consolidationParams)
        {
            _log.Debug("Checking that consolidation key columns are valid in configuration");

            StagingTablesBuilder builder = StagingTablesBuilder.Get(StagingSchema.PrimaryStagingSchemaName);

            if (!consolidationParams.Enabled)
            {
                return;
            }

            var seenEntities = new List <Entity>();

            foreach (Entity entity in Enum.GetValues(typeof(Entity)))
            {
                if (!EntityUtils.CanParticipateInConsolidation(entity))
                {
                    continue;
                }

                if (seenEntities.Contains(entity))
                {
                    throw new ApplicationException(string.Format("Entity declared more than once in consolidation configuration: {0}", entity));
                }

                seenEntities.Add(entity);

                // entities with no entry, or an entry flagged None, have no column to validate
                var entry = consolidationParams.Get(entity);
                if (entry == null || entry.None)
                {
                    continue;
                }

                var stagingTable = builder.GetTable(EntityUtils.ToCtTableName(entity));
                if (stagingTable.ColumnExists(entry.Column))
                {
                    continue;
                }

                throw new ApplicationException(string.Format("The specified consolidation column ({0}) does not exist in the entity: {1}", entry.Column, entity));
            }
        }
示例#10
0
        /// <summary>
        /// Runs the parallel processing step against the staging tables builder for
        /// <c>_schemaName</c> and returns its result.
        /// </summary>
        /// <returns>The value returned by <c>DoParallelProcessing</c>.</returns>
        private bool AllStagingTablesExist()
        {
            var stagingBuilder = StagingTablesBuilder.Get(_schemaName);
            bool outcome       = DoParallelProcessing(stagingBuilder);

            return outcome;
        }
示例#11
0
        /// <summary>
        /// Runs a staging ETL process for every staging table of every source
        /// timetable, accumulating the statistics of each process.
        /// </summary>
        /// <param name="srcTimetableRecs">Source timetables to extract.</param>
        /// <param name="stageSchemaName">Name of the stage schema to extract into.</param>
        /// <returns>The accumulated statistics of all executed staging processes.</returns>
        /// <exception cref="ApplicationException">
        /// A timetable has no registration record, a timetable Id is encountered twice,
        /// or a staging process reports errors (the first error is the inner exception).
        /// </exception>
        private RowCountAndDuration ExtractTimetablesToStage(IReadOnlyList <SourceTimetableData> srcTimetableRecs, string stageSchemaName)
        {
            var totals = new RowCountAndDuration();

            var stagingTables = StagingTablesBuilder.Get(stageSchemaName).GetTables();

            var controlSchema = new ControlSchema(AdminConnectionString, Timeouts.AdminDatabase, _configuration.MaxDegreeOfParallelism, _configuration.Pipelines);

            // timetable Ids already handled, used to detect duplicates
            var seenTimetableIds = new HashSet <int>();

            // don't use Parallel.ForEach here (no gain)
            foreach (var timetable in srcTimetableRecs)
            {
                _log.DebugFormat("Extracting timetable ({0}) to stage ({1})", timetable.Name, stageSchemaName);

                var registration = controlSchema.GetSourceTimetableRecord(timetable.Identifier);
                if (registration == null)
                {
                    throw new ApplicationException(string.Format("Could not find source timetable registration: {0}", timetable.Name));
                }

                // sanity check: Add returns false when the Id has been seen before
                if (!seenTimetableIds.Add(registration.Id))
                {
                    throw new ApplicationException(string.Format("Already processed a timetable with this Id: {0}", registration.Id));
                }

                // don't use Parallel.ForEach here (no gain)
                foreach (var table in stagingTables)
                {
                    var stagingTable = (V7StagingTable)table;

                    using (var process = new StagingEtlProcess(
                               timetable.ConnectionString,
                               AdminConnectionString,
                               stagingTable,
                               stageSchemaName,
                               Timeouts,
                               registration.Id,
                               _configuration.Pipelines))
                    {
                        process.Execute();
                        totals += process.Stats;

                        var errors = process.GetAllErrors().ToArray();
                        if (errors.Length > 0)
                        {
                            var msg = $"Errors occurred during execution of staging process: {stagingTable.Name}";
                            _log.Error(msg);

                            // throw the first exception
                            throw new ApplicationException(msg, errors[0]);
                        }
                    }
                }
            }

            return totals;
        }
示例#12
0
        /// <summary>
        /// For each history table (processed in parallel), compares the primary and
        /// secondary staging data one source timetable at a time and executes the
        /// matching history ETL process: a basic insert when all rows are new, a basic
        /// delete when all rows have gone, otherwise a full diff.
        /// </summary>
        /// <param name="b">Builder supplying the history tables to process.</param>
        /// <param name="pkInfo">Primary key information looked up by table name.</param>
        /// <param name="logId">Log identifier passed through to each history ETL process.</param>
        /// <exception cref="ApplicationException">
        /// Thrown inside the parallel loop when primary key information is missing for
        /// a table that needs a diff, or when a history process reports errors.
        /// Parallel.ForEach surfaces exceptions from the loop body wrapped in an
        /// AggregateException.
        /// </exception>
        private void DoParallelProcessing(HistoryTablesBuilder b, Dictionary <string, PrimaryKeyInfo> pkInfo, long logId)
        {
            var tables = b.GetTables();

            var sb = StagingTablesBuilder.Get(StagingSchema.PrimaryStagingSchemaName);

            var pOptions = new ParallelOptions {
                MaxDegreeOfParallelism = MaxDegreeOfParallelism
            };

            Parallel.ForEach(tables, pOptions, (historyTable, loopState) =>
            {
                // skip further work once another iteration has thrown
                if (!loopState.IsExceptional)
                {
                    string tableName = historyTable.Name;

                    string tableNew = DatabaseUtils.GetQualifiedTableName(StagingSchema.PrimaryStagingSchemaName, tableName);
                    string tableOld = DatabaseUtils.GetQualifiedTableName(StagingSchema.SecondaryStagingSchemaName, tableName);

                    _log.DebugFormat("Performing diff on tables {0} and {1}", tableNew, tableOld);

                    var statsNew = SourceTimetableAndRowCount.Get(
                        ConnectionString, TimeoutSecs, tableName, StagingSchema.PrimaryStagingSchemaName);

                    var statsOld = SourceTimetableAndRowCount.Get(
                        ConnectionString, TimeoutSecs, tableName, StagingSchema.SecondaryStagingSchemaName);

                    HashSet <int> srcTimetableIds = UnionSrcTimetableIds(statsNew, statsOld);

                    // we could perform the diff on the whole table but to keep memory footprint lower
                    // for large tables we diff by timetable id...
                    foreach (var timetableId in srcTimetableIds)
                    {
                        _log.DebugFormat("Performing diff for timetable Id {0}", timetableId);

                        var stagingTableColumnNames = sb.GetColumnNames(tableName);
                        EtlProcess etl;

                        if (AllNewlyInserted(timetableId, statsNew, statsOld))
                        {
                            // optimisation here (no need to diff, just insert all into history)...
                            _log.DebugFormat("All rows are new in table {0} for timetable {1}", tableNew, timetableId);

                            etl = new HistoryEtlProcessBasic(
                                ConnectionString,
                                TimeoutSecs,
                                historyTable,
                                timetableId,
                                StagingSchema.PrimaryStagingSchemaName,
                                HistoryStatusInsert,
                                logId,
                                PipelineOptions);
                        }
                        else if (AllNewlyDeleted(timetableId, statsNew, statsOld))
                        {
                            // optimisation here (no need to diff)...
                            _log.DebugFormat("All rows have been deleted from table {0} for timetable {1}", tableNew, timetableId);

                            etl = new HistoryEtlProcessBasic(
                                ConnectionString,
                                TimeoutSecs,
                                historyTable,
                                timetableId,
                                StagingSchema.SecondaryStagingSchemaName,
                                HistoryStatusDelete,
                                logId,
                                PipelineOptions);
                        }
                        else
                        {
                            // TryGetValue rather than the indexer: the indexer throws
                            // KeyNotFoundException for an absent key, which would bypass
                            // the descriptive error below.
                            PrimaryKeyInfo pkInfoForTable;
                            if (!pkInfo.TryGetValue(tableName, out pkInfoForTable) || pkInfoForTable == null)
                            {
                                throw new ApplicationException(
                                    $"Could not find primary key info for table: {tableName}");
                            }

                            // identityColumnCounts originate from the source timetable tables
                            // so we increment by 1 to account for the src_timetable_id column...
                            var identityColCount = pkInfoForTable.Columns.Count + 1;

                            var td = new StageTableDiffer(
                                ConnectionString, TimeoutSecs, tableOld, tableNew, timetableId, identityColCount);

                            var diff = td.Execute();

                            etl = new HistoryEtlProcessDiff(
                                ConnectionString, TimeoutSecs, historyTable, diff, stagingTableColumnNames, logId, PipelineOptions);
                        }

                        etl.Execute();

                        var errors = etl.GetAllErrors().ToArray();
                        if (errors.Any())
                        {
                            // ask the loop to stop scheduling further iterations before failing
                            loopState.Stop();

                            string msg = "Errors occurred during execution of history process";
                            _log.Error(msg);

                            // throw the first exception
                            throw new ApplicationException(msg, errors[0]);
                        }
                    }
                }
            });
        }
示例#13
0
        /// <summary>
        /// Checks the integrity of the stage data. Because tables are extracted from the
        /// source data one at a time, broken references may be found. That situation is
        /// unacceptable, so this check throws an exception, allowing the process to quit
        /// whilst still in temp staging.
        /// </summary>
        public void Execute()
        {
            var stagingBuilder = StagingTablesBuilder.Get(_schemaName);

            DoParallelProcessing(stagingBuilder);
        }