/// <summary>
/// Pulls timetable data from selected tables and columns in the source timetables
/// into the Admin database staging area.
/// </summary>
/// <remarks>
/// The extract is written to the temporary staging schema first and checked for
/// integrity. Only then is the existing primary stage moved to the secondary schema
/// and the temporary stage moved into the primary schema, so a failure while
/// extracting to the temporary stage leaves the primary stage untouched.
/// </remarks>
/// <exception cref="Exception">
/// Any failure is logged and rethrown. When the failure is an
/// <see cref="AggregateException"/>, its first inner exception is logged and rethrown
/// in place of the aggregate.
/// </exception>
public void ExtractToStage()
{
    try
    {
        var srcTimetables = CheckPreconditions();

        OnProgressEvent(new VertoProgressEventArgs
        {
            ProgressString = "Extracting timetables to stage",
            Section = ProcessingSection.Staging
        });

        _log.DebugFormat(
            "Extracting timetables to stage in Admin database: {0}",
            DatabaseUtils.GetConnectionDescription(AdminConnectionString));

        // populate temp stage with timetable data...
        Log("Extracting to Stage", "Extracting timetables to temporary stage");
        RowCountAndDuration stats = ExtractTimetablesToStage(srcTimetables, StagingSchema.TemporaryStagingSchemaName);

        var msg = $"Extracted {stats.RowCount} rows from {srcTimetables.Count} timetables in {stats.Duration.TotalSeconds:F2} secs";
        Log("Extracted", msg);

        OnProgressEvent(new VertoProgressEventArgs
        {
            ProgressString = "Checking stage integrity",
            Section = ProcessingSection.Staging
        });

        CheckStageIntegrity();

        // all ok so we can safely move the existing primary staging tables to secondary staging schema...
        Log("Moving Stage", "Moving primary stage to secondary");
        DatabaseUtils.MoveTablesToNewSchema(
            AdminConnectionString,
            Timeouts.AdminDatabase,
            StagingSchema.PrimaryStagingSchemaName,
            StagingSchema.SecondaryStagingSchemaName);

        // and move the temp schema into the primary stage, leaving temp stage empty...
        Log("Moving Stage", "Moving temporary stage to primary");
        DatabaseUtils.MoveTablesToNewSchema(
            AdminConnectionString,
            Timeouts.AdminDatabase,
            StagingSchema.TemporaryStagingSchemaName,
            StagingSchema.PrimaryStagingSchemaName);

        // NB - we perform the above cycle so that we don't get caught out if an extract fails
        // (it's ok if it fails while extracting to the temp stage)
    }
    catch (AggregateException ex)
    {
        // Surface the first underlying failure instead of the aggregate wrapper.
        var inner = ex.InnerExceptions[0];
        LogError(inner.Message);

        // "throw inner;" would overwrite the stack trace recorded where the failure
        // actually happened; ExceptionDispatchInfo rethrows the same exception object
        // with that original trace preserved.
        System.Runtime.ExceptionServices.ExceptionDispatchInfo.Capture(inner).Throw();

        // Never reached (Throw() always throws); kept so the control flow is explicit.
        throw;
    }
    catch (Exception ex)
    {
        LogError(ex.ToString());
        throw;
    }
}
/// <summary>
/// Initialises a public staging process for a single target table.
/// </summary>
/// <param name="targetTable">Table this process works on; stored for later use.</param>
/// <param name="adminConnectionString">Admin database connection string; stored for later use.</param>
/// <param name="publicConnectionString">Public database connection string; stored for later use.</param>
/// <param name="timeoutSecs">Timeout value in seconds; stored for later use.</param>
/// <param name="pipelineOptions">
/// Pipeline settings. When <c>PublicStaging.SingleThreaded</c> is set, the process
/// is given a <c>SingleThreadedPipelineExecuter</c>; otherwise the executer is left as is.
/// </param>
public PublicStagingEtlProcess(
    Table targetTable,
    string adminConnectionString,
    string publicConnectionString,
    int timeoutSecs,
    Pipelines pipelineOptions)
{
    // Read the option up front so a missing configuration fails before any state is set.
    bool runSingleThreaded = pipelineOptions.PublicStaging.SingleThreaded;
    if (runSingleThreaded)
    {
        this.PipelineExecuter = new SingleThreadedPipelineExecuter();
    }

    this._targetTable = targetTable;
    this._adminConnectionString = adminConnectionString;
    this._publicConnectionString = publicConnectionString;
    this._timeoutSecs = timeoutSecs;

    // Statistics start out empty.
    this._rowCountAndDuration = new RowCountAndDuration();
}
/// <summary>
/// Initialises a staging process for one staging table of one timetable.
/// </summary>
/// <param name="timetableConnectionString">Source timetable connection string; stored for later use.</param>
/// <param name="stageConnectionString">Stage database connection string; stored for later use.</param>
/// <param name="stagingTable">Staging table this process works on; stored for later use.</param>
/// <param name="stageSchemaName">Name of the staging schema; stored for later use.</param>
/// <param name="commandTimeouts">Command timeout settings; stored for later use.</param>
/// <param name="timetableId">Id of the timetable being processed; stored for later use.</param>
/// <param name="pipelineOptions">
/// Pipeline settings. When <c>AdminStaging.SingleThreaded</c> is set, the process
/// is given a <c>SingleThreadedPipelineExecuter</c>; otherwise the executer is left as is.
/// </param>
public StagingEtlProcess(
    string timetableConnectionString,
    string stageConnectionString,
    V7StagingTable stagingTable,
    string stageSchemaName,
    CommandTimeout commandTimeouts,
    int timetableId,
    Pipelines pipelineOptions)
{
    // Read the option up front so a missing configuration fails before any state is set.
    bool runSingleThreaded = pipelineOptions.AdminStaging.SingleThreaded;
    if (runSingleThreaded)
    {
        this.PipelineExecuter = new SingleThreadedPipelineExecuter();
    }

    this._timetableConnectionString = timetableConnectionString;
    this._stageConnectionString = stageConnectionString;
    this._stagingTable = stagingTable;
    this._stageSchemaName = stageSchemaName;
    this._commandTimeouts = commandTimeouts;
    this._timetableId = timetableId;

    // Statistics start out empty.
    this._rowCountAndDuration = new RowCountAndDuration();
}
/// <summary>
/// Runs a <c>PublicStagingEtlProcess</c> for every table returned by the builder,
/// using <c>Parallel.ForEach</c>, and returns the accumulated statistics.
/// </summary>
/// <param name="b">Builder whose <c>GetTables()</c> result is iterated.</param>
/// <param name="pOptions">Options handed to <c>Parallel.ForEach</c>.</param>
/// <returns>The sum of <c>Stats</c> over all processes that finished without errors.</returns>
/// <exception cref="AggregateException">
/// Raised by <c>Parallel.ForEach</c> when an iteration throws; the inner exception is an
/// <see cref="ApplicationException"/> wrapping the first error reported by the failing process.
/// </exception>
private RowCountAndDuration DoParallelProcessingCreateStage(PublicStagingTablesBuilder b, ParallelOptions pOptions)
{
    RowCountAndDuration result = new RowCountAndDuration();

    // Guards the accumulation into "result", which is shared by all iterations.
    object locker = new object();

    Parallel.ForEach(b.GetTables(), pOptions, (table, loopState) =>
    {
        // Another iteration has already failed; don't start more work.
        if (loopState.IsExceptional)
        {
            return;
        }

        // Dispose the process when done (also on failure), matching how the
        // admin staging processes are handled.
        using (var p = new PublicStagingEtlProcess(
            table,
            AdminConnectionString,
            PublicConnectionString,
            Timeouts.PublicDatabase,
            _configuration.Pipelines))
        {
            p.Execute();

            var errors = p.GetAllErrors().ToArray();
            if (errors.Any())
            {
                // Ask the loop not to start further iterations before failing this one.
                loopState.Stop();

                string msg = $"Errors occurred during execution of public staging process: {table.Name}";
                _log.Error(msg);

                // throw the first exception
                throw new ApplicationException(msg, errors[0]);
            }

            lock (locker)
            {
                result += p.Stats;
            }
        }
    });

    return result;
}
/// <summary>
/// Runs a <c>StagingEtlProcess</c> for every staging table of every supplied source
/// timetable, writing into the given stage schema, and returns the accumulated statistics.
/// </summary>
/// <param name="srcTimetableRecs">Source timetables to extract.</param>
/// <param name="stageSchemaName">Name of the staging schema to extract into.</param>
/// <returns>The sum of <c>Stats</c> over all executed staging processes.</returns>
/// <exception cref="ApplicationException">
/// Thrown when a timetable has no registration record, when two timetables resolve to
/// the same registration Id, or when a staging process reports errors (the first error
/// becomes the inner exception).
/// </exception>
private RowCountAndDuration ExtractTimetablesToStage(IReadOnlyList<SourceTimetableData> srcTimetableRecs, string stageSchemaName)
{
    var totals = new RowCountAndDuration();

    var builder = StagingTablesBuilder.Get(stageSchemaName);
    var stagingTables = builder.GetTables();

    var controlSchema = new ControlSchema(
        AdminConnectionString,
        Timeouts.AdminDatabase,
        _configuration.MaxDegreeOfParallelism,
        _configuration.Pipelines);

    // Ids of the timetable registrations handled so far (used by the sanity check below).
    var seenTimetableIds = new HashSet<int>();

    // don't use Parallel.ForEach here (no gain)
    foreach (var timetable in srcTimetableRecs)
    {
        _log.DebugFormat("Extracting timetable ({0}) to stage ({1})", timetable.Name, stageSchemaName);

        var registration = controlSchema.GetSourceTimetableRecord(timetable.Identifier);
        if (registration == null)
        {
            throw new ApplicationException(string.Format("Could not find source timetable registration: {0}", timetable.Name));
        }

        // sanity check: Add returns false when the Id has been recorded before
        if (!seenTimetableIds.Add(registration.Id))
        {
            throw new ApplicationException(string.Format("Already processed a timetable with this Id: {0}", registration.Id));
        }

        // don't use Parallel.ForEach here (no gain)
        foreach (V7StagingTable stagingTable in stagingTables)
        {
            using (var process = new StagingEtlProcess(
                timetable.ConnectionString,
                AdminConnectionString,
                stagingTable,
                stageSchemaName,
                Timeouts,
                registration.Id,
                _configuration.Pipelines))
            {
                process.Execute();
                totals += process.Stats;

                var failures = process.GetAllErrors().ToArray();
                if (failures.Length > 0)
                {
                    var message = $"Errors occurred during execution of staging process: {stagingTable.Name}";
                    _log.Error(message);

                    // throw the first exception
                    throw new ApplicationException(message, failures[0]);
                }
            }
        }
    }

    return totals;
}