public void TestPipelineContextInitializationNoInterfaces()
{
    var contextFactory = new DataFlowPipelineContextFactory<DataTable>();
    var context = contextFactory.Create(PipelineUsage.FixedDestination | PipelineUsage.LoadsSingleTableInfo);

    var ti = new TableInfo(mockRepo, "Foo");
    var component = new TestObjectNoRequirements();

    Assert.DoesNotThrow(() => context.PreInitialize(new ThrowImmediatelyDataLoadEventListener(), component, ti));
}
public void TestExtraSuspiciousPipelineRequirements()
{
    var contextFactory = new DataFlowPipelineContextFactory<DataTable>();
    var context = contextFactory.Create(PipelineUsage.FixedDestination);

    var suspiciousComponent = new TestObject_ExtraSuspicious();

    Assert.Throws<OverlappingImplementationsException>(() => context.PreInitialize(new ThrowImmediatelyDataLoadJob(), suspiciousComponent, "5"));
}
protected override IDataFlowPipelineContext GenerateContextImpl()
{
    var contextFactory = new DataFlowPipelineContextFactory<DataTable>();
    var context = contextFactory.Create(PipelineUsage.FixedSource);

    context.MustHaveDestination = typeof(DataTableUploadDestination);

    return context;
}
protected override IDataFlowPipelineContext GenerateContextImpl()
{
    var contextFactory = new DataFlowPipelineContextFactory<ReleaseAudit>();
    var context = contextFactory.Create(PipelineUsage.FixedSource);

    context.MustHaveDestination = typeof(IDataFlowDestination<ReleaseAudit>);

    return context;
}
public void TestSuspiciousPipelineRequirements()
{
    var contextFactory = new DataFlowPipelineContextFactory<DataTable>();
    var context = contextFactory.Create(PipelineUsage.FixedDestination);

    var suspiciousComponent = new TestObject_Suspicious();

    var ex = Assert.Throws<MultipleMatchingImplmentationException>(() => context.PreInitialize(new ThrowImmediatelyDataLoadJob(), suspiciousComponent, 5, "fish"));

    Console.WriteLine("Exception was:" + ex.Message);
}
public void TestPipelineContextInitialization_ForbiddenType()
{
    var contextFactory = new DataFlowPipelineContextFactory<DataTable>();
    var context = contextFactory.Create(PipelineUsage.None);

    var component = new TestObject_RequiresTableInfo();
    var ti = new TableInfo(new MemoryCatalogueRepository(), "Foo");

    var ex = Assert.Throws<Exception>(() => context.PreInitialize(new ThrowImmediatelyDataLoadEventListener(), component, ti));
    StringAssert.Contains("Type TableInfo is not an allowable PreInitialize parameters type under the current DataFlowPipelineContext (check which flags you passed to the DataFlowPipelineContextFactory and the interfaces IPipelineRequirement<> that your components implement) ", ex.Message);
}
public void TestPipelineContextIsAllowable()
{
    var contextFactory = new DataFlowPipelineContextFactory<DataTable>();
    var context = contextFactory.Create(PipelineUsage.FixedSource | PipelineUsage.FixedDestination | PipelineUsage.LoadsSingleTableInfo);

    var pipeline = new Pipeline(CatalogueRepository, "DeleteMePipeline");

    //the component is attached to the pipeline by its constructor
    var component = new PipelineComponent(CatalogueRepository, pipeline, typeof(TestObject_RequiresTableInfo), 0);

    Assert.IsTrue(context.IsAllowable(pipeline));

    pipeline.DeleteInDatabase();
}
protected override IDataFlowPipelineContext GenerateContextImpl()
{
    //create the context using the standard context factory
    var contextFactory = new DataFlowPipelineContextFactory<ICacheChunk>();
    var context = contextFactory.Create(PipelineUsage.None);

    //adjust context: we want a destination requirement of ICacheFileSystemDestination so that we can load from the cache using the pipeline endpoint as the source of the data load
    context.MustHaveDestination = typeof(ICacheFileSystemDestination); //we want this freaky destination type
    context.MustHaveSource = typeof(ICacheSource);

    return context;
}
protected override IDataFlowPipelineContext GenerateContextImpl()
{
    //create the context using the standard context factory
    var contextFactory = new DataFlowPipelineContextFactory<DataTable>();
    var context = contextFactory.Create(PipelineUsage.LogsToTableLoadInfo);

    //adjust context: we want a destination requirement of IExecuteDatasetExtractionDestination
    context.MustHaveDestination = typeof(IExecuteDatasetExtractionDestination); //we want this freaky destination type
    context.MustHaveSource = typeof(ExecuteDatasetExtractionSource);

    return context;
}
public void TestPipelineContextInitialization()
{
    var contextFactory = new DataFlowPipelineContextFactory<DataTable>();
    var context = contextFactory.Create(PipelineUsage.FixedDestination | PipelineUsage.LoadsSingleTableInfo);

    var component = new TestObject_RequiresTableInfo();
    var ti = new TableInfo(CatalogueRepository, "TestTableInfo");

    context.PreInitialize(new ThrowImmediatelyDataLoadEventListener(), component, ti);

    Assert.AreEqual(component.PreInitToThis, ti);

    ti.DeleteInDatabase();
}
public void TestPipelineContextInitialization_UnexpectedType()
{
    var contextFactory = new DataFlowPipelineContextFactory<DataTable>();
    var context = contextFactory.Create(PipelineUsage.FixedDestination | PipelineUsage.LoadsSingleTableInfo);

    var component = new TestObject_RequiresTableInfo();

    var ti = new TableInfo(mockRepo, "Foo");
    var ci = new ColumnInfo(mockRepo, "ColumnInfo", "Type", ti);
    ci.Name = "ColumnInfo"; //because we passed a stubbed repository, the name won't be set

    var ex = Assert.Throws<Exception>(() => context.PreInitialize(new ThrowImmediatelyDataLoadEventListener(), component, ci));
    StringAssert.Contains("The following expected types were not passed to PreInitialize:TableInfo", ex.Message);
}
public void TestPipelineContextInitialization_UninitializedInterface()
{
    var contextFactory = new DataFlowPipelineContextFactory<DataTable>();
    var context = contextFactory.Create(PipelineUsage.FixedDestination | PipelineUsage.LoadsSingleTableInfo);

    //component is both IPipelineRequirement<TableInfo> AND IPipelineRequirement<LoadModuleAssembly> but only TableInfo is passed in params
    var component = new TestObject_RequiresTableInfoAndFreakyObject();

    var testTableInfo = new TableInfo(mockRepo, "");
    testTableInfo.Name = "Test Table Info";

    var ex = Assert.Throws<Exception>(() => context.PreInitialize(new ThrowImmediatelyDataLoadEventListener(), component, testTableInfo));
    StringAssert.Contains("The following expected types were not passed to PreInitialize:LoadModuleAssembly\r\nThe object types passed were:\r\nRdmp.Core.Curation.Data.TableInfo:Test Table Info", ex.Message);
}
public void TestPipelineContextIsNOTAllowable()
{
    var contextFactory = new DataFlowPipelineContextFactory<DataTable>();
    var context = contextFactory.Create(PipelineUsage.FixedDestination);

    var pipeline = new Pipeline(CatalogueRepository, "DeleteMePipeline");
    var component = new PipelineComponent(CatalogueRepository, pipeline, typeof(TestObject_RequiresTableInfo), 0);
    component.Name = "TestPipeComponent";
    component.SaveToDatabase();

    //IsAllowable returns true only if every component is compatible with the context
    string reason;
    bool allowable = context.IsAllowable(pipeline, out reason);

    Console.WriteLine(reason);

    Assert.IsFalse(allowable, reason);
    Assert.AreEqual("Component TestPipeComponent implements a forbidden type (IPipelineRequirement<TableInfo>) under the pipeline usage context", reason);

    pipeline.DeleteInDatabase();
}
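Taken together, the tests above exercise one pattern: build an IDataFlowPipelineContext from a DataFlowPipelineContextFactory, optionally tighten it with MustHaveSource / MustHaveDestination, then either PreInitialize an individual component against it or validate a whole Pipeline with IsAllowable. A minimal sketch of that pattern follows; it assumes the same Rdmp.Core types and usings as the tests above, and somePipeline / someComponent / someTableInfo are illustrative placeholders rather than objects from the original source:

//Sketch only - somePipeline, someComponent and someTableInfo are placeholders, not from the original tests
var contextFactory = new DataFlowPipelineContextFactory<DataTable>();
var context = contextFactory.Create(PipelineUsage.FixedDestination | PipelineUsage.LoadsSingleTableInfo);

//optionally constrain which endpoint types the context will accept
context.MustHaveDestination = typeof(DataTableUploadDestination);

//validate a saved Pipeline design against the context
string reason;
if (!context.IsAllowable(somePipeline, out reason))
    Console.WriteLine("Pipeline rejected: " + reason);

//or initialise a single component with the objects the context allows
context.PreInitialize(new ThrowImmediatelyDataLoadEventListener(), someComponent, someTableInfo);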
public void Test_ZipFileNotation(bool expressRelative)
{
    //get a clean database to upload to
    var db = GetCleanedServer(DatabaseType.MicrosoftSQLServer);

    //create a folder in which to generate some dicoms
    var dirToLoad = new DirectoryInfo(Path.Combine(TestContext.CurrentContext.TestDirectory, nameof(Test_ZipFileNotation)));

    if (dirToLoad.Exists)
        dirToLoad.Delete(true);

    dirToLoad.Create();

    //generate some random dicoms
    var r = new Random(999);
    DicomDataGenerator generator = new DicomDataGenerator(r, dirToLoad, "CT") { MaximumImages = 5 };
    var people = new PersonCollection();
    people.GeneratePeople(1, r);
    generator.GenerateTestDataFile(people, new FileInfo("./inventory.csv"), 1);

    //This generates
    // Test_ZipFile
    //      2015
    //          3
    //             18
    //                  751140 2.25.166922918107154891877498685128076062226.dcm
    //                  751140 2.25.179610809676265137473873365625829826423.dcm
    //                  751140 2.25.201969634959506849065133495434871450465.dcm
    //                  751140 2.25.237492679533001779093365416814254319890.dcm
    //                  751140 2.25.316241631782653383510844072713132248731.dcm

    var yearDir = dirToLoad.GetDirectories().Single();
    StringAssert.IsMatch("\\d{4}", yearDir.Name);

    //should be 5 images in the zip file
    var dicomFiles = yearDir.GetFiles("*.dcm", SearchOption.AllDirectories);
    Assert.AreEqual(5, dicomFiles.Length);

    //e.g. \2015\3\18\2.25.223398837779449245317520567111874824918.dcm
    //e.g. \2015\3\18\2.25.179610809676265137473873365625829826423.dcm
    var relativePathWithinZip1 = dicomFiles[0].FullName.Substring(dirToLoad.FullName.Length);
    var relativePathWithinZip2 = dicomFiles[1].FullName.Substring(dirToLoad.FullName.Length);

    //zip them up
    FileInfo zip = new FileInfo(Path.Combine(TestContext.CurrentContext.TestDirectory, nameof(Test_ZipFile) + ".zip"));

    if (zip.Exists)
        zip.Delete();

    ZipFile.CreateFromDirectory(dirToLoad.FullName, zip.FullName);

    //e.g. E:\RdmpDicom\Rdmp.Dicom.Tests\bin\Debug\netcoreapp2.2\Test_ZipFile.zip!\2015\3\18\2.25.223398837779449245317520567111874824918.dcm
    string pathToLoad1 = zip.FullName + "!" + relativePathWithinZip1;
    string pathToLoad2 = zip.FullName + "!" + relativePathWithinZip2;

    var loadMeTextFile = new FileInfo(Path.Combine(dirToLoad.FullName, "LoadMe.txt"));

    //tell the source to load the zip
    File.WriteAllText(loadMeTextFile.FullName, string.Join(Environment.NewLine, pathToLoad1, pathToLoad2));

    var f = new FlatFileToLoad(loadMeTextFile);

    //Setup source
    var source = new DicomFileCollectionSource { FilenameField = "RelativeFileArchiveURI" };

    if (expressRelative)
        source.ArchiveRoot = TestContext.CurrentContext.TestDirectory;

    var worklist = new FlatFileToLoadDicomFileWorklist(f);

    //Setup destination
    var destination = new DataTableUploadDestination { AllowResizingColumnsAtUploadTime = true };

    //setup pipeline
    var contextFactory = new DataFlowPipelineContextFactory<DataTable>();
    var context = contextFactory.Create(PipelineUsage.FixedDestination);

    //run pipeline
    var pipe = new DataFlowPipelineEngine<DataTable>(context, source, destination, new ThrowImmediatelyDataLoadEventListener());
    pipe.Initialize(db, worklist);
    pipe.ExecutePipeline(new GracefulCancellationToken());

    var finalTable = db.ExpectTable(destination.TargetTableName);

    using (var dt = finalTable.GetDataTable())
    {
        //should be 2 rows (since we told it to only load 2 files out of the zip)
        Assert.AreEqual(2, dt.Rows.Count);

        string pathInDbToDicomFile = (string)dt.Rows[0]["RelativeFileArchiveURI"];

        //We expect either something like:
        // E:/RdmpDicom/Rdmp.Dicom.Tests/bin/Debug/netcoreapp2.2/Test_ZipFile.zip!2015/3/18/2.25.160787663560951826149226183314694084702.dcm
        // ./Test_ZipFile.zip!2015/3/18/2.25.105592977437473375573190160334447272386.dcm

        //the path referenced should be the file read in relative/absolute format
        StringAssert.IsMatch(
            expressRelative ? $@"./{zip.Name}![\d./]*.dcm" : $@"{Regex.Escape(zip.FullName.Replace('\\','/'))}![\d./]*.dcm",
            pathInDbToDicomFile);

        StringAssert.Contains(yearDir.Name, pathInDbToDicomFile, "Expected zip file to have subdirectories and for them to be loaded correctly");

        //confirm we can read that out again
        using (var pool = new ZipPool())
        {
            var path = new AmbiguousFilePath(TestContext.CurrentContext.TestDirectory, pathInDbToDicomFile);
            Assert.IsNotNull(path.GetDataset(pool));
        }
    }

    Assert.IsTrue(finalTable.Exists());
    finalTable.Drop();
}
public override ExitCodeType Attach(IDataLoadJob job, GracefulCancellationToken cancellationToken)
{
    if (job == null)
        throw new Exception("Job is Null, we require to know the job to build a DataFlowPipeline");

    string sql;

    var dbFrom = RemoteSource.Discover(DataAccessContext.DataLoad);

    var remoteTables = new HashSet<string>(dbFrom.DiscoverTables(true).Select(t => t.GetRuntimeName()), StringComparer.CurrentCultureIgnoreCase);
    var loadables = job.RegularTablesToLoad.Union(job.LookupTablesToLoad).ToArray();

    var syntaxFrom = dbFrom.Server.GetQuerySyntaxHelper();

    foreach (var tableInfo in loadables)
    {
        var table = tableInfo.GetRuntimeName();

        if (!remoteTables.Contains(table))
            throw new Exception("Loadable table " + table + " was NOT found on the remote DB!");

        if (LoadRawColumnsOnly)
        {
            //only select the columns that exist at the AdjustRaw stage
            var rawColumns = tableInfo.GetColumnsAtStage(LoadStage.AdjustRaw);
            sql = "SELECT " + String.Join(",", rawColumns.Select(c => syntaxFrom.EnsureWrapped(c.GetRuntimeName(LoadStage.AdjustRaw)))) + " FROM " + syntaxFrom.EnsureWrapped(table);
        }
        else
        {
            sql = "SELECT * FROM " + syntaxFrom.EnsureWrapped(table);
        }

        job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "About to execute SQL:" + Environment.NewLine + sql));

        var source = new DbDataCommandDataFlowSource(sql, "Fetch data from " + dbFrom + " to populate RAW table " + table, dbFrom.Server.Builder, Timeout == 0 ? 50000 : Timeout);

        var destination = new SqlBulkInsertDestination(_dbInfo, table, Enumerable.Empty<string>());

        var contextFactory = new DataFlowPipelineContextFactory<DataTable>();
        var context = contextFactory.Create(PipelineUsage.LogsToTableLoadInfo | PipelineUsage.FixedDestination);

        var engine = new DataFlowPipelineEngine<DataTable>(context, source, destination, job);

        ITableLoadInfo loadInfo = job.DataLoadInfo.CreateTableLoadInfo("Truncate RAW table " + table, _dbInfo.Server.Name + "." + _dbInfo.GetRuntimeName(),
            new[]
            {
                new DataSource(
                    "Remote SqlServer Servername=" + dbFrom.Server + ";Database=" + _dbInfo.GetRuntimeName() +

                    //Either list the table or the query depending on what is populated
                    (table != null ? " Table=" + table : " Query = " + sql), DateTime.Now)
            }, -1);

        engine.Initialize(loadInfo);
        engine.ExecutePipeline(new GracefulCancellationToken());

        if (source.TotalRowsRead == 0)
            job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "No rows were read from the remote table " + table + "."));

        job.OnNotify(this, new NotifyEventArgs(source.TotalRowsRead > 0 ? ProgressEventType.Information : ProgressEventType.Warning, "Finished after reading " + source.TotalRowsRead + " rows"));
    }

    return ExitCodeType.Success;
}
public override ExitCodeType Attach(IDataLoadJob job, GracefulCancellationToken cancellationToken)
{
    if (job == null)
        throw new Exception("Job is Null, we require to know the job to build a DataFlowPipeline");

    ThrowIfInvalidRemoteTableName();

    var syntax = _remoteDatabase.Server.GetQuerySyntaxHelper();

    string sql;
    if (!string.IsNullOrWhiteSpace(RemoteSelectSQL))
        sql = RemoteSelectSQL;
    else
        sql = "Select * from " + syntax.EnsureWrapped(RemoteTableName);

    bool scheduleMismatch = false;

    //if there is a load progress
    if (Progress != null)
        try
        {
            //get appropriate date declaration SQL if any
            sql = GetScheduleParameterDeclarations(job, out scheduleMismatch) + sql;
        }
        catch (Exception e)
        {
            //if the date range is in the future then GetScheduleParameterDeclarations will throw Exception about future dates
            if (e.Message.StartsWith(FutureLoadMessage))
                return ExitCodeType.OperationNotRequired; //if this is the case then don't bother with the data load

            throw;
        }

    if (scheduleMismatch)
    {
        job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Warning, "Skipping LoadProgress '" + Progress + "' because it is not the correct Schedule for this component"));
        return ExitCodeType.Success;
    }

    job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "About to execute SQL:" + Environment.NewLine + sql));

    var source = new DbDataCommandDataFlowSource(sql, "Fetch data from " + _remoteDatabase.Server + " to populate RAW table " + RemoteTableName, _remoteDatabase.Server.Builder, Timeout == 0 ? 50000 : Timeout);

    //For Oracle / Postgres we have to add the parameters to the DbCommand directly
    if (_minDateParam.HasValue && _maxDateParam.HasValue && !syntax.SupportsEmbeddedParameters())
    {
        source.CommandAdjuster = (cmd) =>
        {
            var pmin = cmd.CreateParameter();
            pmin.Value = _minDateParam.Value;
            pmin.ParameterName = StartDateParameter;
            cmd.Parameters.Add(pmin);

            var pmax = cmd.CreateParameter();
            pmax.Value = _maxDateParam.Value;
            pmax.ParameterName = EndDateParameter;
            cmd.Parameters.Add(pmax);
        };
    }

    var destination = new SqlBulkInsertDestination(_dbInfo, RAWTableName, Enumerable.Empty<string>());

    var contextFactory = new DataFlowPipelineContextFactory<DataTable>();
    var context = contextFactory.Create(PipelineUsage.LogsToTableLoadInfo | PipelineUsage.FixedDestination);

    var engine = new DataFlowPipelineEngine<DataTable>(context, source, destination, job);

    ITableLoadInfo loadInfo = job.DataLoadInfo.CreateTableLoadInfo("Truncate RAW table " + RAWTableName, _dbInfo.Server.Name + "." + _dbInfo.GetRuntimeName(),
        new[]
        {
            new DataSource(
                "Remote SqlServer Servername=" + _remoteDatabase.Server + ";Database=" + _dbInfo.GetRuntimeName() +

                //Either list the table or the query depending on what is populated
                (RemoteTableName != null ? " Table=" + RemoteTableName : " Query = " + sql), DateTime.Now)
        }, -1);

    engine.Initialize(loadInfo);
    engine.ExecutePipeline(new GracefulCancellationToken());

    if (source.TotalRowsRead == 0 && LoadNotRequiredIfNoRowsRead)
    {
        job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "No rows were read from the remote table and LoadNotRequiredIfNoRowsRead is true so returning ExitCodeType.LoadNotRequired"));
        return ExitCodeType.OperationNotRequired;
    }

    job.OnNotify(this, new NotifyEventArgs(source.TotalRowsRead > 0 ? ProgressEventType.Information : ProgressEventType.Warning, "Finished after reading " + source.TotalRowsRead + " rows"));

    if (Progress != null)
    {
        if (ProgressUpdateStrategy == null)
            throw new Exception("ProgressUpdateStrategy is null but there is a Progress");

        ProgressUpdateStrategy.AddAppropriateDisposeStep((ScheduledDataLoadJob)job, _dbInfo);
    }

    return ExitCodeType.Success;
}
public override ExitCodeType Run(IDataLoadJob job, GracefulCancellationToken cancellationToken)
{
    if (_pipeline != null)
        throw new Exception("Pipeline already executed once");

    var contextFactory = new DataFlowPipelineContextFactory<DataTable>();
    var context = contextFactory.Create(PipelineUsage.LoadsSingleTableInfo | PipelineUsage.FixedDestination | PipelineUsage.LogsToTableLoadInfo);

    //where we are coming from (source)
    var sourceConvention = LoadBubble.Raw;
    DiscoveredDatabase sourceDatabase = _databaseConfiguration.DeployInfo[sourceConvention];
    var sourceTableName = _tableInfo.GetRuntimeName(sourceConvention, _databaseConfiguration.DatabaseNamer);

    //What to do if where we are coming from does not have the table existing on it
    if (!sourceDatabase.ExpectTable(sourceTableName).Exists())
    {
        if (_isLookupTable)
        {
            job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Warning,
                "Lookup table " + sourceTableName + " did not exist on RAW so was not migrated to STAGING"));
            return ExitCodeType.Success;
        }
        else
        {
            job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error,
                "Table " + sourceTableName + " did not exist in RAW database " + sourceDatabase + " when it came time to migrate RAW to STAGING (and the table is not a lookup)"));
        }
    }

    // where we are going to (destination)
    // ignore any columns that are marked for discard
    var destinationConvention = LoadBubble.Staging;
    DiscoveredDatabase destinationDatabase = _databaseConfiguration.DeployInfo[LoadBubble.Staging];
    var destinationTableName = _tableInfo.GetRuntimeName(destinationConvention, _databaseConfiguration.DatabaseNamer);

    DeleteFullyNullRecords(sourceTableName, sourceDatabase, job);

    //audit
    ITableLoadInfo tableLoadInfo = job.DataLoadInfo.CreateTableLoadInfo(
        "None required, if fails then simply drop Staging database and reload dataset",
        "STAGING:" + destinationTableName,
        new DataSource[] { new DataSource("RAW:" + sourceTableName, DateTime.Now) }, -1);

    var syntax = sourceDatabase.Server.GetQuerySyntaxHelper();

    //connect to source and open a reader! note that GetReaderForRAW will at this point preserve the state of the database such that any commands e.g. deletes will not have any effect even though ExecutePipeline has not been called!
    var source = new DbDataCommandDataFlowSource(
        "Select distinct * from " + syntax.EnsureWrapped(sourceTableName),
        "Fetch data from " + syntax.EnsureWrapped(sourceTableName),
        sourceDatabase.Server.Builder, 50000);

    //ignore those that are pre load discarded columns (unless they are dilution in which case they get passed through in a decrepit state instead of dumped entirely - these fields will still be in ANODump in pristine state btw)
    var columnNamesToIgnoreForBulkInsert = _tableInfo.PreLoadDiscardedColumns.Where(c => c.Destination != DiscardedColumnDestination.Dilute).Select(column => column.RuntimeColumnName).ToList();

    //pass pre load discard
    var destination = new SqlBulkInsertDestination(destinationDatabase, destinationTableName, columnNamesToIgnoreForBulkInsert);

    //engine that will move data
    _pipeline = new DataFlowPipelineEngine<DataTable>(context, source, destination, job);

    //add clean strings component
    _pipeline.ComponentObjects.Add(new CleanStrings());

    //add dropping of preload discard columns
    _pipeline.ComponentObjects.Add(new BasicAnonymisationEngine());

    _pipeline.Initialize(tableLoadInfo, _tableInfo);

    //tell it to move data
    _pipeline.ExecutePipeline(cancellationToken);

    return ExitCodeType.Success;
}
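The attachers and the RAW-to-STAGING migration above all share the same execution skeleton: a context, a source and a destination are combined into a DataFlowPipelineEngine, initialised with whatever objects the context requires (for example an ITableLoadInfo), then executed with a GracefulCancellationToken. A compact sketch of that skeleton follows, with placeholder SQL, table name, database object and variables (someDatabase, job, tableLoadInfo) that are not taken from the original code:

//Sketch only - someDatabase, job, tableLoadInfo and the SQL/table names are placeholders
var contextFactory = new DataFlowPipelineContextFactory<DataTable>();
var context = contextFactory.Create(PipelineUsage.LogsToTableLoadInfo | PipelineUsage.FixedDestination);

var source = new DbDataCommandDataFlowSource(
    "SELECT * FROM SomeTable",            //query to run on the remote server
    "Fetch data from the remote server",  //task description used in progress messages
    someDatabase.Server.Builder,
    30000);                               //command timeout

var destination = new SqlBulkInsertDestination(someDatabase, "SomeTable", Enumerable.Empty<string>());

var engine = new DataFlowPipelineEngine<DataTable>(context, source, destination, job);
engine.Initialize(tableLoadInfo);         //e.g. created via job.DataLoadInfo.CreateTableLoadInfo(...)
engine.ExecutePipeline(new GracefulCancellationToken());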