public void CommittingNewCohortFile_CallPipeline()
{
    var listener = new ThrowImmediatelyDataLoadEventListener();

    var proj = new Project(DataExportRepository, projName);
    proj.ProjectNumber = 999;
    proj.SaveToDatabase();

    var request = new CohortCreationRequest(proj,
        new CohortDefinition(null, "CommittingNewCohorts", 1, 999, _externalCohortTable),
        (DataExportRepository)DataExportRepository, "fish");
    request.Check(new ThrowImmediatelyCheckNotifier());

    var source = new DelimitedFlatFileDataFlowSource();
    var destination = new BasicCohortDestination();

    source.Separator = ",";
    source.StronglyTypeInput = true;

    var pipeline = new DataFlowPipelineEngine<DataTable>(
        (DataFlowPipelineContext<DataTable>)request.GetContext(), source, destination, listener);

    pipeline.Initialize(new FlatFileToLoad(new FileInfo(filename)), request);
    pipeline.ExecutePipeline(new GracefulCancellationToken());

    //there should be a new ExtractableCohort now
    Assert.NotNull(request.NewCohortDefinition.ID);

    var ec = DataExportRepository.GetAllObjects<ExtractableCohort>()
        .Single(c => c.OriginID == request.NewCohortDefinition.ID);

    //with the data in it from the test file (NUnit expects the expected value first)
    Assert.AreEqual(3, ec.Count);
}
public override ExitCodeType Attach(IDataLoadJob job, GracefulCancellationToken cancellationToken)
{
    if (job == null)
        throw new Exception("Job is null, we need the job in order to build a DataFlowPipeline");

    ThrowIfInvalidRemoteTableName();

    var syntax = _remoteDatabase.Server.GetQuerySyntaxHelper();

    string sql;
    if (!string.IsNullOrWhiteSpace(RemoteSelectSQL))
        sql = RemoteSelectSQL;
    else
        sql = "Select * from " + syntax.EnsureWrapped(RemoteTableName);

    bool scheduleMismatch = false;

    //if there is a load progress
    if (Progress != null)
        try
        {
            //get appropriate date declaration SQL if any
            sql = GetScheduleParameterDeclarations(job, out scheduleMismatch) + sql;
        }
        catch (Exception e)
        {
            //if the date range is in the future then GetScheduleParameterDeclarations will throw an Exception about future dates
            if (e.Message.StartsWith(FutureLoadMessage))
                return ExitCodeType.OperationNotRequired; //if this is the case then don't bother with the data load

            throw;
        }

    if (scheduleMismatch)
    {
        job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Warning,
            "Skipping LoadProgress '" + Progress + "' because it is not the correct Schedule for this component"));
        return ExitCodeType.Success;
    }

    job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information,
        "About to execute SQL:" + Environment.NewLine + sql));

    var source = new DbDataCommandDataFlowSource(sql,
        "Fetch data from " + _remoteDatabase.Server + " to populate RAW table " + RemoteTableName,
        _remoteDatabase.Server.Builder, Timeout == 0 ? 50000 : Timeout);

    //For Oracle / Postgres we have to add the parameters to the DbCommand directly
    if (_minDateParam.HasValue && _maxDateParam.HasValue && !syntax.SupportsEmbeddedParameters())
    {
        source.CommandAdjuster = cmd =>
        {
            var pmin = cmd.CreateParameter();
            pmin.Value = _minDateParam.Value;
            pmin.ParameterName = StartDateParameter;
            cmd.Parameters.Add(pmin);

            var pmax = cmd.CreateParameter();
            pmax.Value = _maxDateParam.Value;
            pmax.ParameterName = EndDateParameter;
            cmd.Parameters.Add(pmax);
        };
    }

    var destination = new SqlBulkInsertDestination(_dbInfo, RAWTableName, Enumerable.Empty<string>());

    var contextFactory = new DataFlowPipelineContextFactory<DataTable>();
    var context = contextFactory.Create(PipelineUsage.LogsToTableLoadInfo | PipelineUsage.FixedDestination);

    var engine = new DataFlowPipelineEngine<DataTable>(context, source, destination, job);

    ITableLoadInfo loadInfo = job.DataLoadInfo.CreateTableLoadInfo(
        "Truncate RAW table " + RAWTableName,
        _dbInfo.Server.Name + "." + _dbInfo.GetRuntimeName(),
        new[]
        {
            new DataSource(
                "Remote SqlServer Servername=" + _remoteDatabase.Server + ";Database=" + _dbInfo.GetRuntimeName() +
                //Either list the table or the query depending on what is populated
                (RemoteTableName != null ? " Table=" + RemoteTableName : " Query = " + sql),
                DateTime.Now)
        }, -1);

    engine.Initialize(loadInfo);
    engine.ExecutePipeline(new GracefulCancellationToken());

    if (source.TotalRowsRead == 0 && LoadNotRequiredIfNoRowsRead)
    {
        job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information,
            "No rows were read from the remote table and LoadNotRequiredIfNoRowsRead is true so returning ExitCodeType.OperationNotRequired"));
        return ExitCodeType.OperationNotRequired;
    }

    job.OnNotify(this, new NotifyEventArgs(
        source.TotalRowsRead > 0 ? ProgressEventType.Information : ProgressEventType.Warning,
        "Finished after reading " + source.TotalRowsRead + " rows"));

    if (Progress != null)
    {
        if (ProgressUpdateStrategy == null)
            throw new Exception("ProgressUpdateStrategy is null but there is a Progress");

        ProgressUpdateStrategy.AddAppropriateDisposeStep((ScheduledDataLoadJob)job, _dbInfo);
    }

    return ExitCodeType.Success;
}
public void Test_ZipFileNotation(bool expressRelative)
{
    //get a clean database to upload to
    var db = GetCleanedServer(DatabaseType.MicrosoftSQLServer);

    //create a folder in which to generate some dicoms
    var dirToLoad = new DirectoryInfo(Path.Combine(TestContext.CurrentContext.TestDirectory, nameof(Test_ZipFileNotation)));

    if (dirToLoad.Exists)
        dirToLoad.Delete(true);

    dirToLoad.Create();

    //generate some random dicoms
    var r = new Random(999);
    var generator = new DicomDataGenerator(r, dirToLoad, "CT") { MaximumImages = 5 };
    var people = new PersonCollection();
    people.GeneratePeople(1, r);
    generator.GenerateTestDataFile(people, new FileInfo("./inventory.csv"), 1);

    //This generates
    // Test_ZipFileNotation
    //     2015
    //         3
    //             18
    //                 751140 2.25.166922918107154891877498685128076062226.dcm
    //                 751140 2.25.179610809676265137473873365625829826423.dcm
    //                 751140 2.25.201969634959506849065133495434871450465.dcm
    //                 751140 2.25.237492679533001779093365416814254319890.dcm
    //                 751140 2.25.316241631782653383510844072713132248731.dcm

    var yearDir = dirToLoad.GetDirectories().Single();
    StringAssert.IsMatch("\\d{4}", yearDir.Name);

    //should be 5 images generated
    var dicomFiles = yearDir.GetFiles("*.dcm", SearchOption.AllDirectories);
    Assert.AreEqual(5, dicomFiles.Length);

    //e.g. \2015\3\18\2.25.223398837779449245317520567111874824918.dcm
    //e.g. \2015\3\18\2.25.179610809676265137473873365625829826423.dcm
    var relativePathWithinZip1 = dicomFiles[0].FullName.Substring(dirToLoad.FullName.Length);
    var relativePathWithinZip2 = dicomFiles[1].FullName.Substring(dirToLoad.FullName.Length);

    //zip them up
    var zip = new FileInfo(Path.Combine(TestContext.CurrentContext.TestDirectory, nameof(Test_ZipFileNotation) + ".zip"));

    if (zip.Exists)
        zip.Delete();

    ZipFile.CreateFromDirectory(dirToLoad.FullName, zip.FullName);

    //e.g. E:\RdmpDicom\Rdmp.Dicom.Tests\bin\Debug\netcoreapp2.2\Test_ZipFileNotation.zip!\2015\3\18\2.25.223398837779449245317520567111874824918.dcm
    string pathToLoad1 = zip.FullName + "!" + relativePathWithinZip1;
    string pathToLoad2 = zip.FullName + "!" + relativePathWithinZip2;

    var loadMeTextFile = new FileInfo(Path.Combine(dirToLoad.FullName, "LoadMe.txt"));

    //tell the source to load the zip
    File.WriteAllText(loadMeTextFile.FullName, string.Join(Environment.NewLine, pathToLoad1, pathToLoad2));

    var f = new FlatFileToLoad(loadMeTextFile);

    //Setup source
    var source = new DicomFileCollectionSource { FilenameField = "RelativeFileArchiveURI" };

    if (expressRelative)
        source.ArchiveRoot = TestContext.CurrentContext.TestDirectory;

    var worklist = new FlatFileToLoadDicomFileWorklist(f);

    //Setup destination
    var destination = new DataTableUploadDestination { AllowResizingColumnsAtUploadTime = true };

    //setup pipeline
    var contextFactory = new DataFlowPipelineContextFactory<DataTable>();
    var context = contextFactory.Create(PipelineUsage.FixedDestination);

    //run pipeline
    var pipe = new DataFlowPipelineEngine<DataTable>(context, source, destination, new ThrowImmediatelyDataLoadEventListener());
    pipe.Initialize(db, worklist);
    pipe.ExecutePipeline(new GracefulCancellationToken());

    var finalTable = db.ExpectTable(destination.TargetTableName);

    using (var dt = finalTable.GetDataTable())
    {
        //should be 2 rows (since we told it to only load 2 files out of the zip)
        Assert.AreEqual(2, dt.Rows.Count);

        string pathInDbToDicomFile = (string)dt.Rows[0]["RelativeFileArchiveURI"];

        //We expect either something like:
        // E:/RdmpDicom/Rdmp.Dicom.Tests/bin/Debug/netcoreapp2.2/Test_ZipFileNotation.zip!2015/3/18/2.25.160787663560951826149226183314694084702.dcm
        // ./Test_ZipFileNotation.zip!2015/3/18/2.25.105592977437473375573190160334447272386.dcm

        //the path referenced should be the file we read, in relative/absolute format
        StringAssert.IsMatch(
            expressRelative
                ? $@"./{zip.Name}![\d./]*.dcm"
                : $@"{Regex.Escape(zip.FullName.Replace('\\', '/'))}![\d./]*.dcm",
            pathInDbToDicomFile);

        StringAssert.Contains(yearDir.Name, pathInDbToDicomFile,
            "Expected zip file to have subdirectories and for them to be loaded correctly");

        //confirm we can read that out again
        using (var pool = new ZipPool())
        {
            var path = new AmbiguousFilePath(TestContext.CurrentContext.TestDirectory, pathInDbToDicomFile);
            Assert.IsNotNull(path.GetDataset(pool));
        }
    }

    Assert.IsTrue(finalTable.Exists());
    finalTable.Drop();
}
public override ExitCodeType Attach(IDataLoadJob job, GracefulCancellationToken cancellationToken)
{
    if (job == null)
        throw new Exception("Job is null, we need the job in order to build a DataFlowPipeline");

    string sql;

    var dbFrom = RemoteSource.Discover(DataAccessContext.DataLoad);

    var remoteTables = new HashSet<string>(dbFrom.DiscoverTables(true).Select(t => t.GetRuntimeName()), StringComparer.CurrentCultureIgnoreCase);
    var loadables = job.RegularTablesToLoad.Union(job.LookupTablesToLoad).ToArray();
    var syntaxFrom = dbFrom.Server.GetQuerySyntaxHelper();

    foreach (var tableInfo in loadables)
    {
        var table = tableInfo.GetRuntimeName();

        if (!remoteTables.Contains(table))
            throw new Exception("Loadable table " + table + " was NOT found on the remote DB!");

        if (LoadRawColumnsOnly)
        {
            var rawColumns = tableInfo.GetColumnsAtStage(LoadStage.AdjustRaw);
            sql = "SELECT " + string.Join(",", rawColumns.Select(c => syntaxFrom.EnsureWrapped(c.GetRuntimeName(LoadStage.AdjustRaw)))) + " FROM " + syntaxFrom.EnsureWrapped(table);
        }
        else
        {
            sql = "SELECT * FROM " + syntaxFrom.EnsureWrapped(table);
        }

        job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information,
            "About to execute SQL:" + Environment.NewLine + sql));

        var source = new DbDataCommandDataFlowSource(sql,
            "Fetch data from " + dbFrom + " to populate RAW table " + table,
            dbFrom.Server.Builder, Timeout == 0 ? 50000 : Timeout);

        var destination = new SqlBulkInsertDestination(_dbInfo, table, Enumerable.Empty<string>());

        var contextFactory = new DataFlowPipelineContextFactory<DataTable>();
        var context = contextFactory.Create(PipelineUsage.LogsToTableLoadInfo | PipelineUsage.FixedDestination);

        var engine = new DataFlowPipelineEngine<DataTable>(context, source, destination, job);

        ITableLoadInfo loadInfo = job.DataLoadInfo.CreateTableLoadInfo(
            "Truncate RAW table " + table,
            _dbInfo.Server.Name + "." + _dbInfo.GetRuntimeName(),
            new[]
            {
                new DataSource(
                    "Remote SqlServer Servername=" + dbFrom.Server + ";Database=" + _dbInfo.GetRuntimeName() +
                    //Either list the table or the query depending on what is populated
                    (table != null ? " Table=" + table : " Query = " + sql),
                    DateTime.Now)
            }, -1);

        engine.Initialize(loadInfo);
        engine.ExecutePipeline(new GracefulCancellationToken());

        if (source.TotalRowsRead == 0)
            job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information,
                "No rows were read from the remote table " + table + "."));

        job.OnNotify(this, new NotifyEventArgs(
            source.TotalRowsRead > 0 ? ProgressEventType.Information : ProgressEventType.Warning,
            "Finished after reading " + source.TotalRowsRead + " rows"));
    }

    return ExitCodeType.Success;
}
public override ExitCodeType Run(IDataLoadJob job, GracefulCancellationToken cancellationToken)
{
    if (_pipeline != null)
        throw new Exception("Pipeline already executed once");

    var contextFactory = new DataFlowPipelineContextFactory<DataTable>();
    var context = contextFactory.Create(PipelineUsage.LoadsSingleTableInfo | PipelineUsage.FixedDestination | PipelineUsage.LogsToTableLoadInfo);

    //where we are coming from (source)
    var sourceConvention = LoadBubble.Raw;
    DiscoveredDatabase sourceDatabase = _databaseConfiguration.DeployInfo[sourceConvention];
    var sourceTableName = _tableInfo.GetRuntimeName(sourceConvention, _databaseConfiguration.DatabaseNamer);

    //What to do if the table does not exist in the source database
    if (!sourceDatabase.ExpectTable(sourceTableName).Exists())
    {
        if (_isLookupTable)
        {
            job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Warning,
                "Lookup table " + sourceTableName + " did not exist on RAW so was not migrated to STAGING"));
            return ExitCodeType.Success;
        }

        job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error,
            "Table " + sourceTableName + " did not exist in RAW database " + sourceDatabase +
            " when it came time to migrate RAW to STAGING (and the table is not a lookup)"));
    }

    //where we are going to (destination)
    //ignore any columns that are marked for discard
    var destinationConvention = LoadBubble.Staging;
    DiscoveredDatabase destinationDatabase = _databaseConfiguration.DeployInfo[destinationConvention];
    var destinationTableName = _tableInfo.GetRuntimeName(destinationConvention, _databaseConfiguration.DatabaseNamer);

    DeleteFullyNullRecords(sourceTableName, sourceDatabase, job);

    //audit
    ITableLoadInfo tableLoadInfo = job.DataLoadInfo.CreateTableLoadInfo(
        "None required, if fails then simply drop Staging database and reload dataset",
        "STAGING:" + destinationTableName,
        new[] { new DataSource("RAW:" + sourceTableName, DateTime.Now) }, -1);

    var syntax = sourceDatabase.Server.GetQuerySyntaxHelper();

    //connect to the source and open a reader; note that at this point the source preserves the state of the database, so any commands (e.g. deletes) run after this will not affect what is read even though ExecutePipeline has not yet been called
    var source = new DbDataCommandDataFlowSource(
        "Select distinct * from " + syntax.EnsureWrapped(sourceTableName),
        "Fetch data from " + syntax.EnsureWrapped(sourceTableName),
        sourceDatabase.Server.Builder, 50000);

    //ignore pre-load discarded columns (unless they are dilution, in which case they get passed through in a decrepit state instead of being dumped entirely - these fields will still be in ANODump in pristine state)
    var columnNamesToIgnoreForBulkInsert = _tableInfo.PreLoadDiscardedColumns
        .Where(c => c.Destination != DiscardedColumnDestination.Dilute)
        .Select(column => column.RuntimeColumnName)
        .ToList();

    //pass the pre-load discard list to the destination
    var destination = new SqlBulkInsertDestination(destinationDatabase, destinationTableName, columnNamesToIgnoreForBulkInsert);

    //engine that will move data
    _pipeline = new DataFlowPipelineEngine<DataTable>(context, source, destination, job);

    //add clean strings component
    _pipeline.ComponentObjects.Add(new CleanStrings());

    //add dropping of pre-load discard columns
    _pipeline.ComponentObjects.Add(new BasicAnonymisationEngine());

    _pipeline.Initialize(tableLoadInfo, _tableInfo);

    //tell it to move data
    _pipeline.ExecutePipeline(cancellationToken);

    return ExitCodeType.Success;
}