/// <summary>
/// Builds the pipeline engine for <c>UseCase</c> and <c>Pipeline</c>, executes it and converts the worst
/// event seen during the run into an exit code.
/// </summary>
/// <param name="repositoryLocator">Not referenced by this implementation.</param>
/// <param name="listener">Listener to notify of pipeline events. When null a <see cref="ThrowImmediatelyDataLoadEventListener"/> is substituted.</param>
/// <param name="checkNotifier">Not referenced by this implementation.</param>
/// <param name="token">Cancellation token handed to the engine. When null a new <see cref="GracefulCancellationToken"/> is used.</param>
/// <returns>-1 if the worst event recorded was <c>ProgressEventType.Error</c> or higher, otherwise 0.</returns>
public int Run(IRDMPPlatformRepositoryServiceLocator repositoryLocator, IDataLoadEventListener listener, ICheckNotifier checkNotifier, GracefulCancellationToken token)
{
    // No listener supplied: fall back to one that throws immediately so problems surface as exceptions
    IDataLoadEventListener callerListener = listener ?? new ThrowImmediatelyDataLoadEventListener();

    // Always record events in memory so the worst one can drive the return code,
    // even when other listeners are configured to ignore errors
    var worstResultRecorder = new ToMemoryDataLoadEventListener(false);

    // Fan events out to any additionally registered listeners as well as the two above
    var everyListener = AdditionalListeners.Union(new[] { worstResultRecorder, callerListener }).ToArray();
    IDataLoadEventListener fork = new ForkDataLoadEventListener(everyListener);

    // Build the engine and run it
    var engine = UseCase.GetEngine(Pipeline, fork);
    engine.ExecutePipeline(token ?? new GracefulCancellationToken());

    // Anything at Error level or worse counts as a failed run
    if (worstResultRecorder.GetWorst() >= ProgressEventType.Error)
    {
        return -1;
    }

    // Only successful runs raise the completion event
    PipelineExecutionFinishedsuccessfully?.Invoke(this, new PipelineEngineEventArgs(engine));
    return 0;
}
/// <summary>
/// Zips the TestData directory, points a <see cref="DicomFileCollectionSource"/> at the archive and checks that
/// the progress reported equals the number of .dcm files in the directory and that the chunk has columns.
/// </summary>
public void AssembleDataTableFromFileArchive()
{
    var testDirectory = TestContext.CurrentContext.TestDirectory;
    var archivePath = Path.Combine(testDirectory, "TestData.zip");
    var dataDirectory = Path.Combine(testDirectory, "TestData");

    // Remove any archive left behind by an earlier run before creating a fresh one
    if (File.Exists(archivePath))
    {
        File.Delete(archivePath);
    }

    ZipFile.CreateFromDirectory(dataDirectory, archivePath);

    var expectedFileCount = Directory.GetFiles(dataDirectory, "*.dcm").Length;

    var source = new DicomFileCollectionSource
    {
        FilenameField = "RelativeFileArchiveURI"
    };

    var worklist = new FlatFileToLoadDicomFileWorklist(new FlatFileToLoad(new FileInfo(archivePath)));
    source.PreInitialize(worklist, new ThrowImmediatelyDataLoadEventListener());

    var toMemory = new ToMemoryDataLoadEventListener(true);
    var chunk = source.GetChunk(toMemory, new GracefulCancellationToken());

    // Exactly one task reported progress and it processed every file once
    var lastProgress = toMemory.LastProgressRecieivedByTaskName.Single().Value;
    Assert.AreEqual(expectedFileCount, lastProgress.Progress.Value);

    Assert.Greater(chunk.Columns.Count, 0);
}
/// <summary>
/// Writes a CSV whose second column contains unescaped double quotes and checks how
/// <see cref="DelimitedFlatFileDataFlowSource"/> reads it with <c>IgnoreQuotes</c> switched on and off.
/// </summary>
/// <param name="ignoreQuotes">
/// Value assigned to <c>IgnoreQuotes</c>. When false a <see cref="ParserException"/> on raw row 2 is expected;
/// when true both data rows are expected to load with their quotes intact.
/// </param>
public void Test_IgnoreQuotes(bool ignoreQuotes)
{
    var path = Path.Combine(TestContext.CurrentContext.WorkDirectory, "talk.csv");

    // Header plus two data records, one per line. The assertions below (RawRow == 2, two rows,
    // Rows[1]) only hold when each record is on its own line.
    File.WriteAllText(path, @"Field1,Field2
1,Watch out guys its Billie ""The Killer"" Cole
2,""The Killer""? I've heard of him hes a bad un");

    var source = new DelimitedFlatFileDataFlowSource();
    source.PreInitialize(new FlatFileToLoad(new FileInfo(path)), new ThrowImmediatelyDataLoadEventListener());
    source.Separator = ",";
    source.MaxBatchSize = DelimitedFlatFileDataFlowSource.MinimumStronglyTypeInputBatchSize;
    source.StronglyTypeInputBatchSize = DelimitedFlatFileDataFlowSource.MinimumStronglyTypeInputBatchSize;
    source.StronglyTypeInput = true;
    source.IgnoreQuotes = ignoreQuotes;

    try
    {
        if (!ignoreQuotes)
        {
            // Quotes are significant so the stray quotes in the first data record must be rejected
            var toMem = new ToMemoryDataLoadEventListener(true);
            var ex = Assert.Throws<ParserException>(() => source.GetChunk(toMem, new GracefulCancellationToken()));
            Assert.AreEqual(2, ex.ReadingContext.RawRow);
        }
        else
        {
            // Quotes are treated as ordinary characters so both records load verbatim
            var dt = source.GetChunk(new ThrowImmediatelyDataLoadEventListener(), new GracefulCancellationToken());
            Assert.AreEqual(2, dt.Rows.Count);
            Assert.AreEqual(@"Watch out guys its Billie ""The Killer"" Cole", dt.Rows[0]["Field2"]);
            Assert.AreEqual(@"""The Killer""? I've heard of him hes a bad un", dt.Rows[1]["Field2"]);
        }
    }
    finally
    {
        // Dispose even when an assertion fails: the other parameterised case rewrites the same
        // talk.csv file (presumably Dispose releases the source's hold on it - confirm)
        source.Dispose(new ThrowImmediatelyDataLoadEventListener(), null);
    }
}
/// <summary>
/// Lists two DICOM files in a control file and checks that <see cref="DicomFileCollectionSource"/> returns
/// one single-row chunk per file followed by null.
/// </summary>
public void AssembleDataTableFromFolder()
{
    var testDirectory = TestContext.CurrentContext.TestDirectory;

    var firstDicom = new FileInfo(Path.Combine(testDirectory, "TestData/FileWithLotsOfTags.dcm"));
    var secondDicom = new FileInfo(Path.Combine(testDirectory, "TestData/IM-0001-0013.dcm"));

    // The control file holds one full path per line
    var controlFile = new FileInfo(Path.Combine(testDirectory, "list.txt"));
    var controlFileContent = firstDicom.FullName + Environment.NewLine + secondDicom.FullName;
    File.WriteAllText(controlFile.FullName, controlFileContent);

    var source = new DicomFileCollectionSource();
    source.FilenameField = "RelativeFileArchiveURI";
    source.PreInitialize(
        new FlatFileToLoadDicomFileWorklist(new FlatFileToLoad(controlFile)),
        new ThrowImmediatelyDataLoadEventListener());

    var toMemory = new ToMemoryDataLoadEventListener(true);

    // First file
    var chunk = source.GetChunk(toMemory, new GracefulCancellationToken());
    Assert.AreEqual(1, chunk.Rows.Count);

    // Second file
    chunk = source.GetChunk(toMemory, new GracefulCancellationToken());
    Assert.AreEqual(1, chunk.Rows.Count);

    // Nothing left to read
    Assert.IsNull(source.GetChunk(toMemory, new GracefulCancellationToken()));
}
/// <summary>
/// Feeds three identical rows through <see cref="RemoveDuplicates"/> and checks that one row survives
/// and that the evaluated / discarded counts are reported as progress.
/// </summary>
public void TestRemovingDuplicatesFromDataTable()
{
    var input = new DataTable();
    input.Columns.Add("Col1");
    input.Columns.Add("Col2", typeof(int));

    // Three copies of the same record
    for (var i = 0; i < 3; i++)
    {
        input.Rows.Add("Fish", 123);
    }

    // Sanity check the input before processing
    Assert.AreEqual(3, input.Rows.Count);
    Assert.AreEqual(123, input.Rows[0]["Col2"]);

    var receiver = new ToMemoryDataLoadEventListener(true);
    var deduplicator = new RemoveDuplicates();
    var output = deduplicator.ProcessPipelineData(input, receiver, new GracefulCancellationToken());

    var progressByTask = receiver.LastProgressRecieivedByTaskName;

    // All 3 rows were evaluated
    Assert.AreEqual(3, progressByTask["Evaluating For Duplicates"].Progress.Value);

    // 2 of them were thrown away as duplicates
    Assert.AreEqual(2, progressByTask["Discarding Duplicates"].Progress.Value);

    // Leaving a single row with the original values
    Assert.AreEqual(1, output.Rows.Count);
    var survivor = output.Rows[0];
    Assert.AreEqual("Fish", survivor["Col1"]);
    Assert.AreEqual(123, survivor["Col2"]);
}
/// <summary>
/// Reads worksheet "MySheet" of the odd formats test file and checks the column names and the string
/// values loaded for both rows.
/// </summary>
public void TestOddFormats()
{
    var listener = new ToMemoryDataLoadEventListener(true);

    var source = new ExcelDataFlowSource { WorkSheetName = "MySheet" };
    source.PreInitialize(new FlatFileToLoad(_fileLocations[OddFormatsFile]), listener);

    DataTable loaded = source.GetChunk(listener, new GracefulCancellationToken());

    // The last column is hidden in the spreadsheet but is still loaded
    string[] expectedHeaders = { "Name", "Category", "Age", "Wage", "Invisibre" };

    string[][] expectedRows =
    {
        new[] { "Frank", "Upper, Left", "30", "£11.00", "0.1" },
        new[] { "Castello", "Lower, Back", "31", "50.00%", "0.2" }
    };

    Assert.AreEqual(2, loaded.Rows.Count);
    Assert.AreEqual(5, loaded.Columns.Count);

    for (var column = 0; column < expectedHeaders.Length; column++)
    {
        Assert.AreEqual(expectedHeaders[column], loaded.Columns[column].ColumnName);
    }

    for (var row = 0; row < expectedRows.Length; row++)
    {
        for (var column = 0; column < expectedRows[row].Length; column++)
        {
            Assert.AreEqual(expectedRows[row][column], loaded.Rows[row][column]);
        }
    }
}
/// <summary>
/// Reads worksheet "MySheet" of the odd formats test file and checks the column names and loaded values.
/// The wage cell of the first row is matched by pattern rather than exactly.
/// </summary>
public void TestOddFormats()
{
    var listener = new ToMemoryDataLoadEventListener(true);

    var source = new ExcelDataFlowSource();
    source.WorkSheetName = "MySheet";
    source.PreInitialize(new FlatFileToLoad(_fileLocations[OddFormatsFile]), listener);

    DataTable table = source.GetChunk(listener, new GracefulCancellationToken());

    Assert.AreEqual(2, table.Rows.Count);
    Assert.AreEqual(5, table.Columns.Count);

    // "Invisibre" is hidden in the spreadsheet but is still loaded
    var headers = new[] { "Name", "Category", "Age", "Wage", "Invisibre" };
    for (var i = 0; i < headers.Length; i++)
    {
        Assert.AreEqual(headers[i], table.Columns[i].ColumnName);
    }

    const int wageColumn = 3;

    // First row: the wage starts with a pound symbol. There are 2 encodings of that symbol,
    // so any single non-word character is accepted in front of the amount.
    var frank = new[] { "Frank", "Upper, Left", "30", null, "0.1" };
    for (var i = 0; i < frank.Length; i++)
    {
        if (i == wageColumn)
        {
            StringAssert.IsMatch(@"^\W11.00$", table.Rows[0][i].ToString());
        }
        else
        {
            Assert.AreEqual(frank[i], table.Rows[0][i]);
        }
    }

    // Second row is compared exactly
    var castello = new[] { "Castello", "Lower, Back", "31", "50.00%", "0.2" };
    for (var i = 0; i < castello.Length; i++)
    {
        Assert.AreEqual(castello[i], table.Rows[1][i]);
    }
}
/// <summary>
/// Loads the standard test workbook and checks that exactly 5 rows are read.
/// </summary>
public void NormalBook_NoEmptyRowsRead()
{
    var source = new ExcelDataFlowSource();
    var listener = new ToMemoryDataLoadEventListener(true);

    var workbook = new FlatFileToLoad(_fileLocations[TestFile]);
    source.PreInitialize(workbook, listener);

    var rowsRead = source.GetChunk(listener, new GracefulCancellationToken()).Rows.Count;

    Assert.AreEqual(5, rowsRead);
}
/// <summary>
/// Loads the "freaky" test workbook and checks that the source raises a Warning about data discarded
/// from unnamed columns.
/// </summary>
public void FreakyTestFile_WarningsCorrect()
{
    var messages = new ToMemoryDataLoadEventListener(true);

    var source = new ExcelDataFlowSource();
    source.PreInitialize(new FlatFileToLoad(_fileLocations[FreakyTestFile]), new ThrowImmediatelyDataLoadEventListener());

    // Only the events raised while reading are captured; the returned table itself is not inspected
    DataTable chunk = source.GetChunk(messages, new GracefulCancellationToken());

    var eventsFromSource = messages.EventsReceivedBySender[source];

    Console.Write(messages.ToString());

    var discardWarningRaised = eventsFromSource.Any(e =>
        e.Message.Contains("Discarded the following data (that was found in unamed columns):RowCount:5")
        && e.ProgressEventType == ProgressEventType.Warning);

    Assert.IsTrue(discardWarningRaised);
}
/// <summary>
/// Reads a dataset whose WedgeAngleFloat value is expected to be rejected and checks that
/// <see cref="DicomDatasetCollectionSource"/> either throws or nulls the value with a warning,
/// depending on the handling strategy.
/// </summary>
/// <param name="dataHandlingStrategy">
/// Strategy assigned to the source. <c>ThrowException</c> expects an <see cref="OverflowException"/>;
/// <c>ConvertToNullAndWarn</c> expects a DBNull cell and a single warning. Any other value throws
/// <see cref="ArgumentOutOfRangeException"/>.
/// </param>
public void SourceRead_InvalidFloat_ToTable(InvalidDataHandling dataHandlingStrategy)
{
    var source = new DicomDatasetCollectionSource
    {
        InvalidDataHandlingStrategy = dataHandlingStrategy
    };

    var dataset = new DicomDataset
    {
        { DicomTag.PatientAge, "123Y" },
        { DicomTag.WedgeAngleFloat, "3.40282347e+038" }
    };

    var extraColumns = new Dictionary<string, string> { { "MessageGuid", "123x321" } };
    var worklist = new ExplicitListDicomDatasetWorklist(new[] { dataset }, "fish.dcm", extraColumns);

    source.PreInitialize(worklist, new ThrowImmediatelyDataLoadEventListener());
    source.FilenameField = "RelFileName";

    if (dataHandlingStrategy == InvalidDataHandling.ThrowException)
    {
        Assert.Throws<OverflowException>(() => source.GetChunk(new ThrowImmediatelyDataLoadEventListener(), new GracefulCancellationToken()));
        return;
    }

    if (dataHandlingStrategy != InvalidDataHandling.ConvertToNullAndWarn)
    {
        throw new ArgumentOutOfRangeException(nameof(dataHandlingStrategy));
    }

    var tomem = new ToMemoryDataLoadEventListener(true);
    DataTable dt = source.GetChunk(tomem, new GracefulCancellationToken());

    Assert.AreEqual(DBNull.Value, dt.Rows[0]["WedgeAngleFloat"]);

    // Exactly one warning should have been logged and it should identify the tag, the extra column and the file
    var warning = tomem.EventsReceivedBySender
        .SelectMany(e => e.Value)
        .Single(v => v.ProgressEventType == ProgressEventType.Warning);

    foreach (var fragment in new[] { "WedgeAngleFloat", "MessageGuid", "123x321", "fish.dcm" })
    {
        Assert.IsTrue(warning.Message.Contains(fragment));
    }

    // The valid tag and the file name column still come through
    Assert.AreEqual("123Y", dt.Rows[0]["PatientAge"]);
    Assert.AreEqual("fish.dcm", dt.Rows[0]["RelFileName"]);
}
/// <summary>
/// Runs <see cref="ProcessBasedCacheSource"/> against an echo command and checks that an Information
/// message is logged with the cache fill date in place of %s and the cache directory in place of %d.
/// </summary>
public void TestWithEcho()
{
    var onLinux = IsLinux;

    // On Linux echo is called directly, otherwise it is run through cmd.exe /c
    var source = new ProcessBasedCacheSource
    {
        Command = onLinux ? "/bin/echo" : "cmd.exe",
        Args = onLinux
            ? "Hey Thomas go get %s and store in %d"
            : "/c echo Hey Thomas go get %s and store in %d",
        TimeFormat = "dd/MM/yy",
        ThrowOnNonZeroExitCode = true
    };

    // What dates to load
    var cacheProgress = WhenIHaveA<CacheProgress>();
    cacheProgress.CacheFillProgress = new DateTime(2001, 12, 24);
    cacheProgress.SaveToDatabase();

    // Where to put files
    var loadMetadata = cacheProgress.LoadProgress.LoadMetadata;
    var workDirectory = new DirectoryInfo(TestContext.CurrentContext.WorkDirectory);
    var loadDir = LoadDirectory.CreateDirectoryStructure(workDirectory, "blah", true);
    loadMetadata.LocationOfFlatFiles = loadDir.RootPath.FullName;
    loadMetadata.SaveToDatabase();

    // Give the source each of the three initialization objects in turn
    source.PreInitialize(new CacheFetchRequestProvider(cacheProgress), new ThrowImmediatelyDataLoadEventListener());
    source.PreInitialize(cacheProgress.CatalogueRepository, new ThrowImmediatelyDataLoadEventListener());
    source.PreInitialize(new PermissionWindow(cacheProgress.CatalogueRepository), new ThrowImmediatelyDataLoadEventListener());

    // Capture messages in memory while also forwarding them to a console writing listener
    var toMem = new ToMemoryDataLoadEventListener(true);
    var consoleListener = new ThrowImmediatelyDataLoadEventListener { WriteToConsole = true };
    var fork = new ForkDataLoadEventListener(toMem, consoleListener);

    source.GetChunk(fork, new GracefulCancellationToken());

    var cacheSubDirectory = Path.Combine(loadDir.Cache.FullName, "ALL");
    var expectedMessage = "Hey Thomas go get 24/12/01 and store in " + cacheSubDirectory;

    var informationMessages = toMem.GetAllMessagesByProgressEventType()[ProgressEventType.Information]
        .Select(v => v.Message)
        .ToArray();

    Assert.Contains(expectedMessage, informationMessages);
}
/// <summary>
/// Checks that <see cref="ColumnSwapper"/> warns about a null key found in the mapping table and still
/// swaps the remaining values, leaving a null input key as DBNull in the output.
/// </summary>
public void TestColumnSwapper_MappingTableNulls()
{
    // Mapping table content: In -> Out
    using var mappingData = new DataTable();
    mappingData.Columns.Add("In");
    mappingData.Columns.Add("Out");
    mappingData.Rows.Add(1, 1);
    mappingData.Rows.Add(DBNull.Value, 3); // null key: this value should be ignored
    mappingData.Rows.Add(2, 2);

    var db = GetCleanedServer(DatabaseType.MicrosoftSQLServer);

    Import(db.CreateTable("Map", mappingData), out _, out var mapCols);

    var swapper = new ColumnSwapper
    {
        MappingFromColumn = mapCols.Single(c => c.GetRuntimeName().Equals("In")),
        MappingToColumn = mapCols.Single(c => c.GetRuntimeName().Equals("Out"))
    };

    swapper.Check(new ThrowImmediatelyCheckNotifier());

    // Data to be swapped, including one record with a null key
    using var dtToSwap = new DataTable();
    dtToSwap.Columns.Add("In", typeof(int));
    dtToSwap.Columns.Add("Name");
    dtToSwap.Columns.Add("Age");

    dtToSwap.Rows.Add(1, "Dave", 30);
    dtToSwap.Rows.Add(null, "Bob", 30);

    var toMem = new ToMemoryDataLoadEventListener(true);
    var resultDt = swapper.ProcessPipelineData(dtToSwap, toMem, new GracefulCancellationToken());

    // This is the primary thing under test: the null key in the mapping table is reported
    var warnings = toMem.GetAllMessagesByProgressEventType()[ProgressEventType.Warning]
        .Select(m => m.Message)
        .ToArray();
    Assert.Contains("Discarded 1 Null key values read from mapping table", warnings);

    Assert.AreEqual(2, resultDt.Rows.Count);

    var dave = resultDt.Rows[0];
    AreBasicallyEquals(1, dave["Out"]);
    Assert.AreEqual("Dave", dave["Name"]);

    var bob = resultDt.Rows[1];
    AreBasicallyEquals(DBNull.Value, bob["Out"]);
    Assert.AreEqual("Bob", bob["Name"]);
}
/// <summary>
/// Runs an extraction with UseHashJoins switched on for the extraction source and checks that the
/// reported extraction SQL contains a HASH JOIN and that the output file holds the expected record.
/// </summary>
public void ExtractNormally()
{
    // Switch on hash joins for the extraction source component only
    AdjustPipelineComponentDelegate = component =>
    {
        if (!component.Class.Contains("ExecuteDatasetExtractionSource"))
        {
            return;
        }

        var useHashJoins = component.PipelineComponentArguments.Single(a => a.Name.Equals("UseHashJoins"));
        useHashJoins.SetValue(true);
        useHashJoins.SaveToDatabase();
    };

    _catalogue.Name = "TestTable";
    _catalogue.SaveToDatabase();
    _request.DatasetBundle.DataSet.RevertToDatabaseState();

    Assert.AreEqual(1, _request.ColumnsToExtract.Count(c => c.IsExtractionIdentifier));

    var listener = new ToMemoryDataLoadEventListener(true);

    ExtractionPipelineUseCase execute;
    IExecuteDatasetExtractionDestination result;
    base.Execute(out execute, out result, listener);

    // Find the Information message(s) announcing the final extraction SQL
    var sqlMessages = listener.EventsReceivedBySender
        .SelectMany(m => m.Value)
        .Where(m => m.ProgressEventType == ProgressEventType.Information && m.Message.Contains("/*Decided on extraction SQL:*/"))
        .ToArray();

    Assert.AreEqual(1, sqlMessages.Length, "Expected a message about what the final extraction SQL was");
    Assert.IsTrue(sqlMessages[0].Message.Contains(" HASH JOIN "), "expected use of hash matching was not reported by ExecuteDatasetExtractionSource in the SQL actually executed");

    var flatFileDestination = (ExecuteDatasetExtractionFlatFileDestination)result;

    // The file should contain the release identifier and the 1 record that was put into the table
    // in the first place (see parent class for the test data setup).
    // NOTE(review): the expected text has a single space after "DateOfBirth"; this may originally
    // have been a line break between header and record - confirm against the real output.
    var firstCohortKey = _cohortKeysGenerated.Keys.First();
    var expectedFileContent = @"ReleaseID,Name,DateOfBirth " + _cohortKeysGenerated[firstCohortKey] + @",Dave,2001-01-01";
    Assert.AreEqual(expectedFileContent, File.ReadAllText(flatFileDestination.OutputFile).Trim());

    Assert.AreEqual(1, _request.QueryBuilder.SelectColumns.Count(c => c.IColumn is ReleaseIdentifierSubstitution));

    File.Delete(flatFileDestination.OutputFile);
}
/// <summary>
/// Checks that <see cref="SetNull"/> nulls only the cell in column "b" that matches ^cat$ and reports
/// the number of operations as a warning when disposed.
/// </summary>
public void TestSetNull_OneCell()
{
    var operation = new SetNull
    {
        ColumnNameToFind = "b",
        NullCellsWhereValuesMatch = new Regex("^cat$")
    };

    using (var input = new DataTable())
    {
        input.Columns.Add("a");
        input.Columns.Add("b");

        input.Rows.Add("cat", "cat");
        input.Rows.Add("dog", "dog");
        input.Rows.Add("cat", "dog");

        var listener = new ToMemoryDataLoadEventListener(true);
        var output = operation.ProcessPipelineData(input, listener, new GracefulCancellationToken());

        // Column "a" is untouched; only the "cat" in column "b" becomes null
        object[][] expected =
        {
            new object[] { "cat", DBNull.Value },
            new object[] { "dog", "dog" },
            new object[] { "cat", "dog" }
        };

        Assert.AreEqual(3, output.Rows.Count);

        for (var row = 0; row < expected.Length; row++)
        {
            Assert.AreEqual(expected[row][0], output.Rows[row]["a"]);
            Assert.AreEqual(expected[row][1], output.Rows[row]["b"]);
        }

        // The summary message is only expected after Dispose
        operation.Dispose(listener, null);

        var summary = listener.EventsReceivedBySender[operation].Single();
        Assert.AreEqual(ProgressEventType.Warning, summary.ProgressEventType);
        Assert.AreEqual("Total SetNull operations for ColumnNameToFind 'b' was 1", summary.Message);
    }
}
/// <summary>
/// Moves files for two patients using the per patient pattern "$p.txt" and checks that the first
/// produces Rel1.txt while the second only produces a "No Files were found" warning.
/// </summary>
public void PatientFileMissingOne()
{
    _extractor.PerPatient = true;
    _extractor.Directories = false;
    _extractor.Pattern = "$p.txt";
    _extractor.Check(new ThrowImmediatelyCheckNotifier());

    var blahPath = Path.Combine(_outDir.FullName, "blah.txt");
    var blah2Path = Path.Combine(_outDir.FullName, "blah2.txt");

    // Neither of these files exists before the move
    FileAssert.DoesNotExist(blahPath);
    FileAssert.DoesNotExist(blah2Path);

    var mem = new ToMemoryDataLoadEventListener(true);

    _extractor.MovePatient("Pat1", "Rel1", _outDir, mem, new GracefulCancellationToken());
    _extractor.MovePatient("Pat2", "Rel2", _outDir, mem, new GracefulCancellationToken());

    // ...and they still do not exist afterwards; only the Rel1 file was produced
    FileAssert.DoesNotExist(blahPath);
    FileAssert.DoesNotExist(blah2Path);
    FileAssert.Exists(Path.Combine(_outDir.FullName, "Rel1.txt"));

    // The worst thing reported is a single warning about Pat2
    Assert.AreEqual(ProgressEventType.Warning, mem.GetWorst());

    var onlyWarning = mem.GetAllMessagesByProgressEventType()[ProgressEventType.Warning].Single();
    StringAssert.StartsWith("No Files were found matching Pattern Pat2.txt in ", onlyWarning.Message);
}
/// <summary>
/// Loads a workbook containing dates, times and decimals and checks the string values produced for
/// columns 3 to 5 of each of its 5 rows.
/// </summary>
/// <param name="versionOfTestFile">Key into <c>_fileLocations</c> selecting which test file to load.</param>
public void ExcelDateTimeDeciphering(string versionOfTestFile)
{
    // Original author's notes on the sheet content (the column layout of these notes was lost -
    // confirm against the test file):
    //   01/01/2001        0.1   01/01/2001
    //   01/01/2001 10:30  0.51  01/01/2001 10:30
    //   01/01/2002 11:30  0.22  0.1
    //   01/01/2003 01:30  0.10  0.51

    var listener = new ToMemoryDataLoadEventListener(true);

    var source = new ExcelDataFlowSource();
    source.PreInitialize(new FlatFileToLoad(_fileLocations[versionOfTestFile]), listener);

    DataTable loaded = source.GetChunk(listener, new GracefulCancellationToken());

    Assert.AreEqual(5, loaded.Rows.Count);

    // Expected values for columns 3, 4 and 5 of each row
    const int firstCheckedColumn = 3;
    string[][] expected =
    {
        new[] { "2001-01-01", "0.1", "10:30:00" },
        new[] { "2001-01-01 10:30:00", "0.51", "11:30:00" },
        new[] { "2002-01-01 11:30:00", "0.22", "0.1" },
        new[] { "2003-01-01 01:30:00", "0.10", "0.51" },
        new[] { "2015-09-18", "15:09:00", "00:03:56" }
    };

    for (var row = 0; row < expected.Length; row++)
    {
        for (var offset = 0; offset < expected[row].Length; offset++)
        {
            Assert.AreEqual(expected[row][offset], loaded.Rows[row][firstCheckedColumn + offset]);
        }
    }
}
/// <summary>
/// Refreshes a cohort through a cohort identification configuration that uses a query cache, then
/// truncates the underlying table and checks that a second refresh fails with an empty dataset
/// error after reporting that the cache was cleared.
/// </summary>
public void RefreshCohort_WithCaching()
{
    // Refresh pipeline: CohortIdentificationConfigurationSource feeding BasicCohortDestination
    var pipe = new Pipeline(CatalogueRepository, "RefreshPipeWithCaching");

    var source = new PipelineComponent(CatalogueRepository, pipe, typeof(CohortIdentificationConfigurationSource), 0);
    var freezeArg = source.CreateArgumentsForClassIfNotExists<CohortIdentificationConfigurationSource>()
        .Single(a => a.Name.Equals("FreezeAfterSuccessfulImport"));
    freezeArg.SetValue(false);
    freezeArg.SaveToDatabase();

    var dest = new PipelineComponent(CatalogueRepository, pipe, typeof(BasicCohortDestination), 0);
    var allocatorArg = dest.CreateArgumentsForClassIfNotExists<BasicCohortDestination>()
        .Single(a => a.Name.Equals("ReleaseIdentifierAllocator"));
    allocatorArg.SetValue(null);
    allocatorArg.SaveToDatabase();

    pipe.SourcePipelineComponent_ID = source.ID;
    pipe.DestinationPipelineComponent_ID = dest.ID;
    pipe.SaveToDatabase();

    ExtractionPipelineUseCase useCase;
    IExecuteDatasetExtractionDestination results;
    Execute(out useCase, out results);

    var oldcohort = _configuration.Cohort;

    // Create a query cache database, dropping any copy that already exists
    const string cacheDatabaseName = "TestCohortRefreshing_CacheTest";
    var patcher = new QueryCachingPatcher();
    var queryCacheServer = new ExternalDatabaseServer(CatalogueRepository, cacheDatabaseName, patcher);
    DiscoveredDatabase cachedb = DiscoveredServerICanCreateRandomDatabasesAndTablesOn.ExpectDatabase(cacheDatabaseName);

    if (cachedb.Exists())
    {
        cachedb.Drop();
    }

    new MasterDatabaseScriptExecutor(cachedb).CreateAndPatchDatabase(patcher, new ThrowImmediatelyCheckNotifier());
    queryCacheServer.SetProperties(cachedb);

    // Create a Cohort Identification configuration (query) that will identify the cohort
    var cic = new CohortIdentificationConfiguration(RepositoryLocator.CatalogueRepository, "RefreshCohort.cs");

    try
    {
        // Make it use the cache
        cic.QueryCachingServer_ID = queryCacheServer.ID;
        cic.SaveToDatabase();

        // Give it a single table query to fetch distinct chi from test data
        var agg = cic.CreateNewEmptyConfigurationForCatalogue(_catalogue, null);

        // Add the sub query as the only entry in the cic (in the root container)
        cic.CreateRootContainerIfNotExists();
        cic.RootCohortAggregateContainer.AddChild(agg, 1);

        // Make the ExtractionConfiguration refresh cohort query be the cic
        _configuration.CohortIdentificationConfiguration_ID = cic.ID;
        _configuration.CohortRefreshPipeline_ID = pipe.ID;
        _configuration.SaveToDatabase();

        // First refresh: expected to produce a new version of the existing cohort
        var firstEngine = new CohortRefreshEngine(new ThrowImmediatelyDataLoadEventListener(), _configuration);
        firstEngine.Execute();

        var newDefinition = firstEngine.Request.NewCohortDefinition;
        Assert.NotNull(newDefinition);

        var oldData = oldcohort.GetExternalData();
        Assert.AreEqual(oldData.ExternalDescription, firstEngine.Request.NewCohortDefinition.Description);
        Assert.AreEqual(oldData.ExternalVersion + 1, firstEngine.Request.NewCohortDefinition.Version);

        Assert.AreNotEqual(oldcohort.CountDistinct, firstEngine.Request.CohortCreatedIfAny.CountDistinct);

        // Now remove all data from the underlying table so the cic returns nobody
        // (unless the cached identifiers are wrongly reused)
        DataAccessPortal.GetInstance()
            .ExpectDatabase(_tableInfo, DataAccessContext.InternalDataProcessing)
            .ExpectTable(_tableInfo.GetRuntimeName())
            .Truncate();

        var toMem = new ToMemoryDataLoadEventListener(false);

        // Second refresh with a new engine: expected to fail because the dataset is now empty
        var secondEngine = new CohortRefreshEngine(toMem, _configuration);
        var ex = Assert.Throws<Exception>(() => secondEngine.Execute());

        // Expected this message to happen
        Assert.IsTrue(ex.InnerException.InnerException.Message.Contains("CohortIdentificationCriteria execution resulted in an empty dataset"));

        // ...and that it did clear the cache
        var cacheClearedMessages = toMem.EventsReceivedBySender
            .SelectMany(kvp => kvp.Value)
            .Count(msg => msg.Message.Equals("Clearing Cohort Identifier Cache"));
        Assert.AreEqual(1, cacheClearedMessages);
    }
    finally
    {
        // Make the ExtractionConfiguration not use the cic query
        _configuration.CohortRefreshPipeline_ID = null;
        _configuration.CohortIdentificationConfiguration_ID = null;
        _configuration.SaveToDatabase();

        // Delete the cic query
        cic.QueryCachingServer_ID = null;
        cic.SaveToDatabase();
        cic.DeleteInDatabase();

        // Delete the caching database
        queryCacheServer.DeleteInDatabase();
        cachedb.Drop();
    }
}
/// <summary>
/// Elevates WedgeAngleFloat out of AcquisitionContextSequence for a dataset whose value is expected to
/// be rejected, and checks that <see cref="DicomDatasetCollectionSource"/> either throws or nulls the
/// value with a warning, depending on the handling strategy.
/// </summary>
/// <param name="dataHandlingStrategy">
/// Strategy assigned to the source. <c>ThrowException</c> expects an <see cref="OverflowException"/>;
/// <c>ConvertToNullAndWarn</c> expects a DBNull cell and a single warning. Any other value throws
/// <see cref="ArgumentOutOfRangeException"/>.
/// </param>
public void SourceRead_InvalidFloatInSequence_WithElevation_ToTable(InvalidDataHandling dataHandlingStrategy)
{
    // Create the elevation configuration
    var elevationRules = new FileInfo(Path.Combine(TestContext.CurrentContext.TestDirectory, "ElevationConfig.xml"));
    File.WriteAllText(elevationRules.FullName, @"<!DOCTYPE TagElevationRequestCollection [ <!ELEMENT TagElevationRequestCollection (TagElevationRequest*)> <!ELEMENT TagElevationRequest (ColumnName,ElevationPathway,Conditional?)> <!ELEMENT ColumnName (#PCDATA)> <!ELEMENT ElevationPathway (#PCDATA)> <!ELEMENT Conditional (ConditionalPathway,ConditionalRegex)> <!ELEMENT ConditionalPathway (#PCDATA)> <!ELEMENT ConditionalRegex (#PCDATA)> ]> <TagElevationRequestCollection> <TagElevationRequest> <ColumnName>WedgeAngleFloat</ColumnName> <ElevationPathway>AcquisitionContextSequence->WedgeAngleFloat</ElevationPathway> </TagElevationRequest> </TagElevationRequestCollection>");

    // Set up the source reader. The sequence itself is blacklisted so only the elevated tag is loaded.
    var source = new DicomDatasetCollectionSource
    {
        InvalidDataHandlingStrategy = dataHandlingStrategy,
        TagElevationConfigurationFile = elevationRules,
        TagBlacklist = new Regex("AcquisitionContextSequence")
    };

    // The dataset we are trying to load
    var dataset = new DicomDataset();
    dataset.Add(DicomTag.PatientAge, "123Y");

    // Dodgy float inside the sequence we are trying to elevate from
    var sequenceItem = new DicomDataset()
    {
        { DicomTag.WedgeAngleFloat, "3.40282347e+038" }
    };
    var sequence = new DicomSequence(DicomTag.AcquisitionContextSequence, sequenceItem);
    dataset.Add(sequence);

    var extraColumns = new Dictionary<string, string> { { "MessageGuid", "123x321" } };
    var worklist = new ExplicitListDicomDatasetWorklist(new[] { dataset }, "fish.dcm", extraColumns);

    source.PreInitialize(worklist, new ThrowImmediatelyDataLoadEventListener());
    source.FilenameField = "RelFileName";

    if (dataHandlingStrategy == InvalidDataHandling.ThrowException)
    {
        Assert.Throws<OverflowException>(() => source.GetChunk(new ThrowImmediatelyDataLoadEventListener(), new GracefulCancellationToken()));
        return;
    }

    if (dataHandlingStrategy != InvalidDataHandling.ConvertToNullAndWarn)
    {
        throw new ArgumentOutOfRangeException(nameof(dataHandlingStrategy));
    }

    var tomem = new ToMemoryDataLoadEventListener(true);
    DataTable dt = source.GetChunk(tomem, new GracefulCancellationToken());

    Assert.AreEqual(DBNull.Value, dt.Rows[0]["WedgeAngleFloat"]);

    // Exactly one warning should have been logged and it should identify the tag, the extra column and the file
    var warning = tomem.EventsReceivedBySender
        .SelectMany(e => e.Value)
        .Single(v => v.ProgressEventType == ProgressEventType.Warning);

    foreach (var fragment in new[] { "WedgeAngleFloat", "MessageGuid", "123x321", "fish.dcm" })
    {
        Assert.IsTrue(warning.Message.Contains(fragment));
    }

    // The valid tag and the file name column still come through
    Assert.AreEqual("123Y", dt.Rows[0]["PatientAge"]);
    Assert.AreEqual("fish.dcm", dt.Rows[0]["RelFileName"]);
}
/// <summary>
/// Uploads a table of forbidden names, runs three batches of stories through
/// <see cref="BasicDataTableAnonymiser5"/> with the listener selected by <paramref name="testCase"/>
/// and, for the in-memory listener, checks that 4 redactions were reported.
/// </summary>
/// <param name="testCase">
/// Which listener to log to. Values other than ToConsole, ToMemory and ToDatabase throw
/// <see cref="ArgumentOutOfRangeException"/>.
/// </param>
public void TestBasicDataTableAnonymiser5(LoggerTestCase testCase)
{
    // Create a names table that will go into the database
    var names = new DataTable();
    names.Columns.Add("Name");
    foreach (var name in new[] { "Thomas", "Wallace", "Frank" })
    {
        names.Rows.Add(name);
    }

    // Upload the DataTable from memory into the database
    var discoveredTable = GetCleanedServer(DatabaseType.MicrosoftSQLServer).CreateTable("ForbiddenNames", names);

    try
    {
        // Import the persistent TableInfo reference
        _ = Import(discoveredTable, out TableInfo tableInfo, out _);

        // Create the test dataset chunks that will be anonymised
        var batch1 = new DataTable();
        batch1.Columns.Add("Story");
        batch1.Rows.Add("Thomas went to school regularly"); // 1st redact
        batch1.Rows.Add("It seems like Wallace went less regularly"); // 2nd redact
        batch1.Rows.Add("Mr Smitty was the teacher");

        var batch2 = new DataTable();
        batch2.Columns.Add("Story");
        batch2.Rows.Add("Things were going so well");
        batch2.Rows.Add("And then it all turned bad for Wallace"); // 3rd redact

        var batch3 = new DataTable();
        batch3.Columns.Add("Story");
        batch3.Rows.Add("There were things creeping in the dark");
        batch3.Rows.Add("Surely Frank would know what to do. Frank was a genius"); // 4th redact
        batch3.Rows.Add("Mr Smitty was the teacher");

        // Create the anonymiser and tell it about the database table
        var anonymiser = new BasicDataTableAnonymiser5
        {
            NamesTable = tableInfo
        };

        // Create a listener according to the test case
        IDataLoadEventListener listener;
        if (testCase == LoggerTestCase.ToConsole)
        {
            listener = new ThrowImmediatelyDataLoadEventListener();
        }
        else if (testCase == LoggerTestCase.ToMemory)
        {
            listener = new ToMemoryDataLoadEventListener(true);
        }
        else if (testCase == LoggerTestCase.ToDatabase)
        {
            // Get the default logging server
            var logManager = CatalogueRepository.GetDefaultLogManager();

            // Create a new super task Anonymising Data Tables
            logManager.CreateNewLoggingTaskIfNotExists("Anonymising Data Tables");

            // Set up a listener that goes to this logging database
            listener = new ToLoggingDatabaseDataLoadEventListener(this, logManager, "Anonymising Data Tables", "Run on " + DateTime.Now);
        }
        else
        {
            throw new ArgumentOutOfRangeException(nameof(testCase));
        }

        // Run the anonymisation over all 3 batches, in order
        foreach (var batch in new[] { batch1, batch2, batch3 })
        {
            anonymiser.ProcessPipelineData(batch, listener, new GracefulCancellationToken());
        }

        // Check the results
        if (testCase == LoggerTestCase.ToMemory)
        {
            var inMemory = (ToMemoryDataLoadEventListener)listener;
            Assert.AreEqual(4, inMemory.LastProgressRecieivedByTaskName["REDACTING Names"].Progress.Value);
        }
        else if (testCase == LoggerTestCase.ToDatabase)
        {
            ((ToLoggingDatabaseDataLoadEventListener)listener).FinalizeTableLoadInfos();
        }
    }
    finally
    {
        // Finally drop the database table
        discoveredTable.Drop();
    }
}
/// <summary>
/// Generates bulk test data, runs a <see cref="CatalogueConstraintReport"/> over it and checks the
/// stored evaluation and the logging output. Can also check that a token cancelled up front results
/// in a single <see cref="OperationCanceledException"/> event.
/// </summary>
/// <param name="testCancellingValiationEarly">
/// When true the cancellation token is cancelled before the report is generated and only the
/// cancellation event is asserted.
/// </param>
/// <remarks>
/// The generated test data and catalogue are removed in a finally block so that a failing assertion
/// does not leave them behind.
/// </remarks>
public void ValidateBulkTestData(bool testCancellingValiationEarly)
{
    int numberOfRecordsToGenerate = 10000;
    DateTime startTime = DateTime.Now;

    BulkTestsData testData = new BulkTestsData(CatalogueRepository, DiscoveredDatabaseICanCreateRandomTablesIn, numberOfRecordsToGenerate);
    testData.SetupTestData();
    testData.ImportAsCatalogue();

    try
    {
        DQERepository dqeRepository = new DQERepository(CatalogueRepository);

        // There shouldn't be any lingering results in the database
        // NOTE(review): this checks _catalogue while everything below evaluates testData.catalogue -
        // confirm that is intended
        Assert.IsNull(dqeRepository.GetMostRecentEvaluationFor(_catalogue));

        // Set some validation rules
        testData.catalogue.ValidatorXML = bulkTestDataValidation;

        // Set the time periodicity field
        var toBeTimePeriodicityCol = testData.catalogue.GetAllExtractionInformation(ExtractionCategory.Any).Single(e => e.GetRuntimeName().Equals("dtCreated"));
        testData.catalogue.TimeCoverage_ExtractionInformation_ID = toBeTimePeriodicityCol.ID;

        // Do the validation
        CatalogueConstraintReport report = new CatalogueConstraintReport(testData.catalogue, SpecialFieldNames.DataLoadRunID);
        report.Check(new ThrowImmediatelyCheckNotifier());

        CancellationTokenSource source = new CancellationTokenSource();

        if (testCancellingValiationEarly)
        {
            source.Cancel();
        }

        ToMemoryDataLoadEventListener listener = new ToMemoryDataLoadEventListener(false);
        report.GenerateReport(testData.catalogue, listener, source.Token);

        if (testCancellingValiationEarly)
        {
            // Exactly one event should carry the cancellation exception; nothing else is checked
            Assert.AreEqual(1, listener.EventsReceivedBySender[report].Count(m => m.Exception is OperationCanceledException));
            return;
        }

        // All messages must have null exceptions
        Assert.IsTrue(listener.EventsReceivedBySender[report].All(m => m.Exception == null));

        // Get the results now
        var results = dqeRepository.GetMostRecentEvaluationFor(testData.catalogue);

        Assert.IsNotNull(results);

        // The sum of all consequences across all data load run ids should be the record count
        Assert.AreEqual(numberOfRecordsToGenerate, results.RowStates.Sum(r => r.Missing + r.Invalid + r.Wrong + r.Correct));

        // There should be at least 5 data load run ids (should be around 12 actually - see BulkTestData,
        // but theoretically all 10,000 records could land in 5 decades or even fewer, although the
        // odds of that must be astronomical)
        Assert.GreaterOrEqual(results.RowStates.Count(), 5);

        // There should be lots of column results too
        Assert.GreaterOrEqual(results.ColumnStates.Count(), 5);

        // Did it log?
        LogManager logManager = new LogManager(CatalogueRepository.GetServerDefaults().GetDefaultFor(PermissableDefaults.LiveLoggingServer_ID));
        var log = logManager.GetArchivalDataLoadInfos("DQE").FirstOrDefault();
        Assert.IsNotNull(log);
        Assert.GreaterOrEqual(log.StartTime, startTime);
        Assert.AreEqual(0, log.Errors.Count);
        Assert.AreEqual(numberOfRecordsToGenerate, log.TableLoadInfos.Single().Inserts);
    }
    finally
    {
        // Runs on success, early return and assertion failure alike
        testData.Destroy();
        testData.DeleteCatalogue();
    }
}