Example #1
        public int Run(IRDMPPlatformRepositoryServiceLocator repositoryLocator, IDataLoadEventListener listener, ICheckNotifier checkNotifier, GracefulCancellationToken token)
        {
            // if we were given no listener, use a throw-immediately one (so exceptions surface if anything goes badly)
            if (listener == null)
            {
                listener = new ThrowImmediatelyDataLoadEventListener();
            }

            // whatever happens we want a listener recording the worst result for the return code (even if ignore-all-errors listeners are in use)
            var toMemory = new ToMemoryDataLoadEventListener(false);

            // User might have some additional listeners registered
            listener = new ForkDataLoadEventListener(AdditionalListeners.Union(new[] { toMemory, listener }).ToArray());

            // build the engine and run it
            var engine = UseCase.GetEngine(Pipeline, listener);

            engine.ExecutePipeline(token ?? new GracefulCancellationToken());

            // exit code of -1 if anything went badly, otherwise 0
            var exitCode = toMemory.GetWorst() >= ProgressEventType.Error ? -1 : 0;

            if (exitCode == 0)
            {
                PipelineExecutionFinishedsuccessfully?.Invoke(this, new PipelineEngineEventArgs(engine));
            }

            return exitCode;
        }
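The pattern above hinges on two details of ToMemoryDataLoadEventListener that recur throughout these examples: the constructor bool (presumably a throw-on-error flag; passing false, as here, records Error events instead of throwing) and GetWorst(), which returns the most severe ProgressEventType received. A minimal standalone sketch of the worst-result logic, using only the listener members visible on this page:

        // Sketch (not from the repo): post some events, then map the worst one to an exit code
        var toMemory = new ToMemoryDataLoadEventListener(false); // false assumed to mean "record errors, don't throw"
        toMemory.OnNotify(this, new NotifyEventArgs(ProgressEventType.Warning, "something odd happened"));
        toMemory.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, "something bad happened"));

        // GetWorst() is now Error, so the Run method above would return -1
        var exitCode = toMemory.GetWorst() >= ProgressEventType.Error ? -1 : 0;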
Example #2
        public void AssembleDataTableFromFileArchive()
        {
            var zip = Path.Combine(TestContext.CurrentContext.TestDirectory, "TestData.zip");
            var dir = Path.Combine(TestContext.CurrentContext.TestDirectory, "TestData");

            if (File.Exists(zip))
            {
                File.Delete(zip);
            }

            ZipFile.CreateFromDirectory(dir, zip);

            var fileCount = Directory.GetFiles(dir, "*.dcm").Count();

            var source = new DicomFileCollectionSource();

            source.FilenameField = "RelativeFileArchiveURI";
            source.PreInitialize(new FlatFileToLoadDicomFileWorklist(new FlatFileToLoad(new FileInfo(zip))), new ThrowImmediatelyDataLoadEventListener());
            var toMemory = new ToMemoryDataLoadEventListener(true);
            var result   = source.GetChunk(toMemory, new GracefulCancellationToken());

            //processed every file once
            Assert.AreEqual(fileCount, toMemory.LastProgressRecieivedByTaskName.Single().Value.Progress.Value);

            Assert.Greater(result.Columns.Count, 0);
        }
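LastProgressRecieivedByTaskName (the misspelling is RDMP's own API name, so it is kept verbatim here) maps each task name to the last progress event that task reported, which is what the Single().Value.Progress.Value assertion above unpacks. A small sketch of reading it back, assuming only the members already used in these examples:

        // Sketch: print what each task last claimed to have processed
        foreach (var kvp in toMemory.LastProgressRecieivedByTaskName)
            Console.WriteLine($"{kvp.Key}: {kvp.Value.Progress.Value} records so far");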
Example #3
        public void Test_IgnoreQuotes(bool ignoreQuotes)
        {
            var f = Path.Combine(TestContext.CurrentContext.WorkDirectory, "talk.csv");

            File.WriteAllText(f, @"Field1,Field2
1,Watch out guys its Billie ""The Killer"" Cole
2,""The Killer""? I've heard of him hes a bad un");

            DelimitedFlatFileDataFlowSource source = new DelimitedFlatFileDataFlowSource();

            source.PreInitialize(new FlatFileToLoad(new FileInfo(f)), new ThrowImmediatelyDataLoadEventListener());
            source.Separator    = ",";
            source.MaxBatchSize = DelimitedFlatFileDataFlowSource.MinimumStronglyTypeInputBatchSize;
            source.StronglyTypeInputBatchSize = DelimitedFlatFileDataFlowSource.MinimumStronglyTypeInputBatchSize;
            source.StronglyTypeInput          = true;
            source.IgnoreQuotes = ignoreQuotes;

            if (!ignoreQuotes)
            {
                var toMem = new ToMemoryDataLoadEventListener(true);
                var ex    = Assert.Throws<ParserException>(() => source.GetChunk(toMem, new GracefulCancellationToken()));
                Assert.AreEqual(2, ex.ReadingContext.RawRow);
                source.Dispose(new ThrowImmediatelyDataLoadEventListener(), null);
            }
            else
            {
                var dt = source.GetChunk(new ThrowImmediatelyDataLoadEventListener(), new GracefulCancellationToken());
                Assert.AreEqual(2, dt.Rows.Count);
                Assert.AreEqual(@"Watch out guys its Billie ""The Killer"" Cole", dt.Rows[0]["Field2"]);
                Assert.AreEqual(@"""The Killer""? I've heard of him hes a bad un", dt.Rows[1]["Field2"]);
                source.Dispose(new ThrowImmediatelyDataLoadEventListener(), null);
            }
        }
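Note that this listing strips test attributes; a bool-parameterised NUnit test like the one above would presumably be decorated along these lines:

        [TestCase(true)]
        [TestCase(false)]
        public void Test_IgnoreQuotes(bool ignoreQuotes) { /* body as above */ }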
Example #4
        public void AssembleDataTableFromFolder()
        {
            var file1 = new FileInfo(Path.Combine(TestContext.CurrentContext.TestDirectory, "TestData/FileWithLotsOfTags.dcm"));
            var file2 = new FileInfo(Path.Combine(TestContext.CurrentContext.TestDirectory, "TestData/IM-0001-0013.dcm"));

            var controlFile = new FileInfo(Path.Combine(TestContext.CurrentContext.TestDirectory, "list.txt"));

            File.WriteAllText(controlFile.FullName, file1.FullName + Environment.NewLine + file2.FullName);

            var source = new DicomFileCollectionSource {
                FilenameField = "RelativeFileArchiveURI"
            };

            source.PreInitialize(new FlatFileToLoadDicomFileWorklist(new FlatFileToLoad(controlFile)), new ThrowImmediatelyDataLoadEventListener());

            var toMemory = new ToMemoryDataLoadEventListener(true);
            var result   = source.GetChunk(toMemory, new GracefulCancellationToken());

            Assert.AreEqual(1, result.Rows.Count);

            result = source.GetChunk(toMemory, new GracefulCancellationToken());
            Assert.AreEqual(1, result.Rows.Count);

            Assert.AreEqual(null, source.GetChunk(toMemory, new GracefulCancellationToken()));
        }
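The three GetChunk calls above (one row, one row, then null) show the general contract of RDMP flow sources: keep calling GetChunk until it returns null. A sketch of draining any such source, using only the calls already shown:

        // Sketch: read chunks until the source signals completion by returning null
        var drainListener = new ToMemoryDataLoadEventListener(true);
        DataTable chunk;
        while ((chunk = source.GetChunk(drainListener, new GracefulCancellationToken())) != null)
            Console.WriteLine($"read a chunk of {chunk.Rows.Count} row(s)");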
Example #5
        public void TestRemovingDuplicatesFromDataTable()
        {
            DataTable dt = new DataTable();

            dt.Columns.Add("Col1");
            dt.Columns.Add("Col2", typeof(int));

            dt.Rows.Add("Fish", 123);
            dt.Rows.Add("Fish", 123);
            dt.Rows.Add("Fish", 123);

            Assert.AreEqual(3, dt.Rows.Count);


            Assert.AreEqual(123, dt.Rows[0]["Col2"]);

            var receiver = new ToMemoryDataLoadEventListener(true);

            var result = new RemoveDuplicates().ProcessPipelineData(dt, receiver, new GracefulCancellationToken());

            //should have told us that it processed 3 rows
            Assert.AreEqual(3, receiver.LastProgressRecieivedByTaskName["Evaluating For Duplicates"].Progress.Value);

            //and discarded 2 of them as duplicates
            Assert.AreEqual(2, receiver.LastProgressRecieivedByTaskName["Discarding Duplicates"].Progress.Value);

            Assert.AreEqual(1, result.Rows.Count);
            Assert.AreEqual("Fish", result.Rows[0]["Col1"]);
            Assert.AreEqual(123, result.Rows[0]["Col2"]);
        }
Example #6
        public void TestOddFormats()
        {
            var listener = new ToMemoryDataLoadEventListener(true);

            ExcelDataFlowSource source = new ExcelDataFlowSource();

            source.WorkSheetName = "MySheet";

            source.PreInitialize(new FlatFileToLoad(_fileLocations[OddFormatsFile]), listener);
            DataTable dt = source.GetChunk(listener, new GracefulCancellationToken());

            Assert.AreEqual(2, dt.Rows.Count);
            Assert.AreEqual(5, dt.Columns.Count);

            Assert.AreEqual("Name", dt.Columns[0].ColumnName);
            Assert.AreEqual("Category", dt.Columns[1].ColumnName);
            Assert.AreEqual("Age", dt.Columns[2].ColumnName);
            Assert.AreEqual("Wage", dt.Columns[3].ColumnName);
            Assert.AreEqual("Invisibre", dt.Columns[4].ColumnName); //this column is hidden in the spreadsheet but we still load it

            Assert.AreEqual("Frank", dt.Rows[0][0]);
            Assert.AreEqual("Upper, Left", dt.Rows[0][1]);
            Assert.AreEqual("30", dt.Rows[0][2]);
            Assert.AreEqual("£11.00", dt.Rows[0][3]);
            Assert.AreEqual("0.1", dt.Rows[0][4]);

            Assert.AreEqual("Castello", dt.Rows[1][0]);
            Assert.AreEqual("Lower, Back", dt.Rows[1][1]);
            Assert.AreEqual("31", dt.Rows[1][2]);
            Assert.AreEqual("50.00%", dt.Rows[1][3]);
            Assert.AreEqual("0.2", dt.Rows[1][4]);
        }
Example #7
        public void TestOddFormats()
        {
            var listener = new ToMemoryDataLoadEventListener(true);

            ExcelDataFlowSource source = new ExcelDataFlowSource();

            source.WorkSheetName = "MySheet";

            source.PreInitialize(new FlatFileToLoad(_fileLocations[OddFormatsFile]), listener);
            DataTable dt = source.GetChunk(listener, new GracefulCancellationToken());

            Assert.AreEqual(2, dt.Rows.Count);
            Assert.AreEqual(5, dt.Columns.Count);

            Assert.AreEqual("Name", dt.Columns[0].ColumnName);
            Assert.AreEqual("Category", dt.Columns[1].ColumnName);
            Assert.AreEqual("Age", dt.Columns[2].ColumnName);
            Assert.AreEqual("Wage", dt.Columns[3].ColumnName);
            Assert.AreEqual("Invisibre", dt.Columns[4].ColumnName); //this column is hidden in the spreadsheet but we still load it

            Assert.AreEqual("Frank", dt.Rows[0][0]);
            Assert.AreEqual("Upper, Left", dt.Rows[0][1]);
            Assert.AreEqual("30", dt.Rows[0][2]);

            //it's a pound symbol alright! But since there are two encodings for the pound symbol, let's just make everyone's life easier
            StringAssert.IsMatch(@"^\W11.00$", dt.Rows[0][3].ToString());

            Assert.AreEqual("0.1", dt.Rows[0][4]);

            Assert.AreEqual("Castello", dt.Rows[1][0]);
            Assert.AreEqual("Lower, Back", dt.Rows[1][1]);
            Assert.AreEqual("31", dt.Rows[1][2]);
            Assert.AreEqual("50.00%", dt.Rows[1][3]);
            Assert.AreEqual("0.2", dt.Rows[1][4]);
        }
Example #8
        public void NormalBook_NoEmptyRowsRead()
        {
            ExcelDataFlowSource source = new ExcelDataFlowSource();

            var listener = new ToMemoryDataLoadEventListener(true);

            source.PreInitialize(new FlatFileToLoad(_fileLocations[TestFile]), listener);
            DataTable dt = source.GetChunk(listener, new GracefulCancellationToken());

            Assert.AreEqual(5, dt.Rows.Count);
        }
Example #9
        public void FreakyTestFile_WarningsCorrect()
        {
            var messages = new ToMemoryDataLoadEventListener(true);

            ExcelDataFlowSource source = new ExcelDataFlowSource();

            source.PreInitialize(new FlatFileToLoad(_fileLocations[FreakyTestFile]), new ThrowImmediatelyDataLoadEventListener());
            DataTable dt = source.GetChunk(messages, new GracefulCancellationToken());

            var args = messages.EventsReceivedBySender[source];

            Console.Write(messages.ToString());

            Assert.IsTrue(args.Any(a => a.Message.Contains("Discarded the following data (that was found in unamed columns):RowCount:5") && a.ProgressEventType == ProgressEventType.Warning));
        }
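EventsReceivedBySender, indexed above with the source component, keys every captured message by the object that raised it. A sketch of isolating one component's warnings, using only members already shown:

        // Sketch: pull out just the warnings raised by 'source', ignoring other senders
        var sourceWarnings = messages.EventsReceivedBySender[source]
            .Where(a => a.ProgressEventType == ProgressEventType.Warning)
            .Select(a => a.Message)
            .ToArray();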
Example #10
        public void SourceRead_InvalidFloat_ToTable(InvalidDataHandling dataHandlingStrategy)
        {
            var source = new DicomDatasetCollectionSource();

            source.InvalidDataHandlingStrategy = dataHandlingStrategy;

            var ds = new DicomDataset();

            ds.Add(DicomTag.PatientAge, "123Y");
            ds.Add(DicomTag.WedgeAngleFloat, "3.40282347e+038");

            var worklist = new ExplicitListDicomDatasetWorklist(new[] { ds }, "fish.dcm", new Dictionary<string, string> {
                { "MessageGuid", "123x321" }
            });

            source.PreInitialize(worklist, new ThrowImmediatelyDataLoadEventListener());
            source.FilenameField = "RelFileName";

            DataTable dt = null;

            switch (dataHandlingStrategy)
            {
            case InvalidDataHandling.ThrowException:
                Assert.Throws<OverflowException>(() => source.GetChunk(new ThrowImmediatelyDataLoadEventListener(), new GracefulCancellationToken()));
                return;

            case InvalidDataHandling.ConvertToNullAndWarn:
                var tomem = new ToMemoryDataLoadEventListener(true);
                dt = source.GetChunk(tomem, new GracefulCancellationToken());

                Assert.AreEqual(DBNull.Value, dt.Rows[0]["WedgeAngleFloat"]);

                //should be a warning about WedgeAngleFloat logged
                var warning = tomem.EventsReceivedBySender.SelectMany(e => e.Value).Single(v => v.ProgressEventType == ProgressEventType.Warning);
                Assert.IsTrue(warning.Message.Contains("WedgeAngleFloat"));
                Assert.IsTrue(warning.Message.Contains("MessageGuid"));
                Assert.IsTrue(warning.Message.Contains("123x321"));
                Assert.IsTrue(warning.Message.Contains("fish.dcm"));

                break;

            default:
                throw new ArgumentOutOfRangeException("dataHandlingStrategy");
            }

            Assert.AreEqual("123Y", dt.Rows[0]["PatientAge"]);
            Assert.AreEqual("fish.dcm", dt.Rows[0]["RelFileName"]);
        }
Example #11
        public void TestWithEcho()
        {
            var source = new ProcessBasedCacheSource();

            if (IsLinux)
            {
                source.Command = "/bin/echo";
                source.Args    = "Hey Thomas go get %s and store in %d";
            }
            else
            {
                source.Command = "cmd.exe";
                source.Args    = "/c echo Hey Thomas go get %s and store in %d";
            }
            source.TimeFormat             = "dd/MM/yy";
            source.ThrowOnNonZeroExitCode = true;

            // What dates to load
            var cp = WhenIHaveA<CacheProgress>();

            cp.CacheFillProgress = new DateTime(2001, 12, 24);
            cp.SaveToDatabase();

            // Where to put files
            var lmd = cp.LoadProgress.LoadMetadata;

            var dir     = new DirectoryInfo(TestContext.CurrentContext.WorkDirectory);
            var loadDir = LoadDirectory.CreateDirectoryStructure(dir, "blah", true);

            lmd.LocationOfFlatFiles = loadDir.RootPath.FullName;
            lmd.SaveToDatabase();

            source.PreInitialize(new CacheFetchRequestProvider(cp), new ThrowImmediatelyDataLoadEventListener());
            source.PreInitialize(cp.CatalogueRepository, new ThrowImmediatelyDataLoadEventListener());
            source.PreInitialize(new PermissionWindow(cp.CatalogueRepository), new ThrowImmediatelyDataLoadEventListener());

            var toMem = new ToMemoryDataLoadEventListener(true);
            var fork  = new ForkDataLoadEventListener(toMem, new ThrowImmediatelyDataLoadEventListener()
            {
                WriteToConsole = true
            });

            source.GetChunk(fork, new GracefulCancellationToken());

            Assert.Contains($"Hey Thomas go get 24/12/01 and store in {Path.Combine(loadDir.Cache.FullName,"ALL")}", toMem.GetAllMessagesByProgressEventType()[ProgressEventType.Information].Select(v => v.Message).ToArray());
        }
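GetAllMessagesByProgressEventType(), used in the final assertion, groups everything the listener captured by severity. When an assertion like the one above fails, a dump along these lines is a quick way to see what actually arrived (sketch, using only members already shown):

        // Sketch: print every captured message grouped by severity
        foreach (var kvp in toMem.GetAllMessagesByProgressEventType())
            foreach (var msg in kvp.Value)
                Console.WriteLine($"{kvp.Key}: {msg.Message}");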
Example #12
        public void TestColumnSwapper_MappingTableNulls()
        {
            using var dt = new DataTable();
            dt.Columns.Add("In");
            dt.Columns.Add("Out");

            dt.Rows.Add(1, 1);
            dt.Rows.Add(DBNull.Value, 3); // this value should be ignored
            dt.Rows.Add(2, 2);

            var db = GetCleanedServer(DatabaseType.MicrosoftSQLServer);

            Import(db.CreateTable("Map", dt), out var map, out var mapCols);

            var swapper = new ColumnSwapper();
            swapper.MappingFromColumn = mapCols.Single(c => c.GetRuntimeName().Equals("In"));
            swapper.MappingToColumn = mapCols.Single(c => c.GetRuntimeName().Equals("Out"));

            swapper.Check(new ThrowImmediatelyCheckNotifier());

            using var dtToSwap = new DataTable();

            dtToSwap.Columns.Add("In",typeof(int));
            dtToSwap.Columns.Add("Name");
            dtToSwap.Columns.Add("Age");

            dtToSwap.Rows.Add(1, "Dave", 30);
            dtToSwap.Rows.Add(null, "Bob", 30);

            var toMem = new ToMemoryDataLoadEventListener(true);

            var resultDt = swapper.ProcessPipelineData(dtToSwap, toMem, new GracefulCancellationToken());

            //this is the primary thing we are testing here
            Assert.Contains("Discarded 1 Null key values read from mapping table", toMem.GetAllMessagesByProgressEventType()[ProgressEventType.Warning].Select(m => m.Message).ToArray());

            Assert.AreEqual(2, resultDt.Rows.Count);
            AreBasicallyEquals(1, resultDt.Rows[0]["Out"]);
            Assert.AreEqual("Dave", resultDt.Rows[0]["Name"]);

            AreBasicallyEquals(DBNull.Value, resultDt.Rows[1]["Out"]);
            Assert.AreEqual("Bob", resultDt.Rows[1]["Name"]);
        }
Example #13
        public void ExtractNormally()
        {
            AdjustPipelineComponentDelegate = (p) =>
            {
                if (p.Class.Contains("ExecuteDatasetExtractionSource"))
                {
                    var hashJoinsArg = p.PipelineComponentArguments.Single(a => a.Name.Equals("UseHashJoins"));
                    hashJoinsArg.SetValue(true);
                    hashJoinsArg.SaveToDatabase();
                }
            };

            ExtractionPipelineUseCase            execute;
            IExecuteDatasetExtractionDestination result;

            _catalogue.Name = "TestTable";
            _catalogue.SaveToDatabase();
            _request.DatasetBundle.DataSet.RevertToDatabaseState();

            Assert.AreEqual(1, _request.ColumnsToExtract.Count(c => c.IsExtractionIdentifier));
            var listener = new ToMemoryDataLoadEventListener(true);

            base.Execute(out execute, out result, listener);

            var messages =
                listener.EventsReceivedBySender.SelectMany(m => m.Value)
                .Where(m => m.ProgressEventType == ProgressEventType.Information && m.Message.Contains("/*Decided on extraction SQL:*/"))
                .ToArray();

            Assert.AreEqual(1, messages.Length, "Expected a message about what the final extraction SQL was");
            Assert.IsTrue(messages[0].Message.Contains(" HASH JOIN "), "expected use of hash matching was not reported by ExecuteDatasetExtractionSource in the SQL actually executed");

            var r = (ExecuteDatasetExtractionFlatFileDestination)result;

            //this should be what is in the file: the substituted release identifier and the one record that was put into the table in the first place (see the parent class for the test data setup)
            Assert.AreEqual(@"ReleaseID,Name,DateOfBirth
" + _cohortKeysGenerated[_cohortKeysGenerated.Keys.First()] + @",Dave,2001-01-01", File.ReadAllText(r.OutputFile).Trim());

            Assert.AreEqual(1, _request.QueryBuilder.SelectColumns.Count(c => c.IColumn is ReleaseIdentifierSubstitution));
            File.Delete(r.OutputFile);
        }
Example #14
        public void TestSetNull_OneCell()
        {
            var operation = new SetNull();

            operation.ColumnNameToFind          = "b";
            operation.NullCellsWhereValuesMatch = new Regex("^cat$");

            using (var dt = new DataTable())
            {
                dt.Columns.Add("a");
                dt.Columns.Add("b");

                dt.Rows.Add("cat", "cat");
                dt.Rows.Add("dog", "dog");
                dt.Rows.Add("cat", "dog");

                var listener = new ToMemoryDataLoadEventListener(true);

                var result = operation.ProcessPipelineData(dt, listener, new GracefulCancellationToken());

                Assert.AreEqual(3, result.Rows.Count);

                Assert.AreEqual("cat", result.Rows[0]["a"]);
                Assert.AreEqual(DBNull.Value, result.Rows[0]["b"]);

                Assert.AreEqual("dog", result.Rows[1]["a"]);
                Assert.AreEqual("dog", result.Rows[1]["b"]);

                Assert.AreEqual("cat", result.Rows[2]["a"]);
                Assert.AreEqual("dog", result.Rows[2]["b"]);

                operation.Dispose(listener, null);

                var msg = listener.EventsReceivedBySender[operation].Single();

                Assert.AreEqual(ProgressEventType.Warning, msg.ProgressEventType);
                Assert.AreEqual("Total SetNull operations for ColumnNameToFind 'b' was 1", msg.Message);
            }
        }
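The final assertions rely on a Dispose-time summary: the component counts its operations while processing and emits a single Warning when the pipeline shuts down. A hypothetical component fragment illustrating that pattern (the _operationsCount field is assumed, not from the repo):

        // Hypothetical fragment: accumulate a count in ProcessPipelineData, summarise it in Dispose
        public void Dispose(IDataLoadEventListener listener, Exception pipelineFailureExceptionIfAny)
        {
            listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Warning,
                $"Total SetNull operations for ColumnNameToFind '{ColumnNameToFind}' was {_operationsCount}"));
        }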
Example #15
        public void PatientFileMissingOne()
        {
            _extractor.PerPatient  = true;
            _extractor.Directories = false;
            _extractor.Pattern     = "$p.txt";
            _extractor.Check(new ThrowImmediatelyCheckNotifier());

            FileAssert.DoesNotExist(Path.Combine(_outDir.FullName, "blah.txt"));
            FileAssert.DoesNotExist(Path.Combine(_outDir.FullName, "blah2.txt"));

            var mem = new ToMemoryDataLoadEventListener(true);

            _extractor.MovePatient("Pat1", "Rel1", _outDir, mem, new GracefulCancellationToken());
            _extractor.MovePatient("Pat2", "Rel2", _outDir, mem, new GracefulCancellationToken());

            FileAssert.DoesNotExist(Path.Combine(_outDir.FullName, "blah.txt"));
            FileAssert.DoesNotExist(Path.Combine(_outDir.FullName, "blah2.txt"));
            FileAssert.Exists(Path.Combine(_outDir.FullName, "Rel1.txt"));

            Assert.AreEqual(ProgressEventType.Warning, mem.GetWorst());

            StringAssert.StartsWith("No Files were found matching Pattern Pat2.txt in ", mem.GetAllMessagesByProgressEventType()[ProgressEventType.Warning].Single().Message);
        }
Example #16
        public void ExcelDateTimeDeciphering(string versionOfTestFile)
        {
            /*
             * 01/01/2001	0.1	01/01/2001
             * 01/01/2001 10:30	0.51	01/01/2001 10:30
             * 01/01/2002 11:30	0.22	0.1
             * 01/01/2003 01:30	0.10	0.51
             */
            var listener = new ToMemoryDataLoadEventListener(true);

            ExcelDataFlowSource source = new ExcelDataFlowSource();

            source.PreInitialize(new FlatFileToLoad(_fileLocations[versionOfTestFile]), listener);
            DataTable dt = source.GetChunk(listener, new GracefulCancellationToken());

            Assert.AreEqual(5, dt.Rows.Count);

            Assert.AreEqual("2001-01-01", dt.Rows[0][3]);
            Assert.AreEqual("0.1", dt.Rows[0][4]);
            Assert.AreEqual("10:30:00", dt.Rows[0][5]);

            Assert.AreEqual("2001-01-01 10:30:00", dt.Rows[1][3]);
            Assert.AreEqual("0.51", dt.Rows[1][4]);
            Assert.AreEqual("11:30:00", dt.Rows[1][5]);

            Assert.AreEqual("2002-01-01 11:30:00", dt.Rows[2][3]);
            Assert.AreEqual("0.22", dt.Rows[2][4]);
            Assert.AreEqual("0.1", dt.Rows[2][5]);

            Assert.AreEqual("2003-01-01 01:30:00", dt.Rows[3][3]);
            Assert.AreEqual("0.10", dt.Rows[3][4]);
            Assert.AreEqual("0.51", dt.Rows[3][5]);

            Assert.AreEqual("2015-09-18", dt.Rows[4][3]);
            Assert.AreEqual("15:09:00", dt.Rows[4][4]);
            Assert.AreEqual("00:03:56", dt.Rows[4][5]);
        }
Example #17
        public void RefreshCohort_WithCaching()
        {
            ExtractionPipelineUseCase            useCase;
            IExecuteDatasetExtractionDestination results;

            var pipe = new Pipeline(CatalogueRepository, "RefreshPipeWithCaching");

            var source    = new PipelineComponent(CatalogueRepository, pipe, typeof(CohortIdentificationConfigurationSource), 0);
            var args      = source.CreateArgumentsForClassIfNotExists<CohortIdentificationConfigurationSource>();
            var freezeArg = args.Single(a => a.Name.Equals("FreezeAfterSuccessfulImport"));

            freezeArg.SetValue(false);
            freezeArg.SaveToDatabase();

            var dest         = new PipelineComponent(CatalogueRepository, pipe, typeof(BasicCohortDestination), 0);
            var argsDest     = dest.CreateArgumentsForClassIfNotExists<BasicCohortDestination>();
            var allocatorArg = argsDest.Single(a => a.Name.Equals("ReleaseIdentifierAllocator"));

            allocatorArg.SetValue(null);
            allocatorArg.SaveToDatabase();

            pipe.SourcePipelineComponent_ID      = source.ID;
            pipe.DestinationPipelineComponent_ID = dest.ID;
            pipe.SaveToDatabase();

            Execute(out useCase, out results);

            var oldcohort = _configuration.Cohort;

            //Create a query cache
            var p = new QueryCachingPatcher();
            ExternalDatabaseServer queryCacheServer = new ExternalDatabaseServer(CatalogueRepository, "TestCohortRefreshing_CacheTest", p);

            DiscoveredDatabase cachedb = DiscoveredServerICanCreateRandomDatabasesAndTablesOn.ExpectDatabase("TestCohortRefreshing_CacheTest");

            if (cachedb.Exists())
            {
                cachedb.Drop();
            }

            new MasterDatabaseScriptExecutor(cachedb).CreateAndPatchDatabase(p, new ThrowImmediatelyCheckNotifier());
            queryCacheServer.SetProperties(cachedb);

            //Create a Cohort Identification configuration (query) that will identify the cohort
            CohortIdentificationConfiguration cic = new CohortIdentificationConfiguration(RepositoryLocator.CatalogueRepository, "RefreshCohort.cs");

            try
            {
                //make it use the cache
                cic.QueryCachingServer_ID = queryCacheServer.ID;
                cic.SaveToDatabase();

                //give it a single table query to fetch distinct chi from test data
                var agg = cic.CreateNewEmptyConfigurationForCatalogue(_catalogue, null);

                //add the sub query as the only entry in the cic (in the root container)
                cic.CreateRootContainerIfNotExists();
                cic.RootCohortAggregateContainer.AddChild(agg, 1);

                //make the ExtractionConfiguration refresh cohort query be the cic
                _configuration.CohortIdentificationConfiguration_ID = cic.ID;
                _configuration.CohortRefreshPipeline_ID             = pipe.ID;
                _configuration.SaveToDatabase();

                //get a refreshing engine
                var engine = new CohortRefreshEngine(new ThrowImmediatelyDataLoadEventListener(), _configuration);
                engine.Execute();

                Assert.NotNull(engine.Request.NewCohortDefinition);

                var oldData = oldcohort.GetExternalData();

                Assert.AreEqual(oldData.ExternalDescription, engine.Request.NewCohortDefinition.Description);
                Assert.AreEqual(oldData.ExternalVersion + 1, engine.Request.NewCohortDefinition.Version);

                Assert.AreNotEqual(oldcohort.CountDistinct, engine.Request.CohortCreatedIfAny.CountDistinct);

                //now nuke all data in the catalogue so the cic returns nobody (except that the identifiers are cached eh?)
                DataAccessPortal.GetInstance().ExpectDatabase(_tableInfo, DataAccessContext.InternalDataProcessing).ExpectTable(_tableInfo.GetRuntimeName()).Truncate();

                var toMem = new ToMemoryDataLoadEventListener(false);

                //get a new engine
                engine = new CohortRefreshEngine(toMem, _configuration);

                //execute it
                var ex = Assert.Throws<Exception>(() => engine.Execute());

                Assert.IsTrue(ex.InnerException.InnerException.Message.Contains("CohortIdentificationCriteria execution resulted in an empty dataset"));

                //expected this message to happen
                //that it did clear the cache
                Assert.AreEqual(1, toMem.EventsReceivedBySender.SelectMany(kvp => kvp.Value).Count(msg => msg.Message.Equals("Clearing Cohort Identifier Cache")));
            }
            finally
            {
                //make the ExtractionConfiguration not use the cic query
                _configuration.CohortRefreshPipeline_ID             = null;
                _configuration.CohortIdentificationConfiguration_ID = null;
                _configuration.SaveToDatabase();

                //delete the cic query
                cic.QueryCachingServer_ID = null;
                cic.SaveToDatabase();
                cic.DeleteInDatabase();

                //delete the caching database
                queryCacheServer.DeleteInDatabase();
                cachedb.Drop();
            }
        }
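The pipeline-assembly preamble in this example is a recurring RDMP pattern: create the Pipeline, add components, configure their arguments, then wire the component IDs back onto the pipeline. Condensed into a sketch using only calls that appear above:

        // Sketch: minimal pipeline assembly, as performed at the top of RefreshCohort_WithCaching
        var pipe = new Pipeline(CatalogueRepository, "MyPipe");
        var src  = new PipelineComponent(CatalogueRepository, pipe, typeof(CohortIdentificationConfigurationSource), 0);

        var freeze = src.CreateArgumentsForClassIfNotExists<CohortIdentificationConfigurationSource>()
            .Single(a => a.Name.Equals("FreezeAfterSuccessfulImport"));
        freeze.SetValue(false);
        freeze.SaveToDatabase();

        pipe.SourcePipelineComponent_ID = src.ID;
        pipe.SaveToDatabase();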
Example #18
        public void SourceRead_InvalidFloatInSequence_WithElevation_ToTable(InvalidDataHandling dataHandlingStrategy)
        {
            //create the elevation configuration
            var elevationRules = new FileInfo(Path.Combine(TestContext.CurrentContext.TestDirectory, "ElevationConfig.xml"));

            File.WriteAllText(elevationRules.FullName,
                              @"<!DOCTYPE TagElevationRequestCollection
[
  <!ELEMENT TagElevationRequestCollection (TagElevationRequest*)>
  <!ELEMENT TagElevationRequest (ColumnName,ElevationPathway,Conditional?)>
  <!ELEMENT ColumnName (#PCDATA)>
  <!ELEMENT ElevationPathway (#PCDATA)>
  <!ELEMENT Conditional (ConditionalPathway,ConditionalRegex)>
  <!ELEMENT ConditionalPathway (#PCDATA)>
  <!ELEMENT ConditionalRegex (#PCDATA)>
]>
<TagElevationRequestCollection>
  <TagElevationRequest>
    <ColumnName>WedgeAngleFloat</ColumnName>
    <ElevationPathway>AcquisitionContextSequence->WedgeAngleFloat</ElevationPathway>
  </TagElevationRequest>
</TagElevationRequestCollection>");

            //setup the source reader
            var source = new DicomDatasetCollectionSource();

            source.InvalidDataHandlingStrategy   = dataHandlingStrategy;
            source.TagElevationConfigurationFile = elevationRules;

            //don't load the sequence, just the elevation
            source.TagBlacklist = new Regex("AcquisitionContextSequence");

            //The dataset we are trying to load
            var ds = new DicomDataset();

            ds.Add(DicomTag.PatientAge, "123Y");

            var sequence = new DicomSequence(DicomTag.AcquisitionContextSequence,
                                             new DicomDataset()
            {
                { DicomTag.WedgeAngleFloat, "3.40282347e+038" }   //dodgy float in sequence (the sequence we are trying to elevate)
            });

            ds.Add(sequence);

            var worklist = new ExplicitListDicomDatasetWorklist(new[] { ds }, "fish.dcm", new Dictionary<string, string> {
                { "MessageGuid", "123x321" }
            });

            source.PreInitialize(worklist, new ThrowImmediatelyDataLoadEventListener());
            source.FilenameField = "RelFileName";

            DataTable dt = null;

            switch (dataHandlingStrategy)
            {
            case InvalidDataHandling.ThrowException:
                Assert.Throws<OverflowException>(() => source.GetChunk(new ThrowImmediatelyDataLoadEventListener(), new GracefulCancellationToken()));
                return;

            case InvalidDataHandling.ConvertToNullAndWarn:
                var tomem = new ToMemoryDataLoadEventListener(true);
                dt = source.GetChunk(tomem, new GracefulCancellationToken());
                Assert.AreEqual(DBNull.Value, dt.Rows[0]["WedgeAngleFloat"]);

                //should be a warning about WedgeAngleFloat logged
                var warning = tomem.EventsReceivedBySender.SelectMany(e => e.Value).Single(v => v.ProgressEventType == ProgressEventType.Warning);
                Assert.IsTrue(warning.Message.Contains("WedgeAngleFloat"));
                Assert.IsTrue(warning.Message.Contains("MessageGuid"));
                Assert.IsTrue(warning.Message.Contains("123x321"));
                Assert.IsTrue(warning.Message.Contains("fish.dcm"));

                break;

            default:
                throw new ArgumentOutOfRangeException("dataHandlingStrategy");
            }

            Assert.AreEqual("123Y", dt.Rows[0]["PatientAge"]);
            Assert.AreEqual("fish.dcm", dt.Rows[0]["RelFileName"]);
        }
Example #19
        public void TestBasicDataTableAnonymiser5(LoggerTestCase testCase)
        {
            //Create a names table that will go into the database
            var dt = new DataTable();

            dt.Columns.Add("Name");
            dt.Rows.Add(new[] { "Thomas" });
            dt.Rows.Add(new[] { "Wallace" });
            dt.Rows.Add(new[] { "Frank" });

            //upload the DataTable from memory into the database
            var discoveredTable = GetCleanedServer(DatabaseType.MicrosoftSQLServer).CreateTable("ForbiddenNames", dt);

            try
            {
                TableInfo tableInfo;

                //import the persistent TableInfo reference
                var importer = Import(discoveredTable, out tableInfo, out _);

                //Create the test dataset chunks that will be anonymised
                var dtStories1 = new DataTable();
                dtStories1.Columns.Add("Story");
                dtStories1.Rows.Add(new[] { "Thomas went to school regularly" });           //1st redact
                dtStories1.Rows.Add(new[] { "It seems like Wallace went less regularly" }); //2nd redact
                dtStories1.Rows.Add(new[] { "Mr Smitty was the teacher" });

                var dtStories2 = new DataTable();
                dtStories2.Columns.Add("Story");
                dtStories2.Rows.Add(new[] { "Things were going so well" });
                dtStories2.Rows.Add(new[] { "And then it all turned bad for Wallace" });         //3rd redact

                var dtStories3 = new DataTable();
                dtStories3.Columns.Add("Story");
                dtStories3.Rows.Add(new[] { "There were things creeping in the dark" });
                dtStories3.Rows.Add(new[] { "Surely Frank would know what to do.  Frank was a genius" });         //4th redact
                dtStories3.Rows.Add(new[] { "Mr Smitty was the teacher" });

                //Create the anonymiser
                var a = new BasicDataTableAnonymiser5();

                //Tell it about the database table
                a.NamesTable = tableInfo;

                //Create a listener according to the test case
                IDataLoadEventListener listener = null;

                switch (testCase)
                {
                case LoggerTestCase.ToConsole:
                    listener = new ThrowImmediatelyDataLoadEventListener();
                    break;

                case LoggerTestCase.ToMemory:
                    listener = new ToMemoryDataLoadEventListener(true);
                    break;

                case LoggerTestCase.ToDatabase:

                    //get the default logging server
                    var logManager = CatalogueRepository.GetDefaultLogManager();

                    //create a new super task Anonymising Data Tables
                    logManager.CreateNewLoggingTaskIfNotExists("Anonymising Data Tables");

                    //setup a listener that goes to this logging database
                    listener = new ToLoggingDatabaseDataLoadEventListener(this, logManager, "Anonymising Data Tables", "Run on " + DateTime.Now);
                    break;

                default:
                    throw new ArgumentOutOfRangeException("testCase");
                }

                //run the anonymisation
                //process all 3 batches
                a.ProcessPipelineData(dtStories1, listener, new GracefulCancellationToken());
                a.ProcessPipelineData(dtStories2, listener, new GracefulCancellationToken());
                a.ProcessPipelineData(dtStories3, listener, new GracefulCancellationToken());

                //check the results
                switch (testCase)
                {
                case LoggerTestCase.ToMemory:
                    Assert.AreEqual(4, ((ToMemoryDataLoadEventListener)listener).LastProgressRecieivedByTaskName["REDACTING Names"].Progress.Value);
                    break;

                case LoggerTestCase.ToDatabase:
                    ((ToLoggingDatabaseDataLoadEventListener)listener).FinalizeTableLoadInfos();
                    break;
                }
            }
            finally
            {
                //finally drop the database table
                discoveredTable.Drop();
            }
        }
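The switch above exercises the three listener flavours that appear throughout this page. Side by side (sketch; the database flavour requires a LogManager and an existing logging task, as set up in the ToDatabase case):

        // Sketch: the three IDataLoadEventListener implementations used in this example
        IDataLoadEventListener console = new ThrowImmediatelyDataLoadEventListener(); // surface problems immediately
        IDataLoadEventListener memory  = new ToMemoryDataLoadEventListener(true);     // record events for later assertions
        IDataLoadEventListener db      = new ToLoggingDatabaseDataLoadEventListener(
            this, logManager, "Anonymising Data Tables", "Run on " + DateTime.Now);   // persist to the logging database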
Example #20
        public void ValidateBulkTestData(bool testCancellingValidationEarly)
        {
            int      numberOfRecordsToGenerate = 10000;
            DateTime startTime = DateTime.Now;

            BulkTestsData testData = new BulkTestsData(CatalogueRepository, DiscoveredDatabaseICanCreateRandomTablesIn, numberOfRecordsToGenerate);

            testData.SetupTestData();
            testData.ImportAsCatalogue();

            DQERepository dqeRepository = new DQERepository(CatalogueRepository);

            //there shouldn't be any lingering results in the database
            Assert.IsNull(dqeRepository.GetMostRecentEvaluationFor(_catalogue));

            //set some validation rules
            testData.catalogue.ValidatorXML = bulkTestDataValidation;

            //set the time periodicity field
            var toBeTimePeriodicityCol = testData.catalogue.GetAllExtractionInformation(ExtractionCategory.Any).Single(e => e.GetRuntimeName().Equals("dtCreated"));

            testData.catalogue.TimeCoverage_ExtractionInformation_ID = toBeTimePeriodicityCol.ID;

            //do the validation
            CatalogueConstraintReport report = new CatalogueConstraintReport(testData.catalogue, SpecialFieldNames.DataLoadRunID);

            report.Check(new ThrowImmediatelyCheckNotifier());

            CancellationTokenSource source = new CancellationTokenSource();

            if (testCancellingValidationEarly)
            {
                source.Cancel();
            }

            ToMemoryDataLoadEventListener listener = new ToMemoryDataLoadEventListener(false);

            report.GenerateReport(testData.catalogue, listener, source.Token);

            if (testCancellingValidationEarly)
            {
                Assert.IsTrue(listener.EventsReceivedBySender[report].Count(m => m.Exception is OperationCanceledException) == 1);
                testData.Destroy();
                testData.DeleteCatalogue();
                return;
            }

            Assert.IsTrue(listener.EventsReceivedBySender[report].All(m => m.Exception == null));//all messages must have null exceptions


            //get the results now
            var results = dqeRepository.GetMostRecentEvaluationFor(testData.catalogue);

            Assert.IsNotNull(results);

            //the sum of all consequences across all data load run ids should equal the record count
            Assert.AreEqual(10000, results.RowStates.Sum(r => r.Missing + r.Invalid + r.Wrong + r.Correct));

            //there should be at least 5 data load run ids (around 12 in practice - see BulkTestsData; in theory all 10,000 records could land in fewer decades, but the odds against that are astronomical)
            Assert.GreaterOrEqual(results.RowStates.Count(), 5);

            //there should be lots of column results too
            Assert.GreaterOrEqual(results.ColumnStates.Count(), 5);

            //Did it log?
            LogManager logManager = new LogManager(CatalogueRepository.GetServerDefaults().GetDefaultFor(PermissableDefaults.LiveLoggingServer_ID));
            var        log        = logManager.GetArchivalDataLoadInfos("DQE").FirstOrDefault();

            Assert.IsNotNull(log);
            Assert.GreaterOrEqual(log.StartTime, startTime);
            Assert.AreEqual(0, log.Errors.Count);
            Assert.AreEqual(numberOfRecordsToGenerate, log.TableLoadInfos.Single().Inserts);

            testData.Destroy();

            testData.DeleteCatalogue();
        }
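GenerateReport above takes a plain CancellationToken, but most other examples on this page pass a GracefulCancellationToken. A sketch of building a cancellable one from a CancellationTokenSource (the two-token constructor and its stop/abort argument roles are assumed from the type's usage, not confirmed from the repo; the parameterless constructor seen elsewhere yields a token that never cancels):

        // Sketch: an externally cancellable GracefulCancellationToken
        var cts = new CancellationTokenSource();
        var gracefulToken = new GracefulCancellationToken(cts.Token, cts.Token); // same source used for stop and abort
        cts.Cancel(); // components consulting the token can now stop gracefully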