Example #1
        protected override IDataFlowPipelineContext GenerateContextImpl()
        {
            var context = new DataFlowPipelineContextFactory<DataTable>().Create(PipelineUsage.LoadsSingleFlatFile);

            context.MustHaveDestination = typeof(DataTableUploadDestination);
            return context;
        }
Example #2

        protected override IDataFlowPipelineContext GenerateContextImpl()
        {
            var context = new DataFlowPipelineContextFactory<DataTable>().Create(PipelineUsage.FixedDestination);

            context.MustHaveSource = typeof(IDataFlowSource<DataTable>);

            return context;
        }
Example #3

        protected override IDataFlowPipelineContext GenerateContextImpl()
        {
            var contextFactory = new DataFlowPipelineContextFactory<DataTable>();
            var context        = contextFactory.Create(PipelineUsage.FixedSource);

            context.MustHaveDestination = typeof(DataTableUploadDestination);

            return context;
        }
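The three overrides above share one pattern: pick the PipelineUsage flags that describe the fixed parts of the pipeline, then tighten the context with MustHaveSource/MustHaveDestination. The sketch below restates that pattern with the flag meanings spelled out; those meanings are inferred from how the examples on this page use the flags, not taken from RDMP documentation.

        //A minimal sketch of the common pattern. Flag meanings here are inferred
        //from usage elsewhere on this page:
        //  FixedSource / FixedDestination - the endpoint is supplied by host code
        //      rather than configured by the user as a pipeline component
        //  LoadsSingleTableInfo / LoadsSingleFlatFile - a TableInfo / flat file
        //      will be passed to PreInitialize
        //  LogsToTableLoadInfo - an ITableLoadInfo audit object will be passed in
        protected override IDataFlowPipelineContext GenerateContextImpl()
        {
            var contextFactory = new DataFlowPipelineContextFactory<DataTable>();
            var context        = contextFactory.Create(PipelineUsage.FixedSource | PipelineUsage.FixedDestination);

            //constrain the endpoint types; both interfaces and concrete classes appear above
            context.MustHaveSource      = typeof(IDataFlowSource<DataTable>);
            context.MustHaveDestination = typeof(DataTableUploadDestination);

            return context;
        }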
Example #4
File: SourceTests.cs Project: 24418863/rdm
        public void TestExtraSuspiciousPipelineRequirements()
        {
            var contextFactory = new DataFlowPipelineContextFactory<DataTable>();
            var context        = contextFactory.Create(PipelineUsage.FixedDestination);

            var suspiciousComponent = new TestObject_ExtraSuspicious();

            Assert.Throws<OverlappingImplementationsException>(() => context.PreInitialize(new ThrowImmediatelyDataLoadJob(), suspiciousComponent, "5"));
        }
Example #5
File: SourceTests.cs Project: 24418863/rdm
        public void TestPipelineContextInitializationNoInterfaces()
        {
            var contextFactory = new DataFlowPipelineContextFactory<DataTable>();
            var context        = contextFactory.Create(PipelineUsage.FixedDestination | PipelineUsage.LoadsSingleTableInfo);
            var ti             = new TableInfo(mockRepo, "Foo");
            var component      = new TestObjectNoRequirements();

            Assert.DoesNotThrow(() => context.PreInitialize(new ThrowImmediatelyDataLoadEventListener(), component, ti));
        }
Example #6
        protected override IDataFlowPipelineContext GenerateContextImpl()
        {
            var contextFactory = new DataFlowPipelineContextFactory<ReleaseAudit>();
            var context        = contextFactory.Create(PipelineUsage.FixedSource);

            context.MustHaveDestination = typeof(IDataFlowDestination<ReleaseAudit>);

            return context;
        }
Example #7
File: SourceTests.cs Project: 24418863/rdm
        public void TestSuspiciousPipelineRequirements()
        {
            var contextFactory = new DataFlowPipelineContextFactory<DataTable>();
            var context        = contextFactory.Create(PipelineUsage.FixedDestination);

            var suspiciousComponent = new TestObject_Suspicious();
            var ex = Assert.Throws<MultipleMatchingImplmentationException>(() => context.PreInitialize(new ThrowImmediatelyDataLoadJob(), suspiciousComponent, 5, "fish"));

            Console.WriteLine("Exception was:" + ex.Message);
        }
Example #8
File: SourceTests.cs Project: 24418863/rdm
        public void TestPipelineContextInitialization_ForbiddenType()
        {
            var contextFactory = new DataFlowPipelineContextFactory<DataTable>();
            var context        = contextFactory.Create(PipelineUsage.None);

            var component = new TestObject_RequiresTableInfo();
            var ti        = new TableInfo(new MemoryCatalogueRepository(), "Foo");
            var ex        = Assert.Throws<Exception>(() => context.PreInitialize(new ThrowImmediatelyDataLoadEventListener(), component, ti));

            StringAssert.Contains("Type TableInfo is not an allowable PreInitialize parameters type under the current DataFlowPipelineContext (check which flags you passed to the DataFlowPipelineContextFactory and the interfaces IPipelineRequirement<> that your components implement) ", ex.Message);
        }
Example #9
        protected override IDataFlowPipelineContext GenerateContextImpl()
        {
            //create the context using the standard context factory
            var contextFactory = new DataFlowPipelineContextFactory<DataTable>();
            var context        = contextFactory.Create(PipelineUsage.LogsToTableLoadInfo);

            //adjust context: we want a destination requirement of IExecuteDatasetExtractionDestination
            context.MustHaveDestination = typeof(IExecuteDatasetExtractionDestination);//we want this freaky destination type
            context.MustHaveSource      = typeof(ExecuteDatasetExtractionSource);

            return context;
        }
Example #10
        protected override IDataFlowPipelineContext GenerateContextImpl()
        {
            //create the context using the standard context factory
            var contextFactory = new DataFlowPipelineContextFactory<ICacheChunk>();
            var context        = contextFactory.Create(PipelineUsage.None);

            //adjust context: we want a destination requirement of ICacheFileSystemDestination so that we can load from the cache using the pipeline endpoint as the source of the data load
            context.MustHaveDestination = typeof(ICacheFileSystemDestination);//we want this freaky destination type
            context.MustHaveSource      = typeof(ICacheSource);

            return context;
        }
Example #11
File: SourceTests.cs Project: 24418863/rdm
        public void TestPipelineContextIsAllowable()
        {
            var contextFactory = new DataFlowPipelineContextFactory<DataTable>();
            var context        = contextFactory.Create(PipelineUsage.FixedSource | PipelineUsage.FixedDestination | PipelineUsage.LoadsSingleTableInfo);

            var pipeline  = new Pipeline(CatalogueRepository, "DeleteMePipeline");
            var component = new PipelineComponent(CatalogueRepository, pipeline, typeof(TestObject_RequiresTableInfo), 0);

            Assert.IsTrue(context.IsAllowable(pipeline));

            pipeline.DeleteInDatabase();
        }
Example #12
File: SourceTests.cs Project: 24418863/rdm
        public void TestPipelineContextInitialization()
        {
            var contextFactory = new DataFlowPipelineContextFactory<DataTable>();
            var context        = contextFactory.Create(PipelineUsage.FixedDestination | PipelineUsage.LoadsSingleTableInfo);

            var component = new TestObject_RequiresTableInfo();
            var ti        = new TableInfo(CatalogueRepository, "TestTableInfo");

            context.PreInitialize(new ThrowImmediatelyDataLoadEventListener(), component, ti);

            Assert.AreEqual(component.PreInitToThis, ti);
            ti.DeleteInDatabase();
        }
Example #13
File: SourceTests.cs Project: 24418863/rdm
        public void TestPipelineContextInitialization_UnexpectedType()
        {
            var contextFactory = new DataFlowPipelineContextFactory<DataTable>();
            var context        = contextFactory.Create(PipelineUsage.FixedDestination | PipelineUsage.LoadsSingleTableInfo);

            var component = new TestObject_RequiresTableInfo();
            var ti        = new TableInfo(mockRepo, "Foo");
            var ci        = new ColumnInfo(mockRepo, "ColumnInfo", "Type", ti);

            ci.Name = "ColumnInfo"; // because we passed a stubbed repository, the name won't be set

            var ex = Assert.Throws<Exception>(() => context.PreInitialize(new ThrowImmediatelyDataLoadEventListener(), component, ci));

            StringAssert.Contains("The following expected types were not passed to PreInitialize:TableInfo", ex.Message);
        }
Example #14
File: SourceTests.cs Project: 24418863/rdm
        public void TestPipelineContextInitialization_UninitializedInterface()
        {
            var contextFactory = new DataFlowPipelineContextFactory<DataTable>();
            var context        = contextFactory.Create(PipelineUsage.FixedDestination | PipelineUsage.LoadsSingleTableInfo);

            //component is both IPipelineRequirement<TableInfo> AND IPipelineRequirement<LoadModuleAssembly> but only TableInfo is passed in params
            var component = new TestObject_RequiresTableInfoAndFreakyObject();

            var testTableInfo = new TableInfo(mockRepo, "");

            testTableInfo.Name = "Test Table Info";

            var ex = Assert.Throws<Exception>(() => context.PreInitialize(new ThrowImmediatelyDataLoadEventListener(), component, testTableInfo));

            StringAssert.Contains("The following expected types were not passed to PreInitialize:LoadModuleAssembly\r\nThe object types passed were:\r\nRdmp.Core.Curation.Data.TableInfo:Test Table Info", ex.Message);
        }
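Several of the tests above hinge on components declaring their initialization inputs by implementing IPipelineRequirement<T>. Below is a hedged sketch of what such a component might look like; the PreInitialize(T, IDataLoadEventListener) signature is assumed from how the context routes objects in these tests, not quoted from RDMP itself.

        //Hypothetical component for illustration only. We assume IPipelineRequirement<T>
        //exposes PreInitialize(T value, IDataLoadEventListener listener): the context's
        //PreInitialize(...) hands each passed object to the matching IPipelineRequirement<>
        //implementation, and throws when one goes unserved (as Example #14 demonstrates).
        public class ExampleComponent_RequiresTableInfo : IPipelineRequirement<TableInfo>
        {
            public TableInfo PreInitToThis { get; private set; }

            public void PreInitialize(TableInfo value, IDataLoadEventListener listener)
            {
                //record what we were initialized with, mirroring TestObject_RequiresTableInfo
                PreInitToThis = value;
            }
        }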
Example #15
File: SourceTests.cs Project: 24418863/rdm
        public void TestPipelineContextIsNOTAllowable()
        {
            var contextFactory = new DataFlowPipelineContextFactory<DataTable>();
            var context        = contextFactory.Create(PipelineUsage.FixedDestination);

            var pipeline  = new Pipeline(CatalogueRepository, "DeleteMePipeline");
            var component = new PipelineComponent(CatalogueRepository, pipeline, typeof(TestObject_RequiresTableInfo), 0);

            component.Name = "TestPipeComponent";
            component.SaveToDatabase();

            string reason;
            bool   allowable = context.IsAllowable(pipeline, out reason);

            Console.WriteLine(reason);

            Assert.IsFalse(allowable, reason);

            Assert.AreEqual("Component TestPipeComponent implements a forbidden type (IPipelineRequirement<TableInfo>) under the pipeline usage context", reason);

            pipeline.DeleteInDatabase();
        }
Example #16
        public void Test_ZipFileNotation(bool expressRelative)
        {
            //get a clean database to upload to
            var db = GetCleanedServer(DatabaseType.MicrosoftSQLServer);

            //create a folder in which to generate some dicoms
            var dirToLoad = new DirectoryInfo(Path.Combine(TestContext.CurrentContext.TestDirectory, nameof(Test_ZipFileNotation)));

            if (dirToLoad.Exists)
            {
                dirToLoad.Delete(true);
            }

            dirToLoad.Create();

            //generate some random dicoms
            var r = new Random(999);
            DicomDataGenerator generator = new DicomDataGenerator(r, dirToLoad, "CT")
            {
                MaximumImages = 5
            };
            var people = new PersonCollection();

            people.GeneratePeople(1, r);
            generator.GenerateTestDataFile(people, new FileInfo("./inventory.csv"), 1);

            //This generates
            // Test_ZipFile
            //      2015
            //          3
            //              18
            //                  751140 2.25.166922918107154891877498685128076062226.dcm
            //                  751140 2.25.179610809676265137473873365625829826423.dcm
            //                  751140 2.25.201969634959506849065133495434871450465.dcm
            //                  751140 2.25.237492679533001779093365416814254319890.dcm
            //                  751140 2.25.316241631782653383510844072713132248731.dcm

            var yearDir = dirToLoad.GetDirectories().Single();

            StringAssert.IsMatch("\\d{4}", yearDir.Name);

            //should be 5 images in the zip file
            var dicomFiles = yearDir.GetFiles("*.dcm", SearchOption.AllDirectories);

            Assert.AreEqual(5, dicomFiles.Length);

            //e.g. \2015\3\18\2.25.223398837779449245317520567111874824918.dcm
            //e.g. \2015\3\18\2.25.179610809676265137473873365625829826423.dcm
            var relativePathWithinZip1 = dicomFiles[0].FullName.Substring(dirToLoad.FullName.Length);
            var relativePathWithinZip2 = dicomFiles[1].FullName.Substring(dirToLoad.FullName.Length);

            //zip them up
            FileInfo zip = new FileInfo(Path.Combine(TestContext.CurrentContext.TestDirectory, nameof(Test_ZipFile) + ".zip"));

            if (zip.Exists)
            {
                zip.Delete();
            }

            ZipFile.CreateFromDirectory(dirToLoad.FullName, zip.FullName);

            //e.g. E:\RdmpDicom\Rdmp.Dicom.Tests\bin\Debug\netcoreapp2.2\Test_ZipFile.zip!\2015\3\18\2.25.223398837779449245317520567111874824918.dcm
            string pathToLoad1 = zip.FullName + "!" + relativePathWithinZip1;
            string pathToLoad2 = zip.FullName + "!" + relativePathWithinZip2;

            var loadMeTextFile = new FileInfo(Path.Combine(dirToLoad.FullName, "LoadMe.txt"));

            //tell the source to load the zip
            File.WriteAllText(loadMeTextFile.FullName, string.Join(Environment.NewLine, pathToLoad1, pathToLoad2));

            var f = new FlatFileToLoad(loadMeTextFile);

            //Setup source
            var source = new DicomFileCollectionSource {
                FilenameField = "RelativeFileArchiveURI"
            };

            if (expressRelative)
            {
                source.ArchiveRoot = TestContext.CurrentContext.TestDirectory;
            }

            var worklist = new FlatFileToLoadDicomFileWorklist(f);

            //Setup destination
            var destination = new DataTableUploadDestination {
                AllowResizingColumnsAtUploadTime = true
            };

            //setup pipeline
            var contextFactory = new DataFlowPipelineContextFactory<DataTable>();
            var context        = contextFactory.Create(PipelineUsage.FixedSource | PipelineUsage.FixedDestination);

            //run pipeline
            var pipe = new DataFlowPipelineEngine<DataTable>(context, source, destination, new ThrowImmediatelyDataLoadEventListener());

            pipe.Initialize(db, worklist);
            pipe.ExecutePipeline(new GracefulCancellationToken());

            var finalTable = db.ExpectTable(destination.TargetTableName);

            using (var dt = finalTable.GetDataTable())
            {
                //should be 2 rows (since we told it to only load 2 files out of the zip)
                Assert.AreEqual(2, dt.Rows.Count);

                string pathInDbToDicomFile = (string)dt.Rows[0]["RelativeFileArchiveURI"];

                //We expect either something like:
                // E:/RdmpDicom/Rdmp.Dicom.Tests/bin/Debug/netcoreapp2.2/Test_ZipFile.zip!2015/3/18/2.25.160787663560951826149226183314694084702.dcm
                // ./Test_ZipFile.zip!2015/3/18/2.25.105592977437473375573190160334447272386.dcm

                //the path referenced should be the file read in relative/absolute format
                StringAssert.IsMatch(
                    expressRelative ? $@"./{zip.Name}![\d./]*.dcm":
                    $@"{Regex.Escape(zip.FullName.Replace('\\','/'))}![\d./]*.dcm",
                    pathInDbToDicomFile);

                StringAssert.Contains(yearDir.Name, pathInDbToDicomFile, "Expected zip file to have subdirectories and for them to be loaded correctly");

                //confirm we can read that out again
                using (var pool = new ZipPool())
                {
                    var path = new AmbiguousFilePath(TestContext.CurrentContext.TestDirectory, pathInDbToDicomFile);
                    Assert.IsNotNull(path.GetDataset(pool));
                }
            }

            Assert.IsTrue(finalTable.Exists());
            finalTable.Drop();
        }
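The zip.FullName + "!" + relativePath construction above is the archive notation the source understands: everything before the ! is the zip on disk and everything after it is the entry within. A hedged sketch of reading such a path back out, using only the AmbiguousFilePath and ZipPool calls already exercised at the end of this test (the inner path here is a made-up placeholder):

            //Minimal sketch, assuming AmbiguousFilePath resolves "archive.zip!inner/file.dcm"
            //notation against a root directory and ZipPool caches open archives,
            //exactly as the tail of Test_ZipFileNotation uses them
            string pathInDb = "./Test_ZipFile.zip!2015/3/18/2.25.0000000000.dcm"; //hypothetical entry
            using (var pool = new ZipPool())
            {
                var path = new AmbiguousFilePath(TestContext.CurrentContext.TestDirectory, pathInDb);
                var dataset = path.GetDataset(pool); //opens the zip via the pool and reads the dicom dataset
            }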
Example #17
        public override ExitCodeType Attach(IDataLoadJob job, GracefulCancellationToken cancellationToken)
        {
            if (job == null)
            {
                throw new Exception("Job is Null, we require to know the job to build a DataFlowPipeline");
            }

            string sql;

            var dbFrom = RemoteSource.Discover(DataAccessContext.DataLoad);

            var remoteTables = new HashSet<string>(dbFrom.DiscoverTables(true).Select(t => t.GetRuntimeName()), StringComparer.CurrentCultureIgnoreCase);
            var loadables    = job.RegularTablesToLoad.Union(job.LookupTablesToLoad).ToArray();

            var syntaxFrom = dbFrom.Server.GetQuerySyntaxHelper();

            foreach (var tableInfo in loadables)
            {
                var table = tableInfo.GetRuntimeName();
                if (!remoteTables.Contains(table))
                {
                    throw new Exception("Loadable table " + table + " was NOT found on the remote DB!");
                }

                if (LoadRawColumnsOnly)
                {
                    var rawColumns = tableInfo.GetColumnsAtStage(LoadStage.AdjustRaw);
                    sql = "SELECT " + String.Join(",", rawColumns.Select(c =>
                                                                         syntaxFrom.EnsureWrapped(c.GetRuntimeName(LoadStage.AdjustRaw)))) + " FROM " + syntaxFrom.EnsureWrapped(table);
                }
                else
                {
                    sql = "SELECT * FROM " + syntaxFrom.EnsureWrapped(table);
                }

                job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "About to execute SQL:" + Environment.NewLine + sql));

                var source = new DbDataCommandDataFlowSource(sql, "Fetch data from " + dbFrom + " to populate RAW table " + table, dbFrom.Server.Builder, Timeout == 0 ? 50000 : Timeout);

                var destination = new SqlBulkInsertDestination(_dbInfo, table, Enumerable.Empty<string>());

                var contextFactory = new DataFlowPipelineContextFactory<DataTable>();
                var context        = contextFactory.Create(PipelineUsage.LogsToTableLoadInfo | PipelineUsage.FixedDestination);

                var engine = new DataFlowPipelineEngine<DataTable>(context, source, destination, job);

                ITableLoadInfo loadInfo = job.DataLoadInfo.CreateTableLoadInfo("Truncate RAW table " + table,
                                                                               _dbInfo.Server.Name + "." + _dbInfo.GetRuntimeName(),
                                                                               new[]
                {
                    new DataSource(
                        "Remote SqlServer Servername=" + dbFrom.Server + ";Database=" + _dbInfo.GetRuntimeName() +

                        //Either list the table or the query depending on what is populated
                        (table != null ? " Table=" + table : " Query = " + sql), DateTime.Now)
                }, -1);

                engine.Initialize(loadInfo);
                engine.ExecutePipeline(new GracefulCancellationToken());

                if (source.TotalRowsRead == 0)
                {
                    job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "No rows were read from the remote table " + table + "."));
                }

                job.OnNotify(this, new NotifyEventArgs(source.TotalRowsRead > 0 ? ProgressEventType.Information : ProgressEventType.Warning, "Finished after reading " + source.TotalRowsRead + " rows"));
            }

            return ExitCodeType.Success;
        }
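Examples #17 to #19 all drive the same engine lifecycle. Distilled into a hedged skeleton (the variables sql, dbFrom, table, _dbInfo, job and loadInfo are the ones from Example #17 above; nothing is assumed beyond the calls already shown):

                //1. describe the context: which endpoints are fixed, what gets logged
                var contextFactory = new DataFlowPipelineContextFactory<DataTable>();
                var context        = contextFactory.Create(PipelineUsage.LogsToTableLoadInfo | PipelineUsage.FixedDestination);

                //2. build the fixed endpoints
                var source      = new DbDataCommandDataFlowSource(sql, "Fetch data from " + dbFrom, dbFrom.Server.Builder, 50000);
                var destination = new SqlBulkInsertDestination(_dbInfo, table, Enumerable.Empty<string>());

                //3. construct the engine over the context and endpoints
                var engine = new DataFlowPipelineEngine<DataTable>(context, source, destination, job);

                //4. hand over the PreInitialize objects the flags demand (here the audit object)
                engine.Initialize(loadInfo);

                //5. pump batches from source to destination until the source is exhausted
                engine.ExecutePipeline(new GracefulCancellationToken());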
Example #18
        public override ExitCodeType Attach(IDataLoadJob job, GracefulCancellationToken cancellationToken)
        {
            if (job == null)
                throw new Exception("Job is Null, we require to know the job to build a DataFlowPipeline");
      
            ThrowIfInvalidRemoteTableName();

            var syntax = _remoteDatabase.Server.GetQuerySyntaxHelper();

            string sql;

            if (!string.IsNullOrWhiteSpace(RemoteSelectSQL))
                sql = RemoteSelectSQL;
            else
                sql = "Select * from " + syntax.EnsureWrapped(RemoteTableName);
            
            bool scheduleMismatch = false;

            //if there is a load progress 
            if (Progress != null)
                try
                {
                    //get appropriate date declaration SQL if any
                    sql = GetScheduleParameterDeclarations(job, out scheduleMismatch) + sql;
                }
                catch (Exception e)
                {
                    //if the date range is in the future then GetScheduleParameterDeclarations will throw Exception about future dates
                    if(e.Message.StartsWith(FutureLoadMessage))
                        return ExitCodeType.OperationNotRequired;//if this is the case then don't bother with the data load

                    throw;
                }
            if (scheduleMismatch)
            {
                job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Warning, "Skipping LoadProgress '" + Progress + "' because it is not the correct Schedule for this component"));
                return ExitCodeType.Success;
            }

            job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "About to execute SQL:" + Environment.NewLine + sql));

            var source = new DbDataCommandDataFlowSource(sql, "Fetch data from " + _remoteDatabase.Server + " to populate RAW table " + RemoteTableName, _remoteDatabase.Server.Builder, Timeout == 0 ? 50000 : Timeout);

            //For Oracle / Postgres we have to add the parameters to the DbCommand directly
            if (_minDateParam.HasValue && _maxDateParam.HasValue && !syntax.SupportsEmbeddedParameters())
            {
                source.CommandAdjuster = (cmd) =>
                {
                    var pmin = cmd.CreateParameter();
                    pmin.Value = _minDateParam.Value;
                    pmin.ParameterName = StartDateParameter;
                    cmd.Parameters.Add(pmin);

                    var pmax = cmd.CreateParameter();
                    pmax.Value = _maxDateParam.Value;
                    pmax.ParameterName = EndDateParameter;
                    cmd.Parameters.Add(pmax);
                };
            }
                
            var destination = new SqlBulkInsertDestination(_dbInfo, RAWTableName, Enumerable.Empty<string>());

            var contextFactory = new DataFlowPipelineContextFactory<DataTable>();
            var context = contextFactory.Create(PipelineUsage.LogsToTableLoadInfo | PipelineUsage.FixedDestination);

            var engine = new DataFlowPipelineEngine<DataTable>(context, source, destination, job);

            ITableLoadInfo loadInfo = job.DataLoadInfo.CreateTableLoadInfo("Truncate RAW table " + RAWTableName,
                _dbInfo.Server.Name + "." + _dbInfo.GetRuntimeName(),
                new []
                {
                    new DataSource(
                        "Remote SqlServer Servername=" + _remoteDatabase.Server + ";Database=" + _dbInfo.GetRuntimeName() +

                        //Either list the table or the query depending on what is populated
                        (RemoteTableName != null ? " Table=" + RemoteTableName : " Query = " + sql), DateTime.Now)
                }, -1);

            engine.Initialize(loadInfo);
            engine.ExecutePipeline(new GracefulCancellationToken());

            if (source.TotalRowsRead == 0 && LoadNotRequiredIfNoRowsRead)
            {
                job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "No rows were read from the remote table and LoadNotRequiredIfNoRowsRead is true so returning ExitCodeType.OperationNotRequired"));
                return ExitCodeType.OperationNotRequired;
            }

            job.OnNotify(this, new NotifyEventArgs(source.TotalRowsRead > 0 ? ProgressEventType.Information : ProgressEventType.Warning, "Finished after reading " + source.TotalRowsRead + " rows"));

            if (Progress != null)
            {
                if (ProgressUpdateStrategy == null)
                    throw new Exception("ProgressUpdateStrategy is null but there is a Progress");

                ProgressUpdateStrategy.AddAppropriateDisposeStep((ScheduledDataLoadJob)job, _dbInfo);
            }

            return ExitCodeType.Success;
        }
Example #19
        public override ExitCodeType Run(IDataLoadJob job, GracefulCancellationToken cancellationToken)
        {
            if (_pipeline != null)
            {
                throw new Exception("Pipeline already executed once");
            }

            var contextFactory = new DataFlowPipelineContextFactory<DataTable>();
            var context        = contextFactory.Create(PipelineUsage.LoadsSingleTableInfo | PipelineUsage.FixedDestination | PipelineUsage.LogsToTableLoadInfo);

            //where we are coming from (source)
            var sourceConvention = LoadBubble.Raw;
            DiscoveredDatabase sourceDatabase = _databaseConfiguration.DeployInfo[sourceConvention];
            var sourceTableName = _tableInfo.GetRuntimeName(sourceConvention, _databaseConfiguration.DatabaseNamer);

            //What to do if where we are coming from does not have the table existing on it
            if (!sourceDatabase.ExpectTable(sourceTableName).Exists())
            {
                if (_isLookupTable)
                {
                    job.OnNotify(this,
                                 new NotifyEventArgs(ProgressEventType.Warning,
                                                     "Lookup table " + sourceTableName + " did not exist on RAW so was not migrated to STAGING"));
                    return ExitCodeType.Success;
                }
                else
                {
                    job.OnNotify(this,
                                 new NotifyEventArgs(ProgressEventType.Error,
                                                     "Table " + sourceTableName + " did not exist in RAW database " + sourceDatabase +
                                                     " when it came time to migrate RAW to STAGING (and the table is not a lookup)"));
                }
            }


            // where we are going to (destination)
            // ignore any columns that are marked for discard
            var destinationConvention = LoadBubble.Staging;
            DiscoveredDatabase destinationDatabase = _databaseConfiguration.DeployInfo[LoadBubble.Staging];
            var destinationTableName = _tableInfo.GetRuntimeName(destinationConvention, _databaseConfiguration.DatabaseNamer);

            DeleteFullyNullRecords(sourceTableName, sourceDatabase, job);

            //audit
            ITableLoadInfo tableLoadInfo = job.DataLoadInfo.CreateTableLoadInfo(
                "None required, if fails then simply drop Staging database and reload dataset", "STAGING:" + destinationTableName,
                new DataSource[] { new DataSource("RAW:" + sourceTableName, DateTime.Now) }, -1);

            var syntax = sourceDatabase.Server.GetQuerySyntaxHelper();

            //connect to source and open a reader! note that GetReaderForRAW will at this point preserve the state of the database such that any commands e.g. deletes will not have any effect even though ExecutePipeline has not been called!
            var source = new DbDataCommandDataFlowSource(
                "Select distinct * from " + syntax.EnsureWrapped(sourceTableName),
                "Fetch data from " + syntax.EnsureWrapped(sourceTableName),
                sourceDatabase.Server.Builder, 50000);

            //ignore those that are pre load discarded columns (unless they are dilution in which case they get passed through in a decrepit state instead of dumped entirely - these fields will still be in ANODump in pristine state btw)
            var columnNamesToIgnoreForBulkInsert = _tableInfo.PreLoadDiscardedColumns.Where(c => c.Destination != DiscardedColumnDestination.Dilute).Select(column => column.RuntimeColumnName).ToList();

            //pass pre load discard
            var destination = new SqlBulkInsertDestination(destinationDatabase, destinationTableName, columnNamesToIgnoreForBulkInsert);

            //engine that will move data
            _pipeline = new DataFlowPipelineEngine<DataTable>(context, source, destination, job);

            //add clean strings component
            _pipeline.ComponentObjects.Add(new CleanStrings());

            //add dropping of preload discard columns
            _pipeline.ComponentObjects.Add(new BasicAnonymisationEngine());

            _pipeline.Initialize(tableLoadInfo, _tableInfo);

            //tell it to move data
            _pipeline.ExecutePipeline(cancellationToken);

            return ExitCodeType.Success;
        }
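Example #19 is the only one on this page that injects middle components (CleanStrings, BasicAnonymisationEngine) between the fixed endpoints. A hedged sketch of that pattern in isolation, reusing only the calls the example itself makes:

            //Minimal sketch, assuming ComponentObjects is the ordered list of middle
            //components the engine runs between its fixed source and destination,
            //as Example #19 uses it
            var engine = new DataFlowPipelineEngine<DataTable>(context, source, destination, job);

            //middle components execute in the order they are added
            engine.ComponentObjects.Add(new CleanStrings());
            engine.ComponentObjects.Add(new BasicAnonymisationEngine());

            //Initialize routes each object to whichever components implement the
            //matching IPipelineRequirement<> (see Examples #12 to #14)
            engine.Initialize(tableLoadInfo, _tableInfo);
            engine.ExecutePipeline(cancellationToken);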