Example #1
        protected override void OpenFile(FileInfo fileToLoad, IDataLoadEventListener listener, GracefulCancellationToken cancellationToken)
        {
            if (BatchesReadyForProcessing.Any())
            {
                throw new NotSupportedException("There are still batches awaiting dispatch to RAW, we cannot open a new file at this time");
            }

            var flatFileToLoad = new FlatFileToLoad(fileToLoad);

            //stamp out the pipeline into an instance
            var dataFlow = new KVPAttacherPipelineUseCase(this, flatFileToLoad).GetEngine(PipelineForReadingFromFlatFile, listener);

            //will result in the opening and processing of the file and the passing of DataTables through the Pipeline finally arriving at the destination (us) in ProcessPipelineData
            dataFlow.ExecutePipeline(cancellationToken);
        }
        protected override void WriteRows(DataTable toProcess, IDataLoadEventListener job, GracefulCancellationToken cancellationToken, Stopwatch stopwatch)
        {
            // empty batches are allowed when using batch/resume
            if (toProcess.Rows.Count == 0 && _request.IsBatchResume)
            {
                return;
            }

            if (_request.IsBatchResume)
            {
                _destination.AllowLoadingPopulatedTables = true;
            }

            _destination.ProcessPipelineData(toProcess, job, cancellationToken);

            LinesWritten += toProcess.Rows.Count;
        }
        public DataTable ProcessPipelineData(DataTable toProcess, IDataLoadEventListener listener, GracefulCancellationToken cancellationToken)
        {
            //todo: This really shouldn't be needed surely
            MySqlConnection.ClearAllPools();
            MySqlBulkCopy.BulkInsertBatchTimeoutInSeconds = int.MaxValue; //forever

            _sw.Start();

            RefreshUploadDictionary();

            CreateTableUploaders();

            CreateModalityMap();

            AddRows(toProcess);

            Exception ex = null;

            try
            {
                BulkInsert(cancellationToken);
            }
            catch (Exception exception)
            {
                ex = exception;
            }

            DisposeUploaders(ex);

            if (ex != null)
            {
                throw new Exception("Error occurred during upload", ex);
            }

            _sw.Stop();

            listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "ProcessPipelineData (Upload) cumulative total time is " + _sw.ElapsedMilliseconds + "ms"));

            return(null);
        }
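The shape above (remember the exception, always run DisposeUploaders, then wrap and rethrow) is a general cleanup-must-always-run pattern. A minimal standalone sketch of the same idea, with illustrative names that are not part of the API above:

        private static void RunWithGuaranteedCleanup(Action work, Action<Exception> cleanup)
        {
            Exception error = null;

            try
            {
                work();
            }
            catch (Exception e)
            {
                error = e; //remember the failure but do not rethrow yet, cleanup still has to run
            }

            cleanup(error); //always runs and is told whether the work failed

            if (error != null)
            {
                throw new Exception("Error occurred during upload", error);
            }
        }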
Example #4
 public ExitCodeType Attach(IDataLoadJob job, GracefulCancellationToken cancellationToken)
 {
     throw new NotImplementedException();
 }
 public override DataTable ProcessPipelineData(DataTable toProcess, IDataLoadEventListener job,
                                               GracefulCancellationToken cancellationToken)
 {
     _destinationDatabase = GetDestinationDatabase(job);
     return(base.ProcessPipelineData(toProcess, job, cancellationToken));
 }
        /// <summary>
        /// Makes the current batch ONLY distinct.  This only works if you have a bounded batch (see OrderByAndDistinctInMemory)
        /// </summary>
        /// <param name="chunk"></param>
        /// <param name="listener"></param>
        /// <param name="cancellationToken"></param>
        /// <returns></returns>
        private DataTable MakeDistinct(DataTable chunk, IDataLoadEventListener listener, GracefulCancellationToken cancellationToken)
        {
            var removeDuplicates = new RemoveDuplicates()
            {
                NoLogging = true
            };

            return(removeDuplicates.ProcessPipelineData(chunk, listener, cancellationToken));
        }
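RemoveDuplicates does the distinct in memory (the deduplication logic itself appears in Example #19 below). For a one-off distinct over a bounded DataTable, plain ADO.NET can achieve the same effect; a minimal sketch, not the RemoveDuplicates implementation:

        private static DataTable DistinctInMemory(DataTable chunk)
        {
            //DataView.ToTable(true, columnNames) copies only the distinct rows across the named columns
            string[] allColumns = chunk.Columns.Cast<DataColumn>().Select(c => c.ColumnName).ToArray();

            return new DataView(chunk).ToTable(true, allColumns);
        }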
Example #7
        public DataTable ProcessPipelineData(DataTable toProcess, IDataLoadEventListener listener, GracefulCancellationToken cancellationToken)
        {
            //Things we ignore, Lookups, SupportingSql etc
            if (_extractCommand == null)
            {
                listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "Ignoring non dataset command "));
                return(toProcess);
            }

            //if it isn't a dicom dataset don't process it
            if (!toProcess.Columns.Contains(RelativeArchiveColumnName))
            {
                listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Warning, "Dataset " + _extractCommand.DatasetBundle.DataSet + " did not contain field '" + RelativeArchiveColumnName + "' so we will not attempt to extract images"));
                return(toProcess);
            }

            if (_putter == null)
            {
                _putter = (IPutDicomFilesInExtractionDirectories)  new ObjectConstructor().Construct(PutterType);
            }

            var projectNumber = _extractCommand.Configuration.Project.ProjectNumber.Value;

            var mappingServer        = new MappingRepository(UIDMappingServer);
            var destinationDirectory = new DirectoryInfo(Path.Combine(_extractCommand.GetExtractionDirectory().FullName, "Images"));

            var releaseCol = _extractCommand.QueryBuilder.SelectColumns.Select(c => c.IColumn).Single(c => c.IsExtractionIdentifier);

            // See: ftp://medical.nema.org/medical/dicom/2011/11_15pu.pdf

            var flags = DicomAnonymizer.SecurityProfileOptions.BasicProfile |
                        DicomAnonymizer.SecurityProfileOptions.CleanStructdCont |
                        DicomAnonymizer.SecurityProfileOptions.CleanDesc |
                        DicomAnonymizer.SecurityProfileOptions.CleanGraph |
                        DicomAnonymizer.SecurityProfileOptions.RetainLongFullDates |
                        DicomAnonymizer.SecurityProfileOptions.RetainUIDs;

            if (RetainDates)
            {
                flags = flags | DicomAnonymizer.SecurityProfileOptions.RetainLongFullDates;
            }

            var profile = DicomAnonymizer.SecurityProfile.LoadProfile(null, flags);

            var anonymiser = new DicomAnonymizer(profile);

            using (var pool = new ZipPool())
            {
                _sw.Start();

                foreach (DataRow row in toProcess.Rows)
                {
                    cancellationToken.ThrowIfAbortRequested();

                    var path = new AmbiguousFilePath(ArchiveRootIfAny, (string)row[RelativeArchiveColumnName]);

                    var dicomFile = path.GetDataset(pool);

                    //get the new patient ID
                    var releaseId = row[releaseCol.GetRuntimeName()].ToString();

                    var ds = anonymiser.Anonymize(dicomFile.Dataset);

                    //now we want to explicitly use our own release Id regardless of what FoDicom said
                    ds.AddOrUpdate(DicomTag.PatientID, releaseId);

                    //rewrite the UIDs
                    foreach (var kvp in UIDMapping.SupportedTags)
                    {
                        if (!ds.Contains(kvp.Key))
                        {
                            continue;
                        }

                        var value = ds.GetValue <string>(kvp.Key, 0);

                        //if it has a value for this UID
                        if (value != null)
                        {
                            var releaseValue = mappingServer.GetOrAllocateMapping(value, projectNumber, kvp.Value);

                            //change value in dataset
                            ds.AddOrUpdate(kvp.Key, releaseValue);

                            //and change value in DataTable
                            if (toProcess.Columns.Contains(kvp.Key.DictionaryEntry.Keyword))
                            {
                                row[kvp.Key.DictionaryEntry.Keyword] = releaseValue;
                            }
                        }
                    }

                    var newPath = _putter.WriteOutDataset(destinationDirectory, releaseId, ds);
                    row[RelativeArchiveColumnName] = newPath;

                    _anonymisedImagesCount++;

                    listener.OnProgress(this, new ProgressEventArgs("Writing ANO images", new ProgressMeasurement(_anonymisedImagesCount, ProgressType.Records), _sw.Elapsed));
                }

                _sw.Stop();
            }

            return(toProcess);
        }
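The core of the loop above, in isolation: anonymise one dataset and force a known release identifier into PatientID. A minimal sketch assuming fo-dicom; DicomFile.Open and DicomFile.Save are assumptions here, only the anonymiser calls are taken from the example above:

        private static void AnonymiseOneFile(string inputPath, string outputPath, string releaseId)
        {
            var profile    = DicomAnonymizer.SecurityProfile.LoadProfile(null, DicomAnonymizer.SecurityProfileOptions.BasicProfile);
            var anonymiser = new DicomAnonymizer(profile);

            var file = DicomFile.Open(inputPath);

            //produces an anonymised copy of the dataset
            var ds = anonymiser.Anonymize(file.Dataset);

            //overwrite whatever patient identifier the anonymiser left with our own release id
            ds.AddOrUpdate(DicomTag.PatientID, releaseId);

            new DicomFile(ds).Save(outputPath);
        }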
Example #8
        protected override int IterativelyBatchLoadDataIntoDataTable(DataTable loadTarget, int maxBatchSize, GracefulCancellationToken cancellationToken)
        {
            if (!_haveServedData)
            {
                foreach (DataRow dr in _dataTable.Rows)
                {
                    try
                    {
                        var targetRow = loadTarget.Rows.Add();

                        //column names must be the same!
                        foreach (DataColumn column in loadTarget.Columns)
                        {
                            if (_dataTable.Columns.Contains(column.ColumnName))
                            {
                                if (dr[column.ColumnName] == null || string.IsNullOrWhiteSpace(dr[column.ColumnName].ToString()))
                                {
                                    targetRow[column.ColumnName] = DBNull.Value;
                                }
                                else
                                {
                                    targetRow[column.ColumnName] = dr[column.ColumnName];//copy values into the destination
                                }
                            }
                            else
                            if (AllowExtraColumnsInTargetWithoutComplainingOfColumnMismatch)   //it is an extra destination column, see if that is allowed
                            {
                                targetRow[column.ColumnName] = DBNull.Value;
                            }
                            else
                            {
                                throw new Exception("Could not find column " + column.ColumnName +
                                                    " in the source table we loaded from Excel, this should have been picked up earlier in GenerateColumnNameMismatchErrors");
                            }
                        }
                    }
                    catch (Exception e)
                    {
                        throw new Exception("Could not import values into RAW DataTable structure (from Excel DataTable structure):" + string.Join(",", dr.ItemArray), e);
                    }
                }

                _haveServedData = true;

                return(_dataTable.Rows.Count);
            }

            return(0);
        }
Example #9
        protected override void OpenFile(FileInfo fileToLoad, IDataLoadEventListener listener, GracefulCancellationToken cancellationToken)
        {
            _haveServedData                      = false;
            _fileToLoad                          = fileToLoad;
            _hostedSource                        = new ExcelDataFlowSource();
            _hostedSource.WorkSheetName          = WorkSheetName;
            _hostedSource.AddFilenameColumnNamed = AddFilenameColumnNamed;

            _hostedSource.PreInitialize(new FlatFileToLoad(fileToLoad), listener);
            listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "About to start processing " + fileToLoad.FullName));

            _dataTable = _hostedSource.GetChunk(listener, cancellationToken);

            if (!string.IsNullOrEmpty(ForceReplacementHeaders))
            {
                //split headers by , (and trim leading/trailing whitespace).
                string[] replacementHeadersSplit = ForceReplacementHeaders.Split(',').Select(h => string.IsNullOrWhiteSpace(h)?h:h.Trim()).ToArray();

                listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "Force headers will make the following header changes:" + GenerateASCIIArtOfSubstitutions(replacementHeadersSplit, _dataTable.Columns)));

                if (replacementHeadersSplit.Length != _dataTable.Columns.Count)
                {
                    listener.OnNotify(this,
                                      new NotifyEventArgs(ProgressEventType.Error,
                                                          "ForceReplacementHeaders was set but it had " + replacementHeadersSplit.Length +
                                                          " column header names while the file had " + _dataTable.Columns.Count +
                                                          " (there must be the same number of replacement headers as headers in the excel file)"));
                }
                else
                {
                    for (int i = 0; i < replacementHeadersSplit.Length; i++)
                    {
                        _dataTable.Columns[i].ColumnName = replacementHeadersSplit[i];//rename the columns to match the forced replacements
                    }
                }
            }

            //all data should now be exhausted
            if (_hostedSource.GetChunk(listener, cancellationToken) != null)
            {
                throw new Exception("Hosted source served more than 1 chunk, expected all the data to be read from the Excel file in one go");
            }
        }
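For example, with a worksheet whose headers come through as col1, col2 and col3, setting ForceReplacementHeaders as below renames the columns positionally (a hypothetical illustration of the behaviour above; the replacement count must match the column count exactly or only an error is logged):

        var dt = new DataTable();
        dt.Columns.Add("col1");
        dt.Columns.Add("col2");
        dt.Columns.Add("col3");

        //hypothetical value a user might configure
        string forceReplacementHeaders = "chi,admission_date,ward";

        string[] replacements = forceReplacementHeaders.Split(',').Select(h => h.Trim()).ToArray();

        for (int i = 0; i < replacements.Length; i++)
        {
            dt.Columns[i].ColumnName = replacements[i]; //col1 -> chi, col2 -> admission_date, col3 -> ward
        }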
        private void WriteBundleContents(IExtractableDatasetBundle datasetBundle, IDataLoadEventListener listener, GracefulCancellationToken cancellationToken)
        {
            var bundle = ((ExtractDatasetCommand)_request).DatasetBundle;

            foreach (var sql in bundle.SupportingSQL)
            {
                bundle.States[sql] = ExtractSupportingSql(sql, listener, _dataLoadInfo);
            }

            foreach (var document in ((ExtractDatasetCommand)_request).DatasetBundle.Documents)
            {
                bundle.States[document] = ExtractSupportingDocument(_request.GetExtractionDirectory(), document, listener);
            }

            //extract lookups
            foreach (BundledLookupTable lookup in datasetBundle.LookupTables)
            {
                try
                {
                    listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "About to extract lookup " + lookup));

                    ExtractLookupTableSql(lookup, listener, _dataLoadInfo);

                    datasetBundle.States[lookup] = ExtractCommandState.Completed;
                }
                catch (Exception e)
                {
                    listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, "Error occurred trying to extract lookup " + lookup + " on server " + lookup.TableInfo.Server, e));

                    datasetBundle.States[lookup] = ExtractCommandState.Crashed;
                }
            }

            haveExtractedBundledContent = true;
        }
Example #11
        public DataTable ProcessPipelineData(DataTable toProcess, IDataLoadEventListener listener, GracefulCancellationToken cancellationToken)
        {
            if (toProcess == null)
            {
                return(null);
            }

            if (_haveServedResult)
            {
                throw new NotSupportedException("Error, we received multiple batches, Transposer only works when all the data arrives in a single DataTable");
            }

            if (toProcess.Rows.Count == 0 || toProcess.Columns.Count == 0)
            {
                throw new NotSupportedException("DataTable toProcess had " + toProcess.Rows.Count + " rows and " + toProcess.Columns.Count + " columns, thus it cannot be transposed");
            }

            _haveServedResult = true;

            return(GenerateTransposedTable(toProcess));
        }
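GenerateTransposedTable is not shown above. A minimal sketch of one way to transpose a DataTable (cell [row i, column j] becomes cell [row j, column i]); the real implementation may differ, for example by promoting the first column to column headers:

        private static DataTable Transpose(DataTable source)
        {
            var result = new DataTable(source.TableName);

            //one output column per input row
            for (int i = 0; i < source.Rows.Count; i++)
            {
                result.Columns.Add("Row" + i);
            }

            //one output row per input column
            for (int j = 0; j < source.Columns.Count; j++)
            {
                var newRow = result.Rows.Add();

                for (int i = 0; i < source.Rows.Count; i++)
                {
                    newRow[i] = source.Rows[i][j];
                }
            }

            return result;
        }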
        public DataTable ProcessPipelineData(DataTable toProcess, IDataLoadEventListener listener, GracefulCancellationToken cancellationToken)
        {
            _request.ElevateState(ExtractCommandState.WritingToFile);
            _toProcess = toProcess;

            _destinationDatabase = GetDestinationDatabase(listener);

            //give the data table the correct name
            if (toProcess.ExtendedProperties.ContainsKey("ProperlyNamed") && toProcess.ExtendedProperties["ProperlyNamed"].Equals(true))
            {
                _isTableAlreadyNamed = true;
            }

            _toProcess.TableName = GetTableName();

            if (_destination == null)
            {
                _destination = PrepareDestination(listener, toProcess);
            }

            if (TableLoadInfo == null)
            {
                TableLoadInfo = new TableLoadInfo(_dataLoadInfo, "", _toProcess.TableName, new[] { new DataSource(_request.DescribeExtractionImplementation(), DateTime.Now) }, -1);
            }

            if (TableLoadInfo.IsClosed) // Maybe it was open and it crashed?
            {
                throw new Exception("TableLoadInfo was closed so could not write number of rows (" + toProcess.Rows.Count + ") to audit object - most likely the extraction crashed?");
            }

            if (_request is ExtractDatasetCommand && !haveExtractedBundledContent)
            {
                WriteBundleContents(((ExtractDatasetCommand)_request).DatasetBundle, listener, cancellationToken);
            }

            if (_request is ExtractGlobalsCommand)
            {
                ExtractGlobals((ExtractGlobalsCommand)_request, listener, _dataLoadInfo);
                return(null);
            }

            _destination.ProcessPipelineData(toProcess, listener, cancellationToken);
            TableLoadInfo.Inserts += toProcess.Rows.Count;

            return(null);
        }
Example #13
        public override ExitCodeType Run(IDataLoadJob job, GracefulCancellationToken cancellationToken)
        {
            if (_pipeline != null)
            {
                throw new Exception("Pipeline already executed once");
            }

            var contextFactory = new DataFlowPipelineContextFactory <DataTable>();
            var context        = contextFactory.Create(PipelineUsage.LoadsSingleTableInfo | PipelineUsage.FixedDestination | PipelineUsage.LogsToTableLoadInfo);

            //where we are coming from (source)
            var sourceConvention = LoadBubble.Raw;
            DiscoveredDatabase sourceDatabase = _databaseConfiguration.DeployInfo[sourceConvention];
            var sourceTableName = _tableInfo.GetRuntimeName(sourceConvention, _databaseConfiguration.DatabaseNamer);

            //What to do if where we are coming from does not have the table existing on it
            if (!sourceDatabase.ExpectTable(sourceTableName).Exists())
            {
                if (_isLookupTable)
                {
                    job.OnNotify(this,
                                 new NotifyEventArgs(ProgressEventType.Warning,
                                                     "Lookup table " + sourceTableName + " did not exist on RAW so was not migrated to STAGING"));
                    return(ExitCodeType.Success);
                }
                else
                {
                    job.OnNotify(this,
                                 new NotifyEventArgs(ProgressEventType.Error,
                                                     "Table " + sourceTableName + " did not exist in RAW database " + sourceDatabase +
                                                     " when it came time to migrate RAW to STAGING (and the table is not a lookup)"));
                }
            }


            // where we are going to (destination)
            // ignore any columns that are marked for discard
            var destinationConvention = LoadBubble.Staging;
            DiscoveredDatabase destinationDatabase = _databaseConfiguration.DeployInfo[LoadBubble.Staging];
            var destinationTableName = _tableInfo.GetRuntimeName(destinationConvention, _databaseConfiguration.DatabaseNamer);

            DeleteFullyNullRecords(sourceTableName, sourceDatabase, job);

            //audit
            ITableLoadInfo tableLoadInfo = job.DataLoadInfo.CreateTableLoadInfo(
                "None required, if fails then simply drop Staging database and reload dataset", "STAGING:" + destinationTableName,
                new DataSource[] { new DataSource("RAW:" + sourceTableName, DateTime.Now) }, -1);

            //connect to source and open a reader! note that GetReaderForRAW will at this point preserve the state of the database such that any commands e.g. deletes will not have any effect even though ExecutePipeline has not been called!
            var source = new DbDataCommandDataFlowSource(
                "Select distinct * from " + sourceTableName,
                "Fetch data from " + sourceTableName,
                sourceDatabase.Server.Builder, 50000);

            //ignore those that are pre load discarded columns (unless they are dilution, in which case they get passed through in a decrepit state instead of being dumped entirely - these fields will still be in ANODump in pristine state btw)
            var columnNamesToIgnoreForBulkInsert = _tableInfo.PreLoadDiscardedColumns.Where(c => c.Destination != DiscardedColumnDestination.Dilute).Select(column => column.RuntimeColumnName).ToList();

            //pass pre load discard
            var destination = new SqlBulkInsertDestination(destinationDatabase, destinationTableName, columnNamesToIgnoreForBulkInsert);

            //engine that will move data
            _pipeline = new DataFlowPipelineEngine <DataTable>(context, source, destination, job);

            //add clean strings component
            _pipeline.ComponentObjects.Add(new CleanStrings());

            //add dropping of preload discard columns
            _pipeline.ComponentObjects.Add(new BasicAnonymisationEngine());

            _pipeline.Initialize(tableLoadInfo, _tableInfo);

            //tell it to move data
            _pipeline.ExecutePipeline(cancellationToken);

            return(ExitCodeType.Success);
        }
Example #14
        public override void MigrateTable(IDataLoadJob job, MigrationColumnSet columnsToMigrate, int dataLoadInfoID, GracefulCancellationToken cancellationToken, ref int inserts, ref int updates)
        {
            var server = columnsToMigrate.DestinationTable.Database.Server;

            //see CrossDatabaseMergeCommandTest

            /*          ------------MIGRATE NEW RECORDS (novel by primary key)--------
             *
             *
             * INSERT INTO CrossDatabaseMergeCommandTo..ToTable (Name,Age,Postcode,hic_dataLoadRunID)
             * SELECT
             * [CrossDatabaseMergeCommandFrom]..CrossDatabaseMergeCommandTo_ToTable_STAGING.Name,
             * [CrossDatabaseMergeCommandFrom]..CrossDatabaseMergeCommandTo_ToTable_STAGING.Age,
             * [CrossDatabaseMergeCommandFrom]..CrossDatabaseMergeCommandTo_ToTable_STAGING.Postcode,
             * 1
             * FROM
             * [CrossDatabaseMergeCommandFrom]..CrossDatabaseMergeCommandTo_ToTable_STAGING
             * left join
             * CrossDatabaseMergeCommandTo..ToTable
             * on
             * [CrossDatabaseMergeCommandFrom]..CrossDatabaseMergeCommandTo_ToTable_STAGING.Age = CrossDatabaseMergeCommandTo..ToTable.Age
             * AND
             * [CrossDatabaseMergeCommandFrom]..CrossDatabaseMergeCommandTo_ToTable_STAGING.Name = CrossDatabaseMergeCommandTo..ToTable.Name
             * WHERE
             * CrossDatabaseMergeCommandTo..ToTable.Age is null
             */

            StringBuilder sbInsert = new StringBuilder();
            var           syntax   = server.GetQuerySyntaxHelper();


            sbInsert.AppendLine(string.Format("INSERT INTO {0} ({1},{2})",
                                              columnsToMigrate.DestinationTable.GetFullyQualifiedName(),
                                              string.Join(",", columnsToMigrate.FieldsToUpdate.Select(c => syntax.EnsureWrapped(c.GetRuntimeName()))),
                                              syntax.EnsureWrapped(SpecialFieldNames.DataLoadRunID)));

            sbInsert.AppendLine("SELECT");

            foreach (var col in columnsToMigrate.FieldsToUpdate)
            {
                sbInsert.AppendLine(col.GetFullyQualifiedName() + ",");
            }

            sbInsert.AppendLine(dataLoadInfoID.ToString());

            sbInsert.AppendLine("FROM");
            sbInsert.AppendLine(columnsToMigrate.SourceTable.GetFullyQualifiedName());
            sbInsert.AppendLine("LEFT JOIN");
            sbInsert.AppendLine(columnsToMigrate.DestinationTable.GetFullyQualifiedName());
            sbInsert.AppendLine("ON");

            sbInsert.AppendLine(
                string.Join(" AND " + Environment.NewLine,
                            columnsToMigrate.PrimaryKeys.Select(
                                pk =>
                                string.Format("{0}.{1}={2}.{1}", columnsToMigrate.SourceTable.GetFullyQualifiedName(),
                                              syntax.EnsureWrapped(pk.GetRuntimeName()), columnsToMigrate.DestinationTable.GetFullyQualifiedName()))));

            sbInsert.AppendLine("WHERE");
            sbInsert.AppendLine(string.Format("{0}.{1} IS NULL",
                                              columnsToMigrate.DestinationTable.GetFullyQualifiedName(),
                                              syntax.EnsureWrapped(columnsToMigrate.PrimaryKeys.First().GetRuntimeName())));

            string insertSql = sbInsert.ToString();

            var cmd = server.GetCommand(insertSql, _managedConnection);

            cmd.CommandTimeout = Timeout;

            job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "INSERT query: " + Environment.NewLine + insertSql));

            cancellationToken.ThrowIfCancellationRequested();


            try
            {
                inserts = cmd.ExecuteNonQuery();

                List <CustomLine> sqlLines = new List <CustomLine>();

                var toSet = columnsToMigrate.FieldsToUpdate.Where(c => !c.IsPrimaryKey).Select(c => string.Format("t1.{0} = t2.{0}", syntax.EnsureWrapped(c.GetRuntimeName()))).ToArray();

                if (!toSet.Any())
                {
                    job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Warning, "Table " + columnsToMigrate.DestinationTable + " is entirely composed of PrimaryKey columns or hic_ columns so UPDATE will NOT take place"));
                    return;
                }

                var toDiff = columnsToMigrate.FieldsToDiff.Where(c => !c.IsPrimaryKey).ToArray();

                if (!toDiff.Any())
                {
                    job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Warning, "Table " + columnsToMigrate.DestinationTable + " is entirely composed of PrimaryKey columns, hic_ columns or other non-DIFF columns so UPDATE will NOT take place"));
                    return;
                }

                //t1.Name = t2.Name, t1.Age=T2.Age etc
                sqlLines.Add(new CustomLine(string.Join(",", toSet), QueryComponent.SET));

                //also update the hic_dataLoadRunID field
                sqlLines.Add(new CustomLine(string.Format("t1.{0}={1}",
                                                          syntax.EnsureWrapped(SpecialFieldNames.DataLoadRunID)
                                                          , dataLoadInfoID), QueryComponent.SET));

                //t1.Name <> t2.Name OR t1.Age <> t2.Age etc
                sqlLines.Add(new CustomLine(string.Join(" OR ", toDiff.Select(c => GetORLine(c, syntax))), QueryComponent.WHERE));

                //the join
                sqlLines.AddRange(columnsToMigrate.PrimaryKeys.Select(p => new CustomLine(string.Format("t1.{0} = t2.{0}", syntax.EnsureWrapped(p.GetRuntimeName())), QueryComponent.JoinInfoJoin)));

                var updateHelper = columnsToMigrate.DestinationTable.Database.Server.GetQuerySyntaxHelper().UpdateHelper;

                var updateQuery = updateHelper.BuildUpdate(
                    columnsToMigrate.DestinationTable,
                    columnsToMigrate.SourceTable,
                    sqlLines);

                job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "Update query:" + Environment.NewLine + updateQuery));

                var updateCmd = server.GetCommand(updateQuery, _managedConnection);
                updateCmd.CommandTimeout = Timeout;
                cancellationToken.ThrowIfCancellationRequested();

                try
                {
                    updates = updateCmd.ExecuteNonQuery();
                }
                catch (Exception e)
                {
                    job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, "Did not successfully perform the update queries: " + updateQuery, e));
                    throw new Exception("Did not successfully perform the update queries: " + updateQuery + " - " + e);
                }
            }
            catch (OperationCanceledException)
            {
                throw; // have to catch and rethrow this because of the catch-all below
            }
            catch (Exception e)
            {
                job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, "Failed to migrate " + columnsToMigrate.SourceTable + " to " + columnsToMigrate.DestinationTable, e));
                throw new Exception("Failed to migrate " + columnsToMigrate.SourceTable + " to " + columnsToMigrate.DestinationTable + ": " + e);
            }
        }
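By analogy with the INSERT comment at the top of the method, the CustomLines assembled above are handed to UpdateHelper.BuildUpdate to produce the update-existing-records statement. On SQL Server the result comes out roughly in the following shape; this is an illustration in the spirit of the comment above, not the literal output of UpdateHelper:

        /*          ------------UPDATE EXISTING RECORDS (same primary key, changed values)--------
         *
         * UPDATE t1
         * SET
         *   t1.Postcode = t2.Postcode,
         *   t1.hic_dataLoadRunID = 1
         * FROM
         *   CrossDatabaseMergeCommandTo..ToTable t1
         *   JOIN [CrossDatabaseMergeCommandFrom]..CrossDatabaseMergeCommandTo_ToTable_STAGING t2
         *     ON t1.Age = t2.Age AND t1.Name = t2.Name
         * WHERE
         *   t1.Postcode <> t2.Postcode (or one side is null and the other is not)
         */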
Example #15
 public abstract ExitCodeType Fetch(IDataLoadJob dataLoadJob, GracefulCancellationToken cancellationToken);
Example #16
        private void CopyCohortToDataServer(IDataLoadEventListener listener, GracefulCancellationToken cancellationToken)
        {
            DataTable cohortDataTable = null;

            SetServer();

            listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "About to wait for Semaphore OneCrossServerExtractionAtATime to become available"));
            OneCrossServerExtractionAtATime.WaitOne(-1);
            _semaphoreObtained = true;
            listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "Captured Semaphore OneCrossServerExtractionAtATime"));

            try
            {
                IExtractableCohort cohort = Request.ExtractableCohort;
                cohortDataTable = cohort.FetchEntireCohort();
            }
            catch (Exception e)
            {
                throw new Exception("An error occurred while trying to download the cohort from the Cohort server (in preparation for transferring it to the data server for linkage and extraction)", e);
            }

            //make sure tempdb exists (this covers you for servers where it doesn't exist e.g. mysql or when user has specified a different database name)
            if (!_tempDb.Exists())
            {
                if (CreateTemporaryDatabaseIfNotExists)
                {
                    _tempDb.Create();
                }
                else
                {
                    throw new Exception("Database '" + _tempDb + "' did not exist on server '" + _server + "' and CreateTemporaryDatabaseIfNotExists was false");
                }
            }


            var tbl = _tempDb.ExpectTable(cohortDataTable.TableName);

            if (tbl.Exists())
            {
                listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Warning, "Found existing table called '" + tbl + "' in '" + _tempDb + "'"));

                if (DropExistingCohortTableIfExists)
                {
                    listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Warning, "About to drop existing table '" + tbl + "'"));

                    try
                    {
                        tbl.Drop();
                        listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Warning, "Dropped existing table '" + tbl + "'"));
                    }
                    catch (Exception ex)
                    {
                        listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Warning, "Warning dropping '" + tbl + "' failed", ex));
                    }
                }
                else
                {
                    listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Warning, "'" + _tempDb + "' contains a table called '" + tbl + "' and DropExistingCohortTableIfExists is false"));
                }
            }

            var destination = new DataTableUploadDestination();

            destination.PreInitialize(_tempDb, listener);
            destination.ProcessPipelineData(cohortDataTable, listener, cancellationToken);
            destination.Dispose(listener, null);



            if (!tbl.Exists())
            {
                throw new Exception("Table '" + tbl + "' did not exist despite DataTableUploadDestination completing Successfully!");
            }

            tablesToCleanup.Add(tbl);

            //table will now be in tempdb
            _haveCopiedCohortAndAdjustedSql = true;
        }
        public virtual DataTable GetChunk(IDataLoadEventListener listener, GracefulCancellationToken cancellationToken)
        {
            // we are in the Global Commands case, let's return an empty DataTable (not null)
            // so we can trigger the destination to extract the globals docs and sql
            if (GlobalsRequest != null)
            {
                GlobalsRequest.ElevateState(ExtractCommandState.WaitingForSQLServer);
                if (firstGlobalChunk)
                {
                    //unless we are checking, start auditing
                    StartAuditGlobals();

                    firstGlobalChunk = false;
                    return(new DataTable(ExtractionDirectory.GLOBALS_DATA_NAME));
                }

                return(null);
            }

            if (Request == null)
            {
                throw new Exception("Component has not been initialized before being asked to GetChunk(s)");
            }

            Request.ElevateState(ExtractCommandState.WaitingForSQLServer);

            if (_cancel)
            {
                throw new Exception("User cancelled data extraction");
            }

            if (_hostedSource == null)
            {
                StartAudit(Request.QueryBuilder.SQL);

                if (Request.DatasetBundle.DataSet.DisableExtraction)
                {
                    throw new Exception("Cannot extract " + Request.DatasetBundle.DataSet + " because DisableExtraction is set to true");
                }

                _hostedSource = new DbDataCommandDataFlowSource(GetCommandSQL(listener),
                                                                "ExecuteDatasetExtraction " + Request.DatasetBundle.DataSet,
                                                                Request.GetDistinctLiveDatabaseServer().Builder,
                                                                ExecutionTimeout);

                _hostedSource.AllowEmptyResultSets = AllowEmptyExtractions;
                _hostedSource.BatchSize            = BatchSize;
            }

            DataTable chunk = null;

            try
            {
                chunk = _hostedSource.GetChunk(listener, cancellationToken);

                chunk = _peeker.AddPeekedRowsIfAny(chunk);

                //if we are trying to distinct the records in memory based on release id
                if (DistinctStrategy == DistinctStrategy.OrderByAndDistinctInMemory)
                {
                    var releaseIdentifierColumn = Request.ReleaseIdentifierSubstitutions.First().GetRuntimeName();

                    if (chunk != null)
                    {
                        //last release id in the current chunk
                        var lastReleaseId = chunk.Rows[chunk.Rows.Count - 1][releaseIdentifierColumn];

                        _peeker.AddWhile(_hostedSource, r => Equals(r[releaseIdentifierColumn], lastReleaseId), chunk);
                        chunk = MakeDistinct(chunk, listener, cancellationToken);
                    }
                }
            }
            catch (AggregateException a)
            {
                if (a.GetExceptionIfExists <TaskCanceledException>() != null)
                {
                    _cancel = true;
                }

                throw;
            }
            catch (Exception e)
            {
                listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, "Read from source failed", e));
            }

            if (cancellationToken.IsCancellationRequested)
            {
                throw new Exception("Data read cancelled because our cancellationToken was set, aborting data reading");
            }

            //if the first chunk is null
            if (firstChunk && chunk == null)
            {
                throw new Exception("There is no data to load, query returned no rows, query was:" + Environment.NewLine +
                                    (_hostedSource.Sql ?? Request.QueryBuilder.SQL));
            }

            //not the first chunk anymore
            firstChunk = false;

            //data exhausted
            if (chunk == null)
            {
                listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "Data exhausted after reading " + _rowsRead + " rows of data (" + UniqueReleaseIdentifiersEncountered.Count + " unique release identifiers seen)"));
                if (Request != null)
                {
                    Request.CumulativeExtractionResults.DistinctReleaseIdentifiersEncountered = UniqueReleaseIdentifiersEncountered.Count;
                }
                return(null);
            }

            _rowsRead += chunk.Rows.Count;
            //chunk will have datatypes for all the things in the buffer so we can populate our dictionary of facts about what columns/catalogue items have spontaneously changed name/type etc
            if (ExtractTimeTransformationsObserved == null)
            {
                GenerateExtractionTransformObservations(chunk);
            }


            //see if the SqlDataReader has a column with the same name as the ReleaseIdentifierSQL (if so then we can use it to count the number of distinct subjects written out to the csv)
            bool includesReleaseIdentifier = _extractionIdentifiersidx.Count > 0;


            //first line - lets see what columns we wrote out
            //looks at the buffer and computes any transforms performed on the column


            _timeSpentValidating.Start();
            //build up the validation report (Missing/Wrong/Etc) - this has no mechanical effect on the extracted data just some metadata that goes into a flat file
            if (ExtractionTimeValidator != null && Request.IncludeValidation)
            {
                try
                {
                    chunk.Columns.Add(ValidationColumnName);

                    ExtractionTimeValidator.Validate(chunk, ValidationColumnName);

                    _rowsValidated += chunk.Rows.Count;
                    listener.OnProgress(this, new ProgressEventArgs("Validation", new ProgressMeasurement(_rowsValidated, ProgressType.Records), _timeSpentValidating.Elapsed));
                }
                catch (Exception ex)
                {
                    listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, "Could not validate data chunk", ex));
                    ValidationFailureException = ex;
                    ExtractionTimeValidator    = null;
                }
            }
            _timeSpentValidating.Stop();

            _timeSpentBuckettingDates.Start();
            if (ExtractionTimeTimeCoverageAggregator != null)
            {
                _rowsBucketted += chunk.Rows.Count;

                foreach (DataRow row in chunk.Rows)
                {
                    ExtractionTimeTimeCoverageAggregator.ProcessRow(row);
                }

                listener.OnProgress(this, new ProgressEventArgs("Bucketting Dates", new ProgressMeasurement(_rowsBucketted, ProgressType.Records), _timeSpentCalculatingDISTINCT.Elapsed));
            }
            _timeSpentBuckettingDates.Stop();

            _timeSpentCalculatingDISTINCT.Start();
            //record unique release identifiers found
            if (includesReleaseIdentifier)
            {
                foreach (string idx in _extractionIdentifiersidx)
                {
                    foreach (DataRow r in chunk.Rows)
                    {
                        if (r[idx] == DBNull.Value)
                        {
                            if (_extractionIdentifiersidx.Count == 1)
                            {
                                throw new Exception("Null release identifier found in extract of dataset " + Request.DatasetBundle.DataSet);
                            }
                            else
                            {
                                continue; //there are multiple extraction identifiers, that's fine if one or two are null
                            }
                        }
                        if (!UniqueReleaseIdentifiersEncountered.Contains(r[idx]))
                        {
                            UniqueReleaseIdentifiersEncountered.Add(r[idx]);
                        }
                    }

                    listener.OnProgress(this, new ProgressEventArgs("Calculating Distinct Release Identifiers", new ProgressMeasurement(UniqueReleaseIdentifiersEncountered.Count, ProgressType.Records), _timeSpentCalculatingDISTINCT.Elapsed));
                }
            }
            _timeSpentCalculatingDISTINCT.Stop();

            return(chunk);
        }
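The OrderByAndDistinctInMemory branch above works because the extraction query is ordered by the release identifier: after each chunk is read, the source keeps pulling rows into a peeker while they still belong to the last subject in the chunk, so one subject never straddles two chunks and MakeDistinct can safely deduplicate each chunk in isolation. A minimal sketch of that peek-ahead idea over a plain row stream; the names are illustrative and not the RDMP Peeker API:

        private DataRow _peekedRow; //row read past the batch boundary, served at the start of the next batch

        private void AddWhileSameKey(IEnumerator<DataRow> source, DataTable currentBatch, string keyColumn)
        {
            if (currentBatch.Rows.Count == 0)
            {
                return;
            }

            object lastKey = currentBatch.Rows[currentBatch.Rows.Count - 1][keyColumn];

            while (source.MoveNext())
            {
                DataRow next = source.Current;

                if (Equals(next[keyColumn], lastKey))
                {
                    currentBatch.ImportRow(next); //same subject as the last row, keep it in this batch
                }
                else
                {
                    _peekedRow = next;            //first row of the next subject, hold it over
                    break;
                }
            }
        }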
Example #18
        public DataTable GetChunk(IDataLoadEventListener job, GracefulCancellationToken cancellationToken)
        {
            if (_reader == null)
            {
                _con = DatabaseCommandHelper.GetConnection(_builder);
                _con.Open();

                job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "Running SQL:" + Environment.NewLine + Sql));

                cmd = DatabaseCommandHelper.GetCommand(Sql, _con);
                cmd.CommandTimeout = _timeout;

                _reader          = cmd.ExecuteReaderAsync(cancellationToken.AbortToken).Result;
                _numberOfColumns = _reader.FieldCount;
            }

            int readThisBatch = 0;

            timer.Start();
            try
            {
                DataTable chunk = GetChunkSchema(_reader);

                while (_reader.Read())
                {
                    AddRowToDataTable(chunk, _reader);
                    readThisBatch++;

                    //we reached batch limit
                    if (readThisBatch == BatchSize)
                    {
                        return(chunk);
                    }
                }

                //if data was read
                if (readThisBatch > 0)
                {
                    return(chunk);
                }

                //data is exhausted

                //if data was exhausted on first read and we are allowing empty result sets
                if (firstChunk && AllowEmptyResultSets)
                {
                    return(chunk);//return the empty chunk
                }
                //data exhausted
                return(null);
            }
            catch (Exception e)
            {
                job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, "Source read failed", e));
                throw;
            }
            finally
            {
                firstChunk = false;
                timer.Stop();
                job.OnProgress(this, new ProgressEventArgs(_taskBeingPerformed, new ProgressMeasurement(TotalRowsRead, ProgressType.Records), timer.Elapsed));
            }
        }
Example #19
        public DataTable ProcessPipelineData(DataTable toProcess, IDataLoadEventListener listener, GracefulCancellationToken cancellationToken)
        {
            sw.Start();

            DataTable toReturn = toProcess.Clone();

            //now sort rows
            foreach (DataRow row in toProcess.Rows)
            {
                totalRecordsProcessed++;
                int hashOfItems = GetHashCode(row.ItemArray);

                if (unqiueHashesSeen.ContainsKey(hashOfItems))
                {
                    //GetHashCode on ItemArray of row has been seen before but it could be a collision so call Enumerable.SequenceEqual just in case.
                    if (unqiueHashesSeen[hashOfItems].Any(r => r.ItemArray.SequenceEqual(row.ItemArray)))
                    {
                        totalDuplicatesFound++;
                        continue; //it's a duplicate
                    }

                    unqiueHashesSeen[hashOfItems].Add(row);
                }
                else
                {
                    //it's not a duplicate hashcode so add it to the return table and to the record of everything we have seen so far (so that we do not miss duplicates across batches)
                    unqiueHashesSeen.Add(hashOfItems, new List <DataRow>(new[] { row }));
                }

                toReturn.Rows.Add(row.ItemArray);
            }

            sw.Stop();

            if (!NoLogging)
            {
                listener.OnProgress(this, new ProgressEventArgs("Evaluating For Duplicates", new ProgressMeasurement(totalRecordsProcessed, ProgressType.Records), sw.Elapsed));
                listener.OnProgress(this, new ProgressEventArgs("Discarding Duplicates", new ProgressMeasurement(totalDuplicatesFound, ProgressType.Records), sw.Elapsed));
            }
            return(toReturn);
        }
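Because unqiueHashesSeen and the counters persist between calls, duplicates are removed across batches as well as within a single batch. A small usage sketch; ThrowImmediatelyDataLoadEventListener and the parameterless GracefulCancellationToken constructor are assumptions here, not taken from the examples above:

        var remover = new RemoveDuplicates { NoLogging = true };

        var listener = new ThrowImmediatelyDataLoadEventListener();
        var token    = new GracefulCancellationToken();

        var batch1 = new DataTable();
        batch1.Columns.Add("chi");
        batch1.Rows.Add("1111111111");
        batch1.Rows.Add("1111111111");                 //duplicate within the batch

        var batch2 = new DataTable();
        batch2.Columns.Add("chi");
        batch2.Rows.Add("1111111111");                 //duplicate of a row already seen in batch1

        var first  = remover.ProcessPipelineData(batch1, listener, token);  //1 row survives
        var second = remover.ProcessPipelineData(batch2, listener, token);  //0 rows survive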
Example #20
        public override ExitCodeType Attach(IDataLoadJob job, GracefulCancellationToken cancellationToken)
        {
            if (string.IsNullOrWhiteSpace(TableName) && TableToLoad != null)
            {
                var allTables = job.RegularTablesToLoad.Union(job.LookupTablesToLoad).Distinct().ToArray();

                if (!allTables.Contains(TableToLoad))
                {
                    job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Warning, $"FlatFileAttacher TableToLoad was '{TableToLoad}' (ID={TableToLoad.ID}) but that table was not one of the tables in the load:{string.Join(",", allTables.Select(t=>"'" + t.Name + "'"))}"));
                }

                TableName = TableToLoad.GetRuntimeName(LoadBubble.Raw, job.Configuration.DatabaseNamer);
            }


            if (TableName != null)
            {
                TableName = TableName.Trim();
            }

            Stopwatch timer = new Stopwatch();

            timer.Start();


            if (string.IsNullOrWhiteSpace(TableName))
            {
                throw new ArgumentNullException("TableName has not been set, set it in the DataCatalogue");
            }

            DiscoveredTable table = _dbInfo.ExpectTable(TableName);

            //table didn't exist!
            if (!table.Exists())
            {
                if (!_dbInfo.DiscoverTables(false).Any())//maybe no tables existed
                {
                    throw new FlatFileLoadException("Raw database had 0 tables we could load");
                }
                else //no, there are tables, just not the one we were looking for
                {
                    throw new FlatFileLoadException("RAW database did not have a table called:" + TableName);
                }
            }


            //load the flat file
            var filepattern = FilePattern ?? "*";

            var filesToLoad = LoadDirectory.ForLoading.EnumerateFiles(filepattern).ToList();

            if (!filesToLoad.Any())
            {
                job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Warning, "Did not find any files matching pattern " + filepattern + " in forLoading directory"));

                if (SendLoadNotRequiredIfFileNotFound)
                {
                    return(ExitCodeType.OperationNotRequired);
                }

                return(ExitCodeType.Success);
            }

            foreach (var fileToLoad in filesToLoad)
            {
                LoadFile(table, fileToLoad, _dbInfo, timer, job);
            }

            timer.Stop();

            return(ExitCodeType.Success);
        }
Example #21
        protected override void WriteRows(DataTable toProcess, IDataLoadEventListener job, GracefulCancellationToken cancellationToken, Stopwatch stopwatch)
        {
            _destination.ProcessPipelineData(toProcess, job, cancellationToken);

            LinesWritten += toProcess.Rows.Count;
        }
Example #22
        public int Run(IRDMPPlatformRepositoryServiceLocator repositoryLocator, IDataLoadEventListener listener, ICheckNotifier checkNotifier, GracefulCancellationToken token)
        {
            RepositoryLocator = repositoryLocator;
            Token             = token;
            List <Task> tasks = new List <Task>();

            Semaphore semaphore = null;

            if (_options.MaxConcurrentExtractions != null)
            {
                semaphore = new Semaphore(_options.MaxConcurrentExtractions.Value, _options.MaxConcurrentExtractions.Value);
            }

            Initialize();

            switch (_options.Command)
            {
            case CommandLineActivity.none:
                break;

            case CommandLineActivity.run:

                object[] runnables = GetRunnables();

                foreach (object runnable in runnables)
                {
                    if (semaphore != null)
                    {
                        semaphore.WaitOne();
                    }

                    object r = runnable;
                    tasks.Add(Task.Run(() =>
                    {
                        try
                        {
                            ExecuteRun(r, new OverrideSenderIDataLoadEventListener(r.ToString(), listener));
                        }
                        finally
                        {
                            if (semaphore != null)
                            {
                                semaphore.Release();
                            }
                        }
                    }));
                }

                break;

            case CommandLineActivity.check:

                lock (_oLock)
                    _checksDictionary.Clear();

                ICheckable[] checkables = GetCheckables(checkNotifier);
                foreach (ICheckable checkable in checkables)
                {
                    if (semaphore != null)
                    {
                        semaphore.WaitOne();
                    }

                    ICheckable checkable1 = checkable;
                    var        memory     = new ToMemoryCheckNotifier(checkNotifier);

                    lock (_oLock)
                        _checksDictionary.Add(checkable1, memory);

                    tasks.Add(Task.Run(() =>
                    {
                        try
                        {
                            checkable1.Check(memory);
                        }
                        finally
                        {
                            if (semaphore != null)
                            {
                                semaphore.Release();
                            }
                        }
                    }));
                }

                break;

            default:
                throw new ArgumentOutOfRangeException();
            }

            Task.WaitAll(tasks.ToArray());

            AfterRun();

            return(0);
        }
 public int Run(IRDMPPlatformRepositoryServiceLocator repositoryLocator, IDataLoadEventListener listener, ICheckNotifier checkNotifier, GracefulCancellationToken token)
 {
     Activator.ShowDialog(new SingleControlForm(this));
     return(0);
 }
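The semaphore plus Task.Run arrangement in the run and check branches above is simply a cap on how many runnables or checkables execute concurrently: acquire a slot before launching each task and release it in a finally. Stripped of the RDMP specifics, a minimal sketch of that throttling pattern:

        private static void RunThrottled(IEnumerable<Action> jobs, int maxConcurrent)
        {
            var semaphore = new Semaphore(maxConcurrent, maxConcurrent);
            var tasks     = new List<Task>();

            foreach (Action job in jobs)
            {
                semaphore.WaitOne();           //block until a slot is free

                Action current = job;
                tasks.Add(Task.Run(() =>
                {
                    try
                    {
                        current();
                    }
                    finally
                    {
                        semaphore.Release();   //free the slot even if the job throws
                    }
                }));
            }

            Task.WaitAll(tasks.ToArray());
        }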
Example #24
 public ExitCodeType Fetch(IDataLoadJob job, GracefulCancellationToken cancellationToken)
 {
     job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Warning, "DoNothingDataProvider did nothing!"));
     return(ExitCodeType.Success);
 }
        protected override void Open(DataTable toProcess, IDataLoadEventListener job, GracefulCancellationToken cancellationToken)
        {
            _toProcess = toProcess;

            //give the data table the correct name
            if (_toProcess.ExtendedProperties.ContainsKey("ProperlyNamed") && _toProcess.ExtendedProperties["ProperlyNamed"].Equals(true))
            {
                _isTableAlreadyNamed = true;
            }

            _toProcess.TableName = GetTableName();

            _destination = PrepareDestination(job, _toProcess);
            OutputFile   = _toProcess.TableName;
        }
Example #26
        public DataTable ProcessPipelineData(DataTable toProcess, IDataLoadEventListener listener, GracefulCancellationToken cancellationToken)
        {
            if (_command == null)
            {
                listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, $"Ignoring {GetType().Name} component because command is not ExtractGlobalsCommand"));
                return(toProcess);
            }

            if (_isFirstTime)
            {
                MoveFiles(_command, listener, cancellationToken);
            }

            _isFirstTime = false;

            return(toProcess);
        }
Example #27
        public int Run(IRDMPPlatformRepositoryServiceLocator repositoryLocator, IDataLoadEventListener listener, ICheckNotifier checkNotifier, GracefulCancellationToken token)
        {
            CacheProgress cp           = repositoryLocator.CatalogueRepository.GetObjectByID <CacheProgress>(_options.CacheProgress);
            string        dataLoadTask = cp.GetDistinctLoggingTask();

            var defaults      = repositoryLocator.CatalogueRepository.GetServerDefaults();
            var loggingServer = defaults.GetDefaultFor(PermissableDefaults.LiveLoggingServer_ID);

            if (loggingServer == null)
            {
                throw new NotSupportedException("No default logging server specified, you must specify one in ");
            }

            var logManager = new LogManager(loggingServer);

            logManager.CreateNewLoggingTaskIfNotExists(dataLoadTask);

            switch (_options.Command)
            {
            case CommandLineActivity.run:

                //Setup dual listeners for the Cache process, one ticks the lifeline on every message and one logs to the logging db
                var toLog        = new ToLoggingDatabaseDataLoadEventListener(this, logManager, dataLoadTask, cp.GetLoggingRunName());
                var forkListener = new ForkDataLoadEventListener(toLog, listener);
                try
                {
                    var cachingHost = new CachingHost(repositoryLocator.CatalogueRepository);
                    cachingHost.RetryMode     = _options.RetryMode;
                    cachingHost.CacheProgress = cp;      //run the cp

                    //By default caching host will block
                    cachingHost.TerminateIfOutsidePermissionWindow = true;

                    cachingHost.Start(forkListener, token);
                }
                finally
                {
                    //finish everything
                    toLog.FinalizeTableLoadInfos();
                }

                break;

            case CommandLineActivity.check:
                var checkable = new CachingPreExecutionChecker(cp);
                checkable.Check(checkNotifier);
                break;

            default:
                throw new ArgumentOutOfRangeException();
            }

            return(0);
        }
Example #28
 /// <summary>
 /// Gets called once only per extraction pipeline run (at the time globals start being extracted)
 /// </summary>
 /// <param name="command"></param>
 /// <param name="listener"></param>
 /// <param name="cancellationToken"></param>
 protected abstract void MoveFiles(ExtractGlobalsCommand command, IDataLoadEventListener listener, GracefulCancellationToken cancellationToken);
Example #29
        public DataTable GetChunk(IDataLoadEventListener listener, GracefulCancellationToken cancellationToken)
        {
            try
            {
                _listener = listener;

                int rowsRead = 0;

                if (_fileToLoad == null)
                {
                    throw new Exception(
                              "_fileToLoad was not set, it is supposed to be set because of IPipelineRequirement<FlatFileToLoad> - maybe this PreInitialize method was not called?");
                }

                if (Headers == null)
                {
                    InitializeComponents();

                    //open the file
                    OpenFile(_fileToLoad.File);

                    if (Headers.FileIsEmpty)
                    {
                        EventHandlers.FileIsEmpty();
                        return(null);
                    }
                }

                //if we do not yet have a data table to load
                if (_workingTable == null)
                {
                    //create a table with the name of the file
                    _workingTable           = Headers.GetDataTableWithHeaders(_listener);
                    _workingTable.TableName = QuerySyntaxHelper.MakeHeaderNameSane(Path.GetFileNameWithoutExtension(_fileToLoad.File.Name));

                    //set the data table to the new untyped but correctly headered table
                    SetDataTable(_workingTable);

                    //Now we must read some data
                    if (StronglyTypeInput && StronglyTypeInputBatchSize != 0)
                    {
                        int batchSizeToLoad = StronglyTypeInputBatchSize == -1
                            ? int.MaxValue
                            : StronglyTypeInputBatchSize;

                        if (batchSizeToLoad < 500)
                        {
                            listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Warning, "You set StronglyTypeInputBatchSize to " + batchSizeToLoad + " this will be increased to 500 because that number is too small!", null));
                            batchSizeToLoad = 500;
                        }

                        //user wants to strongly type input with a custom batch size
                        rowsRead = IterativelyBatchLoadDataIntoDataTable(_workingTable, batchSizeToLoad);
                    }
                    else
                    {
                        //user does not want to strongly type or is strongly typing with regular batch size
                        rowsRead = IterativelyBatchLoadDataIntoDataTable(_workingTable, MaxBatchSize);
                    }

                    if (StronglyTypeInput)
                    {
                        StronglyTypeWorkingTable();
                    }

                    if (rowsRead == 0)
                    {
                        EventHandlers.FileIsEmpty();
                    }
                }
                else
                {
                    //this isn't the first pass, so we have everything set up and can just read more data

                    //data table has been set so has a good schema or no schema depending on what user wanted, at least it has all the headers etc setup correctly
                    //so just clear the rows we loaded last chunk and load more
                    _workingTable.Rows.Clear();

                    //get more rows
                    rowsRead = IterativelyBatchLoadDataIntoDataTable(_workingTable, MaxBatchSize);
                }

                //however we read

                //if rows were not read
                if (rowsRead == 0)
                {
                    return(null);//we are done
                }
                //rows were read so return a copy of the DataTable; because we will continually reload the same DataTable schema throughout the file we don't want to give up our reference to good headers in case someone mutates it
                var copy = _workingTable.Copy();

                foreach (DataColumn unamed in Headers.UnamedColumns)
                {
                    copy.Columns.Remove(unamed.ColumnName);
                }

                return(copy);
            }
            catch (Exception)
            {
                //make sure file is closed if it crashes
                if (_reader != null)
                {
                    _reader.Dispose();
                }
                throw;
            }
        }
Example #30
 public DataTable ProcessPipelineData(DataTable toProcess, IDataLoadEventListener listener, GracefulCancellationToken cancellationToken)
 {
     BatchesReadyForProcessing.Add(toProcess.Copy());
     return(null);
 }