Code Example #1
        public DataTable GetChunk(IDataLoadEventListener job, GracefulCancellationToken cancellationToken)
        {
            if (_reader == null)
            {
                _con = DatabaseCommandHelper.GetConnection(_builder);
                _con.Open();

                job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "Running SQL:" + Environment.NewLine + Sql));

                cmd = DatabaseCommandHelper.GetCommand(Sql, _con);
                cmd.CommandTimeout = _timeout;

                //block on the async read so the abort token can cancel it; a cancelled read surfaces as an AggregateException
                _reader          = cmd.ExecuteReaderAsync(cancellationToken.AbortToken).Result;
                _numberOfColumns = _reader.FieldCount;
            }

            int readThisBatch = 0;

            timer.Start();
            try
            {
                DataTable chunk = GetChunkSchema(_reader);

                while (_reader.Read())
                {
                    AddRowToDataTable(chunk, _reader);
                    readThisBatch++;

                    //we reached the batch limit
                    if (readThisBatch == BatchSize)
                    {
                        return(chunk);
                    }
                }

                //if data was read
                if (readThisBatch > 0)
                {
                    return(chunk);
                }

                //data is exhausted

                //if data was exhausted on first read and we are allowing empty result sets
                if (firstChunk && AllowEmptyResultSets)
                {
                    return(chunk);//return the empty chunk
                }
                //data exhausted
                return(null);
            }
            catch (Exception e)
            {
                job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, "Source read failed", e));
                throw;
            }
            finally
            {
                firstChunk = false;
                timer.Stop();
                job.OnProgress(this, new ProgressEventArgs(_taskBeingPerformed, new ProgressMeasurement(TotalRowsRead, ProgressType.Records), timer.Elapsed));
            }
        }
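
This source is pull-based: callers invoke GetChunk repeatedly until it returns null. A minimal driver sketch, assuming the class is DbDataCommandDataFlowSource (as constructed in Code Example #18) and that a no-op listener such as ThrowImmediatelyDataLoadEventListener is available; the query, builder and timeout values below are placeholders:

        //pull chunks of up to BatchSize rows until the source signals exhaustion by returning null
        var source = new DbDataCommandDataFlowSource(
            "SELECT * FROM MyTable",  //Sql to run (placeholder query)
            "read MyTable",           //task description used in progress events
            builder,                  //connection string builder for the server (assumed in scope)
            30);                      //command timeout in seconds

        source.BatchSize            = 10000; //rows per DataTable chunk
        source.AllowEmptyResultSets = true;  //tolerate a query that returns no rows

        var token    = new GracefulCancellationToken();
        var listener = new ThrowImmediatelyDataLoadEventListener();

        DataTable chunk;
        while ((chunk = source.GetChunk(listener, token)) != null)
            Console.WriteLine("Read " + chunk.Rows.Count + " rows");
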
Code Example #2
        protected override void TryExtractLookupTableImpl(BundledLookupTable lookup, DirectoryInfo lookupDir,
                                                          IExtractionConfiguration requestConfiguration, IDataLoadEventListener listener, out int linesWritten, out string destinationDescription)
        {
            var tbl = lookup.TableInfo.Discover(DataAccessContext.DataExport);
            var dt  = tbl.GetDataTable();

            dt.TableName = GetTableName(_destinationDatabase.Server.GetQuerySyntaxHelper().GetSensibleEntityNameFromString(lookup.TableInfo.Name));

            //describe the destination for the abstract base
            destinationDescription = TargetDatabaseServer.ID + "|" + GetDatabaseName() + "|" + dt.TableName;
            linesWritten           = dt.Rows.Count;

            var destinationDb = GetDestinationDatabase(listener);
            var existing      = destinationDb.ExpectTable(dt.TableName);

            if (existing.Exists())
            {
                listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Warning, "Dropping existing Lookup table '" + existing.GetFullyQualifiedName() + "'"));
                existing.Drop();
            }

            destinationDb.CreateTable(dt.TableName, dt);
        }
Code Example #3
 public void Dispose(IDataLoadEventListener listener, Exception pipelineFailureExceptionIfAny)
 {
 }
Code Example #4
        private DataTableUploadDestination PrepareDestination(IDataLoadEventListener listener, DataTable toProcess)
        {
            //see if the user has entered an extraction server/database
            if (TargetDatabaseServer == null)
            {
                throw new Exception("TargetDatabaseServer (the place you want to extract the project data to) property has not been set!");
            }

            try
            {
                if (!_destinationDatabase.Exists())
                {
                    _destinationDatabase.Create();
                }

                if (_request is ExtractGlobalsCommand)
                {
                    return(null);
                }

                var tblName = _toProcess.TableName;

                //See if the table already exists on the server (likely to cause problems, including duplication, schema changes in configuration etc)
                var existing = _destinationDatabase.ExpectTable(tblName);
                if (existing.Exists())
                {
                    if (_request.IsBatchResume)
                    {
                        listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information,
                                                                    $"Table {existing.GetFullyQualifiedName()} already exists but it IsBatchResume so no problem."));
                    }
                    else if (AlwaysDropExtractionTables)
                    {
                        listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Warning,
                                                                    $"Table {existing.GetFullyQualifiedName()} already exists, dropping because setting {nameof(AlwaysDropExtractionTables)} is on"));
                        existing.Drop();

                        listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Warning,
                                                                    $"Table {existing.GetFullyQualifiedName()} was dropped"));

                        // since we dropped it we should treat it as if it was never there to begin with
                        _tableDidNotExistAtStartOfLoad = true;
                    }
                    else
                    {
                        listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Warning,
                                                                    "A table called " + tblName + " already exists on server " + TargetDatabaseServer +
                                                                    ", data load might crash if it is populated and/or has an incompatible schema"));
                    }
                }
                else
                {
                    _tableDidNotExistAtStartOfLoad = true;
                }
            }
            catch (Exception e)
            {
                //Probably the database didn't exist or the credentials were wrong or something
                listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, "Failed to inspect destination for already existing datatables", e));
            }

            _destination = new DataTableUploadDestination();

            PrimeDestinationTypesBasedOnCatalogueTypes(listener, toProcess);

            _destination.AllowResizingColumnsAtUploadTime = true;
            _destination.AlterTimeout = AlterTimeout;

            _destination.PreInitialize(_destinationDatabase, listener);

            return(_destination);
        }
Code Example #5
 protected override void PreInitializeImpl(IExtractCommand value, IDataLoadEventListener listener)
 {
 }
Code Example #6
 public void SetAdditionalProgressListener(IDataLoadEventListener listener)
 {
     fork = new ForkDataLoadEventListener(progressUI1, listener);
 }
Code Example #7
        public void Execute(IDataLoadEventListener listener)
        {
            try
            {
                ExtractCommand.ElevateState(ExtractCommandState.WaitingToExecute);
                var engine = GetEngine(_pipeline, listener);

                try
                {
                    engine.ExecutePipeline(Token ?? new GracefulCancellationToken());
                    listener.OnNotify(Destination, new NotifyEventArgs(ProgressEventType.Information, "Extraction completed successfully into: " + Destination.GetDestinationDescription()));
                }
                catch (Exception e)
                {
                    ExtractCommand.ElevateState(ExtractCommandState.Crashed);
                    _dataLoadInfo.LogFatalError("Execute extraction pipeline", ExceptionHelper.ExceptionToListOfInnerMessages(e, true));

                    if (ExtractCommand is ExtractDatasetCommand)
                    {
                        //audit to extraction results
                        var result = (ExtractCommand as ExtractDatasetCommand).CumulativeExtractionResults;
                        result.Exception = ExceptionHelper.ExceptionToListOfInnerMessages(e, true);
                        result.SaveToDatabase();
                    }
                    else
                    {
                        //audit to extraction results
                        var result = (ExtractCommand as ExtractGlobalsCommand).ExtractionResults;
                        foreach (var extractionResults in result)
                        {
                            extractionResults.Exception = ExceptionHelper.ExceptionToListOfInnerMessages(e, true);
                            extractionResults.SaveToDatabase();
                        }
                    }

                    //throw so it can be audited to UI (triple audit yay!)
                    throw new Exception("An error occurred while executing pipeline", e);
                }

                if (Source == null)
                {
                    throw new Exception("Execute Pipeline completed without Exception but Source was null somehow?!");
                }

                if (Source.WasCancelled)
                {
                    Destination.TableLoadInfo.DataLoadInfoParent.LogFatalError(this.GetType().Name, "User Cancelled Extraction");
                    ExtractCommand.ElevateState(ExtractCommandState.UserAborted);

                    if (ExtractCommand is ExtractDatasetCommand)
                    {
                        //audit to extraction results
                        var result = (ExtractCommand as ExtractDatasetCommand).CumulativeExtractionResults;
                        result.Exception = "User Cancelled Extraction";
                        result.SaveToDatabase();
                    }
                    else
                    {
                        //audit to extraction results
                        var result = (ExtractCommand as ExtractGlobalsCommand).ExtractionResults;
                        foreach (var extractionResults in result)
                        {
                            extractionResults.Exception = "User Cancelled Extraction";
                            extractionResults.SaveToDatabase();
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, "Execute pipeline failed with Exception", ex));
                ExtractCommand.ElevateState(ExtractCommandState.Crashed);
            }

            //if it didn't crash / get aborted etc
            if (ExtractCommand.State < ExtractCommandState.WritingMetadata)
            {
                if (ExtractCommand is ExtractDatasetCommand)
                {
                    WriteMetadata(listener);
                }
                else
                {
                    ExtractCommand.ElevateState(ExtractCommandState.Completed);
                }
            }
        }
Code Example #8
        private bool DealWithTooFewCellsOnCurrentLine(CsvReader reader, FlatFileLine lineToPush, IDataLoadEventListener listener, FlatFileEventHandlers eventHandlers)
        {
            if (!_attemptToResolveNewlinesInRecords)
            {
                //we read too few cells but we don't want to try to fix the problem
                listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Warning, "Too few columns on line " + reader.Context.RawRow + " of file '" + _fileToLoad + "' (expected " + _headers.Length + " columns but line had " + lineToPush.Cells.Length + ")." + (_bufferOverrunsWhereColumnValueWasBlank > 0 ? " (" + _bufferOverrunsWhereColumnValueWasBlank + " previous lines also suffered from buffer overruns but the overrunning values were empty so we ignored them up until now)" : "")));
                eventHandlers.BadDataFound(lineToPush);

                //didn't bother trying to fix the problem
                return(false);
            }

            //We want to try to fix the problem by reading more data

            //Create a composite row
            List <string> newCells = new List <string>(lineToPush.Cells);

            //track what we read in case it doesn't work
            var allPeekedLines = new List <FlatFileLine>();

            do
            {
                FlatFileLine peekedLine;

                //try adding the next row
                if (reader.Read())
                {
                    peekedLine = new FlatFileLine(reader.Context);

                    //peeked line was 'valid' on its own
                    if (peekedLine.Cells.Length >= _headers.Length)
                    {
                        //queue it for reprocessing
                        PeekedRecord = peekedLine;

                        //and mark everything else as bad
                        AllBad(lineToPush, allPeekedLines, eventHandlers);
                        return(false);
                    }

                    //peeked line was invalid (too short) so we can add it onto ourselves
                    allPeekedLines.Add(peekedLine);
                }
                else
                {
                    //Ran out of lines in the file without fixing the problem so it's all bad
                    AllBad(lineToPush, allPeekedLines, eventHandlers);

                    //couldn't fix the problem
                    return(false);
                }

                //add the peeked line onto the current cells:
                //the first cell of the peeked line extends the last cell of the current row
                if (peekedLine.Cells.Length != 0)
                {
                    newCells[newCells.Count - 1] += Environment.NewLine + peekedLine.Cells[0];
                }
                else
                {
                    newCells[newCells.Count - 1] += Environment.NewLine; //the next line was completely blank! just add a new line
                }
                //add any further cells on after that
                newCells.AddRange(peekedLine.Cells.Skip(1));
            } while (newCells.Count() < _headers.Length);


            //we overshot and stitched together more cells than there are headers
            if (newCells.Count() > _headers.Length)
            {
                AllBadExceptLastSoRequeueThatOne(lineToPush, allPeekedLines, eventHandlers);
                return(false);
            }

            if (newCells.Count() != _headers.Length)
            {
                throw new Exception("We didn't over-read or reach the end of the file, how did we get here?");
            }

            //we managed to create a full row
            lineToPush.Cells = newCells.ToArray();

            //problem was fixed
            return(true);
        }
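
Stripped of the RDMP types, the repair loop above implements a simple idea: when a quoted value contains a raw newline, the CSV parser sees several short lines, and gluing those lines back together (joining across the split cell) restores the record. A self-contained sketch of that stitching logic, with illustrative names only:

        //Illustrative only: merge short lines until the expected cell count is reached.
        //Returns null when the lines cannot be stitched into exactly one full record.
        static string[] StitchShortLine(string[] current, Queue<string[]> nextLines, int expectedCells)
        {
            var cells = new List<string>(current);

            while (cells.Count < expectedCells && nextLines.Count > 0)
            {
                string[] peeked = nextLines.Dequeue();

                if (peeked.Length == 0)
                {
                    cells[cells.Count - 1] += Environment.NewLine; //completely blank line, keep the newline
                }
                else
                {
                    //the first cell of the peeked line continues the last cell of the current record
                    cells[cells.Count - 1] += Environment.NewLine + peeked[0];
                    cells.AddRange(peeked.Skip(1));
                }
            }

            return cells.Count == expectedCells ? cells.ToArray() : null;
        }
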
Code Example #9
        public int PushCurrentLine(CsvReader reader, FlatFileLine lineToPush, DataTable dt, IDataLoadEventListener listener, FlatFileEventHandlers eventHandlers)
        {
            //skip the blank lines
            if (lineToPush.Cells.Length == 0 || lineToPush.Cells.All(h => h.IsBasicallyNull()))
            {
                return(0);
            }

            int headerCount = _headers.CountNotNull;

            //if the number of non-empty headers doesn't match the number of columns in the destination DataTable
            if (dt.Columns.Count != headerCount)
            {
                if (!_haveComplainedAboutColumnMismatch)
                {
                    listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Warning, "Flat file '" + _fileToLoad.File.Name + "' line number '" + reader.Context.RawRow + "' had " + headerCount + " columns while the destination DataTable had " + dt.Columns.Count + " columns.  This message appears only once per file"));
                    _haveComplainedAboutColumnMismatch = true;
                }
            }

            Dictionary <string, object> rowValues = new Dictionary <string, object>();

            if (lineToPush.Cells.Length < headerCount)
            {
                if (!DealWithTooFewCellsOnCurrentLine(reader, lineToPush, listener, eventHandlers))
                {
                    return(0);
                }
            }

            bool haveIncremented_bufferOverrunsWhereColumnValueWasBlank = false;


            for (int i = 0; i < lineToPush.Cells.Length; i++)
            {
                //about to do a buffer overrun
                if (i >= _headers.Length)
                {
                    if (lineToPush[i].IsBasicallyNull())
                    {
                        if (!haveIncremented_bufferOverrunsWhereColumnValueWasBlank)
                        {
                            _bufferOverrunsWhereColumnValueWasBlank++;
                            haveIncremented_bufferOverrunsWhereColumnValueWasBlank = true;
                        }

                        continue; //do not bother buffer overrunning with null whitespace stuff
                    }
                    else
                    {
                        string errorMessage = string.Format("Column mismatch on line {0} of file '{1}', it has too many columns (expected {2} columns but line had {3})",
                                                            reader.Context.RawRow,
                                                            dt.TableName,
                                                            _headers.Length,
                                                            lineToPush.Cells.Length);

                        if (_bufferOverrunsWhereColumnValueWasBlank > 0)
                        {
                            errorMessage += " (" + _bufferOverrunsWhereColumnValueWasBlank +
                                            " previous lines also suffered from buffer overruns but the overrunning values were empty so we ignored them up until now)";
                        }

                        listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Warning, errorMessage));
                        eventHandlers.BadDataFound(lineToPush);
                        break;
                    }
                }

                //if we are ignoring this header
                if (_headers.IgnoreColumnsList.Contains(_headers[i]))
                {
                    continue;
                }

                //it's an empty header, don't bother populating it
                if (_headers[i].IsBasicallyNull())
                {
                    if (!lineToPush[i].IsBasicallyNull())
                    {
                        throw new FileLoadException("The header at index " + i + " in flat file '" + dt.TableName + "' had no name but there was a value in the data column (on Line number " + reader.Context.RawRow + ")");
                    }
                    else
                    {
                        continue;
                    }
                }

                //sometimes flat files have ,NULL,NULL,"bob" in them instead of ,,"bob"
                if (lineToPush[i].IsBasicallyNull())
                {
                    rowValues.Add(_headers[i], DBNull.Value);
                }
                else
                {
                    object hackedValue = _hackValuesFunc(lineToPush[i]);

                    if (hackedValue is string)
                    {
                        hackedValue = ((string)hackedValue).Trim();
                    }

                    try
                    {
                        if (hackedValue is string s && typeDeciderFactory.Dictionary.ContainsKey(dt.Columns[_headers[i]].DataType))
                        {
                            hackedValue = typeDeciderFactory.Dictionary[dt.Columns[_headers[i]].DataType].Parse(s);
                        }

                        rowValues.Add(_headers[i], hackedValue);
                    }
                    catch (Exception e)
                    {
                        throw new FileLoadException("Error reading file '" + dt.TableName + "'.  Problem loading value " + lineToPush[i] + " into data table (on Line number " + reader.Context.RawRow + ") the header we were trying to populate was " + _headers[i] + " and was of datatype " + dt.Columns[_headers[i]].DataType, e);
                    }
                }
            }

            if (!BadLines.Contains(reader.Context.RawRow))
            {
                DataRow currentRow = dt.Rows.Add();
                foreach (KeyValuePair <string, object> kvp in rowValues)
                {
                    currentRow[kvp.Key] = kvp.Value;
                }

                return(1);
            }

            return(0);
        }
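
The typeDeciderFactory.Dictionary lookup above amounts to a table of per-Type string parsers keyed on the destination column's DataType. The same pattern reduced to plain .NET (names here are illustrative, not the RDMP API):

        //Illustrative per-Type parser table, mirroring typeDeciderFactory.Dictionary
        var parsers = new Dictionary<Type, Func<string, object>>
        {
            { typeof(DateTime), s => DateTime.Parse(s, CultureInfo.InvariantCulture) },
            { typeof(int),      s => int.Parse(s, CultureInfo.InvariantCulture)     },
            { typeof(decimal),  s => decimal.Parse(s, CultureInfo.InvariantCulture) }
        };

        //fall back to the raw string when no parser is registered for the column type
        object Parse(string raw, Type columnType)
        {
            return parsers.TryGetValue(columnType, out var parse) ? parse(raw) : raw;
        }
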
Code Example #10
 public void PreInitialize(IExtractCommand value, IDataLoadEventListener listener)
 {
     // We only want to extract the files once so lets do it as part of extracting globals
     _command = value as ExtractGlobalsCommand;
 }
Code Example #11
 /// <summary>
 /// Gets called once only per extraction pipeline run (at the time globals start being extracted)
 /// </summary>
 /// <param name="command"></param>
 /// <param name="listener"></param>
 /// <param name="cancellationToken"></param>
 protected abstract void MoveFiles(ExtractGlobalsCommand command, IDataLoadEventListener listener, GracefulCancellationToken cancellationToken);
Code Example #12
 public void PreInitialize(ExtractionInformation value, IDataLoadEventListener listener)
 {
     _extractionInformation = value;
 }
Code Example #13
File: ManyRunner.cs Project: 24418863/rdm
        public int Run(IRDMPPlatformRepositoryServiceLocator repositoryLocator, IDataLoadEventListener listener, ICheckNotifier checkNotifier, GracefulCancellationToken token)
        {
            RepositoryLocator = repositoryLocator;
            Token             = token;
            List <Task> tasks = new List <Task>();

            Semaphore semaphore = null;

            if (_options.MaxConcurrentExtractions != null)
            {
                semaphore = new Semaphore(_options.MaxConcurrentExtractions.Value, _options.MaxConcurrentExtractions.Value);
            }

            Initialize();

            switch (_options.Command)
            {
            case CommandLineActivity.none:
                break;

            case CommandLineActivity.run:

                object[] runnables = GetRunnables();

                foreach (object runnable in runnables)
                {
                    if (semaphore != null)
                    {
                        semaphore.WaitOne();
                    }

                    object r = runnable;
                    tasks.Add(Task.Run(() =>
                    {
                        try
                        {
                            ExecuteRun(r, new OverrideSenderIDataLoadEventListener(r.ToString(), listener));
                        }
                        finally
                        {
                            if (semaphore != null)
                            {
                                semaphore.Release();
                            }
                        }
                    }));
                }

                break;

            case CommandLineActivity.check:

                lock (_oLock)
                    _checksDictionary.Clear();

                ICheckable[] checkables = GetCheckables(checkNotifier);
                foreach (ICheckable checkable in checkables)
                {
                    if (semaphore != null)
                    {
                        semaphore.WaitOne();
                    }

                    ICheckable checkable1 = checkable;
                    var        memory     = new ToMemoryCheckNotifier(checkNotifier);

                    lock (_oLock)
                        _checksDictionary.Add(checkable1, memory);

                    tasks.Add(Task.Run(() =>
                    {
                        try
                        {
                            checkable1.Check(memory);
                        }
                        finally
                        {
                            if (semaphore != null)
                            {
                                semaphore.Release();
                            }
                        }
                    }));
                }

                break;

            default:
                throw new ArgumentOutOfRangeException();
            }

            Task.WaitAll(tasks.ToArray());

            AfterRun();

            return(0);
        }
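
The Semaphore here exists only to cap how many of the queued Tasks run at once. A minimal standalone version of that throttling pattern using SemaphoreSlim (the lighter in-process equivalent); workItems, maxConcurrent and DoWork are placeholders:

        //bound the number of concurrently running tasks, as Run does above
        var semaphore = new SemaphoreSlim(maxConcurrent, maxConcurrent);
        var tasks     = new List<Task>();

        foreach (var item in workItems)
        {
            semaphore.Wait(); //block until a slot is free

            var captured = item; //explicit copy, mirroring 'object r = runnable;' above
            tasks.Add(Task.Run(() =>
            {
                try
                {
                    DoWork(captured);
                }
                finally
                {
                    semaphore.Release(); //always free the slot, even when the work throws
                }
            }));
        }

        Task.WaitAll(tasks.ToArray());
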
Code Example #14
File: FlatFileAttacher.cs Project: 24418863/rdm
 protected abstract void OpenFile(FileInfo fileToLoad, IDataLoadEventListener listener);
Code Example #15
        protected override void WriteRows(DataTable toProcess, IDataLoadEventListener job, GracefulCancellationToken cancellationToken, Stopwatch stopwatch)
        {
            _destination.ProcessPipelineData(toProcess, job, cancellationToken);

            LinesWritten += toProcess.Rows.Count;
        }
Code Example #16
        protected virtual ICacheLayout CreateCacheLayout(ICacheProgress cacheProgress, IDataLoadEventListener listener)
        {
            var pipelineFactory = new CachingPipelineUseCase(cacheProgress);
            var destination     = pipelineFactory.CreateDestinationOnly(listener);

            return(destination.CreateCacheLayout());
        }
Code Example #17
        public virtual void PreInitialize(AggregateConfiguration value, IDataLoadEventListener listener)
        {
            AggregateConfiguration = value;

            CohortIdentificationConfigurationIfAny = value.GetCohortIdentificationConfigurationIfAny();
        }
Code Example #18
        public virtual DataTable GetChunk(IDataLoadEventListener listener, GracefulCancellationToken cancellationToken)
        {
            // we are in the Global Commands case, let's return an empty DataTable (not null)
            // so we can trigger the destination to extract the globals docs and sql
            if (GlobalsRequest != null)
            {
                GlobalsRequest.ElevateState(ExtractCommandState.WaitingForSQLServer);
                if (firstGlobalChunk)
                {
                    //unless we are checking, start auditing
                    StartAuditGlobals();

                    firstGlobalChunk = false;
                    return(new DataTable(ExtractionDirectory.GLOBALS_DATA_NAME));
                }

                return(null);
            }

            if (Request == null)
            {
                throw new Exception("Component has not been initialized before being asked to GetChunk(s)");
            }

            Request.ElevateState(ExtractCommandState.WaitingForSQLServer);

            if (_cancel)
            {
                throw new Exception("User cancelled data extraction");
            }

            if (_hostedSource == null)
            {
                StartAudit(Request.QueryBuilder.SQL);

                if (Request.DatasetBundle.DataSet.DisableExtraction)
                {
                    throw new Exception("Cannot extract " + Request.DatasetBundle.DataSet + " because DisableExtraction is set to true");
                }

                _hostedSource = new DbDataCommandDataFlowSource(GetCommandSQL(listener),
                                                                "ExecuteDatasetExtraction " + Request.DatasetBundle.DataSet,
                                                                Request.GetDistinctLiveDatabaseServer().Builder,
                                                                ExecutionTimeout);

                _hostedSource.AllowEmptyResultSets = AllowEmptyExtractions;
                _hostedSource.BatchSize            = BatchSize;
            }

            DataTable chunk = null;

            try
            {
                chunk = _hostedSource.GetChunk(listener, cancellationToken);

                chunk = _peeker.AddPeekedRowsIfAny(chunk);

                //if we are trying to distinct the records in memory based on release id
                if (DistinctStrategy == DistinctStrategy.OrderByAndDistinctInMemory)
                {
                    var releaseIdentifierColumn = Request.ReleaseIdentifierSubstitutions.First().GetRuntimeName();

                    if (chunk != null)
                    {
                        //last release id in the current chunk
                        var lastReleaseId = chunk.Rows[chunk.Rows.Count - 1][releaseIdentifierColumn];

                        _peeker.AddWhile(_hostedSource, r => Equals(r[releaseIdentifierColumn], lastReleaseId), chunk);
                        chunk = MakeDistinct(chunk, listener, cancellationToken);
                    }
                }
            }
            catch (AggregateException a)
            {
                if (a.GetExceptionIfExists <TaskCanceledException>() != null)
                {
                    _cancel = true;
                }

                throw;
            }
            catch (Exception e)
            {
                listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, "Read from source failed", e));
            }

            if (cancellationToken.IsCancellationRequested)
            {
                throw new Exception("Data read cancelled because our cancellationToken was set, aborting data reading");
            }

            //if the first chunk is null
            if (firstChunk && chunk == null)
            {
                throw new Exception("There is no data to load, query returned no rows, query was:" + Environment.NewLine +
                                    (_hostedSource.Sql ?? Request.QueryBuilder.SQL));
            }

            //not the first chunk anymore
            firstChunk = false;

            //data exhausted
            if (chunk == null)
            {
                listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "Data exhausted after reading " + _rowsRead + " rows of data (" + UniqueReleaseIdentifiersEncountered.Count + " unique release identifiers seen)"));
                if (Request != null)
                {
                    Request.CumulativeExtractionResults.DistinctReleaseIdentifiersEncountered = UniqueReleaseIdentifiersEncountered.Count;
                }
                return(null);
            }

            _rowsRead += chunk.Rows.Count;
            //chunk will have datatypes for all the things in the buffer so we can populate our dictionary of facts about what columns/catalogue items have spontaneously changed name/type etc
            if (ExtractTimeTransformationsObserved == null)
            {
                GenerateExtractionTransformObservations(chunk);
            }


            //see if the SqlDataReader has a column with the same name as the ReleaseIdentifierSQL (if so then we can use it to count the number of distinct subjects written out to the csv)
            bool includesReleaseIdentifier = _extractionIdentifiersidx.Count > 0;


            //first line - let's see what columns we wrote out
            //looks at the buffer and computes any transforms performed on the column


            _timeSpentValidating.Start();
            //build up the validation report (Missing/Wrong/Etc) - this has no mechanical effect on the extracted data just some metadata that goes into a flat file
            if (ExtractionTimeValidator != null && Request.IncludeValidation)
            {
                try
                {
                    chunk.Columns.Add(ValidationColumnName);

                    ExtractionTimeValidator.Validate(chunk, ValidationColumnName);

                    _rowsValidated += chunk.Rows.Count;
                    listener.OnProgress(this, new ProgressEventArgs("Validation", new ProgressMeasurement(_rowsValidated, ProgressType.Records), _timeSpentValidating.Elapsed));
                }
                catch (Exception ex)
                {
                    listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, "Could not validate data chunk", ex));
                    ValidationFailureException = ex;
                    ExtractionTimeValidator    = null;
                }
            }
            _timeSpentValidating.Stop();

            _timeSpentBuckettingDates.Start();
            if (ExtractionTimeTimeCoverageAggregator != null)
            {
                _rowsBucketted += chunk.Rows.Count;

                foreach (DataRow row in chunk.Rows)
                {
                    ExtractionTimeTimeCoverageAggregator.ProcessRow(row);
                }

                listener.OnProgress(this, new ProgressEventArgs("Bucketting Dates", new ProgressMeasurement(_rowsBucketted, ProgressType.Records), _timeSpentBuckettingDates.Elapsed));
            }
            _timeSpentBuckettingDates.Stop();

            _timeSpentCalculatingDISTINCT.Start();
            //record unique release identifiers found
            if (includesReleaseIdentifier)
            {
                foreach (string idx in _extractionIdentifiersidx)
                {
                    foreach (DataRow r in chunk.Rows)
                    {
                        if (r[idx] == DBNull.Value)
                        {
                            if (_extractionIdentifiersidx.Count == 1)
                            {
                                throw new Exception("Null release identifier found in extract of dataset " + Request.DatasetBundle.DataSet);
                            }
                            else
                            {
                                continue; //there are multiple extraction identifiers, that's fine if one or two are null
                            }
                        }
                        if (!UniqueReleaseIdentifiersEncountered.Contains(r[idx]))
                        {
                            UniqueReleaseIdentifiersEncountered.Add(r[idx]);
                        }
                    }

                    listener.OnProgress(this, new ProgressEventArgs("Calculating Distinct Release Identifiers", new ProgressMeasurement(UniqueReleaseIdentifiersEncountered.Count, ProgressType.Records), _timeSpentCalculatingDISTINCT.Elapsed));
                }
            }
            _timeSpentCalculatingDISTINCT.Stop();

            return(chunk);
        }
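
The OrderByAndDistinctInMemory branch above relies on one invariant: because the query is ordered by release identifier, reading ahead until the identifier changes guarantees that every row for a given identifier lands in the same chunk, making a per-chunk in-memory DISTINCT safe. A sketch of that read-ahead with illustrative names (the real Peeker also requeues the first non-matching row for the next chunk):

        //Illustrative only: grow the batch until the grouping key changes, so a key
        //never straddles a batch boundary and per-batch de-duplication is safe.
        static void AddWhileKeyMatches<T>(IEnumerator<T> source, List<T> batch, Func<T, object> key)
        {
            if (batch.Count == 0)
                return;

            object lastKey = key(batch[batch.Count - 1]);

            while (source.MoveNext())
            {
                if (!Equals(key(source.Current), lastKey))
                    break; //belongs to the next batch; a real implementation would requeue it

                batch.Add(source.Current);
            }
        }
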
Code Example #19
 public int Run(IRDMPPlatformRepositoryServiceLocator repositoryLocator, IDataLoadEventListener listener, ICheckNotifier checkNotifier, GracefulCancellationToken token)
 {
     Activator.ShowDialog(new SingleControlForm(this));
     return(0);
 }
Code Example #20
 public virtual string HackExtractionSQL(string sql, IDataLoadEventListener listener)
 {
     return(sql);
 }
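
Because the base implementation returns the SQL unchanged, HackExtractionSQL is a hook point for subclasses. A hypothetical override (naive: it assumes the query has no WHERE clause yet) might look like:

        //Hypothetical override: restrict every extraction this component performs
        public override string HackExtractionSQL(string sql, IDataLoadEventListener listener)
        {
            listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "Appending WHERE clause to extraction SQL"));
            return sql + " WHERE ExtractionDate >= '2020-01-01'"; //placeholder restriction
        }
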
Code Example #21
 public override DataTable ProcessPipelineData(DataTable toProcess, IDataLoadEventListener job,
                                               GracefulCancellationToken cancellationToken)
 {
     _destinationDatabase = GetDestinationDatabase(job);
     return(base.ProcessPipelineData(toProcess, job, cancellationToken));
 }
Code Example #22
 public virtual void Dispose(IDataLoadEventListener job, Exception pipelineFailureExceptionIfAny)
 {
 }
Code Example #23
        private void PrimeDestinationTypesBasedOnCatalogueTypes(IDataLoadEventListener listener, DataTable toProcess)
        {
            //if the extraction is of a Catalogue
            var datasetCommand = _request as IExtractDatasetCommand;

            if (datasetCommand == null)
            {
                return;
            }

            //for every extractable column in the Catalogue
            foreach (var extractionInformation in datasetCommand.ColumnsToExtract.OfType <ExtractableColumn>().Select(ec => ec.CatalogueExtractionInformation))//.GetAllExtractionInformation(ExtractionCategory.Any))
            {
                if (extractionInformation == null)
                {
                    continue;
                }

                var catItem = extractionInformation.CatalogueItem;

                //if we do not know the data type or the ExtractionInformation is a transform
                if (catItem == null)
                {
                    listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Warning, $"Did not copy Types for ExtractionInformation {extractionInformation} (ID={extractionInformation.ID}) because it had no associated CatalogueItem"));
                    continue;
                }
                if (catItem.ColumnInfo == null)
                {
                    listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Warning, $"Did not copy Types for ExtractionInformation {extractionInformation} (ID={extractionInformation.ID}) because it had no associated ColumnInfo"));
                    continue;
                }

                if (extractionInformation.IsProperTransform())
                {
                    listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Warning, $"Did not copy Types for ExtractionInformation {extractionInformation} (ID={extractionInformation.ID}) because it is a Transform"));
                    continue;
                }

                string destinationType = GetDestinationDatabaseType(extractionInformation);

                //Tell the destination the datatype of the ColumnInfo that underlies the ExtractionInformation (this might be changed by the ExtractionInformation e.g. as a
                //transform but it is a good starting point.  We don't want to create a varchar(10) column in the destination if the origin dataset (Catalogue) is a varchar(100)
                //since it will just confuse the user.  Bear in mind these data types can be degraded later by the destination
                var columnName = extractionInformation.Alias ?? catItem.ColumnInfo.GetRuntimeName();
                var addedType  = _destination.AddExplicitWriteType(columnName, destinationType);
                addedType.IsPrimaryKey = toProcess.PrimaryKey.Any(dc => dc.ColumnName == columnName);

                //if user wants to copy collation types and the destination server is the same type as the origin server
                if (CopyCollations && _destinationDatabase.Server.DatabaseType == catItem.ColumnInfo.TableInfo.DatabaseType)
                {
                    addedType.Collation = catItem.ColumnInfo.Collation;
                }

                listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, $"Set Type for {columnName} to {destinationType} (IsPrimaryKey={(addedType.IsPrimaryKey ? "true":"false")}) to match the source table"));
            }

            foreach (ReleaseIdentifierSubstitution sub in datasetCommand.QueryBuilder.SelectColumns.Where(sc => sc.IColumn is ReleaseIdentifierSubstitution).Select(sc => sc.IColumn))
            {
                var  columnName = sub.GetRuntimeName();
                bool isPk       = toProcess.PrimaryKey.Any(dc => dc.ColumnName == columnName);

                var addedType = _destination.AddExplicitWriteType(columnName, datasetCommand.ExtractableCohort.GetReleaseIdentifierDataType());
                addedType.IsPrimaryKey = isPk;
                addedType.AllowNulls   = !isPk;
            }
        }
Code Example #24
        public override void GenerateReport(Catalogue c, IDataLoadEventListener listener, CancellationToken cancellationToken)
        {
            SetupLogging(c.CatalogueRepository);

            var toDatabaseLogger = new ToLoggingDatabaseDataLoadEventListener(this, _logManager, _loggingTask, "DQE evaluation of " + c);

            var forker = new ForkDataLoadEventListener(listener, toDatabaseLogger);

            try
            {
                _catalogue = c;
                var dqeRepository = new DQERepository(c.CatalogueRepository);

                byPivotCategoryCubesOverTime.Add("ALL", new PeriodicityCubesOverTime("ALL"));
                byPivotRowStatesOverDataLoadRunId.Add("ALL", new DQEStateOverDataLoadRunId("ALL"));

                Check(new FromDataLoadEventListenerToCheckNotifier(forker));

                var sw = Stopwatch.StartNew();
                using (var con = _server.GetConnection())
                {
                    con.Open();

                    var cmd = _server.GetCommand(_queryBuilder.SQL, con);
                    cmd.CommandTimeout = 500000;

                    var t = cmd.ExecuteReaderAsync(cancellationToken);
                    t.Wait(cancellationToken);

                    if (cancellationToken.IsCancellationRequested)
                    {
                        throw new OperationCanceledException("User cancelled DQE while fetching data");
                    }

                    var r = t.Result;

                    int progress = 0;

                    while (r.Read())
                    {
                        cancellationToken.ThrowIfCancellationRequested();

                        progress++;
                        //to start with assume the 'unknown batch' (where data load run ID is null or not available)
                        int dataLoadRunIDOfCurrentRecord = 0;

                        //if the DataReader is likely to have a data load run ID column
                        if (_containsDataLoadID)
                        {
                            //get data load run id
                            int? runID = dqeRepository.ObjectToNullableInt(r[_dataLoadRunFieldName]);

                            //if it has a value use it (otherwise it is null so use 0 - ugh I know, it's a primary key constraint issue)
                            if (runID != null)
                            {
                                dataLoadRunIDOfCurrentRecord = (int)runID;
                            }
                        }

                        string pivotValue = null;

                        //if the user has a pivot category configured
                        if (_pivotCategory != null)
                        {
                            pivotValue = GetStringValueForPivotField(r[_pivotCategory], forker);

                            if (!haveComplainedAboutNullCategories && string.IsNullOrWhiteSpace(pivotValue))
                            {
                                forker.OnNotify(this,
                                                new NotifyEventArgs(ProgressEventType.Warning,
                                                                    "Found a null/empty value for pivot category '" + _pivotCategory +
                                                                    "', this record will ONLY be recorded under ALL and not its specific category, you will not be warned of further nulls because there are likely to be many if there are any"));
                                haveComplainedAboutNullCategories = true;
                                pivotValue = null;
                            }
                        }

                        //always increase the "ALL" category
                        ProcessRecord(dqeRepository, dataLoadRunIDOfCurrentRecord, r,
                                      byPivotCategoryCubesOverTime["ALL"], byPivotRowStatesOverDataLoadRunId["ALL"]);

                        //if there is a value in the current record for the pivot column
                        if (pivotValue != null)
                        {
                            //if it is a novel pivot value
                            if (!byPivotCategoryCubesOverTime.ContainsKey(pivotValue))
                            {
                                //we will need to expand the dictionaries
                                if (byPivotCategoryCubesOverTime.Keys.Count > MaximumPivotValues)
                                {
                                    throw new OverflowException(
                                              "Encountered more than " + MaximumPivotValues + " values for the pivot column " + _pivotCategory +
                                              " this will result in crazy space usage since it is a multiplicative scale of DQE tesseracts");
                                }

                                //expand both the time periodicity and the state results
                                byPivotRowStatesOverDataLoadRunId.Add(pivotValue,
                                                                      new DQEStateOverDataLoadRunId(pivotValue));
                                byPivotCategoryCubesOverTime.Add(pivotValue, new PeriodicityCubesOverTime(pivotValue));
                            }

                            //now we are sure that the dictionaries have the category field we can increment it
                            ProcessRecord(dqeRepository, dataLoadRunIDOfCurrentRecord, r,
                                          byPivotCategoryCubesOverTime[pivotValue], byPivotRowStatesOverDataLoadRunId[pivotValue]);
                        }

                        if (progress % 5000 == 0)
                        {
                            forker.OnProgress(this,
                                              new ProgressEventArgs("Processing " + _catalogue,
                                                                    new ProgressMeasurement(progress, ProgressType.Records), sw.Elapsed));
                        }
                    }
                    //final value
                    forker.OnProgress(this,
                                      new ProgressEventArgs("Processing " + _catalogue,
                                                            new ProgressMeasurement(progress, ProgressType.Records), sw.Elapsed));
                    con.Close();
                }
                sw.Stop();

                foreach (var state in byPivotRowStatesOverDataLoadRunId.Values)
                {
                    state.CalculateFinalValues();
                }

                //now commit results
                using (var con = dqeRepository.BeginNewTransactedConnection())
                {
                    try
                    {
                        //mark down that we are beginning an evaluation on this the day of our lord etc...
                        Evaluation evaluation = new Evaluation(dqeRepository, _catalogue);

                        foreach (var state in byPivotRowStatesOverDataLoadRunId.Values)
                        {
                            state.CommitToDatabase(evaluation, _catalogue, con.Connection, con.Transaction);
                        }

                        if (_timePeriodicityField != null)
                        {
                            foreach (PeriodicityCubesOverTime periodicity in byPivotCategoryCubesOverTime.Values)
                            {
                                periodicity.CommitToDatabase(evaluation);
                            }
                        }

                        con.ManagedTransaction.CommitAndCloseConnection();
                    }
                    catch (Exception)
                    {
                        con.ManagedTransaction.AbandonAndCloseConnection();
                        throw;
                    }
                }

                forker.OnNotify(this,
                                new NotifyEventArgs(ProgressEventType.Information,
                                                    "CatalogueConstraintReport completed successfully and committed results to DQE server"));
            }
            catch (Exception e)
            {
                if (!(e is OperationCanceledException))
                {
                    forker.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, "Fatal Crash", e));
                }
                else
                {
                    forker.OnNotify(this, new NotifyEventArgs(ProgressEventType.Warning, "DQE Execution Cancelled", e));
                }
            }
            finally
            {
                toDatabaseLogger.FinalizeTableLoadInfos();
            }
        }
Code Example #25
        protected override void TryExtractSupportingSQLTableImpl(SupportingSQLTable sqlTable, DirectoryInfo directory, IExtractionConfiguration configuration, IDataLoadEventListener listener, out int linesWritten,
                                                                 out string destinationDescription)
        {
            listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "About to download SQL for global SupportingSQL " + sqlTable.SQL));
            using (var con = sqlTable.GetServer().GetConnection())
            {
                con.Open();

                listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "Connection opened successfully, about to send SQL command " + sqlTable.SQL));

                using (DataTable dt = new DataTable())
                {
                    using (var cmd = DatabaseCommandHelper.GetCommand(sqlTable.SQL, con))
                        using (var da = DatabaseCommandHelper.GetDataAdapter(cmd))
                        {
                            var sw = new Stopwatch();

                            sw.Start();
                            da.Fill(dt);
                        }

                    dt.TableName = GetTableName(_destinationDatabase.Server.GetQuerySyntaxHelper().GetSensibleEntityNameFromString(sqlTable.Name));
                    linesWritten = dt.Rows.Count;

                    var destinationDb = GetDestinationDatabase(listener);
                    var tbl           = destinationDb.ExpectTable(dt.TableName);

                    if (tbl.Exists())
                    {
                        tbl.Drop();
                    }

                    destinationDb.CreateTable(dt.TableName, dt);
                    destinationDescription = TargetDatabaseServer.ID + "|" + GetDatabaseName() + "|" + dt.TableName;
                }
            }
        }
Code Example #26
 public void PreInitialize(IExtractCommand value, IDataLoadEventListener listener)
 {
     _extractCommand = value as IExtractDatasetCommand;
 }
Code Example #27
 public override void LoadCompletedSoDispose(ExitCodeType exitCode, IDataLoadEventListener postLoadEventListener)
 {
 }
Code Example #28
        public DataTable ProcessPipelineData(DataTable toProcess, IDataLoadEventListener listener, GracefulCancellationToken cancellationToken)
        {
            //Things we ignore: Lookups, SupportingSql etc
            if (_extractCommand == null)
            {
                listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "Ignoring non-dataset command"));
                return(toProcess);
            }

            //if it isn't a dicom dataset don't process it
            if (!toProcess.Columns.Contains(RelativeArchiveColumnName))
            {
                listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Warning, "Dataset " + _extractCommand.DatasetBundle.DataSet + " did not contain field '" + RelativeArchiveColumnName + "' so we will not attempt to extract images"));
                return(toProcess);
            }

            if (_putter == null)
            {
                _putter = (IPutDicomFilesInExtractionDirectories)  new ObjectConstructor().Construct(PutterType);
            }

            var projectNumber = _extractCommand.Configuration.Project.ProjectNumber.Value;

            var mappingServer        = new MappingRepository(UIDMappingServer);
            var destinationDirectory = new DirectoryInfo(Path.Combine(_extractCommand.GetExtractionDirectory().FullName, "Images"));

            var releaseCol = _extractCommand.QueryBuilder.SelectColumns.Select(c => c.IColumn).Single(c => c.IsExtractionIdentifier);

            // See: ftp://medical.nema.org/medical/dicom/2011/11_15pu.pdf

            //Note: RetainLongFullDates is applied conditionally below via RetainDates;
            //including it unconditionally here would make that option a no-op
            var flags = DicomAnonymizer.SecurityProfileOptions.BasicProfile |
                        DicomAnonymizer.SecurityProfileOptions.CleanStructdCont |
                        DicomAnonymizer.SecurityProfileOptions.CleanDesc |
                        DicomAnonymizer.SecurityProfileOptions.CleanGraph |
                        DicomAnonymizer.SecurityProfileOptions.RetainUIDs;

            if (RetainDates)
            {
                flags = flags | DicomAnonymizer.SecurityProfileOptions.RetainLongFullDates;
            }

            var profile = DicomAnonymizer.SecurityProfile.LoadProfile(null, flags);

            var anonymiser = new DicomAnonymizer(profile);

            using (var pool = new ZipPool())
            {
                _sw.Start();

                foreach (DataRow row in toProcess.Rows)
                {
                    cancellationToken.ThrowIfAbortRequested();

                    var path = new AmbiguousFilePath(ArchiveRootIfAny, (string)row[RelativeArchiveColumnName]);

                    var dicomFile = path.GetDataset(pool);

                    //get the new patient ID
                    var releaseId = row[releaseCol.GetRuntimeName()].ToString();

                    var ds = anonymiser.Anonymize(dicomFile.Dataset);

                    //now we want to explicitly use our own release Id regardless of what FoDicom said
                    ds.AddOrUpdate(DicomTag.PatientID, releaseId);

                    //rewrite the UIDs
                    foreach (var kvp in UIDMapping.SupportedTags)
                    {
                        if (!ds.Contains(kvp.Key))
                        {
                            continue;
                        }

                        var value = ds.GetValue <string>(kvp.Key, 0);

                        //if it has a value for this UID
                        if (value != null)
                        {
                            var releaseValue = mappingServer.GetOrAllocateMapping(value, projectNumber, kvp.Value);

                            //change value in dataset
                            ds.AddOrUpdate(kvp.Key, releaseValue);

                            //and change value in DataTable
                            if (toProcess.Columns.Contains(kvp.Key.DictionaryEntry.Keyword))
                            {
                                row[kvp.Key.DictionaryEntry.Keyword] = releaseValue;
                            }
                        }
                    }

                    var newPath = _putter.WriteOutDataset(destinationDirectory, releaseId, ds);
                    row[RelativeArchiveColumnName] = newPath;

                    _anonymisedImagesCount++;

                    listener.OnProgress(this, new ProgressEventArgs("Writing ANO images", new ProgressMeasurement(_anonymisedImagesCount, ProgressType.Records), _sw.Elapsed));
                }

                _sw.Stop();
            }

            return(toProcess);
        }
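
The GetOrAllocateMapping call above is a get-or-create lookup: a stable, per-project replacement UID is allocated the first time a real UID is seen and reused thereafter. An in-memory sketch of that contract (the real MappingRepository persists to a database, and real DICOM UIDs need a proper allocation scheme, not the GUID placeholder used here):

        //Illustrative in-memory stand-in for the UID mapping server
        var knownMappings = new Dictionary<string, string>();

        string GetOrAllocateMapping(string realUid)
        {
            if (!knownMappings.TryGetValue(realUid, out var releaseUid))
            {
                releaseUid = Guid.NewGuid().ToString(); //placeholder allocation scheme
                knownMappings.Add(realUid, releaseUid);
            }

            return releaseUid;
        }
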
Code Example #29
 public void Abort(IDataLoadEventListener listener)
 {
 }
Code Example #30
 public void Abort(IDataLoadEventListener listener)
 {
     CloseReader(listener);
 }