Example #1
        public virtual DataTable TryGetPreview()
        {
            if (Request == ExtractDatasetCommand.EmptyCommand)
            {
                return new DataTable();
            }

            DataTable toReturn = new DataTable();
            var       server   = _catalogue.GetDistinctLiveDatabaseServer(DataAccessContext.DataExport, false);

            using (var con = server.GetConnection())
            {
                con.Open();

                var da = server.GetDataAdapter(Request.QueryBuilder.SQL, con);

                //get up to 1000 records
                da.Fill(0, 1000, toReturn);

                con.Close();
            }

            return toReturn;
        }
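
The trick worth noting above is the DataAdapter.Fill(startRecord, maxRecords, table) overload, which caps the preview at 1000 rows without needing a TOP clause in the SQL. Below is a minimal, self-contained sketch of the same pattern against a plain SQL Server connection; the connection string, query and class/method names are placeholders for illustration and are not part of the RDMP API.

    using System.Data;
    using System.Data.SqlClient;

    public static class PreviewExample
    {
        //Preview at most the first maxRows rows of an arbitrary query
        public static DataTable PreviewRows(string connectionString, string sql, int maxRows = 1000)
        {
            var toReturn = new DataTable();

            using (var con = new SqlConnection(connectionString))
            using (var da = new SqlDataAdapter(sql, con))
            {
                con.Open();

                //Fill(startRecord, maxRecords, table) stops after maxRows rows even if the query would return more
                da.Fill(0, maxRows, toReturn);
            }

            return toReturn;
        }
    }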
Example #2
        private void Initialize()
        {
            //Figure out which UID columns exist in the Catalogue; we do not require the file path column to be in the Catalogue
            _columnSet = QueryToExecuteColumnSet.Create(_catalogue, false);

            //The syntax helper tells us the DBMS type (used below for the parameter symbol)
            var syntax = _catalogue.GetQuerySyntaxHelper();

            //For storing the OR container and filter(s)
            var memory = new MemoryCatalogueRepository();

            //Builds the SQL we will run at the lookup stage
            _queryBuilder = new QueryBuilder(null, null);

            //All we care about is whether the UID appears at all; if it does, we reject the record
            _queryBuilder.TopX = 1;

            //Filter is OR i.e. StudyInstanceUID = @StudyInstanceUID OR SeriesInstanceUID = @SeriesInstanceUID
            var container = _queryBuilder.RootFilterContainer = new SpontaneouslyInventedFilterContainer(memory, null, null, FilterContainerOperation.OR);

            //Build SELECT and WHERE bits of the query
            if (_columnSet.StudyTagColumn != null)
            {
                _queryBuilder.AddColumn(_columnSet.StudyTagColumn);

                string whereSql =
                    $"{_columnSet.StudyTagColumn.SelectSQL} = {syntax.ParameterSymbol}{QueryToExecuteColumnSet.DefaultStudyIdColumnName}";

                _studyFilter = new SpontaneouslyInventedFilter(memory, container, whereSql, "Study UID Filter", "", null);
                container.AddChild(_studyFilter);
            }


            if (_columnSet.SeriesTagColumn != null)
            {
                _queryBuilder.AddColumn(_columnSet.SeriesTagColumn);

                string whereSql =
                    $"{_columnSet.SeriesTagColumn.SelectSQL} = {syntax.ParameterSymbol}{QueryToExecuteColumnSet.DefaultSeriesIdColumnName}";

                _seriesFilter = new SpontaneouslyInventedFilter(memory, container, whereSql, "Series UID Filter", "", null);
                container.AddChild(_seriesFilter);
            }

            if (_columnSet.InstanceTagColumn != null)
            {
                _queryBuilder.AddColumn(_columnSet.InstanceTagColumn);

                string whereSql =
                    $"{_columnSet.InstanceTagColumn.SelectSQL} = {syntax.ParameterSymbol}{QueryToExecuteColumnSet.DefaultInstanceIdColumnName}";

                _instanceFilter = new SpontaneouslyInventedFilter(memory, container, whereSql, "Instance UID Filter", "", null);
                container.AddChild(_instanceFilter);
            }

            // Make sure the query builder looks valid
            if (!_queryBuilder.SelectColumns.Any())
            {
                throw new NotSupportedException($"Blacklist Catalogue {_catalogue} (ID={_catalogue.ID}) did not have any Core ExtractionInformation columns corresponding to any of the image UID tags (e.g. StudyInstanceUID, SeriesInstanceUID, SOPInstanceUID).");
            }

            try
            {
                // make sure we can connect to the server
                _server = _catalogue.GetDistinctLiveDatabaseServer(DataAccessContext.DataExport, true);
                _server.TestConnection();
            }
            catch (Exception e)
            {
                throw new Exception($"Failed to test connection for Catalogue {_catalogue}", e);
            }

            // run a test lookup query against the remote database
            DoLookup("test1", "test2", "test3");
        }
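
The net effect of Initialize() is a parameterised TOP 1 lookup whose WHERE clause ORs together whichever UID columns the Catalogue actually has. A rough, self-contained sketch of an equivalent hand-written lookup is shown below; the table and column names (BlacklistTable, StudyInstanceUID, SeriesInstanceUID, SOPInstanceUID) and the SQL Server syntax are assumptions for illustration, since the real SQL is generated by QueryBuilder against whatever table the Catalogue points at.

    using System.Data.SqlClient;

    public static class BlacklistLookupExample
    {
        //Returns true if any of the supplied UIDs appears in the (hypothetical) blacklist table
        public static bool IsBlacklisted(string connectionString, string studyUid, string seriesUid, string instanceUid)
        {
            const string sql = @"SELECT TOP 1 StudyInstanceUID FROM BlacklistTable
                                 WHERE StudyInstanceUID = @StudyInstanceUID
                                    OR SeriesInstanceUID = @SeriesInstanceUID
                                    OR SOPInstanceUID = @SOPInstanceUID";

            using (var con = new SqlConnection(connectionString))
            using (var cmd = new SqlCommand(sql, con))
            {
                cmd.Parameters.AddWithValue("@StudyInstanceUID", studyUid);
                cmd.Parameters.AddWithValue("@SeriesInstanceUID", seriesUid);
                cmd.Parameters.AddWithValue("@SOPInstanceUID", instanceUid);

                con.Open();

                //any row coming back means at least one UID matched, i.e. the record should be rejected
                return cmd.ExecuteScalar() != null;
            }
        }
    }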
Example #3
        public virtual DataTable GetChunk(IDataLoadEventListener listener, GracefulCancellationToken cancellationToken)
        {
            // we are in the Global Commands case, let's return an empty DataTable (not null)
            // so we can trigger the destination to extract the globals docs and sql
            if (GlobalsRequest != null)
            {
                GlobalsRequest.ElevateState(ExtractCommandState.WaitingForSQLServer);
                if (firstGlobalChunk)
                {
                    //unless we are checking, start auditing
                    StartAuditGlobals();

                    firstGlobalChunk = false;
                    return new DataTable(ExtractionDirectory.GLOBALS_DATA_NAME);
                }

                return null;
            }

            if (Request == null)
            {
                throw new Exception("Component has not been initialized before being asked to GetChunk(s)");
            }

            Request.ElevateState(ExtractCommandState.WaitingForSQLServer);

            if (_cancel)
            {
                throw new Exception("User cancelled data extraction");
            }

            if (_hostedSource == null)
            {
                StartAudit(Request.QueryBuilder.SQL);

                if (Request.DatasetBundle.DataSet.DisableExtraction)
                {
                    throw new Exception("Cannot extract " + Request.DatasetBundle.DataSet + " because DisableExtraction is set to true");
                }

                _hostedSource = new DbDataCommandDataFlowSource(GetCommandSQL(listener),
                                                                "ExecuteDatasetExtraction " + Request.DatasetBundle.DataSet,
                                                                _catalogue.GetDistinctLiveDatabaseServer(DataAccessContext.DataExport, false).Builder,
                                                                ExecutionTimeout);

                _hostedSource.AllowEmptyResultSets = AllowEmptyExtractions;
                _hostedSource.BatchSize            = BatchSize;
            }

            DataTable chunk = null;

            try
            {
                chunk = _hostedSource.GetChunk(listener, cancellationToken);

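                //rows read ahead on a previous call (if any) are merged back into this chunk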
                chunk = _peeker.AddPeekedRowsIfAny(chunk);

                //if we are deduplicating the records in memory based on release id
                if (DistinctStrategy == DistinctStrategy.OrderByAndDistinctInMemory)
                {
                    var releaseIdentifierColumn = Request.ReleaseIdentifierSubstitutions.First().GetRuntimeName();

                    if (chunk != null)
                    {
                        //last release id in the current chunk
                        var lastReleaseId = chunk.Rows[chunk.Rows.Count - 1][releaseIdentifierColumn];

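                        //read ahead while subsequent rows share that release id (so one subject never spans two chunks) before deduplicating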
                        _peeker.AddWhile(_hostedSource, r => Equals(r[releaseIdentifierColumn], lastReleaseId), chunk);
                        chunk = MakeDistinct(chunk, listener, cancellationToken);
                    }
                }
            }
            catch (AggregateException a)
            {
                if (a.GetExceptionIfExists<TaskCanceledException>() != null)
                {
                    _cancel = true;
                }

                throw;
            }
            catch (Exception e)
            {
                listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, "Read from source failed", e));
            }

            if (cancellationToken.IsCancellationRequested)
            {
                throw new Exception("Data read cancelled because our cancellationToken was set, aborting data reading");
            }

            //if the first chunk is null
            if (firstChunk && chunk == null)
            {
                throw new Exception("There is no data to load, query returned no rows, query was:" + Environment.NewLine + Request.QueryBuilder.SQL);
            }

            //not the first chunk anymore
            firstChunk = false;

            //data exhausted
            if (chunk == null)
            {
                listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "Data exhausted after reading " + _rowsRead + " rows of data (" + UniqueReleaseIdentifiersEncountered.Count + " unique release identifiers seen)"));
                if (Request != null)
                {
                    Request.CumulativeExtractionResults.DistinctReleaseIdentifiersEncountered = UniqueReleaseIdentifiersEncountered.Count;
                }
                return null;
            }

            _rowsRead += chunk.Rows.Count;
            //the chunk carries data types for everything in the buffer, so we can record which columns/catalogue items have changed name/type etc during extraction
            if (ExtractTimeTransformationsObserved == null)
            {
                GenerateExtractionTransformObservations(chunk);
            }


            //see if the SqlDataReader has a column with the same name as the ReleaseIdentifierSQL (if so then we can use it to count the number of distinct subjects written out to the csv)
            bool includesReleaseIdentifier = _extractionIdentifiersidx.Count > 0;


            //first line - let's see what columns we wrote out
            //looks at the buffer and computes any transforms performed on the column


            _timeSpentValidating.Start();
            //build up the validation report (Missing/Wrong/Etc) - this has no mechanical effect on the extracted data just some metadata that goes into a flat file
            if (ExtractionTimeValidator != null && Request.IncludeValidation)
            {
                try
                {
                    chunk.Columns.Add(ValidationColumnName);

                    ExtractionTimeValidator.Validate(chunk, ValidationColumnName);

                    _rowsValidated += chunk.Rows.Count;
                    listener.OnProgress(this, new ProgressEventArgs("Validation", new ProgressMeasurement(_rowsValidated, ProgressType.Records), _timeSpentValidating.Elapsed));
                }
                catch (Exception ex)
                {
                    listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, "Could not validate data chunk", ex));
                    ValidationFailureException = ex;
                    ExtractionTimeValidator    = null;
                }
            }
            _timeSpentValidating.Stop();

            _timeSpentBuckettingDates.Start();
            if (ExtractionTimeTimeCoverageAggregator != null)
            {
                _rowsBucketted += chunk.Rows.Count;

                foreach (DataRow row in chunk.Rows)
                {
                    ExtractionTimeTimeCoverageAggregator.ProcessRow(row);
                }

                listener.OnProgress(this, new ProgressEventArgs("Bucketting Dates", new ProgressMeasurement(_rowsBucketted, ProgressType.Records), _timeSpentBuckettingDates.Elapsed));
            }
            _timeSpentBuckettingDates.Stop();

            _timeSpentCalculatingDISTINCT.Start();
            //record unique release identifiers found
            if (includesReleaseIdentifier)
            {
                foreach (string idx in _extractionIdentifiersidx)
                {
                    foreach (DataRow r in chunk.Rows)
                    {
                        if (r[idx] == DBNull.Value)
                        {
                            if (_extractionIdentifiersidx.Count == 1)
                            {
                                throw new Exception("Null release identifier found in extract of dataset " + Request.DatasetBundle.DataSet);
                            }
                            else
                            {
                                continue; //there are multiple extraction identifiers; that's fine if one or two are null
                            }
                        }
                        if (!UniqueReleaseIdentifiersEncountered.Contains(r[idx]))
                        {
                            UniqueReleaseIdentifiersEncountered.Add(r[idx]);
                        }
                    }

                    listener.OnProgress(this, new ProgressEventArgs("Calculating Distinct Release Identifiers", new ProgressMeasurement(UniqueReleaseIdentifiersEncountered.Count, ProgressType.Records), _timeSpentCalculatingDISTINCT.Elapsed));
                }
            }
            _timeSpentCalculatingDISTINCT.Stop();

            return chunk;
        }
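
When DistinctStrategy is OrderByAndDistinctInMemory, the source reads ahead until the release identifier changes and then deduplicates the chunk in memory via MakeDistinct, which is not shown above. A minimal stand-in for that step, assuming plain whole-row deduplication, could look like the following (the class and method names are hypothetical):

    using System.Data;
    using System.Linq;

    public static class DistinctChunkExample
    {
        //Keeps one copy of each row that is identical across all columns
        public static DataTable MakeChunkDistinct(DataTable chunk)
        {
            var allColumns = chunk.Columns.Cast<DataColumn>()
                                          .Select(c => c.ColumnName)
                                          .ToArray();

            //DataView.ToTable(distinct: true, columns) drops duplicate rows
            return new DataView(chunk).ToTable(true, allColumns);
        }
    }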