/// <summary>
/// Executes the SQL of the current <c>Request</c> against the live database server of the
/// catalogue and returns a preview of the results.
/// </summary>
/// <returns>
/// An empty <see cref="DataTable"/> when <c>Request</c> is <c>ExtractDatasetCommand.EmptyCommand</c>;
/// otherwise a table filled with up to 1000 records of the extraction query.
/// </returns>
public virtual DataTable TryGetPreview()
{
    if (Request == ExtractDatasetCommand.EmptyCommand)
    {
        return new DataTable();
    }

    DataTable toReturn = new DataTable();
    var server = _catalogue.GetDistinctLiveDatabaseServer(DataAccessContext.DataExport, false);

    // Disposing the connection at the end of the using block also closes it,
    // so no explicit Close() call is required (and it is released even if Fill throws).
    using (var con = server.GetConnection())
    {
        con.Open();

        // The adapter is disposable too; release it as soon as the preview has been read.
        using (var da = server.GetDataAdapter(Request.QueryBuilder.SQL, con))
        {
            //get up to 1000 records
            da.Fill(0, 1000, toReturn);
        }
    }

    return toReturn;
}
/// <summary>
/// Builds the lookup query (a TOP 1 select whose WHERE clause ORs together one filter per
/// available UID column), verifies that at least one UID column was found, tests the
/// connection to the catalogue's server and finally runs a trial lookup.
/// </summary>
/// <exception cref="NotSupportedException">
/// Thrown when none of the study/series/instance UID columns are available in the catalogue.
/// </exception>
/// <exception cref="Exception">
/// Thrown (wrapping the original error) when the server cannot be resolved or the connection test fails.
/// </exception>
private void Initialize()
{
    // Work out which UID columns the Catalogue has; the file path column is not required
    _columnSet = QueryToExecuteColumnSet.Create(_catalogue, false);

    // Gives us DBMS specific syntax (e.g. the parameter symbol)
    var syntaxHelper = _catalogue.GetQuerySyntaxHelper();

    // In-memory home for the OR container and its filter(s)
    var memoryRepository = new MemoryCatalogueRepository();

    // Builder for the SQL that is run during the lookup stage
    _queryBuilder = new QueryBuilder(null, null);

    // We only care whether the uid appears at all (if it does we reject it), so one row is enough
    _queryBuilder.TopX = 1;

    // Filters are ORed together i.e. StudyInstanceUID = @StudyInstanceUID OR SeriesInstanceUID = @SeriesInstanceUID
    var orContainer = _queryBuilder.RootFilterContainer =
        new SpontaneouslyInventedFilterContainer(memoryRepository, null, null, FilterContainerOperation.OR);

    // SELECT and WHERE parts for the study level UID
    var studyColumn = _columnSet.StudyTagColumn;
    if (studyColumn != null)
    {
        _queryBuilder.AddColumn(studyColumn);

        string studyWhere = $"{studyColumn.SelectSQL} = {syntaxHelper.ParameterSymbol}{QueryToExecuteColumnSet.DefaultStudyIdColumnName}";

        _studyFilter = new SpontaneouslyInventedFilter(memoryRepository, orContainer, studyWhere, "Study UID Filter", "", null);
        orContainer.AddChild(_studyFilter);
    }

    // SELECT and WHERE parts for the series level UID
    var seriesColumn = _columnSet.SeriesTagColumn;
    if (seriesColumn != null)
    {
        _queryBuilder.AddColumn(seriesColumn);

        string seriesWhere = $"{seriesColumn.SelectSQL} = {syntaxHelper.ParameterSymbol}{QueryToExecuteColumnSet.DefaultSeriesIdColumnName}";

        _seriesFilter = new SpontaneouslyInventedFilter(memoryRepository, orContainer, seriesWhere, "Series UID Filter", "", null);
        orContainer.AddChild(_seriesFilter);
    }

    // SELECT and WHERE parts for the instance level UID
    var instanceColumn = _columnSet.InstanceTagColumn;
    if (instanceColumn != null)
    {
        _queryBuilder.AddColumn(instanceColumn);

        string instanceWhere = $"{instanceColumn.SelectSQL} = {syntaxHelper.ParameterSymbol}{QueryToExecuteColumnSet.DefaultInstanceIdColumnName}";

        _instanceFilter = new SpontaneouslyInventedFilter(memoryRepository, orContainer, instanceWhere, "Instance UID Filter", "", null);
        orContainer.AddChild(_instanceFilter);
    }

    // A query with no selected columns means no UID column was found, which we cannot work with
    if (!_queryBuilder.SelectColumns.Any())
    {
        throw new NotSupportedException($"Blacklist Catalogue {_catalogue} (ID={_catalogue.ID}) did not have any Core ExtractionInformation columns corresponding to any of the image UID tags (e.g. StudyInstanceUID, SeriesInstanceUID, SOPInstanceUID).");
    }

    try
    {
        // Resolve the server and confirm that it can be reached
        _server = _catalogue.GetDistinctLiveDatabaseServer(DataAccessContext.DataExport, true);
        _server.TestConnection();
    }
    catch (Exception e)
    {
        throw new Exception($"Failed to test connection for Catalogue {_catalogue}", e);
    }

    // Finish with a trial lookup query against the remote database
    DoLookup("test1", "test2", "test3");
}
/// <summary>
/// Reads the next batch of records for the current extraction. For a globals request an empty,
/// named table is returned once and then null. For a dataset request the hosted source is
/// created on first use, the chunk is read (optionally made distinct in memory by release
/// identifier) and then validation, date bucketing and distinct release identifier counting
/// are performed on it before it is returned.
/// </summary>
/// <param name="listener">Receives progress and notification events raised while reading and processing the chunk.</param>
/// <param name="cancellationToken">Passed to the hosted source; if cancellation has been requested after the read, an exception is thrown.</param>
/// <returns>
/// The chunk that was read; an empty table named <c>ExtractionDirectory.GLOBALS_DATA_NAME</c> on the
/// first call of a globals request; or null when there is nothing (more) to return.
/// </returns>
/// <exception cref="Exception">
/// Thrown when the component has no <c>Request</c>, when the extraction was cancelled, when the dataset has
/// <c>DisableExtraction</c> set, when the very first chunk is null (query returned no rows) or when a
/// null release identifier is found and there is only a single extraction identifier column.
/// </exception>
public virtual DataTable GetChunk(IDataLoadEventListener listener, GracefulCancellationToken cancellationToken)
{
    // we are in the Global Commands case, let's return an empty DataTable (not null)
    // so we can trigger the destination to extract the globals docs and sql
    if (GlobalsRequest != null)
    {
        GlobalsRequest.ElevateState(ExtractCommandState.WaitingForSQLServer);

        if (firstGlobalChunk)
        {
            //unless we are checking, start auditing
            StartAuditGlobals();

            firstGlobalChunk = false;
            return new DataTable(ExtractionDirectory.GLOBALS_DATA_NAME);
        }

        return null;
    }

    if (Request == null)
    {
        throw new Exception("Component has not been initialized before being asked to GetChunk(s)");
    }

    Request.ElevateState(ExtractCommandState.WaitingForSQLServer);

    if (_cancel)
    {
        throw new Exception("User cancelled data extraction");
    }

    // first call for this request: start the audit and create the source that executes the extraction SQL
    if (_hostedSource == null)
    {
        StartAudit(Request.QueryBuilder.SQL);

        if (Request.DatasetBundle.DataSet.DisableExtraction)
        {
            throw new Exception("Cannot extract " + Request.DatasetBundle.DataSet + " because DisableExtraction is set to true");
        }

        _hostedSource = new DbDataCommandDataFlowSource(
            GetCommandSQL(listener),
            "ExecuteDatasetExtraction " + Request.DatasetBundle.DataSet,
            _catalogue.GetDistinctLiveDatabaseServer(DataAccessContext.DataExport, false).Builder,
            ExecutionTimeout);

        _hostedSource.AllowEmptyResultSets = AllowEmptyExtractions;
        _hostedSource.BatchSize = BatchSize;
    }

    DataTable chunk = null;

    try
    {
        chunk = _hostedSource.GetChunk(listener, cancellationToken);

        chunk = _peeker.AddPeekedRowsIfAny(chunk);

        //if we are trying to distinct the records in memory based on release id
        if (DistinctStrategy == DistinctStrategy.OrderByAndDistinctInMemory)
        {
            var releaseIdentifierColumn = Request.ReleaseIdentifierSubstitutions.First().GetRuntimeName();

            if (chunk != null)
            {
                // a chunk with no rows has no "last release id"; indexing row -1 would throw,
                // so only peek ahead when there is at least one row
                if (chunk.Rows.Count > 0)
                {
                    //last release id in the current chunk
                    var lastReleaseId = chunk.Rows[chunk.Rows.Count - 1][releaseIdentifierColumn];

                    // keep reading while the following rows share that release id
                    _peeker.AddWhile(_hostedSource, r => Equals(r[releaseIdentifierColumn], lastReleaseId), chunk);
                }

                chunk = MakeDistinct(chunk, listener, cancellationToken);
            }
        }
    }
    catch (AggregateException a)
    {
        // remember that the task was cancelled so that later calls fail fast
        if (a.GetExceptionIfExists<TaskCanceledException>() != null)
        {
            _cancel = true;
        }

        throw;
    }
    catch (Exception e)
    {
        // reported to the listener as an Error event rather than rethrown here
        listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, "Read from source failed", e));
    }

    if (cancellationToken.IsCancellationRequested)
    {
        throw new Exception("Data read cancelled because our cancellationToken was set, aborting data reading");
    }

    //if the first chunk is null
    if (firstChunk && chunk == null)
    {
        throw new Exception("There is no data to load, query returned no rows, query was:" + Environment.NewLine + Request.QueryBuilder.SQL);
    }

    //not the first chunk anymore
    firstChunk = false;

    //data exhausted
    if (chunk == null)
    {
        listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "Data exhausted after reading " + _rowsRead + " rows of data (" + UniqueReleaseIdentifiersEncountered.Count + " unique release identifiers seen)"));

        if (Request != null)
        {
            Request.CumulativeExtractionResults.DistinctReleaseIdentifiersEncountered = UniqueReleaseIdentifiersEncountered.Count;
        }

        return null;
    }

    _rowsRead += chunk.Rows.Count;

    //chunk will have datatypes for all the things in the buffer so we can populate our dictionary of facts about what columns/catalogue items have spontaneously changed name/type etc
    if (ExtractTimeTransformationsObserved == null)
    {
        GenerateExtractionTransformObservations(chunk);
    }

    //see if the SqlDataReader has a column with the same name as the ReleaseIdentifierSQL (if so then we can use it to count the number of distinct subjects written out to the csv)
    bool includesReleaseIdentifier = _extractionIdentifiersidx.Count > 0;

    //first line - lets see what columns we wrote out
    //looks at the buffer and computes any transforms performed on the column

    _timeSpentValidating.Start();

    //build up the validation report (Missing/Wrong/Etc) - this has no mechanical effect on the extracted data just some metadata that goes into a flat file
    if (ExtractionTimeValidator != null && Request.IncludeValidation)
    {
        try
        {
            chunk.Columns.Add(ValidationColumnName);

            ExtractionTimeValidator.Validate(chunk, ValidationColumnName);

            _rowsValidated += chunk.Rows.Count;
            listener.OnProgress(this, new ProgressEventArgs("Validation", new ProgressMeasurement(_rowsValidated, ProgressType.Records), _timeSpentValidating.Elapsed));
        }
        catch (Exception ex)
        {
            // validation is metadata only: report the failure, remember it and stop validating later chunks
            listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, "Could not validate data chunk", ex));
            ValidationFailureException = ex;
            ExtractionTimeValidator = null;
        }
    }

    _timeSpentValidating.Stop();

    _timeSpentBuckettingDates.Start();

    if (ExtractionTimeTimeCoverageAggregator != null)
    {
        _rowsBucketted += chunk.Rows.Count;

        foreach (DataRow row in chunk.Rows)
        {
            ExtractionTimeTimeCoverageAggregator.ProcessRow(row);
        }

        // report the time spent bucketing (not the distinct-calculation stopwatch)
        listener.OnProgress(this, new ProgressEventArgs("Bucketting Dates", new ProgressMeasurement(_rowsBucketted, ProgressType.Records), _timeSpentBuckettingDates.Elapsed));
    }

    _timeSpentBuckettingDates.Stop();

    _timeSpentCalculatingDISTINCT.Start();

    //record unique release identifiers found
    if (includesReleaseIdentifier)
    {
        foreach (string idx in _extractionIdentifiersidx)
        {
            foreach (DataRow r in chunk.Rows)
            {
                if (r[idx] == DBNull.Value)
                {
                    if (_extractionIdentifiersidx.Count == 1)
                    {
                        throw new Exception("Null release identifier found in extract of dataset " + Request.DatasetBundle.DataSet);
                    }
                    else
                    {
                        continue; //there are multiple extraction identifiers thats fine if one or two are null
                    }
                }

                if (!UniqueReleaseIdentifiersEncountered.Contains(r[idx]))
                {
                    UniqueReleaseIdentifiersEncountered.Add(r[idx]);
                }
            }

            listener.OnProgress(this, new ProgressEventArgs("Calculating Distinct Release Identifiers", new ProgressMeasurement(UniqueReleaseIdentifiersEncountered.Count, ProgressType.Records), _timeSpentCalculatingDISTINCT.Elapsed));
        }
    }

    _timeSpentCalculatingDISTINCT.Stop();

    return chunk;
}