private void AsyncCopyMDFFilesWithEvents(string MDFSource, string MDFDestination, string LDFSource, string LDFDestination, IDataLoadEventListener job)
{
    Stopwatch s = new Stopwatch();
    s.Start();

    job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "Starting copy from " + MDFSource + " to " + MDFDestination));
    job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "Starting copy from " + LDFSource + " to " + LDFDestination));

    CopyWithProgress copyMDF = new CopyWithProgress();
    copyMDF.Progress += (size, transferred, streamSize, bytesTransferred, number, reason, file, destinationFile, data) =>
    {
        job.OnProgress(this, new ProgressEventArgs(MDFDestination, new ProgressMeasurement((int)(transferred * 0.001), ProgressType.Kilobytes), s.Elapsed));
        return CopyWithProgress.CopyProgressResult.PROGRESS_CONTINUE;
    };
    copyMDF.XCopy(MDFSource, MDFDestination);

    s.Reset();
    s.Start();

    CopyWithProgress copyLDF = new CopyWithProgress();
    copyLDF.Progress += (size, transferred, streamSize, bytesTransferred, number, reason, file, destinationFile, data) =>
    {
        job.OnProgress(this, new ProgressEventArgs(LDFDestination, new ProgressMeasurement((int)(transferred * 0.001), ProgressType.Kilobytes), s.Elapsed));
        return CopyWithProgress.CopyProgressResult.PROGRESS_CONTINUE;
    };
    copyLDF.XCopy(LDFSource, LDFDestination);

    s.Stop();
}
public DataTable ProcessPipelineData(DataTable toProcess, IDataLoadEventListener listener, GracefulCancellationToken cancellationToken)
{
    bool didAno = false;

    stopwatch_TimeSpentTransforming.Start();

    if (!_bInitialized)
        throw new Exception("Not Initialized yet");

    recordsProcessedSoFar += toProcess.Rows.Count;

    var missingColumns = columnsToAnonymise.Keys.Where(k => !toProcess.Columns.Cast<DataColumn>().Any(c => c.ColumnName.Equals(k))).ToArray();

    if (missingColumns.Any())
        throw new KeyNotFoundException("The following columns (which have ANO Transforms on them) were missing from the DataTable:" + Environment.NewLine
            + string.Join(Environment.NewLine, missingColumns) + Environment.NewLine
            + "The columns found in the DataTable were:" + Environment.NewLine
            + string.Join(Environment.NewLine, toProcess.Columns.Cast<DataColumn>().Select(c => c.ColumnName)));

    //Dump Identifiers
    stopwatch_TimeSpentDumping.Start();

    //dump all the remaining identifier columns (those that must disappear from the pipeline, as opposed to those above which are substituted for ANO versions)
    _dumper.DumpAllIdentifiersInTable(toProcess);

    stopwatch_TimeSpentDumping.Stop();

    if (_dumper.HaveDumpedRecords)
        listener.OnProgress(this, new ProgressEventArgs("Dump Identifiers", new ProgressMeasurement(recordsProcessedSoFar, ProgressType.Records), stopwatch_TimeSpentDumping.Elapsed)); //time taken to dump identifiers

    //Process ANO Identifier Substitutions
    //for each column with an ANOTransformer
    foreach (KeyValuePair<string, ANOTransformer> kvp in columnsToAnonymise)
    {
        didAno = true;

        var column = kvp.Key;
        ANOTransformer transformer = kvp.Value;

        //add an ANO version
        DataColumn ANOColumn = new DataColumn(ANOTable.ANOPrefix + column);
        toProcess.Columns.Add(ANOColumn);

        //populate the ANO version
        transformer.Transform(toProcess, toProcess.Columns[column], ANOColumn);

        //drop the non ANO version
        toProcess.Columns.Remove(column);
    }

    stopwatch_TimeSpentTransforming.Stop();

    if (didAno)
        listener.OnProgress(this, new ProgressEventArgs("Anonymise Identifiers", new ProgressMeasurement(recordsProcessedSoFar, ProgressType.Records), stopwatch_TimeSpentTransforming.Elapsed)); //time taken to swap ANO identifiers

    return toProcess;
}
public DataTable ProcessPipelineData(DataTable toProcess, IDataLoadEventListener job, GracefulCancellationToken cancellationToken)
{
    _request.ElevateState(ExtractCommandState.WritingToFile);

    if (!haveWrittenBundleContents && _request is ExtractDatasetCommand)
        WriteBundleContents(((ExtractDatasetCommand)_request).DatasetBundle, job, cancellationToken);

    if (_request is ExtractGlobalsCommand)
    {
        ExtractGlobals((ExtractGlobalsCommand)_request, job, _dataLoadInfo);
        return null;
    }

    stopwatch.Start();

    if (!haveOpened)
    {
        haveOpened = true;
        _output.Open();
        _output.WriteHeaders(toProcess);
        LinesWritten = 0;

        //create an audit object
        TableLoadInfo = new TableLoadInfo(_dataLoadInfo, "", OutputFile, new DataSource[] { new DataSource(_request.DescribeExtractionImplementation(), DateTime.Now) }, -1);
    }

    foreach (DataRow row in toProcess.Rows)
    {
        _output.Append(row);

        LinesWritten++;

        if (TableLoadInfo.IsClosed)
            throw new Exception("TableLoadInfo was closed so could not write number of rows (" + LinesWritten + ") to audit object - most likely the extraction crashed?");
        else
            TableLoadInfo.Inserts = LinesWritten;

        if (LinesWritten % 1000 == 0)
            job.OnProgress(this, new ProgressEventArgs("Write to file " + OutputFile, new ProgressMeasurement(LinesWritten, ProgressType.Records), stopwatch.Elapsed));
    }

    job.OnProgress(this, new ProgressEventArgs("Write to file " + OutputFile, new ProgressMeasurement(LinesWritten, ProgressType.Records), stopwatch.Elapsed));

    stopwatch.Stop();
    _output.Flush();

    return null;
}
protected override void WriteRows(DataTable toProcess, IDataLoadEventListener job, GracefulCancellationToken cancellationToken, Stopwatch stopwatch)
{
    foreach (DataRow row in toProcess.Rows)
    {
        _output.Append(row);

        LinesWritten++;

        if (LinesWritten % 1000 == 0)
            job.OnProgress(this, new ProgressEventArgs("Write to file " + OutputFile, new ProgressMeasurement(LinesWritten, ProgressType.Records), stopwatch.Elapsed));
    }

    job.OnProgress(this, new ProgressEventArgs("Write to file " + OutputFile, new ProgressMeasurement(LinesWritten, ProgressType.Records), stopwatch.Elapsed));
}
private void ProcessDir(string dir, DataTable dt, IDataLoadEventListener listener)
{
    listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, $"Starting '{dir}'"));

    if (File.Exists(dir))
    {
        //the inventory entry is an xml file directly
        XmlToRows(dir, dt, listener);
        return;
    }

    if (!Directory.Exists(dir))
    {
        listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, $"'{dir}' was not a Directory or File"));
        return;
    }

    var matches = Directory.GetFiles(dir, SearchPattern, SearchOption.AllDirectories);

    listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, $"Found {matches.Length} CFind files in {dir}"));

    foreach (var file in matches)
    {
        XmlToRows(file, dt, listener);

        if (filesRead++ % 10000 == 0)
            listener.OnProgress(this, new ProgressEventArgs("Reading files", new ProgressMeasurement(filesRead, ProgressType.Records, matches.Length), timer?.Elapsed ?? TimeSpan.Zero));
    }
}
public DataTable ProcessPipelineData(DataTable toProcess, IDataLoadEventListener listener, GracefulCancellationToken cancellationToken)
{
    _sw.Start();

    DataTable outputTable = new DataTable();

    foreach (DataColumn dataColumn in toProcess.Columns)
        outputTable.Columns.Add(dataColumn.ColumnName, dataColumn.DataType);

    Regex regex = DeleteRowsWhereValuesMatch ?? new Regex(DeleteRowsWhereValuesMatchStandard.Regex);

    foreach (DataRow row in toProcess.Rows)
    {
        var val = row[ColumnNameToFind];

        //keep nulls, dbnulls or anything where ToString doesn't match the regex
        if (val == null || val == DBNull.Value || !regex.IsMatch(val.ToString()))
            outputTable.ImportRow(row);
        else
            _deleted++;
    }

    listener.OnProgress(this, new ProgressEventArgs("Deleting Rows", new ProgressMeasurement(_deleted, ProgressType.Records), _sw.Elapsed));
    _sw.Stop();

    return outputTable;
}
public DataTable ProcessPipelineData(DataTable toProcess, IDataLoadEventListener job, GracefulCancellationToken cancellationToken)
{
    timer.Start();

StartAgain:
    foreach (DataRow row in toProcess.Rows)
    {
        for (int i = 0; i < columnsToClean.Count; i++)
        {
            string toClean = columnsToClean[i];
            string val = null;

            try
            {
                object o = row[toClean];

                if (o == DBNull.Value || o == null)
                    continue;

                if (!(o is string))
                    throw new ArgumentException("Despite being marked as a string column, object found in column " + toClean + " was of type " + o.GetType());

                val = o as string;
            }
            catch (ArgumentException e)
            {
                job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Warning, e.Message));

                //column could not be found (or was not a string) so stop cleaning it and restart the batch
                columnsToClean.Remove(columnsToClean[i]);
                goto StartAgain;
            }

            //it is empty
            if (string.IsNullOrWhiteSpace(val))
            {
                row[toClean] = DBNull.Value;
            }
            else
            {
                //trim it
                var valAfterClean = val.Trim();

                //set it only if trimming changed the value
                if (val != valAfterClean)
                    row[toClean] = valAfterClean;
            }
        }

        _rowsProcessed++;
    }

    timer.Stop();

    job.OnProgress(this, new ProgressEventArgs(_taskDescription, new ProgressMeasurement(_rowsProcessed, ProgressType.Records), timer.Elapsed));

    return toProcess;
}
protected override void Download(string file, ILoadDirectory destination, IDataLoadEventListener job)
{
    if (file.Contains("/") || file.Contains("\\"))
        throw new Exception("Was not expecting a relative path here");

    Stopwatch s = new Stopwatch();
    s.Start();

    using (var sftp = new SftpClient(_host, _username, _password))
    {
        sftp.ConnectionInfo.Timeout = new TimeSpan(0, 0, 0, TimeoutInSeconds);
        sftp.Connect();

        //if there is a specified remote directory then reference it otherwise reference it locally (or however we were told about it from GetFileList())
        string fullFilePath = !string.IsNullOrWhiteSpace(RemoteDirectory) ? Path.Combine(RemoteDirectory, file) : file;

        string destinationFilePath = Path.Combine(destination.ForLoading.FullName, file);

        //register for events
        Action<ulong> callback = (totalBytes) => job.OnProgress(this, new ProgressEventArgs(destinationFilePath, new ProgressMeasurement((int)(totalBytes * 0.001), ProgressType.Kilobytes), s.Elapsed));

        using (var fs = new FileStream(destinationFilePath, FileMode.CreateNew))
        {
            //download
            sftp.DownloadFile(fullFilePath, fs, callback);
            fs.Close();
        }

        _filesRetrieved.Add(fullFilePath);
    }

    s.Stop();
}
private void WriteBundleContents(IExtractableDatasetBundle datasetBundle, IDataLoadEventListener job, GracefulCancellationToken cancellationToken)
{
    var rootDir = _request.GetExtractionDirectory();
    var supportingSQLFolder = new DirectoryInfo(Path.Combine(rootDir.FullName, SupportingSQLTable.ExtractionFolderName));
    var lookupDir = rootDir.CreateSubdirectory("Lookups");

    //extract the documents
    foreach (SupportingDocument doc in datasetBundle.Documents)
        datasetBundle.States[doc] = TryExtractSupportingDocument(rootDir, doc, job)
            ? ExtractCommandState.Completed
            : ExtractCommandState.Crashed;

    //extract supporting SQL
    foreach (SupportingSQLTable sql in datasetBundle.SupportingSQL)
        datasetBundle.States[sql] = TryExtractSupportingSQLTable(supportingSQLFolder, _request.Configuration, sql, job, _dataLoadInfo)
            ? ExtractCommandState.Completed
            : ExtractCommandState.Crashed;

    //extract lookups
    foreach (BundledLookupTable lookup in datasetBundle.LookupTables)
    {
        try
        {
            job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "About to extract lookup " + lookup));

            var server = DataAccessPortal.GetInstance().ExpectServer(lookup.TableInfo, DataAccessContext.DataExport);

            Stopwatch sw = new Stopwatch();
            sw.Start();

            //extracts all of them
            var extractTableVerbatim = new ExtractTableVerbatim(server, new[] { lookup.TableInfo.Name }, lookupDir, _request.Configuration.Separator, DateFormat);
            int linesWritten = extractTableVerbatim.DoExtraction();
            sw.Stop();

            job.OnProgress(this, new ProgressEventArgs("Lookup " + lookup, new ProgressMeasurement(linesWritten, ProgressType.Records), sw.Elapsed));

            if (_request is ExtractDatasetCommand)
            {
                var result = (_request as ExtractDatasetCommand).CumulativeExtractionResults;
                var supplementalResult = result.AddSupplementalExtractionResult("SELECT * FROM " + lookup.TableInfo.Name, lookup.TableInfo);
                supplementalResult.CompleteAudit(this.GetType(), extractTableVerbatim.OutputFilename, linesWritten);
            }

            datasetBundle.States[lookup] = ExtractCommandState.Completed;
        }
        catch (Exception e)
        {
            job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, "Error occurred trying to extract lookup " + lookup + " on server " + lookup.TableInfo.Server, e));

            datasetBundle.States[lookup] = ExtractCommandState.Crashed;
        }
    }

    haveWrittenBundleContents = true;
}
protected bool TryExtractSupportingSQLTable(SupportingSQLTable sql, DirectoryInfo directory, IExtractionConfiguration configuration, IDataLoadEventListener listener, DataLoadInfo dataLoadInfo)
{
    try
    {
        listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "Preparing to extract Supporting SQL " + sql + " to directory " + directory.FullName));

        Stopwatch sw = new Stopwatch();
        sw.Start();

        //start auditing it as a table load
        string target = Path.Combine(directory.FullName, sql.Name + ".csv");
        var tableLoadInfo = dataLoadInfo.CreateTableLoadInfo("", target, new[] { new DataSource(sql.SQL, DateTime.Now) }, -1);

        TryExtractSupportingSQLTableImpl(sql, directory, configuration, listener, out int sqlLinesWritten, out string description);

        sw.Stop();

        //end auditing it
        tableLoadInfo.Inserts = sqlLinesWritten;
        tableLoadInfo.CloseAndArchive();

        if (_request is ExtractDatasetCommand)
        {
            var result = (_request as ExtractDatasetCommand).CumulativeExtractionResults;
            var supplementalResult = result.AddSupplementalExtractionResult(sql.SQL, sql);
            supplementalResult.CompleteAudit(this.GetType(), description, sqlLinesWritten);
        }
        else
        {
            var extractGlobalsCommand = (_request as ExtractGlobalsCommand);
            Debug.Assert(extractGlobalsCommand != null, "extractGlobalsCommand != null");

            var result = new SupplementalExtractionResults(extractGlobalsCommand.RepositoryLocator.DataExportRepository, extractGlobalsCommand.Configuration, sql.SQL, sql);
            result.CompleteAudit(this.GetType(), description, sqlLinesWritten);
            extractGlobalsCommand.ExtractionResults.Add(result);
        }

        listener.OnProgress(this, new ProgressEventArgs("Extract " + sql, new ProgressMeasurement(sqlLinesWritten, ProgressType.Records), sw.Elapsed));
        listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "Extracted " + sqlLinesWritten + " records from SupportingSQL " + sql + " into directory " + directory.FullName));

        return true;
    }
    catch (Exception e)
    {
        if (e is SqlException)
            listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, "Failed to run extraction SQL (make sure to fully specify all database/table/column objects completely):" + Environment.NewLine + sql.SQL, e));
        else
            listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, "Failed to extract " + sql + " into directory " + directory.FullName, e));

        return false;
    }
}
public DataTable ProcessPipelineData(DataTable toProcess, IDataLoadEventListener listener, GracefulCancellationToken cancellationToken)
{
    sw.Start();

    DataTable toReturn = toProcess.Clone();

    //now sort rows
    foreach (DataRow row in toProcess.Rows)
    {
        totalRecordsProcessed++;

        int hashOfItems = GetHashCode(row.ItemArray);

        if (unqiueHashesSeen.ContainsKey(hashOfItems))
        {
            //GetHashCode on ItemArray of row has been seen before but it could be a collision so call Enumerable.SequenceEqual just in case.
            if (unqiueHashesSeen[hashOfItems].Any(r => r.ItemArray.SequenceEqual(row.ItemArray)))
            {
                totalDuplicatesFound++;
                continue; //it's a duplicate
            }

            unqiueHashesSeen[hashOfItems].Add(row);
        }
        else
        {
            //it's not a duplicate hashcode so add it to the return array and to the record of everything we have seen so far (so that we do not run into issues across batches)
            unqiueHashesSeen.Add(hashOfItems, new List<DataRow>(new[] { row }));
        }

        toReturn.Rows.Add(row.ItemArray);
    }

    sw.Stop();

    if (!NoLogging)
    {
        listener.OnProgress(this, new ProgressEventArgs("Evaluating For Duplicates", new ProgressMeasurement(totalRecordsProcessed, ProgressType.Records), sw.Elapsed));
        listener.OnProgress(this, new ProgressEventArgs("Discarding Duplicates", new ProgressMeasurement(totalDuplicatesFound, ProgressType.Records), sw.Elapsed));
    }

    return toReturn;
}
private void ExtractLookupTableSql(BundledLookupTable lookup, IDataLoadEventListener listener, DataLoadInfo dataLoadInfo)
{
    try
    {
        var tempDestination = new DataTableUploadDestination();

        var server = DataAccessPortal.GetInstance().ExpectServer(lookup.TableInfo, DataAccessContext.DataExport);

        listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "About to download SQL for lookup " + lookup.TableInfo.Name));

        using (var con = server.GetConnection())
        {
            con.Open();

            var sqlString = "SELECT * FROM " + lookup.TableInfo.Name;

            listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "Connection opened successfully, about to send SQL command: " + sqlString));

            var cmd = DatabaseCommandHelper.GetCommand(sqlString, con);
            var da = DatabaseCommandHelper.GetDataAdapter(cmd);

            var sw = new Stopwatch();
            sw.Start();

            DataTable dt = new DataTable();
            da.Fill(dt);

            dt.TableName = GetTableName(_destinationDatabase.Server.GetQuerySyntaxHelper().GetSensibleTableNameFromString(lookup.TableInfo.Name));

            var tableLoadInfo = dataLoadInfo.CreateTableLoadInfo("", dt.TableName, new[] { new DataSource(sqlString, DateTime.Now) }, -1);
            tableLoadInfo.Inserts = dt.Rows.Count;

            listener.OnProgress(this, new ProgressEventArgs("Reading from Lookup " + lookup.TableInfo.Name, new ProgressMeasurement(dt.Rows.Count, ProgressType.Records), sw.Elapsed));
            listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "Decided on the following destination table name for Lookup: " + dt.TableName));

            tempDestination.AllowResizingColumnsAtUploadTime = true;
            tempDestination.PreInitialize(GetDestinationDatabase(listener), listener);
            tempDestination.ProcessPipelineData(dt, listener, new GracefulCancellationToken());
            tempDestination.Dispose(listener, null);

            //end auditing it
            tableLoadInfo.CloseAndArchive();

            if (_request is ExtractDatasetCommand)
            {
                var result = (_request as ExtractDatasetCommand).CumulativeExtractionResults;
                var supplementalResult = result.AddSupplementalExtractionResult("SELECT * FROM " + lookup.TableInfo.Name, lookup.TableInfo);
                supplementalResult.CompleteAudit(this.GetType(), TargetDatabaseServer.ID + "|" + GetDatabaseName() + "|" + dt.TableName, dt.Rows.Count);
            }
        }
    }
    catch (Exception e)
    {
        listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, "Extraction of Lookup " + lookup.TableInfo.Name + " failed ", e));
        throw;
    }
}
private void RaiseEvents(DataTable chunk, IDataLoadEventListener job)
{
    if (chunk != null)
    {
        _recordsWritten += chunk.Rows.Count;

        if (TableLoadInfo != null)
            TableLoadInfo.Inserts = _recordsWritten;
    }

    job.OnProgress(this, new ProgressEventArgs(_taskBeingPerformed, new ProgressMeasurement(_recordsWritten, ProgressType.Records), _timer.Elapsed));
}
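//All of the examples in this document produce progress and notification events; none show
//the consuming side. Below is a minimal listener sketch that simply writes everything to
//the console. The two interface methods match those used throughout these examples; the
//property names accessed on the event args (ProgressEventType, Message, TaskDescription,
//Progress.Value) are assumptions and may differ slightly in the actual API.
public class ConsoleProgressListener : IDataLoadEventListener
{
    public void OnNotify(object sender, NotifyEventArgs e)
    {
        //e.ProgressEventType and e.Message are assumed property names
        Console.WriteLine($"[{e.ProgressEventType}] {e.Message}");
    }

    public void OnProgress(object sender, ProgressEventArgs e)
    {
        //e.TaskDescription and e.Progress.Value are assumed property names
        Console.WriteLine($"{e.TaskDescription}: {e.Progress.Value} so far");
    }
}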
public DataTable ProcessPipelineData(DataTable toProcess, IDataLoadEventListener listener, GracefulCancellationToken cancellationToken)
{
    GetCommonNamesTable(new ThrowImmediatelyCheckNotifier());

    listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "Ready to process batch with row count " + toProcess.Rows.Count));

    _timeProcessing.Start();

    //Go through each row in the table
    foreach (DataRow row in toProcess.Rows)
    {
        //for each cell in current row
        foreach (DataColumn col in toProcess.Columns)
        {
            //if it's a column we are skipping, move on
            if (ColumnsNotToEvaluate != null && ColumnsNotToEvaluate.IsMatch(col.ColumnName))
                continue;

            //if it is a string
            var stringValue = row[col] as string;

            if (stringValue != null)
            {
                //replace any common names with REDACTED
                foreach (var name in _commonNames)
                    stringValue = Regex.Replace(stringValue, name, "REDACTED", RegexOptions.IgnoreCase);

                //if string value changed
                if (!row[col].Equals(stringValue))
                {
                    //increment the counter of redactions made
                    _redactionsMade++;

                    //update the cell to the new value
                    row[col] = stringValue;
                }
            }
        }
    }

    _timeProcessing.Stop();

    listener.OnProgress(this, new ProgressEventArgs("REDACTING Names", new ProgressMeasurement(_redactionsMade, ProgressType.Records), _timeProcessing.Elapsed));

    return toProcess;
}
public DataTable ProcessPipelineData(DataTable toProcess, IDataLoadEventListener listener, GracefulCancellationToken cancellationToken)
{
    _sw.Start();

    Regex regex = NullCellsWhereValuesMatch ?? new Regex(NullCellsWhereValuesMatchStandard.Regex);

    foreach (DataRow row in toProcess.Rows)
    {
        var val = row[ColumnNameToFind];

        //set the cell to DBNull where ToString matches the regex (nulls and DBNulls are left alone)
        if (val != null && val != DBNull.Value && regex.IsMatch(val.ToString()))
        {
            row[ColumnNameToFind] = DBNull.Value;
            _changes++;
        }
    }

    listener.OnProgress(this, new ProgressEventArgs("SetNull Rows", new ProgressMeasurement(_changes, ProgressType.Records), _sw.Elapsed));
    _sw.Stop();

    return toProcess;
}
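//Hypothetical usage sketch for the cell-nulling component above. The component's class
//name (SetNullForMatchingValues here) is an assumption since the example does not show it;
//ColumnNameToFind and NullCellsWhereValuesMatch come from the example itself, and
//ConsoleProgressListener is the listener sketch defined earlier in this document.
var dt = new DataTable();
dt.Columns.Add("Forename");
dt.Rows.Add("Unknown");
dt.Rows.Add("Frank");

var component = new SetNullForMatchingValues
{
    ColumnNameToFind = "Forename",
    NullCellsWhereValuesMatch = new Regex("^Unknown$")
};

//after processing, the "Unknown" cell is DBNull while "Frank" is untouched
var result = component.ProcessPipelineData(dt, new ConsoleProgressListener(), new GracefulCancellationToken());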
protected bool TryExtractLookupTable(BundledLookupTable lookup, DirectoryInfo lookupDir, IDataLoadEventListener job)
{
    Stopwatch sw = new Stopwatch();
    sw.Start();

    job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "About to extract lookup " + lookup));

    try
    {
        TryExtractLookupTableImpl(lookup, lookupDir, _request.Configuration, job, out int linesWritten, out string destinationDescription);

        sw.Stop();
        job.OnProgress(this, new ProgressEventArgs("Lookup " + lookup, new ProgressMeasurement(linesWritten, ProgressType.Records), sw.Elapsed));

        //audit the extraction in the log
        var tableLoadInfo = _dataLoadInfo.CreateTableLoadInfo("", destinationDescription, new[] { new DataSource("SELECT * FROM " + lookup.TableInfo.Name, DateTime.Now) }, -1);
        tableLoadInfo.Inserts = linesWritten;
        tableLoadInfo.CloseAndArchive();

        //audit in cumulative extraction results (determines release-ability of artifacts)
        if (_request is ExtractDatasetCommand)
        {
            var result = (_request as ExtractDatasetCommand).CumulativeExtractionResults;
            var supplementalResult = result.AddSupplementalExtractionResult("SELECT * FROM " + lookup.TableInfo.Name, lookup.TableInfo);
            supplementalResult.CompleteAudit(this.GetType(), destinationDescription, linesWritten);
        }

        return true;
    }
    catch (Exception e)
    {
        job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, "Error occurred trying to extract lookup " + lookup + " on server " + lookup.TableInfo.Server, e));
        return false;
    }
}
public virtual DataTable GetChunk(IDataLoadEventListener listener, GracefulCancellationToken cancellationToken)
{
    // we are in the Global Commands case, let's return an empty DataTable (not null)
    // so we can trigger the destination to extract the globals docs and sql
    if (GlobalsRequest != null)
    {
        GlobalsRequest.ElevateState(ExtractCommandState.WaitingForSQLServer);

        if (firstGlobalChunk)
        {
            //unless we are checking, start auditing
            StartAuditGlobals();

            firstGlobalChunk = false;
            return new DataTable(ExtractionDirectory.GLOBALS_DATA_NAME);
        }

        return null;
    }

    if (Request == null)
        throw new Exception("Component has not been initialized before being asked to GetChunk(s)");

    Request.ElevateState(ExtractCommandState.WaitingForSQLServer);

    if (_cancel)
        throw new Exception("User cancelled data extraction");

    if (_hostedSource == null)
    {
        StartAudit(Request.QueryBuilder.SQL);

        if (Request.DatasetBundle.DataSet.DisableExtraction)
            throw new Exception("Cannot extract " + Request.DatasetBundle.DataSet + " because DisableExtraction is set to true");

        _hostedSource = new DbDataCommandDataFlowSource(GetCommandSQL(listener),
                                                        "ExecuteDatasetExtraction " + Request.DatasetBundle.DataSet,
                                                        Request.GetDistinctLiveDatabaseServer().Builder,
                                                        ExecutionTimeout);

        // If we are running in batches then always allow empty extractions
        _hostedSource.AllowEmptyResultSets = AllowEmptyExtractions || Request.IsBatchResume;
        _hostedSource.BatchSize = BatchSize;
    }

    DataTable chunk = null;

    try
    {
        chunk = _hostedSource.GetChunk(listener, cancellationToken);

        chunk = _peeker.AddPeekedRowsIfAny(chunk);

        //if we are trying to distinct the records in memory based on release id
        if (DistinctStrategy == DistinctStrategy.OrderByAndDistinctInMemory)
        {
            var releaseIdentifierColumn = Request.ReleaseIdentifierSubstitutions.First().GetRuntimeName();

            if (chunk != null)
            {
                //last release id in the current chunk
                var lastReleaseId = chunk.Rows[chunk.Rows.Count - 1][releaseIdentifierColumn];

                _peeker.AddWhile(_hostedSource, r => Equals(r[releaseIdentifierColumn], lastReleaseId), chunk);
                chunk = MakeDistinct(chunk, listener, cancellationToken);
            }
        }
    }
    catch (AggregateException a)
    {
        if (a.GetExceptionIfExists<TaskCanceledException>() != null)
            _cancel = true;

        throw;
    }
    catch (Exception e)
    {
        listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, "Read from source failed", e));
    }

    if (cancellationToken.IsCancellationRequested)
        throw new Exception("Data read cancelled because our cancellationToken was set, aborting data reading");

    //if the first chunk is null
    if (firstChunk && chunk == null && !AllowEmptyExtractions)
        throw new Exception("There is no data to load, query returned no rows, query was:" + Environment.NewLine +
                            (_hostedSource.Sql ?? Request.QueryBuilder.SQL));

    //not the first chunk anymore
    firstChunk = false;

    //data exhausted
    if (chunk == null)
    {
        listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "Data exhausted after reading " + _rowsRead + " rows of data (" + UniqueReleaseIdentifiersEncountered.Count + " unique release identifiers seen)"));

        if (Request != null)
            Request.CumulativeExtractionResults.DistinctReleaseIdentifiersEncountered = Request.IsBatchResume ? -1 : UniqueReleaseIdentifiersEncountered.Count;

        return null;
    }

    _rowsRead += chunk.Rows.Count;

    //chunk will have datatypes for all the things in the buffer so we can populate our dictionary of facts about what columns/catalogue items have spontaneously changed name/type etc
    if (ExtractTimeTransformationsObserved == null)
        GenerateExtractionTransformObservations(chunk);

    //see if the SqlDataReader has a column with the same name as the ReleaseIdentifierSQL (if so then we can use it to count the number of distinct subjects written out to the csv)
    bool includesReleaseIdentifier = _extractionIdentifiersidx.Count > 0;

    //first line - lets see what columns we wrote out
    //looks at the buffer and computes any transforms performed on the column
    _timeSpentValidating.Start();

    //build up the validation report (Missing/Wrong/Etc) - this has no mechanical effect on the extracted data, just some metadata that goes into a flat file
    if (ExtractionTimeValidator != null && Request.IncludeValidation)
    {
        try
        {
            chunk.Columns.Add(ValidationColumnName);

            ExtractionTimeValidator.Validate(chunk, ValidationColumnName);

            _rowsValidated += chunk.Rows.Count;
            listener.OnProgress(this, new ProgressEventArgs("Validation", new ProgressMeasurement(_rowsValidated, ProgressType.Records), _timeSpentValidating.Elapsed));
        }
        catch (Exception ex)
        {
            listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, "Could not validate data chunk", ex));
            ValidationFailureException = ex;
            ExtractionTimeValidator = null;
        }
    }

    _timeSpentValidating.Stop();

    _timeSpentBuckettingDates.Start();

    if (ExtractionTimeTimeCoverageAggregator != null)
    {
        _rowsBucketted += chunk.Rows.Count;

        foreach (DataRow row in chunk.Rows)
            ExtractionTimeTimeCoverageAggregator.ProcessRow(row);

        listener.OnProgress(this, new ProgressEventArgs("Bucketting Dates", new ProgressMeasurement(_rowsBucketted, ProgressType.Records), _timeSpentCalculatingDISTINCT.Elapsed));
    }

    _timeSpentBuckettingDates.Stop();

    _timeSpentCalculatingDISTINCT.Start();

    //record unique release identifiers found
    if (includesReleaseIdentifier)
    {
        foreach (string idx in _extractionIdentifiersidx)
        {
            foreach (DataRow r in chunk.Rows)
            {
                if (r[idx] == DBNull.Value)
                {
                    if (_extractionIdentifiersidx.Count == 1)
                        throw new Exception("Null release identifier found in extract of dataset " + Request.DatasetBundle.DataSet);
                    else
                        continue; //there are multiple extraction identifiers so it's fine if one or two are null
                }

                if (!UniqueReleaseIdentifiersEncountered.Contains(r[idx]))
                    UniqueReleaseIdentifiersEncountered.Add(r[idx]);
            }

            listener.OnProgress(this, new ProgressEventArgs("Calculating Distinct Release Identifiers", new ProgressMeasurement(UniqueReleaseIdentifiersEncountered.Count, ProgressType.Records), _timeSpentCalculatingDISTINCT.Elapsed));
        }
    }

    _timeSpentCalculatingDISTINCT.Stop();

    return chunk;
}
private void UpdateProgressListeners()
{
    _listener.OnProgress(this, new ProgressEventArgs("Processing Files", new ProgressMeasurement(_filesProcessedSoFar, ProgressType.Records), _stopwatch.Elapsed));
}
public DataTable ProcessPipelineData(DataTable toProcess, IDataLoadEventListener listener, GracefulCancellationToken cancellationToken)
{
    //Things we ignore: Lookups, SupportingSql etc
    if (_extractCommand == null)
    {
        listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "Ignoring non dataset command"));
        return toProcess;
    }

    //if it isn't a dicom dataset don't process it
    if (!toProcess.Columns.Contains(RelativeArchiveColumnName))
    {
        listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Warning, "Dataset " + _extractCommand.DatasetBundle.DataSet + " did not contain field '" + RelativeArchiveColumnName + "' so we will not attempt to extract images"));
        return toProcess;
    }

    if (_putter == null)
        _putter = (IPutDicomFilesInExtractionDirectories)new ObjectConstructor().Construct(PutterType);

    var projectNumber = _extractCommand.Configuration.Project.ProjectNumber.Value;

    var mappingServer = new MappingRepository(UIDMappingServer);
    var destinationDirectory = new DirectoryInfo(Path.Combine(_extractCommand.GetExtractionDirectory().FullName, "Images"));
    var releaseCol = _extractCommand.QueryBuilder.SelectColumns.Select(c => c.IColumn).Single(c => c.IsExtractionIdentifier);

    // See: ftp://medical.nema.org/medical/dicom/2011/11_15pu.pdf
    var flags = DicomAnonymizer.SecurityProfileOptions.BasicProfile |
                DicomAnonymizer.SecurityProfileOptions.CleanStructdCont |
                DicomAnonymizer.SecurityProfileOptions.CleanDesc |
                DicomAnonymizer.SecurityProfileOptions.RetainUIDs;

    if (RetainDates)
        flags |= DicomAnonymizer.SecurityProfileOptions.RetainLongFullDates;

    var profile = DicomAnonymizer.SecurityProfile.LoadProfile(null, flags);
    var anonymiser = new DicomAnonymizer(profile);

    using (var pool = new ZipPool())
    {
        _sw.Start();

        foreach (DataRow row in toProcess.Rows)
        {
            if (_errors > 0 && _errors > ErrorThreshold)
                throw new Exception($"Number of errors reported ({_errors}) reached the threshold ({ErrorThreshold})");

            cancellationToken.ThrowIfAbortRequested();

            var path = new AmbiguousFilePath(ArchiveRootIfAny, (string)row[RelativeArchiveColumnName]);

            DicomFile dicomFile;

            try
            {
                dicomFile = path.GetDataset(pool);
            }
            catch (Exception e)
            {
                listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, $"Failed to get image at path '{path.FullPath}'", e));
                _errors++;
                continue;
            }

            //get the new patient ID
            var releaseId = row[releaseCol.GetRuntimeName()].ToString();

            DicomDataset ds;

            try
            {
                ds = anonymiser.Anonymize(dicomFile.Dataset);
            }
            catch (Exception e)
            {
                listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, $"Failed to anonymize image at path '{path.FullPath}'", e));
                _errors++;
                continue;
            }

            //now we want to explicitly use our own release Id regardless of what FoDicom said
            ds.AddOrUpdate(DicomTag.PatientID, releaseId);

            //rewrite the UIDs
            foreach (var kvp in UIDMapping.SupportedTags)
            {
                if (!ds.Contains(kvp.Key))
                    continue;

                var value = ds.GetValue<string>(kvp.Key, 0);

                //if it has no value for this UID, skip it
                if (value == null)
                    continue;

                var releaseValue = mappingServer.GetOrAllocateMapping(value, projectNumber, kvp.Value);

                //change value in dataset
                ds.AddOrUpdate(kvp.Key, releaseValue);

                //and change value in DataTable
                if (toProcess.Columns.Contains(kvp.Key.DictionaryEntry.Keyword))
                    row[kvp.Key.DictionaryEntry.Keyword] = releaseValue;
            }

            var newPath = _putter.WriteOutDataset(destinationDirectory, releaseId, ds);
            row[RelativeArchiveColumnName] = newPath;

            _anonymisedImagesCount++;
            listener.OnProgress(this, new ProgressEventArgs("Writing ANO images", new ProgressMeasurement(_anonymisedImagesCount, ProgressType.Records), _sw.Elapsed));
        }

        _sw.Stop();
    }

    return toProcess;
}
public void OnProgress(object sender, ProgressEventArgs e)
{
    _child.OnProgress(_overridingSender, e);
}
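//The method above forwards progress to a child listener while swapping in a different
//sender. A minimal sketch of how such a wrapper might be declared as a complete class
//is shown below; the class name and constructor are assumptions, only the two interface
//methods are taken from the examples in this document.
public class OverrideSenderListenerSketch : IDataLoadEventListener
{
    private readonly object _overridingSender;
    private readonly IDataLoadEventListener _child;

    public OverrideSenderListenerSketch(object overridingSender, IDataLoadEventListener child)
    {
        _overridingSender = overridingSender;
        _child = child;
    }

    //notifications are forwarded with the overriding sender too
    public void OnNotify(object sender, NotifyEventArgs e)
    {
        _child.OnNotify(_overridingSender, e);
    }

    public void OnProgress(object sender, ProgressEventArgs e)
    {
        _child.OnProgress(_overridingSender, e);
    }
}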
/// <summary>
/// Generates the dita files and logs progress / errors to the <paramref name="listener"/>
/// </summary>
/// <param name="listener"></param>
public void Extract(IDataLoadEventListener listener)
{
    string xml = "";
    xml += @"<?xml version=""1.0"" encoding=""UTF-8""?>
<!DOCTYPE map PUBLIC ""-//OASIS//DTD DITA Map//EN"" ""map.dtd"">" + Environment.NewLine;
    xml += "<map>" + Environment.NewLine;
    xml += "<title>HIC Data Catalogue</title>" + Environment.NewLine;

    xml += @"<topicmeta product=""hicdc"" rev=""1"">" + Environment.NewLine;
    xml += "<author>Wilfred Bonney; Thomas Nind; Mikhail Ghattas</author>" + Environment.NewLine;
    xml += "<publisher>Health Informatics Centre (HIC), University of Dundee</publisher>" + Environment.NewLine;
    xml += "</topicmeta>" + Environment.NewLine;

    xml += @"<topicref href=""introduction.dita""/>" + Environment.NewLine;
    GenerateIntroductionFile("introduction.dita");

    xml += @"<topicref href=""dataset.dita"">" + Environment.NewLine;
    GenerateDataSetFile("dataset.dita");

    xml += Environment.NewLine;

    //get all the catalogues then sort them alphabetically
    List<Catalogue> catas = new List<Catalogue>(_repository.GetAllObjects<Catalogue>().Where(c => !(c.IsDeprecated || c.IsInternalDataset || c.IsColdStorageDataset)));
    catas.Sort();

    Stopwatch sw = Stopwatch.StartNew();

    int cataloguesCompleted = 0;

    foreach (Catalogue c in catas)
    {
        listener.OnProgress(this, new ProgressEventArgs("Extracting", new ProgressMeasurement(cataloguesCompleted++, ProgressType.Records, catas.Count), sw.Elapsed));

        //ensure that it has an acronym
        if (string.IsNullOrWhiteSpace(c.Acronym))
            throw new Exception("Dita Extraction requires that each catalogue have a unique Acronym, the catalogue " + c.Name + " is missing an Acronym");

        if (c.Name.Contains("\\") || c.Name.Contains("/"))
            throw new Exception("Dita Extractor does not support catalogues with backslashes or forward slashes in their name");

        //catalogue main file
        xml += "<topicref href=\"" + GetFileNameForCatalogue(c) + "\">" + Environment.NewLine;
        CreateCatalogueFile(c);

        //catalogue items
        List<CatalogueItem> cataItems = c.CatalogueItems.ToList();
        cataItems.Sort();

        foreach (CatalogueItem ci in cataItems)
        {
            xml += "<topicref href=\"" + GetFileNameForCatalogueItem(c, ci) + "\"/>" + Environment.NewLine;
            CreateCatalogueItemFile(c, ci);
        }

        xml += "</topicref>" + Environment.NewLine;

        //completed - mostly for end of loop tbh
        listener.OnProgress(this, new ProgressEventArgs("Extracting", new ProgressMeasurement(cataloguesCompleted, ProgressType.Records, catas.Count), sw.Elapsed));
    }

    xml += Environment.NewLine;
    xml += @"</topicref>" + Environment.NewLine;
    xml += "</map>";

    File.WriteAllText(Path.Combine(_folderToCreateIn.FullName, "hic_data_catalogue.ditamap"), xml);
}
protected virtual void Download(string file, ILoadDirectory destination, IDataLoadEventListener job)
{
    Stopwatch s = new Stopwatch();
    s.Start();

    string uri;
    if (!string.IsNullOrWhiteSpace(RemoteDirectory))
        uri = "ftp://" + _host + "/" + RemoteDirectory + "/" + file;
    else
        uri = "ftp://" + _host + "/" + file;

    if (_useSSL)
        uri = "s" + uri;

    Uri serverUri = new Uri(uri);

    if (serverUri.Scheme != Uri.UriSchemeFtp)
        return;

    FtpWebRequest reqFTP;
    reqFTP = (FtpWebRequest)FtpWebRequest.Create(new Uri(uri));
    reqFTP.Credentials = new NetworkCredential(_username, _password);
    reqFTP.KeepAlive = false;
    reqFTP.Method = WebRequestMethods.Ftp.DownloadFile;
    reqFTP.UseBinary = true;
    reqFTP.Proxy = null;
    reqFTP.UsePassive = true;
    reqFTP.EnableSsl = _useSSL;
    reqFTP.Timeout = TimeoutInSeconds * 1000;

    FtpWebResponse response = (FtpWebResponse)reqFTP.GetResponse();
    Stream responseStream = response.GetResponseStream();

    string destinationFileName = Path.Combine(destination.ForLoading.FullName, file);

    using (FileStream writeStream = new FileStream(destinationFileName, FileMode.Create))
    {
        int Length = 2048;
        Byte[] buffer = new Byte[Length];
        int bytesRead = responseStream.Read(buffer, 0, Length);
        int totalBytesReadSoFar = bytesRead;

        while (bytesRead > 0)
        {
            writeStream.Write(buffer, 0, bytesRead);
            bytesRead = responseStream.Read(buffer, 0, Length);

            //notify whoever is listening of how far along the process we are
            totalBytesReadSoFar += bytesRead;
            job.OnProgress(this, new ProgressEventArgs(destinationFileName, new ProgressMeasurement(totalBytesReadSoFar / 1024, ProgressType.Kilobytes), s.Elapsed));
        }

        writeStream.Close();
    }

    response.Close();

    _filesRetrieved.Add(serverUri.ToString());

    s.Stop();
}
public DataTable GetChunk(IDataLoadEventListener job, GracefulCancellationToken cancellationToken)
{
    if (_reader == null)
    {
        _con = DatabaseCommandHelper.GetConnection(_builder);
        _con.Open();

        job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "Running SQL:" + Environment.NewLine + Sql));

        cmd = DatabaseCommandHelper.GetCommand(Sql, _con);
        cmd.CommandTimeout = _timeout;
        CommandAdjuster?.Invoke(cmd);

        _reader = cmd.ExecuteReaderAsync(cancellationToken.AbortToken).Result;
        _numberOfColumns = _reader.FieldCount;
    }

    int readThisBatch = 0;
    timer.Start();

    try
    {
        DataTable chunk = GetChunkSchema(_reader);

        while (_reader.Read())
        {
            AddRowToDataTable(chunk, _reader);
            readThisBatch++;

            //we reached batch limit
            if (readThisBatch == BatchSize)
                return chunk;
        }

        //if data was read
        if (readThisBatch > 0)
            return chunk;

        //data is exhausted

        //if data was exhausted on first read and we are allowing empty result sets
        if (firstChunk && AllowEmptyResultSets)
            return chunk; //return the empty chunk

        //data exhausted
        return null;
    }
    catch (Exception e)
    {
        job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, "Source read failed", e));
        throw;
    }
    finally
    {
        firstChunk = false;
        timer.Stop();
        job.OnProgress(this, new ProgressEventArgs(_taskBeingPerformed, new ProgressMeasurement(TotalRowsRead, ProgressType.Records), timer.Elapsed));
    }
}
public void OnProgress(object sender, ProgressEventArgs e)
{
    _listener.OnProgress(sender, e);
}
/// <inheritdoc/>
public void ReverseAnonymiseDataTable(DataTable toProcess, IDataLoadEventListener listener, bool allowCaching)
{
    int haveWarnedAboutTop1AlreadyCount = 10;

    var syntax = ExternalCohortTable.GetQuerySyntaxHelper();

    string privateIdentifier = syntax.GetRuntimeName(GetPrivateIdentifier());
    string releaseIdentifier = syntax.GetRuntimeName(GetReleaseIdentifier());

    //if we don't want to support caching or there is no cached value yet
    if (!allowCaching || _releaseToPrivateKeyDictionary == null)
    {
        DataTable map = FetchEntireCohort();

        Stopwatch sw = new Stopwatch();
        sw.Start();

        //dictionary of released values (for the cohort) back to private values
        _releaseToPrivateKeyDictionary = new Dictionary<string, string>();

        foreach (DataRow r in map.Rows)
        {
            if (_releaseToPrivateKeyDictionary.Keys.Contains(r[releaseIdentifier]))
            {
                if (haveWarnedAboutTop1AlreadyCount > 0)
                {
                    haveWarnedAboutTop1AlreadyCount--;
                    listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Warning, "Top 1-ing will occur for release identifier " + r[releaseIdentifier] + " because it maps to multiple private identifiers"));
                }
                else
                {
                    if (haveWarnedAboutTop1AlreadyCount == 0)
                    {
                        haveWarnedAboutTop1AlreadyCount = -1;
                        listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Warning, "Top 1-ing error message disabled due to flood of messages"));
                    }
                }
            }
            else
            {
                _releaseToPrivateKeyDictionary.Add(r[releaseIdentifier].ToString().Trim(), r[privateIdentifier].ToString().Trim());
            }

            _reverseAnonymiseProgressFetchingMap++;

            if (_reverseAnonymiseProgressFetchingMap % 500 == 0)
                listener.OnProgress(this, new ProgressEventArgs("Assembling Release Map Dictionary", new ProgressMeasurement(_reverseAnonymiseProgressFetchingMap, ProgressType.Records), sw.Elapsed));
        }

        listener.OnProgress(this, new ProgressEventArgs("Assembling Release Map Dictionary", new ProgressMeasurement(_reverseAnonymiseProgressFetchingMap, ProgressType.Records), sw.Elapsed));
    }

    int nullsFound = 0;
    int substitutions = 0;

    Stopwatch sw2 = new Stopwatch();
    sw2.Start();

    //fix values
    foreach (DataRow row in toProcess.Rows)
    {
        try
        {
            object value = row[releaseIdentifier];

            if (value == null || value == DBNull.Value)
            {
                nullsFound++;
                continue;
            }

            //swap release value for private value (reversing the anonymisation)
            row[releaseIdentifier] = _releaseToPrivateKeyDictionary[value.ToString().Trim()].Trim();
            substitutions++;

            _reverseAnonymiseProgressReversing++;

            if (_reverseAnonymiseProgressReversing % 500 == 0)
                listener.OnProgress(this, new ProgressEventArgs("Substituting Release Identifiers For Private Identifiers", new ProgressMeasurement(_reverseAnonymiseProgressReversing, ProgressType.Records), sw2.Elapsed));
        }
        catch (KeyNotFoundException e)
        {
            throw new Exception("Could not find private identifier (" + privateIdentifier + ") for the release identifier (" + releaseIdentifier + ") with value '" + row[releaseIdentifier] + "' in cohort with cohortDefinitionID " + OriginID, e);
        }
    }

    //final value
    listener.OnProgress(this, new ProgressEventArgs("Substituting Release Identifiers For Private Identifiers", new ProgressMeasurement(_reverseAnonymiseProgressReversing, ProgressType.Records), sw2.Elapsed));

    if (nullsFound > 0)
        listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Warning, "Found " + nullsFound + " null release identifiers amongst the " + toProcess.Rows.Count + " rows of the input data table (on which we were attempting to reverse anonymise)"));

    listener.OnNotify(this, new NotifyEventArgs(substitutions > 0 ? ProgressEventType.Information : ProgressEventType.Error, "Substituted " + substitutions + " release identifiers for private identifiers in input data table (input data table contained " + toProcess.Rows.Count + " rows)"));

    toProcess.Columns[releaseIdentifier].ColumnName = privateIdentifier;
}
private ExtractCommandState ExtractSupportingSql(SupportingSQLTable sql, IDataLoadEventListener listener, DataLoadInfo dataLoadInfo)
{
    try
    {
        var tempDestination = new DataTableUploadDestination();

        listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "About to download SQL for global SupportingSQL " + sql.Name));

        using (var con = sql.GetServer().GetConnection())
        {
            con.Open();

            listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "Connection opened successfully, about to send SQL command " + sql.SQL));

            var cmd = DatabaseCommandHelper.GetCommand(sql.SQL, con);
            var da = DatabaseCommandHelper.GetDataAdapter(cmd);

            var sw = new Stopwatch();
            sw.Start();

            DataTable dt = new DataTable();
            da.Fill(dt);

            dt.TableName = GetTableName(_destinationDatabase.Server.GetQuerySyntaxHelper().GetSensibleTableNameFromString(sql.Name));

            var tableLoadInfo = dataLoadInfo.CreateTableLoadInfo("", dt.TableName, new[] { new DataSource(sql.SQL, DateTime.Now) }, -1);
            tableLoadInfo.Inserts = dt.Rows.Count;

            listener.OnProgress(this, new ProgressEventArgs("Reading from SupportingSQL " + sql.Name, new ProgressMeasurement(dt.Rows.Count, ProgressType.Records), sw.Elapsed));
            listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "Decided on the following destination table name for SupportingSQL: " + dt.TableName));

            tempDestination.AllowResizingColumnsAtUploadTime = true;
            tempDestination.PreInitialize(GetDestinationDatabase(listener), listener);
            tempDestination.ProcessPipelineData(dt, listener, new GracefulCancellationToken());
            tempDestination.Dispose(listener, null);

            //end auditing it
            tableLoadInfo.CloseAndArchive();

            if (_request is ExtractDatasetCommand)
            {
                var result = (_request as ExtractDatasetCommand).CumulativeExtractionResults;
                var supplementalResult = result.AddSupplementalExtractionResult(sql.SQL, sql);
                supplementalResult.CompleteAudit(this.GetType(), TargetDatabaseServer.ID + "|" + GetDatabaseName() + "|" + dt.TableName, dt.Rows.Count);
            }
            else
            {
                var extractGlobalsCommand = (_request as ExtractGlobalsCommand);
                Debug.Assert(extractGlobalsCommand != null, "extractGlobalsCommand != null");

                var result = new SupplementalExtractionResults(extractGlobalsCommand.RepositoryLocator.DataExportRepository, extractGlobalsCommand.Configuration, sql.SQL, sql);
                result.CompleteAudit(this.GetType(), TargetDatabaseServer.ID + "|" + GetDatabaseName() + "|" + dt.TableName, dt.Rows.Count);
                extractGlobalsCommand.ExtractionResults.Add(result);
            }
        }
    }
    catch (Exception e)
    {
        listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, "Extraction of SupportingSQL " + sql + " failed ", e));
        return ExtractCommandState.Crashed;
    }

    return ExtractCommandState.Completed;
}
public FileInfo GenerateWordFile(IDataLoadEventListener listener, bool showFile)
{
    try
    {
        //if there's only one catalogue call it 'prescribing.docx' etc
        string filename = _args.Catalogues.Length == 1 ? _args.Catalogues[0].Name : "MetadataReport";

        using (var document = GetNewDocFile(filename))
        {
            PageWidthInPixels = GetPageWidth();

            var sw = Stopwatch.StartNew();

            try
            {
                int completed = 0;

                foreach (Catalogue c in _args.Catalogues.OrderBy(c => c.Name))
                {
                    listener.OnProgress(this, new ProgressEventArgs("Extracting", new ProgressMeasurement(completed++, ProgressType.Records, _args.Catalogues.Length), sw.Elapsed));

                    int recordCount = -1;
                    int distinctRecordCount = -1;
                    string identifierName = null;

                    bool gotRecordCount = false;

                    try
                    {
                        if (_args.IncludeRowCounts)
                        {
                            GetRecordCount(c, out recordCount, out distinctRecordCount, out identifierName);
                            gotRecordCount = true;
                        }
                    }
                    catch (Exception e)
                    {
                        listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, "Error processing record count for Catalogue " + c.Name, e));
                    }

                    InsertHeader(document, c.Name);

                    //assume we don't know the age of the dataset
                    DateTime? accurateAsOf = null;

                    //get the age of the dataset if known and output it
                    if (_args.TimespanCalculator != null)
                    {
                        string timespan = _args.TimespanCalculator.GetHumanReadableTimespanIfKnownOf(c, true, out accurateAsOf);

                        if (!string.IsNullOrWhiteSpace(timespan) && !timespan.Equals("Unknown"))
                            InsertParagraph(document, timespan + (accurateAsOf.HasValue ? "*" : ""), TextFontSize);
                    }

                    InsertParagraph(document, c.Description, TextFontSize);

                    if (accurateAsOf.HasValue)
                        InsertParagraph(document, "* Based on DQE run on " + accurateAsOf.Value, TextFontSize - 2);

                    if (gotRecordCount)
                    {
                        InsertHeader(document, "Record Count", 3);
                        CreateCountTable(document, recordCount, distinctRecordCount, identifierName);
                    }

                    if (!_args.SkipImages && RequestCatalogueImages != null)
                    {
                        BitmapWithDescription[] onRequestCatalogueImages = RequestCatalogueImages(c);

                        if (onRequestCatalogueImages.Any())
                        {
                            InsertHeader(document, "Aggregates", 2);
                            AddImages(document, onRequestCatalogueImages);
                        }
                    }

                    CreateDescriptionsTable(document, c);

                    if (_args.IncludeNonExtractableItems)
                        CreateNonExtractableColumnsTable(document, c);

                    //if this is not the last Catalogue create a new page
                    if (completed != _args.Catalogues.Length)
                        InsertSectionPageBreak(document);

                    listener.OnProgress(this, new ProgressEventArgs("Extracting", new ProgressMeasurement(completed, ProgressType.Records, _args.Catalogues.Length), sw.Elapsed));
                }

                if (LookupsEncounteredToAppearInAppendix.Any())
                    CreateLookupAppendix(document, listener);

                if (showFile)
                    ShowFile(document);

                SetMargins(document, 20);

                AddFooter(document, "Created on " + DateTime.Now, TextFontSize);

                return document.FileInfo;
            }
            catch (ThreadInterruptedException)
            {
                //user hit abort
            }
        }
    }
    catch (Exception e)
    {
        listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, "Entire process failed, see Exception for details", e));
    }

    return null;
}
public override SMIDataChunk DoGetChunk(ICacheFetchRequest cacheRequest, IDataLoadEventListener listener, GracefulCancellationToken cancellationToken)
{
    #region assigns
    var dicomConfiguration = GetConfiguration();
    var requestSender = new DicomRequestSender(dicomConfiguration, listener);
    var dateFrom = Request.Start;
    var dateTo = Request.End;
    var hasTransferTimedOut = false;

    CachingSCP.LocalAet = LocalAETitle;
    CachingSCP.Listener = listener;

    if (PatientIdWhitelistColumnInfo != null && !IgnoreWhiteList)
        GetWhitelist(listener);

    //temp dir
    var cacheDir = new LoadDirectory(Request.CacheProgress.LoadProgress.LoadMetadata.LocationOfFlatFiles).Cache;
    var cacheLayout = new SMICacheLayout(cacheDir, new SMICachePathResolver(Modality));

    Chunk = new SMIDataChunk(Request)
    {
        FetchDate = dateFrom,
        Modality = Modality,
        Layout = cacheLayout
    };

    // IOrder order = new ItemsBasedOrder(dateFrom, dateTo, PlacementMode.PlaceThenFill, OrderLevel, listener);
    IOrder order = new HierarchyBasedOrder(dateFrom, dateTo, PlacementMode.PlaceThenFill, OrderLevel, listener);

    IPicker picker = null;
    var pickerFilled = false;

    var transferTimeOutTimer = new Timer(dicomConfiguration.TransferTimeOutInMilliseconds);
    transferTimeOutTimer.Elapsed += (source, eventArgs) =>
    {
        hasTransferTimedOut = true;
        listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "Transfer Timeout Exception Generated"));
        throw new TimeoutException("Transfer Timeout Exception");
    };

    CachingSCP.OnEndProcessingCStoreRequest = (storeRequest, storeResponse) =>
    {
        var item = new Item(storeRequest);
        transferTimeOutTimer.Reset();

        if (picker != null)
        {
            picker.Fill(item);
            pickerFilled = picker.IsFilled();
        }

        SaveSopInstance(storeRequest, cacheLayout, listener);
        listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Debug, "Stored sopInstance " + storeRequest.SOPInstanceUID.UID));
    };
    #endregion

    //helps with tidying up resources if we abort or throw an exception and neatly avoids -> Access to disposed closure
    using (var server = (DicomServer<CachingSCP>)DicomServer.Create<CachingSCP>(dicomConfiguration.LocalAetUri.Port))
    {
        try
        {
            // Find a list of studies
            #region Query
            listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "Requesting Studies from " + dateFrom + " to " + dateTo));

            var studyUids = new List<string>();
            var request = CreateStudyRequestByDateRangeForModality(dateFrom, dateTo, Modality);
            request.OnResponseReceived += (req, response) =>
            {
                if (Filter(_whitelist, response))
                    studyUids.Add(response.Dataset.GetSingleValue<string>(DicomTag.StudyInstanceUID));
            };
            requestSender.ThrottleRequest(request, cancellationToken);
            listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Debug, "Total filtered studies for " + dateFrom + " to " + dateTo + " is " + studyUids.Count));

            foreach (var studyUid in studyUids)
            {
                listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Debug, "Sending series query for study " + studyUid));

                var seriesUids = new List<string>();
                request = CreateSeriesRequestByStudyUid(studyUid);
                request.OnResponseReceived += (req, response) =>
                {
                    if (response.Dataset == null)
                        return;

                    var seriesInstanceUID = response.Dataset.GetSingleValue<string>(DicomTag.SeriesInstanceUID);
                    if (seriesInstanceUID != null)
                        seriesUids.Add(seriesInstanceUID);
                };
                requestSender.ThrottleRequest(request, cancellationToken);
                listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Debug, "Total series for " + studyUid + " is " + seriesUids.Count));

                foreach (var seriesUid in seriesUids)
                {
                    listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Debug, "Sending image query for series " + seriesUid));

                    request = CreateSopRequestBySeriesUid(seriesUid);
                    int imageCount = 0;
                    request.OnResponseReceived += (req, response) =>
                    {
                        if (response.Dataset == null)
                            return;

                        var sopUid = response.Dataset.GetSingleValue<string>(DicomTag.SOPInstanceUID);
                        var patientId = response.Dataset.GetSingleValue<string>(DicomTag.PatientID);

                        if (sopUid != null && patientId != null)
                        {
                            //Place order
                            order.Place(patientId, studyUid, seriesUid, sopUid);
                            imageCount++;
                        }
                    };
                    requestSender.ThrottleRequest(request, cancellationToken);
                    listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Debug, "Successfully finished image query for " + seriesUid + " Total images in series = " + imageCount));
                }

                listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Debug, "Successfully finished series query for " + studyUid));
            }

            listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Debug, "Successfully finished query phase"));
            #endregion

            //go and get them
            #region Retrieval
            var transferStopwatch = new Stopwatch();

            //start building request to fill orders
            //get the picker - the for loop avoids sleeping after all the transfers have finished and attempting dequeue on empty queue
            for (int delay = 0, transferTimerPollingPeriods;
                 order.HasNextPicker() && !hasTransferTimedOut;
                 delay = (int)(dicomConfiguration.TransferDelayFactor * transferTimerPollingPeriods * dicomConfiguration.TransferPollingInMilliseconds) + dicomConfiguration.TransferCooldownInMilliseconds)
            {
                transferStopwatch.Restart();

                //delay value in milliseconds
                if (delay != 0)
                {
                    listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "Transfers sleeping for " + delay / 1000 + " seconds"));
                    Task.Delay(delay, cancellationToken.AbortToken).Wait(cancellationToken.AbortToken);
                }

                //set this here prior to request
                pickerFilled = false;
                transferTimerPollingPeriods = 0;

                //get next picker
                picker = order.NextPicker();

                // A CMove will be performed if the storescp exists and this storescp is known to the QRSCP:
                var cMoveRequest = picker.GetDicomCMoveRequest(LocalAETitle);

                /* this won't work which means we cannot enforce (low) priority
                 * cMoveRequest.Priority = DicomPriority.Low; */

                cMoveRequest.OnResponseReceived += (requ, response) =>
                {
                    if (response.Status.State == DicomState.Pending)
                        listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Debug, "Request: " + requ.ToString() + " items remaining: " + response.Remaining));
                    else if (response.Status.State == DicomState.Success)
                        listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Debug, "Request: " + requ.ToString() + " completed successfully"));
                    else if (response.Status.State == DicomState.Failure)
                        listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Debug, "Request: " + requ.ToString() + " failed to download: " + response.Failures));
                };

                listener.OnProgress(this, new ProgressEventArgs(CMoveRequestToString(cMoveRequest), new ProgressMeasurement(picker.Filled(), ProgressType.Records, picker.Total()), transferStopwatch.Elapsed));

                //do not use requestSender.ThrottleRequest(cMoveRequest, cancellationToken);
                //TODO is there any need to throttle this request given its lifetime
                DicomClient client = new DicomClient();
                requestSender.ThrottleRequest(cMoveRequest, client, cancellationToken);

                transferTimeOutTimer.Reset();

                while (!pickerFilled && !hasTransferTimedOut)
                {
                    Task.Delay(dicomConfiguration.TransferPollingInMilliseconds, cancellationToken.AbortToken)
                        .Wait(cancellationToken.AbortToken);
                    transferTimerPollingPeriods++;
                }

                transferTimeOutTimer.Stop();
                client.Release();

                listener.OnProgress(this, new ProgressEventArgs(CMoveRequestToString(cMoveRequest), new ProgressMeasurement(picker.Filled(), ProgressType.Records, picker.Total()), transferStopwatch.Elapsed));
            }
            #endregion
        }
        finally
        {
            server.Stop();
        }
    }

    return Chunk;
}
public async void SendToAllRemotes<T>(T[] toSendAll, Action callback = null) where T : IMapsDirectlyToDatabaseTable
{
    listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "Ready to send " + toSendAll.Length + " " + typeof(T).Name + " items to all remotes."));

    var done = new Dictionary<string, int>();

    foreach (var remoteRDMP in remotes)
        listener.OnProgress(this, new ProgressEventArgs(remoteRDMP.Name, new ProgressMeasurement(0, ProgressType.Records, toSendAll.Length), new TimeSpan()));

    var tasks = new List<Task>();

    foreach (var remote in remotes)
    {
        done.Add(remote.Name, 0);

        foreach (var toSend in toSendAll)
        {
            if (!_gatherer.CanGatherDependencies(toSend))
                throw new Exception("Type " + typeof(T) + " is not supported yet by Gatherer and therefore cannot be shared");

            var share = _gatherer.GatherDependencies(toSend).ToShareDefinitionWithChildren(_shareManager);
            var json = JsonConvertExtensions.SerializeObject(share, _repositoryLocator);

            var handler = new HttpClientHandler()
            {
                Credentials = new NetworkCredential(remote.Username, remote.GetDecryptedPassword())
            };

            HttpResponseMessage result;
            var apiUrl = remote.GetUrlFor<T>();

            RemoteRDMP remote1 = remote;
            T toSend1 = toSend;

            var sender = new Task(() =>
            {
                using (var client = new HttpClient(handler))
                {
                    try
                    {
                        result = client.PostAsync(new Uri(apiUrl), new StringContent(json, Encoding.UTF8, "text/plain")).Result;

                        if (result.IsSuccessStatusCode)
                            listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "Sending " + toSend1 + " to " + remote1.Name + " completed."));
                        else
                            listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, "Error sending " + toSend1 + " to " + remote1.Name + ": " + result.ReasonPhrase + " - " + result.Content.ReadAsStringAsync().Result));

                        lock (done)
                        {
                            listener.OnProgress(this, new ProgressEventArgs(remote1.Name, new ProgressMeasurement(++done[remote1.Name], ProgressType.Records, toSendAll.Length), new TimeSpan()));
                        }
                    }
                    catch (Exception ex)
                    {
                        listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, "Error sending " + toSend1 + " to " + remote1.Name, ex));
                        listener.OnProgress(this, new ProgressEventArgs(remote1.Name, new ProgressMeasurement(1, ProgressType.Records, 1), new TimeSpan()));
                    }
                }
            });

            sender.Start();
            tasks.Add(sender);
        }
    }

    await Task.WhenAll(tasks);

    if (callback != null)
        callback();
}
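//Most of the examples above follow the same reporting pattern: time the work with a
//Stopwatch, count the units processed, and call OnProgress periodically (typically every
//N records) plus once at the end so the final partial batch is reported. A stripped-down
//sketch of that pattern is below; the class name, field names and task description are
//illustrative only and not taken from any of the examples.
public class ProgressReportingComponentSketch
{
    private readonly Stopwatch _timer = new Stopwatch();
    private int _recordsProcessed;

    public DataTable ProcessPipelineData(DataTable toProcess, IDataLoadEventListener listener, GracefulCancellationToken cancellationToken)
    {
        _timer.Start();

        foreach (DataRow row in toProcess.Rows)
        {
            //... do the actual per-row work here ...

            _recordsProcessed++;

            //report every 1000 records so listeners are not flooded with events
            if (_recordsProcessed % 1000 == 0)
                listener.OnProgress(this, new ProgressEventArgs("Doing work", new ProgressMeasurement(_recordsProcessed, ProgressType.Records), _timer.Elapsed));
        }

        _timer.Stop();

        //final progress message so the last partial batch is reported too
        listener.OnProgress(this, new ProgressEventArgs("Doing work", new ProgressMeasurement(_recordsProcessed, ProgressType.Records), _timer.Elapsed));

        return toProcess;
    }
}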