Example #1
        private void AsyncCopyMDFFilesWithEvents(string MDFSource, string MDFDestination, string LDFSource, string LDFDestination, IDataLoadEventListener job)
        {
            Stopwatch s = new Stopwatch();

            s.Start();

            job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "Starting copy from " + MDFSource + " to " + MDFDestination));
            job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "Starting copy from " + LDFSource + " to " + LDFDestination));

            CopyWithProgress copyMDF = new CopyWithProgress();

            copyMDF.Progress +=
                (size, transferred, streamSize, bytesTransferred, number, reason, file, destinationFile, data) =>
            {
                job.OnProgress(this, new ProgressEventArgs(MDFDestination, new ProgressMeasurement((int)(transferred * 0.001), ProgressType.Kilobytes), s.Elapsed));
                return(CopyWithProgress.CopyProgressResult.PROGRESS_CONTINUE);
            };
            copyMDF.XCopy(MDFSource, MDFDestination);
            s.Reset();
            s.Start();

            CopyWithProgress copyLDF = new CopyWithProgress();

            copyLDF.Progress +=
                (size, transferred, streamSize, bytesTransferred, number, reason, file, destinationFile, data) =>
            {
                job.OnProgress(this, new ProgressEventArgs(LDFDestination, new ProgressMeasurement((int)(transferred * 0.001), ProgressType.Kilobytes), s.Elapsed));
                return(CopyWithProgress.CopyProgressResult.PROGRESS_CONTINUE);
            };
            copyLDF.XCopy(LDFSource, LDFDestination);
            s.Stop();
        }
Example #2
        public DataTable ProcessPipelineData(DataTable toProcess, IDataLoadEventListener listener, GracefulCancellationToken cancellationToken)
        {
            bool didAno = false;

            stopwatch_TimeSpentTransforming.Start();

            if (!_bInitialized)
            {
                throw new Exception("Not Initialized yet");
            }

            recordsProcessedSoFar += toProcess.Rows.Count;

            var missingColumns = columnsToAnonymise.Keys.Where(k => !toProcess.Columns.Cast <DataColumn>().Any(c => c.ColumnName.Equals(k))).ToArray();

            if (missingColumns.Any())
            {
                throw new KeyNotFoundException("The following columns (which have ANO Transforms on them) were missing from the DataTable:" + Environment.NewLine
                                               + string.Join(Environment.NewLine, missingColumns) + Environment.NewLine + "The columns found in the DataTable were:" + Environment.NewLine
                                               + string.Join(Environment.NewLine, toProcess.Columns.Cast <DataColumn>().Select(c => c.ColumnName)));
            }

            //Dump Identifiers
            stopwatch_TimeSpentDumping.Start();
            _dumper.DumpAllIdentifiersInTable(toProcess); //do the dumping of all the rest of the columns (those that must disappear from the pipeline, as opposed to those above which were substituted for ANO versions)
            stopwatch_TimeSpentDumping.Stop();

            if (_dumper.HaveDumpedRecords)
            {
                listener.OnProgress(this, new ProgressEventArgs("Dump Identifiers", new ProgressMeasurement(recordsProcessedSoFar, ProgressType.Records), stopwatch_TimeSpentDumping.Elapsed));//time taken to dump identifiers
            }
            //Process ANO Identifier Substitutions
            //for each column with an ANOTransformer
            foreach (KeyValuePair <string, ANOTransformer> kvp in columnsToAnonymise)
            {
                didAno = true;

                var            column      = kvp.Key;
                ANOTransformer transformer = kvp.Value;

                //add an ANO version
                DataColumn ANOColumn = new DataColumn(ANOTable.ANOPrefix + column);
                toProcess.Columns.Add(ANOColumn);

                //populate ANO version
                transformer.Transform(toProcess, toProcess.Columns[column], ANOColumn);

                //drop the non ANO version
                toProcess.Columns.Remove(column);
            }

            stopwatch_TimeSpentTransforming.Stop();

            if (didAno)
            {
                listener.OnProgress(this, new ProgressEventArgs("Anonymise Identifiers", new ProgressMeasurement(recordsProcessedSoFar, ProgressType.Records), stopwatch_TimeSpentTransforming.Elapsed)); //time taken to swap ANO identifiers
            }
            return(toProcess);
        }
        public DataTable ProcessPipelineData(DataTable toProcess, IDataLoadEventListener job, GracefulCancellationToken cancellationToken)
        {
            _request.ElevateState(ExtractCommandState.WritingToFile);

            if (!haveWrittenBundleContents && _request is ExtractDatasetCommand)
            {
                WriteBundleContents(((ExtractDatasetCommand)_request).DatasetBundle, job, cancellationToken);
            }

            if (_request is ExtractGlobalsCommand)
            {
                ExtractGlobals((ExtractGlobalsCommand)_request, job, _dataLoadInfo);
                return(null);
            }

            stopwatch.Start();
            if (!haveOpened)
            {
                haveOpened = true;
                _output.Open();
                _output.WriteHeaders(toProcess);
                LinesWritten = 0;

                //create an audit object
                TableLoadInfo = new TableLoadInfo(_dataLoadInfo, "", OutputFile, new DataSource[] { new DataSource(_request.DescribeExtractionImplementation(), DateTime.Now) }, -1);
            }

            foreach (DataRow row in toProcess.Rows)
            {
                _output.Append(row);

                LinesWritten++;

                if (TableLoadInfo.IsClosed)
                {
                    throw new Exception("TableLoadInfo was closed so could not write number of rows (" + LinesWritten + ") to audit object - most likely the extraction crashed?");
                }
                else
                {
                    TableLoadInfo.Inserts = LinesWritten;
                }

                if (LinesWritten % 1000 == 0)
                {
                    job.OnProgress(this, new ProgressEventArgs("Write to file " + OutputFile, new ProgressMeasurement(LinesWritten, ProgressType.Records), stopwatch.Elapsed));
                }
            }
            job.OnProgress(this, new ProgressEventArgs("Write to file " + OutputFile, new ProgressMeasurement(LinesWritten, ProgressType.Records), stopwatch.Elapsed));

            stopwatch.Stop();
            _output.Flush();

            return(null);
        }
Example #4
        protected override void WriteRows(DataTable toProcess, IDataLoadEventListener job, GracefulCancellationToken cancellationToken, Stopwatch stopwatch)
        {
            foreach (DataRow row in toProcess.Rows)
            {
                _output.Append(row);

                LinesWritten++;

                if (LinesWritten % 1000 == 0)
                {
                    job.OnProgress(this, new ProgressEventArgs("Write to file " + OutputFile, new ProgressMeasurement(LinesWritten, ProgressType.Records), stopwatch.Elapsed));
                }
            }
            job.OnProgress(this, new ProgressEventArgs("Write to file " + OutputFile, new ProgressMeasurement(LinesWritten, ProgressType.Records), stopwatch.Elapsed));
        }
Example #5
        private void ProcessDir(string dir, DataTable dt, IDataLoadEventListener listener)
        {
            listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, $"Starting '{dir}'"));

            if (File.Exists(dir))
            {
                // the inventory entry is an xml file directly :o
                XmlToRows(dir, dt, listener);
                return;
            }

            if (!Directory.Exists(dir))
            {
                listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, $"'{dir}' was not a Directory or File"));
                return;
            }

            var matches = Directory.GetFiles(dir, SearchPattern, SearchOption.AllDirectories);

            listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, $"Found {matches.Length} CFind files in {dir}"));

            foreach (var file in matches)
            {
                XmlToRows(file, dt, listener);

                if (filesRead++ % 10000 == 0)
                {
                    listener.OnProgress(this, new ProgressEventArgs("Reading files", new ProgressMeasurement(filesRead, ProgressType.Records, matches.Length), timer?.Elapsed ?? TimeSpan.Zero));
                }
            }
        }
Example #6
        public DataTable ProcessPipelineData(DataTable toProcess, IDataLoadEventListener listener, GracefulCancellationToken cancellationToken)
        {
            _sw.Start();
            DataTable outputTable = new DataTable();

            foreach (DataColumn dataColumn in toProcess.Columns)
            {
                outputTable.Columns.Add(dataColumn.ColumnName, dataColumn.DataType);
            }

            Regex regex = DeleteRowsWhereValuesMatch ?? new Regex(DeleteRowsWhereValuesMatchStandard.Regex);

            foreach (DataRow row in toProcess.Rows)
            {
                var val = row[ColumnNameToFind];

                //keep nulls, dbnulls or anything where ToString doesn't match the regex
                if (val == null || val == DBNull.Value || !regex.IsMatch(val.ToString()))
                {
                    outputTable.ImportRow(row);
                }
                else
                {
                    _deleted++;
                }
            }

            listener.OnProgress(this, new ProgressEventArgs("Deleting Rows", new ProgressMeasurement(_deleted, ProgressType.Records), _sw.Elapsed));

            _sw.Stop();
            return(outputTable);
        }
Example #7
        public DataTable ProcessPipelineData(DataTable toProcess, IDataLoadEventListener job, GracefulCancellationToken cancellationToken)
        {
            timer.Start();

StartAgain:
            foreach (DataRow row in toProcess.Rows)
            {
                for (int i = 0; i < columnsToClean.Count; i++)
                {
                    string toClean = columnsToClean[i];
                    string val     = null;
                    try
                    {
                        object o = row[toClean];

                        if (o == DBNull.Value || o == null)
                        {
                            continue;
                        }

                        if (!(o is string))
                        {
                            throw new ArgumentException("Despite being marked as a string column, object found in column " + toClean + " was of type " + o.GetType());
                        }

                        val = o as string;
                    }
                    catch (ArgumentException e)
                    {
                        job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Warning, e.Message)); //column could not be found (or its value was not a string)
                        columnsToClean.Remove(columnsToClean[i]);
                        goto StartAgain;
                    }


                    //it is empty
                    if (string.IsNullOrWhiteSpace(val))
                    {
                        row[toClean] = DBNull.Value;
                    }
                    else
                    {
                        //trim it
                        var valAfterClean = val.Trim();

                        //set it
                        if (val != valAfterClean)
                        {
                            row[toClean] = valAfterClean;
                        }
                    }
                }
                _rowsProcessed++;
            }
            timer.Stop();

            job.OnProgress(this, new ProgressEventArgs(_taskDescription, new ProgressMeasurement(_rowsProcessed, ProgressType.Records), timer.Elapsed));

            return(toProcess);
        }
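The goto StartAgain above restarts the entire batch from the first row whenever a column has to be dropped from columnsToClean mid-loop. A minimal alternative sketch (not the original code, covering only the missing-column case, and assuming columnsToClean is a List<string> with System.Linq in scope) would prune the list once before the row loop:

            //Hypothetical pre-validation (illustration only): remove any configured
            //columns that are absent from this batch before iterating rows, so the
            //row loop never needs to restart
            foreach (var missing in columnsToClean.Where(c => !toProcess.Columns.Contains(c)).ToList())
            {
                job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Warning,
                    "Column '" + missing + "' was not found in the batch and will be skipped"));
                columnsToClean.Remove(missing);
            }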
Example #8
        protected override void Download(string file, ILoadDirectory destination, IDataLoadEventListener job)
        {
            if (file.Contains("/") || file.Contains("\\"))
            {
                throw new Exception("Was not expecting a relative path here");
            }

            Stopwatch s = new Stopwatch();

            s.Start();

            using (var sftp = new SftpClient(_host, _username, _password))
            {
                sftp.ConnectionInfo.Timeout = new TimeSpan(0, 0, 0, TimeoutInSeconds);
                sftp.Connect();

                //if a remote directory is specified, prefix the file name with it; otherwise use the file name as it came back from GetFileList()
                string fullFilePath = !string.IsNullOrWhiteSpace(RemoteDirectory) ? Path.Combine(RemoteDirectory, file) : file;

                string destinationFilePath = Path.Combine(destination.ForLoading.FullName, file);

                //register for events
                Action <ulong> callback = (totalBytes) => job.OnProgress(this, new ProgressEventArgs(destinationFilePath, new ProgressMeasurement((int)(totalBytes * 0.001), ProgressType.Kilobytes), s.Elapsed));

                using (var fs = new FileStream(destinationFilePath, FileMode.CreateNew))
                {
                    //download
                    sftp.DownloadFile(fullFilePath, fs, callback);
                    fs.Close();
                }
                _filesRetrieved.Add(fullFilePath);
            }
            s.Stop();
        }
        private void WriteBundleContents(IExtractableDatasetBundle datasetBundle, IDataLoadEventListener job, GracefulCancellationToken cancellationToken)
        {
            var rootDir             = _request.GetExtractionDirectory();
            var supportingSQLFolder = new DirectoryInfo(Path.Combine(rootDir.FullName, SupportingSQLTable.ExtractionFolderName));
            var lookupDir           = rootDir.CreateSubdirectory("Lookups");

            //extract the documents
            foreach (SupportingDocument doc in datasetBundle.Documents)
            {
                datasetBundle.States[doc] = TryExtractSupportingDocument(rootDir, doc, job)
                    ? ExtractCommandState.Completed
                    : ExtractCommandState.Crashed;
            }

            //extract supporting SQL
            foreach (SupportingSQLTable sql in datasetBundle.SupportingSQL)
            {
                datasetBundle.States[sql] = TryExtractSupportingSQLTable(supportingSQLFolder, _request.Configuration, sql, job, _dataLoadInfo)
                    ? ExtractCommandState.Completed
                    : ExtractCommandState.Crashed;
            }

            //extract lookups
            foreach (BundledLookupTable lookup in datasetBundle.LookupTables)
            {
                try
                {
                    job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "About to extract lookup " + lookup));

                    var server = DataAccessPortal.GetInstance().ExpectServer(lookup.TableInfo, DataAccessContext.DataExport);

                    Stopwatch sw = new Stopwatch();
                    sw.Start();

                    //extracts all of them
                    var extractTableVerbatim = new ExtractTableVerbatim(server, new [] { lookup.TableInfo.Name }, lookupDir, _request.Configuration.Separator, DateFormat);
                    int linesWritten         = extractTableVerbatim.DoExtraction();
                    sw.Stop();
                    job.OnProgress(this, new ProgressEventArgs("Lookup " + lookup, new ProgressMeasurement(linesWritten, ProgressType.Records), sw.Elapsed));

                    if (_request is ExtractDatasetCommand)
                    {
                        var result             = (_request as ExtractDatasetCommand).CumulativeExtractionResults;
                        var supplementalResult = result.AddSupplementalExtractionResult("SELECT * FROM " + lookup.TableInfo.Name, lookup.TableInfo);
                        supplementalResult.CompleteAudit(this.GetType(), extractTableVerbatim.OutputFilename, linesWritten);
                    }

                    datasetBundle.States[lookup] = ExtractCommandState.Completed;
                }
                catch (Exception e)
                {
                    job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, "Error occurred trying to extract lookup " + lookup + " on server " + lookup.TableInfo.Server, e));

                    datasetBundle.States[lookup] = ExtractCommandState.Crashed;
                }
            }

            haveWrittenBundleContents = true;
        }
Example #10
        protected bool TryExtractSupportingSQLTable(SupportingSQLTable sql, DirectoryInfo directory, IExtractionConfiguration configuration, IDataLoadEventListener listener, DataLoadInfo dataLoadInfo)
        {
            try
            {
                listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "Preparing to extract Supporting SQL " + sql + " to directory " + directory.FullName));

                Stopwatch sw = new Stopwatch();
                sw.Start();

                //start auditing it as a table load
                string target        = Path.Combine(directory.FullName, sql.Name + ".csv");
                var    tableLoadInfo = dataLoadInfo.CreateTableLoadInfo("", target, new[] { new DataSource(sql.SQL, DateTime.Now) }, -1);

                TryExtractSupportingSQLTableImpl(sql, directory, configuration, listener, out int sqlLinesWritten, out string description);

                sw.Stop();

                //end auditing it
                tableLoadInfo.Inserts = sqlLinesWritten;
                tableLoadInfo.CloseAndArchive();

                if (_request is ExtractDatasetCommand)
                {
                    var result             = (_request as ExtractDatasetCommand).CumulativeExtractionResults;
                    var supplementalResult = result.AddSupplementalExtractionResult(sql.SQL, sql);
                    supplementalResult.CompleteAudit(this.GetType(), description, sqlLinesWritten);
                }
                else
                {
                    var extractGlobalsCommand = (_request as ExtractGlobalsCommand);
                    Debug.Assert(extractGlobalsCommand != null, "extractGlobalsCommand != null");
                    var result =
                        new SupplementalExtractionResults(extractGlobalsCommand.RepositoryLocator.DataExportRepository,
                                                          extractGlobalsCommand.Configuration,
                                                          sql.SQL,
                                                          sql);
                    result.CompleteAudit(this.GetType(), description, sqlLinesWritten);
                    extractGlobalsCommand.ExtractionResults.Add(result);
                }

                listener.OnProgress(this, new ProgressEventArgs("Extract " + sql, new ProgressMeasurement(sqlLinesWritten, ProgressType.Records), sw.Elapsed));
                listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "Extracted " + sqlLinesWritten + " records from SupportingSQL " + sql + " into directory " + directory.FullName));

                return(true);
            }
            catch (Exception e)
            {
                if (e is SqlException)
                {
                    listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, "Failed to run extraction SQL (make sure to fully specify all database/table/column objects completely):" + Environment.NewLine + sql.SQL, e));
                }
                else
                {
                    listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, "Failed to extract " + sql + " into directory " + directory.FullName, e));
                }

                return(false);
            }
        }
Example #11
        public DataTable ProcessPipelineData(DataTable toProcess, IDataLoadEventListener listener, GracefulCancellationToken cancellationToken)
        {
            sw.Start();

            DataTable toReturn = toProcess.Clone();

            //now de-duplicate rows
            foreach (DataRow row in toProcess.Rows)
            {
                totalRecordsProcessed++;
                int hashOfItems = GetHashCode(row.ItemArray);

                if (unqiueHashesSeen.ContainsKey(hashOfItems))
                {
                    //GetHashCode on ItemArray of row has been seen before but it could be a collision so call Enumerable.SequenceEqual just in case.
                    if (unqiueHashesSeen[hashOfItems].Any(r => r.ItemArray.SequenceEqual(row.ItemArray)))
                    {
                        totalDuplicatesFound++;
                        continue; //it's a duplicate
                    }

                    unqiueHashesSeen[hashOfItems].Add(row);
                }
                else
                {
                    //it's not a duplicate hash code so add it to the return table and to the record of everything we have seen so far (so that duplicates are still detected across later batches)
                    unqiueHashesSeen.Add(hashOfItems, new List <DataRow>(new[] { row }));
                }

                toReturn.Rows.Add(row.ItemArray);
            }

            sw.Stop();

            if (!NoLogging)
            {
                listener.OnProgress(this, new ProgressEventArgs("Evaluating For Duplicates", new ProgressMeasurement(totalRecordsProcessed, ProgressType.Records), sw.Elapsed));
                listener.OnProgress(this, new ProgressEventArgs("Discarding Duplicates", new ProgressMeasurement(totalDuplicatesFound, ProgressType.Records), sw.Elapsed));
            }
            return(toReturn);
        }
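The example above relies on a GetHashCode(object[]) helper that is not shown in this listing. The sketch below is one plausible implementation (an assumption, not the original code): it combines the hash codes of every value in the row's ItemArray, and occasional collisions are acceptable because the caller confirms true duplicates with SequenceEqual.

        //Hypothetical helper (not part of the original listing)
        private int GetHashCode(object[] itemArray)
        {
            unchecked
            {
                int hash = 17;
                foreach (var item in itemArray)
                {
                    hash = hash * 31 + (item?.GetHashCode() ?? 0);
                }
                return hash;
            }
        }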
        private void ExtractLookupTableSql(BundledLookupTable lookup, IDataLoadEventListener listener, DataLoadInfo dataLoadInfo)
        {
            try
            {
                var tempDestination = new DataTableUploadDestination();

                var server = DataAccessPortal.GetInstance().ExpectServer(lookup.TableInfo, DataAccessContext.DataExport);

                listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "About to download SQL for lookup " + lookup.TableInfo.Name));
                using (var con = server.GetConnection())
                {
                    con.Open();
                    var sqlString = "SELECT * FROM " + lookup.TableInfo.Name;
                    listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "Connection opened successfully, about to send SQL command: " + sqlString));
                    var cmd = DatabaseCommandHelper.GetCommand(sqlString, con);
                    var da  = DatabaseCommandHelper.GetDataAdapter(cmd);

                    var sw = new Stopwatch();

                    sw.Start();
                    DataTable dt = new DataTable();
                    da.Fill(dt);

                    dt.TableName = GetTableName(_destinationDatabase.Server.GetQuerySyntaxHelper().GetSensibleTableNameFromString(lookup.TableInfo.Name));

                    var tableLoadInfo = dataLoadInfo.CreateTableLoadInfo("", dt.TableName, new[] { new DataSource(sqlString, DateTime.Now) }, -1);
                    tableLoadInfo.Inserts = dt.Rows.Count;

                    listener.OnProgress(this, new ProgressEventArgs("Reading from Lookup " + lookup.TableInfo.Name, new ProgressMeasurement(dt.Rows.Count, ProgressType.Records), sw.Elapsed));
                    listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "Decided on the following destination table name for Lookup: " + dt.TableName));

                    tempDestination.AllowResizingColumnsAtUploadTime = true;
                    tempDestination.PreInitialize(GetDestinationDatabase(listener), listener);
                    tempDestination.ProcessPipelineData(dt, listener, new GracefulCancellationToken());
                    tempDestination.Dispose(listener, null);

                    //end auditing it
                    tableLoadInfo.CloseAndArchive();

                    if (_request is ExtractDatasetCommand)
                    {
                        var result             = (_request as ExtractDatasetCommand).CumulativeExtractionResults;
                        var supplementalResult = result.AddSupplementalExtractionResult("SELECT * FROM " + lookup.TableInfo.Name, lookup.TableInfo);
                        supplementalResult.CompleteAudit(this.GetType(), TargetDatabaseServer.ID + "|" + GetDatabaseName() + "|" + dt.TableName, dt.Rows.Count);
                    }
                }
            }
            catch (Exception e)
            {
                listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, "Extraction of Lookup " + lookup.TableInfo.Name + " failed ", e));
                throw;
            }
        }
Example #13
        private void RaiseEvents(DataTable chunk, IDataLoadEventListener job)
        {
            if (chunk != null)
            {
                _recordsWritten += chunk.Rows.Count;

                if (TableLoadInfo != null)
                {
                    TableLoadInfo.Inserts = _recordsWritten;
                }
            }

            job.OnProgress(this, new ProgressEventArgs(_taskBeingPerformed, new ProgressMeasurement(_recordsWritten, ProgressType.Records), _timer.Elapsed));
        }
Example #14
        public DataTable ProcessPipelineData(DataTable toProcess, IDataLoadEventListener listener, GracefulCancellationToken cancellationToken)
        {
            GetCommonNamesTable(new ThrowImmediatelyCheckNotifier());

            listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "Ready to process batch with row count " + toProcess.Rows.Count));

            _timeProcessing.Start();

            //Go through each row in the table
            foreach (DataRow row in toProcess.Rows)
            {
                //for each cell in current row
                foreach (DataColumn col in toProcess.Columns)
                {
                    //if it's not a column we are skipping
                    if (ColumnsNotToEvaluate != null && ColumnsNotToEvaluate.IsMatch(col.ColumnName))
                    {
                        continue;
                    }

                    //if it is a string
                    var stringValue = row[col] as string;

                    if (stringValue != null)
                    {
                        //replace any common names with REDACTED
                        foreach (var name in _commonNames)
                        {
                            stringValue = Regex.Replace(stringValue, name, "REDACTED", RegexOptions.IgnoreCase);
                        }

                        //if string value changed
                        if (!row[col].Equals(stringValue))
                        {
                            //increment the counter of redactions made
                            _redactionsMade++;

                            //update the cell to the new value
                            row[col] = stringValue;
                        }
                    }
                }
            }

            _timeProcessing.Stop();
            listener.OnProgress(this, new ProgressEventArgs("REDACTING Names", new ProgressMeasurement(_redactionsMade, ProgressType.Records), _timeProcessing.Elapsed));

            return(toProcess);
        }
Example #15
        public DataTable ProcessPipelineData(DataTable toProcess, IDataLoadEventListener listener, GracefulCancellationToken cancellationToken)
        {
            _sw.Start();

            Regex regex = NullCellsWhereValuesMatch ?? new Regex(NullCellsWhereValuesMatchStandard.Regex);

            foreach (DataRow row in toProcess.Rows)
            {
                var val = row[ColumnNameToFind];

                //null out any cell whose ToString matches the regex (nulls and dbnulls are left untouched)
                if (val != null && val != DBNull.Value && regex.IsMatch(val.ToString()))
                {
                    row[ColumnNameToFind] = DBNull.Value;
                    _changes++;
                }
            }

            listener.OnProgress(this, new ProgressEventArgs("SetNull Rows", new ProgressMeasurement(_changes, ProgressType.Records), _sw.Elapsed));

            _sw.Stop();
            return(toProcess);
        }
Example #16
        protected bool TryExtractLookupTable(BundledLookupTable lookup, DirectoryInfo lookupDir, IDataLoadEventListener job)
        {
            Stopwatch sw = new Stopwatch();

            sw.Start();

            job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "About to extract lookup " + lookup));

            try
            {
                TryExtractLookupTableImpl(lookup, lookupDir, _request.Configuration, job, out int linesWritten, out string destinationDescription);

                sw.Stop();
                job.OnProgress(this, new ProgressEventArgs("Lookup " + lookup, new ProgressMeasurement(linesWritten, ProgressType.Records), sw.Elapsed));

                //audit in the log the extraction
                var tableLoadInfo = _dataLoadInfo.CreateTableLoadInfo("", destinationDescription, new[] { new DataSource("SELECT * FROM " + lookup.TableInfo.Name, DateTime.Now) }, -1);
                tableLoadInfo.Inserts = linesWritten;
                tableLoadInfo.CloseAndArchive();

                //audit in cumulative extraction results (determines release-ability of artifacts).
                if (_request is ExtractDatasetCommand)
                {
                    var result             = (_request as ExtractDatasetCommand).CumulativeExtractionResults;
                    var supplementalResult = result.AddSupplementalExtractionResult("SELECT * FROM " + lookup.TableInfo.Name, lookup.TableInfo);
                    supplementalResult.CompleteAudit(this.GetType(), destinationDescription, linesWritten);
                }

                return(true);
            }
            catch (Exception e)
            {
                job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, "Error occurred trying to extract lookup " + lookup + " on server " + lookup.TableInfo.Server, e));

                return(false);
            }
        }
Example #17
        public virtual DataTable GetChunk(IDataLoadEventListener listener, GracefulCancellationToken cancellationToken)
        {
            // we are in the Global Commands case, let's return an empty DataTable (not null)
            // so we can trigger the destination to extract the globals docs and sql
            if (GlobalsRequest != null)
            {
                GlobalsRequest.ElevateState(ExtractCommandState.WaitingForSQLServer);
                if (firstGlobalChunk)
                {
                    //unless we are checking, start auditing
                    StartAuditGlobals();

                    firstGlobalChunk = false;
                    return(new DataTable(ExtractionDirectory.GLOBALS_DATA_NAME));
                }

                return(null);
            }

            if (Request == null)
            {
                throw new Exception("Component has not been initialized before being asked to GetChunk(s)");
            }

            Request.ElevateState(ExtractCommandState.WaitingForSQLServer);

            if (_cancel)
            {
                throw new Exception("User cancelled data extraction");
            }

            if (_hostedSource == null)
            {
                StartAudit(Request.QueryBuilder.SQL);

                if (Request.DatasetBundle.DataSet.DisableExtraction)
                {
                    throw new Exception("Cannot extract " + Request.DatasetBundle.DataSet + " because DisableExtraction is set to true");
                }

                _hostedSource = new DbDataCommandDataFlowSource(GetCommandSQL(listener),
                                                                "ExecuteDatasetExtraction " + Request.DatasetBundle.DataSet,
                                                                Request.GetDistinctLiveDatabaseServer().Builder,
                                                                ExecutionTimeout);

                // If we are running in batches then always allow empty extractions
                _hostedSource.AllowEmptyResultSets = AllowEmptyExtractions || Request.IsBatchResume;
                _hostedSource.BatchSize            = BatchSize;
            }

            DataTable chunk = null;

            try
            {
                chunk = _hostedSource.GetChunk(listener, cancellationToken);

                chunk = _peeker.AddPeekedRowsIfAny(chunk);

                //if we are de-duplicating the records in memory based on release id
                if (DistinctStrategy == DistinctStrategy.OrderByAndDistinctInMemory)
                {
                    var releaseIdentifierColumn = Request.ReleaseIdentifierSubstitutions.First().GetRuntimeName();

                    if (chunk != null)
                    {
                        //last release id in the current chunk
                        var lastReleaseId = chunk.Rows[chunk.Rows.Count - 1][releaseIdentifierColumn];

                        _peeker.AddWhile(_hostedSource, r => Equals(r[releaseIdentifierColumn], lastReleaseId), chunk);
                        chunk = MakeDistinct(chunk, listener, cancellationToken);
                    }
                }
            }
            catch (AggregateException a)
            {
                if (a.GetExceptionIfExists <TaskCanceledException>() != null)
                {
                    _cancel = true;
                }

                throw;
            }
            catch (Exception e)
            {
                listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, "Read from source failed", e));
            }

            if (cancellationToken.IsCancellationRequested)
            {
                throw new Exception("Data read cancelled because our cancellationToken was set, aborting data reading");
            }

            //if the first chunk is null
            if (firstChunk && chunk == null && !AllowEmptyExtractions)
            {
                throw new Exception("There is no data to load, query returned no rows, query was:" + Environment.NewLine +
                                    (_hostedSource.Sql ?? Request.QueryBuilder.SQL));
            }

            //not the first chunk anymore
            firstChunk = false;

            //data exhausted
            if (chunk == null)
            {
                listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "Data exhausted after reading " + _rowsRead + " rows of data (" + UniqueReleaseIdentifiersEncountered.Count + " unique release identifiers seen)"));
                if (Request != null)
                {
                    Request.CumulativeExtractionResults.DistinctReleaseIdentifiersEncountered = Request.IsBatchResume ? -1 : UniqueReleaseIdentifiersEncountered.Count;
                }
                return(null);
            }

            _rowsRead += chunk.Rows.Count;
            //chunk will have datatypes for all the things in the buffer so we can populate our dictionary of facts about what columns/catalogue items have spontaneously changed name/type etc
            if (ExtractTimeTransformationsObserved == null)
            {
                GenerateExtractionTransformObservations(chunk);
            }


            //see if the SqlDataReader has a column with the same name as the ReleaseIdentifierSQL (if so then we can use it to count the number of distinct subjects written out to the csv)
            bool includesReleaseIdentifier = _extractionIdentifiersidx.Count > 0;


            //first line - let's see what columns we wrote out
            //looks at the buffer and computes any transforms performed on the column


            _timeSpentValidating.Start();
            //build up the validation report (Missing/Wrong/Etc) - this has no mechanical effect on the extracted data just some metadata that goes into a flat file
            if (ExtractionTimeValidator != null && Request.IncludeValidation)
            {
                try
                {
                    chunk.Columns.Add(ValidationColumnName);

                    ExtractionTimeValidator.Validate(chunk, ValidationColumnName);

                    _rowsValidated += chunk.Rows.Count;
                    listener.OnProgress(this, new ProgressEventArgs("Validation", new ProgressMeasurement(_rowsValidated, ProgressType.Records), _timeSpentValidating.Elapsed));
                }
                catch (Exception ex)
                {
                    listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, "Could not validate data chunk", ex));
                    ValidationFailureException = ex;
                    ExtractionTimeValidator    = null;
                }
            }
            _timeSpentValidating.Stop();

            _timeSpentBuckettingDates.Start();
            if (ExtractionTimeTimeCoverageAggregator != null)
            {
                _rowsBucketted += chunk.Rows.Count;

                foreach (DataRow row in chunk.Rows)
                {
                    ExtractionTimeTimeCoverageAggregator.ProcessRow(row);
                }

                listener.OnProgress(this, new ProgressEventArgs("Bucketting Dates", new ProgressMeasurement(_rowsBucketted, ProgressType.Records), _timeSpentCalculatingDISTINCT.Elapsed));
            }
            _timeSpentBuckettingDates.Stop();

            _timeSpentCalculatingDISTINCT.Start();
            //record unique release identifiers found
            if (includesReleaseIdentifier)
            {
                foreach (string idx in _extractionIdentifiersidx)
                {
                    foreach (DataRow r in chunk.Rows)
                    {
                        if (r[idx] == DBNull.Value)
                        {
                            if (_extractionIdentifiersidx.Count == 1)
                            {
                                throw new Exception("Null release identifier found in extract of dataset " + Request.DatasetBundle.DataSet);
                            }
                            else
                            {
                                continue; //there are multiple extraction identifiers so it's fine if one or two are null
                            }
                        }
                        if (!UniqueReleaseIdentifiersEncountered.Contains(r[idx]))
                        {
                            UniqueReleaseIdentifiersEncountered.Add(r[idx]);
                        }
                    }

                    listener.OnProgress(this, new ProgressEventArgs("Calculating Distinct Release Identifiers", new ProgressMeasurement(UniqueReleaseIdentifiersEncountered.Count, ProgressType.Records), _timeSpentCalculatingDISTINCT.Elapsed));
                }
            }
            _timeSpentCalculatingDISTINCT.Stop();

            return(chunk);
        }
Example #18
 private void UpdateProgressListeners()
 {
     _listener.OnProgress(this, new ProgressEventArgs("Processing Files", new ProgressMeasurement(_filesProcessedSoFar, ProgressType.Records), _stopwatch.Elapsed));
 }
Example #19
        public DataTable ProcessPipelineData(DataTable toProcess, IDataLoadEventListener listener, GracefulCancellationToken cancellationToken)
        {
            //Things we ignore, Lookups, SupportingSql etc
            if (_extractCommand == null)
            {
                listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "Ignoring non dataset command "));
                return(toProcess);
            }

            //if it isn't a dicom dataset don't process it
            if (!toProcess.Columns.Contains(RelativeArchiveColumnName))
            {
                listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Warning, "Dataset " + _extractCommand.DatasetBundle.DataSet + " did not contain field '" + RelativeArchiveColumnName + "' so we will not attempt to extract images"));
                return(toProcess);
            }

            if (_putter == null)
            {
                _putter = (IPutDicomFilesInExtractionDirectories)  new ObjectConstructor().Construct(PutterType);
            }

            var projectNumber = _extractCommand.Configuration.Project.ProjectNumber.Value;

            var mappingServer        = new MappingRepository(UIDMappingServer);
            var destinationDirectory = new DirectoryInfo(Path.Combine(_extractCommand.GetExtractionDirectory().FullName, "Images"));

            var releaseCol = _extractCommand.QueryBuilder.SelectColumns.Select(c => c.IColumn).Single(c => c.IsExtractionIdentifier);

            // See: ftp://medical.nema.org/medical/dicom/2011/11_15pu.pdf

            var flags = DicomAnonymizer.SecurityProfileOptions.BasicProfile |
                        DicomAnonymizer.SecurityProfileOptions.CleanStructdCont |
                        DicomAnonymizer.SecurityProfileOptions.CleanDesc |
                        DicomAnonymizer.SecurityProfileOptions.RetainUIDs;

            if (RetainDates)
            {
                flags |= DicomAnonymizer.SecurityProfileOptions.RetainLongFullDates;
            }

            var profile = DicomAnonymizer.SecurityProfile.LoadProfile(null, flags);

            var anonymiser = new DicomAnonymizer(profile);

            using (var pool = new ZipPool())
            {
                _sw.Start();

                foreach (DataRow row in toProcess.Rows)
                {
                    if (_errors > 0 && _errors > ErrorThreshold)
                    {
                        throw new Exception($"Number of errors reported ({_errors}) reached the threshold ({ErrorThreshold})");
                    }

                    cancellationToken.ThrowIfAbortRequested();

                    var path = new AmbiguousFilePath(ArchiveRootIfAny, (string)row[RelativeArchiveColumnName]);

                    DicomFile dicomFile;

                    try
                    {
                        dicomFile = path.GetDataset(pool);
                    }
                    catch (Exception e)
                    {
                        listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, $"Failed to get image at path '{path.FullPath}'", e));
                        _errors++;
                        continue;
                    }

                    //get the new patient ID
                    var releaseId = row[releaseCol.GetRuntimeName()].ToString();

                    DicomDataset ds;

                    try
                    {
                        ds = anonymiser.Anonymize(dicomFile.Dataset);
                    }
                    catch (Exception e)
                    {
                        listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, $"Failed to anonymize image at path '{path.FullPath}'", e));
                        _errors++;
                        continue;
                    }

                    //now we want to explicitly use our own release Id regardless of what FoDicom said
                    ds.AddOrUpdate(DicomTag.PatientID, releaseId);

                    //rewrite the UIDs
                    foreach (var kvp in UIDMapping.SupportedTags)
                    {
                        if (!ds.Contains(kvp.Key))
                        {
                            continue;
                        }

                        var value = ds.GetValue <string>(kvp.Key, 0);

                        //skip this tag if it has no value
                        if (value == null)
                        {
                            continue;
                        }
                        var releaseValue = mappingServer.GetOrAllocateMapping(value, projectNumber, kvp.Value);

                        //change value in dataset
                        ds.AddOrUpdate(kvp.Key, releaseValue);

                        //and change value in DataTable
                        if (toProcess.Columns.Contains(kvp.Key.DictionaryEntry.Keyword))
                        {
                            row[kvp.Key.DictionaryEntry.Keyword] = releaseValue;
                        }
                    }

                    var newPath = _putter.WriteOutDataset(destinationDirectory, releaseId, ds);
                    row[RelativeArchiveColumnName] = newPath;

                    _anonymisedImagesCount++;

                    listener.OnProgress(this, new ProgressEventArgs("Writing ANO images", new ProgressMeasurement(_anonymisedImagesCount, ProgressType.Records), _sw.Elapsed));
                }

                _sw.Stop();
            }

            return(toProcess);
        }
 public void OnProgress(object sender, ProgressEventArgs e)
 {
     _child.OnProgress(_overridingSender, e);
 }
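Every component in this listing reports through the same two members, OnNotify and OnProgress. Below is a minimal, hypothetical console implementation of IDataLoadEventListener written against only the members and constructor arguments visible in these examples; the property names (Message, ProgressEventType, TaskDescription, Progress, Value, TimeSpentProcessingSoFar) are assumed from those constructors and may differ in the real interface.

 //Hypothetical minimal listener (illustration only): writes notifications and
 //progress counts to the console
 public class ConsoleProgressListener : IDataLoadEventListener
 {
     public void OnNotify(object sender, NotifyEventArgs e)
     {
         Console.WriteLine("[" + e.ProgressEventType + "] " + e.Message);
     }

     public void OnProgress(object sender, ProgressEventArgs e)
     {
         Console.WriteLine(e.TaskDescription + ": " + e.Progress.Value + " (" + e.TimeSpentProcessingSoFar + ")");
     }
 }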
Example #21
        /// <summary>
        /// Generates the dita files and logs progress / errors to the <paramref name="listener"/>
        /// </summary>
        /// <param name="listener"></param>
        public void Extract(IDataLoadEventListener listener)
        {
            string xml = "";

            xml += @"<?xml version=""1.0"" encoding=""UTF-8""?>
<!DOCTYPE map PUBLIC ""-//OASIS//DTD DITA Map//EN""
""map.dtd"">" + Environment.NewLine;
            xml += "<map>" + Environment.NewLine;
            xml += "<title>HIC Data Catalogue</title>" + Environment.NewLine;

            xml += @"<topicmeta product=""hicdc"" rev=""1"">" + Environment.NewLine;
            xml += "<author>Wilfred Bonney; Thomas Nind; Mikhail Ghattas</author>" + Environment.NewLine;
            xml += "<publisher>Health Informatics Centre (HIC), University of Dundee</publisher>" + Environment.NewLine;
            xml += "</topicmeta>" + Environment.NewLine;


            xml += @"<topicref href=""introduction.dita""/>" + Environment.NewLine;
            GenerateIntroductionFile("introduction.dita");

            xml += @"<topicref href=""dataset.dita"">" + Environment.NewLine;
            GenerateDataSetFile("dataset.dita");

            xml += Environment.NewLine;

            //get all the catalogues then sort them alphabetically
            List <Catalogue> catas = new List <Catalogue>(_repository.GetAllObjects <Catalogue>().Where(c => !(c.IsDeprecated || c.IsInternalDataset || c.IsColdStorageDataset)));

            catas.Sort();

            Stopwatch sw = Stopwatch.StartNew();

            int cataloguesCompleted = 0;

            foreach (Catalogue c in catas)
            {
                listener.OnProgress(this, new ProgressEventArgs("Extracting", new ProgressMeasurement(cataloguesCompleted++, ProgressType.Records, catas.Count), sw.Elapsed));

                //ensure that it has an acronym
                if (string.IsNullOrWhiteSpace(c.Acronym))
                {
                    throw new Exception("Dita Extraction requires that each catalogue have a unique Acronym, the catalogue " + c.Name + " is missing an Acronym");
                }

                if (c.Name.Contains("\\") || c.Name.Contains("/"))
                {
                    throw new Exception("Dita Extractor does not support catalogues with backslashes or forward slashs in their name");
                }

                //catalogue main file
                xml += "<topicref href=\"" + GetFileNameForCatalogue(c) + "\">" + Environment.NewLine;
                CreateCatalogueFile(c);

                //catalogue items
                List <CatalogueItem> cataItems = c.CatalogueItems.ToList();
                cataItems.Sort();

                foreach (CatalogueItem ci in cataItems)
                {
                    xml += "<topicref href=\"" + GetFileNameForCatalogueItem(c, ci) + "\"/>" + Environment.NewLine;
                    CreateCatalogueItemFile(c, ci);
                }
                xml += "</topicref>" + Environment.NewLine;

                //completed - mostly for end of loop tbh
                listener.OnProgress(this, new ProgressEventArgs("Extracting", new ProgressMeasurement(cataloguesCompleted, ProgressType.Records, catas.Count), sw.Elapsed));
            }

            xml += Environment.NewLine;
            xml += @"</topicref>" + Environment.NewLine;
            xml += "</map>";

            File.WriteAllText(Path.Combine(_folderToCreateIn.FullName, "hic_data_catalogue.ditamap"), xml);
        }
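The DITA map above is assembled by repeated concatenation onto the xml string, which re-allocates the growing string on every append. A purely illustrative sketch of the same header block using a StringBuilder (assuming System.Text is in scope):

            //Illustrative only: the same map header assembled with a StringBuilder
            var xml = new StringBuilder();
            xml.AppendLine(@"<?xml version=""1.0"" encoding=""UTF-8""?>");
            xml.AppendLine(@"<!DOCTYPE map PUBLIC ""-//OASIS//DTD DITA Map//EN""");
            xml.AppendLine(@"""map.dtd"">");
            xml.AppendLine("<map>");
            xml.AppendLine("<title>HIC Data Catalogue</title>");
            //...remaining sections appended the same way, then written out with
            //File.WriteAllText(Path.Combine(_folderToCreateIn.FullName, "hic_data_catalogue.ditamap"), xml.ToString());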
Example #22
        protected virtual void Download(string file, ILoadDirectory destination, IDataLoadEventListener job)
        {
            Stopwatch s = new Stopwatch();

            s.Start();

            string uri;

            if (!string.IsNullOrWhiteSpace(RemoteDirectory))
            {
                uri = "ftp://" + _host + "/" + RemoteDirectory + "/" + file;
            }
            else
            {
                uri = "ftp://" + _host + "/" + file;
            }

            if (_useSSL)
            {
                uri = "s" + uri;
            }

            Uri serverUri = new Uri(uri);

            if (serverUri.Scheme != Uri.UriSchemeFtp)
            {
                return;
            }

            FtpWebRequest reqFTP;

            reqFTP             = (FtpWebRequest)FtpWebRequest.Create(new Uri(uri));
            reqFTP.Credentials = new NetworkCredential(_username, _password);
            reqFTP.KeepAlive   = false;
            reqFTP.Method      = WebRequestMethods.Ftp.DownloadFile;
            reqFTP.UseBinary   = true;
            reqFTP.Proxy       = null;
            reqFTP.UsePassive  = true;
            reqFTP.EnableSsl   = _useSSL;
            reqFTP.Timeout     = TimeoutInSeconds * 1000;

            FtpWebResponse response            = (FtpWebResponse)reqFTP.GetResponse();
            Stream         responseStream      = response.GetResponseStream();
            string         destinationFileName = Path.Combine(destination.ForLoading.FullName, file);

            using (FileStream writeStream = new FileStream(destinationFileName, FileMode.Create))
            {
                int    Length              = 2048;
                Byte[] buffer              = new Byte[Length];
                int    bytesRead           = responseStream.Read(buffer, 0, Length);
                int    totalBytesReadSoFar = bytesRead;

                while (bytesRead > 0)
                {
                    writeStream.Write(buffer, 0, bytesRead);
                    bytesRead = responseStream.Read(buffer, 0, Length);


                    //notify whoever is listening of how far along the process we are
                    totalBytesReadSoFar += bytesRead;
                    job.OnProgress(this, new ProgressEventArgs(destinationFileName, new ProgressMeasurement(totalBytesReadSoFar / 1024, ProgressType.Kilobytes), s.Elapsed));
                }
                writeStream.Close();
            }

            response.Close();

            _filesRetrieved.Add(serverUri.ToString());
            s.Stop();
        }
Example #23
        public DataTable GetChunk(IDataLoadEventListener job, GracefulCancellationToken cancellationToken)
        {
            if (_reader == null)
            {
                _con = DatabaseCommandHelper.GetConnection(_builder);
                _con.Open();

                job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "Running SQL:" + Environment.NewLine + Sql));

                cmd = DatabaseCommandHelper.GetCommand(Sql, _con);
                cmd.CommandTimeout = _timeout;
                CommandAdjuster?.Invoke(cmd);

                _reader          = cmd.ExecuteReaderAsync(cancellationToken.AbortToken).Result;
                _numberOfColumns = _reader.FieldCount;
            }

            int readThisBatch = 0;

            timer.Start();
            try
            {
                DataTable chunk = GetChunkSchema(_reader);

                while (_reader.Read())
                {
                    AddRowToDataTable(chunk, _reader);
                    readThisBatch++;

                    //we reached batch limit
                    if (readThisBatch == BatchSize)
                    {
                        return(chunk);
                    }
                }

                //if data was read
                if (readThisBatch > 0)
                {
                    return(chunk);
                }

                //data is exhausted

                //if data was exhausted on first read and we are allowing empty result sets
                if (firstChunk && AllowEmptyResultSets)
                {
                    return(chunk);//return the empty chunk
                }
                //data exhausted
                return(null);
            }
            catch (Exception e)
            {
                job.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, "Source read failed", e));
                throw;
            }
            finally
            {
                firstChunk = false;
                timer.Stop();
                job.OnProgress(this, new ProgressEventArgs(_taskBeingPerformed, new ProgressMeasurement(TotalRowsRead, ProgressType.Records), timer.Elapsed));
            }
        }
Example #24
 public void OnProgress(object sender, ProgressEventArgs e)
 {
     _listener.OnProgress(sender, e);
 }
Example #25
        /// <inheritdoc/>
        public void ReverseAnonymiseDataTable(DataTable toProcess, IDataLoadEventListener listener, bool allowCaching)
        {
            int haveWarnedAboutTop1AlreadyCount = 10;

            var syntax = ExternalCohortTable.GetQuerySyntaxHelper();

            string privateIdentifier = syntax.GetRuntimeName(GetPrivateIdentifier());
            string releaseIdentifier = syntax.GetRuntimeName(GetReleaseIdentifier());

            //if we don't want to support caching or there is no cached value yet
            if (!allowCaching || _releaseToPrivateKeyDictionary == null)
            {
                DataTable map = FetchEntireCohort();


                Stopwatch sw = new Stopwatch();
                sw.Start();
                //dictionary of released values (for the cohort) back to private values
                _releaseToPrivateKeyDictionary = new Dictionary <string, string>();
                foreach (DataRow r in map.Rows)
                {
                    if (_releaseToPrivateKeyDictionary.Keys.Contains(r[releaseIdentifier]))
                    {
                        if (haveWarnedAboutTop1AlreadyCount > 0)
                        {
                            haveWarnedAboutTop1AlreadyCount--;
                            listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Warning, "Top 1-ing will occur for release identifier " + r[releaseIdentifier] + " because it maps to multiple private identifiers"));
                        }
                        else
                        {
                            if (haveWarnedAboutTop1AlreadyCount == 0)
                            {
                                haveWarnedAboutTop1AlreadyCount = -1;
                                listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Warning, "Top 1-ing error message disabled due to flood of messages"));
                            }
                        }
                    }
                    else
                    {
                        _releaseToPrivateKeyDictionary.Add(r[releaseIdentifier].ToString().Trim(), r[privateIdentifier].ToString().Trim());
                    }

                    _reverseAnonymiseProgressFetchingMap++;

                    if (_reverseAnonymiseProgressFetchingMap % 500 == 0)
                    {
                        listener.OnProgress(this, new ProgressEventArgs("Assembling Release Map Dictionary", new ProgressMeasurement(_reverseAnonymiseProgressFetchingMap, ProgressType.Records), sw.Elapsed));
                    }
                }

                listener.OnProgress(this, new ProgressEventArgs("Assembling Release Map Dictionary", new ProgressMeasurement(_reverseAnonymiseProgressFetchingMap, ProgressType.Records), sw.Elapsed));
            }
            int nullsFound    = 0;
            int substitutions = 0;

            Stopwatch sw2 = new Stopwatch();

            sw2.Start();

            //fix values
            foreach (DataRow row in toProcess.Rows)
            {
                try
                {
                    object value = row[releaseIdentifier];

                    if (value == null || value == DBNull.Value)
                    {
                        nullsFound++;
                        continue;
                    }

                    row[releaseIdentifier] = _releaseToPrivateKeyDictionary[value.ToString().Trim()].Trim();//swap release value for private value (reversing the anonymisation)
                    substitutions++;

                    _reverseAnonymiseProgressReversing++;

                    if (_reverseAnonymiseProgressReversing % 500 == 0)
                    {
                        listener.OnProgress(this, new ProgressEventArgs("Substituting Release Identifiers For Private Identifiers", new ProgressMeasurement(_reverseAnonymiseProgressReversing, ProgressType.Records), sw2.Elapsed));
                    }
                }
                catch (KeyNotFoundException e)
                {
                    throw new Exception("Could not find private identifier (" + privateIdentifier + ") for the release identifier (" + releaseIdentifier + ") with value '" + row[releaseIdentifier] + "' in cohort with cohortDefinitionID " + OriginID, e);
                }
            }

            //final value
            listener.OnProgress(this, new ProgressEventArgs("Substituting Release Identifiers For Private Identifiers", new ProgressMeasurement(_reverseAnonymiseProgressReversing, ProgressType.Records), sw2.Elapsed));

            if (nullsFound > 0)
            {
                listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Warning, "Found " + nullsFound + " null release identifiers amongst the " + toProcess.Rows.Count + " rows of the input data table (on which we were attempting to reverse annonymise)"));
            }

            listener.OnNotify(this, new NotifyEventArgs(substitutions > 0 ? ProgressEventType.Information : ProgressEventType.Error, "Substituted " + substitutions + " release identifiers for private identifiers in input data table (input data table contained " + toProcess.Rows.Count + " rows)"));

            toProcess.Columns[releaseIdentifier].ColumnName = privateIdentifier;
        }
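A hypothetical call site for ReverseAnonymiseDataTable is sketched below. Here 'cohort' stands in for whatever object exposes the method above, the column names are invented for illustration (the release identifier column must match the cohort's actual release identifier name), and ConsoleDataLoadEventListener is the sketch listener from earlier in this listing.

//hypothetical usage sketch - 'cohort' and the column names are illustrative only
var toProcess = new DataTable();
toProcess.Columns.Add("ReleaseId");      //must match the cohort's release identifier column name
toProcess.Columns.Add("AdmissionDate");
toProcess.Rows.Add("R00001", "2001-01-01");

cohort.ReverseAnonymiseDataTable(toProcess, new ConsoleDataLoadEventListener(), allowCaching: true);

//after the call the release identifier column has been renamed to the private identifier
//and its values have been replaced with the mapped private values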
Exemplo n.º 26
0
        private ExtractCommandState ExtractSupportingSql(SupportingSQLTable sql, IDataLoadEventListener listener, DataLoadInfo dataLoadInfo)
        {
            try
            {
                var tempDestination = new DataTableUploadDestination();

                listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "About to download SQL for global SupportingSQL " + sql.Name));
                using (var con = sql.GetServer().GetConnection())
                {
                    con.Open();
                    listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "Connection opened successfully, about to send SQL command " + sql.SQL));
                    var cmd = DatabaseCommandHelper.GetCommand(sql.SQL, con);
                    var da  = DatabaseCommandHelper.GetDataAdapter(cmd);

                    var sw = new Stopwatch();

                    sw.Start();
                    DataTable dt = new DataTable();
                    da.Fill(dt);

                    dt.TableName = GetTableName(_destinationDatabase.Server.GetQuerySyntaxHelper().GetSensibleTableNameFromString(sql.Name));

                    var tableLoadInfo = dataLoadInfo.CreateTableLoadInfo("", dt.TableName, new[] { new DataSource(sql.SQL, DateTime.Now) }, -1);
                    tableLoadInfo.Inserts = dt.Rows.Count;

                    listener.OnProgress(this, new ProgressEventArgs("Reading from SupportingSQL " + sql.Name, new ProgressMeasurement(dt.Rows.Count, ProgressType.Records), sw.Elapsed));
                    listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "Decided on the following destination table name for SupportingSQL: " + dt.TableName));

                    tempDestination.AllowResizingColumnsAtUploadTime = true;
                    tempDestination.PreInitialize(GetDestinationDatabase(listener), listener);
                    tempDestination.ProcessPipelineData(dt, listener, new GracefulCancellationToken());
                    tempDestination.Dispose(listener, null);

                    //end auditing it
                    tableLoadInfo.CloseAndArchive();

                    if (_request is ExtractDatasetCommand)
                    {
                        var result             = (_request as ExtractDatasetCommand).CumulativeExtractionResults;
                        var supplementalResult = result.AddSupplementalExtractionResult(sql.SQL, sql);
                        supplementalResult.CompleteAudit(this.GetType(), TargetDatabaseServer.ID + "|" + GetDatabaseName() + "|" + dt.TableName, dt.Rows.Count);
                    }
                    else
                    {
                        var extractGlobalsCommand = (_request as ExtractGlobalsCommand);
                        Debug.Assert(extractGlobalsCommand != null, "extractGlobalsCommand != null");
                        var result =
                            new SupplementalExtractionResults(extractGlobalsCommand.RepositoryLocator.DataExportRepository,
                                                              extractGlobalsCommand.Configuration,
                                                              sql.SQL,
                                                              sql);
                        result.CompleteAudit(this.GetType(), TargetDatabaseServer.ID + "|" + GetDatabaseName() + "|" + dt.TableName, dt.Rows.Count);
                        extractGlobalsCommand.ExtractionResults.Add(result);
                    }
                }
            }
            catch (Exception e)
            {
                listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, "Extraction of SupportingSQL " + sql + " failed ", e));
                return(ExtractCommandState.Crashed);
            }

            return(ExtractCommandState.Completed);
        }
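The upload pattern at the heart of ExtractSupportingSql (PreInitialize, ProcessPipelineData, Dispose on a DataTableUploadDestination) can be used on its own. A sketch follows; 'destinationDatabase' is assumed to be whatever already-discovered database object GetDestinationDatabase returns, and the table and column contents are invented for illustration.

//sketch of the standalone upload pattern; 'destinationDatabase' and the DataTable contents are illustrative
var dt = new DataTable("SupportingLookup");
dt.Columns.Add("Code");
dt.Columns.Add("Description");
dt.Rows.Add("A01", "Example row");

var listener    = new ConsoleDataLoadEventListener();
var destination = new DataTableUploadDestination
{
    AllowResizingColumnsAtUploadTime = true   //let the destination widen columns if the data requires it
};

destination.PreInitialize(destinationDatabase, listener);                       //target database for the upload
destination.ProcessPipelineData(dt, listener, new GracefulCancellationToken()); //creates the table and uploads the rows
destination.Dispose(listener, null);                                            //flushes the final batch and ends the upload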
Exemplo n.º 27
0
        public FileInfo GenerateWordFile(IDataLoadEventListener listener, bool showFile)
        {
            try
            {
                //if there's only one catalogue call it 'prescribing.docx' etc
                string filename = _args.Catalogues.Length == 1 ? _args.Catalogues[0].Name : "MetadataReport";

                using (var document = GetNewDocFile(filename))
                {
                    PageWidthInPixels = GetPageWidth();

                    var sw = Stopwatch.StartNew();

                    try
                    {
                        int completed = 0;


                        foreach (Catalogue c in _args.Catalogues.OrderBy(c => c.Name))
                        {
                            listener.OnProgress(this, new ProgressEventArgs("Extracting", new ProgressMeasurement(completed++, ProgressType.Records, _args.Catalogues.Length), sw.Elapsed));

                            int    recordCount         = -1;
                            int    distinctRecordCount = -1;
                            string identifierName      = null;

                            bool gotRecordCount = false;
                            try
                            {
                                if (_args.IncludeRowCounts)
                                {
                                    GetRecordCount(c, out recordCount, out distinctRecordCount, out identifierName);
                                    gotRecordCount = true;
                                }
                            }
                            catch (Exception e)
                            {
                                listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, "Error processing record count for Catalogue " + c.Name, e));
                            }

                            InsertHeader(document, c.Name);

                            //assume we don't know the age of the dataset
                            DateTime?accurateAsOf = null;

                            //get the age of the dataset if known and output it
                            if (_args.TimespanCalculator != null)
                            {
                                string timespan = _args.TimespanCalculator.GetHumanReadableTimespanIfKnownOf(c, true, out accurateAsOf);
                                if (!string.IsNullOrWhiteSpace(timespan) && !timespan.Equals("Unknown"))
                                {
                                    InsertParagraph(document, timespan + (accurateAsOf.HasValue ? "*" : ""), TextFontSize);
                                }
                            }

                            InsertParagraph(document, c.Description, TextFontSize);

                            if (accurateAsOf.HasValue)
                            {
                                InsertParagraph(document, "* Based on DQE run on " + accurateAsOf.Value, TextFontSize - 2);
                            }

                            if (gotRecordCount)
                            {
                                InsertHeader(document, "Record Count", 3);
                                CreateCountTable(document, recordCount, distinctRecordCount, identifierName);
                            }

                            if (!_args.SkipImages && RequestCatalogueImages != null)
                            {
                                BitmapWithDescription[] onRequestCatalogueImages = RequestCatalogueImages(c);

                                if (onRequestCatalogueImages.Any())
                                {
                                    InsertHeader(document, "Aggregates", 2);
                                    AddImages(document, onRequestCatalogueImages);
                                }
                            }

                            CreateDescriptionsTable(document, c);

                            if (_args.IncludeNonExtractableItems)
                            {
                                CreateNonExtractableColumnsTable(document, c);
                            }

                            //if this is not the last Catalogue create a new page
                            if (completed != _args.Catalogues.Length)
                            {
                                InsertSectionPageBreak(document);
                            }

                            listener.OnProgress(this, new ProgressEventArgs("Extracting", new ProgressMeasurement(completed, ProgressType.Records, _args.Catalogues.Length), sw.Elapsed));
                        }

                        if (LookupsEncounteredToAppearInAppendix.Any())
                        {
                            CreateLookupAppendix(document, listener);
                        }

                        if (showFile)
                        {
                            ShowFile(document);
                        }

                        SetMargins(document, 20);

                        AddFooter(document, "Created on " + DateTime.Now, TextFontSize);

                        return(document.FileInfo);
                    }
                    catch (ThreadInterruptedException)
                    {
                        //user hit abort
                    }
                }
            }
            catch (Exception e)
            {
                listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, "Entire process failed, see Exception for details", e));
            }

            return(null);
        }
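A hypothetical call site for GenerateWordFile is sketched below; 'report' is assumed to be an already-constructed instance of the class above (its constructor and _args are not shown in this listing), and the listener is the console sketch from earlier.

//hypothetical usage - 'report' is an already-constructed instance of the class above
FileInfo wordFile = report.GenerateWordFile(new ConsoleDataLoadEventListener(), showFile: false);

if (wordFile == null)
    Console.WriteLine("Report generation failed or was aborted - see listener output for details");
else
    Console.WriteLine("Report written to " + wordFile.FullName);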
Exemplo n.º 28
0
        public override SMIDataChunk DoGetChunk(ICacheFetchRequest cacheRequest, IDataLoadEventListener listener, GracefulCancellationToken cancellationToken)
        {
            #region assigns
            var dicomConfiguration  = GetConfiguration();
            var requestSender       = new DicomRequestSender(dicomConfiguration, listener);
            var dateFrom            = Request.Start;
            var dateTo              = Request.End;
            var hasTransferTimedOut = false;
            CachingSCP.LocalAet = LocalAETitle;
            CachingSCP.Listener = listener;

            if (PatientIdWhitelistColumnInfo != null && !IgnoreWhiteList)
            {
                GetWhitelist(listener);
            }


            //temp dir
            var cacheDir    = new LoadDirectory(Request.CacheProgress.LoadProgress.LoadMetadata.LocationOfFlatFiles).Cache;
            var cacheLayout = new SMICacheLayout(cacheDir, new SMICachePathResolver(Modality));


            Chunk = new SMIDataChunk(Request)
            {
                FetchDate = dateFrom,
                Modality  = Modality,
                Layout    = cacheLayout
            };
            //            IOrder order = new ItemsBasedOrder(dateFrom, dateTo, PlacementMode.PlaceThenFill,OrderLevel, listener);
            IOrder  order                = new HierarchyBasedOrder(dateFrom, dateTo, PlacementMode.PlaceThenFill, OrderLevel, listener);
            IPicker picker               = null;
            var     pickerFilled         = false;
            var     transferTimeOutTimer = new Timer(dicomConfiguration.TransferTimeOutInMilliseconds);
            transferTimeOutTimer.Elapsed += (source, eventArgs) =>
            {
                hasTransferTimedOut = true;
                listener.OnNotify(this,
                                  new NotifyEventArgs(ProgressEventType.Information, "Transfer Timeout Exception Generated"));
                throw new TimeoutException("Transfer Timeout Exception");
            };

            CachingSCP.OnEndProcessingCStoreRequest = (storeRequest, storeResponse) =>
            {
                var item = new Item(storeRequest);
                transferTimeOutTimer.Reset();
                if (picker != null)
                {
                    picker.Fill(item);
                    pickerFilled = picker.IsFilled();
                }
                SaveSopInstance(storeRequest, cacheLayout, listener);
                listener.OnNotify(this,
                                  new NotifyEventArgs(ProgressEventType.Debug,
                                                      "Stored sopInstance" + storeRequest.SOPInstanceUID.UID));
            };
            #endregion

            //helps with tidying up resources if we abort or throw an exception, and neatly avoids the 'Access to disposed closure' warning
            using (var server = (DicomServer <CachingSCP>)DicomServer.Create <CachingSCP>(dicomConfiguration.LocalAetUri.Port))
            {
                try
                {
                    // Find a list of studies
                    #region Query

                    listener.OnNotify(this,
                                      new NotifyEventArgs(ProgressEventType.Information,
                                                          "Requesting Studies from " + dateFrom + " to " + dateTo));
                    var studyUids = new List <string>();
                    var request   = CreateStudyRequestByDateRangeForModality(dateFrom, dateTo, Modality);
                    request.OnResponseReceived += (req, response) =>
                    {
                        if (Filter(_whitelist, response))
                        {
                            studyUids.Add(response.Dataset.GetSingleValue <string>(DicomTag.StudyInstanceUID));
                        }
                    };
                    requestSender.ThrottleRequest(request, cancellationToken);
                    listener.OnNotify(this,
                                      new NotifyEventArgs(ProgressEventType.Debug,
                                                          "Total filtered studies for " + dateFrom + " to " + dateTo + "is " + studyUids.Count));
                    foreach (var studyUid in studyUids)
                    {
                        listener.OnNotify(this,
                                          new NotifyEventArgs(ProgressEventType.Debug,
                                                              "Sending series query for study" + studyUid));
                        var seriesUids = new List <string>();
                        request = CreateSeriesRequestByStudyUid(studyUid);
                        request.OnResponseReceived += (req, response) =>
                        {
                            if (response.Dataset == null)
                            {
                                return;
                            }
                            var seriesInstanceUID = response.Dataset.GetSingleValue <string>(DicomTag.SeriesInstanceUID);
                            if (seriesInstanceUID != null)
                            {
                                seriesUids.Add(seriesInstanceUID);
                            }
                        };
                        requestSender.ThrottleRequest(request, cancellationToken);
                        listener.OnNotify(this,
                                          new NotifyEventArgs(ProgressEventType.Debug,
                                                              "Total series for " + studyUid + "is " + seriesUids.Count));
                        foreach (var seriesUid in seriesUids)
                        {
                            listener.OnNotify(this,
                                              new NotifyEventArgs(ProgressEventType.Debug,
                                                                  "Sending image query for series" + seriesUid));
                            request = CreateSopRequestBySeriesUid(seriesUid);
                            int imageCount = 0;
                            request.OnResponseReceived += (req, response) =>
                            {
                                if (response.Dataset == null)
                                {
                                    return;
                                }

                                var sopUid    = response.Dataset.GetSingleValue <string>(DicomTag.SOPInstanceUID);
                                var patientId = response.Dataset.GetSingleValue <string>(DicomTag.PatientID);

                                if (sopUid != null && patientId != null)
                                {
                                    //Place order
                                    order.Place(patientId, studyUid, seriesUid, sopUid);
                                    imageCount++;
                                }
                            };
                            requestSender.ThrottleRequest(request, cancellationToken);
                            listener.OnNotify(this,
                                              new NotifyEventArgs(ProgressEventType.Debug,
                                                                  "Successfully finished image query for " + seriesUid + " Toal images in series = " + imageCount));
                        }
                        listener.OnNotify(this,
                                          new NotifyEventArgs(ProgressEventType.Debug,
                                                              "Successfully finished series query for " + studyUid));
                    }
                    listener.OnNotify(this,
                                      new NotifyEventArgs(ProgressEventType.Debug,
                                                          "Successfully finished query phase"));

                    #endregion
                    //go and get them
                    #region Retrieval

                    var transferStopwatch = new Stopwatch();
                    //start building request to fill orders
                    //get the picker - the for loop avoids sleeping after all the transfers have finished and then attempting to dequeue from an empty queue
                    for (int delay = 0, transferTimerPollingPeriods;
                         order.HasNextPicker() && !hasTransferTimedOut;
                         delay = (int)(dicomConfiguration.TransferDelayFactor * transferTimerPollingPeriods * dicomConfiguration.TransferPollingInMilliseconds)
                                 + dicomConfiguration.TransferCooldownInMilliseconds
                         )
                    {
                        transferStopwatch.Restart();
                        //delay value in mills
                        if (delay != 0)
                        {
                            listener.OnNotify(this,
                                              new NotifyEventArgs(ProgressEventType.Information,
                                                                  "Transfers sleeping for " + delay / 1000 + "seconds"));
                            Task.Delay(delay, cancellationToken.AbortToken).Wait(cancellationToken.AbortToken);
                        }

                        //set this here prior to request
                        pickerFilled = false;
                        transferTimerPollingPeriods = 0;
                        //get next picker
                        picker = order.NextPicker();
                        //  A CMove will be performed if the storescp exists and this storescp is known to the QRSCP:
                        var cMoveRequest = picker.GetDicomCMoveRequest(LocalAETitle);

                        /* this won't work which means we cannot enforce (low) priority
                         * cMoveRequest.Priority=DicomPriority.Low;*/

                        cMoveRequest.OnResponseReceived += (requ, response) =>
                        {
                            if (response.Status.State == DicomState.Pending)
                            {
                                listener.OnNotify(this,
                                                  new NotifyEventArgs(ProgressEventType.Debug,
                                                                      "Request: " + requ.ToString() + "items remaining: " + response.Remaining));
                            }
                            else if (response.Status.State == DicomState.Success)
                            {
                                listener.OnNotify(this,
                                                  new NotifyEventArgs(ProgressEventType.Debug,
                                                                      "Request: " + requ.ToString() + "completed successfully"));
                            }
                            else if (response.Status.State == DicomState.Failure)
                            {
                                listener.OnNotify(this,
                                                  new NotifyEventArgs(ProgressEventType.Debug,
                                                                      "Request: " + requ.ToString() + "failed to download: " + response.Failures));
                            }
                        };

                        listener.OnProgress(this,
                                            new ProgressEventArgs(CMoveRequestToString(cMoveRequest),
                                                                  new ProgressMeasurement(picker.Filled(), ProgressType.Records, picker.Total()),
                                                                  transferStopwatch.Elapsed));
                        //do not use requestSender.ThrottleRequest(cMoveRequest, cancellationToken);
                        //TODO: is there any need to throttle this request given its lifetime?
                        DicomClient client = new DicomClient();
                        requestSender.ThrottleRequest(cMoveRequest, client, cancellationToken);
                        transferTimeOutTimer.Reset();
                        while (!pickerFilled && !hasTransferTimedOut)
                        {
                            Task.Delay(dicomConfiguration.TransferPollingInMilliseconds, cancellationToken.AbortToken)
                            .Wait(cancellationToken.AbortToken);
                            transferTimerPollingPeriods++;
                        }
                        transferTimeOutTimer.Stop();
                        client.Release();
                        listener.OnProgress(this,
                                            new ProgressEventArgs(CMoveRequestToString(cMoveRequest),
                                                                  new ProgressMeasurement(picker.Filled(), ProgressType.Records, picker.Total()),
                                                                  transferStopwatch.Elapsed));
                    }

                    #endregion
                }
                finally
                {
                    server.Stop();
                }
            }
            return(Chunk);
        }
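The retrieval loop above waits for each picker to fill while resetting a transfer timeout on every stored image. A simplified, self-contained variant of that wait is sketched below using a Stopwatch in place of the codebase's Timer/Reset helper; the names and values are illustrative, and in real use the C-STORE callback is what would set pickerFilled and restart the idle stopwatch.

//simplified sketch of the 'poll until the picker is filled or transfers go idle' wait
int  pollMs       = 5000;    //stands in for dicomConfiguration.TransferPollingInMilliseconds
int  timeoutMs    = 120000;  //stands in for dicomConfiguration.TransferTimeOutInMilliseconds
bool pickerFilled = false;   //would be set true by the C-STORE callback when picker.IsFilled()

var idle = System.Diagnostics.Stopwatch.StartNew();  //restart this from the C-STORE callback
                                                     //(the equivalent of transferTimeOutTimer.Reset() above)
while (!pickerFilled && idle.ElapsedMilliseconds < timeoutMs)
    System.Threading.Thread.Sleep(pollMs);

bool hasTransferTimedOut = !pickerFilled;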
Exemplo n.º 29
0
        public async void SendToAllRemotes <T>(T[] toSendAll, Action callback = null) where  T : IMapsDirectlyToDatabaseTable
        {
            listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "Ready to send " + toSendAll.Length + " " + typeof(T).Name + " items to all remotes."));
            var done = new Dictionary <string, int>();

            foreach (var remoteRDMP in remotes)
            {
                listener.OnProgress(this, new ProgressEventArgs(remoteRDMP.Name, new ProgressMeasurement(0, ProgressType.Records, toSendAll.Length), new TimeSpan()));
            }

            var tasks = new List <Task>();

            foreach (var remote in remotes)
            {
                done.Add(remote.Name, 0);

                foreach (var toSend in toSendAll)
                {
                    if (!_gatherer.CanGatherDependencies(toSend))
                    {
                        throw new Exception("Type " + typeof(T) + " is not supported yet by Gatherer and therefore cannot be shared");
                    }

                    var share = _gatherer.GatherDependencies(toSend).ToShareDefinitionWithChildren(_shareManager);
                    var json  = JsonConvertExtensions.SerializeObject(share, _repositoryLocator);

                    var handler = new HttpClientHandler()
                    {
                        Credentials = new NetworkCredential(remote.Username, remote.GetDecryptedPassword())
                    };

                    HttpResponseMessage result;

                    var apiUrl = remote.GetUrlFor <T>();

                    RemoteRDMP remote1 = remote;
                    T          toSend1 = toSend;

                    var sender = new Task(() =>
                    {
                        using (var client = new HttpClient(handler))
                        {
                            try
                            {
                                result = client.PostAsync(new Uri(apiUrl), new StringContent(json, Encoding.UTF8, "text/plain")).Result;
                                if (result.IsSuccessStatusCode)
                                {
                                    listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "Sending " + toSend1 + " to " + remote1.Name + " completed."));
                                }
                                else
                                {
                                    listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error,
                                                                                "Error sending " + toSend1 + " to " + remote1.Name + ": " +
                                                                                result.ReasonPhrase + " - " +
                                                                                result.Content.ReadAsStringAsync().Result));
                                }
                                lock (done)
                                {
                                    listener.OnProgress(this, new ProgressEventArgs(remote1.Name, new ProgressMeasurement(++done[remote1.Name], ProgressType.Records, toSendAll.Length), new TimeSpan()));
                                }
                            }
                            catch (Exception ex)
                            {
                                listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, "Error sending " + toSend1 + " to " + remote1.Name, ex));
                                listener.OnProgress(this, new ProgressEventArgs(remote1.Name, new ProgressMeasurement(1, ProgressType.Records, 1), new TimeSpan()));
                            }
                        }
                    });
                    sender.Start();
                    tasks.Add(sender);
                }
            }

            await Task.WhenAll(tasks);

            if (callback != null)
            {
                callback();
            }
        }
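A hypothetical caller for SendToAllRemotes is sketched below; because the method is async void, the optional callback is the caller's only reliable completion signal. 'sender' and GetCataloguesToShare are placeholders for illustration, assuming Catalogue satisfies the IMapsDirectlyToDatabaseTable constraint.

//hypothetical usage - 'sender' is an already-configured instance of the class above
Catalogue[] toShare = GetCataloguesToShare();   //made-up helper, returns the objects to push

sender.SendToAllRemotes(toShare, () =>
    Console.WriteLine("Finished sending " + toShare.Length + " Catalogues to all remotes"));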