public void BuildRecordList_WithData()
{
    var testData = new Dictionary<string, List<string>>
    {
        { "foo", new List<string> { "2.dcm", "1.dcm", } },
        { "bar", new List<string> { "3.dcm", "2.dcm", "1.dcm", } },
    };

    // Records should come back ordered by failure value, then by file path
    var expected = new List<TagDataFullCsvRecord>
    {
        new TagDataFullCsvRecord("ScanOptions", "bar", "1.dcm"),
        new TagDataFullCsvRecord("ScanOptions", "bar", "2.dcm"),
        new TagDataFullCsvRecord("ScanOptions", "bar", "3.dcm"),
        new TagDataFullCsvRecord("ScanOptions", "foo", "1.dcm"),
        new TagDataFullCsvRecord("ScanOptions", "foo", "2.dcm"),
    };

    List<TagDataFullCsvRecord> actual = TagDataFullCsvRecord.BuildRecordList("ScanOptions", testData).ToList();

    Assert.AreEqual(expected, actual);
}
public void Constructor_ThrowsArgumentException_OnInvalidArgs(string tagName, string failureValue, string filePath)
{
    Assert.Throws<ArgumentException>(() =>
    {
        var _ = new TagDataFullCsvRecord(tagName, failureValue, filePath);
    });
}
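// How the parameterised test above gets its inputs is not shown here. A minimal sketch of one
// way to drive it, assuming NUnit (inferred from the Assert.Throws / Assert.AreEqual style used
// in these tests); the specific invalid argument combinations below are illustrative, not taken
// from the real fixture:
//
//     [TestCase(null, "bar", "1.dcm")]
//     [TestCase("", "bar", "1.dcm")]
//     [TestCase("foo", null, "1.dcm")]
//     [TestCase("foo", "", "1.dcm")]
//     [TestCase("foo", "bar", null)]
//     [TestCase("foo", "bar", "")]
//     public void Constructor_ThrowsArgumentException_OnInvalidArgs(string tagName, string failureValue, string filePath) { /* body as above */ }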
public void BuildRecordList_Empty()
{
    IEnumerable<TagDataFullCsvRecord> records = TagDataFullCsvRecord.BuildRecordList("foo", new Dictionary<string, List<string>>());

    Assert.AreEqual(Enumerable.Empty<TagDataFullCsvRecord>(), records);
}
/// <summary>
/// Writes each part of the report content separately by calling the relevant GetStreamForX methods in turn
/// </summary>
/// <param name="jobInfo">The completed extraction job to write the report for</param>
private void WriteSplitReport(CompletedExtractJobInfo jobInfo)
{
    // TODO(rkm 2020-10-29) We can probably reduce the number of full collection enumerations in this method

    // Write the README / summary part
    using (Stream stream = GetStreamForSummary(jobInfo))
    {
        using StreamWriter streamWriter = GetStreamWriter(stream);

        foreach (string line in JobHeader(jobInfo))
        {
            streamWriter.WriteLine(line);
        }

        streamWriter.WriteLine();
        streamWriter.WriteLine("Files included:");
        streamWriter.WriteLine("- README.md (this file)");
        streamWriter.WriteLine("- pixel_data_summary.csv");
        streamWriter.WriteLine("- pixel_data_full.csv");
        streamWriter.WriteLine("- pixel_data_word_length_frequencies.csv");
        streamWriter.WriteLine("- tag_data_summary.csv");
        streamWriter.WriteLine("- tag_data_full.csv");
        streamWriter.WriteLine();
        streamWriter.WriteLine("This file contents:");
        streamWriter.WriteLine("- Blocked files");
        streamWriter.WriteLine("- Anonymisation failures");
        streamWriter.WriteLine();
        streamWriter.WriteLine("## Blocked files");
        streamWriter.WriteLine();

        IOrderedEnumerable<ExtractionIdentifierRejectionInfo> orderedRejections = _jobStore
            .GetCompletedJobRejections(jobInfo.ExtractionJobIdentifier)
            .OrderByDescending(x => x.RejectionItems.Sum(y => y.Value));
        foreach (ExtractionIdentifierRejectionInfo extractionIdentifierRejectionInfo in orderedRejections)
        {
            WriteJobRejections(streamWriter, extractionIdentifierRejectionInfo);
        }

        streamWriter.WriteLine();
        streamWriter.WriteLine("## Anonymisation failures");
        streamWriter.WriteLine();

        foreach (FileAnonFailureInfo fileAnonFailureInfo in _jobStore.GetCompletedJobAnonymisationFailures(jobInfo.ExtractionJobIdentifier))
        {
            WriteAnonFailure(streamWriter, fileAnonFailureInfo);
        }

        streamWriter.WriteLine();
        streamWriter.WriteLine("--- end of report ---");
        streamWriter.Flush();
        FinishReportPart(stream);
    }

    // Local helper function to write each CSV
    void WriteCsv<T>(Stream stream, IEnumerable<T> records) where T : IExtractionReportCsvRecord
    {
        using StreamWriter streamWriter = GetStreamWriter(stream);
        using var csvWriter = new CsvWriter(streamWriter, _csvConfiguration);
        csvWriter.WriteHeader<T>();
        csvWriter.NextRecord();
        csvWriter.WriteRecords(records);
        streamWriter.Flush();
        FinishReportPart(stream);
    }

    // All validation failures for this job
    Dictionary<string, Dictionary<string, List<string>>> groupedFailures = GetJobVerificationFailures(jobInfo.ExtractionJobIdentifier);

    // First deal with the pixel data
    Dictionary<string, List<string>> pixelFailures = groupedFailures.GetValueOrDefault(PixelDataStr);
    if (pixelFailures == null)
    {
        Logger.Info($"No {PixelDataStr} failures found for the extraction job");
        pixelFailures = new Dictionary<string, List<string>>();
    }

    // Create records for the pixel reports
    List<TagDataSummaryCsvRecord> pixelSummaryRecords = TagDataSummaryCsvRecord.BuildRecordList(PixelDataStr, pixelFailures).ToList();

    // Tally how often each failure-value word length occurs
    var wordLengthCounts = new Dictionary<uint, uint>();
    foreach (TagDataSummaryCsvRecord tagDataSummaryCsvRecord in pixelSummaryRecords)
    {
        var wordLen = (uint)tagDataSummaryCsvRecord.FailureValue.Length;
        if (!wordLengthCounts.ContainsKey(wordLen))
        {
            wordLengthCounts.Add(wordLen, 0);
        }
        wordLengthCounts[wordLen] += (uint)tagDataSummaryCsvRecord.Occurrences;

        // Every record here refers to the PixelData tag, so the report-level frequency equals the in-tag frequency
        tagDataSummaryCsvRecord.RelativeFrequencyInReport = tagDataSummaryCsvRecord.RelativeFrequencyInTag;
    }

    // Write summary pixel CSV
    using (Stream stream = GetStreamForPixelDataSummary(jobInfo))
        WriteCsv(
            stream,
            pixelSummaryRecords
                .OrderByDescending(x => x.FailureValue.Length)
                .ThenByDescending(x => x.Occurrences)
        );

    // Write full pixel CSV
    using (Stream stream = GetStreamForPixelDataFull(jobInfo))
        WriteCsv(
            stream,
            TagDataFullCsvRecord
                .BuildRecordList(PixelDataStr, pixelFailures)
                .OrderByDescending(x => x.FailureValue.Length)
        );

    // Write the pixel text frequency file
    using (Stream stream = GetStreamForPixelDataWordLengthFrequencies(jobInfo))
        WriteCsv(
            stream,
            TagDataFrequencyRecord.BuildRecordList(wordLengthCounts)
        );

    // Now select all other tags
    Dictionary<string, Dictionary<string, List<string>>> otherTagFailures = groupedFailures
        .Where(x => x.Key != PixelDataStr)
        .ToDictionary(x => x.Key, x => x.Value);

    // Write the summary CSV for all other tags. Before doing so, we need to convert
    // into records and calculate the relative frequencies
    var summaryRecordsByTag = new List<List<TagDataSummaryCsvRecord>>();
    var totalOccurrencesByValue = new Dictionary<string, uint>();
    foreach ((string tagName, Dictionary<string, List<string>> failures) in otherTagFailures)
    {
        List<TagDataSummaryCsvRecord> records = TagDataSummaryCsvRecord.BuildRecordList(tagName, failures).ToList();
        summaryRecordsByTag.Add(records);
        foreach (TagDataSummaryCsvRecord r in records)
        {
            if (!totalOccurrencesByValue.ContainsKey(r.FailureValue))
            {
                totalOccurrencesByValue[r.FailureValue] = 0;
            }
            totalOccurrencesByValue[r.FailureValue] += r.Occurrences;
        }
    }

    var totalFailureValues = (uint)summaryRecordsByTag.Sum(x => x.Sum(y => y.Occurrences));

    // Order tags by their total occurrence count, and records within each tag by their own occurrence count
    var orderedTagSummaryRecords = new List<TagDataSummaryCsvRecord>();
    foreach (List<TagDataSummaryCsvRecord> tagRecordList in summaryRecordsByTag.OrderByDescending(x => x.Sum(y => y.Occurrences)))
    {
        foreach (TagDataSummaryCsvRecord record in tagRecordList.OrderByDescending(x => x.Occurrences))
        {
            record.RelativeFrequencyInReport = totalOccurrencesByValue[record.FailureValue] * 1.0 / totalFailureValues;
            orderedTagSummaryRecords.Add(record);
        }
    }

    using (Stream stream = GetStreamForTagDataSummary(jobInfo))
        WriteCsv(
            stream,
            orderedTagSummaryRecords
        );

    // Write the full CSV for all other tags
    var fullRecordsByTag = new List<List<TagDataFullCsvRecord>>();
    foreach ((string tagName, Dictionary<string, List<string>> failures) in otherTagFailures)
    {
        fullRecordsByTag.Add(TagDataFullCsvRecord.BuildRecordList(tagName, failures).ToList());
    }

    var orderedFullTagRecords = new List<TagDataFullCsvRecord>();
    foreach (IEnumerable<TagDataFullCsvRecord> tagRecordSet in fullRecordsByTag.OrderBy(x => x[0].TagName))
    {
        foreach (var x in tagRecordSet.OrderByDescending(y => y.FailureValue))
        {
            orderedFullTagRecords.Add(x);
        }
    }

    using (Stream stream = GetStreamForTagDataFull(jobInfo))
        WriteCsv(
            stream,
            orderedFullTagRecords
        );
}
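// A minimal standalone sketch (not part of the class above) of the relative-frequency arithmetic
// used for the tag summary CSV, run on made-up toy data. The tag names and counts below are
// hypothetical; the point is that RelativeFrequencyInReport is computed per failure *value*
// across all non-pixel tags, so the same value seen under two different tags gets the same
// report-level frequency:
//
//     using System;
//     using System.Collections.Generic;
//     using System.Linq;
//
//     // Occurrences of each failure value, grouped by tag (hypothetical data)
//     var occurrences = new Dictionary<string, Dictionary<string, int>>
//     {
//         ["PatientName"] = new() { ["Smith"] = 3 },
//         ["ReferringPhysicianName"] = new() { ["Smith"] = 1, ["Jones"] = 2 },
//     };
//
//     // Total occurrences of each value across every tag
//     var totalByValue = new Dictionary<string, int>();
//     foreach (Dictionary<string, int> byValue in occurrences.Values)
//         foreach ((string value, int count) in byValue)
//             totalByValue[value] = totalByValue.GetValueOrDefault(value) + count;
//
//     int totalFailureValues = totalByValue.Values.Sum();  // 6
//
//     // "Smith" -> 4 / 6 ≈ 0.667 (for both tags), "Jones" -> 2 / 6 ≈ 0.333
//     foreach ((string value, int count) in totalByValue)
//         Console.WriteLine($"{value}: {count * 1.0 / totalFailureValues:F3}");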