public void InvalidReport_ThrowsApplicationException()
{
    // Arrange: a completed job whose verification failure data is not a parseable report
    CompletedExtractJobInfo jobInfo = TestJobInfo();

    var verificationFailures = new List<FileVerificationFailureInfo>
    {
        new FileVerificationFailureInfo(anonFilePath: "foo1.dcm", failureData: "totally not a report"),
    };

    var mockJobStore = new Mock<IExtractJobStore>(MockBehavior.Strict);
    mockJobStore.Setup(x => x.GetCompletedJobInfo(It.IsAny<Guid>())).Returns(jobInfo);
    mockJobStore.Setup(x => x.GetCompletedJobRejections(It.IsAny<Guid>())).Returns(new List<ExtractionIdentifierRejectionInfo>());
    mockJobStore.Setup(x => x.GetCompletedJobAnonymisationFailures(It.IsAny<Guid>())).Returns(new List<FileAnonFailureInfo>());
    mockJobStore.Setup(x => x.GetCompletedJobVerificationFailures(It.IsAny<Guid>())).Returns(verificationFailures);

    TestJobReporter reporter;
    using (reporter = new TestJobReporter(mockJobStore.Object, ReportFormat.Combined, LinuxNewLine))
    {
        // BUG FIX: the assertion message was a leftover placeholder ("aa");
        // use a message that explains the failure if the exception is not thrown
        Assert.Throws<ApplicationException>(
            () => reporter.CreateReport(Guid.Empty),
            "Expected CreateReport to throw when the verification failure data cannot be parsed");
    }

    // The reporter must be disposed even when report creation throws
    Assert.True(reporter.Disposed);
}
public void CreateReport_Empty(string newLine)
{
    // A completed job with no rejections, no anonymisation failures, and no
    // verification failures should still produce a well-formed report
    CompletedExtractJobInfo jobInfo = TestJobInfo();

    var store = new Mock<IExtractJobStore>(MockBehavior.Strict);
    store.Setup(s => s.GetCompletedJobInfo(It.IsAny<Guid>())).Returns(jobInfo);
    store.Setup(s => s.GetCompletedJobRejections(It.IsAny<Guid>())).Returns(new List<ExtractionIdentifierRejectionInfo>());
    store.Setup(s => s.GetCompletedJobAnonymisationFailures(It.IsAny<Guid>())).Returns(new List<FileAnonFailureInfo>());
    store.Setup(s => s.GetCompletedJobVerificationFailures(It.IsAny<Guid>())).Returns(new List<FileVerificationFailureInfo>());

    TestJobReporter reporter;
    using (reporter = new TestJobReporter(store.Object, ReportFormat.Combined, newLine))
    {
        reporter.CreateReport(Guid.Empty);
    }

    Assert.True(reporter.Disposed);

    // All expected sections are null => every report section should be empty
    ReportEqualityHelpers.AssertReportsAreEqual(
        jobInfo,
        _dateTimeProvider,
        verificationFailuresExpected: null,
        blockedFilesExpected: null,
        anonFailuresExpected: null,
        isIdentifiableExtraction: false,
        isJoinedReport: false,
        newLine,
        reporter.Report
    );
}
public void Test_GetHashCode()
{
    // Two instances constructed from identical values must produce equal hash codes
    var guid = Guid.NewGuid();

    // Local factory keeps the two constructions identical line-for-line
    CompletedExtractJobInfo Build() => new CompletedExtractJobInfo(
        guid,
        _dateTimeProvider.UtcNow(),
        _dateTimeProvider.UtcNow() + TimeSpan.FromHours(1),
        "1234",
        "test/directory",
        "KeyTag",
        123,
        "MR",
        isIdentifiableExtraction: true,
        isNoFilterExtraction: true
    );

    var info1 = Build();
    var info2 = Build();

    Assert.AreEqual(info1.GetHashCode(), info2.GetHashCode());
}
/// <summary>
/// Asserts that <paramref name="actualReport"/> matches the report expected for
/// <paramref name="jobInfo"/> given the expected failure/rejection data.
/// </summary>
public static void AssertReportsAreEqual(
    [NotNull] CompletedExtractJobInfo jobInfo,
    [NotNull] DateTimeProvider provider,
    [CanBeNull] Dictionary<string, Dictionary<string, List<string>>> verificationFailuresExpected,
    [CanBeNull] Dictionary<string, List<Tuple<int, string>>> blockedFilesExpected,
    [CanBeNull] List<Tuple<string, string>> anonFailuresExpected,
    bool isIdentifiableExtraction,
    bool isJoinedReport,
    [NotNull] string newLine,
    [NotNull] string actualReport
)
{
    string header = GetHeaderAndContents(jobInfo, provider, newLine);

    // Identifiable extractions produce a much simpler report: header + missing file list
    if (isIdentifiableExtraction)
    {
        Assert.NotNull(anonFailuresExpected);
        CheckIdentReport(header, anonFailuresExpected.Select(t => t.Item1), newLine, actualReport);
        return;
    }

    (string summarySection, string fullSection) =
        ExpectedVerificationFailures(verificationFailuresExpected, newLine);

    // NOTE(review): isJoinedReport is not used below -- confirm whether joined
    // reports are meant to have a different expected layout
    var expectedLines = new List<string>
    {
        header,
        "",
        "## Verification failures",
        "",
        "### Summary",
        "",
        summarySection ?? "",
        "### Full details",
        "",
        fullSection ?? "",
        "## Blocked files",
        "",
        BlockedFiles(blockedFilesExpected, newLine) ?? "",
        "## Anonymisation failures",
        "",
        AnonymisationFailures(anonFailuresExpected, newLine) ?? "",
        "--- end of report ---",
        "",
    };

    string expectedStr = string.Join(newLine, expectedLines);

    if (ShouldPrintReports)
        PrintReports(expectedStr, actualReport);

    Assert.AreEqual(expectedStr, actualReport);
}
/// <summary>
/// Creates the report(s) for the given completed extraction job, choosing between
/// the combined and split formats.
/// </summary>
/// <param name="jobId">Identifier of the completed extraction job</param>
public void CreateReport(Guid jobId)
{
    CompletedExtractJobInfo jobInfo = _jobStore.GetCompletedJobInfo(jobId);

    Logger.Info($"Creating report(s) for {jobId}");

    if (ShouldWriteCombinedReport(jobInfo))
        WriteCombinedReport(jobInfo);
    else
        WriteSplitReport(jobInfo);

    Logger.Info($"Report(s) for {jobId} created");
}
/// <summary>
/// Builds the common header lines (job metadata) used at the top of every report.
/// </summary>
/// <param name="jobInfo">Info for the completed extraction job being reported on</param>
/// <returns>The header lines, one entry per output line</returns>
private static IEnumerable<string> JobHeader(CompletedExtractJobInfo jobInfo)
{
    // Render booleans the way the report format expects
    static string YesNo(bool value) => value ? "Yes" : "No";

    return new[]
    {
        $"# SMI extraction validation report for {jobInfo.ProjectNumber}/{jobInfo.ExtractionName()}",
        "",
        "Job info:",
        $"- Job submitted at: {jobInfo.JobSubmittedAt.ToString("s", CultureInfo.InvariantCulture)}",
        $"- Job completed at: {jobInfo.JobCompletedAt.ToString("s", CultureInfo.InvariantCulture)}",
        $"- Job duration: {(jobInfo.JobCompletedAt - jobInfo.JobSubmittedAt)}",
        $"- Job extraction id: {jobInfo.ExtractionJobIdentifier}",
        $"- Extraction tag: {jobInfo.KeyTag}",
        $"- Extraction modality: {jobInfo.ExtractionModality ?? "Unspecified"}",
        $"- Requested identifier count: {jobInfo.KeyValueCount}",
        // "Filtered" is the inverse of "no filter", hence the negation
        $"- Identifiable extraction: {YesNo(jobInfo.IsIdentifiableExtraction)}",
        $"- Filtered extraction: {YesNo(!jobInfo.IsNoFilterExtraction)}",
    };
}
public void CreateReport_IdentifiableExtraction()
{
    // Identifiable extractions report only the files which could not be found
    CompletedExtractJobInfo jobInfo = TestJobInfo(isIdentifiableExtraction: true);

    var missingFiles = new List<string> { "missing.dcm", };

    var store = new Mock<IExtractJobStore>(MockBehavior.Strict);
    store.Setup(s => s.GetCompletedJobInfo(It.IsAny<Guid>())).Returns(jobInfo);
    store.Setup(s => s.GetCompletedJobMissingFileList(It.IsAny<Guid>())).Returns(missingFiles);

    TestJobReporter reporter;
    using (reporter = new TestJobReporter(store.Object, ReportFormat.Combined, LinuxNewLine))
    {
        reporter.CreateReport(Guid.Empty);
    }

    Assert.True(reporter.Disposed);

    // Missing files are passed through the anonFailuresExpected slot with null reasons
    var missingFilesExpected = new List<Tuple<string, string>>
    {
        new Tuple<string, string>("missing.dcm", null),
    };

    ReportEqualityHelpers.AssertReportsAreEqual(
        jobInfo,
        _dateTimeProvider,
        verificationFailuresExpected: null,
        blockedFilesExpected: null,
        missingFilesExpected,
        isIdentifiableExtraction: true,
        isJoinedReport: false,
        LinuxNewLine,
        reporter.Report
    );
}
public void CreateReport_SplitReport(string newLine)
{
    // Exercises ReportFormat.Split: verification failures are written as separate
    // CSV parts (pixel summary/full/word-length frequencies, tag summary/full)
    // rather than a single combined document
    CompletedExtractJobInfo jobInfo = TestJobInfo();

    // Mock IsIdentifiable failure report: 4 PixelData failures, 3 for tag 'X', 1 for tag 'Z'
    const string report = @" [ { 'Parts': [], 'Resource': 'unused', 'ResourcePrimaryKey': 'unused', 'ProblemField': 'PixelData', 'ProblemValue': 'aaaaaaaaaaa' }, { 'Parts': [], 'Resource': 'unused', 'ResourcePrimaryKey': 'unused', 'ProblemField': 'PixelData', 'ProblemValue': 'a' }, { 'Parts': [], 'Resource': 'unused', 'ResourcePrimaryKey': 'unused', 'ProblemField': 'PixelData', 'ProblemValue': 'a' }, { 'Parts': [], 'Resource': 'unused', 'ResourcePrimaryKey': 'unused', 'ProblemField': 'PixelData', 'ProblemValue': 'another' }, { 'Parts': [], 'Resource': 'unused', 'ResourcePrimaryKey': 'unused', 'ProblemField': 'X', 'ProblemValue': 'foo' }, { 'Parts': [], 'Resource': 'unused', 'ResourcePrimaryKey': 'unused', 'ProblemField': 'X', 'ProblemValue': 'foo' }, { 'Parts': [], 'Resource': 'unused', 'ResourcePrimaryKey': 'unused', 'ProblemField': 'X', 'ProblemValue': 'bar' }, { 'Parts': [], 'Resource': 'unused', 'ResourcePrimaryKey': 'unused', 'ProblemField': 'Z', 'ProblemValue': 'bar' }, ]";

    // The same report is attached to two files, so every failure occurs twice overall
    var verificationFailures = new List<FileVerificationFailureInfo>
    {
        new FileVerificationFailureInfo(anonFilePath: "foo1.dcm", report),
        new FileVerificationFailureInfo(anonFilePath: "foo2.dcm", report),
    };

    var mockJobStore = new Mock<IExtractJobStore>(MockBehavior.Strict);
    mockJobStore.Setup(x => x.GetCompletedJobInfo(It.IsAny<Guid>())).Returns(jobInfo);
    mockJobStore.Setup(x => x.GetCompletedJobRejections(It.IsAny<Guid>())).Returns(new List<ExtractionIdentifierRejectionInfo>());
    mockJobStore.Setup(x => x.GetCompletedJobAnonymisationFailures(It.IsAny<Guid>())).Returns(new List<FileAnonFailureInfo>());
    mockJobStore.Setup(x => x.GetCompletedJobVerificationFailures(It.IsAny<Guid>())).Returns(verificationFailures);

    TestJobReporter reporter;
    using (reporter = new TestJobReporter(mockJobStore.Object, ReportFormat.Split, newLine))
    {
        reporter.CreateReport(Guid.Empty);
    }

    Assert.True(reporter.Disposed);

    // Expected output: the summary README followed by each CSV part, in the order
    // TestJobReporter concatenates them
    var expected = new List<string>
    {
        $"",
        "=== summary file ===",
        "# SMI extraction validation report for 1234/test",
        $"",
        $"Job info:",
        $"- Job submitted at: {_dateTimeProvider.UtcNow().ToString("s", CultureInfo.InvariantCulture)}",
        $"- Job completed at: {(_dateTimeProvider.UtcNow() + TimeSpan.FromHours(1)).ToString("s", CultureInfo.InvariantCulture)}",
        $"- Job duration: {TimeSpan.FromHours(1)}",
        $"- Job extraction id: {jobInfo.ExtractionJobIdentifier}",
        $"- Extraction tag: keyTag",
        $"- Extraction modality: Unspecified",
        $"- Requested identifier count: 123",
        $"- Identifiable extraction: No",
        $"- Filtered extraction: Yes",
        $"",
        $"Files included:",
        $"- README.md (this file)",
        $"- pixel_data_summary.csv",
        $"- pixel_data_full.csv",
        $"- pixel_data_word_length_frequencies.csv",
        $"- tag_data_summary.csv",
        $"- tag_data_full.csv",
        $"",
        $"This file contents:",
        $"- Blocked files",
        $"- Anonymisation failures",
        $"",
        $"## Blocked files",
        $"",
        $"",
        $"## Anonymisation failures",
        $"",
        $"",
        $"--- end of report ---",
        $"",
        // Pixel summary: ordered by decreasing value length, then by occurrences
        $"=== pixel summary file ===",
        $"TagName,FailureValue,Occurrences,RelativeFrequencyInTag,RelativeFrequencyInReport",
        $"PixelData,aaaaaaaaaaa,2,0.25,0.25",
        $"PixelData,another,2,0.25,0.25",
        $"PixelData,a,4,0.5,0.5",
        $"",
        $"=== pixel full file ===",
        $"TagName,FailureValue,FilePath",
        $"PixelData,aaaaaaaaaaa,foo1.dcm",
        $"PixelData,aaaaaaaaaaa,foo2.dcm",
        $"PixelData,another,foo1.dcm",
        $"PixelData,another,foo2.dcm",
        $"PixelData,a,foo1.dcm",
        $"PixelData,a,foo1.dcm",
        $"PixelData,a,foo2.dcm",
        $"PixelData,a,foo2.dcm",
        $"",
        // Word-length frequencies cover every length from 1 up to the longest value (11)
        $"=== pixel word length frequencies file ===",
        $"WordLength,Count,RelativeFrequencyInReport",
        $"1,4,0.5",
        $"2,0,0",
        $"3,0,0",
        $"4,0,0",
        $"5,0,0",
        $"6,0,0",
        $"7,2,0.25",
        $"8,0,0",
        $"9,0,0",
        $"10,0,0",
        $"11,2,0.25",
        $"",
        // Tag summary: tags with most failures first, then by per-value occurrences
        $"=== tag summary file ===",
        $"TagName,FailureValue,Occurrences,RelativeFrequencyInTag,RelativeFrequencyInReport",
        $"X,foo,4,0.6666666666666666,0.5",
        $"X,bar,2,0.3333333333333333,0.5",
        $"Z,bar,2,1,0.5",
        $"",
        $"=== tag full file ===",
        $"TagName,FailureValue,FilePath",
        $"X,foo,foo1.dcm",
        $"X,foo,foo1.dcm",
        $"X,foo,foo2.dcm",
        $"X,foo,foo2.dcm",
        $"X,bar,foo1.dcm",
        $"X,bar,foo2.dcm",
        $"Z,bar,foo1.dcm",
        $"Z,bar,foo2.dcm",
        $"",
    };
    Assert.AreEqual(string.Join(newLine, expected), reporter.Report);
}
public void CreateReport_WithPixelData()
{
    CompletedExtractJobInfo jobInfo = TestJobInfo();

    // NOTE(rkm 2020-08-25) Tests that the "Z" tag is ordered before PixelData, and that PixelData items are ordered by decreasing length not by occurrence
    const string report = @" [ { 'Parts': [], 'Resource': 'unused', 'ResourcePrimaryKey': 'unused', 'ProblemField': 'PixelData', 'ProblemValue': 'aaaaaaaaaaa' }, { 'Parts': [], 'Resource': 'unused', 'ResourcePrimaryKey': 'unused', 'ProblemField': 'PixelData', 'ProblemValue': 'a' }, { 'Parts': [], 'Resource': 'unused', 'ResourcePrimaryKey': 'unused', 'ProblemField': 'PixelData', 'ProblemValue': 'a' }, { 'Parts': [], 'Resource': 'unused', 'ResourcePrimaryKey': 'unused', 'ProblemField': 'Z', 'ProblemValue': 'bar' }, ]";

    var verificationFailures = new List<FileVerificationFailureInfo>
    {
        new FileVerificationFailureInfo(anonFilePath: "foo1.dcm", report),
    };

    var mockJobStore = new Mock<IExtractJobStore>(MockBehavior.Strict);
    mockJobStore.Setup(x => x.GetCompletedJobInfo(It.IsAny<Guid>())).Returns(jobInfo);
    mockJobStore.Setup(x => x.GetCompletedJobRejections(It.IsAny<Guid>())).Returns(new List<ExtractionIdentifierRejectionInfo>());
    mockJobStore.Setup(x => x.GetCompletedJobAnonymisationFailures(It.IsAny<Guid>())).Returns(new List<FileAnonFailureInfo>());
    mockJobStore.Setup(x => x.GetCompletedJobVerificationFailures(It.IsAny<Guid>())).Returns(verificationFailures);

    TestJobReporter reporter;
    using (reporter = new TestJobReporter(mockJobStore.Object, ReportFormat.Combined, LinuxNewLine))
    {
        reporter.CreateReport(Guid.Empty);
    }

    Assert.True(reporter.Disposed);

    // Expected ordering: "Z" before "PixelData"; within PixelData, longer values first
    var verificationFailuresExpected = new Dictionary<string, Dictionary<string, List<string>>>
    {
        { "Z", new Dictionary<string, List<string>> { { "bar", new List<string> { "foo1.dcm" } } } },
        {
            "PixelData",
            new Dictionary<string, List<string>>
            {
                { "aaaaaaaaaaa", new List<string> { "foo1.dcm" } },
                // 'a' occurred twice in the same file, hence the duplicated path
                { "a", new List<string> { "foo1.dcm", "foo1.dcm" } },
            }
        },
    };

    ReportEqualityHelpers.AssertReportsAreEqual(
        jobInfo,
        _dateTimeProvider,
        verificationFailuresExpected,
        blockedFilesExpected: null,
        anonFailuresExpected: null,
        isIdentifiableExtraction: false,
        isJoinedReport: false,
        LinuxNewLine,
        reporter.Report
    );
}
public void CreateReport_AggregateData(string newLine)
{
    // Verifies that identical failures from multiple files are aggregated per
    // tag and per problem value in the final report
    CompletedExtractJobInfo jobInfo = TestJobInfo();

    var verificationFailures = new List<FileVerificationFailureInfo>
    {
        new FileVerificationFailureInfo(anonFilePath: "ccc/ddd/foo1.dcm", failureData: @" [ { 'Parts': [], 'Resource': 'unused', 'ResourcePrimaryKey': 'unused', 'ProblemField': 'SomeOtherTag', 'ProblemValue': 'BAZ' } ]"
        ),
        new FileVerificationFailureInfo(anonFilePath: "ccc/ddd/foo2.dcm", failureData: @" [ { 'Parts': [], 'Resource': 'unused', 'ResourcePrimaryKey': 'unused', 'ProblemField': 'SomeOtherTag', 'ProblemValue': 'BAZ' } ]"
        ),
        new FileVerificationFailureInfo(anonFilePath: "aaa/bbb/foo1.dcm", failureData: @" [ { 'Parts': [], 'Resource': 'unused', 'ResourcePrimaryKey': 'unused', 'ProblemField': 'ScanOptions', 'ProblemValue': 'FOO' } ]"
        ),
        new FileVerificationFailureInfo(anonFilePath: "aaa/bbb/foo2.dcm", failureData: @" [ { 'Parts': [], 'Resource': 'unused', 'ResourcePrimaryKey': 'unused', 'ProblemField': 'ScanOptions', 'ProblemValue': 'FOO' } ]"
        ),
        new FileVerificationFailureInfo(anonFilePath: "aaa/bbb/foo2.dcm", failureData: @" [ { 'Parts': [], 'Resource': 'unused', 'ResourcePrimaryKey': 'unused', 'ProblemField': 'ScanOptions', 'ProblemValue': 'BAR' } ]"
        ),
    };

    var mockJobStore = new Mock<IExtractJobStore>(MockBehavior.Strict);
    mockJobStore.Setup(x => x.GetCompletedJobInfo(It.IsAny<Guid>())).Returns(jobInfo);
    mockJobStore.Setup(x => x.GetCompletedJobRejections(It.IsAny<Guid>())).Returns(new List<ExtractionIdentifierRejectionInfo>());
    mockJobStore.Setup(x => x.GetCompletedJobAnonymisationFailures(It.IsAny<Guid>())).Returns(new List<FileAnonFailureInfo>());
    mockJobStore.Setup(x => x.GetCompletedJobVerificationFailures(It.IsAny<Guid>()))
        .Returns(verificationFailures);

    TestJobReporter reporter;
    using (reporter = new TestJobReporter(mockJobStore.Object, ReportFormat.Combined, newLine))
    {
        reporter.CreateReport(Guid.Empty);
    }

    Assert.True(reporter.Disposed);

    // Failures grouped by tag then by value, each value listing every file it occurred in
    var verificationFailuresExpected = new
    Dictionary<string, Dictionary<string, List<string>>>
    {
        {
            "ScanOptions",
            new Dictionary<string, List<string>>
            {
                { "FOO", new List<string> { "aaa/bbb/foo1.dcm", "aaa/bbb/foo2.dcm", } },
                { "BAR", new List<string> { "aaa/bbb/foo2.dcm", } },
            }
        },
        {
            "SomeOtherTag",
            new Dictionary<string, List<string>>
            {
                { "BAZ", new List<string> { "ccc/ddd/foo1.dcm", "ccc/ddd/foo2.dcm", } },
            }
        },
    };

    ReportEqualityHelpers.AssertReportsAreEqual(
        jobInfo,
        _dateTimeProvider,
        verificationFailuresExpected,
        blockedFilesExpected: null,
        anonFailuresExpected: null,
        isIdentifiableExtraction: false,
        isJoinedReport: false,
        newLine,
        reporter.Report
    );
}
public void CreateReport_BasicData(string newLine)
{
    // One blocked identifier (with two rejection reasons), one anonymisation
    // failure, and one verification failure should all appear in the report
    CompletedExtractJobInfo jobInfo = TestJobInfo();

    var rejections = new List<ExtractionIdentifierRejectionInfo>
    {
        new ExtractionIdentifierRejectionInfo(
            keyValue: "1.2.3.4",
            new Dictionary<string, int>
            {
                { "image is in the deny list for extraction", 123 },
                { "foo bar", 456 },
            }),
    };

    var anonFailures = new List<FileAnonFailureInfo>
    {
        new FileAnonFailureInfo(expectedAnonFile: "foo1.dcm", reason: "image was corrupt"),
    };

    const string report = @" [ { 'Parts': [], 'Resource': '/foo1.dcm', 'ResourcePrimaryKey': '1.2.3.4', 'ProblemField': 'ScanOptions', 'ProblemValue': 'FOO' } ]";

    var verificationFailures = new List<FileVerificationFailureInfo>
    {
        new FileVerificationFailureInfo(anonFilePath: "foo1.dcm", report),
    };

    var store = new Mock<IExtractJobStore>(MockBehavior.Strict);
    store.Setup(s => s.GetCompletedJobInfo(It.IsAny<Guid>())).Returns(jobInfo);
    store.Setup(s => s.GetCompletedJobRejections(It.IsAny<Guid>())).Returns(rejections);
    store.Setup(s => s.GetCompletedJobAnonymisationFailures(It.IsAny<Guid>())).Returns(anonFailures);
    store.Setup(s => s.GetCompletedJobVerificationFailures(It.IsAny<Guid>())).Returns(verificationFailures);

    TestJobReporter reporter;
    using (reporter = new TestJobReporter(store.Object, ReportFormat.Combined, newLine))
    {
        reporter.CreateReport(Guid.Empty);
    }

    Assert.True(reporter.Disposed);

    var verificationFailuresExpected = new Dictionary<string, Dictionary<string, List<string>>>
    {
        { "ScanOptions", new Dictionary<string, List<string>> { { "FOO", new List<string> { "foo1.dcm" } } } },
    };

    var blockedFilesExpected = new Dictionary<string, List<Tuple<int, string>>>
    {
        {
            "1.2.3.4",
            new List<Tuple<int, string>>
            {
                new Tuple<int, string>(123, "image is in the deny list for extraction"),
                new Tuple<int, string>(456, "foo bar"),
            }
        },
    };

    var anonFailuresExpected = new List<Tuple<string, string>>
    {
        new Tuple<string, string>("foo1.dcm", "image was corrupt"),
    };

    ReportEqualityHelpers.AssertReportsAreEqual(
        jobInfo,
        _dateTimeProvider,
        verificationFailuresExpected,
        blockedFilesExpected,
        anonFailuresExpected,
        isIdentifiableExtraction: false,
        isJoinedReport: false,
        newLine,
        reporter.Report
    );
}
/// <summary>
/// Writes the whole report as a single document: the job header, then either the
/// missing file list (identifiable extractions) or the verification failure,
/// blocked file, and anonymisation failure sections.
/// </summary>
/// <param name="jobInfo">Info for the completed extraction job being reported on</param>
private void WriteCombinedReport(CompletedExtractJobInfo jobInfo)
{
    using Stream stream = GetStreamForSummary(jobInfo);
    using StreamWriter streamWriter = GetStreamWriter(stream);

    foreach (string line in JobHeader(jobInfo))
    {
        streamWriter.WriteLine(line);
    }

    streamWriter.WriteLine();
    streamWriter.WriteLine("Report contents:");

    // For identifiable extractions, write the metadata and list of missing files then return. The other parts don't make sense in this case
    if (jobInfo.IsIdentifiableExtraction)
    {
        streamWriter.WriteLine();
        streamWriter.WriteLine("- Missing file list (files which were selected from an input ID but could not be found)");
        streamWriter.WriteLine();
        streamWriter.WriteLine("## Missing file list");
        streamWriter.WriteLine();
        WriteJobMissingFileList(streamWriter, _jobStore.GetCompletedJobMissingFileList(jobInfo.ExtractionJobIdentifier));
        streamWriter.WriteLine();
        streamWriter.WriteLine("--- end of report ---");
        streamWriter.Flush();
        FinishReportPart(stream);
        return;
    }

    // Table of contents for the standard (non-identifiable) report
    streamWriter.WriteLine();
    streamWriter.WriteLine("- Verification failures");
    streamWriter.WriteLine(" - Summary");
    streamWriter.WriteLine(" - Full Details");
    streamWriter.WriteLine("- Blocked files");
    streamWriter.WriteLine("- Anonymisation failures");

    streamWriter.WriteLine();
    streamWriter.WriteLine("## Verification failures");
    streamWriter.WriteLine();
    WriteJobVerificationFailures(streamWriter, jobInfo.ExtractionJobIdentifier);

    streamWriter.WriteLine();
    streamWriter.WriteLine("## Blocked files");
    streamWriter.WriteLine();
    foreach (ExtractionIdentifierRejectionInfo extractionIdentifierRejectionInfo in _jobStore.GetCompletedJobRejections(jobInfo.ExtractionJobIdentifier))
    {
        WriteJobRejections(streamWriter, extractionIdentifierRejectionInfo);
    }

    streamWriter.WriteLine();
    streamWriter.WriteLine("## Anonymisation failures");
    streamWriter.WriteLine();
    foreach (FileAnonFailureInfo fileAnonFailureInfo in _jobStore.GetCompletedJobAnonymisationFailures(jobInfo.ExtractionJobIdentifier))
    {
        WriteAnonFailure(streamWriter, fileAnonFailureInfo);
    }

    streamWriter.WriteLine();
    streamWriter.WriteLine("--- end of report ---");
    streamWriter.Flush();
    FinishReportPart(stream);
}
/// <summary>
/// Writes each part of the report content separately by calling the relevant GetStreamForX methods in turn
/// </summary>
/// <param name="jobInfo"></param>
private void WriteSplitReport(CompletedExtractJobInfo jobInfo)
{
    // TODO(rkm 2020-10-29) We can probably reduce the number of full collection enumerations in this method

    // Part 1: the summary README -- header, list of the CSV files produced,
    // then blocked files and anonymisation failures
    using (Stream stream = GetStreamForSummary(jobInfo))
    {
        using StreamWriter streamWriter = GetStreamWriter(stream);
        foreach (string line in JobHeader(jobInfo))
        {
            streamWriter.WriteLine(line);
        }
        streamWriter.WriteLine();
        streamWriter.WriteLine("Files included:");
        streamWriter.WriteLine("- README.md (this file)");
        streamWriter.WriteLine("- pixel_data_summary.csv");
        streamWriter.WriteLine("- pixel_data_full.csv");
        streamWriter.WriteLine("- pixel_data_word_length_frequencies.csv");
        streamWriter.WriteLine("- tag_data_summary.csv");
        streamWriter.WriteLine("- tag_data_full.csv");
        streamWriter.WriteLine();
        streamWriter.WriteLine("This file contents:");
        streamWriter.WriteLine("- Blocked files");
        streamWriter.WriteLine("- Anonymisation failures");
        streamWriter.WriteLine();
        streamWriter.WriteLine("## Blocked files");
        streamWriter.WriteLine();
        // Identifiers with the most rejections first
        IOrderedEnumerable<ExtractionIdentifierRejectionInfo> orderedRejections = _jobStore
            .GetCompletedJobRejections(jobInfo.ExtractionJobIdentifier)
            .OrderByDescending(x => x.RejectionItems.Sum(y => y.Value));
        foreach (ExtractionIdentifierRejectionInfo extractionIdentifierRejectionInfo in orderedRejections)
        {
            WriteJobRejections(streamWriter, extractionIdentifierRejectionInfo);
        }
        streamWriter.WriteLine();
        streamWriter.WriteLine("## Anonymisation failures");
        streamWriter.WriteLine();
        foreach (FileAnonFailureInfo fileAnonFailureInfo in _jobStore.GetCompletedJobAnonymisationFailures(jobInfo.ExtractionJobIdentifier))
        {
            WriteAnonFailure(streamWriter, fileAnonFailureInfo);
        }
        streamWriter.WriteLine();
        streamWriter.WriteLine("--- end of report ---");
        streamWriter.Flush();
        FinishReportPart(stream);
    }

    // Local helper function to write each CSV
    void WriteCsv<T>(Stream stream, IEnumerable<T> records) where T : IExtractionReportCsvRecord
    {
        using StreamWriter streamWriter = GetStreamWriter(stream);
        using var csvWriter = new CsvWriter(streamWriter, _csvConfiguration);
        csvWriter.WriteHeader<T>();
        csvWriter.NextRecord();
        csvWriter.WriteRecords(records);
        streamWriter.Flush();
        FinishReportPart(stream);
    }

    // All validation failures for this job
    Dictionary<string, Dictionary<string, List<string>>> groupedFailures = GetJobVerificationFailures(jobInfo.ExtractionJobIdentifier);

    // First deal with the pixel data
    Dictionary<string, List<string>> pixelFailures = groupedFailures.GetValueOrDefault(PixelDataStr);
    if (pixelFailures == null)
    {
        Logger.Info($"No {PixelDataStr} failures found for the extraction job");
        pixelFailures = new Dictionary<string, List<string>>();
    }

    // Create records for the pixel reports
    List<TagDataSummaryCsvRecord> pixelSummaryRecords = TagDataSummaryCsvRecord.BuildRecordList(PixelDataStr, pixelFailures).ToList();
    var wordLengthCounts = new Dictionary<uint, uint>();
    foreach (TagDataSummaryCsvRecord tagDataSummaryCsvRecord in pixelSummaryRecords)
    {
        // Tally occurrences per value length for the word-length frequency CSV
        var wordLen = (uint)tagDataSummaryCsvRecord.FailureValue.Length;
        if (!wordLengthCounts.ContainsKey(wordLen))
        {
            wordLengthCounts.Add(wordLen, 0);
        }
        wordLengthCounts[wordLen] += (uint)tagDataSummaryCsvRecord.Occurrences;
        // NOTE(review): in-report frequency is set equal to in-tag frequency here --
        // presumably because PixelData is the only tag in these CSVs; confirm
        tagDataSummaryCsvRecord.RelativeFrequencyInReport = tagDataSummaryCsvRecord.RelativeFrequencyInTag;
    }

    // Write summary pixel CSV -- longest failure values first, then by occurrence count
    using (Stream stream = GetStreamForPixelDataSummary(jobInfo))
        WriteCsv(
            stream,
            pixelSummaryRecords
                .OrderByDescending(x => x.FailureValue.Length)
                .ThenByDescending(x => x.Occurrences)
        );

    // Write full pixel CSV
    using (Stream stream = GetStreamForPixelDataFull(jobInfo))
        WriteCsv(
            stream,
            TagDataFullCsvRecord
                .BuildRecordList(PixelDataStr, pixelFailures)
                .OrderByDescending(x => x.FailureValue.Length)
        );

    // Write the pixel text frequency file
    using (Stream stream = GetStreamForPixelDataWordLengthFrequencies(jobInfo))
        WriteCsv(
            stream,
            TagDataFrequencyRecord.BuildRecordList(wordLengthCounts)
        );

    // Now select all other tags
    Dictionary<string, Dictionary<string, List<string>>> otherTagFailures = groupedFailures
        .Where(x => x.Key != PixelDataStr)
        .ToDictionary(x => x.Key, x => x.Value);

    // Write the summary CSV for all other tags. Before doing so, we need to convert into records and calculate the relative frequencies
    var summaryRecordsByTag = new List<List<TagDataSummaryCsvRecord>>();
    var totalOccurrencesByValue = new Dictionary<string, uint>();
    foreach ((string tagName, Dictionary<string, List<string>> failures) in otherTagFailures)
    {
        List<TagDataSummaryCsvRecord> record = TagDataSummaryCsvRecord.BuildRecordList(tagName, failures).ToList();
        summaryRecordsByTag.Add(record);
        foreach (TagDataSummaryCsvRecord r in record)
        {
            if (!totalOccurrencesByValue.ContainsKey(r.FailureValue))
            {
                totalOccurrencesByValue[r.FailureValue] = 0;
            }
            totalOccurrencesByValue[r.FailureValue] += r.Occurrences;
        }
    }

    var totalFailureValues = (uint)summaryRecordsByTag.Sum(x => x.Sum(y => y.Occurrences));
    var orderedTagSummaryRecords = new List<TagDataSummaryCsvRecord>();
    // Tags with the most failures first; within a tag, most frequent values first
    foreach (List<TagDataSummaryCsvRecord> tagRecordList in summaryRecordsByTag.OrderByDescending(x => x.Sum(y => y.Occurrences)))
    {
        foreach (TagDataSummaryCsvRecord record in tagRecordList.OrderByDescending(x => x.Occurrences))
        {
            record.RelativeFrequencyInReport = totalOccurrencesByValue[record.FailureValue] * 1.0 / totalFailureValues;
            orderedTagSummaryRecords.Add(record);
        }
    }
    using (Stream stream = GetStreamForTagDataSummary(jobInfo))
        WriteCsv(
            stream,
            orderedTagSummaryRecords
        );

    // Write the full csv for all other tags.
    var fullRecordsByTag = new List<List<TagDataFullCsvRecord>>();
    foreach ((string tagName, Dictionary<string, List<string>> failures) in otherTagFailures)
    {
        fullRecordsByTag.Add(TagDataFullCsvRecord.BuildRecordList(tagName, failures).ToList());
    }
    var orderedFullTagRecords = new List<TagDataFullCsvRecord>();
    // Tags in ascending name order; within a tag, values in descending string order
    foreach (IEnumerable<TagDataFullCsvRecord> tagRecordSet in fullRecordsByTag.OrderBy(x => x[0].TagName))
    {
        foreach (var x in tagRecordSet.OrderByDescending(x => x.FailureValue))
        {
            orderedFullTagRecords.Add(x);
        }
    }
    using (Stream stream = GetStreamForTagDataFull(jobInfo))
        WriteCsv(
            stream,
            orderedFullTagRecords
        );
}