/// <summary>
/// Validates the burned-in-text test image with a <see cref="DicomFileRunner"/> and checks
/// how many failures are collected, with and without a minimum text length configured.
/// </summary>
/// <param name="ignoreShortText">
/// When true, <c>IgnoreTextLessThan</c> is set to 170 and a single failure is expected;
/// when false it is set to 0 and three failures are expected.
/// </param>
public void IgnorePixelDataLessThan(bool ignoreShortText)
{
    // NOTE(rkm 2020-11-16) The test image should report 3 bits of text with lengths 123, 127, and 170.
    //  If we ignore less than 170 then only 1 bit of text should be reported.
    int expectedFailureCount = ignoreShortText ? 1 : 3;

    var options = new IsIdentifiableDicomFileOptions
    {
        ColumnReport = true,
        TessDirectory = _tessDir.FullName,
        IgnoreTextLessThan = ignoreShortText ? 170 : 0U,
    };

    // Write the test image to disk under the test directory
    string dicomPath = Path.Combine(TestContext.CurrentContext.TestDirectory, nameof(DicomFileRunnerTest), "f1.dcm");
    TestData.Create(new FileInfo(dicomPath), TestData.BURNED_IN_TEXT_IMG);

    var runner = new DicomFileRunner(options);

    IFileInfo dicomFile = new FileSystem().FileInfo.FromFileName(dicomPath);
    Assert.True(dicomFile.Exists);

    // Collect failures in memory so they can be counted after validation
    var memoryReport = new ToMemoryFailureReport();
    runner.Reports.Add(memoryReport);

    runner.ValidateDicomFile(dicomFile);

    List<Failure> collected = memoryReport.Failures.ToList();
    Assert.AreEqual(expectedFailureCount, collected.Count);
}
/// <summary>
/// Dispatches the parsed command-line options to the <c>Run</c> overload that matches
/// their concrete options type and returns that overload's result.
/// </summary>
/// <param name="globals">Global options; not referenced in the visible portion of this method.</param>
/// <param name="parsedOpts">The parsed options object, passed to <c>SmiCliInit.Verify</c>.</param>
/// <returns>The value returned by the selected <c>Run</c> overload.</returns>
/// <exception cref="NotImplementedException">The options type has no matching case in the switch.</exception>
private static int OnParse(GlobalOptions globals, object parsedOpts)
{
    // NOTE(review): Verify presumably checks/casts parsedOpts to the abstract options type - confirm in SmiCliInit
    var opts = SmiCliInit.Verify <IsIdentifiableAbstractOptions>(parsedOpts);
    // One case per supported options type; each delegates to the matching Run overload
    return(opts switch
    {
        IsIdentifiableRelationalDatabaseOptions o => Run(o),
        IsIdentifiableDicomFileOptions o => Run(o),
        IsIdentifiableMongoOptions o => Run(o),
        IsIdentifiableServiceOptions o => Run(o),
        IsIdentifiableFileOptions o => Run(o),
        // Fail loudly, naming the runtime type, when no case above matched
        _ => throw new NotImplementedException($"No case for '{opts.GetType()}'")
    });
/// <summary>
/// Creates a runner for DICOM files. Records the text-length threshold, installs the
/// WinForms image manager, parses the optional zero date and, when a Tesseract directory
/// is configured, sets up the Tesseract engine and registers a pixel text failure report.
/// </summary>
/// <param name="opts">Options controlling the run (zero date, Tesseract directory, text-length threshold).</param>
/// <exception cref="DirectoryNotFoundException">
/// <c>opts.TessDirectory</c> is set but does not exist.
/// </exception>
/// <exception cref="FileNotFoundException">
/// <c>eng.traineddata</c> could not be found in the <c>tessdata</c> directory.
/// </exception>
public DicomFileRunner(IsIdentifiableDicomFileOptions opts) : base(opts)
{
    _opts = opts;
    _ignoreTextLessThan = opts.IgnoreTextLessThan;

    //if using Efferent.Native DICOM codecs
    // (see https://github.com/Efferent-Health/Dicom-native)
    //Dicom.Imaging.Codec.TranscoderManager.SetImplementation(new Efferent.Native.Codec.NativeTranscoderManager());

    //OR if using fo-dicom.Native DICOM codecs
    // (see https://github.com/fo-dicom/fo-dicom/issues/631)
    ImageManager.SetImplementation(new WinFormsImageManager());

    //if there is a value we are treating as a zero date
    // NOTE(review): parsed with the current culture - confirm this is intended for the ZeroDate option
    if (!string.IsNullOrWhiteSpace(_opts.ZeroDate))
    {
        _zeroDate = DateTime.Parse(_opts.ZeroDate);
    }

    //text detection is only set up if the user supplied a Tesseract directory
    if (string.IsNullOrWhiteSpace(_opts.TessDirectory))
    {
        return;
    }

    var dir = new DirectoryInfo(_opts.TessDirectory);

    if (!dir.Exists)
    {
        throw new DirectoryNotFoundException($"Could not find TESS directory '{_opts.TessDirectory}'");
    }

    //to work with Tesseract eng.traineddata has to be in a folder called tessdata
    if (!dir.Name.Equals("tessdata"))
    {
        // Only resolve the path here. Creating the subdirectory would write to disk while
        // merely probing for the models file, and can fail on read-only locations before
        // the more useful FileNotFoundException below is reached.
        dir = new DirectoryInfo(Path.Combine(dir.FullName, "tessdata"));
    }

    var languageFile = new FileInfo(Path.Combine(dir.FullName, "eng.traineddata"));

    // Also covers the case where the tessdata directory itself is missing
    if (!languageFile.Exists)
    {
        throw new FileNotFoundException($"Could not find tesseract models file ('{languageFile.FullName}')", languageFile.FullName);
    }

    _tesseractEngine = new TesseractEngine(dir.FullName, "eng", EngineMode.Default)
    {
        DefaultPageSegMode = PageSegMode.Auto,
    };

    // Pixel text findings go to their own report, registered alongside the runner's other reports
    _tesseractReport = new PixelTextFailureReport(_opts.GetTargetName());
    Reports.Add(_tesseractReport);
}
/// <summary>
/// Builds a classifier backed by a <see cref="DicomFileRunner"/> whose Tesseract directory
/// is <paramref name="dataDirectory"/> and whose rules directory is the
/// "IsIdentifiableRules" subdirectory of it, when one exists.
/// </summary>
/// <param name="dataDirectory">Directory passed to the base class and used as the Tesseract directory.</param>
/// <param name="isIdentifiableServiceOptions">Service options supplying the <c>IgnoreTextLessThan</c> threshold.</param>
public TesseractStanfordDicomFileClassifier(DirectoryInfo dataDirectory, IsIdentifiableServiceOptions isIdentifiableServiceOptions) : base(dataDirectory)
{
    var runnerOptions = new IsIdentifiableDicomFileOptions
    {
        // Per the original note: a report must be configured so that the runner doesn't
        // get unhappy about there being no reports
        ColumnReport = true,
        TessDirectory = dataDirectory.FullName,
        IgnoreTextLessThan = isIdentifiableServiceOptions.IgnoreTextLessThan,
    };

    // The Rules directory is always called "IsIdentifiableRules"
    DirectoryInfo[] rulesDirs = dataDirectory.GetDirectories("IsIdentifiableRules");

    // If several entries match, the last one is used; with none, RulesDirectory is left unset
    if (rulesDirs.Length > 0)
    {
        runnerOptions.RulesDirectory = rulesDirs[rulesDirs.Length - 1].FullName;
    }

    _runner = new DicomFileRunner(runnerOptions);
}