/// <summary>
/// Initializes a new <see cref="PdfRecognitionJob"/>, storing each supplied dependency in its
/// backing field. Arguments are validated in declaration order.
/// </summary>
/// <param name="engineFactory">Stored in <c>_engineFactory</c>; must not be null.</param>
/// <param name="imageFile">Stored in <c>_pdfFile</c>; must not be null.</param>
/// <param name="preprocessors">Stored in <c>_preprocessors</c>; must not be null.</param>
/// <param name="pdfDocumentFactory">Stored in <c>_pdfDocumentFactory</c>; must not be null.</param>
/// <param name="pixFactory">Stored in <c>_pixFactory</c>; must not be null.</param>
/// <param name="storage">Stored in <c>_storage</c>; must not be null.</param>
/// <exception cref="ArgumentNullException">Thrown for the first argument that is null.</exception>
public PdfRecognitionJob(
    IConfiguredTesseractEngineFactory engineFactory,
    IStoredImageFile imageFile,
    IEnumerable<IPreprocessor> preprocessors,
    IPdfDocumentFactory pdfDocumentFactory,
    IPixFactory pixFactory,
    IImageFileStorage storage)
{
    if (engineFactory is null)
    {
        throw new ArgumentNullException(nameof(engineFactory));
    }
    _engineFactory = engineFactory;

    if (imageFile is null)
    {
        throw new ArgumentNullException(nameof(imageFile));
    }
    // The parameter is named imageFile, but the field it feeds is _pdfFile.
    _pdfFile = imageFile;

    if (preprocessors is null)
    {
        throw new ArgumentNullException(nameof(preprocessors));
    }
    _preprocessors = preprocessors;

    if (pdfDocumentFactory is null)
    {
        throw new ArgumentNullException(nameof(pdfDocumentFactory));
    }
    _pdfDocumentFactory = pdfDocumentFactory;

    if (pixFactory is null)
    {
        throw new ArgumentNullException(nameof(pixFactory));
    }
    _pixFactory = pixFactory;

    if (storage is null)
    {
        throw new ArgumentNullException(nameof(storage));
    }
    _storage = storage;
}
/// <summary>
/// Initializes a new <see cref="ImageRecognitionEngine"/>, storing each supplied dependency in its
/// backing field. Arguments are validated in declaration order.
/// </summary>
/// <param name="recognizer">Stored in <c>_recognizer</c>; must not be null.</param>
/// <param name="storage">Stored in <c>_storage</c>; must not be null.</param>
/// <param name="recognitionJobFactory">Stored in <c>_jobFactory</c>; must not be null.</param>
/// <param name="recognitionJobQueue">Stored in <c>_jobQueue</c>; must not be null.</param>
/// <param name="tesseractFactory">Stored in <c>_tesseractFactory</c>; must not be null.</param>
/// <param name="preprocessorFactory">Stored in <c>_preprocessorFactory</c>; must not be null.</param>
/// <exception cref="ArgumentNullException">Thrown for the first argument that is null.</exception>
internal ImageRecognitionEngine(
    IImageFileTypeRecognizer recognizer,
    IImageFileStorage storage,
    IRecognitionJobFactory recognitionJobFactory,
    IRecognitionJobQueue recognitionJobQueue,
    ITesseractEngineFactory tesseractFactory,
    IPreprocessorFactory preprocessorFactory)
{
    if (recognizer is null)
    {
        throw new ArgumentNullException(nameof(recognizer));
    }
    _recognizer = recognizer;

    if (storage is null)
    {
        throw new ArgumentNullException(nameof(storage));
    }
    _storage = storage;

    if (recognitionJobFactory is null)
    {
        throw new ArgumentNullException(nameof(recognitionJobFactory));
    }
    _jobFactory = recognitionJobFactory;

    if (recognitionJobQueue is null)
    {
        throw new ArgumentNullException(nameof(recognitionJobQueue));
    }
    _jobQueue = recognitionJobQueue;

    if (tesseractFactory is null)
    {
        throw new ArgumentNullException(nameof(tesseractFactory));
    }
    _tesseractFactory = tesseractFactory;

    if (preprocessorFactory is null)
    {
        throw new ArgumentNullException(nameof(preprocessorFactory));
    }
    _preprocessorFactory = preprocessorFactory;
}
/// <summary>
/// Scans the <c>/Resources</c> → <c>/XObject</c> dictionary of every page in
/// <paramref name="document"/>, hands each entry whose <c>/Subtype</c> is <c>/Image</c> to
/// <c>TryExportImageAsync</c>, and wraps every non-null result with
/// <paramref name="storage"/>.
/// </summary>
/// <param name="document">The PDF document whose pages are scanned; must not be null.</param>
/// <param name="storage">Used to wrap each exported result via <c>Wrap</c>; must not be null.</param>
/// <returns>
/// The wrapped files in page / XObject enumeration order. The sequence is empty when no page
/// yields an exported image.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="document"/> or <paramref name="storage"/> is null. Because the method is
/// <c>async</c>, the exception surfaces when the returned task is awaited.
/// </exception>
public static async Task<IEnumerable<IStoredImageFile>> ExtractImagesAsync(this PdfDocument document, IImageFileStorage storage)
{
    // An extension receiver can be null; fail with a named argument instead of a bare
    // NullReferenceException at document.Pages.
    if (document is null)
    {
        throw new ArgumentNullException(nameof(document));
    }

    if (storage is null)
    {
        throw new ArgumentNullException(nameof(storage));
    }

    var results = new List<IStoredImageFile>();

    foreach (var page in document.Pages)
    {
        // Pages without a /Resources or /XObject dictionary contribute nothing.
        var resources = page.Elements.GetDictionary("/Resources");
        var xObjects = resources?.Elements.GetDictionary("/XObject");
        if (xObjects == null)
        {
            continue;
        }

        foreach (var item in xObjects.Elements.Values)
        {
            // Only entries that are references resolving to a dictionary are considered,
            // and of those only the ones tagged as images.
            var reference = item as PdfReference;
            if (!(reference?.Value is PdfDictionary xObject)
                || xObject.Elements.GetString("/Subtype") != "/Image")
            {
                continue;
            }

            // A null result means nothing was exported for this XObject; skip it.
            var path = await TryExportImageAsync(xObject);
            if (path == null)
            {
                continue;
            }

            results.Add(storage.Wrap(path));
        }
    }

    return results;
}
/// <summary>
/// Initializes a new <see cref="RecognitionJobFactory"/>, storing each supplied dependency in its
/// backing field. Arguments are validated in declaration order.
/// </summary>
/// <param name="pixFactory">Stored in <c>_pixFactory</c>; must not be null.</param>
/// <param name="pdfDocumentFactory">Stored in <c>_pdfDocumentFactory</c>; must not be null.</param>
/// <param name="storage">Stored in <c>_storage</c>; must not be null.</param>
/// <exception cref="ArgumentNullException">Thrown for the first argument that is null.</exception>
public RecognitionJobFactory(
    IPixFactory pixFactory,
    IPdfDocumentFactory pdfDocumentFactory,
    IImageFileStorage storage)
{
    if (pixFactory is null)
    {
        throw new ArgumentNullException(nameof(pixFactory));
    }
    _pixFactory = pixFactory;

    if (pdfDocumentFactory is null)
    {
        throw new ArgumentNullException(nameof(pdfDocumentFactory));
    }
    _pdfDocumentFactory = pdfDocumentFactory;

    if (storage is null)
    {
        throw new ArgumentNullException(nameof(storage));
    }
    _storage = storage;
}