Пример #1
0
 /// <summary>
 /// Initializes the job with its collaborators. Every argument is required.
 /// </summary>
 /// <param name="engineFactory">Stored in <c>_engineFactory</c>.</param>
 /// <param name="imageFile">Stored in <c>_pdfFile</c>.</param>
 /// <param name="preprocessors">Stored in <c>_preprocessors</c>.</param>
 /// <param name="pdfDocumentFactory">Stored in <c>_pdfDocumentFactory</c>.</param>
 /// <param name="pixFactory">Stored in <c>_pixFactory</c>.</param>
 /// <param name="storage">Stored in <c>_storage</c>.</param>
 /// <exception cref="ArgumentNullException">
 /// Any argument is <c>null</c>; the exception names the first null parameter in declaration order.
 /// </exception>
 public PdfRecognitionJob(
     IConfiguredTesseractEngineFactory engineFactory,
     IStoredImageFile imageFile,
     IEnumerable<IPreprocessor> preprocessors,
     IPdfDocumentFactory pdfDocumentFactory,
     IPixFactory pixFactory,
     IImageFileStorage storage)
 {
     if (engineFactory is null)
     {
         throw new ArgumentNullException(nameof(engineFactory));
     }
     _engineFactory = engineFactory;

     if (imageFile is null)
     {
         throw new ArgumentNullException(nameof(imageFile));
     }
     _pdfFile = imageFile;

     if (preprocessors is null)
     {
         throw new ArgumentNullException(nameof(preprocessors));
     }
     _preprocessors = preprocessors;

     if (pdfDocumentFactory is null)
     {
         throw new ArgumentNullException(nameof(pdfDocumentFactory));
     }
     _pdfDocumentFactory = pdfDocumentFactory;

     if (pixFactory is null)
     {
         throw new ArgumentNullException(nameof(pixFactory));
     }
     _pixFactory = pixFactory;

     if (storage is null)
     {
         throw new ArgumentNullException(nameof(storage));
     }
     _storage = storage;
 }
Пример #2
0
 /// <summary>
 /// Initializes the engine with its collaborators. Every argument is required.
 /// </summary>
 /// <param name="recognizer">Stored in <c>_recognizer</c>.</param>
 /// <param name="storage">Stored in <c>_storage</c>.</param>
 /// <param name="recognitionJobFactory">Stored in <c>_jobFactory</c>.</param>
 /// <param name="recognitionJobQueue">Stored in <c>_jobQueue</c>.</param>
 /// <param name="tesseractFactory">Stored in <c>_tesseractFactory</c>.</param>
 /// <param name="preprocessorFactory">Stored in <c>_preprocessorFactory</c>.</param>
 /// <exception cref="ArgumentNullException">
 /// Any argument is <c>null</c>; the exception names the first null parameter in declaration order.
 /// </exception>
 internal ImageRecognitionEngine(
     IImageFileTypeRecognizer recognizer,
     IImageFileStorage storage,
     IRecognitionJobFactory recognitionJobFactory,
     IRecognitionJobQueue recognitionJobQueue,
     ITesseractEngineFactory tesseractFactory,
     IPreprocessorFactory preprocessorFactory)
 {
     if (recognizer is null)
     {
         throw new ArgumentNullException(nameof(recognizer));
     }
     _recognizer = recognizer;

     if (storage is null)
     {
         throw new ArgumentNullException(nameof(storage));
     }
     _storage = storage;

     if (recognitionJobFactory is null)
     {
         throw new ArgumentNullException(nameof(recognitionJobFactory));
     }
     _jobFactory = recognitionJobFactory;

     if (recognitionJobQueue is null)
     {
         throw new ArgumentNullException(nameof(recognitionJobQueue));
     }
     _jobQueue = recognitionJobQueue;

     if (tesseractFactory is null)
     {
         throw new ArgumentNullException(nameof(tesseractFactory));
     }
     _tesseractFactory = tesseractFactory;

     if (preprocessorFactory is null)
     {
         throw new ArgumentNullException(nameof(preprocessorFactory));
     }
     _preprocessorFactory = preprocessorFactory;
 }
        /// <summary>
        /// Scans the resources of every page in <paramref name="document"/> for image XObjects,
        /// exports each one via <c>TryExportImageAsync</c>, and returns the exported results
        /// wrapped by <paramref name="storage"/>.
        /// </summary>
        /// <param name="document">The PDF document whose pages are scanned.</param>
        /// <param name="storage">Storage used to wrap each successfully exported image.</param>
        /// <returns>
        /// The wrapped files in page order, then in XObject enumeration order within a page.
        /// The sequence is empty when no image could be exported.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="storage"/> or <paramref name="document"/> is <c>null</c>. Because this
        /// method is <c>async</c>, the exception is delivered through the returned task.
        /// </exception>
        public static async Task<IEnumerable<IStoredImageFile>> ExtractImagesAsync(this PdfDocument document, IImageFileStorage storage)
        {
            if (storage == null)
            {
                throw new ArgumentNullException(nameof(storage));
            }

            // An extension method can be invoked on a null receiver; fail with a descriptive
            // argument exception instead of a NullReferenceException at document.Pages.
            // Checked after storage so the pre-existing failure for a null storage is unchanged.
            if (document == null)
            {
                throw new ArgumentNullException(nameof(document));
            }

            var results = new List<IStoredImageFile>();

            foreach (var page in document.Pages)
            {
                // Pages without a /Resources dictionary or without /XObject entries have nothing to export.
                var resources = page.Elements.GetDictionary("/Resources");
                var xObjects  = resources?.Elements.GetDictionary("/XObject");
                if (xObjects == null)
                {
                    continue;
                }

                foreach (var item in xObjects.Elements.Values)
                {
                    // Only indirect references that resolve to a dictionary with /Subtype /Image are exported.
                    var reference = item as PdfReference;
                    if (!(reference?.Value is PdfDictionary xObject) ||
                        xObject.Elements.GetString("/Subtype") != "/Image")
                    {
                        continue;
                    }

                    // A null result means the export produced nothing for this XObject; skip it.
                    var path = await TryExportImageAsync(xObject);
                    if (path == null)
                    {
                        continue;
                    }

                    results.Add(storage.Wrap(path));
                }
            }

            return results;
        }
 /// <summary>
 /// Initializes the factory with its collaborators. Every argument is required.
 /// </summary>
 /// <param name="pixFactory">Stored in <c>_pixFactory</c>.</param>
 /// <param name="pdfDocumentFactory">Stored in <c>_pdfDocumentFactory</c>.</param>
 /// <param name="storage">Stored in <c>_storage</c>.</param>
 /// <exception cref="ArgumentNullException">
 /// Any argument is <c>null</c>; the exception names the first null parameter in declaration order.
 /// </exception>
 public RecognitionJobFactory(IPixFactory pixFactory, IPdfDocumentFactory pdfDocumentFactory, IImageFileStorage storage)
 {
     if (pixFactory is null)
     {
         throw new ArgumentNullException(nameof(pixFactory));
     }
     _pixFactory = pixFactory;

     if (pdfDocumentFactory is null)
     {
         throw new ArgumentNullException(nameof(pdfDocumentFactory));
     }
     _pdfDocumentFactory = pdfDocumentFactory;

     if (storage is null)
     {
         throw new ArgumentNullException(nameof(storage));
     }
     _storage = storage;
 }