/// <summary>
/// Initializes a new <c>DocumentProcessingService</c> by storing each supplied
/// dependency in its corresponding private field.
/// </summary>
/// <param name="documentContentExtractor">Stored in <c>_documentContentExtractor</c>.</param>
/// <param name="formRecognizerService">Stored in <c>_formRecognizerService</c>.</param>
/// <param name="dataService">Stored in <c>_dataService</c>.</param>
/// <param name="log">Stored in <c>_log</c>.</param>
public DocumentProcessingService(
    IDocumentContentExtractor documentContentExtractor,
    IFormRecognizerService formRecognizerService,
    IDataService<InvoiceData> dataService,
    ILogger<DocumentProcessingService> log)
{
    // No validation is performed here; the arguments are kept exactly as given.
    this._log = log;
    this._dataService = dataService;
    this._formRecognizerService = formRecognizerService;
    this._documentContentExtractor = documentContentExtractor;
}
        /// <summary>
        /// Extracts the images embedded in a document and writes each one to the zip
        /// output folder as <c>{page}-{index}.jpg</c> (both numbers are 1-based).
        /// </summary>
        /// <param name="fileName">Name of the source document; its extension selects the extractor.</param>
        /// <param name="folderName">Folder name forwarded to <c>ProcessTask</c> and echoed in the error response.</param>
        /// <returns>
        /// The response returned by <c>ProcessTask</c>, or a response with
        /// <c>StatusCode = 500</c> describing the exception when one is thrown inside the try block.
        /// </returns>
        private async Task<Response> ParseFileImages(string fileName, string folderName)
        {
            try
            {
                return await ProcessTask(fileName, folderName, ".jpg", true, "", delegate(string inFilePath, string outPath, string zipOutFolder)
                {
                    if (!Directory.Exists(zipOutFolder))
                    {
                        Directory.CreateDirectory(zipOutFolder);
                    }

                    // TrimStart instead of Substring(1): a name without an extension yields "" here
                    // (and then the "Unsupported file type" error) rather than an ArgumentOutOfRangeException.
                    // ToLowerInvariant keeps the match independent of the server's culture.
                    string fileExt = (Path.GetExtension(fileName) ?? string.Empty).TrimStart('.').ToLowerInvariant();

                    IDocumentContentExtractor extractor = GetContentExtractor(inFilePath, fileExt);
                    if (extractor == null)
                    {
                        throw new Exception("Unsupported file type for image extraction");
                    }

                    // The extractor is created for this call only, so release it when done.
                    // The 'as' cast makes this a no-op if the extractor type is not disposable.
                    using (extractor as IDisposable)
                    {
                        ImageAreaSearchOptions searchOptions = new ImageAreaSearchOptions();
                        searchOptions.Rectangle = new Rectangle(0, 0, 1920, 1080);

                        int pageCount = extractor.DocumentContent.PageCount;
                        int extractedCount = 0;

                        for (int pageIndex = 0; pageIndex < pageCount; pageIndex++)
                        {
                            IList<ImageArea> imageAreas = extractor.DocumentContent.GetImageAreas(pageIndex, searchOptions);

                            for (int i = 0; i < imageAreas.Count; i++)
                            {
                                // Build the file name separately from the folder so that braces in the
                                // folder path can never be interpreted as format placeholders.
                                string imageName = string.Format("{0}-{1}.jpg", pageIndex + 1, i + 1);
                                string imagePath = Path.Combine(zipOutFolder, imageName);

                                using (Stream source = imageAreas[i].GetRawStream())
                                using (Stream target = System.IO.File.Create(imagePath))
                                {
                                    CopyStream(source, target);
                                }

                                extractedCount++;
                            }
                        }

                        // Decide "no images" from the whole document, not just the first page:
                        // a document whose images start on a later page is still valid input.
                        if (extractedCount == 0)
                        {
                            throw new Exception("No images found for extraction");
                        }
                    }
                });
            }
            catch (Exception exc)
            {
                return new Response
                {
                    FileName = fileName,
                    FolderName = folderName,
                    OutputType = "zip",
                    Status = exc.Message,
                    StatusCode = 500,
                    Text = exc.ToString()
                };
            }
        }
        /// <summary>
        /// Creates the text extractor whose supported-extension list contains <paramref name="fileExt"/>.
        /// </summary>
        /// <param name="inFilePath">Path passed to the extractor's constructor.</param>
        /// <param name="fileExt">Extension to look up; compared with <c>==</c> against each list entry.</param>
        /// <returns>
        /// A new extractor for the first list that contains the extension, or <c>null</c> when no list does.
        /// </returns>
        private IDocumentContentExtractor GetContentExtractor(string inFilePath, string fileExt)
        {
            // Lists are consulted in a fixed order (Word, Excel, Slides, PDF); the first hit wins.
            if (Array.Exists(WordTypesForImage, candidate => candidate == fileExt))
            {
                return new WordsTextExtractor(inFilePath);
            }

            if (Array.Exists(ExcelTypesForImage, candidate => candidate == fileExt))
            {
                return new CellsTextExtractor(inFilePath);
            }

            if (Array.Exists(SlidesTypesForImage, candidate => candidate == fileExt))
            {
                return new SlidesTextExtractor(inFilePath);
            }

            if (Array.Exists(PdfTypesForImage, candidate => candidate == fileExt))
            {
                return new PdfTextExtractor(inFilePath);
            }

            // Extension not present in any list.
            return null;
        }