/// <summary>
/// Determines whether a given page of a PDF file holds any images.
/// </summary>
/// <param name="filename">Path of the PDF file to open.</param>
/// <param name="pageNumber">Number of the page handed to the content parser.</param>
/// <returns>
/// <c>true</c> when the render listener collected at least one image for the page;
/// otherwise <c>false</c>.
/// </returns>
public static bool PageContainsImages(string filename, int pageNumber)
{
    using (var pdf = new PdfReader(filename))
    {
        var contentParser = new PdfReaderContentParser(pdf);
        var imageCollector = new ImageRenderListener();

        // The listener gathers the images encountered while the page content is processed.
        contentParser.ProcessContent(pageNumber, imageCollector);

        bool hasImages = imageCollector.Images.Count >= 1;
        return hasImages;
    }
}
/// <summary>
/// Runs OCR for the images reported in a PDF file and returns the recognized text.
/// </summary>
/// <param name="filePath">Path of the PDF file to read.</param>
/// <param name="language">Language assigned to the OCR engine.</param>
/// <returns>
/// The concatenated OCR output (one OCR pass per image reported by the listener),
/// or an empty string when no image was reported.
/// </returns>
public string ExtractImage(string filePath, string language)
{
    var tesseract = new OCR() { Language = language };

    // The temporary image path never changes, so compute it once instead of per image.
    // NOTE(review): nothing in this method writes "temp.png"; presumably the listener or
    // the OCR helper produces it - confirm, otherwise every pass reads a stale/missing file.
    var imagePath = System.IO.Path.GetFullPath(AppDomain.CurrentDomain.BaseDirectory) + "temp.png";

    // StringBuilder avoids re-copying the accumulated text on every append.
    var imageText = new System.Text.StringBuilder();

    // Tracks whether the temp file was ever handed to the OCR engine, so that cleanup
    // only runs in the cases where the original code would have deleted the file.
    var ocrAttempted = false;

    try
    {
        using (var reader = new PdfReader(filePath))
        {
            var parser = new PdfReaderContentParser(reader);
            var listener = new ImageRenderListener();

            for (var index = 1; index <= reader.NumberOfPages; index++)
            {
                parser.ProcessContent(index, listener);

                // NOTE(review): the same listener instance is reused for every page. If it
                // accumulates images across ProcessContent calls, images from earlier pages
                // are counted (and OCR'd) again here - confirm against ImageRenderListener.
                for (int count = 0; count < listener.Images.Count; count++)
                {
                    ocrAttempted = true;
                    tesseract.ImagePath = imagePath;
                    imageText.Append(tesseract.FromImage());
                }
            }
        }
    }
    finally
    {
        // Clean up the temporary image even when parsing or OCR throws, so a failed run
        // does not leave the file behind.
        if (ocrAttempted)
        {
            File.Delete(imagePath);
        }
    }

    return imageText.ToString();
}