Example #1
0
        /// <summary>
        /// Determines whether processing a single page of a PDF file yields any images.
        /// </summary>
        /// <param name="filename">File name of the PDF, handed to <c>PdfReader</c>.</param>
        /// <param name="pageNumber">Number of the page whose content is processed.</param>
        /// <returns>
        /// <c>true</c> when the render listener holds at least one image after the page
        /// has been processed; <c>false</c> otherwise.
        /// </returns>
        public static bool PageContainsImages(string filename, int pageNumber)
        {
            using (var pdf = new PdfReader(filename))
            {
                var contentParser  = new PdfReaderContentParser(pdf);
                var imageCollector = new ImageRenderListener();

                // The listener is fed by the parser while it walks the page content.
                contentParser.ProcessContent(pageNumber, imageCollector);

                bool hasImages = imageCollector.Images.Count > 0;
                return hasImages;
            }
        }
Example #2
0
        /// <summary>
        /// Walks every page of a PDF file and, for each image the render listener reports,
        /// runs OCR against a temporary image file, returning the concatenated OCR output.
        /// </summary>
        /// <param name="filePath">File name of the PDF, handed to <c>PdfReader</c>.</param>
        /// <param name="language">Value assigned to the OCR engine's <c>Language</c> property.</param>
        /// <returns>
        /// The results of every <c>FromImage()</c> call appended in order; an empty string
        /// when no images were reported.
        /// </returns>
        public string ExtractImage(string filePath, string language)
        {
            var tesseract = new OCR()
            {
                Language = language
            };

            // The temporary image path never changes, so compute it once. Path.Combine
            // inserts a directory separator only when the base directory lacks one.
            var imagePath = System.IO.Path.Combine(
                System.IO.Path.GetFullPath(AppDomain.CurrentDomain.BaseDirectory),
                "temp.png");

            // StringBuilder avoids re-copying the accumulated text on every append.
            var imageText    = new System.Text.StringBuilder();
            var ocrAttempted = false;

            try
            {
                using (var reader = new PdfReader(filePath))
                {
                    var parser   = new PdfReaderContentParser(reader);
                    var listener = new ImageRenderListener();

                    for (var index = 1; index <= reader.NumberOfPages; index++)
                    {
                        parser.ProcessContent(index, listener);

                        // NOTE(review): the same listener is reused for every page. If its
                        // Images collection accumulates across ProcessContent calls, images
                        // from earlier pages are counted again here — confirm.
                        for (int count = 0; count < listener.Images.Count; count++)
                        {
                            // NOTE(review): nothing in this method writes temp.png; presumably
                            // the listener or the OCR wrapper produces it — confirm.
                            ocrAttempted        = true;
                            tesseract.ImagePath = imagePath;
                            imageText.Append(tesseract.FromImage());
                        }
                    }
                }
            }
            finally
            {
                // Remove the temporary image even when parsing or OCR throws, but only
                // if this call actually used it (same condition as before).
                if (ocrAttempted)
                {
                    File.Delete(imagePath);
                }
            }

            return imageText.ToString();
        }