/// <summary>
/// Determines whether a given page of a PDF file holds any images.
/// </summary>
/// <param name="filename">Path of the PDF file to open. Also stored in <c>CurrentFilename</c>.</param>
/// <param name="pageNumber">Number of the page whose content is processed.</param>
/// <returns>True if the page contains at least one image; false otherwise.</returns>
public static bool PageContainsImages(string filename, int pageNumber)
{
    // Remember which file is being processed before it is opened.
    CurrentFilename = filename;

    using (PdfReader pdf = new PdfReader(filename))
    {
        PdfReaderContentParser contentParser = new PdfReaderContentParser(pdf);
        ImageRenderListener imageListener = new ImageRenderListener();

        // The listener collects the images encountered while the page content is processed.
        contentParser.ProcessContent(pageNumber, imageListener);

        int imageCount = imageListener.Images.Count;
        return imageCount > 0;
    }
}
/// <summary>Extracts all images (of types that iTextSharp knows how to decode) from a PDF file.</summary>
/// <remarks>
/// As a side effect, the text of every page is extracted, split on newlines and spaces, and the
/// non-empty tokens are appended to a list that is handed to <c>Program.addCombineLists</c>
/// once per page.
/// </remarks>
/// <param name="filename">Path of the PDF file to read.</param>
/// <returns>A dictionary whose key is a suggested file name built from the PDF filename without
/// extension, the page number and the image index in the page, and whose value is the image.</returns>
public static Dictionary<string, System.Drawing.Image> ExtractImage(string filename)
{
    var images = new Dictionary<string, System.Drawing.Image>();

    using (var reader = new PdfReader(filename))
    {
        List<string> textList = new List<string>();
        var parser = new PdfReaderContentParser(reader);
        string baseName = System.IO.Path.GetFileNameWithoutExtension(filename);

        for (var i = 1; i <= reader.NumberOfPages; i++)
        {
            // A new listener per page so that only this page's images are reported.
            var listener = new ImageRenderListener();
            parser.ProcessContent(i, listener);

            // Reads in the text of the page. The strategy must be created per page:
            // a SimpleTextExtractionStrategy accumulates everything rendered into it, so
            // reusing one instance would return the text of all previous pages again.
            ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();
            var text = PdfTextExtractor.GetTextFromPage(reader, i, strategy);

            if (!string.IsNullOrWhiteSpace(text))
            {
                foreach (string token in text.Split('\n', ' '))
                {
                    if (token != "")
                    {
                        textList.Add(token);
                    }
                }
            }

            // NOTE(review): textList is never cleared, so each call receives the tokens of all
            // pages processed so far — confirm this is what addCombineLists expects.
            Program.addCombineLists(textList);

            if (listener.Images.Count > 0)
            {
                Console.WriteLine("Found {0} images on page {1}.", listener.Images.Count, i);

                var index = 1;
                foreach (var pair in listener.Images)
                {
                    // NOTE(review): this format puts a '.' before {3}, whereas the sibling
                    // ExtractImages(filename, pageNumber) does not — verify whether the
                    // listener's value already includes the leading dot.
                    string key = string.Format(
                        "{0}_Page_{1}_Image_{2}.{3}",
                        baseName,
                        i.ToString("D4"),
                        index.ToString("D4"),
                        pair.Value);

                    images.Add(key, pair.Key);
                    index++;
                }
            }
        }
    }

    return images;
}
/// <summary>Extracts all images (of types that iTextSharp knows how to decode)
/// from a specified page of a PDF file.</summary>
/// <param name="filename">Path of the PDF file to read.</param>
/// <param name="pageNumber">Number of the page whose images are extracted.</param>
/// <returns>Returns a generic <see cref="Dictionary{TKey, TValue}"/> of string to
/// <see cref="System.Drawing.Image"/>, where the key is a suggested file name, in the format:
/// PDF filename without extension, page number and image index in the page.
/// The dictionary is empty when the page yields no images.</returns>
public static Dictionary<string, System.Drawing.Image> ExtractImages(string filename, int pageNumber)
{
    var images = new Dictionary<string, System.Drawing.Image>();

    // Dispose the reader once the page has been processed so the underlying file is
    // released, matching how the other methods in this file handle PdfReader.
    using (var reader = new PdfReader(filename))
    {
        var parser = new PdfReaderContentParser(reader);
        var listener = new ImageRenderListener();
        parser.ProcessContent(pageNumber, listener);

        if (listener.Images.Count > 0)
        {
            Console.WriteLine("Found {0} images on page {1}.", listener.Images.Count, pageNumber);

            string baseName = System.IO.Path.GetFileNameWithoutExtension(filename);
            int index = 1;
            foreach (KeyValuePair<System.Drawing.Image, string> pair in listener.Images)
            {
                // pair.Value is appended directly after the image index to complete the name.
                string key = string.Format(
                    "{0}_Page_{1}_Image_{2}{3}",
                    baseName,
                    pageNumber.ToString("D4"),
                    index.ToString("D4"),
                    pair.Value);

                images.Add(key, pair.Key);
                index++;
            }
        }
    }

    return images;
}