/// <summary>
/// Extracts the images from the 3rd page of the document, outlines each image
/// with its bounding path and prints the image properties next to it.
/// </summary>
/// <param name="document">Document whose page at index 2 is scanned for images and drawn on.</param>
private static void ExtractImagesAndHighlight(PDFFixedDocument document)
{
    // Drawing resources: thin magenta outline for the image bounds, small black text for the labels.
    PDFPen pen = new PDFPen(new PDFRgbColor(255, 0, 192), 0.5);
    PDFBrush brush = new PDFBrush(new PDFRgbColor(0, 0, 0));
    PDFStandardFont helvetica = new PDFStandardFont(PDFStandardFontFace.Helvetica, 8);

    PDFStringAppearanceOptions sao = new PDFStringAppearanceOptions();
    sao.Brush = brush;
    sao.Font = helvetica;

    // The layout options are reused for every label; only X/Y change per image.
    PDFStringLayoutOptions slo = new PDFStringLayoutOptions();
    slo.Width = 1000;

    PDFContentExtractor ce = new PDFContentExtractor(document.Pages[2]);
    PDFVisualImageCollection eic = ce.ExtractImages(false);

    for (int i = 0; i < eic.Count; i++)
    {
        var image = eic[i];
        var corners = image.ImageCorners;

        string imageProperties = string.Format(
            "Image ID: {0}\nPixel width: {1} pixels\nPixel height: {2} pixels\n" +
            "Display width: {3} points\nDisplay height: {4} points\nHorizontal Resolution: {5} dpi\nVertical Resolution: {6} dpi",
            image.ImageID,
            image.Width,
            image.Height,
            image.DisplayWidth,
            image.DisplayHeight,
            image.DpiX,
            image.DpiY);

        // Connect the four image corners in order and close the outline.
        PDFPath boundingPath = new PDFPath();
        boundingPath.StartSubpath(corners[0].X, corners[0].Y);
        boundingPath.AddLineTo(corners[1].X, corners[1].Y);
        boundingPath.AddLineTo(corners[2].X, corners[2].Y);
        boundingPath.AddLineTo(corners[3].X, corners[3].Y);
        boundingPath.CloseSubpath();

        document.Pages[2].Canvas.DrawPath(pen, boundingPath);

        // Place the label at the corner with index 3, offset by 1 on both axes.
        slo.X = corners[3].X + 1;
        slo.Y = corners[3].Y + 1;
        document.Pages[2].Canvas.DrawString(imageProperties, sao, slo);
    }
}
/// <summary>
/// Loads the sample PDF, extracts the images found on the page at index 2 and
/// saves each one to the current directory as "image{n}.png".
/// </summary>
/// <param name="args">Command line arguments (not used).</param>
static void Main(string[] args)
{
    // Load the PDF file.
    //PDF4NET v5: PDFDocument doc = new PDFDocument("..\\SupportFiles\\Images.pdf");
    PDFFixedDocument doc = new PDFFixedDocument("..\\..\\..\\..\\..\\SupportFiles\\content.pdf");

    //for (int i = 0; i < doc.Pages.Count; i++)
    //{
    // Convert the pages to PDFImportedPage to get access to ExtractImages method.
    //PDF4NET v5: PDFImportedPage ip = doc.Pages[i] as PDFImportedPage;
    PDFContentExtractor ce = new PDFContentExtractor(doc.Pages[2]);

    //PDF4NET v5: Bitmap[] images = ip.ExtractImages();
    PDFVisualImageCollection images = ce.ExtractImages(true);

    // Save the page images to disk, if there are any.
    for (int j = 0; j < images.Count; j++)
    {
        //PDF4NET v5: images[j].Save("image" + i.ToString() + j.ToString() + ".png", ImageFormat.Png);

        // File.Create truncates an existing file. File.OpenWrite would keep the old
        // length, leaving stale trailing bytes when a previous, larger image file
        // with the same name is overwritten.
        // The using statement flushes and closes the stream even if Save throws.
        using (FileStream fs = File.Create("image" + j.ToString() + ".png"))
        {
            images[j].Save(fs, PDFVisualImageSaveFormat.Png);
        }
    }
    //}
}