/// <summary>
/// Outlines every glyph of the second text run extracted from the 2nd page of the document,
/// stroking each glyph's corner polygon with a randomly chosen color.
/// </summary>
/// <param name="document">Document whose 2nd page (index 1) is read and drawn on.</param>
private static void ExtractTextAndHighlightGlyphs(PDFFixedDocument document)
{
    var page = document.Pages[1];

    PDFContentExtractor extractor = new PDFContentExtractor(page);
    PDFTextRunCollection textRuns = extractor.ExtractTextRuns();
    PDFTextRun run = textRuns[1];

    // The pen keeps using this color instance; its components are overwritten for each glyph.
    PDFRgbColor strokeColor = new PDFRgbColor();
    PDFPen strokePen = new PDFPen(strokeColor, 0.5);

    byte[] components = new byte[3];
    Random random = new Random();

    for (int glyphIndex = 0; glyphIndex < run.Glyphs.Count; glyphIndex++)
    {
        // Pick a fresh random color for this glyph.
        random.NextBytes(components);
        strokeColor.R = components[0];
        strokeColor.G = components[1];
        strokeColor.B = components[2];

        // Connect the four glyph corners into a closed outline.
        var corners = run.Glyphs[glyphIndex].GlyphCorners;
        PDFPath outline = new PDFPath();
        outline.StartSubpath(corners[0].X, corners[0].Y);
        for (int corner = 1; corner < 4; corner++)
        {
            outline.AddLineTo(corners[corner].X, corners[corner].Y);
        }
        outline.CloseSubpath();

        page.Canvas.DrawPath(strokePen, outline);
    }
}
/// <summary>
/// Extracts the images from the 3rd page of the document, outlines each image and writes
/// the image properties (ID, pixel size, display size, resolution) next to it.
/// </summary>
/// <param name="document">Document whose 3rd page (index 2) is read and drawn on.</param>
private static void ExtractImagesAndHighlight(PDFFixedDocument document)
{
    PDFPen pen = new PDFPen(new PDFRgbColor(255, 0, 192), 0.5);
    PDFBrush brush = new PDFBrush(new PDFRgbColor(0, 0, 0));
    PDFStandardFont helvetica = new PDFStandardFont(PDFStandardFontFace.Helvetica, 8);

    PDFStringAppearanceOptions sao = new PDFStringAppearanceOptions();
    sao.Brush = brush;
    sao.Font = helvetica;

    // The same layout options are reused for every image; only X and Y change per image.
    PDFStringLayoutOptions slo = new PDFStringLayoutOptions();
    slo.Width = 1000;

    PDFContentExtractor ce = new PDFContentExtractor(document.Pages[2]);
    PDFVisualImageCollection eic = ce.ExtractImages(false);

    for (int i = 0; i < eic.Count; i++)
    {
        var image = eic[i];
        var corners = image.ImageCorners;

        string imageProperties = string.Format(
            "Image ID: {0}\nPixel width: {1} pixels\nPixel height: {2} pixels\n" +
            "Display width: {3} points\nDisplay height: {4} points\nHorizontal Resolution: {5} dpi\nVertical Resolution: {6} dpi",
            image.ImageID, image.Width, image.Height,
            image.DisplayWidth, image.DisplayHeight, image.DpiX, image.DpiY);

        // Outline the image by connecting its four corners.
        PDFPath boundingPath = new PDFPath();
        boundingPath.StartSubpath(corners[0].X, corners[0].Y);
        boundingPath.AddLineTo(corners[1].X, corners[1].Y);
        boundingPath.AddLineTo(corners[2].X, corners[2].Y);
        boundingPath.AddLineTo(corners[3].X, corners[3].Y);
        boundingPath.CloseSubpath();
        document.Pages[2].Canvas.DrawPath(pen, boundingPath);

        // Anchor the description one unit away from the 4th image corner.
        slo.X = corners[3].X + 1;
        slo.Y = corners[3].Y + 1;
        document.Pages[2].Canvas.DrawString(imageProperties, sao, slo);
    }
}
/// <summary>
/// Loads content.pdf, extracts the images found on its 3rd page (index 2) and saves each one
/// as "image&lt;n&gt;.png" in the current directory.
/// </summary>
/// <param name="args">Command line arguments; not used.</param>
static void Main(string[] args)
{
    // Load the PDF file.
    //PDF4NET v5: PDFDocument doc = new PDFDocument("..\\SupportFiles\\Images.pdf");
    PDFFixedDocument doc = new PDFFixedDocument("..\\..\\..\\..\\..\\SupportFiles\\content.pdf");

    // PDF4NET v5 looped over all pages, converted each one to PDFImportedPage and called
    // ip.ExtractImages(); here a content extractor is created for a single page instead.
    PDFContentExtractor ce = new PDFContentExtractor(doc.Pages[2]);
    PDFVisualImageCollection images = ce.ExtractImages(true);

    // Save the page images to disk, if there are any.
    for (int j = 0; j < images.Count; j++)
    {
        //PDF4NET v5: images[j].Save("image" + i.ToString() + j.ToString() + ".png", ImageFormat.Png);

        // File.Create truncates an existing file (File.OpenWrite does not, which would leave
        // stale trailing bytes behind), and the using block closes the stream even if Save throws.
        using (FileStream fs = File.Create("image" + j.ToString() + ".png"))
        {
            images[j].Save(fs, PDFVisualImageSaveFormat.Png);
        }
    }
}
/// <summary>
/// Loads content.pdf, searches its first page for "lorem", redacts the visual bounds of every
/// search result and saves the document as "RedactedSearchResults.pdf".
/// </summary>
/// <param name="args">Command line arguments; not used.</param>
public static void Main(string[] args)
{
    string supportPath = "..\\..\\..\\..\\..\\SupportFiles\\";

    // Close the input stream as soon as the document has been constructed,
    // including when the constructor throws.
    PDFFixedDocument document;
    using (FileStream input = File.OpenRead(supportPath + "content.pdf"))
    {
        document = new PDFFixedDocument(input);
    }

    PDFContentExtractor ce = new PDFContentExtractor(document.Pages[0]);
    PDFTextSearchResultCollection searchResults = ce.SearchText("lorem");

    // Only start a redaction session when there is something to redact.
    if (searchResults.Count > 0)
    {
        PDFContentRedactor cr = new PDFContentRedactor(document.Pages[0]);
        cr.BeginRedaction();

        for (int i = 0; i < searchResults.Count; i++)
        {
            cr.RedactArea(searchResults[i].VisualBounds);
        }

        cr.ApplyRedaction();
    }

    using (FileStream output = File.Create("RedactedSearchResults.pdf"))
    {
        document.Save(output);
    }
}
/// <summary>
/// Runs the text search sample: performs a simple, a whole-word and a regular expression
/// search on the first page and hands each result set to HighlightSearchResults with its own color.
/// </summary>
/// <param name="input">Stream the PDF document is loaded from.</param>
/// <returns>A one-element array wrapping the modified document under the name "searchtext.pdf".</returns>
public static SampleOutputInfo[] Run(Stream input)
{
    PDFFixedDocument document = new PDFFixedDocument(input);
    var firstPage = document.Pages[0];
    PDFContentExtractor extractor = new PDFContentExtractor(firstPage);

    // Simple search, shown in red.
    PDFTextSearchResultCollection simpleMatches = extractor.SearchText("at");
    HighlightSearchResults(firstPage, simpleMatches, PDFRgbColor.Red);

    // Whole words search, shown in green.
    PDFTextSearchResultCollection wholeWordMatches =
        extractor.SearchText("at", PDFTextSearchOptions.WholeWordSearch);
    HighlightSearchResults(firstPage, wholeWordMatches, PDFRgbColor.Green);

    // Regular expression search for words that start with an uppercase letter, shown in blue.
    PDFTextSearchResultCollection regexMatches =
        extractor.SearchText("[A-Z][a-z]*", PDFTextSearchOptions.RegExSearch);
    HighlightSearchResults(firstPage, regexMatches, PDFRgbColor.Blue);

    return new SampleOutputInfo[] { new SampleOutputInfo(document, "searchtext.pdf") };
}
/// <summary>
/// Runs the page objects sample: extracts the visual objects of the first page, draws a red
/// contour with 5 units of padding around each image and text object plus the first path object,
/// and labels each contour with the object kind.
/// </summary>
/// <param name="input">Stream the PDF document is loaded from.</param>
/// <returns>A one-element array wrapping the modified document under the name "pageobjects.pdf".</returns>
public static SampleOutputInfo[] Run(Stream input)
{
    PDFStandardFont labelFont = new PDFStandardFont(PDFStandardFontFace.Helvetica, 10);
    PDFPen outlinePen = new PDFPen(PDFRgbColor.Red, 1);
    PDFBrush labelBrush = new PDFBrush();

    PDFFixedDocument document = new PDFFixedDocument(input);
    var page = document.Pages[0];
    PDFContentExtractor extractor = new PDFContentExtractor(page);
    PDFVisualObjectCollection visualObjects = extractor.ExtractVisualObjects(false);

    for (int index = 0; index < visualObjects.Count; index++)
    {
        var current = visualObjects[index];
        PDFVisualObjectType kind = current.Type;

        if (kind == PDFVisualObjectType.Image)
        {
            PDFImageVisualObject imageObject = current as PDFImageVisualObject;
            var corners = imageObject.Image.ImageCorners;

            PDFPath outline = new PDFPath();
            outline.StartSubpath(corners[0].X - 5, corners[0].Y + 5);
            outline.AddLineTo(corners[1].X + 5, corners[1].Y + 5);
            outline.AddLineTo(corners[2].X + 5, corners[2].Y - 5);
            outline.AddLineTo(corners[3].X - 5, corners[3].Y - 5);
            outline.CloseSubpath();

            page.Canvas.DrawPath(outlinePen, outline);
            page.Canvas.DrawString("Image", labelFont, labelBrush, corners[0].X - 5, corners[0].Y + 5);
        }
        else if (kind == PDFVisualObjectType.Text)
        {
            PDFTextVisualObject textObject = current as PDFTextVisualObject;
            var corners = textObject.TextFragment.FragmentCorners;

            PDFPath outline = new PDFPath();
            outline.StartSubpath(corners[0].X - 5, corners[0].Y + 5);
            outline.AddLineTo(corners[1].X + 5, corners[1].Y + 5);
            outline.AddLineTo(corners[2].X + 5, corners[2].Y - 5);
            outline.AddLineTo(corners[3].X - 5, corners[3].Y - 5);
            outline.CloseSubpath();

            page.Canvas.DrawPath(outlinePen, outline);
            page.Canvas.DrawString("Text", labelFont, labelBrush, corners[0].X - 5, corners[0].Y + 5);
        }
        else if (kind == PDFVisualObjectType.Path)
        {
            PDFPathVisualObject pathObject = current as PDFPathVisualObject;

            // Scan all the path points to find the smallest axis-aligned rectangle containing them.
            double minX = 999999, minY = 999999, maxX = -999999, maxY = -999999;
            for (int itemIndex = 0; itemIndex < pathObject.PathItems.Count; itemIndex++)
            {
                PDFPathItem item = pathObject.PathItems[itemIndex];
                var points = item.Points;
                if (points == null)
                {
                    continue;
                }

                for (int pointIndex = 0; pointIndex < points.Length; pointIndex++)
                {
                    var point = points[pointIndex];
                    if (point.X <= minX)
                    {
                        minX = point.X;
                    }
                    if (point.Y <= minY)
                    {
                        minY = point.Y;
                    }
                    if (point.X >= maxX)
                    {
                        maxX = point.X;
                    }
                    if (point.Y >= maxY)
                    {
                        maxY = point.Y;
                    }
                }
            }

            PDFPath outline = new PDFPath();
            outline.StartSubpath(minX - 5, minY - 5);
            outline.AddLineTo(maxX + 5, minY - 5);
            outline.AddLineTo(maxX + 5, maxY + 5);
            outline.AddLineTo(minX - 5, maxY + 5);
            outline.CloseSubpath();

            page.Canvas.DrawPath(outlinePen, outline);
            page.Canvas.DrawString("Path", labelFont, labelBrush, minX - 5, maxY + 5);

            // Skip the rest of path objects, they are the evaluation message.
            break;
        }
    }

    return new SampleOutputInfo[] { new SampleOutputInfo(document, "pageobjects.pdf") };
}