예제 #1
0
        /// <summary>
        /// Takes the second text run extracted from the document's second page and strokes
        /// the four-corner outline of each of its glyphs, choosing a random color per glyph.
        /// </summary>
        /// <param name="document">Document whose second page (index 1) is read and drawn on.</param>
        private static void ExtractTextAndHighlightGlyphs(PDFFixedDocument document)
        {
            // The channels of this color are rewritten before every glyph is drawn;
            // presumably the pen keeps a reference to it, so each outline gets the new color.
            PDFRgbColor strokeColor = new PDFRgbColor();
            PDFPen      stroke      = new PDFPen(strokeColor, 0.5);
            Random      random      = new Random();
            byte[]      channels    = new byte[3];

            var page = document.Pages[1];
            PDFContentExtractor extractor = new PDFContentExtractor(page);
            PDFTextRun          run       = extractor.ExtractTextRuns()[1];

            var glyphs = run.Glyphs;
            for (int glyphIndex = 0; glyphIndex < glyphs.Count; glyphIndex++)
            {
                random.NextBytes(channels);
                strokeColor.R = channels[0];
                strokeColor.G = channels[1];
                strokeColor.B = channels[2];

                // Connect the glyph's four corners in order and close the outline.
                var corners = glyphs[glyphIndex].GlyphCorners;
                PDFPath outline = new PDFPath();
                outline.StartSubpath(corners[0].X, corners[0].Y);
                for (int corner = 1; corner <= 3; corner++)
                {
                    outline.AddLineTo(corners[corner].X, corners[corner].Y);
                }
                outline.CloseSubpath();

                page.Canvas.DrawPath(stroke, outline);
            }
        }
예제 #2
0
        /// <summary>
        /// Extracts the images from the document's third page, strokes the four-corner outline
        /// of each image and draws a text block with the image's properties (ID, pixel size,
        /// display size and resolution) positioned relative to the outline's fourth corner.
        /// </summary>
        /// <param name="document">Document whose third page (index 2) is read and drawn on.</param>
        private static void ExtractImagesAndHighlight(PDFFixedDocument document)
        {
            // Label template; the text is reproduced exactly as the sample has always emitted it.
            const string propertiesFormat =
                "Image ID: {0}\nPixel width: {1} pixels\nPixel height: {2} pixels\n" +
                "Display width: {3} points\nDisplay height: {4} points\nHorizonal Resolution: {5} dpi\nVertical Resolution: {6} dpi";

            PDFPen outlinePen = new PDFPen(new PDFRgbColor(255, 0, 192), 0.5);

            PDFStringAppearanceOptions appearance = new PDFStringAppearanceOptions
            {
                Brush = new PDFBrush(new PDFRgbColor(0, 0, 0)),
                Font  = new PDFStandardFont(PDFStandardFontFace.Helvetica, 8)
            };
            PDFStringLayoutOptions layout = new PDFStringLayoutOptions { Width = 1000 };

            var page = document.Pages[2];
            PDFVisualImageCollection images = new PDFContentExtractor(page).ExtractImages(false);

            for (int index = 0; index < images.Count; index++)
            {
                var image   = images[index];
                var corners = image.ImageCorners;

                string description = string.Format(
                    propertiesFormat,
                    image.ImageID, image.Width, image.Height,
                    image.DisplayWidth, image.DisplayHeight, image.DpiX, image.DpiY);

                // Outline the image by connecting its four corners in order.
                PDFPath outline = new PDFPath();
                outline.StartSubpath(corners[0].X, corners[0].Y);
                for (int corner = 1; corner <= 3; corner++)
                {
                    outline.AddLineTo(corners[corner].X, corners[corner].Y);
                }
                outline.CloseSubpath();
                page.Canvas.DrawPath(outlinePen, outline);

                // Anchor the description one unit away from the fourth corner on both axes.
                layout.X = corners[3].X + 1;
                layout.Y = corners[3].Y + 1;
                page.Canvas.DrawString(description, appearance, layout);
            }
        }
예제 #3
0
        /// <summary>
        /// Extracts the images found on the third page of content.pdf and saves each one
        /// as a PNG file named "image0.png", "image1.png", ... (paths are relative to the
        /// current working directory).
        /// </summary>
        /// <param name="args">Command line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Load the PDF file.
            //PDF4NET v5: PDFDocument doc = new PDFDocument("..\\SupportFiles\\Images.pdf");
            PDFFixedDocument doc = new PDFFixedDocument("..\\..\\..\\..\\..\\SupportFiles\\content.pdf");

            // Image extraction goes through a content extractor bound to one page;
            // only the third page (index 2) is processed here.
            //PDF4NET v5: PDFImportedPage ip = doc.Pages[i] as PDFImportedPage;
            PDFContentExtractor ce = new PDFContentExtractor(doc.Pages[2]);
            //PDF4NET v5: Bitmap[] images = ip.ExtractImages();
            PDFVisualImageCollection images = ce.ExtractImages(true);

            // Save the page images to disk, if there are any.
            for (int j = 0; j < images.Count; j++)
            {
                //PDF4NET v5: images[j].Save("image" + i.ToString() + j.ToString() + ".png", ImageFormat.Png);
                // File.Create truncates an existing file; File.OpenWrite would keep the old
                // length and leave stale trailing bytes when the new image is shorter.
                // The using block flushes and closes the stream even if Save throws.
                using (FileStream fs = File.Create("image" + j.ToString() + ".png"))
                {
                    images[j].Save(fs, PDFVisualImageSaveFormat.Png);
                }
            }
        }
예제 #4
0
        /// <summary>
        /// Loads content.pdf, searches its first page for the text "lorem", redacts the
        /// visual bounds of every search result and saves the document as
        /// "RedactedSearchResults.pdf".
        /// </summary>
        /// <param name="args">Command line arguments (not used).</param>
        public static void Main(string[] args)
        {
            string supportPath = "..\\..\\..\\..\\..\\SupportFiles\\";

            // The input stream is only needed while the document is constructed;
            // the using block closes it even if loading fails.
            PDFFixedDocument document;
            using (FileStream input = File.OpenRead(supportPath + "content.pdf"))
            {
                document = new PDFFixedDocument(input);
            }

            PDFContentExtractor           ce            = new PDFContentExtractor(document.Pages[0]);
            PDFTextSearchResultCollection searchResults = ce.SearchText("lorem");

            // Start a redaction session only when there is something to redact.
            if (searchResults.Count > 0)
            {
                PDFContentRedactor cr = new PDFContentRedactor(document.Pages[0]);

                cr.BeginRedaction();

                for (int i = 0; i < searchResults.Count; i++)
                {
                    cr.RedactArea(searchResults[i].VisualBounds);
                }

                cr.ApplyRedaction();
            }

            using (FileStream output = File.Create("RedactedSearchResults.pdf"))
            {
                document.Save(output);
            }
        }
예제 #5
0
        /// <summary>
        /// Runs three text searches on the first page of the document read from
        /// <paramref name="input"/> and hands each result set to HighlightSearchResults
        /// with its own color: red for the plain search, green for the whole-word search
        /// and blue for the regular expression search.
        /// </summary>
        /// <param name="input">Stream containing the PDF document to search.</param>
        /// <returns>A one-element array pairing the document with the name "searchtext.pdf".</returns>
        public static SampleOutputInfo[] Run(Stream input)
        {
            PDFFixedDocument document = new PDFFixedDocument(input);
            var firstPage = document.Pages[0];
            PDFContentExtractor extractor = new PDFContentExtractor(firstPage);

            // Plain search: every occurrence of "at".
            PDFTextSearchResultCollection plainMatches = extractor.SearchText("at");
            HighlightSearchResults(firstPage, plainMatches, PDFRgbColor.Red);

            // Same text, whole-word search option.
            PDFTextSearchResultCollection wholeWordMatches =
                extractor.SearchText("at", PDFTextSearchOptions.WholeWordSearch);
            HighlightSearchResults(firstPage, wholeWordMatches, PDFRgbColor.Green);

            // Regular expression: an uppercase letter followed by any number of lowercase letters.
            PDFTextSearchResultCollection regexMatches =
                extractor.SearchText("[A-Z][a-z]*", PDFTextSearchOptions.RegExSearch);
            HighlightSearchResults(firstPage, regexMatches, PDFRgbColor.Blue);

            return new SampleOutputInfo[] { new SampleOutputInfo(document, "searchtext.pdf") };
        }
예제 #6
0
        /// <summary>
        /// Extracts the visual objects of the first page of the document read from
        /// <paramref name="input"/> and marks each image, text fragment and the first path
        /// object with a red contour and a label ("Image", "Text" or "Path").
        /// </summary>
        /// <param name="input">Stream containing the PDF document to process.</param>
        /// <returns>A one-element array pairing the document with the name "pageobjects.pdf".</returns>
        public static SampleOutputInfo[] Run(Stream input)
        {
            PDFBrush        brush     = new PDFBrush();
            PDFPen          redPen    = new PDFPen(PDFRgbColor.Red, 1);
            PDFStandardFont helvetica = new PDFStandardFont(PDFStandardFontFace.Helvetica, 10);

            PDFFixedDocument document = new PDFFixedDocument(input);

            PDFContentExtractor       ce  = new PDFContentExtractor(document.Pages[0]);
            PDFVisualObjectCollection voc = ce.ExtractVisualObjects(false);

            PDFPath contour = null;

            // Set once the first path object has been seen; ends the loop explicitly
            // instead of overwriting the loop counter.
            bool pathHandled = false;

            for (int i = 0; i < voc.Count && !pathHandled; i++)
            {
                switch (voc[i].Type)
                {
                case PDFVisualObjectType.Image:
                    // Direct cast: a mismatch between Type and the runtime type fails here
                    // with a clear InvalidCastException rather than a later null dereference.
                    PDFImageVisualObject ivo = (PDFImageVisualObject)voc[i];
                    // Contour around the image, offset by 5 units from each corner.
                    contour = new PDFPath();
                    contour.StartSubpath(ivo.Image.ImageCorners[0].X - 5, ivo.Image.ImageCorners[0].Y + 5);
                    contour.AddLineTo(ivo.Image.ImageCorners[1].X + 5, ivo.Image.ImageCorners[1].Y + 5);
                    contour.AddLineTo(ivo.Image.ImageCorners[2].X + 5, ivo.Image.ImageCorners[2].Y - 5);
                    contour.AddLineTo(ivo.Image.ImageCorners[3].X - 5, ivo.Image.ImageCorners[3].Y - 5);
                    contour.CloseSubpath();
                    document.Pages[0].Canvas.DrawPath(redPen, contour);

                    document.Pages[0].Canvas.DrawString("Image", helvetica, brush,
                                                        ivo.Image.ImageCorners[0].X - 5, ivo.Image.ImageCorners[0].Y + 5);
                    break;

                case PDFVisualObjectType.Text:
                    PDFTextVisualObject tvo = (PDFTextVisualObject)voc[i];
                    // Contour around the text fragment, offset by 5 units from each corner.
                    contour = new PDFPath();
                    contour.StartSubpath(tvo.TextFragment.FragmentCorners[0].X - 5, tvo.TextFragment.FragmentCorners[0].Y + 5);
                    contour.AddLineTo(tvo.TextFragment.FragmentCorners[1].X + 5, tvo.TextFragment.FragmentCorners[1].Y + 5);
                    contour.AddLineTo(tvo.TextFragment.FragmentCorners[2].X + 5, tvo.TextFragment.FragmentCorners[2].Y - 5);
                    contour.AddLineTo(tvo.TextFragment.FragmentCorners[3].X - 5, tvo.TextFragment.FragmentCorners[3].Y - 5);
                    contour.CloseSubpath();
                    document.Pages[0].Canvas.DrawPath(redPen, contour);

                    document.Pages[0].Canvas.DrawString("Text", helvetica, brush,
                                                        tvo.TextFragment.FragmentCorners[0].X - 5, tvo.TextFragment.FragmentCorners[0].Y + 5);
                    break;

                case PDFVisualObjectType.Path:
                    PDFPathVisualObject pvo = (PDFPathVisualObject)voc[i];
                    // Examine all the path points and determine the minimum rectangle that bounds the path.
                    // Infinite sentinels keep the result correct for any coordinate range.
                    double minX = double.PositiveInfinity, minY = double.PositiveInfinity;
                    double maxX = double.NegativeInfinity, maxY = double.NegativeInfinity;
                    bool hasPoints = false;
                    for (int j = 0; j < pvo.PathItems.Count; j++)
                    {
                        PDFPathItem pi = pvo.PathItems[j];
                        if (pi.Points == null)
                        {
                            continue;
                        }

                        for (int k = 0; k < pi.Points.Length; k++)
                        {
                            double x = pi.Points[k].X;
                            double y = pi.Points[k].Y;
                            hasPoints = true;

                            if (x < minX)
                            {
                                minX = x;
                            }
                            if (y < minY)
                            {
                                minY = y;
                            }
                            if (x > maxX)
                            {
                                maxX = x;
                            }
                            if (y > maxY)
                            {
                                maxY = y;
                            }
                        }
                    }

                    // A path without any points has no bounding rectangle; drawing one
                    // from the untouched sentinels would produce a bogus contour.
                    if (hasPoints)
                    {
                        contour = new PDFPath();
                        contour.StartSubpath(minX - 5, minY - 5);
                        contour.AddLineTo(maxX + 5, minY - 5);
                        contour.AddLineTo(maxX + 5, maxY + 5);
                        contour.AddLineTo(minX - 5, maxY + 5);
                        contour.CloseSubpath();
                        document.Pages[0].Canvas.DrawPath(redPen, contour);

                        document.Pages[0].Canvas.DrawString("Path", helvetica, brush, minX - 5, maxY + 5);
                    }

                    // Skip the rest of path objects, they are the evaluation message
                    pathHandled = true;
                    break;
                }
            }

            SampleOutputInfo[] output = new SampleOutputInfo[] { new SampleOutputInfo(document, "pageobjects.pdf") };
            return(output);
        }