Example #1
0
        public void RunPdfTron(string input_path)
        {
            PDFNet.Initialize();

            // string output_path = "../../../../TestFiles/Output/";

            try
            {
                // Open the test file
                PDFDoc doc = new PDFDoc(input_path);
                doc.InitSecurityHandler();

                PageIterator  itr;
                ElementReader page_reader = new ElementReader();

                for (itr = doc.GetPageIterator(); itr.HasNext(); itr.Next())        //  Read every page
                {
                    int pageno = itr.GetPageNumber();


                    page_reader.Begin(itr.Current());
                    ProcessElements(page_reader);
                    page_reader.End();
                }

                page_reader.Dispose(); // Calling Dispose() on ElementReader/Writer/Builder can result in increased performance and lower memory consumption.
                doc.Close();
            }
            catch (PDFNetException e)
            {
                ConsoleLog += e.Message;
            }

            PDFNet.Terminate();
        }
        static void Main(string[] args)
        {
            PDFNet.Initialize();

            // Relative path to the folder containing test files.
            string input_path = "../../TestFiles/";

            try
            {
                Console.WriteLine("-------------------------------------------------");
                Console.WriteLine("Sample 1 - Extract text data from all pages in the document.");

                // Open the test file
                Console.WriteLine("Opening the input pdf...");
                using (PDFDoc doc = new PDFDoc(input_path + "newsletter.pdf"))
                    using (ElementReader page_reader = new ElementReader())
                    {
                        doc.InitSecurityHandler();

                        PageIterator itr;
                        for (itr = doc.GetPageIterator(); itr.HasNext(); itr.Next())                            //  Read every page
                        {
                            page_reader.Begin(itr.Current());
                            ProcessElements(page_reader);
                            page_reader.End();
                        }
                        Console.WriteLine("Done.");
                    }
            }
            catch (PDFNetException e)
            {
                Console.WriteLine(e.Message);
            }
        }
Example #3
0
        /// <summary>
        /// extract text in the pdf page
        /// </summary>
        void ExtractText()
        {
            _charCode = -1;

            _pageMatrix = PdfPage.GetDefaultMatrix();

            _pageHeight = PdfPage.GetPageHeight();

            _textLines = new PdfLineOfText[((int)_pageHeight >> 2) + 8];

            using (ElementReader page_reader = new ElementReader())
            {
                page_reader.Begin(PdfPage);
                ProcessElements(page_reader);
            }

            PdfLineOfText preLt = null;

            for (int i = 0; i < _textLines.Length; i++)
            {
                var lt = _textLines[i];

                if (preLt != null && lt != null && preLt.FirstChar.Top - lt.FirstChar.Top < 3)
                {
                    preLt.AddPdfChars(lt.Chars);

                    _textLines[i] = lt = null;
                }

                preLt = lt;
            }
        }
Example #4
0
 // A utility method used to extract all text content from
 // a given selection rectangle. The rectangle coordinates are
 // expressed in PDF user/page coordinate system.
 public string ReadTextFromRect(Page page, Rect pos, ElementReader reader)
 {
     _srch_str = "";
     reader.Begin(page);
     RectTextSearch(reader, pos);
     reader.End();
     return(_srch_str);
 }
        static void Main(string[] args)
        {
            PDFNet.Initialize();

            // Relative path to the folder containing test files.
            string input_path      = "../../TestFiles/";
            string output_path     = "../../TestFiles/Output/";
            string input_filename  = "newsletter.pdf";
            string output_filename = "newsletter_edited.pdf";

            try
            {
                Console.WriteLine("Opening the input file...");
                using (PDFDoc doc = new PDFDoc(input_path + input_filename))
                {
                    doc.InitSecurityHandler();

                    ElementWriter writer  = new ElementWriter();
                    ElementReader reader  = new ElementReader();
                    XSet          visited = new XSet();

                    PageIterator itr = doc.GetPageIterator();

                    while (itr.HasNext())
                    {
                        try
                        {
                            Page page = itr.Current();
                            visited.Add(page.GetSDFObj().GetObjNum());

                            reader.Begin(page);
                            writer.Begin(page, ElementWriter.WriteMode.e_replacement, false, true, page.GetResourceDict());

                            ProcessElements(reader, writer, visited);
                            writer.End();
                            reader.End();

                            itr.Next();
                        }
                        catch (PDFNetException e)
                        {
                            Console.WriteLine(e.Message);
                        }
                    }

                    doc.Save(output_path + output_filename, SDFDoc.SaveOptions.e_remove_unused);
                    Console.WriteLine("Done. Result saved in {0}...", output_filename);
                }
            }
            catch (PDFNetException e)
            {
                Console.WriteLine(e.Message);
            }
        }
        static void Main(string[] args)
        {
            try
            {
                PDFNet.Initialize();

                Console.WriteLine("-------------------------------------------------");
                Console.WriteLine("Extract page element information from all ");
                Console.WriteLine("pages in the document.");

                // Open the test file
                using (PDFDoc doc = new PDFDoc(input_path + "newsletter.pdf"))
                {
                    doc.InitSecurityHandler();

                    int          pgnum = doc.GetPageCount();
                    PageIterator itr;

                    using (ElementReader page_reader = new ElementReader())
                    {
                        for (itr = doc.GetPageIterator(); itr.HasNext(); itr.Next())                                    //  Read every page
                        {
                            Console.WriteLine("Page {0:d}----------------------------------------",
                                              itr.GetPageNumber());

                            Rect crop_box = itr.Current().GetCropBox();
                            crop_box.Normalize();

                            // Console.WriteLine(" Page Rectangle: x={0:f} y={1:f} x2={2:f} y2={3:f}", crop_box.x1, crop_box.y1, crop_box.x2, crop_box.y2);
                            // Console.WriteLine(" Page Size: width={0:f} height={1:f}", crop_box.Width(), crop_box.Height());

                            page_reader.Begin(itr.Current());
                            ProcessElements(page_reader);
                            page_reader.End();
                        }
                    }

                    Console.WriteLine("Done.");
                }
            }
            catch (PDFNetException e)
            {
                Console.WriteLine(e.Message);
            }
        }
Example #7
0
        public void ReadTextFromCoordinates(string input_path, int pagenumber, int urx, int ury, int llx, int lly)
        {
            PDFDoc doc = new PDFDoc(input_path);

            doc.InitSecurityHandler();
            Page          page   = doc.GetPage(pagenumber);
            ElementReader reader = new ElementReader();
            PageIterator  itr    = doc.GetPageIterator();

            reader.Begin(itr.Current());

            LowLevelTextExtractUtils u = new LowLevelTextExtractUtils();
            //u.DumpAllText(reader);
            //ConsoleLog += u.ConsoleLog;
            //reader.End();

            string field3 = u.ReadTextFromRect(page, new Rect(urx, ury, llx, lly), reader);

            ConsoleLog = field3;

            reader.Dispose();
            doc.Close();
        }
Example #8
0
        double GetBottomTextYValue(int pageNum, double bottomAxisValue, double topAxisValue, bool isMatchNum)
        {
            Page curPage = _pdfDoc.GetPage(pageNum);
            List <TextAndAxisValue> values = new List <TextAndAxisValue>();

            using (ElementReader pageReader = new ElementReader())
            {
                pageReader.Begin(curPage);
                GetTextElement(pageNum, pageReader, values, bottomAxisValue, topAxisValue, isMatchNum);
            }
            Merge(values);
            Filtrate(values);
            double bottomValue = bottomAxisValue;

            if (values.Count > 0)
            {
                double value = values.Min(x => x.yValue) - 15;
                if (value > 0)
                {
                    bottomValue = value;
                }
            }
            return(bottomValue);
        }
        public SortedDictionary <double, FormLineList>[] GetFormLines(int pageNum, LinePos startPos, LinePos endPos
                                                                      , bool isSubsequentPage, SortedDictionary <double, FormLineList> lastPageVerticalLines)
        {
            //Get the information of lines and extreme points by travelsaling the page
            horizontalLines = new SortedDictionary <double, FormLineList>();
            verticalLines   = new SortedDictionary <double, FormLineList>();
            _pageNum        = pageNum;
            _startPos       = startPos;
            _endPos         = endPos;

            Page page = _pdfDoc.GetPage(pageNum);

            pageSize              = PdfTronHelper.GetPageSize(page);
            topBound.AxisValue    = startPos == null ? pageSize[1] : startPos.AxisValue;
            bottomBound.AxisValue = endPos == null ? 0 : endPos.AxisValue;
            ii = 0;
            using (ElementReader page_reader = new ElementReader())
            {
                page_reader.Begin(page);
                ProcessElements(page_reader);
            }
            RemoveLittleLines(horizontalLines);
            RemoveLittleLines(verticalLines);
            //Remove short lines.
            Rect posRect = GetTablePosRect(_pageNum, _startPos, _endPos, isSubsequentPage);

            RemoveTooShortAndTooLongLines(page, horizontalLines, true, posRect);

            bool isNotNeedGenerateVerLines =
                isSubsequentPage && lastPageVerticalLines != null && verticalLines.Count == lastPageVerticalLines.Count;

            if (!isNotNeedGenerateVerLines)
            {
                RemoveTooShortAndTooLongLines(page, verticalLines, false, posRect);
            }
            //Generate drawed lines.
            Rect areaRect;

            bool existRealRect = horizontalLines.Count > 1 && verticalLines.Count > 1 && IsRect(posRect, horizontalLines, verticalLines);

            areaRect = existRealRect ? GenerateRectByLines() : posRect;
            if (existRealRect || isSubsequentPage)
            {
                RemoveLinesNotInRect(areaRect, horizontalLines, verticalLines);
                isNotNeedGenerateVerLines =
                    isSubsequentPage && lastPageVerticalLines != null && verticalLines.Count == lastPageVerticalLines.Count;
            }
            else
            {
                posRect = GetTablePosRect(_pageNum, _startPos, _endPos, true);
            }
            FormLineGenerator.RemoveSpareNearLines(verticalLines, false);
            if (!isNotNeedGenerateVerLines)
            {
                isNotNeedGenerateVerLines =
                    isSubsequentPage && lastPageVerticalLines != null && verticalLines.Count == lastPageVerticalLines.Count;
            }
            FormLineGenerator lineGenerator = new FormLineGenerator(page, areaRect);

            SortedDictionary <double, FormLineList>[] lines = lineGenerator.GetFormLines(existRealRect, horizontalLines, verticalLines, isNotNeedGenerateVerLines);
            return(lines);
        }
        static void Main(string[] args)
        {
            PDFNet.Initialize();

            // Relative path to the folder containing test files.
            string input_path  = "../../TestFiles/";
            string output_path = "../../TestFiles/Output/";


            try
            {
                using (PDFDoc doc = new PDFDoc())
                    using (ElementBuilder eb = new ElementBuilder())                    // ElementBuilder is used to build new Element objects
                        using (ElementWriter writer = new ElementWriter())              // ElementWriter is used to write Elements to the page
                        {
                            // Start a new page ------------------------------------
                            // Position an image stream on several places on the page
                            Page page = doc.PageCreate(new Rect(0, 0, 612, 794));

                            writer.Begin(page);                 // begin writing to this page

                            // Create an Image that can be reused multiple times in the document or
                            // multiple on the same page.
                            MappedFile   img_file = new MappedFile(input_path + "peppers.jpg");
                            FilterReader img_data = new FilterReader(img_file);
                            Image        img      = Image.Create(doc, img_data, 400, 600, 8, ColorSpace.CreateDeviceRGB(), Image.InputFilter.e_jpeg);

                            Element element = eb.CreateImage(img, new Matrix2D(200, -145, 20, 300, 200, 150));
                            writer.WritePlacedElement(element);

                            GState gstate = element.GetGState();                // use the same image (just change its matrix)
                            gstate.SetTransform(200, 0, 0, 300, 50, 450);
                            writer.WritePlacedElement(element);

                            // use the same image again (just change its matrix).
                            writer.WritePlacedElement(eb.CreateImage(img, 300, 600, 200, -150));

                            writer.End();              // save changes to the current page
                            doc.PagePushBack(page);

                            // Start a new page ------------------------------------
                            // Construct and draw a path object using different styles
                            page = doc.PageCreate(new Rect(0, 0, 612, 794));

                            writer.Begin(page);                 // begin writing to this page
                            eb.Reset();                         // Reset GState to default


                            eb.PathBegin();                     // start constructing the path
                            eb.MoveTo(306, 396);
                            eb.CurveTo(681, 771, 399.75, 864.75, 306, 771);
                            eb.CurveTo(212.25, 864.75, -69, 771, 306, 396);
                            eb.ClosePath();
                            element = eb.PathEnd();                             // the path is now finished
                            element.SetPathFill(true);                          // the path should be filled

                            // Set the path color space and color
                            gstate = element.GetGState();
                            gstate.SetFillColorSpace(ColorSpace.CreateDeviceCMYK());
                            gstate.SetFillColor(new ColorPt(1, 0, 0, 0));              // cyan
                            gstate.SetTransform(0.5, 0, 0, 0.5, -20, 300);
                            writer.WritePlacedElement(element);

                            // Draw the same path using a different stroke color
                            element.SetPathStroke(true);                        // this path is should be filled and stroked
                            gstate.SetFillColor(new ColorPt(0, 0, 1, 0));       // yellow
                            gstate.SetStrokeColorSpace(ColorSpace.CreateDeviceRGB());
                            gstate.SetStrokeColor(new ColorPt(1, 0, 0));        // red
                            gstate.SetTransform(0.5, 0, 0, 0.5, 280, 300);
                            gstate.SetLineWidth(20);
                            writer.WritePlacedElement(element);

                            // Draw the same path with with a given dash pattern
                            element.SetPathFill(false);                  // this path is should be only stroked
                            gstate.SetStrokeColor(new ColorPt(0, 0, 1)); // blue
                            gstate.SetTransform(0.5, 0, 0, 0.5, 280, 0);
                            double[] dash_pattern = { 30 };
                            gstate.SetDashPattern(dash_pattern, 0);
                            writer.WritePlacedElement(element);

                            // Use the path as a clipping path
                            writer.WriteElement(eb.CreateGroupBegin());                 // Save the graphics state
                            // Start constructing a new path (the old path was lost when we created
                            // a new Element using CreateGroupBegin()).
                            eb.PathBegin();
                            eb.MoveTo(306, 396);
                            eb.CurveTo(681, 771, 399.75, 864.75, 306, 771);
                            eb.CurveTo(212.25, 864.75, -69, 771, 306, 396);
                            eb.ClosePath();
                            element = eb.PathEnd();             // path is now built
                            element.SetPathClip(true);          // this path is a clipping path
                            element.SetPathStroke(true);        // this path is should be filled and stroked
                            gstate = element.GetGState();
                            gstate.SetTransform(0.5, 0, 0, 0.5, -20, 0);
                            writer.WriteElement(element);
                            writer.WriteElement(eb.CreateImage(img, 100, 300, 400, 600));
                            writer.WriteElement(eb.CreateGroupEnd()); // Restore the graphics state

                            writer.End();                             // save changes to the current page
                            doc.PagePushBack(page);


                            // Start a new page ------------------------------------
                            page = doc.PageCreate(new Rect(0, 0, 612, 794));

                            writer.Begin(page);                 // begin writing to this page
                            eb.Reset();                         // Reset GState to default

                            // Begin writing a block of text
                            element = eb.CreateTextBegin(Font.Create(doc, Font.StandardType1Font.e_times_roman), 12);
                            writer.WriteElement(element);

                            string data = "Hello World!";
                            element = eb.CreateTextRun(data);
                            element.SetTextMatrix(10, 0, 0, 10, 0, 600);
                            element.GetGState().SetLeading(15);                          // Set the spacing between lines
                            writer.WriteElement(element);

                            writer.WriteElement(eb.CreateTextNewLine());              // New line

                            element = eb.CreateTextRun(data);
                            gstate  = element.GetGState();
                            gstate.SetTextRenderMode(GState.TextRenderingMode.e_stroke_text);
                            gstate.SetCharSpacing(-1.25);
                            gstate.SetWordSpacing(-1.25);
                            writer.WriteElement(element);

                            writer.WriteElement(eb.CreateTextNewLine());              // New line

                            element = eb.CreateTextRun(data);
                            gstate  = element.GetGState();
                            gstate.SetCharSpacing(0);
                            gstate.SetWordSpacing(0);
                            gstate.SetLineWidth(3);
                            gstate.SetTextRenderMode(GState.TextRenderingMode.e_fill_stroke_text);
                            gstate.SetStrokeColorSpace(ColorSpace.CreateDeviceRGB());
                            gstate.SetStrokeColor(new ColorPt(1, 0, 0));                // red
                            gstate.SetFillColorSpace(ColorSpace.CreateDeviceCMYK());
                            gstate.SetFillColor(new ColorPt(1, 0, 0, 0));               // cyan
                            writer.WriteElement(element);

                            writer.WriteElement(eb.CreateTextNewLine());              // New line

                            // Set text as a clipping path to the image.
                            element = eb.CreateTextRun(data);
                            gstate  = element.GetGState();
                            gstate.SetTextRenderMode(GState.TextRenderingMode.e_clip_text);
                            writer.WriteElement(element);

                            // Finish the block of text
                            writer.WriteElement(eb.CreateTextEnd());

                            // Draw an image that will be clipped by the above text
                            writer.WriteElement(eb.CreateImage(img, 10, 100, 1300, 720));

                            writer.End();              // save changes to the current page
                            doc.PagePushBack(page);

                            // Start a new page ------------------------------------
                            //
                            // The example illustrates how to embed the external font in a PDF document.
                            // The example also shows how ElementReader can be used to copy and modify
                            // Elements between pages.

                            using (ElementReader reader = new ElementReader())
                            {
                                // Start reading Elements from the last page. We will copy all Elements to
                                // a new page but will modify the font associated with text.
                                reader.Begin(doc.GetPage(doc.GetPageCount()));

                                page = doc.PageCreate(new Rect(0, 0, 1300, 794));

                                writer.Begin(page);                     // begin writing to this page
                                eb.Reset();                             // Reset GState to default

                                // Embed an external font in the document.
                                Font font = Font.CreateTrueTypeFont(doc, input_path + "font.ttf");

                                while ((element = reader.Next()) != null)                       // Read page contents
                                {
                                    if (element.GetType() == Element.Type.e_text)
                                    {
                                        element.GetGState().SetFont(font, 12);
                                    }

                                    writer.WriteElement(element);
                                }

                                reader.End();
                                writer.End();                  // save changes to the current page

                                doc.PagePushBack(page);


                                // Start a new page ------------------------------------
                                //
                                // The example illustrates how to embed the external font in a PDF document.
                                // The example also shows how ElementReader can be used to copy and modify
                                // Elements between pages.

                                // Start reading Elements from the last page. We will copy all Elements to
                                // a new page but will modify the font associated with text.
                                reader.Begin(doc.GetPage(doc.GetPageCount()));

                                page = doc.PageCreate(new Rect(0, 0, 1300, 794));

                                writer.Begin(page);                     // begin writing to this page
                                eb.Reset();                             // Reset GState to default

                                // Embed an external font in the document.
                                Font font2 = Font.CreateType1Font(doc, input_path + "Misc-Fixed.pfa");

                                while ((element = reader.Next()) != null)                       // Read page contents
                                {
                                    if (element.GetType() == Element.Type.e_text)
                                    {
                                        element.GetGState().SetFont(font2, 12);
                                    }

                                    writer.WriteElement(element);
                                }

                                reader.End();
                                writer.End();                  // save changes to the current page
                                doc.PagePushBack(page);


                                // Start a new page ------------------------------------
                                page = doc.PageCreate();
                                writer.Begin(page);                     // begin writing to this page
                                eb.Reset();                             // Reset GState to default

                                // Begin writing a block of text
                                element = eb.CreateTextBegin(Font.Create(doc, Font.StandardType1Font.e_times_roman), 12);
                                element.SetTextMatrix(1.5, 0, 0, 1.5, 50, 600);
                                element.GetGState().SetLeading(15);                     // Set the spacing between lines
                                writer.WriteElement(element);

                                string para = "A PDF text object consists of operators that can show " +
                                              "text strings, move the text position, and set text state and certain " +
                                              "other parameters. In addition, there are three parameters that are " +
                                              "defined only within a text object and do not persist from one text " +
                                              "object to the next: Tm, the text matrix, Tlm, the text line matrix, " +
                                              "Trm, the text rendering matrix, actually just an intermediate result " +
                                              "that combines the effects of text state parameters, the text matrix " +
                                              "(Tm), and the current transformation matrix";

                                int para_end = para.Length;
                                int text_run = 0;
                                int text_run_end;

                                double para_width = 300;                 // paragraph width is 300 units
                                double cur_width  = 0;

                                while (text_run < para_end)
                                {
                                    text_run_end = para.IndexOf(' ', text_run);
                                    if (text_run_end < 0)
                                    {
                                        text_run_end = para_end - 1;
                                    }

                                    string text = para.Substring(text_run, text_run_end - text_run + 1);
                                    element = eb.CreateTextRun(text);
                                    if (cur_width + element.GetTextLength() < para_width)
                                    {
                                        writer.WriteElement(element);
                                        cur_width += element.GetTextLength();
                                    }
                                    else
                                    {
                                        writer.WriteElement(eb.CreateTextNewLine());                          // New line
                                        text      = para.Substring(text_run, text_run_end - text_run + 1);
                                        element   = eb.CreateTextRun(text);
                                        cur_width = element.GetTextLength();
                                        writer.WriteElement(element);
                                    }

                                    text_run = text_run_end + 1;
                                }

                                // -----------------------------------------------------------------------
                                // The following code snippet illustrates how to adjust spacing between
                                // characters (text runs).
                                element = eb.CreateTextNewLine();
                                writer.WriteElement(element); // Skip 2 lines
                                writer.WriteElement(element);

                                writer.WriteElement(eb.CreateTextRun("An example of space adjustments between inter-characters:"));
                                writer.WriteElement(eb.CreateTextNewLine());

                                // Write string "AWAY" without space adjustments between characters.
                                element = eb.CreateTextRun("AWAY");
                                writer.WriteElement(element);

                                writer.WriteElement(eb.CreateTextNewLine());

                                // Write string "AWAY" with space adjustments between characters.
                                element = eb.CreateTextRun("A");
                                writer.WriteElement(element);

                                element = eb.CreateTextRun("W");
                                element.SetPosAdjustment(140);
                                writer.WriteElement(element);

                                element = eb.CreateTextRun("A");
                                element.SetPosAdjustment(140);
                                writer.WriteElement(element);

                                element = eb.CreateTextRun("Y again");
                                element.SetPosAdjustment(115);
                                writer.WriteElement(element);

                                // Draw the same strings using direct content output...
                                writer.Flush(); // flush pending Element writing operations.

                                // You can also write page content directly to the content stream using
                                // ElementWriter.WriteString(...) and ElementWriter.WriteBuffer(...) methods.
                                // Note that if you are planning to use these functions you need to be familiar
                                // with PDF page content operators (see Appendix A in PDF Reference Manual).
                                // Because it is easy to make mistakes during direct output we recommend that
                                // you use ElementBuilder and Element interface instead.
                                writer.WriteString("T* T* ");                 // New Lines
                                // writer.WriteElement(eb.CreateTextNewLine());
                                writer.WriteString("(Direct output to PDF page content stream:) Tj  T* ");
                                writer.WriteString("(AWAY) Tj T* ");
                                writer.WriteString("[(A)140(W)140(A)115(Y again)] TJ ");

                                // Finish the block of text
                                writer.WriteElement(eb.CreateTextEnd());

                                writer.End();                  // save changes to the current page
                                doc.PagePushBack(page);

                                // Start a new page ------------------------------------

                                // Image Masks
                                //
                                // In the opaque imaging model, images mark all areas they occupy on the page as
                                // if with opaque paint. All portions of the image, whether black, white, gray,
                                // or color, completely obscure any marks that may previously have existed in the
                                // same place on the page.
                                // In the graphic arts industry and page layout applications, however, it is common
                                // to crop or 'mask out' the background of an image and then place the masked image
                                // on a different background, allowing the existing background to show through the
                                // masked areas. This sample illustrates how to use image masks.

                                page = doc.PageCreate();
                                writer.Begin(page); // begin writing to the page

                                // Create the Image Mask
                                MappedFile   imgf      = new MappedFile(input_path + "imagemask.dat");
                                FilterReader mask_read = new FilterReader(imgf);

                                ColorSpace device_gray = ColorSpace.CreateDeviceGray();
                                Image      mask        = Image.Create(doc, mask_read, 64, 64, 1, device_gray, Image.InputFilter.e_ascii_hex);

                                mask.GetSDFObj().PutBool("ImageMask", true);

                                element = eb.CreateRect(0, 0, 612, 794);
                                element.SetPathStroke(false);
                                element.SetPathFill(true);
                                element.GetGState().SetFillColorSpace(device_gray);
                                element.GetGState().SetFillColor(new ColorPt(0.8));
                                writer.WritePlacedElement(element);

                                element = eb.CreateImage(mask, new Matrix2D(200, 0, 0, -200, 40, 680));
                                element.GetGState().SetFillColor(new ColorPt(0.1));
                                writer.WritePlacedElement(element);

                                element.GetGState().SetFillColorSpace(ColorSpace.CreateDeviceRGB());
                                element.GetGState().SetFillColor(new ColorPt(1, 0, 0));
                                element = eb.CreateImage(mask, new Matrix2D(200, 0, 0, -200, 320, 680));
                                writer.WritePlacedElement(element);

                                element.GetGState().SetFillColor(new ColorPt(0, 1, 0));
                                element = eb.CreateImage(mask, new Matrix2D(200, 0, 0, -200, 40, 380));
                                writer.WritePlacedElement(element);

                                {
                                    // This sample illustrates Explicit Masking.
                                    img = Image.Create(doc, input_path + "peppers.jpg");

                                    // mask is the explicit mask for the primary (base) image
                                    img.SetMask(mask);

                                    element = eb.CreateImage(img, new Matrix2D(200, 0, 0, -200, 320, 380));
                                    writer.WritePlacedElement(element);
                                }

                                writer.End(); // save changes to the current page
                                doc.PagePushBack(page);

                                // Transparency sample ----------------------------------

                                // Start a new page -------------------------------------
                                page = doc.PageCreate();
                                writer.Begin(page);                     // begin writing to this page
                                eb.Reset();                             // Reset the GState to default

                                // Write some transparent text at the bottom of the page.
                                element = eb.CreateTextBegin(Font.Create(doc, Font.StandardType1Font.e_times_roman), 100);

                                // Set the text knockout attribute. Text knockout must be set outside of
                                // the text group.
                                gstate = element.GetGState();
                                gstate.SetTextKnockout(false);
                                gstate.SetBlendMode(GState.BlendMode.e_bl_difference);
                                writer.WriteElement(element);

                                element = eb.CreateTextRun("Transparency");
                                element.SetTextMatrix(1, 0, 0, 1, 30, 30);
                                gstate = element.GetGState();
                                gstate.SetFillColorSpace(ColorSpace.CreateDeviceCMYK());
                                gstate.SetFillColor(new ColorPt(1, 0, 0, 0));

                                gstate.SetFillOpacity(0.5);
                                writer.WriteElement(element);

                                // Write the same text on top the old; shifted by 3 points
                                element.SetTextMatrix(1, 0, 0, 1, 33, 33);
                                gstate.SetFillColor(new ColorPt(0, 1, 0, 0));
                                gstate.SetFillOpacity(0.5);

                                writer.WriteElement(element);
                                writer.WriteElement(eb.CreateTextEnd());

                                // Draw three overlapping transparent circles.
                                eb.PathBegin();                         // start constructing the path
                                eb.MoveTo(459.223, 505.646);
                                eb.CurveTo(459.223, 415.841, 389.85, 343.04, 304.273, 343.04);
                                eb.CurveTo(218.697, 343.04, 149.324, 415.841, 149.324, 505.646);
                                eb.CurveTo(149.324, 595.45, 218.697, 668.25, 304.273, 668.25);
                                eb.CurveTo(389.85, 668.25, 459.223, 595.45, 459.223, 505.646);
                                element = eb.PathEnd();
                                element.SetPathFill(true);

                                gstate = element.GetGState();
                                gstate.SetFillColorSpace(ColorSpace.CreateDeviceRGB());
                                gstate.SetFillColor(new ColorPt(0, 0, 1));                                     // Blue Circle

                                gstate.SetBlendMode(GState.BlendMode.e_bl_normal);
                                gstate.SetFillOpacity(0.5);
                                writer.WriteElement(element);

                                // Translate relative to the Blue Circle
                                gstate.SetTransform(1, 0, 0, 1, 113, -185);
                                gstate.SetFillColor(new ColorPt(0, 1, 0));                                     // Green Circle
                                gstate.SetFillOpacity(0.5);
                                writer.WriteElement(element);

                                // Translate relative to the Green Circle
                                gstate.SetTransform(1, 0, 0, 1, -220, 0);
                                gstate.SetFillColor(new ColorPt(1, 0, 0));                                     // Red Circle
                                gstate.SetFillOpacity(0.5);
                                writer.WriteElement(element);

                                writer.End();                  // save changes to the current page
                                doc.PagePushBack(page);

                                // End page ------------------------------------
                            }

                            doc.Save(output_path + "element_builder.pdf", SDFDoc.SaveOptions.e_remove_unused);
                            Console.WriteLine("Done. Result saved in element_builder.pdf...");
                        }
            }
            catch (PDFNetException e)
            {
                Console.WriteLine(e.Message);
            }
        }
Example #11
0
        public void ReadAdvanced(string input_path)
        {
            PDFNet.Initialize();

            try
            {
                PDFDoc doc = new PDFDoc(input_path);
                doc.InitSecurityHandler();

                Page page = doc.GetPage(1);
                if (page == null)
                {
                    ConsoleLog += "Page not found.";
                    return;
                }

                TextExtractor txt = new TextExtractor();
                txt.Begin(page); // Read the page.
                // Other options you may want to consider...
                // txt.Begin(page, null, TextExtractor.ProcessingFlags.e_no_dup_remove);
                // txt.Begin(page, null, TextExtractor.ProcessingFlags.e_remove_hidden_text);
                // ...

                // Example 1. Get all text on the page in a single string.
                // Words will be separated with space or new line characters.
                if (example1_basic)
                {
                    // Get the word count.
                    ConsoleLog += "Word Count: {0}" + txt.GetWordCount();

                    ConsoleLog += "\n\n- GetAsText --------------------------\n{0}" + txt.GetAsText();
                    ConsoleLog += "-----------------------------------------------------------";
                }

                // Example 2. Get XML logical structure for the page.
                if (example2_xml)
                {
                    String text = txt.GetAsXML(TextExtractor.XMLOutputFlags.e_words_as_elements | TextExtractor.XMLOutputFlags.e_output_bbox | TextExtractor.XMLOutputFlags.e_output_style_info);
                    ConsoleLog += "\n\n- GetAsXML  --------------------------\n{0}" + text;
                    ConsoleLog += "-----------------------------------------------------------";
                }

                // Example 3. Extract words one by one.
                if (example3_wordlist)
                {
                    TextExtractor.Word word;
                    for (TextExtractor.Line line = txt.GetFirstLine(); line.IsValid(); line = line.GetNextLine())
                    {
                        for (word = line.GetFirstWord(); word.IsValid(); word = word.GetNextWord())
                        {
                            ConsoleLog += word.GetString();
                        }
                    }
                    ConsoleLog += "-----------------------------------------------------------";
                }

                // Example 3. A more advanced text extraction example.
                // The output is XML structure containing paragraphs, lines, words,
                // as well as style and positioning information.
                if (example4_advanced)
                {
                    Rect bbox;
                    int  cur_flow_id = -1, cur_para_id = -1;

                    TextExtractor.Line  line;
                    TextExtractor.Word  word;
                    TextExtractor.Style s, line_style;

                    // For each line on the page...
                    for (line = txt.GetFirstLine(); line.IsValid(); line = line.GetNextLine())
                    {
                        if (line.GetNumWords() == 0)
                        {
                            continue;
                        }

                        if (cur_flow_id != line.GetFlowID())
                        {
                            if (cur_flow_id != -1)
                            {
                                if (cur_para_id != -1)
                                {
                                    cur_para_id = -1;
                                    ConsoleLog += "</Para>";
                                }
                                ConsoleLog += "</Flow>";
                            }
                            cur_flow_id = line.GetFlowID();
                            ConsoleLog += "<Flow id=\"{0}\">" + cur_flow_id;
                        }

                        if (cur_para_id != line.GetParagraphID())
                        {
                            if (cur_para_id != -1)
                            {
                                ConsoleLog += "</Para>";
                            }
                            cur_para_id = line.GetParagraphID();
                            ConsoleLog += "<Para id=\"{0}\">" + cur_para_id;
                        }

                        bbox       = line.GetBBox();
                        line_style = line.GetStyle();
                        Console.Write("<Line box=\"" + bbox.y1 + "," + bbox.y2 + "," + bbox.x1 + "," + bbox.x2 + ">");
                        PrintStyle(line_style);
                        ConsoleLog += "";

                        // For each word in the line...
                        for (word = line.GetFirstWord(); word.IsValid(); word = word.GetNextWord())
                        {
                            // Output the bounding box for the word.
                            bbox        = word.GetBBox();
                            ConsoleLog += "<Word box=\"{0}, {1}, {2}, {3}\"" + bbox.x1 + bbox.y1 + bbox.x2 + bbox.y2;

                            int sz = word.GetStringLen();
                            if (sz == 0)
                            {
                                continue;
                            }

                            // If the word style is different from the parent style, output the new style.
                            s = word.GetStyle();
                            if (s != line_style)
                            {
                                PrintStyle(s);
                            }

                            ConsoleLog += ">\n" + word.GetString();
                            ConsoleLog += "</Word>";
                        }
                        ConsoleLog += "</Line>";
                    }

                    if (cur_flow_id != -1)
                    {
                        if (cur_para_id != -1)
                        {
                            cur_para_id = -1;
                            ConsoleLog += "</Para>";
                        }
                        ConsoleLog += "</Flow>";
                    }
                }

                // Note: Calling Dispose() on TextExtractor when it is not anymore in use can result in increased performance and lower memory consumption.
                txt.Dispose();
                doc.Close();
                ConsoleLog += "Done.";
            }
            catch (PDFNetException e)
            {
                ConsoleLog += e.Message;
            }

            // Sample code showing how to use low-level text extraction APIs.
            if (example5_low_level)
            {
                try
                {
                    LowLevelTextExtractUtils util = new LowLevelTextExtractUtils();
                    PDFDoc doc = new PDFDoc(input_path);
                    doc.InitSecurityHandler();

                    // Example 1. Extract all text content from the document
                    ElementReader reader = new ElementReader();
                    PageIterator  itr    = doc.GetPageIterator();
                    //for (; itr.HasNext(); itr.Next()) //  Read every page
                    {
                        reader.Begin(itr.Current());

                        LowLevelTextExtractUtils u = new LowLevelTextExtractUtils();
                        u.DumpAllText(reader);
                        ConsoleLog += u.ConsoleLog;
                        reader.End();
                    }

                    // Example 2. Extract text based on the selection rectangle.
                    ConsoleLog += "----------------------------------------------------";
                    ConsoleLog += "Extract text based on the selection rectangle.";
                    ConsoleLog += "----------------------------------------------------";

                    Page   first_page = doc.GetPage(1);
                    string field1     = util.ReadTextFromRect(first_page, new Rect(27, 392, 563, 534), reader);
                    string field2     = util.ReadTextFromRect(first_page, new Rect(28, 551, 106, 623), reader);
                    string field3     = util.ReadTextFromRect(first_page, new Rect(208, 550, 387, 621), reader);

                    ConsoleLog += "Field 1: {0}" + field1;
                    ConsoleLog += "Field 2: {0}" + field2;
                    ConsoleLog += "Field 3: {0}" + field3;
                    // ...

                    reader.Dispose();
                    doc.Close();
                    ConsoleLog += "Done.";
                }
                catch (PDFNetException e)
                {
                    ConsoleLog += e.Message;
                }
            }

            PDFNet.Terminate();
        }
        static void Main(string[] args)
        {
            PDFNet.Initialize();

            // Relative path to the folder containing test files.
            string input_path = "../../TestFiles/";

            bool example1_basic     = false;
            bool example2_xml       = false;
            bool example3_wordlist  = false;
            bool example4_advanced  = true;
            bool example5_low_level = false;

            // Sample code showing how to use high-level text extraction APIs.
            try
            {
                using (PDFDoc doc = new PDFDoc(input_path + "newsletter.pdf"))
                {
                    doc.InitSecurityHandler();

                    Page page = doc.GetPage(1);
                    if (page == null)
                    {
                        Console.WriteLine("Page not found.");
                        return;
                    }

                    using (TextExtractor txt = new TextExtractor())
                    {
                        txt.Begin(page);                          // Read the page.
                        // Other options you may want to consider...
                        // txt.Begin(page, null, TextExtractor.ProcessingFlags.e_no_dup_remove);
                        // txt.Begin(page, null, TextExtractor.ProcessingFlags.e_remove_hidden_text);
                        // ...

                        // Example 1. Get all text on the page in a single string.
                        // Words will be separated with space or new line characters.
                        if (example1_basic)
                        {
                            // Get the word count.
                            Console.WriteLine("Word Count: {0}", txt.GetWordCount());

                            Console.WriteLine("\n\n- GetAsText --------------------------\n{0}", txt.GetAsText());
                            Console.WriteLine("-----------------------------------------------------------");
                        }

                        // Example 2. Get XML logical structure for the page.
                        if (example2_xml)
                        {
                            String text = txt.GetAsXML(TextExtractor.XMLOutputFlags.e_words_as_elements | TextExtractor.XMLOutputFlags.e_output_bbox | TextExtractor.XMLOutputFlags.e_output_style_info);
                            Console.WriteLine("\n\n- GetAsXML  --------------------------\n{0}", text);
                            Console.WriteLine("-----------------------------------------------------------");
                        }

                        // Example 3. Extract words one by one.
                        if (example3_wordlist)
                        {
                            TextExtractor.Word word;
                            for (TextExtractor.Line line = txt.GetFirstLine(); line.IsValid(); line = line.GetNextLine())
                            {
                                for (word = line.GetFirstWord(); word.IsValid(); word = word.GetNextWord())
                                {
                                    Console.WriteLine(word.GetString());
                                }
                            }
                            Console.WriteLine("-----------------------------------------------------------");
                        }

                        // Example 3. A more advanced text extraction example.
                        // The output is XML structure containing paragraphs, lines, words,
                        // as well as style and positioning information.
                        if (example4_advanced)
                        {
                            Rect bbox;
                            int  cur_flow_id = -1, cur_para_id = -1;

                            TextExtractor.Line  line;
                            TextExtractor.Word  word;
                            TextExtractor.Style s, line_style;

                            Console.WriteLine("<PDFText>");
                            // For each line on the page...
                            for (line = txt.GetFirstLine(); line.IsValid(); line = line.GetNextLine())
                            {
                                if (line.GetNumWords() == 0)
                                {
                                    continue;
                                }

                                if (cur_flow_id != line.GetFlowID())
                                {
                                    if (cur_flow_id != -1)
                                    {
                                        if (cur_para_id != -1)
                                        {
                                            cur_para_id = -1;
                                            Console.WriteLine("</Para>");
                                        }
                                        Console.WriteLine("</Flow>");
                                    }
                                    cur_flow_id = line.GetFlowID();
                                    Console.WriteLine("<Flow id=\"{0}\">", cur_flow_id);
                                }

                                if (cur_para_id != line.GetParagraphID())
                                {
                                    if (cur_para_id != -1)
                                    {
                                        Console.WriteLine("</Para>");
                                    }
                                    cur_para_id = line.GetParagraphID();
                                    Console.WriteLine("<Para id=\"{0}\">", cur_para_id);
                                }

                                bbox       = line.GetBBox();
                                line_style = line.GetStyle();
                                Console.Write("<Line box=\"{0}, {1}, {2}, {3}\"", bbox.x1.ToString("0.00"), bbox.y1.ToString("0.00"), bbox.x2.ToString("0.00"), bbox.y2.ToString("0.00"));
                                PrintStyle(line_style);
                                Console.Write(" cur_num=\"" + line.GetCurrentNum() + "\"" + ">\n");

                                // For each word in the line...
                                for (word = line.GetFirstWord(); word.IsValid(); word = word.GetNextWord())
                                {
                                    // Output the bounding box for the word.
                                    bbox = word.GetBBox();
                                    Console.Write("<Word box=\"{0}, {1}, {2}, {3}\"", bbox.x1.ToString("0.00"), bbox.y1.ToString("0.00"), bbox.x2.ToString("0.00"), bbox.y2.ToString("0.00"));
                                    Console.Write(" cur_num=\"" + word.GetCurrentNum() + "\"");
                                    int sz = word.GetStringLen();
                                    if (sz == 0)
                                    {
                                        continue;
                                    }

                                    // If the word style is different from the parent style, output the new style.
                                    s = word.GetStyle();
                                    if (s != line_style)
                                    {
                                        PrintStyle(s);
                                    }

                                    Console.Write(">{0}", word.GetString());
                                    Console.WriteLine("</Word>");
                                }
                                Console.WriteLine("</Line>");
                            }

                            if (cur_flow_id != -1)
                            {
                                if (cur_para_id != -1)
                                {
                                    cur_para_id = -1;
                                    Console.WriteLine("</Para>");
                                }
                                Console.WriteLine("</Flow>");
                            }
                        }
                    }
                    Console.WriteLine("</PDFText>");
                }
            }
            catch (PDFNetException e)
            {
                Console.WriteLine(e.Message);
            }

            // Sample code showing how to use low-level text extraction APIs.
            if (example5_low_level)
            {
                try
                {
                    LowLevelTextExtractUtils util = new LowLevelTextExtractUtils();
                    using (PDFDoc doc = new PDFDoc(input_path + "newsletter.pdf"))
                    {
                        doc.InitSecurityHandler();

                        // Example 1. Extract all text content from the document
                        using (ElementReader reader = new ElementReader())
                        {
                            PageIterator itr = doc.GetPageIterator();
                            //for (; itr.HasNext(); itr.Next()) //  Read every page
                            {
                                reader.Begin(itr.Current());
                                LowLevelTextExtractUtils.DumpAllText(reader);
                                reader.End();
                            }

                            // Example 2. Extract text based on the selection rectangle.
                            Console.WriteLine("----------------------------------------------------");
                            Console.WriteLine("Extract text based on the selection rectangle.");
                            Console.WriteLine("----------------------------------------------------");

                            Page   first_page = doc.GetPage(1);
                            string field1     = util.ReadTextFromRect(first_page, new Rect(27, 392, 563, 534), reader);
                            string field2     = util.ReadTextFromRect(first_page, new Rect(28, 551, 106, 623), reader);
                            string field3     = util.ReadTextFromRect(first_page, new Rect(208, 550, 387, 621), reader);

                            Console.WriteLine("Field 1: {0}", field1);
                            Console.WriteLine("Field 2: {0}", field2);
                            Console.WriteLine("Field 3: {0}", field3);
                            // ...

                            Console.WriteLine("Done.");
                        }
                    }
                }
                catch (PDFNetException e)
                {
                    Console.WriteLine(e.Message);
                }
            }
        }
        /// <summary>
        /// The main entry point for the application.
        /// </summary>
        static void Main(string[] args)
        {
            PDFNet.Initialize();
            // Relative path to the folder containing test files.
            string input_path  = "../../TestFiles/";
            string output_path = "../../TestFiles/Output/";

            try              // Extract logical structure from a PDF document
            {
                using (PDFDoc doc = new PDFDoc(input_path + "tagged.pdf"))
                {
                    doc.InitSecurityHandler();

                    bool example1 = true;
                    bool example2 = true;
                    bool example3 = true;

                    if (example1)
                    {
                        Console.WriteLine("____________________________________________________________");
                        Console.WriteLine("Sample 1 - Traverse logical structure tree...");

                        STree tree = doc.GetStructTree();
                        if (tree.IsValid())
                        {
                            Console.WriteLine("Document has a StructTree root.");
                            for (int i = 0; i < tree.GetNumKids(); ++i)
                            {
                                // Recursively get structure  info for all all child elements.
                                ProcessStructElement(tree.GetKid(i), 0);
                            }
                        }
                        else
                        {
                            Console.WriteLine("This document does not contain any logical structure.");
                        }

                        Console.WriteLine();
                        Console.WriteLine("Done 1.");
                    }

                    if (example2)
                    {
                        Console.WriteLine("____________________________________________________________");
                        Console.WriteLine("Sample 2 - Get parent logical structure elements from");
                        Console.WriteLine("layout elements.");

                        ElementReader reader = new ElementReader();
                        for (PageIterator itr = doc.GetPageIterator(); itr.HasNext(); itr.Next())
                        {
                            reader.Begin(itr.Current());
                            ProcessElements(reader);
                            reader.End();
                        }
                        Console.WriteLine();
                        Console.WriteLine("Done 2.");
                    }

                    if (example3)
                    {
                        Console.WriteLine("____________________________________________________________");
                        Console.WriteLine("Sample 3 - 'XML style' extraction of PDF logical structure and page content.");

                        //A map which maps page numbers(as Integers)
                        //to page Maps(which map from struct mcid(as Integers) to
                        //text Strings)
                        Hashtable     mcid_doc_map = new Hashtable();
                        ElementReader reader       = new ElementReader();
                        for (PageIterator itr = doc.GetPageIterator(); itr.HasNext(); itr.Next())
                        {
                            Page pg = itr.Current();
                            reader.Begin(pg);
                            Hashtable page_mcid_map = new Hashtable();
                            mcid_doc_map.Add(pg.GetIndex(), page_mcid_map);
                            ProcessElements2(reader, page_mcid_map);
                            reader.End();
                        }

                        STree tree = doc.GetStructTree();
                        if (tree.IsValid())
                        {
                            for (int i = 0; i < tree.GetNumKids(); ++i)
                            {
                                ProcessStructElement2(tree.GetKid(i), mcid_doc_map, 0);
                            }
                        }
                        Console.WriteLine();
                        Console.WriteLine("Done 3.");
                    }

                    doc.Save(output_path + "bookmark.pdf", 0);
                }
            }
            catch (PDFNetException e)
            {
                Console.WriteLine(e.Message);
            }
        }
        static void Main(string[] args)
        {
            PDFNet.Initialize();

            // Relative path to the folder containing test files.
            string input_path  = "../../TestFiles/";
            string output_path = "../../TestFiles/Output/";

            try
            {
                // Read a PDF document from a stream or pass-in a memory buffer...
                FileStream istm = new FileStream(input_path + "tiger.pdf", FileMode.Open, FileAccess.Read);
                using (PDFDoc doc = new PDFDoc(istm))
                    using (ElementWriter writer = new ElementWriter())
                        using (ElementReader reader = new ElementReader())
                        {
                            doc.InitSecurityHandler();

                            int num_pages = doc.GetPageCount();

                            Element element;

                            // Perform some document editing ...
                            // Here we simply copy all elements from one page to another.
                            for (int i = 1; i <= num_pages; ++i)
                            {
                                Page pg = doc.GetPage(2 * i - 1);

                                reader.Begin(pg);
                                Page new_page = doc.PageCreate(pg.GetMediaBox());
                                doc.PageInsert(doc.GetPageIterator(2 * i), new_page);

                                writer.Begin(new_page);
                                while ((element = reader.Next()) != null)                       // Read page contents
                                {
                                    writer.WriteElement(element);
                                }

                                writer.End();
                                reader.End();
                            }

                            doc.Save(output_path + "doc_memory_edit.pdf", SDFDoc.SaveOptions.e_remove_unused);

                            // Save the document to a stream or a memory buffer...
                            using (FileStream ostm = new FileStream(output_path + "doc_memory_edit.txt", FileMode.Create, FileAccess.Write)) {
                                doc.Save(ostm, SDFDoc.SaveOptions.e_remove_unused);
                            }

                            // Read some data from the file stored in memory
                            reader.Begin(doc.GetPage(1));
                            while ((element = reader.Next()) != null)
                            {
                                if (element.GetType() == Element.Type.e_path)
                                {
                                    Console.Write("Path, ");
                                }
                            }
                            reader.End();

                            Console.WriteLine("");
                            Console.WriteLine("");
                            Console.WriteLine("Done. Result saved in doc_memory_edit.pdf and doc_memory_edit.txt ...");
                        }
            }
            catch (PDFNetException e)
            {
                Console.WriteLine(e.Message);
            }
        }
        static void Main(string[] args)
        {
            PDFNet.Initialize();

            // Example 1:
            // Extract images by traversing the display list for
            // every page. With this approach it is possible to obtain
            // image positioning information and DPI.
            try
            {
                using (PDFDoc doc = new PDFDoc(input_path + "newsletter.pdf"))
                    using (ElementReader reader = new ElementReader())
                    {
                        doc.InitSecurityHandler();

                        PageIterator itr;
                        for (itr = doc.GetPageIterator(); itr.HasNext(); itr.Next())
                        {
                            reader.Begin(itr.Current());
                            ImageExtract(doc, reader);
                            reader.End();
                        }

                        Console.WriteLine("Done.");
                    }
            }
            catch (PDFNetException e)
            {
                Console.WriteLine(e.Message);
            }

            Console.WriteLine("----------------------------------------------------------------");

            // Example 2:
            // Extract images by scanning the low-level document.
            try
            {
                using (PDFDoc doc = new PDFDoc(input_path + "newsletter.pdf"))
                {
                    doc.InitSecurityHandler();
                    image_counter = 0;

                    SDFDoc cos_doc  = doc.GetSDFDoc();
                    int    num_objs = cos_doc.XRefSize();
                    for (int i = 1; i < num_objs; ++i)
                    {
                        Obj obj = cos_doc.GetObj(i);
                        if (obj != null && !obj.IsFree() && obj.IsStream())
                        {
                            // Process only images
                            DictIterator itr = obj.Find("Subtype");
                            if (!itr.HasNext() || itr.Value().GetName() != "Image")
                            {
                                continue;
                            }

                            itr = obj.Find("Type");
                            if (!itr.HasNext() || itr.Value().GetName() != "XObject")
                            {
                                continue;
                            }

                            pdftron.PDF.Image image = new pdftron.PDF.Image(obj);

                            Console.WriteLine("--> Image: {0}", ++image_counter);
                            Console.WriteLine("    Width: {0}", image.GetImageWidth());
                            Console.WriteLine("    Height: {0}", image.GetImageHeight());
                            Console.WriteLine("    BPC: {0}", image.GetBitsPerComponent());

                            string fname = output_path + "image_extract2_" + image_counter.ToString();
                            image.Export(fname);                              // or ExporAsPng() or ExporAsTiff() ...

                            // Convert PDF bitmap to GDI+ Bitmap...
                            //Bitmap bmp = image.GetBitmap();
                            //bmp.Save(fname, ImageFormat.Png);
                            //bmp.Dispose();

                            // Instead of converting PDF images to a Bitmap, you can also extract
                            // uncompressed/compressed image data directly using element.GetImageData()
                            // as illustrated in ElementReaderAdv sample project.
                        }
                    }
                }
            }
            catch (PDFNetException e)
            {
                Console.WriteLine(e.Message);
            }
        }