Exemplo n.º 1
0
        /// <summary>Runs a text extraction strategy over the content of a single page.</summary>
        /// <remarks>
        /// A <see cref="PdfCanvasProcessor"/> is created from the strategy and the supplied
        /// custom content operators, the page content is processed with it, and the text the
        /// strategy reports afterwards is returned.
        /// </remarks>
        /// <param name="page">the page for the text to be extracted from</param>
        /// <param name="strategy">the strategy to use for extracting text</param>
        /// <param name="additionalContentOperators">
        /// a map of custom <see cref="IContentOperator"/> instances that is handed to the processor
        /// </param>
        /// <returns>the text reported by <paramref name="strategy"/> after processing</returns>
        public static String GetTextFromPage(PdfPage page, ITextExtractionStrategy strategy,
                                             IDictionary<String, IContentOperator> additionalContentOperators)
        {
            PdfCanvasProcessor canvasProcessor = new PdfCanvasProcessor(strategy, additionalContentOperators);
            canvasProcessor.ProcessPageContent(page);

            String extractedText = strategy.GetResultantText();
            return extractedText;
        }
Exemplo n.º 2
0
        public virtual void TestCharacterRenderInfos()
        {
            // Feed the first page of simple_text.pdf through a processor that reports
            // to a CharacterPositionEventListener.
            TextRenderInfoTest.CharacterPositionEventListener positionListener = new TextRenderInfoTest.CharacterPositionEventListener();
            PdfCanvasProcessor canvasProcessor = new PdfCanvasProcessor(positionListener);

            PdfReader reader = new PdfReader(sourceFolder + "simple_text.pdf");
            PdfDocument document = new PdfDocument(reader);
            canvasProcessor.ProcessPageContent(document.GetPage(FIRST_PAGE));
        }
Exemplo n.º 3
0
        /// <summary>Runs the given listener over the content of one page of the document.</summary>
        /// <remarks>
        /// A <see cref="PdfCanvasProcessor"/> is built from the listener and the supplied custom
        /// content operators, and is then used to process the page with the given number.
        /// </remarks>
        /// <typeparam name="E">the concrete listener type, preserved in the return value</typeparam>
        /// <param name="pageNumber">the page number to process</param>
        /// <param name="renderListener">the listener that will receive render callbacks</param>
        /// <param name="additionalContentOperators">
        /// a map of custom content operators that is handed to the processor
        /// </param>
        /// <returns>the provided renderListener</returns>
        public virtual E ProcessContent<E>(int pageNumber, E renderListener,
                                           IDictionary<String, IContentOperator> additionalContentOperators)
            where E : IEventListener
        {
            PdfCanvasProcessor canvasProcessor = new PdfCanvasProcessor(renderListener, additionalContentOperators);
            canvasProcessor.ProcessPageContent(pdfDocument.GetPage(pageNumber));

            return renderListener;
        }
        public virtual void TestClosingEmptyPath()
        {
            PdfReader reader = new PdfReader(sourceFolder + "closingEmptyPath.pdf");
            PdfDocument emptyPathDocument = new PdfDocument(reader);
            PdfCanvasProcessorTest.NoOpEventListener noOpListener = new PdfCanvasProcessorTest.NoOpEventListener();
            PdfCanvasProcessor canvasProcessor = new PdfCanvasProcessor(noOpListener);

            // The test succeeds as long as handling the empty path does not throw.
            PdfPage firstPage = emptyPathDocument.GetPage(1);
            canvasProcessor.ProcessPageContent(firstPage);
        }
 /// <summary>
 /// Creates the listener and, for bordered tables, immediately processes the page with it.
 /// </summary>
 /// <remarks>
 /// When <paramref name="withBorder"/> is false the constructor does nothing: the page is
 /// neither stored nor processed.
 /// </remarks>
 /// <param name="pdfPage">the PDF page to process</param>
 /// <param name="withBorder">true if the tables are fully bordered, false otherwise</param>
 public FilterTableEventListener(PdfPage pdfPage, bool withBorder)
 {
     if (!withBorder)
     {
         return;
     }

     this.pdfPage = pdfPage;

     // This instance is its own event listener while the page content is processed.
     PdfCanvasProcessor canvasProcessor = new PdfCanvasProcessor(this);
     canvasProcessor.ProcessPageContent(pdfPage);
     GetTablesFromborders();
 }
Exemplo n.º 6
0
        static void AnalyzeTextFromListener(string filename)
        {
            // Feed the first page of the document through an AnalyzeTextListener.
            using (PdfDocument document = new PdfDocument(new PdfReader(filename)))
            {
                PdfPage firstPage = document.GetFirstPage();
                PdfCanvasProcessor canvasProcessor = new PdfCanvasProcessor(new AnalyzeTextListener());
                canvasProcessor.ProcessPageContent(firstPage);
            }
        }
Exemplo n.º 7
0
 public static string GetResultantText(string fileName)
 {
     // Extract the text of the first page with a location-based strategy.
     using (PdfDocument document = new PdfDocument(new PdfReader(fileName)))
     {
         LocationTextExtractionStrategy locationStrategy = new LocationTextExtractionStrategy();
         PdfCanvasProcessor canvasProcessor = new PdfCanvasProcessor(locationStrategy);
         canvasProcessor.ProcessPageContent(document.GetFirstPage());
         return locationStrategy.GetResultantText();
     }
 }
Exemplo n.º 8
0
        static void ShowLinesFromListener(string filename)
        {
            // Feed the first page of the document through a UserPathListener.
            using (PdfDocument document = new PdfDocument(new PdfReader(filename)))
            {
                PdfPage firstPage = document.GetFirstPage();
                PdfCanvasProcessor canvasProcessor = new PdfCanvasProcessor(new UserPathListener());
                canvasProcessor.ProcessPageContent(firstPage);
            }
        }
        public virtual void ContentStreamProcessorTest()
        {
            PdfReader reader = new PdfReader(sourceFolder + "yaxiststar.pdf");
            PdfWriter inMemoryWriter = new PdfWriter(new ByteArrayOutputStream());
            PdfDocument document = new PdfDocument(reader, inMemoryWriter);

            // Every page gets its own processor and its own listener instance.
            int pageNumber = 1;
            while (pageNumber <= document.GetNumberOfPages())
            {
                PdfPage currentPage = document.GetPage(pageNumber);
                new PdfCanvasProcessor(new _IEventListener_40()).ProcessPageContent(currentPage);
                ++pageNumber;
            }
        }
Exemplo n.º 10
0
 public virtual void ExpectedByteAlignedTiffImageExtractionTest()
 {
     // A byte-aligned image is expected in the PDF file but it is not one, so
     // processing the first page must fail with the IOException asserted below.
     NUnit.Framework.TestDelegate extractImage = () => {
         PdfReader reader = new PdfReader(sourceFolder + "expectedByteAlignedTiffImageExtraction.pdf");
         PdfDocument document = new PdfDocument(reader);
         GetImageBytesTest.ImageExtractor imageListener = new GetImageBytesTest.ImageExtractor(this);
         PdfCanvasProcessor canvasProcessor = new PdfCanvasProcessor(imageListener);
         canvasProcessor.ProcessPageContent(document.GetPage(1));
     };
     String expectedMessage = MessageFormatUtil.Format(iText.IO.IOException.ExpectedTrailingZeroBitsForByteAlignedLines);

     NUnit.Framework.Assert.That(extractImage,
                                 NUnit.Framework.Throws.InstanceOf<iText.IO.IOException>().With.Message.EqualTo(expectedMessage));
 }
Exemplo n.º 11
0
        /// <summary>
        /// Processes the page with the given strategy and returns the rectangle of every
        /// location the strategy reports.
        /// </summary>
        private ICollection<Rectangle> ProcessPage(ILocationExtractionStrategy strategy, PdfPage page)
        {
            new PdfCanvasProcessor(strategy).ProcessPageContent(page);

            IList<Rectangle> rectangles = new List<Rectangle>();
            foreach (IPdfTextLocation location in strategy.GetResultantLocations())
            {
                Rectangle area = location.GetRectangle();
                rectangles.Add(area);
            }

            return rectangles;
        }
Exemplo n.º 12
0
        /// <summary>Get extraction strategy for given document.</summary>
        /// <remarks>
        /// Opens the document at <paramref name="pdfPath"/>, runs a new
        /// <c>ExtractionStrategy</c> for <paramref name="layerName"/> over its first page and
        /// closes the document again, also when processing throws.
        /// </remarks>
        /// <param name="pdfPath">path of the PDF file to read</param>
        /// <param name="layerName">layer name passed to the strategy constructor</param>
        /// <param name="useActualText">value forwarded to <c>SetUseActualText</c></param>
        /// <returns>the strategy after the first page has been processed</returns>
        public static ExtractionStrategy GetExtractionStrategy(String pdfPath, String layerName, bool useActualText)
        {
            PdfDocument pdfDocument = new PdfDocument(new PdfReader(pdfPath));

            try
            {
                ExtractionStrategy strategy = new ExtractionStrategy(layerName);
                strategy.SetUseActualText(useActualText);

                PdfCanvasProcessor processor = new PdfCanvasProcessor(strategy);
                processor.ProcessPageContent(pdfDocument.GetFirstPage());
                return strategy;
            }
            finally
            {
                // Close the document even if extraction fails, so it is not left open.
                pdfDocument.Close();
            }
        }
        public virtual void CheckBboxCalculationForType3FontsWithFontMatrix02()
        {
            PdfReader reader = new PdfReader(sourceFolder + "checkBboxCalculationForType3FontsWithFontMatrix02.pdf");
            PdfDocument document = new PdfDocument(reader);
            GlyphBboxCalculationTest.CharacterPositionEventListener glyphListener = new GlyphBboxCalculationTest.CharacterPositionEventListener();

            new PdfCanvasProcessor(glyphListener).ProcessPageContent(document.GetPage(1));

            // Expected width: font size (36) * |fontMatrix| (1) * glyph width (0.6) = 21.6
            NUnit.Framework.Assert.AreEqual(21.6, glyphListener.glyphWith, 1e-5);
        }
Exemplo n.º 14
0
 public virtual void ParseCircularReferencesInResourcesTest()
 {
     // The test passes when running the delegate throws an OutOfMemoryException.
     NUnit.Framework.TestDelegate parseFirstPage = () => {
         PdfReader reader = new PdfReader(sourceFolder + "circularReferencesInResources.pdf");
         PdfDocument document = new PdfDocument(reader);
         PdfCanvasProcessorTest.NoOpEventListener noOpListener = new PdfCanvasProcessorTest.NoOpEventListener();
         PdfCanvasProcessor canvasProcessor = new PdfCanvasProcessor(noOpListener);
         PdfPage firstPage = document.GetFirstPage();
         canvasProcessor.ProcessPageContent(firstPage);
         document.Close();
     };

     NUnit.Framework.Assert.That(parseFirstPage, NUnit.Framework.Throws.InstanceOf<OutOfMemoryException>());
 }
        /// <summary>Processes content from the specified page number using the specified listener.</summary>
        /// <remarks>
        /// Processes content from the specified page number using the specified listener.
        /// Every entry of <paramref name="additionalContentOperators"/> is registered on the
        /// processor before the page is processed.
        /// </remarks>
        /// <typeparam name="E">the concrete listener type, preserved in the return value</typeparam>
        /// <param name="pageNumber">the page number to process</param>
        /// <param name="renderListener">the listener that will receive render callbacks</param>
        /// <param name="additionalContentOperators">
        /// an optional map of custom ContentOperators for rendering instructions; may be
        /// <c>null</c>, in which case no additional operators are registered
        /// </param>
        /// <returns>the provided renderListener</returns>
        public virtual E ProcessContent<E>(int pageNumber, E renderListener,
                                           IDictionary<String, IContentOperator> additionalContentOperators)
            where E : IEventListener
        {
            PdfCanvasProcessor processor = new PdfCanvasProcessor(renderListener);

            // The map is documented as optional, so a null argument must not fail here.
            if (additionalContentOperators != null)
            {
                foreach (KeyValuePair<String, IContentOperator> entry in additionalContentOperators)
                {
                    processor.RegisterContentOperator(entry.Key, entry.Value);
                }
            }

            processor.ProcessPageContent(pdfDocument.GetPage(pageNumber));
            return renderListener;
        }
        public virtual void CheckAverageBboxCalculationForType3FontsWithFontMatrix01Test()
        {
            PdfReader reader = new PdfReader(sourceFolder + "checkAverageBboxCalculationForType3FontsWithFontMatrix01.pdf");
            PdfDocument document = new PdfDocument(reader);
            GlyphBboxCalculationTest.CharacterPositionEventListener glyphListener = new GlyphBboxCalculationTest.CharacterPositionEventListener();

            new PdfCanvasProcessor(glyphListener).ProcessPageContent(document.GetPage(1));

            // The font program of the first recorded text render info must report an average width of 600.
            NUnit.Framework.Assert.AreEqual(600,
                                            glyphListener.firstTextRenderInfo.GetFont().GetFontProgram().GetAvgWidth(),
                                            0.01f);
        }
Exemplo n.º 17
0
        public void BackgroundTest(string exeFileName, string commandLineParameter, bool expectedBackground)
        {
            HtmlToPdfRunner runner = new HtmlToPdfRunner(exeFileName);

            // Minimal page whose body is given a blue background colour.
            string html = @"<!DOCTYPE html>
<html>
  <head>
  </head>
  <body style=""background-color:blue;"">
   Test Page
  </body>
</html>";

            using (TempHtmlFile htmlFile = new TempHtmlFile(html))
            using (TempPdfFile pdfFile = new TempPdfFile(this.TestContext))
            {
                // The optional parameter, when present, goes in front of the two quoted paths.
                string optionPrefix = string.IsNullOrEmpty(commandLineParameter)
                    ? string.Empty
                    : $"{commandLineParameter} ";
                string commandLine = optionPrefix + $"\"{htmlFile.FilePath}\" \"{pdfFile.FilePath}\"";

                HtmlToPdfRunResult result = runner.Run(commandLine);
                Assert.AreEqual(0, result.ExitCode, result.Output);

                using (PdfReader pdfReader = new PdfReader(pdfFile.FilePath))
                using (PdfDocument pdfDocument = new PdfDocument(pdfReader))
                {
                    int pageCount = pdfDocument.GetNumberOfPages();
                    Assert.AreEqual(1, pageCount);

                    PdfPage firstPage = pdfDocument.GetPage(1);
                    RectangleFinder rectangleFinder = new RectangleFinder();
                    new PdfCanvasProcessor(rectangleFinder).ProcessPageContent(firstPage);

                    // One bounding box is expected with a background, none without.
                    ICollection<Rectangle> boxes = rectangleFinder.GetBoundingBoxes();
                    int expectedBoxCount = expectedBackground ? 1 : 0;
                    Assert.AreEqual(expectedBoxCount, boxes.Count);
                }
            }
        }
Exemplo n.º 18
0
        /// <summary>Get text from layer specified by name from page.</summary>
        /// <remarks>
        /// The document is opened with a <c>PdfOcrMetaInfo</c> event-counting meta info and is
        /// closed again before the text is read from the strategy, also when processing throws.
        /// </remarks>
        /// <param name="pdfPath">path of the PDF file to read</param>
        /// <param name="layerName">layer name passed to the extraction strategy</param>
        /// <param name="page">number of the page to process</param>
        /// <param name="useActualText">value forwarded to <c>SetUseActualText</c></param>
        /// <returns>the resultant text of the extraction strategy</returns>
        protected internal virtual String GetTextFromPdfLayer(String pdfPath, String layerName, int page, bool useActualText)
        {
            PdfDocument pdfDocument = new PdfDocument(new PdfReader(pdfPath),
                                                      new DocumentProperties().SetEventCountingMetaInfo(new PdfOcrMetaInfo()));
            IntegrationTestHelper.ExtractionStrategy textExtractionStrategy;

            try
            {
                textExtractionStrategy = new IntegrationTestHelper.ExtractionStrategy(layerName);
                textExtractionStrategy.SetUseActualText(useActualText);

                PdfCanvasProcessor processor = new PdfCanvasProcessor(textExtractionStrategy);
                processor.ProcessPageContent(pdfDocument.GetPage(page));
            }
            finally
            {
                // Close the document even if extraction fails, so it is not left open.
                pdfDocument.Close();
            }

            return textExtractionStrategy.GetResultantText();
        }
Exemplo n.º 19
0
        /// <summary>Extracts the text of the first page of a PDF file.</summary>
        /// <remarks>
        /// Known limitation: only the first page is read. Sample documents are all one page long.
        /// The document is closed before the text is returned, also when processing throws.
        /// </remarks>
        /// <param name="path">path of the PDF file to read</param>
        /// <returns>the resultant text of a <c>LocationTextExtractionStrategy</c> run over the first page</returns>
        public static string GetPDFFromFile(string path)
        {
            PdfDocument pdfDoc = new PdfDocument(new PdfReader(path));
            LocationTextExtractionStrategy strategy = new LocationTextExtractionStrategy();

            try
            {
                PdfCanvasProcessor parser = new PdfCanvasProcessor(strategy);
                parser.ProcessPageContent(pdfDoc.GetFirstPage());
            }
            finally
            {
                // Close the document even if extraction fails, so it is not left open.
                pdfDoc.Close();
            }

            return strategy.GetResultantText();
        }
Exemplo n.º 20
0
        public virtual void TestNoninvertibleMatrix()
        {
            PdfReader reader = new PdfReader(sourceFolder + "noninvertibleMatrix.pdf");
            PdfDocument document = new PdfDocument(reader);
            LocationTextExtractionStrategy locationStrategy = new LocationTextExtractionStrategy();
            PdfCanvasProcessor canvasProcessor = new PdfCanvasProcessor(locationStrategy);

            canvasProcessor.ProcessPageContent(document.GetFirstPage());
            String actualText = locationStrategy.GetResultantText();
            document.Close();

            // Text expected from the first page of the document.
            String expectedText = "Hello World!\nHello World!\nHello World!\nHello World! Hello World! Hello World!";
            NUnit.Framework.Assert.AreEqual(expectedText, actualText);
        }
Exemplo n.º 21
0
        public virtual void ProcessGraphicsStateResourceOperatorFillOpacityTest()
        {
            PdfDocument document = new PdfDocument(new PdfReader(sourceFolder + "transparentText.pdf"));
            float? expectedOpacity = 0.5f;
            IDictionary<String, Object> recordedInfo = new Dictionary<String, Object>();

            // All pages record into the same dictionary through a fresh listener per page.
            int pageNumber = 1;
            while (pageNumber <= document.GetNumberOfPages())
            {
                PdfPage currentPage = document.GetPage(pageNumber);
                PdfCanvasProcessorIntegrationTest.RecordEveryTextRenderEvent recorder = new PdfCanvasProcessorIntegrationTest.RecordEveryTextRenderEvent(recordedInfo);
                new PdfCanvasProcessor(recorder).ProcessPageContent(currentPage);
                ++pageNumber;
            }

            NUnit.Framework.Assert.AreEqual(expectedOpacity, recordedInfo.Get("FillOpacity"), "Expected fill opacity not found");
        }
Exemplo n.º 22
0
        public virtual void PatternColorParsingNotValidPdfTest()
        {
            PdfReader reader = new PdfReader(sourceFolder + "patternColorParsingNotValidPdfTest.pdf");
            PdfDocument document = new PdfDocument(reader);

            int pageNumber = 1;
            while (pageNumber <= document.GetNumberOfPages())
            {
                PdfPage currentPage = document.GetPage(pageNumber);
                PdfCanvasProcessorIntegrationTest.ColorParsingEventListener colorListener = new PdfCanvasProcessorIntegrationTest.ColorParsingEventListener();
                new PdfCanvasProcessor(colorListener).ProcessPageContent(currentPage);

                // For this document the encountered path must not carry a fill colour.
                Color fillColor = colorListener.GetEncounteredPath().GetFillColor();
                NUnit.Framework.Assert.IsNull(fillColor);
                ++pageNumber;
            }
        }
Exemplo n.º 23
0
        /// <summary>Returns the text of every page of the given PDF file, one entry per page.</summary>
        private ICollection<string> GetPDF(string filename)
        {
            List<string> textPerPage = new List<string>();

            using (PdfDocument document = new PdfDocument(new PdfReader(filename)))
            {
                int pageCount = document.GetNumberOfPages();
                int pageNumber = 1;
                while (pageNumber <= pageCount)
                {
                    // A fresh strategy per page keeps each entry limited to that page.
                    LocationTextExtractionStrategy locationStrategy = new LocationTextExtractionStrategy();
                    new PdfCanvasProcessor(locationStrategy).ProcessPageContent(document.GetPage(pageNumber));
                    textPerPage.Add(locationStrategy.GetResultantText());
                    pageNumber++;
                }
            }

            return textPerPage;
        }
Exemplo n.º 24
0
        public virtual void PatternColorParsingValidPdfTest()
        {
            PdfReader reader = new PdfReader(sourceFolder + "patternColorParsingValidPdfTest.pdf");
            PdfDocument document = new PdfDocument(reader);

            int pageNumber = 1;
            while (pageNumber <= document.GetNumberOfPages())
            {
                PdfPage currentPage = document.GetPage(pageNumber);
                PdfCanvasProcessorIntegrationTest.ColorParsingEventListener colorListener = new PdfCanvasProcessorIntegrationTest.ColorParsingEventListener();
                new PdfCanvasProcessor(colorListener).ProcessPageContent(currentPage);

                // The fill colour of the encountered path must use a Pattern colour space.
                PathRenderInfo encounteredPath = colorListener.GetEncounteredPath();
                PdfColorSpace fillColorSpace = encounteredPath.GetGraphicsState().GetFillColor().GetColorSpace();
                NUnit.Framework.Assert.IsTrue(fillColorSpace is PdfSpecialCs.Pattern);
                ++pageNumber;
            }
        }
Exemplo n.º 25
0
        public virtual void TestType3FontWidth()
        {
            // Reference segment the first recorded line segment is compared against.
            Vector expectedStart = new Vector(20.3246f, 769.4974f, 1.0f);
            Vector expectedEnd = new Vector(151.22923f, 769.4974f, 1.0f);
            LineSegment origLineSegment = new LineSegment(expectedStart, expectedEnd);

            PdfDocument document = new PdfDocument(new PdfReader(sourceFolder + "type3font_text.pdf"));
            TextRenderInfoTest.TextPositionEventListener positionListener = new TextRenderInfoTest.TextPositionEventListener();
            new PdfCanvasProcessor(positionListener).ProcessPageContent(document.GetPage(FIRST_PAGE));

            // Compare the first coordinate of the start point, then of the end point, within 0.5.
            NUnit.Framework.Assert.AreEqual(
                positionListener.GetLineSegments()[FIRST_ELEMENT_INDEX].GetStartPoint().Get(FIRST_ELEMENT_INDEX),
                origLineSegment.GetStartPoint().Get(FIRST_ELEMENT_INDEX),
                1 / 2f);
            NUnit.Framework.Assert.AreEqual(
                positionListener.GetLineSegments()[FIRST_ELEMENT_INDEX].GetEndPoint().Get(FIRST_ELEMENT_INDEX),
                origLineSegment.GetEndPoint().Get(FIRST_ELEMENT_INDEX),
                1 / 2f);
        }
Exemplo n.º 26
0
        /// <summary>Writes the text of every page of a PDF file to a single UTF-8 text file.</summary>
        /// <remarks>
        /// The output file is created (or truncated) once and the text of each page is appended
        /// to it in page order. Opening the file anew for every page with
        /// <c>File.OpenWrite</c> would start at offset 0 each time without truncating, so later
        /// pages would overwrite earlier ones and stale bytes could remain.
        /// </remarks>
        /// <param name="absoluteFilePath">path of the PDF file to read</param>
        /// <param name="outputPath">path of the text file to write</param>
        public void ToTxt(string absoluteFilePath, string outputPath)
        {
            using (var pdfDocument = new PdfDocument(new PdfReader(absoluteFilePath)))
            using (var fos = System.IO.File.Create(outputPath))
            {
                var pageCount = pdfDocument.GetNumberOfPages();
                for (var pageIndex = 1; pageIndex <= pageCount; pageIndex++)
                {
                    // A fresh strategy per page so each write contains only that page's text.
                    var strategy = new LocationTextExtractionStrategy();
                    var parser   = new PdfCanvasProcessor(strategy);
                    parser.ProcessPageContent(pdfDocument.GetPage(pageIndex));

                    var array = Encoding.UTF8.GetBytes(strategy.GetResultantText());
                    fos.Write(array, 0, array.Length);
                }

                fos.Flush();
            }
        }
Exemplo n.º 27
0
        /// <summary>Converts the PDF at <c>SRC</c> to a UTF-8 text file.</summary>
        /// <remarks>
        /// All pages are processed with one shared <c>SimpleTextExtractionStrategy</c>, and its
        /// resultant text is written once after the last page. The destination is opened with
        /// <c>FileMode.Create</c> so that a pre-existing, longer file is truncated instead of
        /// keeping a stale tail. The document is closed even when processing throws.
        /// </remarks>
        /// <param name="dest">path of the text file to write</param>
        public static void getTextFromPdf(String dest)
        {
            PdfDocument pdfDoc = new PdfDocument(new PdfReader(SRC));
            byte[] array;

            try
            {
                SimpleTextExtractionStrategy strategy = new SimpleTextExtractionStrategy();
                PdfCanvasProcessor parser = new PdfCanvasProcessor(strategy);

                int pageCount = pdfDoc.GetNumberOfPages();
                for (var i = 1; i <= pageCount; i++)
                {
                    parser.ProcessPageContent(pdfDoc.GetPage(i));
                }

                array = Encoding.UTF8.GetBytes(strategy.GetResultantText());
            }
            finally
            {
                // Close the document even if extraction fails, so it is not left open.
                pdfDoc.Close();
            }

            using (FileStream stream = new FileStream(dest, FileMode.Create))
            {
                stream.Write(array, 0, array.Length);
            }
        }
        public virtual void TestImageWithoutText()
        {
            String imagePath = TEST_IMAGES_DIRECTORY + "pantone_blue.jpg";
            String pdfPath = GetTargetDirectory() + "testImageWithoutText" + ".pdf";
            FileInfo imageFile = new FileInfo(imagePath);

            // Create a PDF from the image and close the resulting document straight away.
            OcrPdfCreator ocrPdfCreator = new OcrPdfCreator(tesseractReader);
            ocrPdfCreator.CreatePdf(JavaCollectionsUtil.SingletonList<FileInfo>(imageFile), new PdfWriter(pdfPath)).Close();

            // Re-open the result and extract the "Text Layer" content of its first page.
            PdfDocument createdDocument = new PdfDocument(new PdfReader(pdfPath));
            IntegrationTestHelper.ExtractionStrategy layerStrategy = new IntegrationTestHelper.ExtractionStrategy("Text Layer");
            new PdfCanvasProcessor(layerStrategy).ProcessPageContent(createdDocument.GetFirstPage());
            createdDocument.Close();

            NUnit.Framework.Assert.AreEqual("", layerStrategy.GetResultantText());
        }
        /// <summary>
        /// Processes the first page of inlineImageExtraction.pdf with an inline image listener and
        /// checks that exactly one inline image is reported, that its bytes equal the content of
        /// imgtest.dat, and that its dictionary matches an expected dictionary built below.
        /// </summary>
        public virtual void ExtractSingleInlineImageWithIndexedColorSpaceTest()
        {
            PdfDocument pdf = new PdfDocument(new PdfReader(sourceFolder + "inlineImageExtraction.pdf"));

            InlineImageExtractionTest.InlineImageEventListener eventListener = new InlineImageExtractionTest.InlineImageEventListener
                                                                                   ();
            PdfCanvasProcessor canvasProcessor = new PdfCanvasProcessor(eventListener);

            canvasProcessor.ProcessPageContent(pdf.GetFirstPage());
            // The document is closed before the collected images are inspected.
            pdf.Close();
            IList <PdfStream> inlineImages = eventListener.GetInlineImages();

            NUnit.Framework.Assert.AreEqual(1, inlineImages.Count);
            // Compare the image bytes with the reference file stored next to the source PDF.
            byte[] imgBytes    = inlineImages[0].GetBytes();
            byte[] cmpImgBytes = File.ReadAllBytes(Path.Combine(sourceFolder, "imgtest.dat"));
            NUnit.Framework.Assert.AreEqual(cmpImgBytes, imgBytes);
            // Build the dictionary the extracted inline image is expected to match.
            PdfDictionary expectedDict = new PdfDictionary();

            expectedDict.Put(PdfName.BitsPerComponent, new PdfNumber(8));
            expectedDict.Put(PdfName.Height, new PdfNumber(50));
            expectedDict.Put(PdfName.Width, new PdfNumber(50));
            // Lookup data for the expected Indexed colour space, spelled out as a string literal.
            // NOTE(review): the literal mixes escapes with raw non-ASCII characters; presumably each
            // character stands for one byte of the lookup table - confirm before editing it.
            String indexedCsLookupData = "\u007F\u007F\u007Fïïï\u000F\u000F\u000F???¿¿¿ÏÏÏ///\u001F\u001F\u001F___ßßß"
                                         + "\u009F\u009F\u009FOOO¯¯¯ooo\u008F\u008F\u008F°°µ::<ââàuuy,,-ÜÜâ\u000E\u000E\u000Fúúû\u001D\u001D\u001E"
                                         + "ððõXXZ::?\u0004\u0004\u0004226!!$IIK\u0019\u0019\u001Býýþ\u0092\u0092\u0097õõø\f\f\r" + "))-÷÷úììòÍÍÓ66;\b\b\t\u0084\u0084\u0088¡¡¦îîô\u0014\u0014\u0016òòö\u0010\u0010\u0012¾¾Äffiüüýóó÷..2ûûü"
                                         + "ööù%%)ííó\u001D\u001D\u001F>>Døøúññö\u000E\u000E\u000Eééç\u008D\u008D\u008CÓÓÒCCI©©¨\u009B\u009B\u009A"
                                         + "òòñôôózz|888÷÷÷ììëÝÝãµµ¸bbb\u0095\u0095\u0098··¶ûûûºº¼\u0089\u0089\u008Bååãêêë==>ÑÑÖ***qqpààåZZ\\õõõ"
                                         + "\u007F\u007F~\u008E\u008E\u008E\u001E\u001E\u001FÀÀÅååèÆÆÅççåÇÇÊ\u001C\u001C\u001C]]^±±¶TTTççêÉÉÇFFFáá"
                                         + "æÅÅÄyy{ÍÍÎÐÐÕ^^^vvyîîí\u0087\u0087\u008A}}}xxzÊÊËjjl--.ëëò\u0000\u0000\u0000ÿÿÿ{{{|||}}}~~~\u007F\u007F"
                                         + "\u007F\u0080\u0080\u0080\u0081\u0081\u0081\u0082\u0082\u0082\u0083\u0083\u0083\u0084\u0084\u0084\u0085"
                                         + "\u0085\u0085\u0086\u0086\u0086\u0087\u0087\u0087\u0088\u0088\u0088\u0089\u0089\u0089\u008A\u008A\u008A"
                                         + "\u008B\u008B\u008B\u008C\u008C\u008C\u008D\u008D\u008D\u008E\u008E\u008E\u008F\u008F\u008F\u0090\u0090"
                                         + "\u0090\u0091\u0091\u0091\u0092\u0092\u0092\u0093\u0093\u0093\u0094\u0094\u0094\u0095\u0095\u0095\u0096"
                                         + "\u0096\u0096\u0097\u0097\u0097\u0098\u0098\u0098\u0099\u0099\u0099\u009A\u009A\u009A\u009B\u009B\u009B"
                                         + "\u009C\u009C\u009C\u009D\u009D\u009D\u009E\u009E\u009E\u009F\u009F\u009F   ¡¡¡¢¢¢£££¤¤¤¥¥¥¦¦¦§§§¨¨¨©©©"
                                         + "ªªª«««¬¬¬\u00AD\u00AD\u00AD®®®¯¯¯°°°±±±²²²³³³´´´µµµ¶¶¶···¸¸¸¹¹¹ººº»»»¼¼¼½½½¾¾¾¿¿¿ÀÀÀÁÁÁÂÂÂÃÃÃÄÄÄÅÅÅÆÆÆ"
                                         + "ÇÇÇÈÈÈÉÉÉÊÊÊËËËÌÌÌÍÍÍÎÎÎÏÏÏÐÐÐÑÑÑÒÒÒÓÓÓÔÔÔÕÕÕÖÖÖ×××ØØØÙÙÙÚÚÚÛÛÛÜÜÜÝÝÝÞÞÞßßßàààáááâââãããäääåååæææçççèèè"
                                         + "éééêêêëëëìììíííîîîïïïðððñññòòòóóóôôôõõõööö÷÷÷øøøùùùúúúûûûüüüýýýþþþÿÿÿ";

            // Expected colour space: Indexed, constructed from DeviceRGB, 255 and the lookup string above.
            PdfSpecialCs.Indexed expectedIndexedCs = new PdfSpecialCs.Indexed(PdfName.DeviceRGB, 255, new PdfString(indexedCsLookupData
                                                                                                                    ));
            expectedDict.Put(PdfName.ColorSpace, expectedIndexedCs.GetPdfObject());
            // The extracted image stream must compare equal to the expected dictionary.
            NUnit.Framework.Assert.IsTrue(new CompareTool().CompareDictionaries(inlineImages[0], expectedDict));
        }
Exemplo n.º 30
0
        public virtual void ContentStreamProcessorTest()
        {
            PdfReader reader = new PdfReader(sourceFolder + "tableWithImageAndText.pdf");
            PdfWriter inMemoryWriter = new PdfWriter(new ByteArrayOutputStream());
            PdfDocument document = new PdfDocument(reader, inMemoryWriter);
            StringBuilder recordedEvents = new StringBuilder();

            // All pages log into the same builder through a fresh listener per page.
            int pageNumber = 1;
            while (pageNumber <= document.GetNumberOfPages())
            {
                PdfPage currentPage = document.GetPage(pageNumber);
                PdfCanvasProcessorIntegrationTest.RecordEveryHighLevelEventListener recorder = new PdfCanvasProcessorIntegrationTest.RecordEveryHighLevelEventListener(recordedEvents);
                new PdfCanvasProcessor(recorder).ProcessPageContent(currentPage);
                ++pageNumber;
            }

            // The recorded log must equal the reference log, read as UTF-8.
            String expectedLogPath = System.IO.Path.Combine(sourceFolder + "contentStreamProcessorTest_events_log.dat");
            byte[] expectedLogBytes = File.ReadAllBytes(expectedLogPath);
            String expectedEvents = iText.IO.Util.JavaUtil.GetStringForBytes(expectedLogBytes, System.Text.Encoding.UTF8);

            NUnit.Framework.Assert.AreEqual(expectedEvents, recordedEvents.ToString());
        }