コード例 #1
0
        /// <summary>
        /// Lets the user pick a PDF file, shows the chosen path in the label and runs every
        /// page of the document through a text extraction strategy.
        /// </summary>
        /// <param name="sender">the control that raised the event</param>
        /// <param name="e">event data (not used)</param>
        private void btnSelecionarArquivo_Click(object sender, EventArgs e)
        {
            // When the dialog is dismissed FileName is empty or stale, so it must not
            // be handed to PdfReader.
            if (openFileDialog.ShowDialog() != System.Windows.Forms.DialogResult.OK)
            {
                return;
            }

            var caminhoArquivo = openFileDialog.FileName;

            lblArquivoSelecionado.Text = caminhoArquivo;

            _pdfDocument = new PdfDocument(new PdfReader(caminhoArquivo));

            try
            {
                var numberOfPages = _pdfDocument.GetNumberOfPages();

                for (var i = 1; i <= numberOfPages; i++)
                {
                    // One strategy per page: a strategy shared between pages would also
                    // return the text collected from the pages processed before.
                    var strategy = new LocationTextExtractionStrategy();
                    var parser = new PdfCanvasProcessor(strategy);
                    parser.ProcessPageContent(_pdfDocument.GetPage(i));

                    // The result already is a .NET string; converting it to bytes with
                    // Encoding.Default and back could only lose characters.
                    // NOTE(review): nothing consumes the page text yet.
                    var str = strategy.GetResultantText();
                }
            }
            finally
            {
                // Release the file even when a page fails to parse.
                _pdfDocument.Close();
            }
        }
コード例 #2
0
ファイル: PdfConverter.cs プロジェクト: wwwit/ebook
        /// <summary>Reads a PDF on a thread-pool thread and collects the text of all its pages.</summary>
        /// <param name="path">path of the PDF file to read</param>
        /// <returns>
        /// a task producing a <c>File</c> whose <c>Path</c> is <paramref name="path"/>, whose
        /// <c>Mime</c> is "application/pdf" and whose <c>Content</c> is the concatenated page text
        /// </returns>
        public Task <File> Convert(string path)
        {
            return(Task.Run(() =>
            {
                var file = new File
                {
                    Path = path,
                    Mime = "application/pdf"
                };

                using (var document = new PdfDocument(new PdfReader(path)))
                {
                    int numOfPages = document.GetNumberOfPages();
                    var content = new StringBuilder();

                    for (int i = 1; i <= numOfPages; i++)
                    {
                        // A fresh strategy per page: PdfCanvasProcessor.Reset() does not clear
                        // the listener, so a shared strategy would return the text of all
                        // previously processed pages again for every new page.
                        var extractionStrategy = new LocationTextExtractionStrategy();
                        var processor = new PdfCanvasProcessor(extractionStrategy);

                        processor.ProcessPageContent(document.GetPage(i));
                        content.Append(extractionStrategy.GetResultantText());
                    }

                    file.Content = content.ToString();
                }

                return file;
            }));
        }
コード例 #3
0
        /// <summary>
        /// Creates a PDF from a multi-page TIFF with a magenta text layer named "Text1" and
        /// checks that the fill color read back from the first page is that color.
        /// </summary>
        public virtual void TestFontColorInMultiPagePdf()
        {
            String   testName = "testFontColorInMultiPagePdf";
            String   path     = TEST_IMAGES_DIRECTORY + "multîpage.tiff";
            String   pdfPath  = GetTargetDirectory() + testName + ".pdf";
            FileInfo file     = new FileInfo(path);

            tesseractReader.SetTesseract4OcrEngineProperties(tesseractReader.GetTesseract4OcrEngineProperties().SetPreprocessingImages
                                                                 (false));
            OcrPdfCreatorProperties ocrPdfCreatorProperties = new OcrPdfCreatorProperties();

            ocrPdfCreatorProperties.SetTextLayerName("Text1");
            Color color = DeviceCmyk.MAGENTA;

            ocrPdfCreatorProperties.SetTextColor(color);
            OcrPdfCreator ocrPdfCreator = new OcrPdfCreator(tesseractReader, ocrPdfCreatorProperties);
            PdfDocument   doc           = ocrPdfCreator.CreatePdf(JavaCollectionsUtil.SingletonList <FileInfo>(file), GetPdfWriter(
                                                                      pdfPath));

            NUnit.Framework.Assert.IsNotNull(doc);
            doc.Close();

            PdfDocument pdfDocument = new PdfDocument(new PdfReader(pdfPath));

            try
            {
                IntegrationTestHelper.ExtractionStrategy strategy = new IntegrationTestHelper.ExtractionStrategy("Text1");
                PdfCanvasProcessor processor = new PdfCanvasProcessor(strategy);

                processor.ProcessPageContent(pdfDocument.GetPage(1));
                Color fillColor = strategy.GetFillColor();

                // Expected value first, so a failure message reports the values the right way round
                NUnit.Framework.Assert.AreEqual(color, fillColor);
            }
            finally
            {
                // Close the reader even when the assertion fails
                pdfDocument.Close();
            }
        }
コード例 #4
0
        /// <summary>Runs the content of a page through an extraction strategy and returns its text.</summary>
        /// <param name="page">the page whose content is processed</param>
        /// <param name="strategy">the strategy that receives the render events and builds the text</param>
        /// <returns>the text reported by <paramref name="strategy"/> after the page was processed</returns>
        public static String GetTextFromPage(PdfPage page, ITextExtractionStrategy strategy)
        {
            new PdfCanvasProcessor(strategy).ProcessPageContent(page);

            String extractedText = strategy.GetResultantText();
            return extractedText;
        }
コード例 #5
0
ファイル: Class1.cs プロジェクト: sdg002/itext.Demo
        /// <summary>Collects the size and the blocks of every page of an in-memory PDF.</summary>
        /// <param name="contents">the raw bytes of the PDF document</param>
        /// <returns>
        /// one <c>Page</c> per PDF page, in page order, carrying the rotated page size and the
        /// blocks gathered by a <c>CustomEventListener</c>
        /// </returns>
        public Page[] GetBlocks(byte[] contents)
        {
            List <Page> lstPages = new List <Page>();

            using (var stm = new System.IO.MemoryStream(contents))
            using (var pdfReader = new iText.Kernel.Pdf.PdfReader(stm))
            using (iText.Kernel.Pdf.PdfDocument doc = new iText.Kernel.Pdf.PdfDocument(pdfReader))
            {
                int numOfPages = doc.GetNumberOfPages();

                // iText page numbers are 1-based
                for (int page = 1; page <= numOfPages; page++)
                {
                    var pdfPage = doc.GetPage(page);

                    // Page size with the page rotation applied
                    var pageSize = pdfPage.GetPageSizeWithRotation();
                    var pg = new Page
                    {
                        Height = pageSize.GetHeight(),
                        Width  = pageSize.GetWidth()
                    };

                    // A new listener per page keeps the blocks of different pages apart
                    var customListener = new CustomEventListener();
                    var parser         = new PdfCanvasProcessor(customListener);
                    parser.ProcessPageContent(pdfPage);

                    pg.Blocks = customListener.Blocks.ToArray();
                    lstPages.Add(pg);
                }
            }

            return(lstPages.ToArray());
        }
コード例 #6
0
ファイル: PdfArrayTest.cs プロジェクト: jamCats/DCISM_Payrol
        /// <summary>
        /// Processes a page whose content selects color space "Cs1", registered as a one-element
        /// Pattern array, and checks that the resulting fill color space is a Pattern color space.
        /// </summary>
        public virtual void PdfUncoloredPatternColorSize1Test()
        {
            PdfDocument doc = new PdfDocument(new PdfWriter(new ByteArrayOutputStream()));
            String      selectColorSpace = "/Cs1 cs\n";

            // Indirect dictionary that also points at the content stream
            PdfDictionary indirectDictionary = (PdfDictionary) new PdfDictionary().MakeIndirect(doc);
            PdfStream     content            = new PdfStream(selectColorSpace.GetBytes());
            indirectDictionary.Put(PdfName.Contents, content);

            // The page under test uses the same stream as its content
            PdfPage testPage = doc.AddNewPage();
            testPage.GetPdfObject().Put(PdfName.Contents, content);

            // Color space described by an array holding only the Pattern name
            PdfArray patternArray = new PdfArray();
            patternArray.Add(PdfName.Pattern);
            PdfColorSpace patternSpace = PdfColorSpace.MakeColorSpace(patternArray);
            testPage.GetResources().AddColorSpace(patternSpace);

            Rectangle mediaBox = new Rectangle(50, 50, 1000, 1000);
            testPage.SetMediaBox(mediaBox);

            PdfCanvasProcessor canvasProcessor = new PdfCanvasProcessor(new PdfArrayTest.NoOpListener());
            canvasProcessor.ProcessPageContent(testPage);

            // Reaching this point means processing did not fail; additionally verify the color space type
            bool isPattern = canvasProcessor.GetGraphicsState().GetFillColor().GetColorSpace() is PdfSpecialCs.Pattern;
            NUnit.Framework.Assert.IsTrue(isPattern);
        }
コード例 #7
0
        /// <summary>
        /// Extracts the single image of the first page, writes it to the destination folder and
        /// compares its bytes with the reference image.
        /// </summary>
        public virtual void ExtractByteAlignedG4TiffImageTest()
        {
            String      inFileName       = sourceFolder + "extractByteAlignedG4TiffImage.pdf";
            String      outImageFileName = destinationFolder + "extractedByteAlignedImage.png";
            String      cmpImageFileName = sourceFolder + "cmp_extractByteAlignedG4TiffImage.png";
            PdfDocument pdfDocument      = new PdfDocument(new PdfReader(inFileName));

            IList <byte[]> images;

            try
            {
                GetImageBytesTest.ImageExtractor listener = new GetImageBytesTest.ImageExtractor(this);
                PdfCanvasProcessor processor = new PdfCanvasProcessor(listener);

                processor.ProcessPageContent(pdfDocument.GetPage(1));
                images = listener.GetImages();
            }
            finally
            {
                // The extracted bytes are held by the listener; the document is no longer needed
                pdfDocument.Close();
            }

            NUnit.Framework.Assert.AreEqual(1, images.Count);
            using (FileStream fos = new FileStream(outImageFileName, FileMode.Create)) {
                // Write the whole image: the length is the byte count of the image,
                // not the number of images in the list
                fos.Write(images[0], 0, images[0].Length);
            }
            // expected and actual are swapped here for simplicity
            int expectedLen = images[0].Length;

            byte[] buf = new byte[expectedLen];
            using (FileStream @is = new FileStream(cmpImageFileName, FileMode.Open, FileAccess.Read)) {
                int read = @is.JRead(buf, 0, buf.Length);
                NUnit.Framework.Assert.AreEqual(expectedLen, read);
                // A second read must hit end of file, i.e. the reference is not longer than the extracted image
                read = @is.JRead(buf, 0, buf.Length);
                NUnit.Framework.Assert.IsTrue(read <= 0);
            }
            NUnit.Framework.Assert.AreEqual(images[0], buf);
        }
コード例 #8
0
        /// <summary>
        /// Attaches two region-filtered extraction strategies to one listener and checks the
        /// text each of them collects from the first page.
        /// </summary>
        public virtual void TestWithMultiFilteredRenderListener()
        {
            PdfDocument pdfDocument = new PdfDocument(new PdfReader(sourceFolder + "test.pdf"));

            try
            {
                FilteredEventListener listener = new FilteredEventListener();

                // Rectangle arguments are x, y, width, height
                ITextExtractionStrategy region1Listener = listener.AttachEventListener(
                    new LocationTextExtractionStrategy(),
                    new TextRegionEventFilter(new Rectangle(122, 678.9f, 22, 12)));

                ITextExtractionStrategy region2Listener = listener.AttachEventListener(
                    new LocationTextExtractionStrategy(),
                    new TextRegionEventFilter(new Rectangle(156, 678.9f, 13, 12)));

                PdfCanvasProcessor parser = new PdfCanvasProcessor(new GlyphEventListener(listener));

                parser.ProcessPageContent(pdfDocument.GetPage(1));
                NUnit.Framework.Assert.AreEqual("Your", region1Listener.GetResultantText());
                NUnit.Framework.Assert.AreEqual("dju", region2Listener.GetResultantText());
            }
            finally
            {
                // Close the reader even when an assertion fails
                pdfDocument.Close();
            }
        }
コード例 #9
0
            /// <summary>
            /// Processes the current PDF page with a new listener of type <typeparamref name="T"/>
            /// and stores the listener's results in a new <c>PipelinePage</c>.
            /// </summary>
            /// <typeparam name="T">listener type that receives the page events and exposes the results</typeparam>
            /// <returns>the new page, which is also remembered in <c>_page</c></returns>
            /// <exception cref="InvalidOperationException">
            /// the listener returned no result, or a result whose <c>AllBlocks</c> is null
            /// </exception>
            public PipelinePage ParsePdf <T>()
                where T : class, IEventListener, IPipelineResults <BlockPage>, new()
            {
                var pipelineListener = CreateInstance <T>();

                new PdfCanvasProcessor(pipelineListener).ProcessPageContent(_pdfPage);

                // The page size is read here but not stored anywhere yet
                var pageSize = _pdfPage.GetPageSize();

                var parsedPage = new PipelinePage(_pdf, _pageNumber)
                {
                    LastResult = pipelineListener.GetResults()
                };

                // Reject a missing result as well as a result without blocks
                if (parsedPage.LastResult == null || parsedPage.LastResult.AllBlocks == null)
                {
                    throw new InvalidOperationException();
                }

                _page = parsedPage;

                return parsedPage;
            }
コード例 #10
0
        /// <summary>Writes the XML-escaped content of one structure element kid to the output.</summary>
        /// <remarks>
        /// For a kid with an MCID other than -1 the owning page is parsed once, its per-MCID
        /// content is cached in <c>parsedTags</c>, and the content stored for this MCID is written.
        /// For any other kid the <c>Subtype</c> name of the referenced dictionary is written.
        /// An empty string is written when no content can be determined.
        /// </remarks>
        /// <param name="kid">the marked-content reference to write</param>
        /// <exception cref="System.IO.IOException"/>
        protected internal virtual void ParseTag(PdfMcr kid)
        {
            int           mcid       = kid.GetMcid();
            PdfDictionary pageDic    = kid.GetPageObject();
            String        tagContent = "";

            if (mcid != -1)
            {
                // Parse each page only once and keep its MCID -> content map
                if (!parsedTags.ContainsKey(pageDic))
                {
                    TaggedPdfReaderTool.MarkedContentEventListener listener = new TaggedPdfReaderTool.MarkedContentEventListener
                                                                                  (this);
                    PdfCanvasProcessor processor = new PdfCanvasProcessor(listener);
                    PdfPage            page      = document.GetPage(pageDic);
                    processor.ProcessContent(page.GetContentBytes(), page.GetResources());
                    parsedTags[pageDic] = listener.GetMcidContent();
                }
                if (parsedTags.Get(pageDic).ContainsKey(mcid))
                {
                    tagContent = parsedTags.Get(pageDic).Get(mcid);
                }
            }
            else
            {
                PdfObjRef objRef  = (PdfObjRef)kid;
                PdfObject @object = objRef.GetReferencedObject();

                // The reference may not resolve and the dictionary may lack a Subtype;
                // in both cases the tag content stays empty instead of failing.
                if (@object != null && @object.IsDictionary())
                {
                    PdfName subtype = ((PdfDictionary)@object).GetAsName(PdfName.Subtype);
                    if (subtype != null)
                    {
                        tagContent = subtype.ToString();
                    }
                }
            }
            @out.Write(EscapeXML(tagContent, true));
        }
コード例 #11
0
        /// <summary>
        /// Processes the first page of "simple_text.pdf" with a
        /// <c>CharacterPositionEventListener</c>; the listener is the only consumer of the events.
        /// </summary>
        public virtual void TestCharacterRenderInfos()
        {
            PdfDocument pdfDocument = new PdfDocument(new PdfReader(sourceFolder + "simple_text.pdf"));

            try
            {
                PdfCanvasProcessor parser = new PdfCanvasProcessor(new TextRenderInfoTest.CharacterPositionEventListener());

                parser.ProcessPageContent(pdfDocument.GetPage(FIRST_PAGE));
            }
            finally
            {
                // The document used to be created inline and was never closed
                pdfDocument.Close();
            }
        }
コード例 #12
0
        /// <summary>
        /// Runs the content of a page through an extraction strategy, with custom content
        /// operators registered on the processor, and returns the strategy's text.
        /// </summary>
        /// <param name="page">the page whose content is processed</param>
        /// <param name="strategy">the strategy that receives the render events and builds the text</param>
        /// <param name="additionalContentOperators">
        /// custom <see cref="IContentOperator"/> instances, keyed by operator, that are handed to
        /// the processor together with the strategy
        /// </param>
        /// <returns>the text reported by <paramref name="strategy"/> after the page was processed</returns>
        public static String GetTextFromPage(PdfPage page, ITextExtractionStrategy strategy, IDictionary <String, IContentOperator
                                                                                                          > additionalContentOperators)
        {
            new PdfCanvasProcessor(strategy, additionalContentOperators).ProcessPageContent(page);

            String extractedText = strategy.GetResultantText();
            return extractedText;
        }
コード例 #13
0
        /// <summary>
        /// Reads the text found in a narrow strip along the left side of a page and
        /// records the results of a second pass in <c>PositionOyAxis</c>.
        /// </summary>
        /// <param name="page">the page to read</param>
        /// <returns>the non-blank text lines found in the strip, one per line</returns>
        internal StringBuilder ParsingOyAxis(PdfPage page)
        {
            // Strip to read: 20 units wide, starting at the left margin, 60 units above the bottom margin
            var regionFilter = new TextRegionEventFilter(
                new Rectangle(Margin.Left, Margin.Bottom + 60, 20,
                              page.GetPageSize().GetHeight() - Margin.Bottom - 160));
            var filteredListener = new FilteredEventListener();

            // First pass: location-based text extraction restricted to the strip
            LocationTextExtractionStrategy locationStrategy =
                filteredListener.AttachEventListener(new LocationTextExtractionStrategy(), regionFilter);

            lock (block)
            {
                var firstPass = new PdfCanvasProcessor(filteredListener);
                firstPass.ProcessPageContent(page);
                firstPass.Reset();
            }

            // Keep only the lines that contain something other than white space
            var result = new StringBuilder();
            string[] rows = locationStrategy.GetResultantText().Split('\n');

            for (int index = 0; index < rows.Length; index++)
            {
                if (rows[index].Trim().Length > 0)
                {
                    result.AppendLine(rows[index]);
                }
            }

            // Second pass: an additional strategy on the same listener and the same strip
            TextExtractionStrategy positionStrategy =
                filteredListener.AttachEventListener(new TextExtractionStrategy(), regionFilter);

            lock (block)
            {
                var secondPass = new PdfCanvasProcessor(filteredListener);
                secondPass.ProcessPageContent(page);
                secondPass.Reset();
            }

            PositionOyAxis = positionStrategy.TextResult.ToArray();

            return result;
        }
コード例 #14
0
        /// <summary>Feeds the content of one page to a listener, using custom content operators.</summary>
        /// <remarks>
        /// The listener and the operator map are both handed to the
        /// <c>PdfCanvasProcessor</c> constructor before the page is processed.
        /// </remarks>
        /// <param name="pageNumber">the number of the page to process</param>
        /// <param name="renderListener">the listener that receives the render callbacks</param>
        /// <param name="additionalContentOperators">map of custom content operators for rendering instructions</param>
        /// <returns>the <paramref name="renderListener"/> that was passed in</returns>
        public virtual E ProcessContent <E>(int pageNumber, E renderListener, IDictionary <String, IContentOperator>
                                            additionalContentOperators)
            where E : IEventListener
        {
            PdfCanvasProcessor contentProcessor =
                new PdfCanvasProcessor(renderListener, additionalContentOperators);

            contentProcessor.ProcessPageContent(pdfDocument.GetPage(pageNumber));

            return renderListener;
        }
コード例 #15
0
        /// <summary>Checks that a page containing a closed empty path is processed without an exception.</summary>
        public virtual void TestClosingEmptyPath()
        {
            String      fileName = "closingEmptyPath.pdf";
            PdfDocument document = new PdfDocument(new PdfReader(sourceFolder + fileName));

            try
            {
                PdfCanvasProcessor processor = new PdfCanvasProcessor(new PdfCanvasProcessorTest.NoOpEventListener());

                // Assert that no exception is thrown when an empty path is handled
                processor.ProcessPageContent(document.GetPage(1));
            }
            finally
            {
                // Release the source file whether or not processing succeeded
                document.Close();
            }
        }
 /// <summary>
 /// Creates the listener and, for bordered tables, immediately processes the page
 /// to collect the tables from their borders.
 /// </summary>
 /// <param name="pdfPage">the PDF page to process</param>
 /// <param name="withBorder">
 /// true if the tables are fully bordered; when false the constructor does nothing
 /// </param>
 public FilterTableEventListener(PdfPage pdfPage, bool withBorder)
 {
     if (!withBorder)
     {
         return;
     }

     this.pdfPage = pdfPage;

     // This instance is its own event listener for the page content
     new PdfCanvasProcessor(this).ProcessPageContent(pdfPage);
     GetTablesFromborders();
 }
コード例 #17
0
 /// <summary>Returns the text of the first page of a PDF file.</summary>
 /// <param name="fileName">path of the PDF file to read</param>
 /// <returns>the text produced by a <c>LocationTextExtractionStrategy</c> for the first page</returns>
 public static string GetResultantText(string fileName)
 {
     using (var document = new PdfDocument(new PdfReader(fileName)))
     {
         var locationStrategy = new LocationTextExtractionStrategy();

         new PdfCanvasProcessor(locationStrategy).ProcessPageContent(document.GetFirstPage());

         return locationStrategy.GetResultantText();
     }
 }
コード例 #18
0
        /// <summary>Processes the first page of a PDF file with an <c>AnalyzeTextListener</c>.</summary>
        /// <param name="filename">path of the PDF file to read</param>
        static void AnalyzeTextFromListener(string filename)
        {
            using (var document = new PdfDocument(new PdfReader(filename)))
            {
                var firstPage = document.GetFirstPage();

                // All output of the analysis is produced by the listener itself
                new PdfCanvasProcessor(new AnalyzeTextListener()).ProcessPageContent(firstPage);
            }
        }
コード例 #19
0
        /// <summary>Processes the first page of a PDF file with a <c>UserPathListener</c>.</summary>
        /// <param name="filename">path of the PDF file to read</param>
        static void ShowLinesFromListener(string filename)
        {
            using (var document = new PdfDocument(new PdfReader(filename)))
            {
                var firstPage = document.GetFirstPage();

                // All output is produced by the listener itself
                new PdfCanvasProcessor(new UserPathListener()).ProcessPageContent(firstPage);
            }
        }
コード例 #20
0
        /// <summary>
        /// Opens "yaxiststar.pdf" with a reader and an in-memory writer and processes every
        /// page with a new <c>_IEventListener_40</c>.
        /// </summary>
        public virtual void ContentStreamProcessorTest()
        {
            PdfReader   source   = new PdfReader(sourceFolder + "yaxiststar.pdf");
            PdfWriter   sink     = new PdfWriter(new ByteArrayOutputStream());
            PdfDocument document = new PdfDocument(source, sink);

            int pageNumber = 1;

            while (pageNumber <= document.GetNumberOfPages())
            {
                // A separate processor and listener for each page
                new PdfCanvasProcessor(new _IEventListener_40()).ProcessPageContent(document.GetPage(pageNumber));
                ++pageNumber;
            }
        }
コード例 #21
0
        /// <summary>Processes a page with a location strategy and returns the rectangles it found.</summary>
        /// <param name="strategy">the strategy that collects the text locations</param>
        /// <param name="page">the page to process</param>
        /// <returns>the rectangle of every location reported by <paramref name="strategy"/>, in reported order</returns>
        private ICollection <Rectangle> ProcessPage(ILocationExtractionStrategy strategy, PdfPage page)
        {
            new PdfCanvasProcessor(strategy).ProcessPageContent(page);

            List <Rectangle> rectangles = new List <Rectangle>();

            foreach (IPdfTextLocation location in strategy.GetResultantLocations())
            {
                Rectangle area = location.GetRectangle();
                rectangles.Add(area);
            }

            return rectangles;
        }
コード例 #22
0
 /// <summary>
 /// Checks that extracting the image fails with the "expected trailing zero bits" message
 /// when the PDF announces a byte-aligned image that is not byte-aligned.
 /// </summary>
 public virtual void ExpectedByteAlignedTiffImageExtractionTest()
 {
     NUnit.Framework.Assert.That(() => {
         //Byte-aligned image is expected in pdf file, but in fact it's not
         String inFileName       = sourceFolder + "expectedByteAlignedTiffImageExtraction.pdf";
         PdfDocument pdfDocument = new PdfDocument(new PdfReader(inFileName));
         try
         {
             GetImageBytesTest.ImageExtractor listener = new GetImageBytesTest.ImageExtractor(this);
             PdfCanvasProcessor processor = new PdfCanvasProcessor(listener);
             processor.ProcessPageContent(pdfDocument.GetPage(1));
         }
         finally
         {
             // The expected exception leaves the lambda early; still release the source file
             pdfDocument.Close();
         }
     }
                                 , NUnit.Framework.Throws.InstanceOf <iText.IO.IOException>().With.Message.EqualTo(MessageFormatUtil.Format(iText.IO.IOException.ExpectedTrailingZeroBitsForByteAlignedLines)));
 }
コード例 #23
0
 /// <summary>
 /// Checks that processing the first page of a PDF with circular references in its
 /// resources ends in an <c>OutOfMemoryException</c>.
 /// </summary>
 public virtual void ParseCircularReferencesInResourcesTest()
 {
     NUnit.Framework.Assert.That(() => {
         PdfDocument document = new PdfDocument(new PdfReader(sourceFolder + "circularReferencesInResources.pdf"));
         PdfCanvasProcessor canvasProcessor = new PdfCanvasProcessor(new PdfCanvasProcessorTest.NoOpEventListener());

         canvasProcessor.ProcessPageContent(document.GetFirstPage());

         // Only reached when processing does not throw
         document.Close();
     }
                                 , NUnit.Framework.Throws.InstanceOf <OutOfMemoryException>());
 }
コード例 #24
0
        /// <summary>Processes the first page of a PDF with a new extraction strategy and returns that strategy.</summary>
        /// <param name="pdfPath">path of the PDF file to read</param>
        /// <param name="layerName">the layer name the strategy is created with</param>
        /// <param name="useActualText">value passed to the strategy's <c>SetUseActualText</c></param>
        /// <returns>the strategy after it has processed the first page</returns>
        public static ExtractionStrategy GetExtractionStrategy(String pdfPath, String layerName, bool useActualText
                                                               )
        {
            PdfDocument        pdfDocument = new PdfDocument(new PdfReader(pdfPath));
            ExtractionStrategy strategy    = new ExtractionStrategy(layerName);

            try
            {
                strategy.SetUseActualText(useActualText);
                PdfCanvasProcessor processor = new PdfCanvasProcessor(strategy);

                processor.ProcessPageContent(pdfDocument.GetFirstPage());
            }
            finally
            {
                // Release the file even when the page cannot be processed
                pdfDocument.Close();
            }

            return(strategy);
        }
コード例 #25
0
        /// <summary>
        /// Processes the first page with a <c>CharacterPositionEventListener</c> and checks the
        /// glyph width it recorded.
        /// </summary>
        public virtual void CheckBboxCalculationForType3FontsWithFontMatrix02()
        {
            String      inputPdf    = sourceFolder + "checkBboxCalculationForType3FontsWithFontMatrix02.pdf";
            PdfDocument pdfDocument = new PdfDocument(new PdfReader(inputPdf));

            try
            {
                GlyphBboxCalculationTest.CharacterPositionEventListener listener = new GlyphBboxCalculationTest.CharacterPositionEventListener
                                                                                       ();
                PdfCanvasProcessor processor = new PdfCanvasProcessor(listener);

                processor.ProcessPageContent(pdfDocument.GetPage(1));
                // font size (36) * |fontMatrix| (1) * glyph width (0.6) = 21.6
                NUnit.Framework.Assert.AreEqual(21.6, listener.glyphWith, 1e-5);
            }
            finally
            {
                // Close the reader even when the assertion fails
                pdfDocument.Close();
            }
        }
コード例 #26
0
        /// <summary>Processes content from the specified page number using the specified listener.</summary>
        /// <remarks>
        /// Processes content from the specified page number using the specified listener.
        /// Every entry of <paramref name="additionalContentOperators"/> is registered on the
        /// processor before the page is processed.
        /// </remarks>
        ///
        /// <param name="pageNumber">the page number to process</param>
        /// <param name="renderListener">the listener that will receive render callbacks</param>
        /// <param name="additionalContentOperators">an optional map of custom ContentOperators for rendering instructions;
        ///     may be null, in which case no operator is registered</param>
        /// <returns>the provided renderListener</returns>
        public virtual E ProcessContent <E>(int pageNumber, E renderListener, IDictionary <String, IContentOperator>
                                            additionalContentOperators)
            where E : IEventListener
        {
            PdfCanvasProcessor processor = new PdfCanvasProcessor(renderListener);

            // The map is documented as optional, so a missing map must not fail the call
            if (additionalContentOperators != null)
            {
                foreach (KeyValuePair <String, IContentOperator> entry in additionalContentOperators)
                {
                    processor.RegisterContentOperator(entry.Key, entry.Value);
                }
            }
            processor.ProcessPageContent(pdfDocument.GetPage(pageNumber));
            return(renderListener);
        }
コード例 #27
0
        /// <summary>
        /// Processes the first page with a <c>CharacterPositionEventListener</c> and checks the
        /// average width of the font program of the first text render info.
        /// </summary>
        public virtual void CheckAverageBboxCalculationForType3FontsWithFontMatrix01Test()
        {
            String      inputPdf    = sourceFolder + "checkAverageBboxCalculationForType3FontsWithFontMatrix01.pdf";
            PdfDocument pdfDocument = new PdfDocument(new PdfReader(inputPdf));

            try
            {
                GlyphBboxCalculationTest.CharacterPositionEventListener listener = new GlyphBboxCalculationTest.CharacterPositionEventListener
                                                                                       ();
                PdfCanvasProcessor processor = new PdfCanvasProcessor(listener);

                processor.ProcessPageContent(pdfDocument.GetPage(1));
                NUnit.Framework.Assert.AreEqual(600, listener.firstTextRenderInfo.GetFont().GetFontProgram().GetAvgWidth()
                                                , 0.01f);
            }
            finally
            {
                // Close the reader even when the assertion fails
                pdfDocument.Close();
            }
        }
コード例 #28
0
        /// <summary>
        /// Converts a page with a blue body background and checks whether the resulting PDF
        /// contains a background rectangle.
        /// </summary>
        /// <param name="exeFileName">the converter executable to run</param>
        /// <param name="commandLineParameter">extra command-line parameter placed before the file arguments; may be null or empty</param>
        /// <param name="expectedBackground">true if exactly one rectangle is expected on the page, false if none</param>
        public void BackgroundTest(string exeFileName, string commandLineParameter, bool expectedBackground)
        {
            HtmlToPdfRunner runner = new HtmlToPdfRunner(exeFileName);

            string html = @"<!DOCTYPE html>
<html>
  <head>
  </head>
  <body style=""background-color:blue;"">
   Test Page
  </body>
</html>";

            using (TempHtmlFile htmlFile = new TempHtmlFile(html))
            using (TempPdfFile pdfFile = new TempPdfFile(this.TestContext))
            {
                string commandLine = string.Empty;

                if (!string.IsNullOrEmpty(commandLineParameter))
                {
                    commandLine += $"{commandLineParameter} ";
                }

                commandLine += $"\"{htmlFile.FilePath}\" \"{pdfFile.FilePath}\"";
                HtmlToPdfRunResult result = runner.Run(commandLine);

                // The tool output is used as the failure message
                Assert.AreEqual(0, result.ExitCode, result.Output);

                using (PdfReader pdfReader = new PdfReader(pdfFile.FilePath))
                using (PdfDocument pdfDocument = new PdfDocument(pdfReader))
                {
                    int pageCount = pdfDocument.GetNumberOfPages();
                    Assert.AreEqual(1, pageCount);

                    PdfPage page = pdfDocument.GetPage(1);

                    RectangleFinder rectangleFinder = new RectangleFinder();

                    PdfCanvasProcessor processor = new PdfCanvasProcessor(rectangleFinder);
                    processor.ProcessPageContent(page);

                    ICollection <Rectangle> boxes = rectangleFinder.GetBoundingBoxes();

                    // ICollection<T> exposes Count directly; no LINQ enumeration needed
                    Assert.AreEqual(expectedBackground ? 1 : 0, boxes.Count);
                }
            }
        }
コード例 #29
0
        /// <summary>
        /// Extracts the text of the first page of "noninvertibleMatrix.pdf" and compares it
        /// with the expected text.
        /// </summary>
        public virtual void TestNoninvertibleMatrix()
        {
            String      fileName    = "noninvertibleMatrix.pdf";
            PdfDocument pdfDocument = new PdfDocument(new PdfReader(sourceFolder + fileName));
            String      resultantText;

            try
            {
                LocationTextExtractionStrategy strategy  = new LocationTextExtractionStrategy();
                PdfCanvasProcessor             processor = new PdfCanvasProcessor(strategy);
                PdfPage page = pdfDocument.GetFirstPage();

                processor.ProcessPageContent(page);
                resultantText = strategy.GetResultantText();
            }
            finally
            {
                // Release the file even when processing throws
                pdfDocument.Close();
            }

            NUnit.Framework.Assert.AreEqual("Hello World!\nHello World!\nHello World!\nHello World! Hello World! Hello World!"
                                            , resultantText);
        }
コード例 #30
0
        /// <summary>Returns the text of the first page of a PDF file.</summary>
        /// <param name="path">path of the PDF file to read</param>
        /// <returns>the text produced by a <c>LocationTextExtractionStrategy</c> for the first page</returns>
        public static string GetPDFFromFile(string path)
        {
            PdfDocument pdfDoc = new PdfDocument(new PdfReader(path));

            try
            {
                LocationTextExtractionStrategy strategy = new LocationTextExtractionStrategy();

                PdfCanvasProcessor parser = new PdfCanvasProcessor(strategy);

                // Known limitation: only the first page is read. Sample documents are all one page long.
                parser.ProcessPageContent(pdfDoc.GetFirstPage());

                return(strategy.GetResultantText());
            }
            finally
            {
                // Release the file even when the page cannot be processed
                pdfDoc.Close();
            }
        }