virtual public void TestWithMultiFilteredRenderListener() {
            PdfReader pdfReader = TestResourceUtils.GetResourceAsPdfReader(TEST_RESOURCES_PATH, "test.pdf");
            PdfReaderContentParser parser = new PdfReaderContentParser(pdfReader);

            float x1, y1, x2, y2;

            MultiFilteredRenderListener listener = new MultiFilteredRenderListener();
            x1 = 122;
            x2 = 144;
            y1 = 841.9f - 151;
            y2 = 841.9f - 163;
            ITextExtractionStrategy region1Listener = listener.AttachRenderListener(
                new LocationTextExtractionStrategy(), new RegionTextRenderFilter(new Rectangle(x1, y1, x2, y2)));

            x1 = 156;
            x2 = 169;
            y1 = 841.9f - 151;
            y2 = 841.9f - 163;
            ITextExtractionStrategy region2Listener = listener.AttachRenderListener(
                new LocationTextExtractionStrategy(), new RegionTextRenderFilter(new Rectangle(x1, y1, x2, y2)));

            parser.ProcessContent(1, new GlyphRenderListener(listener));
            Assert.AreEqual("Your", region1Listener.GetResultantText());
            Assert.AreEqual("dju", region2Listener.GetResultantText());
        }
        virtual public void MultipleFiltersForOneRegionTest()
        {
            PdfReader pdfReader = TestResourceUtils.GetResourceAsPdfReader(TEST_RESOURCES_PATH, "test.pdf");

            Rectangle[] regions = new Rectangle[] {
                new Rectangle(0, 0, 500, 650),
                new Rectangle(0, 0, 400, 400), new Rectangle(200, 200, 500, 600), new Rectangle(100, 100, 450, 400)
            };

            RegionTextRenderFilter[] regionFilters = new RegionTextRenderFilter[regions.Length];
            for (int i = 0; i < regions.Length; i++)
            {
                regionFilters[i] = new RegionTextRenderFilter(regions[i]);
            }

            MultiFilteredRenderListener    listener           = new MultiFilteredRenderListener();
            LocationTextExtractionStrategy extractionStrategy =
                (LocationTextExtractionStrategy)
                listener.AttachRenderListener(new LocationTextExtractionStrategy(), regionFilters);

            new PdfReaderContentParser(pdfReader).ProcessContent(1, listener);
            String actualText = extractionStrategy.GetResultantText();

            String expectedText = PdfTextExtractor.GetTextFromPage(pdfReader, 1,
                                                                   new FilteredTextRenderListener(new LocationTextExtractionStrategy(), regionFilters));

            Assert.AreEqual(expectedText, actualText);
        }
        virtual public void TestWithMultiFilteredRenderListener()
        {
            PdfReader pdfReader           = TestResourceUtils.GetResourceAsPdfReader(TEST_RESOURCES_PATH, "test.pdf");
            PdfReaderContentParser parser = new PdfReaderContentParser(pdfReader);

            float x1, y1, x2, y2;

            MultiFilteredRenderListener listener = new MultiFilteredRenderListener();

            x1 = 122;
            x2 = 144;
            y1 = 841.9f - 151;
            y2 = 841.9f - 163;
            ITextExtractionStrategy region1Listener = listener.AttachRenderListener(
                new LocationTextExtractionStrategy(), new RegionTextRenderFilter(new Rectangle(x1, y1, x2, y2)));

            x1 = 156;
            x2 = 169;
            y1 = 841.9f - 151;
            y2 = 841.9f - 163;
            ITextExtractionStrategy region2Listener = listener.AttachRenderListener(
                new LocationTextExtractionStrategy(), new RegionTextRenderFilter(new Rectangle(x1, y1, x2, y2)));

            parser.ProcessContent(1, new GlyphRenderListener(listener));
            Assert.AreEqual("Your", region1Listener.GetResultantText());
            Assert.AreEqual("dju", region2Listener.GetResultantText());
        }
        virtual public void Test()
        {
            PdfReader pdfReader = TestResourceUtils.GetResourceAsPdfReader(TEST_RESOURCES_PATH, "test.pdf");

            String[] expectedText = new String[] {
                "PostScript Compatibility",
                "Because the PostScript language does not support the transparent imaging \n" +
                "model, PDF 1.4 consumer applications must have some means for converting the \n" +
                "appearance of a document that uses transparency to a purely opaque description \n" +
                "for printing on PostScript output devices. Similar techniques can also be used to \n" +
                "convert such documents to a form that can be correctly viewed by PDF 1.3 and \n" +
                "earlier consumers. ",
                "Otherwise, flatten the colors to some assumed device color space with pre-\n" +
                "determined calibration. In the generated PostScript output, paint the flattened \n" +
                "colors in a CIE-based color space having that calibration. "
            };

            Rectangle[] regions = new Rectangle[] {
                new Rectangle(90, 605, 220, 581),
                new Rectangle(80, 578, 450, 486), new Rectangle(103, 196, 460, 143)
            };

            RegionTextRenderFilter[] regionFilters = new RegionTextRenderFilter[regions.Length];
            for (int i = 0; i < regions.Length; i++)
            {
                regionFilters[i] = new RegionTextRenderFilter(regions[i]);
            }


            MultiFilteredRenderListener listener = new MultiFilteredRenderListener();

            LocationTextExtractionStrategy[] extractionStrategies = new LocationTextExtractionStrategy[regions.Length];
            for (int i = 0; i < regions.Length; i++)
            {
                extractionStrategies[i] =
                    (LocationTextExtractionStrategy)
                    listener.AttachRenderListener(new LocationTextExtractionStrategy(), regionFilters[i]);
            }

            new PdfReaderContentParser(pdfReader).ProcessContent(1, listener);

            for (int i = 0; i < regions.Length; i++)
            {
                String actualText = extractionStrategies[i].GetResultantText();
                Assert.AreEqual(expectedText[i], actualText);
            }
        }
Exemplo n.º 5
0
        public void Test() {
            PdfReader pdfReader = TestResourceUtils.GetResourceAsPdfReader(TEST_RESOURCES_PATH, "test.pdf");

            String[] expectedText = new String[] {
                "PostScript Compatibility",
                "Because the PostScript language does not support the transparent imaging \n" +
                "model, PDF 1.4 consumer applications must have some means for converting the \n" +
                "appearance of a document that uses transparency to a purely opaque description \n" +
                "for printing on PostScript output devices. Similar techniques can also be used to \n" +
                "convert such documents to a form that can be correctly viewed by PDF 1.3 and \n" +
                "earlier consumers. ",
                "Otherwise, flatten the colors to some assumed device color space with pre-\n" +
                "determined calibration. In the generated PostScript output, paint the flattened \n" +
                "colors in a CIE-based color space having that calibration. "
            };

            Rectangle[] regions = new Rectangle[] {
                new Rectangle(90, 605, 220, 581),
                new Rectangle(80, 578, 450, 486), new Rectangle(103, 196, 460, 143)
            };

            RegionTextRenderFilter[] regionFilters = new RegionTextRenderFilter[regions.Length];
            for (int i = 0; i < regions.Length; i++)
                regionFilters[i] = new RegionTextRenderFilter(regions[i]);


            MultiFilteredRenderListener listener = new MultiFilteredRenderListener();
            LocationTextExtractionStrategy[] extractionStrategies = new LocationTextExtractionStrategy[regions.Length];
            for (int i = 0; i < regions.Length; i++)
                extractionStrategies[i] =
                    (LocationTextExtractionStrategy)
                        listener.AttachRenderListener(new LocationTextExtractionStrategy(), regionFilters[i]);

            new PdfReaderContentParser(pdfReader).ProcessContent(1, listener);

            for (int i = 0; i < regions.Length; i++) {
                String actualText = extractionStrategies[i].GetResultantText();
                Assert.AreEqual(expectedText[i], actualText);
            }
        }
Exemplo n.º 6
0
        public void MultipleFiltersForOneRegionTest() {
            PdfReader pdfReader = TestResourceUtils.GetResourceAsPdfReader(TEST_RESOURCES_PATH, "test.pdf");

            Rectangle[] regions = new Rectangle[] {
                new Rectangle(0, 0, 500, 650),
                new Rectangle(0, 0, 400, 400), new Rectangle(200, 200, 500, 600), new Rectangle(100, 100, 450, 400)
            };

            RegionTextRenderFilter[] regionFilters = new RegionTextRenderFilter[regions.Length];
            for (int i = 0; i < regions.Length; i++)
                regionFilters[i] = new RegionTextRenderFilter(regions[i]);

            MultiFilteredRenderListener listener = new MultiFilteredRenderListener();
            LocationTextExtractionStrategy extractionStrategy =
                (LocationTextExtractionStrategy)
                    listener.AttachRenderListener(new LocationTextExtractionStrategy(), regionFilters);
            new PdfReaderContentParser(pdfReader).ProcessContent(1, listener);
            String actualText = extractionStrategy.GetResultantText();

            String expectedText = PdfTextExtractor.GetTextFromPage(pdfReader, 1,
                new FilteredTextRenderListener(new LocationTextExtractionStrategy(), regionFilters));

            Assert.AreEqual(expectedText, actualText);
        }