Пример #1
0
        // Runs OCR with the custom engine using named image and text layers, then checks that
        // both layers exist and are on, and that the recognized text is found only in the text layer.
        public virtual void TestTextFromPdfLayers()
        {
            String testName = "testTextFromPdfLayers";
            String imageLayerName = "Image Layer";
            String textLayerName = "Text Layer";
            FileInfo sourceImage = new FileInfo(PdfHelper.GetDefaultImagePath());
            String resultPdf = PdfHelper.GetTargetDirectory() + testName + ".pdf";

            OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties();
            creatorProperties.SetImageLayerName(imageLayerName);
            creatorProperties.SetTextLayerName(textLayerName);

            OcrPdfCreator creator = new OcrPdfCreator(new CustomOcrEngine(), creatorProperties);
            PdfDocument document = creator.CreatePdf(
                JavaCollectionsUtil.SingletonList<FileInfo>(sourceImage),
                PdfHelper.GetPdfWriter(resultPdf));
            NUnit.Framework.Assert.IsNotNull(document);

            // Exactly two layers are expected: the image layer first, the text layer second.
            IList<PdfLayer> layers = document.GetCatalog().GetOCProperties(true).GetLayers();
            NUnit.Framework.Assert.AreEqual(2, layers.Count);

            PdfLayer firstLayer = layers[0];
            NUnit.Framework.Assert.AreEqual(imageLayerName, firstLayer.GetPdfObject().Get(PdfName.Name).ToString());
            NUnit.Framework.Assert.IsTrue(firstLayer.IsOn());

            PdfLayer secondLayer = layers[1];
            NUnit.Framework.Assert.AreEqual(textLayerName, secondLayer.GetPdfObject().Get(PdfName.Name).ToString());
            NUnit.Framework.Assert.IsTrue(secondLayer.IsOn());

            document.Close();

            // The text is read back from the written file, one layer at a time.
            NUnit.Framework.Assert.AreEqual(PdfHelper.DEFAULT_TEXT, PdfHelper.GetTextFromPdfLayer(resultPdf, textLayerName));
            NUnit.Framework.Assert.AreEqual("", PdfHelper.GetTextFromPdfLayer(resultPdf, imageLayerName));
        }
Пример #2
0
        // Runs Spanish OCR on spanish_01.jpg, writes the result through CreatePdfA with the RGB
        // output intent and compares the produced file by content with the reference document.
        public virtual void ComparePdfA3uRGBSpanishJPG()
        {
            String testName = "comparePdfA3uRGBSpanishJPG";
            String filename = "spanish_01";
            String expectedPdfPath = TEST_DOCUMENTS_DIRECTORY + filename + "_a3u.pdf";
            String resultPdfPath = GetTargetDirectory() + filename + "_" + testName + "_a3u.pdf";

            // Engine properties are built from the reader's current ones, adjusted, and set back on the reader.
            Tesseract4OcrEngineProperties engineProperties =
                new Tesseract4OcrEngineProperties(tesseractReader.GetTesseract4OcrEngineProperties());
            engineProperties.SetPathToTessData(GetTessDataDirectory());
            engineProperties.SetLanguages(JavaCollectionsUtil.SingletonList<String>("spa"));
            tesseractReader.SetTesseract4OcrEngineProperties(engineProperties);

            OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties();
            creatorProperties.SetPdfLang("en-US");
            creatorProperties.SetTitle("");
            creatorProperties.SetTextColor(DeviceRgb.BLACK);

            OcrPdfCreator creator = new OcrPdfCreator(tesseractReader, creatorProperties);
            FileInfo sourceImage = new FileInfo(TEST_IMAGES_DIRECTORY + filename + ".jpg");
            PdfDocument document = creator.CreatePdfA(
                JavaCollectionsUtil.SingletonList<FileInfo>(sourceImage),
                GetPdfWriter(resultPdfPath),
                GetRGBPdfOutputIntent());
            NUnit.Framework.Assert.IsNotNull(document);
            document.Close();

            // The test expects CompareByContent to return null for the produced and reference files.
            String differences = new CompareTool().CompareByContent(resultPdfPath, expectedPdfPath, GetTargetDirectory(), "diff_");
            NUnit.Framework.Assert.IsNull(differences);
        }
Пример #3
0
        // Builds an in-memory PDF/A-2B document with a custom sRGB output intent, sets a Separation
        // colour and then a DeviceN colour (backed by an NChannel colour space whose Colorants
        // dictionary declares the same separation) on the page canvas, and closes the document.
        // There are no explicit assertions: the test passes when nothing throws.
        public virtual void ColourSpaceTest03()
        {
            PdfWriter writer = new PdfWriter(new ByteArrayOutputStream());

            // The ICC profile file handle is released deterministically, even when the test fails midway.
            using (Stream @is = new FileStream(sourceFolder + "sRGB Color Space Profile.icm", FileMode.Open, FileAccess.Read))
            {
                PdfADocument doc = new PdfADocument(writer, PdfAConformanceLevel.PDF_A_2B, new PdfOutputIntent("Custom", ""
                                                                                                              , "http://www.color.org", "sRGB IEC61966-2.1", @is));
                PdfPage       page           = doc.AddNewPage();
                PdfColorSpace alternateSpace = new PdfDeviceCs.Rgb();

                // Tint transformation function is a dictionary (Type 2 function built from the arrays below).
                PdfArray  domain = new PdfArray(new float[] { 0, 1 });
                PdfArray  range  = new PdfArray(new float[] { 0, 1, 0, 1, 0, 1 });
                PdfArray  C0     = new PdfArray(new float[] { 0, 0, 0 });
                PdfArray  C1     = new PdfArray(new float[] { 1, 1, 1 });
                PdfNumber n      = new PdfNumber(1);

                PdfFunction.Type2 type2          = new PdfFunction.Type2(domain, range, C0, C1, n);
                PdfCanvas         canvas         = new PdfCanvas(page);
                String            separationName = "separationTest";

                canvas.SetColor(new Separation(separationName, alternateSpace, type2, 0.5f), true);

                // The NChannel attributes list the separation under Colorants, using the same
                // alternate space and tint transform as the Separation colour set above.
                PdfDictionary attributes    = new PdfDictionary();
                PdfDictionary colorantsDict = new PdfDictionary();

                colorantsDict.Put(new PdfName(separationName), new PdfSpecialCs.Separation(separationName, alternateSpace,
                                                                                           type2).GetPdfObject());
                attributes.Put(PdfName.Colorants, colorantsDict);
                DeviceN deviceN = new DeviceN(new PdfSpecialCs.NChannel(JavaCollectionsUtil.SingletonList(separationName),
                                                                        alternateSpace, type2, attributes), new float[] { 0.5f });

                canvas.SetColor(deviceN, true);
                doc.Close();
            }
        }
Пример #4
0
        /// <summary>
        /// Creates a layer through <c>PrepareNewLayer()</c> and sets its intents to a
        /// single-element list containing <c>PdfName.Design</c>.
        /// </summary>
        /// <returns>the prepared layer.</returns>
        internal static PdfLayer PrepareLayerDesignIntent()
        {
            PdfLayer designLayer = PrepareNewLayer();
            designLayer.SetIntents(JavaCollectionsUtil.SingletonList(PdfName.Design));

            return designLayer;
        }
Пример #5
0
        // Runs OCR on a multi-page TIFF with image preprocessing switched off and checks the text
        // read back from the resulting PDF. The reader's original preprocessing flag is restored
        // in a finally block, so a failing assertion cannot leak the changed setting into other tests.
        public virtual void TestTextFromMultiPageTiff()
        {
            String   testName   = "testTextFromMultiPageTiff";
            bool     preprocess = tesseractReader.GetTesseract4OcrEngineProperties().IsPreprocessingImages();
            String   path       = TEST_IMAGES_DIRECTORY + "multîpage.tiff";
            String   pdfPath    = GetTargetDirectory() + testName + ".pdf";
            FileInfo file       = new FileInfo(path);

            try
            {
                tesseractReader.SetTesseract4OcrEngineProperties(
                    tesseractReader.GetTesseract4OcrEngineProperties().SetPreprocessingImages(false));
                OcrPdfCreator ocrPdfCreator = new OcrPdfCreator(tesseractReader);
                PdfDocument   doc           = ocrPdfCreator.CreatePdf(JavaCollectionsUtil.SingletonList<FileInfo>(file),
                                                                      GetPdfWriter(pdfPath));

                NUnit.Framework.Assert.IsNotNull(doc);

                // No layer names were configured, so the document is expected to have no layers.
                IList<PdfLayer> layers = doc.GetCatalog().GetOCProperties(true).GetLayers();

                NUnit.Framework.Assert.AreEqual(0, layers.Count);
                doc.Close();

                // With no layers, the text is read with a null layer name; the last argument (5)
                // is presumably the page number, matching the expected "Page 5" - TODO confirm.
                String expectedOutput = "Multipage\nTIFF\nExample\nPage 5";

                NUnit.Framework.Assert.AreEqual(expectedOutput, GetTextFromPdfLayer(pdfPath, null, 5));
                NUnit.Framework.Assert.IsFalse(tesseractReader.GetTesseract4OcrEngineProperties().IsPreprocessingImages());
            }
            finally
            {
                // Always put the preprocessing flag back to the value captured at the start.
                tesseractReader.SetTesseract4OcrEngineProperties(
                    tesseractReader.GetTesseract4OcrEngineProperties().SetPreprocessingImages(preprocess));
            }
        }
Пример #6
0
        // Runs OCR on numbers_01.jpg with BY_WORDS text positioning, writes the result through
        // CreatePdfA with the CMYK output intent and compares it by content with the reference file.
        // All assertions live in the try block; the finally block only resets the text positioning
        // to BY_LINES, so a failure inside the try is never masked by a second assertion failure
        // and the reset is never skipped.
        public virtual void ComparePdfA3uCMYKColorSpaceJPG()
        {
            String testName        = "comparePdfA3uCMYKColorSpaceJPG";
            String filename        = "numbers_01";
            String expectedPdfPath = TEST_DOCUMENTS_DIRECTORY + filename + "_a3u.pdf";
            String resultPdfPath   = GetTargetDirectory() + filename + "_" + testName + "_a3u.pdf";

            try {
                OcrPdfCreatorProperties ocrPdfCreatorProperties = new OcrPdfCreatorProperties();
                ocrPdfCreatorProperties.SetPdfLang("en-US");
                ocrPdfCreatorProperties.SetTitle("");
                OcrPdfCreator ocrPdfCreator = new OcrPdfCreator(tesseractReader, ocrPdfCreatorProperties);
                tesseractReader.SetTesseract4OcrEngineProperties(
                    tesseractReader.GetTesseract4OcrEngineProperties().SetTextPositioning(TextPositioning.BY_WORDS));

                // The creator must report the engine it was constructed with; setting it again is exercised too.
                NUnit.Framework.Assert.AreEqual(tesseractReader, ocrPdfCreator.GetOcrEngine());
                ocrPdfCreator.SetOcrEngine(tesseractReader);

                PdfDocument doc = ocrPdfCreator.CreatePdfA(
                    JavaCollectionsUtil.SingletonList<FileInfo>(new FileInfo(TEST_IMAGES_DIRECTORY + filename + ".jpg")),
                    GetPdfWriter(resultPdfPath),
                    GetCMYKPdfOutputIntent());
                NUnit.Framework.Assert.IsNotNull(doc);
                doc.Close();
                NUnit.Framework.Assert.IsNull(new CompareTool().CompareByContent(resultPdfPath, expectedPdfPath,
                                                                                 GetTargetDirectory(), "diff_"));

                // The positioning chosen above must still be in effect once OCR has finished.
                NUnit.Framework.Assert.AreEqual(TextPositioning.BY_WORDS,
                                                tesseractReader.GetTesseract4OcrEngineProperties().GetTextPositioning());
            }
            finally {
                // Cleanup only: reset the shared reader to BY_LINES regardless of the test outcome.
                tesseractReader.SetTesseract4OcrEngineProperties(
                    tesseractReader.GetTesseract4OcrEngineProperties().SetTextPositioning(TextPositioning.BY_LINES));
            }
        }
        // Produces hOCR output for halftone.jpg and checks that the parsed page data for page 1
        // contains the expected single words (BY_WORDS parsing) and whole lines (BY_LINES parsing).
        public virtual void HocrOutputFromHalftoneFile()
        {
            String[] expectedWords =
            {
                "Silliness", "Enablers", "You", "Middle", "André", "QUANTITY", "DESCRIPTION"
            };
            String[] expectedLines =
            {
                "Silliness Enablers", "QUANTITY DESCRIPTION UNIT PRICE TOTAL"
            };
            FileInfo sourceImage = new FileInfo(TEST_IMAGES_DIRECTORY + "halftone.jpg");
            FileInfo hocrFile = new FileInfo(GetTargetDirectory() + "hocrOutputFromHalftoneFile.hocr");

            tesseractReader.DoTesseractOcr(sourceImage, hocrFile, OutputFormat.HOCR);

            // The same hOCR file is parsed twice: once per word, once per line.
            IDictionary<int, IList<TextInfo>> parsed = TesseractHelper.ParseHocrFile(
                JavaCollectionsUtil.SingletonList<FileInfo>(hocrFile), TextPositioning.BY_WORDS);
            foreach (String word in expectedWords)
            {
                NUnit.Framework.Assert.IsTrue(FindTextInPageData(parsed, 1, word));
            }

            parsed = TesseractHelper.ParseHocrFile(
                JavaCollectionsUtil.SingletonList<FileInfo>(hocrFile), TextPositioning.BY_LINES);
            foreach (String line in expectedLines)
            {
                NUnit.Framework.Assert.IsTrue(FindTextInPageData(parsed, 1, line));
            }
        }
Пример #8
0
        /// <summary>Gets a collection of current intents specified for this layer.</summary>
        /// <remarks>
        /// The <c>Intent</c> entry of the layer dictionary is read. A single name yields a
        /// one-element collection; an array yields its name elements (non-name elements are
        /// skipped). In every other case the default,
        /// <see cref="iText.Kernel.Pdf.PdfName.View"/>
        /// , is returned as the only element.
        /// </remarks>
        /// <returns>the collection of intents.</returns>
        public virtual ICollection<PdfName> GetIntents()
        {
            PdfObject intentEntry = GetPdfObject().Get(PdfName.Intent);

            // Single intent stored directly as a name.
            if (intentEntry is PdfName)
            {
                return JavaCollectionsUtil.SingletonList((PdfName)intentEntry);
            }

            // Entry is absent or of an unexpected type: fall back to the default intent.
            if (!(intentEntry is PdfArray))
            {
                return JavaCollectionsUtil.SingletonList(PdfName.View);
            }

            // Several intents stored as an array; only name elements are collected.
            PdfArray intentArray = (PdfArray)intentEntry;
            ICollection<PdfName> names = new List<PdfName>(intentArray.Size());
            foreach (PdfObject element in intentArray)
            {
                if (element is PdfName)
                {
                    names.Add((PdfName)element);
                }
            }
            return names;
        }
        // Runs OCR on a multi-page TIFF with a magenta CMYK text colour and a text layer named
        // "Text1", then reopens the written PDF and checks the fill colour reported by the
        // extraction strategy for page 1.
        public virtual void TestFontColorInMultiPagePdf()
        {
            String   testName = "testFontColorInMultiPagePdf";
            String   path     = TEST_IMAGES_DIRECTORY + "multîpage.tiff";
            String   pdfPath  = GetTargetDirectory() + testName + ".pdf";
            FileInfo file     = new FileInfo(path);

            tesseractReader.SetTesseract4OcrEngineProperties(
                tesseractReader.GetTesseract4OcrEngineProperties().SetPreprocessingImages(false));
            OcrPdfCreatorProperties ocrPdfCreatorProperties = new OcrPdfCreatorProperties();

            ocrPdfCreatorProperties.SetTextLayerName("Text1");
            Color color = DeviceCmyk.MAGENTA;

            ocrPdfCreatorProperties.SetTextColor(color);
            OcrPdfCreator ocrPdfCreator = new OcrPdfCreator(tesseractReader, ocrPdfCreatorProperties);
            PdfDocument   doc           = ocrPdfCreator.CreatePdf(JavaCollectionsUtil.SingletonList<FileInfo>(file),
                                                                  GetPdfWriter(pdfPath));

            NUnit.Framework.Assert.IsNotNull(doc);
            doc.Close();

            PdfDocument pdfDocument = new PdfDocument(new PdfReader(pdfPath));
            try
            {
                IntegrationTestHelper.ExtractionStrategy strategy = new IntegrationTestHelper.ExtractionStrategy("Text1");
                PdfCanvasProcessor processor = new PdfCanvasProcessor(strategy);

                processor.ProcessPageContent(pdfDocument.GetPage(1));
                Color fillColor = strategy.GetFillColor();

                // Expected value first, actual second, so a failure message reads correctly.
                NUnit.Framework.Assert.AreEqual(color, fillColor);
            }
            finally
            {
                // Release the reopened file even when processing or the assertion fails.
                pdfDocument.Close();
            }
        }
Пример #10
0
        // Takes the "BMC" content operator out of a processor, registers it again and invokes it
        // with a null operator and a single null operand. No assertions: passes when nothing throws.
        public virtual void BeginMarkerContentOperatorTest()
        {
            PdfCanvasProcessor canvasProcessor = new _PdfCanvasProcessor_42(new FilteredEventListener());

            // Registering null is used to obtain the operator via the return value
            // (presumably the previously registered one - TODO confirm), which is then put back.
            IContentOperator bmcOperator = canvasProcessor.RegisterContentOperator("BMC", null);
            canvasProcessor.RegisterContentOperator("BMC", bmcOperator);

            bmcOperator.Invoke(
                canvasProcessor,
                null,
                JavaCollectionsUtil.SingletonList((PdfObject)null));
        }
 // Requesting the language "spa_new" must fail with a Tesseract4OcrException whose message is
 // INCORRECT_LANGUAGE formatted with "spa_new.traineddata" and the language tess data directory.
 public virtual void TestIncorrectLanguage()
 {
     String expectedMessage = MessageFormatUtil.Format(
         Tesseract4OcrException.INCORRECT_LANGUAGE,
         "spa_new.traineddata",
         new FileInfo(LANG_TESS_DATA_DIRECTORY).FullName);

     NUnit.Framework.Assert.That(
         () =>
         {
             FileInfo sourceImage = new FileInfo(TEST_IMAGES_DIRECTORY + "spanish_01.jpg");
             GetTextFromPdf(tesseractReader, sourceImage, JavaCollectionsUtil.SingletonList<String>("spa_new"));
         },
         NUnit.Framework.Throws.InstanceOf<Tesseract4OcrException>().With.Message.EqualTo(expectedMessage));
 }
Пример #12
0
 /// <summary>
 /// Returns the single substitution rule of this sub-table when the first input coverage
 /// contains the start glyph and the glyph is not skipped for the current lookup flag;
 /// otherwise returns an empty list.
 /// </summary>
 /// <param name="startId">identifier of the start glyph</param>
 /// <returns>a one-element list holding the substitution rule, or an empty list.</returns>
 protected internal override IList<ContextualSubstRule> GetSetOfRulesForStartGlyph(int startId)
 {
     SubTableLookup6Format3.SubstRuleFormat3 format3Rule =
         (SubTableLookup6Format3.SubstRuleFormat3)this.substitutionRule;

     // The skip check is evaluated only when the coverage matches (short-circuit preserved).
     bool applies = format3Rule.inputCoverages[0].Contains(startId) && !openReader.IsSkip(startId, lookupFlag);
     if (!applies)
     {
         return JavaCollectionsUtil.EmptyList<ContextualSubstRule>();
     }
     return JavaCollectionsUtil.SingletonList<ContextualSubstRule>(this.substitutionRule);
 }
Пример #13
0
 /// <summary>
 /// Verifies that the tesseract executable can be run from the provided path
 /// by launching it with the <c>--version</c> argument.
 /// </summary>
 /// <param name="execPath">path to tesseract executable</param>
 /// <exception cref="Tesseract4OcrException">
 /// with the <c>TESSERACT_NOT_FOUND</c> message, wrapping the original failure,
 /// when running the command throws a <c>Tesseract4OcrException</c>.
 /// </exception>
 private void CheckTesseractInstalled(String execPath)
 {
     try
     {
         TesseractHelper.RunCommand(execPath, JavaCollectionsUtil.SingletonList<String>("--version"));
     }
     catch (Tesseract4OcrException launchFailure)
     {
         // Re-thrown with the "not found" message; the original exception is kept as the cause.
         throw new Tesseract4OcrException(Tesseract4OcrException.TESSERACT_NOT_FOUND, launchFailure);
     }
 }
 /// <summary>
 /// Opens <paramref name="input"/> for writing to <paramref name="output"/>, cleans the given
 /// region on page 1 and then calls <c>DrawCleanupRegionOnPage</c> for the same region.
 /// </summary>
 /// <param name="cleanupRegion">rectangle to clean up on the first page</param>
 /// <param name="input">path of the source PDF</param>
 /// <param name="output">path of the resulting PDF</param>
 private static void CleanFirstPageAndDrawCleanupRegion(Rectangle cleanupRegion, String input, String output)
 {
     using (PdfDocument document = new PdfDocument(new PdfReader(input), new PdfWriter(output)))
     {
         iText.PdfCleanup.PdfCleanUpLocation firstPageLocation =
             new iText.PdfCleanup.PdfCleanUpLocation(1, cleanupRegion);

         PdfCleaner.CleanUp(document, JavaCollectionsUtil.SingletonList(firstPageLocation));
         DrawCleanupRegionOnPage(document, cleanupRegion);
     }
 }
        /// <summary>
        /// Combines the given attributes dictionary with the attributes already present on the
        /// backing element (via <c>CombineAttributesList</c>) and stores the result on the element.
        /// </summary>
        /// <param name="attributes">attributes dictionary to add</param>
        /// <returns>this instance.</returns>
        public override AccessibilityProperties AddAttributes(PdfDictionary attributes)
        {
            PdfObject existingAttributes = backingElem.GetAttributes(false);

            // The element's R entry is passed along as the third argument of the combine step.
            PdfObject merged = CombineAttributesList(
                existingAttributes,
                JavaCollectionsUtil.SingletonList(attributes),
                backingElem.GetPdfObject().GetAsNumber(PdfName.R));
            backingElem.SetAttributes(merged);

            return this;
        }
 // With the tess data path pointing at the script directory, requesting "English" must fail
 // with a Tesseract4OcrException whose message is INCORRECT_LANGUAGE formatted with
 // "English.traineddata" and the script tess data directory.
 public virtual void TestIncorrectScriptsName()
 {
     String expectedMessage = MessageFormatUtil.Format(
         Tesseract4OcrException.INCORRECT_LANGUAGE,
         "English.traineddata",
         new FileInfo(SCRIPT_TESS_DATA_DIRECTORY).FullName);

     NUnit.Framework.Assert.That(
         () =>
         {
             FileInfo sourceImage = new FileInfo(TEST_IMAGES_DIRECTORY + "spanish_01.jpg");
             tesseractReader.SetTesseract4OcrEngineProperties(
                 tesseractReader.GetTesseract4OcrEngineProperties().SetPathToTessData(new FileInfo(SCRIPT_TESS_DATA_DIRECTORY)));
             GetTextFromPdf(tesseractReader, sourceImage, JavaCollectionsUtil.SingletonList<String>("English"));
         },
         NUnit.Framework.Throws.InstanceOf<Tesseract4OcrException>().With.Message.EqualTo(expectedMessage));
 }
Пример #17
0
        // Cleans a 50x50 region at (410, 410) on page 1 of watermarkAnnotation.pdf, using yellow
        // as the location's colour argument, and compares the output with the reference file.
        public virtual void CleanWatermarkAnnotation()
        {
            // TODO: update cmp file after DEVSIX-2471 fix
            String sourcePdf = inputPath + "watermarkAnnotation.pdf";
            String cleanedPdf = outputPath + "watermarkAnnotation.pdf";
            String referencePdf = inputPath + "cmp_watermarkAnnotation.pdf";

            iText.PdfCleanup.PdfCleanUpLocation location = new iText.PdfCleanup.PdfCleanUpLocation(
                1, new Rectangle(410, 410, 50, 50), ColorConstants.YELLOW);

            CleanUp(sourcePdf, cleanedPdf, JavaCollectionsUtil.SingletonList(location));
            CompareByContent(referencePdf, cleanedPdf, outputPath);
        }
        // Runs English OCR on the multi-page TIFF through GetTextFromPdf (called with 5,
        // presumably the page number - TODO confirm) and compares the text with the expected output.
        public virtual void TestInputMultipagesTIFFWithPreprocessing()
        {
            FileInfo tiff = new FileInfo(TEST_IMAGES_DIRECTORY + "multîpage.tiff");
            String expectedText = "Multipage\nTIFF\nExample\nPage 5";

            String recognizedText = GetTextFromPdf(
                tesseractReader,
                tiff,
                5,
                JavaCollectionsUtil.SingletonList<String>("eng"));

            NUnit.Framework.Assert.IsNotNull(recognizedText);
            NUnit.Framework.Assert.AreEqual(expectedText, recognizedText);
        }
 // Pointing the tess data path at "test/" must make OCR fail with a Tesseract4OcrException
 // carrying the PATH_TO_TESS_DATA_DIRECTORY_IS_INVALID message.
 public virtual void TestPathToTessDataWithoutData()
 {
     NUnit.Framework.Assert.That(
         () =>
         {
             FileInfo sourceImage = new FileInfo(TEST_IMAGES_DIRECTORY + "spanish_01.jpg");
             tesseractReader.SetTesseract4OcrEngineProperties(
                 tesseractReader.GetTesseract4OcrEngineProperties().SetPathToTessData(new FileInfo("test/")));
             GetTextFromPdf(tesseractReader, sourceImage, JavaCollectionsUtil.SingletonList<String>("eng"));
         },
         NUnit.Framework.Throws.InstanceOf<Tesseract4OcrException>()
             .With.Message.EqualTo(Tesseract4OcrException.PATH_TO_TESS_DATA_DIRECTORY_IS_INVALID));
 }
Пример #20
0
 /// <summary>
 /// Returns the page of this range as a one-element list when it does not exceed
 /// the number of pages, and an empty list otherwise.
 /// </summary>
 /// <param name="nbPages">number of pages to check the page against</param>
 /// <returns>a list with the single page, or an empty list.</returns>
 public virtual IList<int> GetAllPagesInRange(int nbPages)
 {
     // Page lies beyond the given page count: nothing to return.
     if (page > nbPages)
     {
         return JavaCollectionsUtil.EmptyList<int>();
     }
     return JavaCollectionsUtil.SingletonList(page);
 }
Пример #21
0
        // Runs OCR on bengali_01.jpeg with the "Bengali" script data and checks that the text
        // read back from the PDF starts with the expected Bengali string.
        public virtual void TestBengaliScript()
        {
            FileInfo sourceImage = new FileInfo(TEST_IMAGES_DIRECTORY + "bengali_01.jpeg");
            String expectedStart = "ইংরজে";

            // Script data is read from the script tess data directory.
            tesseractReader.SetTesseract4OcrEngineProperties(
                tesseractReader.GetTesseract4OcrEngineProperties().SetPathToTessData(new FileInfo(SCRIPT_TESS_DATA_DIRECTORY)));

            // Two font paths are passed to the helper: FreeSans and Kosugi.
            String recognizedText = GetTextFromPdf(
                tesseractReader,
                sourceImage,
                1,
                JavaCollectionsUtil.SingletonList<String>("Bengali"),
                JavaUtil.ArraysAsList(FREE_SANS_FONT_PATH, KOSUGI_FONT_PATH));

            NUnit.Framework.Assert.IsTrue(recognizedText.StartsWith(expectedStart));
        }
Пример #22
0
        /// <summary>
        /// Builds a CRL with the configured builder and CA private key and returns it as a
        /// single-element collection. Neither parameter is used by this implementation.
        /// </summary>
        /// <param name="checkCert">certificate to check (unused here)</param>
        /// <param name="url">CRL url (unused here)</param>
        /// <returns>
        /// a collection holding the encoded CRL, or <c>null</c> when building the CRL throws.
        /// </returns>
        public virtual ICollection<byte[]> GetEncoded(X509Certificate checkCert, String url)
        {
            try
            {
                byte[] encodedCrl = crlBuilder.MakeCrl(caPrivateKey);
                return JavaCollectionsUtil.SingletonList(encodedCrl);
            }
            catch (Exception)
            {
                // NOTE(review): every failure is swallowed and reported as null; callers cannot
                // tell "no CRL" from "CRL generation failed".
                return null;
            }
        }
Пример #23
0
 /// <summary>
 /// Returns this object as the only rule set when its selector matches the element;
 /// otherwise delegates to the base implementation.
 /// </summary>
 /// <param name="element">node the selector is matched against</param>
 /// <param name="deviceDescription">media device description, forwarded to the base implementation</param>
 /// <returns>the list of applicable CSS rule sets.</returns>
 public override IList<iText.StyledXmlParser.Css.CssRuleSet> GetCssRuleSets(INode element,
                                                                            MediaDeviceDescription deviceDescription)
 {
     if (!selector.Matches(element))
     {
         return base.GetCssRuleSets(element, deviceDescription);
     }
     return JavaCollectionsUtil.SingletonList(this);
 }
Пример #24
0
        /// <summary>
        /// Runs OCR with the custom OCR engine on the given input image using the provided
        /// properties and writes the resulting PDF to the given path.
        /// </summary>
        /// <remarks>
        /// An <c>IOException</c> raised while writing is not propagated; only its message is logged.
        /// </remarks>
        /// <param name="pdfPath">path of the PDF file to create</param>
        /// <param name="inputFile">image to run OCR on</param>
        /// <param name="properties">properties passed to the <c>OcrPdfCreator</c></param>
        public static void CreatePdf(String pdfPath, FileInfo inputFile, OcrPdfCreatorProperties properties)
        {
            OcrPdfCreator creator = new OcrPdfCreator(new CustomOcrEngine(), properties);

            try
            {
                using (PdfWriter writer = GetPdfWriter(pdfPath))
                {
                    // The created document is closed right away; only the written file matters here.
                    creator.CreatePdf(JavaCollectionsUtil.SingletonList<FileInfo>(inputFile), writer).Close();
                }
            }
            catch (System.IO.IOException ioFailure)
            {
                LOGGER.Error(ioFailure.Message);
            }
        }
        // Runs English OCR on englishText.bmp with magenta CMYK text, saves the PDF and
        // compares it by content with the reference document for the current test type.
        public virtual void CompareBmp02()
        {
            String testName = "compareBmp02";
            String fileName = "englishText";
            String imagePath = TEST_IMAGES_DIRECTORY + fileName + ".bmp";
            String referencePdf = TEST_DOCUMENTS_DIRECTORY + fileName + "_" + testType + ".pdf";
            String producedPdf = GetTargetDirectory() + fileName + "_" + testName + "_" + testType + ".pdf";

            DoOcrAndSavePdfToPath(
                tesseractReader,
                imagePath,
                producedPdf,
                JavaCollectionsUtil.SingletonList<String>("eng"),
                DeviceCmyk.MAGENTA);

            // The test expects CompareByContent to return null for the produced and reference files.
            String differences = new CompareTool().CompareByContent(producedPdf, referencePdf, GetTargetDirectory(), "diff_");
            NUnit.Framework.Assert.IsNull(differences);
        }
Пример #26
0
        // Runs OCR on georgian_01.jpg with the "Georgian" script data and checks that the text
        // read back from the PDF starts with the expected Georgian word.
        public virtual void TestGeorgianTextWithScript()
        {
            FileInfo sourceImage = new FileInfo(TEST_IMAGES_DIRECTORY + "georgian_01.jpg");
            // Beginning of the first sentence.
            String expectedStart = "ღმერთი";

            // Script data is read from the script tess data directory.
            tesseractReader.SetTesseract4OcrEngineProperties(
                tesseractReader.GetTesseract4OcrEngineProperties().SetPathToTessData(new FileInfo(SCRIPT_TESS_DATA_DIRECTORY)));

            // Only the "Georgian" script is requested; the FreeSans font path is passed to the helper.
            String recognizedText = GetTextFromPdf(
                tesseractReader,
                sourceImage,
                JavaCollectionsUtil.SingletonList<String>("Georgian"),
                FREE_SANS_FONT_PATH);

            NUnit.Framework.Assert.IsTrue(recognizedText.StartsWith(expectedStart));
        }
Пример #27
0
        // Exercises the JavaCollectionsUtil helpers: empty list/map/iterator, unmodifiable
        // list/map, singleton list, Sort and Reverse.
        public virtual void JavaCollectionsUtilTest()
        {
            // Empty list: reports empty and rejects additions.
            IList<int> noItems = JavaCollectionsUtil.EmptyList<int>();
            Assert.IsEmpty(noItems);
            Assert.Throws<NotSupportedException>(() => noItems.Add(10));

            // Empty map: reports empty and rejects writes through the indexer.
            IDictionary<int, int> noEntries = JavaCollectionsUtil.EmptyMap<int, int>();
            Assert.IsEmpty(noEntries);
            Assert.Throws<NotSupportedException>(() => { noEntries[5] = 10; });

            // Empty iterator: has no first element.
            IEnumerator<int> noElements = JavaCollectionsUtil.EmptyIterator<int>();
            Assert.False(noElements.MoveNext());

            // Unmodifiable list: reads succeed; inserts, writes and sorting are rejected.
            IList<int> readOnlyList = JavaCollectionsUtil.UnmodifiableList<int>(new List<int> { 10, 20, 30, 20 });
            Assert.Throws<NotSupportedException>(() => readOnlyList.Insert(0, 20));
            Assert.Throws<NotSupportedException>(() => { readOnlyList[2] = 50; });
            int readBack = readOnlyList[3];
            Assert.Throws<NotSupportedException>(() => JavaCollectionsUtil.Sort(readOnlyList));

            // Unmodifiable map: existing keys are readable, missing keys throw, writes are rejected.
            IDictionary<int, int> readOnlyMap = JavaCollectionsUtil.UnmodifiableMap(new Dictionary<int, int>()
            {
                { 1, 20 },
                { 2, 40 },
                { 70, 80 },
            });
            readBack = readOnlyMap[2];
            Assert.Throws<KeyNotFoundException>(() => { int missing = readOnlyMap[3]; });
            Assert.Throws<NotSupportedException>(() => { readOnlyMap[11] = 11; });

            // Singleton list: exposes its element and rejects additions.
            IList<int> single = JavaCollectionsUtil.SingletonList(4);
            Assert.AreEqual(4, single[0]);
            Assert.Throws<NotSupportedException>(() => single.Add(9));

            // Sort puts the smallest element first and the largest last.
            List<int> numbers = new List<int> { 60, 50, 20 };
            JavaCollectionsUtil.Sort(numbers);
            Assert.AreEqual(20, numbers[0]);
            Assert.AreEqual(60, numbers[2]);

            // Reverse flips the order; the middle element stays in place.
            numbers = new List<int> { -1, 0, 1 };
            JavaCollectionsUtil.Reverse(numbers);
            Assert.AreEqual(1, numbers[0]);
            Assert.AreEqual(0, numbers[1]);
        }
Пример #28
0
        /// <summary>
        /// Cleans a fixed rectangle on page 1 of <c>inputPath + fileName + ".pdf"</c>, writes the
        /// result to <c>outputPath + fileName + "_cleaned.pdf"</c> and compares it by content
        /// with <c>inputPath + "cmp_" + fileName + ".pdf"</c>.
        /// </summary>
        /// <param name="fileName">base name of the PDF under test, without extension</param>
        private static void RunTest(String fileName)
        {
            String sourcePdf = inputPath + fileName + ".pdf";
            String cleanedPdf = outputPath + fileName + "_cleaned.pdf";
            String referencePdf = inputPath + "cmp_" + fileName + ".pdf";

            PdfCleanUpLocation region = new PdfCleanUpLocation(1, new Rectangle(308, 520, 200, 75));
            IList<PdfCleanUpLocation> regions = JavaCollectionsUtil.SingletonList(region);

            PdfDocument document = new PdfDocument(new PdfReader(sourcePdf), new PdfWriter(cleanedPdf));
            new PdfCleanUpTool(document, regions).CleanUp();
            document.Close();

            // The test expects CompareByContent to return null for the cleaned and reference files.
            String differences = new CompareTool().CompareByContent(cleanedPdf, referencePdf, outputPath);
            NUnit.Framework.Assert.IsNull(differences);
        }
Пример #29
0
        /// <summary>
        /// Returns fixed OCR data instead of running real recognition: one text item, stored
        /// under key 1, built from four hard-coded float values.
        /// </summary>
        /// <remarks>
        /// The text is <c>PdfHelper.THAI_TEXT</c> when the input's full name contains
        /// <c>PdfHelper.THAI_IMAGE_NAME</c>, and <c>PdfHelper.DEFAULT_TEXT</c> otherwise.
        /// </remarks>
        /// <param name="input">input image; only its full name is inspected</param>
        /// <returns>a map with the single entry described above.</returns>
        public virtual IDictionary<int, IList<TextInfo>> DoImageOcr(FileInfo input)
        {
            bool isThaiImage = input.FullName.Contains(PdfHelper.THAI_IMAGE_NAME);
            String recognized = isThaiImage ? PdfHelper.THAI_TEXT : PdfHelper.DEFAULT_TEXT;

            TextInfo item = new TextInfo(recognized, JavaUtil.ArraysAsList(204.0f, 158.0f, 742.0f, 294.0f));

            IDictionary<int, IList<TextInfo>> pages = new Dictionary<int, IList<TextInfo>>();
            pages.Put(1, JavaCollectionsUtil.SingletonList<TextInfo>(item));
            return pages;
        }
Пример #30
0
 /// <summary>
 /// Registers the built-in generic contexts: all core namespaces together with the iText
 /// namespace as the second argument, then the debug, html, invoice and sweep namespaces,
 /// each registered with itself.
 /// </summary>
 private ContextManager()
 {
     RegisterGenericContext(
         JavaUtil.ArraysAsList(
             NamespaceConstant.CORE_IO,
             NamespaceConstant.CORE_KERNEL,
             NamespaceConstant.CORE_LAYOUT,
             NamespaceConstant.CORE_BARCODES,
             NamespaceConstant.CORE_PDFA,
             NamespaceConstant.CORE_SIGN,
             NamespaceConstant.CORE_FORMS,
             NamespaceConstant.CORE_SXP,
             NamespaceConstant.CORE_SVG),
         JavaCollectionsUtil.SingletonList(NamespaceConstant.ITEXT));

     // These namespaces are passed as both arguments; the registration order is unchanged.
     String[] selfRegistered =
     {
         NamespaceConstant.PDF_DEBUG,
         NamespaceConstant.PDF_HTML,
         NamespaceConstant.PDF_INVOICE,
         NamespaceConstant.PDF_SWEEP
     };
     foreach (String ns in selfRegistered)
     {
         RegisterGenericContext(JavaCollectionsUtil.SingletonList(ns), JavaCollectionsUtil.SingletonList(ns));
     }
 }