コード例 #1
0
        /// <summary>Run OCR over the given images and create a PDF that is written to an in-memory stream.</summary>
        /// <param name="tesseractReader">OCR engine handed to the <c>OcrPdfCreator</c>.</param>
        /// <param name="imageFiles">Image files passed to <c>CreatePdf</c>.</param>
        private static void DoImageToPdfOcr(AbstractTesseract4OcrEngine tesseractReader, IList<FileInfo> imageFiles)
        {
            OcrPdfCreator creator = new OcrPdfCreator(tesseractReader);
            // The writer targets a fresh MemoryStream that nothing else references.
            PdfWriter inMemoryWriter = new PdfWriter(new MemoryStream());

            creator.CreatePdf(imageFiles, inMemoryWriter);
        }
コード例 #2
0
        /// <summary>Recognize the text of an image and assert that it contains the expected fragment.</summary>
        /// <param name="tesseractReader">OCR engine passed to <c>GetTextUsingTesseractFromImage</c>.</param>
        /// <param name="path">Path of the image file.</param>
        /// <param name="expectedOutput">Substring that must occur in the recognized text.</param>
        private void TestImageOcrText(AbstractTesseract4OcrEngine tesseractReader, String path, String expectedOutput)
        {
            String recognizedText = GetTextUsingTesseractFromImage(tesseractReader, new FileInfo(path));
            bool containsExpected = recognizedText.Contains(expectedOutput);

            NUnit.Framework.Assert.IsTrue(containsExpected);
        }
コード例 #3
0
        /// <summary>Run OCR on an image, save the text to a file and compare that file with the expected one.</summary>
        /// <param name="tesseractReader">OCR engine used for the recognition.</param>
        /// <param name="imgPath">Path of the image to recognize.</param>
        /// <param name="expectedPath">Path of the text file holding the expected result.</param>
        /// <param name="languages">Languages forwarded to the OCR step and used to build the result file name.</param>
        /// <returns>The value returned by <c>CompareTxtFiles</c> for the expected and the produced file.</returns>
        private bool DoOcrAndCompareTxtFiles(AbstractTesseract4OcrEngine tesseractReader, String imgPath, String expectedPath,
            IList<String> languages)
        {
            // Result file: <target directory><image name>.txt
            String producedTxtFile = String.Concat(GetTargetDirectory(), GetImageName(imgPath, languages), ".txt");

            DoOcrAndSaveToTextFile(tesseractReader, imgPath, producedTxtFile, languages);

            return CompareTxtFiles(expectedPath, producedTxtFile);
        }
コード例 #4
0
 /// <summary>Create a runnable that stores the OCR engine, meta info, files and output flag it is given.</summary>
 /// <param name="tesseractReader">OCR engine stored for later use.</param>
 /// <param name="metaInfo">Meta info stored for later use.</param>
 /// <param name="imgFile">Input image file.</param>
 /// <param name="outputFile">Output file.</param>
 /// <param name="createPdf">Flag stored in the <c>createPdf</c> field.</param>
 internal DoImageOcrRunnable(AbstractTesseract4OcrEngine tesseractReader, IMetaInfo metaInfo, FileInfo imgFile,
     FileInfo outputFile, bool createPdf)
 {
     // Engine and its meta info.
     this.tesseractReader = tesseractReader;
     this.metaInfo = metaInfo;

     // Input / output files.
     this.imgFile = imgFile;
     this.outputFile = outputFile;

     // Output mode flag.
     this.createPdf = createPdf;
 }
コード例 #5
0
 /// <summary>Set up the test for the given reader type.</summary>
 /// <remarks>
 /// Records whether the type equals <c>ReaderType.EXECUTABLE</c>, selects the matching
 /// file type name ("executable" or "lib") and obtains the OCR engine for that type.
 /// </remarks>
 /// <param name="type">Reader type the test is run with.</param>
 public EventCountingTest(IntegrationTestHelper.ReaderType type)
 {
     isExecutableReaderType = type.Equals(IntegrationTestHelper.ReaderType.EXECUTABLE);
     testFileTypeName = isExecutableReaderType ? "executable" : "lib";
     tesseractReader = GetTesseractReader(type);
 }
コード例 #6
0
        /// <summary>Run OCR on an image, save the result as PDF and retrieve the text of the given page.</summary>
        /// <param name="tesseractReader">OCR engine used for the recognition.</param>
        /// <param name="file">Image file to recognize.</param>
        /// <param name="page">Page number passed to <c>GetTextFromPdfLayer</c>.</param>
        /// <param name="languages">Languages forwarded to the OCR step and used to build the PDF file name.</param>
        /// <param name="fonts">Font paths forwarded to the OCR step.</param>
        /// <returns>
        /// The extracted text, or <c>null</c> when a <see cref="System.IO.IOException"/> was caught
        /// (its message is logged).
        /// </returns>
        protected internal virtual String GetTextFromPdf(AbstractTesseract4OcrEngine tesseractReader, FileInfo file,
            int page, IList<String> languages, IList<String> fonts)
        {
            try {
                // Result file: <target directory><image name>.pdf
                String pdfPath = String.Concat(GetTargetDirectory(), GetImageName(file.FullName, languages), ".pdf");

                DoOcrAndSavePdfToPath(tesseractReader, file.FullName, pdfPath, languages, fonts);

                return GetTextFromPdfLayer(pdfPath, null, page);
            }
            catch (System.IO.IOException e) {
                LOGGER.Error(e.Message);
                return null;
            }
        }
コード例 #7
0
        /// <summary>
        /// Run OCR on the image at <paramref name="input"/>, save the result to a text file
        /// and read the text back from that file.
        /// </summary>
        /// <param name="tesseractReader">OCR engine used for the recognition.</param>
        /// <param name="input">Path of the image to recognize.</param>
        /// <param name="languages">Languages forwarded to the OCR step and used to build the text file name.</param>
        /// <returns>
        /// The text read from the produced file, or <c>null</c> when any exception was caught
        /// (its message is logged).
        /// </returns>
        protected internal virtual String GetRecognizedTextFromTextFile(AbstractTesseract4OcrEngine tesseractReader,
            String input, IList<String> languages)
        {
            try {
                // Result file: <target directory><image name>.txt
                String txtPath = String.Concat(GetTargetDirectory(), GetImageName(input, languages), ".txt");

                DoOcrAndSaveToTextFile(tesseractReader, input, txtPath, languages);

                return GetTextFromTextFile(new FileInfo(txtPath));
            }
            catch (Exception e) {
                LOGGER.Error(e.Message);
                return null;
            }
        }
コード例 #8
0
        /// <summary>
        /// Run OCR on the image at <paramref name="imgPath"/> and save the resulting PDF document
        /// to <paramref name="pdfPath"/>.
        /// </summary>
        /// <remarks>Method is used for compare tool.</remarks>
        /// <param name="tesseractReader">OCR engine; its languages are replaced when <paramref name="languages"/> is not null.</param>
        /// <param name="imgPath">Path of the image to recognize.</param>
        /// <param name="pdfPath">Path the PDF is written to.</param>
        /// <param name="languages">Languages to set on the engine, or <c>null</c> to leave the engine untouched.</param>
        /// <param name="fonts">Font paths to register in a font provider; ignored when <c>null</c> or empty.</param>
        /// <param name="color">Text color to set on the creator properties, or <c>null</c> to set none.</param>
        protected internal virtual void DoOcrAndSavePdfToPath(AbstractTesseract4OcrEngine tesseractReader, String imgPath,
            String pdfPath, IList<String> languages, IList<String> fonts, Color color)
        {
            bool languagesGiven = languages != null;

            if (languagesGiven)
            {
                Tesseract4OcrEngineProperties engineProperties = tesseractReader.GetTesseract4OcrEngineProperties();
                engineProperties.SetLanguages(languages);
                tesseractReader.SetTesseract4OcrEngineProperties(engineProperties);
            }

            OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties();
            creatorProperties.SetPdfLang("en-US");
            creatorProperties.SetTitle("");

            if (fonts != null && fonts.Count > 0)
            {
                FontProvider provider = new FontProvider();
                foreach (String fontPath in fonts)
                {
                    // Font name is looked up by path in the shared path-to-name map.
                    String fontName = FONT_PATH_TO_FONT_NAME_MAP.Get(fontPath);
                    provider.GetFontSet().AddFont(fontPath, PdfEncodings.IDENTITY_H, fontName);
                }
                creatorProperties.SetFontProvider(provider);
            }

            if (color != null)
            {
                creatorProperties.SetTextColor(color);
            }

            if (languagesGiven)
            {
                // The engine must report as many languages as were requested.
                int requestedCount = languages.Count;
                int configuredCount = tesseractReader.GetTesseract4OcrEngineProperties().GetLanguages().Count;
                NUnit.Framework.Assert.AreEqual(requestedCount, configuredCount);
            }

            OcrPdfCreator creator = new OcrPdfCreator(tesseractReader, creatorProperties);

            try {
                using (PdfWriter writer = GetPdfWriter(pdfPath)) {
                    var singleImage = JavaCollectionsUtil.SingletonList<FileInfo>(new FileInfo(imgPath));
                    PdfDocument document = creator.CreatePdf(singleImage, writer);

                    NUnit.Framework.Assert.IsNotNull(document);
                    document.Close();
                }
            }
            catch (System.IO.IOException e) {
                LOGGER.Error(e.Message);
            }
        }
コード例 #9
0
 /// <summary>
 /// Run OCR on the image at <paramref name="imgPath"/> and save the result to the text file
 /// at <paramref name="txtPath"/>.
 /// </summary>
 /// <param name="tesseractReader">OCR engine; its languages are replaced when <paramref name="languages"/> is not null.</param>
 /// <param name="imgPath">Path of the image to recognize.</param>
 /// <param name="txtPath">Path of the text file to create.</param>
 /// <param name="languages">Languages to set on the engine, or <c>null</c> to leave the engine untouched.</param>
 protected internal virtual void DoOcrAndSaveToTextFile(AbstractTesseract4OcrEngine tesseractReader, String imgPath,
     String txtPath, IList<String> languages)
 {
     bool languagesGiven = languages != null;

     if (languagesGiven)
     {
         Tesseract4OcrEngineProperties engineProperties = tesseractReader.GetTesseract4OcrEngineProperties();
         engineProperties.SetLanguages(languages);
         tesseractReader.SetTesseract4OcrEngineProperties(engineProperties);
     }

     var singleImage = JavaCollectionsUtil.SingletonList<FileInfo>(new FileInfo(imgPath));
     FileInfo textFile = new FileInfo(txtPath);
     tesseractReader.CreateTxtFile(singleImage, textFile);

     if (languagesGiven)
     {
         // The engine must report as many languages as were requested.
         int requestedCount = languages.Count;
         int configuredCount = tesseractReader.GetTesseract4OcrEngineProperties().GetLanguages().Count;
         NUnit.Framework.Assert.AreEqual(requestedCount, configuredCount);
     }
 }
コード例 #10
0
        /// <summary>Run OCR over the given images and create a PDF/A document written to an in-memory stream.</summary>
        /// <remarks>
        /// The ICC profile file stream opened for the output intent is disposed once the
        /// PDF/A creation call has finished (or failed), so no file handle outlives this method.
        /// </remarks>
        /// <param name="tesseractReader">OCR engine handed to the <c>OcrPdfCreator</c>.</param>
        /// <param name="imageFiles">Image files passed to <c>CreatePdfA</c>.</param>
        private static void DoImageToPdfAOcr(AbstractTesseract4OcrEngine tesseractReader, IList <FileInfo> imageFiles
                                             )
        {
            OcrPdfCreator ocrPdfCreator = new OcrPdfCreator(tesseractReader, new OcrPdfCreatorProperties().SetPdfLang(
                                                                "en-US"));
            Stream @is = null;

            try {
                try {
                    @is = new FileStream(PROFILE_FOLDER + "sRGB_CS_profile.icm", FileMode.Open, FileAccess.Read);
                }
                catch (FileNotFoundException) {
                    // No expected: a missing profile is deliberately ignored and the stream stays null.
                }
                PdfOutputIntent outputIntent = new PdfOutputIntent("Custom", "", "http://www.color.org", "sRGB IEC61966-2.1"
                                                                   , @is);

                ocrPdfCreator.CreatePdfA(imageFiles, new PdfWriter(new MemoryStream()), outputIntent);
            }
            finally {
                // Release the profile file handle whether or not the OCR call succeeded.
                if (@is != null)
                {
                    @is.Dispose();
                }
            }
        }
コード例 #11
0
 /// <summary>Retrieve text from the specified page, using a single font path.</summary>
 /// <remarks>Wraps <paramref name="fontPath"/> in a singleton list and delegates to the overload taking a font list.</remarks>
 /// <param name="tesseractReader">OCR engine forwarded to the delegate overload.</param>
 /// <param name="file">File forwarded to the delegate overload.</param>
 /// <param name="page">Page number forwarded to the delegate overload.</param>
 /// <param name="languages">Languages forwarded to the delegate overload.</param>
 /// <param name="fontPath">Path of the only font to use.</param>
 /// <returns>Whatever the delegate overload returns.</returns>
 protected internal virtual String GetTextFromPdf(AbstractTesseract4OcrEngine tesseractReader, FileInfo file,
     int page, IList<String> languages, String fontPath)
 {
     var singleFont = JavaCollectionsUtil.SingletonList<String>(fontPath);

     return GetTextFromPdf(tesseractReader, file, page, languages, singleFont);
 }
コード例 #12
0
 /// <summary>Run OCR on a single image file with the given engine; any value returned by the engine is ignored.</summary>
 /// <param name="tesseractReader">OCR engine whose <c>DoImageOcr</c> is invoked.</param>
 /// <param name="imageFile">Image file to recognize.</param>
 private static void DoImageOcr(AbstractTesseract4OcrEngine tesseractReader, FileInfo imageFile)
     => tesseractReader.DoImageOcr(imageFile);
コード例 #13
0
 /// <summary>Set up the test for the given reader type.</summary>
 /// <remarks>Obtains the OCR engine for the type and stores the type's lower-cased (invariant) name.</remarks>
 /// <param name="type">Reader type the test is run with.</param>
 public ImageFormatIntegrationTest(IntegrationTestHelper.ReaderType type)
 {
     tesseractReader = GetTesseractReader(type);

     var readerTypeName = type.ToString();
     this.testType = readerTypeName.ToLowerInvariant();
 }
コード例 #14
0
 /// <summary>Set up the test with the OCR engine obtained for the given reader type.</summary>
 /// <param name="type">Reader type passed to <c>GetTesseractReader</c>.</param>
 public MultiThreadingTest(IntegrationTestHelper.ReaderType type)
 {
     var engineForType = GetTesseractReader(type);

     tesseractReader = engineForType;
 }
コード例 #15
0
 /// <summary>Retrieve text from the first page, using a single font path.</summary>
 /// <remarks>Delegates to the page-aware overload with page number 1.</remarks>
 /// <param name="tesseractReader">OCR engine forwarded to the delegate overload.</param>
 /// <param name="file">File forwarded to the delegate overload.</param>
 /// <param name="languages">Languages forwarded to the delegate overload.</param>
 /// <param name="fontPath">Font path forwarded to the delegate overload.</param>
 /// <returns>Whatever the delegate overload returns.</returns>
 protected internal virtual String GetTextFromPdf(AbstractTesseract4OcrEngine tesseractReader, FileInfo file,
     IList<String> languages, String fontPath)
 {
     const int firstPage = 1;

     return GetTextFromPdf(tesseractReader, file, firstPage, languages, fontPath);
 }
コード例 #16
0
 /// <summary>
 /// Run OCR on the image at <paramref name="input"/>, save the result to a text file
 /// and read the text back, without specifying languages.
 /// </summary>
 /// <remarks>Delegates to the three-argument overload with <c>null</c> languages.</remarks>
 /// <param name="tesseractReader">OCR engine forwarded to the delegate overload.</param>
 /// <param name="input">Path of the image to recognize.</param>
 /// <returns>Whatever the delegate overload returns.</returns>
 protected internal virtual String GetRecognizedTextFromTextFile(AbstractTesseract4OcrEngine tesseractReader,
     String input)
 {
     return GetRecognizedTextFromTextFile(tesseractReader, input, languages: null);
 }
コード例 #17
0
 /// <summary>Retrieve text from the first page, with no languages and an empty font list.</summary>
 /// <remarks>Delegates to the overload taking page, languages and a font list.</remarks>
 /// <param name="tesseractReader">OCR engine forwarded to the delegate overload.</param>
 /// <param name="file">File forwarded to the delegate overload.</param>
 /// <returns>Whatever the delegate overload returns.</returns>
 protected internal virtual String GetTextFromPdf(AbstractTesseract4OcrEngine tesseractReader, FileInfo file)
 {
     List<String> noFonts = new List<String>();

     return GetTextFromPdf(tesseractReader, file, 1, null, noFonts);
 }
コード例 #18
0
 /// <summary>Retrieve text from the required page, with an empty font list.</summary>
 /// <remarks>Delegates to the overload taking page, languages and a font list.</remarks>
 /// <param name="tesseractReader">OCR engine forwarded to the delegate overload.</param>
 /// <param name="file">File forwarded to the delegate overload.</param>
 /// <param name="page">Page number forwarded to the delegate overload.</param>
 /// <param name="languages">Languages forwarded to the delegate overload.</param>
 /// <returns>Whatever the delegate overload returns.</returns>
 protected internal virtual String GetTextFromPdf(AbstractTesseract4OcrEngine tesseractReader, FileInfo file,
     int page, IList<String> languages)
 {
     List<String> noFonts = new List<String>();

     return GetTextFromPdf(tesseractReader, file, page, languages, noFonts);
 }
コード例 #19
0
 /// <summary>Set up the test with the OCR engine obtained for the given reader type.</summary>
 /// <param name="type">Reader type passed to <c>GetTesseractReader</c>.</param>
 public BasicTesseractIntegrationTest(IntegrationTestHelper.ReaderType type)
 {
     var engineForType = GetTesseractReader(type);

     tesseractReader = engineForType;
 }
コード例 #20
0
 /// <summary>
 /// Run OCR on the image at <paramref name="imgPath"/> and save the resulting PDF document
 /// to <paramref name="pdfPath"/>, without specifying a text color.
 /// </summary>
 /// <remarks>
 /// Delegates to the overload taking a color, passing <c>null</c> for it.
 /// (Text will be invisible)
 /// </remarks>
 /// <param name="tesseractReader">OCR engine forwarded to the delegate overload.</param>
 /// <param name="imgPath">Path of the image to recognize.</param>
 /// <param name="pdfPath">Path the PDF is written to.</param>
 /// <param name="languages">Languages forwarded to the delegate overload.</param>
 /// <param name="fonts">Font paths forwarded to the delegate overload.</param>
 protected internal virtual void DoOcrAndSavePdfToPath(AbstractTesseract4OcrEngine tesseractReader, String imgPath,
     String pdfPath, IList<String> languages, IList<String> fonts)
 {
     DoOcrAndSavePdfToPath(tesseractReader, imgPath, pdfPath, languages, fonts, color: null);
 }
コード例 #21
0
 /// <summary>Set up the test with the OCR engine obtained for the given reader type.</summary>
 /// <param name="type">Reader type passed to <c>GetTesseractReader</c>.</param>
 public PdfA3UIntegrationTest(IntegrationTestHelper.ReaderType type)
 {
     var engineForType = GetTesseractReader(type);

     tesseractReader = engineForType;
 }
コード例 #22
0
 /// <summary>
 /// Run OCR on the image at <paramref name="imgPath"/> and save the resulting PDF document
 /// to <paramref name="pdfPath"/>, without specifying languages, fonts or a text color.
 /// </summary>
 /// <remarks>
 /// Delegates to the six-argument overload, passing <c>null</c> for every optional input.
 /// (Method is used for compare tool)
 /// </remarks>
 /// <param name="tesseractReader">OCR engine forwarded to the delegate overload.</param>
 /// <param name="imgPath">Path of the image to recognize.</param>
 /// <param name="pdfPath">Path the PDF is written to.</param>
 protected internal virtual void DoOcrAndSavePdfToPath(AbstractTesseract4OcrEngine tesseractReader, String imgPath,
     String pdfPath)
 {
     DoOcrAndSavePdfToPath(tesseractReader, imgPath, pdfPath, languages: null, fonts: null, color: null);
 }