/// <summary>
/// Runs OCR on "halftone.jpg" with hOCR output, parses the produced file twice
/// (first positioned by words, then by lines) and asserts that
/// FindTextInPageData returns true for every expected fragment on page 1.
/// </summary>
public virtual void HocrOutputFromHalftoneFile()
        {
            // Fragments looked up in the word-level parse, in assertion order.
            String[] expectedWords =
            {
                "Silliness",
                "Enablers",
                "You",
                "Middle",
                "André",
                "QUANTITY",
                "DESCRIPTION"
            };
            // Fragments looked up in the line-level parse, in assertion order.
            String[] expectedLines =
            {
                "Silliness Enablers",
                "QUANTITY DESCRIPTION UNIT PRICE TOTAL"
            };

            FileInfo sourceImage = new FileInfo(TEST_IMAGES_DIRECTORY + "halftone.jpg");
            FileInfo hocrFile    = new FileInfo(GetTargetDirectory() + "hocrOutputFromHalftoneFile.hocr");

            tesseractReader.DoTesseractOcr(sourceImage, hocrFile, OutputFormat.HOCR);

            // First pass: the hOCR output parsed with word positioning.
            IDictionary <int, IList <TextInfo> > wordData = TesseractHelper.ParseHocrFile(
                JavaCollectionsUtil.SingletonList <FileInfo>(hocrFile), TextPositioning.BY_WORDS);
            foreach (String word in expectedWords)
            {
                NUnit.Framework.Assert.IsTrue(FindTextInPageData(wordData, 1, word));
            }

            // Second pass: the same file parsed with line positioning.
            IDictionary <int, IList <TextInfo> > lineData = TesseractHelper.ParseHocrFile(
                JavaCollectionsUtil.SingletonList <FileInfo>(hocrFile), TextPositioning.BY_LINES);
            foreach (String line in expectedLines)
            {
                NUnit.Framework.Assert.IsTrue(FindTextInPageData(lineData, 1, line));
            }
        }
        /// <summary>
        /// Runs OCR on "numbers_01.jpg" with hOCR output, parses the produced file
        /// using the text positioning configured on the reader's engine properties,
        /// and asserts that the trimmed text extracted from page 1 equals "619121".
        /// </summary>
        public virtual void TestTesseract4OcrForOnePageWithHocrFormat()
        {
            FileInfo sourceImage = new FileInfo(TEST_IMAGES_DIRECTORY + "numbers_01.jpg");
            FileInfo hocrFile    = new FileInfo(GetTargetDirectory() + "testTesseract4OcrForOnePage.hocr");

            tesseractReader.DoTesseractOcr(sourceImage, hocrFile, OutputFormat.HOCR);

            // Parse with whatever positioning mode the reader is currently configured for.
            IDictionary <int, IList <TextInfo> > parsedPages = TesseractHelper.ParseHocrFile(
                JavaCollectionsUtil.SingletonList <FileInfo>(hocrFile),
                tesseractReader.GetTesseract4OcrEngineProperties().GetTextPositioning());

            // Surrounding whitespace in the extracted text is ignored by the comparison.
            NUnit.Framework.Assert.AreEqual("619121", GetTextFromPage(parsedPages.Get(1)).Trim());
        }