/// <summary>
/// Runs OCR on <c>halftone.jpg</c> producing an hOCR file, then parses that file twice:
/// once positioned by words and once by lines, asserting that page 1 contains every
/// expected word and every expected line respectively.
/// </summary>
public virtual void HocrOutputFromHalftoneFile() {
    // Single words that must be found when the hOCR file is parsed word by word.
    String[] expectedWords = new String[] {
        "Silliness", "Enablers", "You", "Middle", "André", "QUANTITY", "DESCRIPTION"
    };
    // Whole lines that must be found when the hOCR file is parsed line by line.
    String[] expectedLines = new String[] {
        "Silliness Enablers", "QUANTITY DESCRIPTION UNIT PRICE TOTAL"
    };

    FileInfo sourceImage = new FileInfo(TEST_IMAGES_DIRECTORY + "halftone.jpg");
    FileInfo hocrFile = new FileInfo(GetTargetDirectory() + "hocrOutputFromHalftoneFile.hocr");
    tesseractReader.DoTesseractOcr(sourceImage, hocrFile, OutputFormat.HOCR);

    IDictionary<int, IList<TextInfo>> wordData = TesseractHelper.ParseHocrFile(
        JavaCollectionsUtil.SingletonList<FileInfo>(hocrFile), TextPositioning.BY_WORDS);
    foreach (String word in expectedWords) {
        NUnit.Framework.Assert.IsTrue(FindTextInPageData(wordData, 1, word));
    }

    IDictionary<int, IList<TextInfo>> lineData = TesseractHelper.ParseHocrFile(
        JavaCollectionsUtil.SingletonList<FileInfo>(hocrFile), TextPositioning.BY_LINES);
    foreach (String line in expectedLines) {
        NUnit.Framework.Assert.IsTrue(FindTextInPageData(lineData, 1, line));
    }
}
/// <summary>
/// Console entry point: loads <c>Images\Jabberwock.JPG</c> from the current directory,
/// runs it through <c>TesseractHelper.OCRImageWithTesseract</c>, prints the result and
/// waits for Enter before exiting.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Dispose the image as soon as OCR is done so it is not held while we block on ReadLine.
    using (var jabberwock = Image.FromFile(Environment.CurrentDirectory + @"\Images\Jabberwock.JPG"))
    {
        var text = TesseractHelper.OCRImageWithTesseract(jabberwock);
        Console.WriteLine(text);
    }

    Console.ReadLine();
}
/// <summary>
/// Console entry point: loads <c>Images\Jabberwock.JPG</c> from the current directory,
/// runs it through <c>TesseractHelper.OCRImageWithTesseract</c>, prints the mean
/// confidence followed by the recognized text, and waits for Enter before exiting.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Dispose the image as soon as OCR is done so it is not held while we block on ReadLine.
    using (var jabberwock = Image.FromFile(Environment.CurrentDirectory + @"\Images\Jabberwock.JPG"))
    {
        var ocrResult = TesseractHelper.OCRImageWithTesseract(jabberwock);
        Console.WriteLine($"Confidence: {ocrResult.MeanConfidence}");
        Console.WriteLine(ocrResult.OcrText);
    }

    Console.ReadLine();
}
/// <summary>
/// Blob-triggered function: decodes the incoming blob as an image, runs it through
/// <c>TesseractHelper.OCRImageWithTesseract</c> and logs the recognized text.
/// </summary>
/// <param name="imageToOCR">Stream with the content of the blob that fired the trigger.</param>
/// <param name="name">Blob name bound from the <c>testimages/{name}</c> trigger path.</param>
/// <param name="log">Writer used for informational log output.</param>
/// <remarks>
/// The method is declared <c>async</c> but contains no awaits, so the work runs synchronously.
/// </remarks>
public static async Task Run(
    [BlobTrigger("testimages/{name}")] Stream imageToOCR,
    string name,
    TraceWriter log)
{
    log.Info($"Blob Trigger fired for {name}");

    // Dispose the decoded image before returning so repeated invocations do not
    // accumulate undisposed images; the input stream itself is left untouched.
    using (var jabberwock = Image.FromStream(imageToOCR))
    {
        var text = TesseractHelper.OCRImageWithTesseract(jabberwock);
        log.Info($"OCR Text: {text}");
    }
}
/// <summary>
/// Runs OCR on <c>numbers_01.jpg</c> producing an hOCR file, parses it using the text
/// positioning configured on the reader's engine properties, and asserts that the
/// trimmed text of page 1 equals <c>619121</c>.
/// </summary>
public virtual void TestTesseract4OcrForOnePageWithHocrFormat() {
    String expectedText = "619121";

    FileInfo sourceImage = new FileInfo(TEST_IMAGES_DIRECTORY + "numbers_01.jpg");
    FileInfo hocrFile = new FileInfo(GetTargetDirectory() + "testTesseract4OcrForOnePage.hocr");
    tesseractReader.DoTesseractOcr(sourceImage, hocrFile, OutputFormat.HOCR);

    IDictionary<int, IList<TextInfo>> parsedPages = TesseractHelper.ParseHocrFile(
        JavaCollectionsUtil.SingletonList<FileInfo>(hocrFile),
        tesseractReader.GetTesseract4OcrEngineProperties().GetTextPositioning());

    // Only the first page is relevant; surrounding whitespace is ignored in the comparison.
    String pageText = GetTextFromPage(parsedPages.Get(1));
    NUnit.Framework.Assert.AreEqual(expectedText, pageText.Trim());
}
/// <summary>
/// Timer-triggered function: loads <c>Images\Jabberwock.JPG</c> from the function app
/// directory, runs it through <c>TesseractHelper.OCRImageWithTesseract</c> and logs the
/// mean confidence and recognized text. Any exception is logged rather than propagated.
/// </summary>
/// <param name="myTimer">Timer information supplied by the trigger (not used).</param>
/// <param name="log">Writer used for informational and error log output.</param>
/// <param name="context">Execution context; provides the function app directory.</param>
public static void Run(
    [TimerTrigger("0 0 * * * *", RunOnStartup = true)] TimerInfo myTimer,
    TraceWriter log,
    ExecutionContext context)
{
    log.Info($"C# Timer trigger function executed at: {DateTime.Now}");

    try
    {
        // The using block disposes the image on both the success and the exception path,
        // so repeated timer runs do not accumulate undisposed images or file locks.
        using (var jabberwock = Image.FromFile(context.FunctionAppDirectory + @"\Images\Jabberwock.JPG"))
        {
            var ocrResult = TesseractHelper.OCRImageWithTesseract(jabberwock);
            log.Info($"Confidence: {ocrResult.MeanConfidence}");
            log.Info(ocrResult.OcrText);
        }
    }
    catch (Exception ex)
    {
        log.Error($"Exception: {ex}");
    }

    log.Info($"C# Timer trigger function finished at: {DateTime.Now}");
}
/// <summary>
/// Console demo of the two <c>TesseractHelper</c> entry points: a text-only OCR of
/// <c>Images\Jabberwock.JPG</c>, followed by a full-result OCR of
/// <c>Images\ProjectBackground.png</c> that also prints the mean confidence and JSON.
/// Waits for Enter before exiting.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Simple Version
    // Each image is disposed as soon as its OCR output has been printed.
    using (var jabberwock = Image.FromFile(Environment.CurrentDirectory + @"\Images\Jabberwock.JPG"))
    {
        var text = TesseractHelper.OCRImageText(jabberwock);
        Console.WriteLine(text.TrimEnd());
    }

    Console.WriteLine("---------------------------------------------------------------------");

    // More complex version showing JSON result. You can use getHOCR or getXHTML as well
    using (var projback = Image.FromFile(Environment.CurrentDirectory + @"\Images\ProjectBackground.png"))
    {
        var result = TesseractHelper.OCRImageResult(projback, getJSON: true);
        Console.WriteLine(result.Text);
        Console.WriteLine();
        Console.WriteLine($"Mean Confidence: {result.MeanConfidence}");
        Console.WriteLine();
        Console.WriteLine(result.JSON);
    }

    Console.WriteLine();
    Console.WriteLine("Press enter to exit...");
    Console.ReadLine();
}
/// <summary>
/// Console entry point. Treats <c>args[0]</c> as a directory of image files named
/// <c>&lt;NumeroEleitoral&gt;_&lt;NomeCandidato&gt;.&lt;ext&gt;</c>, runs each one through
/// <c>TesseractHelper.UploadAndRecognizeImage</c>, classifies the match against the
/// expected candidate, prints one table row per file, then runs <c>MainAsync</c> and
/// exports the results to Excel. Waits for a key press before exiting.
/// </summary>
/// <param name="args">Command-line arguments; the first one is the image directory.</param>
public static void Main(string[] args)
{
    if (args.Any())
    {
        // List the directory once and reuse it for both the emptiness check and the loop.
        var files = Directory.GetFiles(args[0]);
        if (files.Any())
        {
            Console.WriteLine("{0,-15} {1,15} {2,35} {3,25} {4,20}", "TIME", "Nº ELEITORAL", "NOME", "OCR", "MATCH TYPE");

            foreach (var imageFilePath in files)
            {
                var fileNameWithoutExtension = Path.GetFileNameWithoutExtension(imageFilePath);
                if (fileNameWithoutExtension == null)
                {
                    continue;
                }

                // File names are expected to look like "<number>_<name>". Skip anything else
                // so a single stray file does not abort the whole batch.
                var candidato = fileNameWithoutExtension.Split('_');
                int numeroEleitoral;
                if (candidato.Length < 2 || !int.TryParse(candidato[0], out numeroEleitoral))
                {
                    Console.WriteLine("Arquivo ignorado (nome fora do padrão 'numero_nome'): {0}", imageFilePath);
                    continue;
                }

                var santinhoPolitico = new SantinhoPolitico
                {
                    ImageFilePath = imageFilePath,
                    NumeroEleitoral = numeroEleitoral,
                    NomeCandidato = candidato[1]
                };

                #region Tesseract
                santinhoPolitico.TextoTesseract = TesseractHelper.UploadAndRecognizeImage(imageFilePath);
                santinhoPolitico.MatchTesseract = PesquisarCandidatoTexto(
                    santinhoPolitico.NomeCandidato,
                    santinhoPolitico.NumeroEleitoral,
                    santinhoPolitico.TextoTesseract);

                // Tally the match by kind; other match values are intentionally not counted.
                switch (santinhoPolitico.MatchTesseract)
                {
                    case MatchType.Nome:
                        MatchesTesseract.QtdeMatchesNome++;
                        break;
                    case MatchType.NumeroEleitoral:
                        MatchesTesseract.QtdeMatchesNumeroEleitoral++;
                        break;
                    case MatchType.NomeENumeroEleitoral:
                        MatchesTesseract.QtdeMatchesNomeENumeroEleitoral++;
                        break;
                }

                Console.WriteLine("{0,-15} {1,15} {2,35} {3,25} {4,20}",
                    DateTime.Now.ToLongTimeString(),
                    santinhoPolitico.NumeroEleitoral,
                    santinhoPolitico.NomeCandidato,
                    "Tesseract",
                    santinhoPolitico.MatchTesseract);
                #endregion

                SantinhosPoliticos.Add(santinhoPolitico);
            }
        }

        // Runs even when the directory is empty, as in the original flow.
        var t = MainAsync();
        t.Wait();
        ExportResultToExcel();
    }
    else
    {
        Console.WriteLine("O diretório não foi informado.");
    }

    Console.WriteLine("Precione qualquer tecla para encerrar.");
    Console.ReadKey();
}