// Runs OCR on "halftone.jpg" with hOCR output, then checks that the parsed
// result for page 1 contains the expected text, first with word-level
// positioning and then with line-level positioning.
public virtual void HocrOutputFromHalftoneFile()
        {
            // Fragments looked up after parsing with TextPositioning.BY_WORDS.
            String[] expectedWords =
            {
                "Silliness", "Enablers", "You", "Middle", "André", "QUANTITY", "DESCRIPTION"
            };
            // Fragments looked up after parsing with TextPositioning.BY_LINES.
            String[] expectedLines =
            {
                "Silliness Enablers", "QUANTITY DESCRIPTION UNIT PRICE TOTAL"
            };

            FileInfo sourceImage = new FileInfo(TEST_IMAGES_DIRECTORY + "halftone.jpg");
            FileInfo hocrFile    = new FileInfo(GetTargetDirectory() + "hocrOutputFromHalftoneFile.hocr");

            tesseractReader.DoTesseractOcr(sourceImage, hocrFile, OutputFormat.HOCR);

            IDictionary <int, IList <TextInfo> > parsed = TesseractHelper.ParseHocrFile(
                JavaCollectionsUtil.SingletonList <FileInfo>(hocrFile), TextPositioning.BY_WORDS);
            foreach (String word in expectedWords)
            {
                NUnit.Framework.Assert.IsTrue(FindTextInPageData(parsed, 1, word));
            }

            // Same hOCR file, re-parsed with line-level grouping.
            parsed = TesseractHelper.ParseHocrFile(
                JavaCollectionsUtil.SingletonList <FileInfo>(hocrFile), TextPositioning.BY_LINES);
            foreach (String line in expectedLines)
            {
                NUnit.Framework.Assert.IsTrue(FindTextInPageData(parsed, 1, line));
            }
        }
示例#2
0
        // Console entry point: loads Images\Jabberwock.JPG from the current
        // directory, runs it through TesseractHelper.OCRImageWithTesseract and
        // prints the result, then waits for Enter before exiting.
        static void Main(string[] args)
        {
            // Image is IDisposable; release it as soon as the OCR result has been
            // printed instead of holding it while waiting for user input.
            using (var jabberwock = Image.FromFile(Environment.CurrentDirectory + @"\Images\Jabberwock.JPG"))
            {
                var text = TesseractHelper.OCRImageWithTesseract(jabberwock);

                Console.WriteLine(text);
            }

            Console.ReadLine();
        }
        // Console entry point: loads Images\Jabberwock.JPG from the current
        // directory, runs OCR on it and prints the mean confidence followed by
        // the recognized text, then waits for Enter before exiting.
        static void Main(string[] args)
        {
            // Image is IDisposable; the result is consumed inside the using scope
            // so the image is still alive while its OCR result is being read.
            using (var jabberwock = Image.FromFile(Environment.CurrentDirectory + @"\Images\Jabberwock.JPG"))
            {
                var ocrResult = TesseractHelper.OCRImageWithTesseract(jabberwock);

                Console.WriteLine($"Confidence: {ocrResult.MeanConfidence}");
                Console.WriteLine(ocrResult.OcrText);
            }

            Console.ReadLine();
        }
示例#4
0
        // Blob-triggered function: fires for blobs matching "testimages/{name}",
        // decodes the blob stream as an image, runs OCR on it and logs the text.
        //   imageToOCR - stream with the blob content.
        //   name       - value bound from the {name} segment of the trigger path.
        //   log        - writer that receives the informational messages.
        // NOTE(review): the body contains no await, so despite the async modifier
        // all of the work runs synchronously on the calling thread.
        public static async Task Run(
            [BlobTrigger("testimages/{name}")] Stream imageToOCR,
            string name,
            TraceWriter log)
        {
            log.Info($"Blob Trigger fired for {name}");

            // Image is IDisposable; without the using block every invocation
            // would leave its image behind until the finalizer eventually runs.
            using (var jabberwock = Image.FromStream(imageToOCR))
            {
                var text = TesseractHelper.OCRImageWithTesseract(jabberwock);

                log.Info($"OCR Text: {text}");
            }
        }
        // Runs OCR on "numbers_01.jpg" with hOCR output, parses the produced file
        // using the text positioning configured on the reader's engine properties,
        // and checks that the trimmed text of page 1 equals "619121".
        public virtual void TestTesseract4OcrForOnePageWithHocrFormat()
        {
            FileInfo sourceImage = new FileInfo(TEST_IMAGES_DIRECTORY + "numbers_01.jpg");
            FileInfo hocrFile    = new FileInfo(GetTargetDirectory() + "testTesseract4OcrForOnePage.hocr");

            tesseractReader.DoTesseractOcr(sourceImage, hocrFile, OutputFormat.HOCR);

            var hocrFiles   = JavaCollectionsUtil.SingletonList <FileInfo>(hocrFile);
            var positioning = tesseractReader.GetTesseract4OcrEngineProperties().GetTextPositioning();
            IDictionary <int, IList <TextInfo> > parsed = TesseractHelper.ParseHocrFile(hocrFiles, positioning);

            String actual = GetTextFromPage(parsed.Get(1)).Trim();

            NUnit.Framework.Assert.AreEqual("619121", actual);
        }
 // Timer-triggered function (schedule "0 0 * * * *", also run on startup):
 // loads Images\Jabberwock.JPG from the function app directory, runs OCR on it
 // and logs the mean confidence and the recognized text. Any exception raised
 // while loading or recognizing is logged and not rethrown.
 //   myTimer - timer information supplied by the trigger (not read here).
 //   log     - writer that receives the informational and error messages.
 //   context - supplies FunctionAppDirectory, the base path of the image.
 public static void Run([TimerTrigger("0 0 * * * *", RunOnStartup = true)] TimerInfo myTimer, TraceWriter log, ExecutionContext context)
 {
     log.Info($"C# Timer trigger function executed at: {DateTime.Now}");
     try
     {
         // Image is IDisposable; the using block releases it on both the
         // success path and when OCR throws into the catch below.
         using (var jabberwock = Image.FromFile(context.FunctionAppDirectory + @"\Images\Jabberwock.JPG"))
         {
             var ocrResult = TesseractHelper.OCRImageWithTesseract(jabberwock);
             log.Info($"Confidence: {ocrResult.MeanConfidence}");
             log.Info(ocrResult.OcrText);
         }
     }
     catch (Exception ex)
     {
         log.Error($"Exception: {ex}");
     }
     log.Info($"C# Timer trigger function finished at: {DateTime.Now}");
 }
示例#7
0
        // Console entry point demonstrating two helper calls:
        //   1. OCRImageText on Images\Jabberwock.JPG, printing the text with
        //      trailing whitespace removed;
        //   2. OCRImageResult (getJSON: true) on Images\ProjectBackground.png,
        //      printing the text, mean confidence and JSON of the result.
        // Both images are resolved relative to the current directory. Waits for
        // Enter before exiting.
        static void Main(string[] args)
        {
            // Simple Version
            // Image is IDisposable; each image is released once its result is printed.
            using (var jabberwock = Image.FromFile(Environment.CurrentDirectory + @"\Images\Jabberwock.JPG"))
            {
                var text = TesseractHelper.OCRImageText(jabberwock);

                Console.WriteLine(text.TrimEnd());
            }

            Console.WriteLine("---------------------------------------------------------------------");

            // More complex version showing JSON result. You can use getHOCR or getXHTML as well
            using (var projback = Image.FromFile(Environment.CurrentDirectory + @"\Images\ProjectBackground.png"))
            {
                var result = TesseractHelper.OCRImageResult(projback, getJSON: true);

                Console.WriteLine(result.Text);
                Console.WriteLine();
                Console.WriteLine($"Mean Confidence: {result.MeanConfidence}");
                Console.WriteLine();
                Console.WriteLine(result.JSON);
                Console.WriteLine();
            }

            Console.WriteLine("Press enter to exit...");
            Console.ReadLine();
        }
示例#8
0
        // Console entry point. args[0] is a directory of image files whose names
        // (without extension) are split on '_' into an electoral number and a
        // candidate name. Each file is sent to
        // TesseractHelper.UploadAndRecognizeImage, the recognized text is matched
        // against the candidate via PesquisarCandidatoTexto, the per-type match
        // counters are updated, and one table row is printed per file. Afterwards
        // MainAsync is run to completion and ExportResultToExcel is called.
        // When no argument is given only a message is printed.
        public static void Main(string[] args)
        {
            if (args.Any())
            {
                // Directory listing is taken once so the emptiness check and the
                // loop below operate on the same set of files.
                var files = Directory.GetFiles(args[0]);

                if (files.Any())
                {
                    Console.WriteLine("{0,-15} {1,15} {2,35} {3,25} {4,20}", "TIME", "Nº ELEITORAL", "NOME", "OCR",
                                      "MATCH TYPE");

                    foreach (var imageFilePath in files)
                    {
                        var fileNameWithoutExtension = Path.GetFileNameWithoutExtension(imageFilePath);

                        if (fileNameWithoutExtension == null)
                        {
                            continue;
                        }

                        var candidato = fileNameWithoutExtension.Split('_');

                        // A file that does not follow the NUMBER_NAME pattern is
                        // skipped so that one stray file cannot abort the batch.
                        int numeroEleitoral;
                        if (candidato.Length < 2 || !int.TryParse(candidato[0], out numeroEleitoral))
                        {
                            Console.WriteLine("Arquivo ignorado (nome fora do padrão NUMERO_NOME): {0}", imageFilePath);
                            continue;
                        }

                        var santinhoPolitico = new SantinhoPolitico
                        {
                            ImageFilePath   = imageFilePath,
                            NumeroEleitoral = numeroEleitoral,
                            NomeCandidato   = candidato[1]
                        };

                        #region Tesseract

                        santinhoPolitico.TextoTesseract = TesseractHelper.UploadAndRecognizeImage(imageFilePath);

                        santinhoPolitico.MatchTesseract = PesquisarCandidatoTexto(santinhoPolitico.NomeCandidato,
                                                                                  santinhoPolitico.NumeroEleitoral, santinhoPolitico.TextoTesseract);

                        // Only the three match kinds below are counted; any other
                        // value leaves the counters untouched.
                        switch (santinhoPolitico.MatchTesseract)
                        {
                        case MatchType.Nome:
                            MatchesTesseract.QtdeMatchesNome++;
                            break;

                        case MatchType.NumeroEleitoral:
                            MatchesTesseract.QtdeMatchesNumeroEleitoral++;
                            break;

                        case MatchType.NomeENumeroEleitoral:
                            MatchesTesseract.QtdeMatchesNomeENumeroEleitoral++;
                            break;
                        }

                        Console.WriteLine("{0,-15} {1,15} {2,35} {3,25} {4,20}", DateTime.Now.ToLongTimeString(),
                                          santinhoPolitico.NumeroEleitoral, santinhoPolitico.NomeCandidato, "Tesseract",
                                          santinhoPolitico.MatchTesseract);

                        #endregion

                        SantinhosPoliticos.Add(santinhoPolitico);
                    }
                }

                // Blocks the main thread until the asynchronous part has finished.
                var t = MainAsync();
                t.Wait();
                ExportResultToExcel();
            }
            else
            {
                Console.WriteLine("O diretório não foi informado.");
            }
            Console.WriteLine("Precione qualquer tecla para encerrar.");
            Console.ReadKey();
        }