Exemple #1
0
        /// <summary>
        /// Runs every image loaded from <paramref name="filename"/> through a freshly created
        /// engine and hands each resulting page to <paramref name="renderer"/> inside one document.
        /// </summary>
        /// <param name="renderer">Renderer that receives the processed pages.</param>
        /// <param name="filename">
        /// Path of the image file. Its name without extension is used both as the document
        /// title and as the input name of every page.
        /// </param>
        private void ProcessImageFile(IResultRenderer renderer, string filename)
        {
            string documentTitle = Path.GetFileNameWithoutExtension(filename);
            IEnumerable<string> configFiles = new List<string> { CONFIGS_FILE };

            // Stacked usings are released bottom-up: document scope, images, then the engine.
            using (var ocrEngine = new TesseractEngine(Datapath, Language, EngineMode, configFiles))
            using (var images = LoadPixArray(filename))
            using (renderer.BeginDocument(documentTitle))
            {
                foreach (var image in images)
                {
                    using (var processedPage = ocrEngine.Process(image, documentTitle))
                    {
                        renderer.AddPage(processedPage);
                    }
                }
            }
        }
Exemple #2
0
        /// <summary>
        /// Converts a multi-page TIFF into a PDF. Each page is handed to its own task with its
        /// own <c>TesseractEngine</c>; the resulting pages are then added to the PDF in their
        /// original order.
        /// </summary>
        /// <param name="pFilePath">Path of the multi-page TIFF to process.</param>
        private void ProcessByPage(string pFilePath)
        {
            string fileNameWithoutExtension = Path.GetFileNameWithoutExtension(pFilePath);
            // Output path of the OCR'd PDF, given without an extension.
            string caminhoPdf = Path.Combine(txbDirSaida.Text, fileNameWithoutExtension);

            using (IResultRenderer render = ResultRenderer.CreatePdfRenderer(caminhoPdf, _tessDataPath)) // PDF path and PDF font data path
            using (PixArray pages = PixArray.LoadMultiPageTiffFromFile(pFilePath))                       // Loads all pages of the TIFF
            using (render.BeginDocument(fileNameWithoutExtension))                                       // Starts the PDF document
            {
                List<Task> lstTasksRunning = new List<Task>();
                SortedList<int, Page> lstProcessedPages = new SortedList<int, Page>();
                List<TesseractEngine> lstEngines = new List<TesseractEngine>();

                // SortedList and List are not safe for concurrent writers, so every mutation
                // performed by a worker task goes through this lock.
                object syncRoot = new object();

                try
                {
                    int currentPage = 0;
                    // NOTE(review): the enumerator may release the Pix instances when this loop
                    // ends, possibly while workers still use them - confirm against PixArray.
                    foreach (Pix page in pages)
                    {
                        int pageIndex = currentPage;
                        lstTasksRunning.Add(Task.Factory.StartNew(() =>
                        {
                            // The engine must outlive its Page: the page is only handed to the
                            // renderer after all tasks finish, so the engine is registered for
                            // later disposal instead of being wrapped in a using here.
                            TesseractEngine tesseract = new TesseractEngine(_tessDataPath, "por"); // tessdata folder / OCR language
                            lock (syncRoot)
                            {
                                lstEngines.Add(tesseract);
                            }

                            Page pagina = tesseract.Process(page, Path.GetFileNameWithoutExtension(fileNameWithoutExtension));
                            lock (syncRoot)
                            {
                                lstProcessedPages.Add(pageIndex, pagina);
                            }
                        }));
                        currentPage++;
                    }

                    Task.WaitAll(lstTasksRunning.ToArray());

                    // SortedList enumerates by key, so pages are added in source order.
                    foreach (var pageKeyValue in lstProcessedPages)
                    {
                        render.AddPage(pageKeyValue.Value);
                    }
                }
                finally
                {
                    // Let every worker settle before tearing anything down. Task failures were
                    // either already raised by the WaitAll above or are secondary to the
                    // exception currently propagating, so they are not rethrown from here.
                    try
                    {
                        Task.WaitAll(lstTasksRunning.ToArray());
                    }
                    catch (AggregateException)
                    {
                    }

                    // Pages first, then the engines they belong to.
                    foreach (Page pagina in lstProcessedPages.Values)
                    {
                        pagina.Dispose();
                    }

                    foreach (TesseractEngine engine in lstEngines)
                    {
                        engine.Dispose();
                    }
                }
            }
        }
        /// <summary>
        /// Test helper: processes every image read from <paramref name="filename"/> with the
        /// shared <c>_engine</c>, adds each page to <paramref name="renderer"/>, and asserts
        /// that the renderer's page number advances by one per added page, starting at -1.
        /// </summary>
        /// <param name="renderer">Renderer under test.</param>
        /// <param name="filename">Path of the image file to process.</param>
        private void ProcessImageFile(IResultRenderer renderer, string filename)
        {
            var imageName = Path.GetFileNameWithoutExtension(filename);

            using (var pixA = ReadImageFileIntoPixArray(filename))
            {
                int expectedPageNumber = -1;
                using (renderer.BeginDocument(imageName))
                {
                    // Actual value first, constraint second, so a failure reports
                    // expected/actual the right way round.
                    Assert.That(renderer.PageNumber, Is.EqualTo(expectedPageNumber));
                    foreach (var pix in pixA)
                    {
                        using (var page = _engine.Process(pix, imageName))
                        {
                            var addedPage = renderer.AddPage(page);
                            expectedPageNumber++;

                            Assert.That(addedPage, Is.True);
                            Assert.That(renderer.PageNumber, Is.EqualTo(expectedPageNumber));
                        }
                    }
                }

                // The page number must be unchanged after the document scope is closed.
                Assert.That(renderer.PageNumber, Is.EqualTo(expectedPageNumber));
            }
        }
        /// <summary>
        /// Test helper: loads a multi-page TIFF, processes each page with the shared
        /// <c>_engine</c>, adds it to <paramref name="renderer"/>, and asserts that the
        /// renderer's page number advances by one per added page, starting at -1.
        /// </summary>
        /// <param name="renderer">Renderer under test.</param>
        /// <param name="filename">Path of the multi-page TIFF to process.</param>
        private void ProcessMultipageTiff(IResultRenderer renderer, string filename)
        {
            var imageName = Path.GetFileNameWithoutExtension(filename);

            using (var pixA = PixArray.LoadMultiPageTiffFromFile(filename))
            {
                var expectedPageNumber = -1;
                using (renderer.BeginDocument(imageName))
                {
                    // Assert.AreEqual takes (expected, actual); keeping that order makes
                    // failure messages report the values correctly.
                    Assert.AreEqual(expectedPageNumber, renderer.PageNumber);
                    foreach (var pix in pixA)
                    {
                        using (var page = _engine.Process(pix))
                        {
                            var addedPage = renderer.AddPage(page);
                            expectedPageNumber++;

                            Assert.IsTrue(addedPage);
                            Assert.AreEqual(expectedPageNumber, renderer.PageNumber);
                        }
                    }
                }

                // The page number must be unchanged after the document scope is closed.
                Assert.AreEqual(expectedPageNumber, renderer.PageNumber);
            }
        }
 /// <summary>
 /// Creates the application and stores each supplied collaborator in its matching private field.
 /// </summary>
 /// <param name="argumentParser">Stored in <c>_argumentParser</c>.</param>
 /// <param name="fileLoader">Stored in <c>_fileLoader</c>.</param>
 /// <param name="featureExtractor">Stored in <c>_featureExtractor</c>.</param>
 /// <param name="trxResultsParser">Stored in <c>_trxResultsParser</c>.</param>
 /// <param name="resultRenderer">Stored in <c>_resultRenderer</c>.</param>
 public PicklerApplication(
     IArgumentParser argumentParser,
     IFileLoader fileLoader,
     IFeatureExtractor featureExtractor,
     ITrxResultsParser trxResultsParser,
     IResultRenderer resultRenderer)
 {
     this._argumentParser = argumentParser;
     this._fileLoader = fileLoader;
     this._featureExtractor = featureExtractor;
     this._trxResultsParser = trxResultsParser;
     this._resultRenderer = resultRenderer;
 }
Exemple #6
0
 /// <summary>
 /// Walks <c>_tokens</c> in order, appending each token's text to
 /// <paramref name="renderer"/> as changed or unchanged according to the token's
 /// <c>Changed</c> flag, then calls <c>End</c> on the renderer.
 /// </summary>
 /// <param name="renderer">Renderer that receives the token text.</param>
 public void Render(IResultRenderer renderer)
 {
     foreach (var piece in _tokens)
     {
         if (!piece.Changed)
         {
             renderer.AppendUnchanged(piece.Text);
             continue;
         }

         renderer.AppendChanged(piece.Text);
     }

     renderer.End();
 }
Exemple #7
0
        /// <summary>
        /// Processes every image loaded from <paramref name="filename"/> with a freshly created
        /// engine and adds each page to <paramref name="renderer"/> inside one document. When
        /// <c>Deskew</c> is set, each image is deskewed first and the deskewed copy is processed.
        /// </summary>
        /// <param name="renderer">Renderer that receives the processed pages.</param>
        /// <param name="filename">
        /// Path of the image file. Its name without extension is used both as the document
        /// title and as the input name of every page.
        /// </param>
        private void ProcessImageFile(IResultRenderer renderer, string filename)
        {
            IEnumerable<string> configFiles = new List<string> { CONFIGS_FILE };

            using (TesseractEngine engine = new TesseractEngine(Datapath, Language, EngineMode, configFiles))
            {
                ControlParameters(engine);

                // The segmentation mode is configured as text and parsed into the enum here.
                Tesseract.PageSegMode psm = (PageSegMode)Enum.Parse(typeof(PageSegMode), PageSegMode);
                string documentTitle = Path.GetFileNameWithoutExtension(filename);

                using (var images = LoadPixArray(filename))
                using (renderer.BeginDocument(documentTitle))
                {
                    foreach (var source in images)
                    {
                        // A using statement accepts a null resource and then disposes nothing,
                        // which covers the case where deskewing is switched off.
                        using (Pix straightened = Deskew
                            ? source.Deskew(new ScewSweep(range: 45), Pix.DefaultBinarySearchReduction, Pix.DefaultBinaryThreshold, out Scew scew)
                            : null)
                        using (var page = engine.Process(straightened ?? source, documentTitle, psm))
                        {
                            renderer.AddPage(page);
                        }
                    }
                }
            }
        }
        /// <summary>
        /// Test helper: loads a single image, processes it with the shared <c>_engine</c>, adds
        /// the page to <paramref name="renderer"/>, and asserts that the renderer's page number
        /// goes from -1 to 0.
        /// </summary>
        /// <param name="renderer">Renderer under test.</param>
        /// <param name="filename">Path of the image file to process.</param>
        private void ProcessFile(IResultRenderer renderer, string filename)
        {
            var imageName = Path.GetFileNameWithoutExtension(filename);

            using (var pix = Pix.LoadFromFile(filename)) {
                using (renderer.BeginDocument(imageName)) {
                    // Actual value first, constraint second, so a failure reports
                    // expected/actual the right way round.
                    Assert.That(renderer.PageNumber, Is.EqualTo(-1));
                    using (var page = _engine.Process(pix, imageName)) {
                        var addedPage = renderer.AddPage(page);

                        Assert.That(addedPage, Is.True);
                        Assert.That(renderer.PageNumber, Is.EqualTo(0));
                    }
                }

                // The page number must be unchanged after the document scope is closed.
                Assert.That(renderer.PageNumber, Is.EqualTo(0));
            }
        }
Exemple #9
0
 /// <summary>
 /// Converts a multi-page TIFF into a PDF, processing the pages one after another
 /// with a single <c>TesseractEngine</c>.
 /// </summary>
 /// <param name="pFilePath">Path of the multi-page TIFF to process.</param>
 private void ProcessByFile(string pFilePath)
 {
     // tessdata folder / OCR language
     using (TesseractEngine ocr = new TesseractEngine(_tessDataPath, "por"))
     {
         string baseName = Path.GetFileNameWithoutExtension(pFilePath);

         // Output path of the OCR'd PDF, given without an extension.
         string pdfPath = Path.Combine(txbDirSaida.Text, baseName);

         using (IResultRenderer pdf = ResultRenderer.CreatePdfRenderer(pdfPath, _tessDataPath)) // PDF path and PDF font data path
         using (PixArray tiffPages = PixArray.LoadMultiPageTiffFromFile(pFilePath))             // Loads all pages of the TIFF
         using (pdf.BeginDocument(baseName))                                                    // Starts the PDF document
         {
             foreach (Pix tiffPage in tiffPages)
             {
                 using (tiffPage)
                 {
                     // The input name is the base name with any remaining extension stripped again.
                     using (Page ocrPage = ocr.Process(tiffPage, Path.GetFileNameWithoutExtension(baseName)))
                     {
                         pdf.AddPage(ocrPage);
                     }
                 }
             }
         }
     }
 }
        /// <summary>
        /// Processes every image read from <paramref name="filename"/> with a freshly created
        /// English engine and adds all pages to <paramref name="renderer"/> as one document
        /// titled "multipage_tiff_example".
        /// </summary>
        /// <param name="renderer">Renderer that receives the processed pages.</param>
        /// <param name="filename">
        /// Path of the image file; its name without extension is used as the input name of
        /// every page.
        /// </param>
        private static void ProcessImageFile(IResultRenderer renderer, string filename)
        {
            var fileName = Path.GetFileNameWithoutExtension(filename);

            using (var engine = new TesseractEngine(TesseractData, "eng", EngineMode.Default))
            using (var pixA = ReadImageFileIntoPixArray(filename))
            // The document is opened once around the loop so that all pages land in the same
            // document; opening it per page started a new document for every page.
            using (renderer.BeginDocument("multipage_tiff_example"))
            {
                foreach (var pix in pixA)
                {
                    using (var page = engine.Process(pix, fileName))
                    {
                        renderer.AddPage(page);
                    }
                }
            }
        }
        /// <summary>
        ///     Builds one renderer per requested output format. The first format produces the
        ///     returned renderer; each later format creates another renderer whose handle is
        ///     inserted into the first via <c>TessApi.Native.ResultRendererInsert</c>.
        /// </summary>
        /// <param name="outputbase">Value passed to every renderer that is created.</param>
        /// <param name="dataPath">The directory containing the pdf font data, normally same as your tessdata directory.</param>
        /// <param name="outputFormats">Formats to create renderers for, in order; values not handled by the switch are skipped.</param>
        /// <returns>The renderer created for the first handled format, or <c>null</c> when none was handled.</returns>
        public static IResultRenderer CreateRenderers(string outputbase, string dataPath,
                                                      List <RenderedFormat> outputFormats)
        {
            // NOTE(review): the additional renderers are referenced only through their native
            // handle after insertion - confirm their managed wrappers cannot be finalised early.
            IResultRenderer primary = null;

            foreach (var requested in outputFormats)
            {
                switch (requested)
                {
                case RenderedFormat.TEXT:
                    if (primary != null)
                    {
                        TessApi.Native.ResultRendererInsert(((ResultRenderer)primary).Handle,
                                                            new TextResultRenderer(outputbase).Handle);
                    }
                    else
                    {
                        primary = CreateTextRenderer(outputbase);
                    }
                    break;

                case RenderedFormat.HOCR:
                    if (primary != null)
                    {
                        TessApi.Native.ResultRendererInsert(((ResultRenderer)primary).Handle,
                                                            new HOcrResultRenderer(outputbase).Handle);
                    }
                    else
                    {
                        primary = CreateHOcrRenderer(outputbase);
                    }
                    break;

                case RenderedFormat.PDF:
                    if (primary != null)
                    {
                        TessApi.Native.ResultRendererInsert(((ResultRenderer)primary).Handle,
                                                            new PdfResultRenderer(outputbase, dataPath, false).Handle);
                    }
                    else
                    {
                        primary = CreatePdfRenderer(outputbase, dataPath, false);
                    }
                    break;

                case RenderedFormat.BOX:
                    if (primary != null)
                    {
                        TessApi.Native.ResultRendererInsert(((ResultRenderer)primary).Handle,
                                                            new BoxResultRenderer(outputbase).Handle);
                    }
                    else
                    {
                        primary = CreateBoxRenderer(outputbase);
                    }
                    break;

                case RenderedFormat.UNLV:
                    if (primary != null)
                    {
                        TessApi.Native.ResultRendererInsert(((ResultRenderer)primary).Handle,
                                                            new UnlvResultRenderer(outputbase).Handle);
                    }
                    else
                    {
                        primary = CreateUnlvRenderer(outputbase);
                    }
                    break;
                }
            }

            return primary;
        }
Exemple #12
0
        /// <summary>
        /// Processes every file in the <c>images</c> directory and writes all pages into a
        /// single PDF document through a renderer created for <c>./output</c>. Any exception is
        /// traced and printed to the console; the program then waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments; currently not used.</param>
        public static void Main(string[] args)
        {
            try
            {
                using (IResultRenderer renderer = ResultRenderer.CreatePdfRenderer(@"./output", @"./tessdata"))
                using (renderer.BeginDocument("PDF Test"))
                using (TesseractEngine engine = new TesseractEngine(@"./tessdata", "eng", EngineMode.TesseractAndCube))
                {
                    // Directory.GetFiles does not guarantee any ordering, so sort the paths to
                    // make the page order of the resulting PDF deterministic.
                    string[] imagePaths = Directory.GetFiles(@"images");
                    Array.Sort(imagePaths, StringComparer.Ordinal);

                    foreach (var imagePath in imagePaths)
                    {
                        using (var bitmap = new Bitmap(imagePath))
                        using (var page = engine.Process(bitmap, "test"))
                        {
                            renderer.AddPage(page);
                        }
                    }
                }
            }
            catch (Exception e)
            {
                Trace.TraceError(e.ToString());
                Console.WriteLine("Unexpected Error: " + e.Message);
                Console.WriteLine("Details: ");
                Console.WriteLine(e.ToString());
            }

            Console.Write("Press any key to continue . . . ");
            Console.ReadKey(true);
        }