Example #1
0
        /// <summary>
        /// Runs OCR on every page of a multi-page TIFF in parallel (one engine per page)
        /// and appends the recognised pages, in their original order, to a PDF.
        /// </summary>
        /// <param name="pFilePath">Path of the multi-page TIFF file to process.</param>
        private void ProcessByPage(string pFilePath)
        {
            string fileNameWithoutExtension = Path.GetFileNameWithoutExtension(pFilePath);
            // Output path of the OCR'd PDF, given without an extension.
            string caminhoPdf = Path.Combine(txbDirSaida.Text, fileNameWithoutExtension);

            using (IResultRenderer render = ResultRenderer.CreatePdfRenderer(caminhoPdf, _tessDataPath)) // PDF path and path to the PDF font data
            using (PixArray pages = PixArray.LoadMultiPageTiffFromFile(pFilePath))                      // Loads every page of the TIFF
            using (render.BeginDocument(fileNameWithoutExtension))                                      // Starts the PDF document
            {
                int pageCount = pages.Count;

                // One slot per page. Each task writes only to its own index, so no
                // locking is needed and the page order is preserved by construction.
                Pix[] images = new Pix[pageCount];
                TesseractEngine[] engines = new TesseractEngine[pageCount];
                Page[] processedPages = new Page[pageCount];

                try
                {
                    // Take an explicit reference to every image up front so this method
                    // (not the array's enumerator) decides when each image is released.
                    for (int i = 0; i < pageCount; i++)
                    {
                        images[i] = pages.GetPix(i);
                    }

                    Task[] tasks = new Task[pageCount];
                    for (int i = 0; i < pageCount; i++)
                    {
                        int pageIndex = i; // Per-iteration copy captured by the lambda.
                        tasks[pageIndex] = Task.Factory.StartNew(() =>
                        {
                            // Folder with the OCR configuration files / OCR language.
                            TesseractEngine tesseract = new TesseractEngine(_tessDataPath, "por");
                            engines[pageIndex] = tesseract;
                            processedPages[pageIndex] = tesseract.Process(images[pageIndex], Path.GetFileNameWithoutExtension(fileNameWithoutExtension));
                        });
                    }

                    // Blocks until every task has finished; rethrows task failures.
                    Task.WaitAll(tasks);

                    foreach (Page processedPage in processedPages)
                    {
                        render.AddPage(processedPage); // Adds the page to the PDF
                    }
                }
                finally
                {
                    // The engines and images must outlive rendering, so they are only
                    // released here: page first, then its engine, then the source image.
                    for (int i = 0; i < pageCount; i++)
                    {
                        if (processedPages[i] != null)
                        {
                            processedPages[i].Dispose();
                        }

                        if (engines[i] != null)
                        {
                            engines[i].Dispose();
                        }

                        if (images[i] != null)
                        {
                            images[i].Dispose();
                        }
                    }
                }
            }
        }
Example #2
0
 /// <summary>
 /// Verifies that an array created with <c>PixArray.Create(0)</c> reports a count of zero.
 /// </summary>
 public void CanCreatePixArray()
 {
     using (PixArray emptyArray = PixArray.Create(0))
     {
         var count = emptyArray.Count;

         Assert.That(count, Is.EqualTo(0));
     }
 }
        /// <summary>
        /// Feeds every page of a multi-page TIFF through the shared engine into
        /// <paramref name="renderer"/>, asserting that the renderer's page number
        /// starts at -1 and advances by one for each page added.
        /// </summary>
        /// <param name="renderer">Renderer that receives the document and its pages.</param>
        /// <param name="filename">Path of the TIFF file; its name without extension is used as the document title.</param>
        private void ProcessMultipageTiff(IResultRenderer renderer, string filename)
        {
            var imageName = Path.GetFileNameWithoutExtension(filename);

            using (var pixA = PixArray.LoadMultiPageTiffFromFile(filename))
            {
                var expectedPageNumber = -1;
                using (renderer.BeginDocument(imageName))
                {
                    // Expected value goes first so failure messages read correctly.
                    Assert.AreEqual(expectedPageNumber, renderer.PageNumber);
                    foreach (var pix in pixA)
                    {
                        using (var page = _engine.Process(pix))
                        {
                            var addedPage = renderer.AddPage(page);
                            expectedPageNumber++;

                            Assert.IsTrue(addedPage);
                            Assert.AreEqual(expectedPageNumber, renderer.PageNumber);
                        }
                    }
                }

                // The page number is still checked after the document has been closed.
                Assert.AreEqual(expectedPageNumber, renderer.PageNumber);
            }
        }
Example #4
0
        /// <summary>
        /// Runs OCR over every page of a multi-page TIFF and hands each recognised
        /// page to the supplied result renderer.
        /// </summary>
        /// <param name="renderer">Renderer that receives the document and its pages.</param>
        /// <param name="filename">Path of the TIFF file; its name without extension is used as the document and image name.</param>
        private void ProcessTiffFile(IResultRenderer renderer, string filename)
        {
            IEnumerable<string> configFiles = new List<string> { CONFIGS_FILE };

            using (TesseractEngine ocrEngine = new TesseractEngine(Datapath, Language, EngineMode, configFiles))
            {
                string documentName = Path.GetFileNameWithoutExtension(filename);

                using (PixArray tiffPages = PixArray.LoadMultiPageTiffFromFile(filename))
                using (renderer.BeginDocument(documentName))
                {
                    foreach (Pix tiffPage in tiffPages)
                    {
                        using (Page recognisedPage = ocrEngine.Process(tiffPage, documentName))
                        {
                            // The renderer's return value is not inspected here.
                            renderer.AddPage(recognisedPage);
                        }
                    }
                }
            }
        }
Example #5
0
 /// <summary>
 /// Verifies that an array created with <c>PixArray.Create(0)</c> reports a count of zero.
 /// </summary>
 public void CanCreatePixArray()
 {
     using (var pixA = PixArray.Create(0))
     {
         // Expected value goes first so a failure message reads correctly.
         Assert.AreEqual(0, pixA.Count);
     }
 }
 /// <summary>
 /// Loads an image file into a <c>PixArray</c>: TIFF files are loaded with all
 /// their pages, any other file is loaded as a single-image array.
 /// </summary>
 /// <param name="filename">Path of the image file to load.</param>
 /// <returns>A new array that the caller is responsible for disposing.</returns>
 private static PixArray ReadImageFileIntoPixArray(string filename)
 {
     // Ordinal, case-insensitive comparison: ToLower() is culture-sensitive and
     // can turn ".TIF" into a string that no longer ends with ".tif".
     bool isTiff = filename.EndsWith(".tif", System.StringComparison.OrdinalIgnoreCase)
                   || filename.EndsWith(".tiff", System.StringComparison.OrdinalIgnoreCase);
     if (isTiff)
     {
         return PixArray.LoadMultiPageTiffFromFile(filename);
     }

     PixArray pa = PixArray.Create(0);
     try
     {
         // The array keeps its own reference to an added image, so the local
         // one is released as soon as it has been added.
         using (Pix pix = Pix.LoadFromFile(filename))
         {
             pa.Add(pix);
         }
     }
     catch
     {
         // Do not leak the freshly created array when loading/adding fails.
         pa.Dispose();
         throw;
     }

     return pa;
 }
Example #7
0
 /// <summary>
 /// Loads an image file into a <c>PixArray</c>: TIFF files are loaded with all
 /// their pages, any other file is loaded as a single-image array.
 /// </summary>
 /// <param name="filename">Path of the image file to load.</param>
 /// <returns>A new array that the caller is responsible for disposing.</returns>
 private PixArray LoadPixArray(string filename)
 {
     // Ordinal, case-insensitive comparison: ToLower() is culture-sensitive and
     // can turn ".TIF" into a string that no longer ends with ".tif".
     bool isTiff = filename.EndsWith(".tif", System.StringComparison.OrdinalIgnoreCase)
                   || filename.EndsWith(".tiff", System.StringComparison.OrdinalIgnoreCase);
     if (isTiff)
     {
         return PixArray.LoadMultiPageTiffFromFile(filename);
     }

     PixArray pixA = PixArray.Create(0);
     try
     {
         // The array keeps its own reference to an added image, so the local
         // one is released as soon as it has been added.
         using (Pix pix = Pix.LoadFromFile(filename))
         {
             pixA.Add(pix);
         }
     }
     catch
     {
         // Do not leak the freshly created array when loading/adding fails.
         pixA.Dispose();
         throw;
     }

     return pixA;
 }
Example #8
0
        /// <summary>
        /// Console entry point: runs <c>processImage</c> on every page of the image
        /// given as the first argument (or a default test image when none is given).
        /// Any exception is traced and printed instead of crashing the process.
        /// </summary>
        /// <param name="args">Optional; <c>args[0]</c> is the path of the image to process.</param>
        public static void Main(string[] args)
        {
            string testImagePath;

            if (args.Length > 0)
            {
                testImagePath = args[0];
            }
            else
            {
                testImagePath = "../../cms_1500_02-12.png";
            }

            try
            {
                // NOTE(review): the data path below is an absolute path on a developer machine.
                using (var engine = new TesseractEngine(@"C:\Users\Chris\Documents\GitHub\ComboTessFormSharp\ConsoleDemo\", "eng", EngineMode.TesseractAndCube))
                {
                    // The image is opened only to count its frames; dispose it right
                    // away so the handle is not held for the rest of the run.
                    bool isMultiPage;
                    using (var imageFile = System.Drawing.Image.FromFile(testImagePath))
                    {
                        isMultiPage = imageFile.GetFrameCount(System.Drawing.Imaging.FrameDimension.Page) > 1;
                    }

                    if (isMultiPage)
                    {
                        using (var imgPages = PixArray.LoadMultiPageTiffFromFile(testImagePath))
                        {
                            int pageNum = 1; // Page numbers passed to processImage are 1-based.
                            foreach (Tesseract.Pix img in imgPages)
                            {
                                processImage(engine, img, testImagePath, pageNum);
                                pageNum++;
                            }
                        }
                    }
                    else
                    {
                        using (var img = Pix.LoadFromFile(testImagePath))
                        {
                            processImage(engine, img, testImagePath, 1);
                        }
                    }
                }
            }
            catch (Exception e)
            {
                Trace.TraceError(e.ToString());
                Console.WriteLine("Unexpected Error: " + e.Message);
                Console.WriteLine("Details: ");
                Console.WriteLine(e.ToString());
            }
            Console.Write("Press any key to continue . . . ");
            Console.ReadKey(true);
        }
Example #9
0
        /// <summary>
        /// Verifies that removing the only element of a <c>PixArray</c> leaves it empty.
        /// </summary>
        public void CanRemovePixFromArray()
        {
            var imagePath = TestFilePath(@"Ocr\phototest.tif");

            using (PixArray array = PixArray.Create(0))
            {
                // The source image is released immediately after it has been added.
                using (Pix image = Pix.LoadFromFile(imagePath))
                {
                    array.Add(image);
                }

                array.Remove(0);

                var remaining = array.Count;
                Assert.That(remaining, Is.EqualTo(0));
            }
        }
Example #10
0
        /// <summary>
        /// Verifies that clearing a <c>PixArray</c> holding one image leaves it empty.
        /// </summary>
        public void CanClearPixArray()
        {
            var sourcePixPath = TestFilePath(@"Ocr\phototest.tif");

            using (var pixA = PixArray.Create(0))
            {
                // The source image is released immediately after it has been added.
                using (var sourcePix = Pix.LoadFromFile(sourcePixPath))
                {
                    pixA.Add(sourcePix);
                }

                pixA.Clear();

                // Expected value goes first so a failure message reads correctly.
                Assert.AreEqual(0, pixA.Count);
            }
        }
Example #11
0
        /// <summary>
        /// Verifies that adding an image to an empty <c>PixArray</c> yields a count of one
        /// and that the element read back at index 0 equals the image that was added.
        /// </summary>
        public void CanAddPixToPixArray()
        {
            var imagePath = TestFilePath(@"Ocr\phototest.tif");

            using (PixArray array = PixArray.Create(0))
            using (Pix original = Pix.LoadFromFile(imagePath))
            {
                array.Add(original);

                Assert.That(array.Count, Is.EqualTo(1));

                // The retrieved element is disposed separately from the original.
                using (var retrieved = array.GetPix(0))
                {
                    Assert.That(retrieved, Is.EqualTo(original));
                }
            }
        }
Example #12
0
        /// <summary>
        /// Loads an image file into a <c>PixArray</c>: TIFF files are loaded with all
        /// their pages; any other file is read through <c>ImageIOHelper.GetImageList</c>
        /// and each returned image is converted and added to a new array.
        /// </summary>
        /// <param name="filename">Path of the image file to load.</param>
        /// <returns>A new array that the caller is responsible for disposing.</returns>
        private PixArray LoadPixArray(string filename)
        {
            // Ordinal, case-insensitive comparison: ToLower() is culture-sensitive and
            // can turn ".TIF" into a string that no longer ends with ".tif".
            bool isTiff = filename.EndsWith(".tif", System.StringComparison.OrdinalIgnoreCase)
                          || filename.EndsWith(".tiff", System.StringComparison.OrdinalIgnoreCase);
            if (isTiff)
            {
                return PixArray.LoadMultiPageTiffFromFile(filename);
            }

            PixArray pixA = PixArray.Create(0);
            try
            {
                IList<Image> imageList = ImageIOHelper.GetImageList(new FileInfo(filename));
                foreach (Image image in imageList)
                {
                    // The array keeps its own reference to an added image, so the
                    // converted one is released as soon as it has been added.
                    using (Pix converted = ConvertBitmapToPix(image))
                    {
                        pixA.Add(converted);
                    }
                }
            }
            catch
            {
                // Do not leak the freshly created array when loading/converting fails.
                pixA.Dispose();
                throw;
            }

            return pixA;
        }
Example #13
0
        /// <summary>
        /// Verifies that each page of the multi-page test TIFF is recognised as the
        /// text "Page N", where N is the 1-based position of the page.
        /// </summary>
        public void CanProcessMultipageTif()
        {
            using (TesseractEngine ocrEngine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
            using (PixArray tiffPages = PixArray.LoadMultiPageTiffFromFile("./Data/processing/multi-page.tif"))
            {
                int pageNumber = 0;
                foreach (Pix tiffPage in tiffPages)
                {
                    pageNumber++;

                    using (Page recognised = ocrEngine.Process(tiffPage))
                    {
                        string expected = String.Format("Page {0}", pageNumber);
                        string actual = recognised.GetText().Trim();

                        Assert.That(actual, Is.EqualTo(expected));
                    }
                }
            }
        }
Example #14
0
        /// <summary>
        /// Verifies that each page of the multi-page test TIFF is recognised as the
        /// text "Page N", where N is the 1-based position of the page.
        /// </summary>
        public void CanParseMultipageTif()
        {
            using (var ocrEngine = CreateEngine())
            using (PixArray tiffPages = PixArray.LoadMultiPageTiffFromFile(TestFilePath("./processing/multi-page.tif")))
            {
                int pageNumber = 0;
                foreach (Pix tiffPage in tiffPages)
                {
                    pageNumber++;

                    using (var recognised = ocrEngine.Process(tiffPage))
                    {
                        string expected = String.Format("Page {0}", pageNumber);
                        var actual = recognised.GetText().Trim();

                        Assert.That(actual, Is.EqualTo(expected));
                    }
                }
            }
        }
Example #15
0
 /// <summary>
 /// Runs OCR on every page of a multi-page TIFF, one page after another with a
 /// single engine, and appends each recognised page to a PDF.
 /// </summary>
 /// <param name="pFilePath">Path of the multi-page TIFF file to process.</param>
 private void ProcessByFile(string pFilePath)
 {
     // Folder with the OCR configuration files / OCR language.
     using (TesseractEngine ocrEngine = new TesseractEngine(_tessDataPath, "por"))
     {
         string baseName = Path.GetFileNameWithoutExtension(pFilePath);

         // Where the OCR'd PDF is saved, given without an extension.
         string pdfOutputPath = Path.Combine(txbDirSaida.Text, baseName);

         // PDF path and path to the PDF font data.
         using (IResultRenderer pdfRenderer = ResultRenderer.CreatePdfRenderer(pdfOutputPath, _tessDataPath))
         {
             // Loads every page of the TIFF.
             using (PixArray tiffPages = PixArray.LoadMultiPageTiffFromFile(pFilePath))
             {
                 // Starts the PDF document.
                 using (pdfRenderer.BeginDocument(baseName))
                 {
                     foreach (Pix tiffPage in tiffPages)
                     {
                         using (tiffPage)
                         {
                             // Processes the image so its OCR information can be read.
                             using (Page recognisedPage = ocrEngine.Process(tiffPage, Path.GetFileNameWithoutExtension(baseName)))
                             {
                                 // Adds the page to the PDF.
                                 pdfRenderer.AddPage(recognisedPage);
                             }
                         }
                     }
                 }
             }
         }
     }
 }
Example #16
0
        /// <summary>
        /// Fills <paramref name="document"/> with page data from an image file, calling
        /// <c>FillDocumentPage</c> once for a single-page image or once per page for a
        /// multi-page image.
        /// </summary>
        /// <param name="document">Document passed on to <c>FillDocumentPage</c> for every page.</param>
        /// <param name="pathToImage">Path of the image file; its extension selects single- or multi-page handling.</param>
        /// <param name="language">Language used to create the engine and passed on to <c>FillDocumentPage</c>.</param>
        /// <exception cref="FormatException">The extension is neither a single-page nor a multi-page image extension.</exception>
        private void FillPagesData(ATAPY.Document.Data.Core.Document document, string pathToImage, string language)
        {
            var fileExtension = Path.GetExtension(pathToImage);

            using (var ocrEngine = new TesseractEngine(ENGINE_DATAPATH, language))
            {
                if (IsSinglePageImage(fileExtension))
                {
                    using (Pix singlePage = Pix.LoadFromFile(pathToImage))
                    {
                        FillDocumentPage(document, language, ocrEngine, singlePage);
                    }
                }
                else if (IsMultiPageImage(fileExtension))
                {
                    using (PixArray allPages = PixArray.LoadMultiPageTiffFromFile(pathToImage))
                    {
                        foreach (Pix currentPage in allPages)
                        {
                            FillDocumentPage(document, language, ocrEngine, currentPage);
                        }
                    }
                }
                else
                {
                    throw new FormatException("Please specify path to the image file");
                }
            }
        }
Example #17
0
        /// <summary>
        /// Runs OCR on every page of the multi-page TIFF named by <c>ImagePath</c> and
        /// stores the trimmed text of each page, in page order, in <c>TextValues</c>.
        /// Failures are logged; the activity is aborted unless <c>ContinueOnError</c> is set.
        /// </summary>
        /// <param name="context">Activity context used to read the input path and write the result.</param>
        protected override void Execute(NativeActivityContext context)
        {
            try
            {
                string filepath = ImagePath.Get(context);
                List<string> result = new List<string>();

                using (var engine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
                using (var pixA = PixArray.LoadMultiPageTiffFromFile(filepath))
                {
                    foreach (var pix in pixA)
                    {
                        using (var page = engine.Process(pix))
                        {
                            result.Add(page.GetText().Trim());
                        }
                    }
                }

                // The output is only set when at least one page was read.
                if (result.Count > 0)
                {
                    TextValues.Set(context, result.ToArray());
                }
            }
            catch (Exception ex)
            {
                Log.Logger.LogData(ex.Message + " in activity MultipageTifToText", LogLevel.Error);
                if (!ContinueOnError)
                {
                    context.Abort();
                }
            }
        }