/// <summary>
/// OCRs every page of a multi-page TIFF, preparing the pages on worker tasks
/// (one Tesseract engine per page), and writes the result as a PDF into the
/// output directory taken from <c>txbDirSaida</c>.
/// </summary>
/// <param name="pFilePath">Path of the multi-page TIFF to process.</param>
/// <remarks>
/// Results are stored in arrays indexed by page number: every task writes only
/// its own slot, so no shared collection is mutated concurrently and the page
/// order is preserved without sorting. Engines, pages and images stay alive
/// until the renderer has consumed them and are disposed afterwards.
/// </remarks>
private void ProcessByPage(string pFilePath)
{
    string fileNameWithoutExtension = Path.GetFileNameWithoutExtension(pFilePath);
    // Path where the OCR'd PDF is saved; passed without an extension.
    string caminhoPdf = Path.Combine(txbDirSaida.Text, fileNameWithoutExtension);
    // Loop-invariant input name handed to the engine for every page.
    string inputName = Path.GetFileNameWithoutExtension(fileNameWithoutExtension);

    using (IResultRenderer render = ResultRenderer.CreatePdfRenderer(caminhoPdf, _tessDataPath)) // PDF path and font/data path
    using (PixArray pages = PixArray.LoadMultiPageTiffFromFile(pFilePath)) // Loads all pages of the TIFF
    using (render.BeginDocument(fileNameWithoutExtension)) // Starts the PDF document
    {
        int pageCount = pages.Count;
        Pix[] pageImages = new Pix[pageCount];
        TesseractEngine[] engines = new TesseractEngine[pageCount];
        Page[] processedPages = new Page[pageCount];
        List<Task> lstTasksRunning = new List<Task>(pageCount);

        try
        {
            for (int currentPage = 0; currentPage < pageCount; currentPage++)
            {
                // Copy for the closure: the loop variable itself keeps changing.
                int pageIndex = currentPage;

                // GetPix returns a Pix owned by this method, so its lifetime is
                // explicit instead of depending on a foreach enumerator.
                pageImages[pageIndex] = pages.GetPix(pageIndex);

                lstTasksRunning.Add(Task.Factory.StartNew(() =>
                {
                    // Data folder path / OCR language. The engine is NOT disposed here:
                    // NOTE(review): the Page is assumed to depend on the engine that
                    // produced it until it has been rendered - confirm against the
                    // wrapper version in use.
                    TesseractEngine tesseract = new TesseractEngine(_tessDataPath, "por");
                    engines[pageIndex] = tesseract;
                    processedPages[pageIndex] = tesseract.Process(pageImages[pageIndex], inputName);
                }));
            }

            Task.WaitAll(lstTasksRunning.ToArray());

            // Arrays are already in page order.
            foreach (Page processedPage in processedPages)
            {
                render.AddPage(processedPage); // Adds the page to the PDF
            }
        }
        finally
        {
            try
            {
                // Make sure no worker is still touching the arrays before disposing.
                Task.WaitAll(lstTasksRunning.ToArray());
            }
            catch (System.AggregateException)
            {
                // Worker failures were either already thrown by the WaitAll above
                // or are secondary to the exception currently propagating.
            }

            for (int i = 0; i < pageCount; i++)
            {
                if (processedPages[i] != null)
                {
                    processedPages[i].Dispose();
                }
                if (engines[i] != null)
                {
                    engines[i].Dispose();
                }
                if (pageImages[i] != null)
                {
                    pageImages[i].Dispose();
                }
            }
        }
    }
}
/// <summary>
/// Creates a PixArray with an initial size of 0 and checks that it reports no entries.
/// </summary>
public void CanCreatePixArray()
{
    using (PixArray emptyArray = PixArray.Create(0))
    {
        var entryCount = emptyArray.Count;

        Assert.That(entryCount, Is.EqualTo(0));
    }
}
/// <summary>
/// Feeds every page of a multi-page TIFF through <paramref name="renderer"/> and
/// asserts the renderer's <c>PageNumber</c> before, during and after the document.
/// </summary>
/// <param name="renderer">Renderer that receives the processed pages.</param>
/// <param name="filename">Path of the multi-page TIFF; its name without extension is used as the document title.</param>
private void ProcessMultipageTiff(IResultRenderer renderer, string filename)
{
    var imageName = Path.GetFileNameWithoutExtension(filename);
    using (var pixA = PixArray.LoadMultiPageTiffFromFile(filename))
    {
        // The counter is expected to read -1 until the first page has been added.
        var expectedPageNumber = -1;
        using (renderer.BeginDocument(imageName))
        {
            // Expected value goes first so failure messages read correctly.
            Assert.AreEqual(expectedPageNumber, renderer.PageNumber);

            foreach (var pix in pixA)
            {
                using (var page = _engine.Process(pix))
                {
                    var addedPage = renderer.AddPage(page);
                    expectedPageNumber++;

                    Assert.IsTrue(addedPage);
                    Assert.AreEqual(expectedPageNumber, renderer.PageNumber);
                }
            }
        }

        // Closing the document must not change the page counter.
        Assert.AreEqual(expectedPageNumber, renderer.PageNumber);
    }
}
/// <summary>
/// Runs OCR over every page of a multi-page TIFF and hands each recognised page
/// to the supplied result renderer.
/// </summary>
/// <param name="renderer">Renderer that receives the document and its pages.</param>
/// <param name="filename">Path of the TIFF file; its name without extension is used as the document title and input name.</param>
private void ProcessTiffFile(IResultRenderer renderer, string filename)
{
    IEnumerable<string> configFiles = new List<string>() { CONFIGS_FILE };

    using (TesseractEngine ocrEngine = new TesseractEngine(Datapath, Language, EngineMode, configFiles))
    {
        string documentName = Path.GetFileNameWithoutExtension(filename);

        using (PixArray tiffPages = PixArray.LoadMultiPageTiffFromFile(filename))
        using (renderer.BeginDocument(documentName))
        {
            foreach (Pix tiffPage in tiffPages)
            {
                using (Page recognisedPage = ocrEngine.Process(tiffPage, documentName))
                {
                    // The success flag returned by AddPage is not inspected here.
                    renderer.AddPage(recognisedPage);
                }
            }
        }
    }
}
/// <summary>
/// Creates a PixArray with an initial size of 0 and checks that it reports no entries.
/// </summary>
public void CanCreatePixArray()
{
    using (var pixA = PixArray.Create(0))
    {
        // Expected value first, actual second, so a failure message reads correctly.
        Assert.AreEqual(0, pixA.Count);
    }
}
/// <summary>
/// Loads an image file into a <c>PixArray</c>: TIFF files (".tif"/".tiff", any casing)
/// are loaded page by page, any other file is loaded as a single image.
/// </summary>
/// <param name="filename">Path of the image file to load.</param>
/// <returns>A new <c>PixArray</c> that the caller is responsible for disposing.</returns>
private static PixArray ReadImageFileIntoPixArray(string filename)
{
    // Ordinal, case-insensitive comparison: lower-casing with the current culture
    // breaks for e.g. ".TIF" under Turkish casing rules.
    bool isTiff = filename.EndsWith(".tif", System.StringComparison.OrdinalIgnoreCase)
        || filename.EndsWith(".tiff", System.StringComparison.OrdinalIgnoreCase);

    if (isTiff)
    {
        return PixArray.LoadMultiPageTiffFromFile(filename);
    }

    PixArray pa = PixArray.Create(0);
    try
    {
        // The array keeps its own reference to an added Pix, so the local
        // one is released as soon as it has been added.
        using (Pix pix = Pix.LoadFromFile(filename))
        {
            pa.Add(pix);
        }

        return pa;
    }
    catch
    {
        // Do not leak the array when the image cannot be loaded or added.
        pa.Dispose();
        throw;
    }
}
/// <summary>
/// Loads an image file into a <c>PixArray</c>: TIFF files (".tif"/".tiff", any casing)
/// are loaded page by page, any other file is loaded as a single image.
/// </summary>
/// <param name="filename">Path of the image file to load.</param>
/// <returns>A new <c>PixArray</c> that the caller is responsible for disposing.</returns>
private PixArray LoadPixArray(string filename)
{
    // Ordinal, case-insensitive comparison: lower-casing with the current culture
    // breaks for e.g. ".TIF" under Turkish casing rules.
    bool isTiff = filename.EndsWith(".tif", System.StringComparison.OrdinalIgnoreCase)
        || filename.EndsWith(".tiff", System.StringComparison.OrdinalIgnoreCase);

    if (isTiff)
    {
        return PixArray.LoadMultiPageTiffFromFile(filename);
    }

    PixArray pixA = PixArray.Create(0);
    try
    {
        // The array keeps its own reference to an added Pix, so the local
        // one is released as soon as it has been added.
        using (Pix pix = Pix.LoadFromFile(filename))
        {
            pixA.Add(pix);
        }

        return pixA;
    }
    catch
    {
        // Do not leak the array when the image cannot be loaded or added.
        pixA.Dispose();
        throw;
    }
}
/// <summary>
/// Console entry point: OCRs the image given as the first argument (or a bundled
/// sample image when no argument is supplied), handling multi-page files page by page.
/// </summary>
/// <param name="args">Optional; <c>args[0]</c> is the path of the image to process.</param>
public static void Main(string[] args)
{
    string testImagePath;
    if (args.Length > 0)
    {
        testImagePath = args[0];
    }
    else
    {
        testImagePath = "../../cms_1500_02-12.png";
    }

    try
    {
        // NOTE(review): the data path below is machine-specific; consider making it configurable.
        using (var engine = new TesseractEngine(@"C:\Users\Chris\Documents\GitHub\ComboTessFormSharp\ConsoleDemo\", "eng", EngineMode.TesseractAndCube))
        {
            // Only the frame count is needed from System.Drawing, so the image is
            // disposed right away instead of holding the file open for the whole run.
            int pageCount;
            using (var imageFile = System.Drawing.Image.FromFile(testImagePath))
            {
                pageCount = imageFile.GetFrameCount(System.Drawing.Imaging.FrameDimension.Page);
            }

            if (pageCount > 1)
            {
                using (var imgPages = PixArray.LoadMultiPageTiffFromFile(testImagePath))
                {
                    int pageNum = 1;
                    foreach (Tesseract.Pix img in imgPages)
                    {
                        processImage(engine, img, testImagePath, pageNum);
                        pageNum++;
                    }
                }
            }
            else
            {
                using (var img = Pix.LoadFromFile(testImagePath))
                {
                    processImage(engine, img, testImagePath, 1);
                }
            }
        }
    }
    catch (Exception e)
    {
        Trace.TraceError(e.ToString());
        Console.WriteLine("Unexpected Error: " + e.Message);
        Console.WriteLine("Details: ");
        Console.WriteLine(e.ToString());
    }

    Console.Write("Press any key to continue . . . ");
    Console.ReadKey(true);
}
/// <summary>
/// Adds one image to an empty PixArray, removes the entry at index 0 and
/// checks that the array is empty again.
/// </summary>
public void CanRemovePixFromArray()
{
    string imagePath = TestFilePath(@"Ocr\phototest.tif");

    using (PixArray array = PixArray.Create(0))
    {
        // The source image is released before the entry is removed.
        using (Pix loadedImage = Pix.LoadFromFile(imagePath))
        {
            array.Add(loadedImage);
        }

        array.Remove(0);

        var remaining = array.Count;
        Assert.That(remaining, Is.EqualTo(0));
    }
}
/// <summary>
/// Adds one image to an empty PixArray, clears the array and checks that it
/// reports no entries afterwards.
/// </summary>
public void CanClearPixArray()
{
    var sourcePixPath = TestFilePath(@"Ocr\phototest.tif");
    using (var pixA = PixArray.Create(0))
    {
        using (var sourcePix = Pix.LoadFromFile(sourcePixPath))
        {
            pixA.Add(sourcePix);
        }

        pixA.Clear();

        // Expected value first, actual second, so a failure message reads correctly.
        Assert.AreEqual(0, pixA.Count);
    }
}
/// <summary>
/// Adds one image to an empty PixArray and checks that the array then holds one
/// entry and that the entry read back at index 0 equals the source image.
/// </summary>
public void CanAddPixToPixArray()
{
    string imagePath = TestFilePath(@"Ocr\phototest.tif");

    using (PixArray array = PixArray.Create(0))
    using (Pix original = Pix.LoadFromFile(imagePath))
    {
        array.Add(original);
        Assert.That(array.Count, Is.EqualTo(1));

        using (Pix readBack = array.GetPix(0))
        {
            Assert.That(readBack, Is.EqualTo(original));
        }
    }
}
/// <summary>
/// Loads an image file into a <c>PixArray</c>: TIFF files (".tif"/".tiff", any casing)
/// are loaded page by page; any other file is read through <c>ImageIOHelper</c> and
/// each resulting image is converted to a Pix and added.
/// </summary>
/// <param name="filename">Path of the image file to load.</param>
/// <returns>A new <c>PixArray</c> that the caller is responsible for disposing.</returns>
private PixArray LoadPixArray(string filename)
{
    // Ordinal, case-insensitive comparison: lower-casing with the current culture
    // breaks for e.g. ".TIF" under Turkish casing rules.
    bool isTiff = filename.EndsWith(".tif", System.StringComparison.OrdinalIgnoreCase)
        || filename.EndsWith(".tiff", System.StringComparison.OrdinalIgnoreCase);

    if (isTiff)
    {
        return PixArray.LoadMultiPageTiffFromFile(filename);
    }

    PixArray pixA = PixArray.Create(0);
    try
    {
        IList<Image> imageList = ImageIOHelper.GetImageList(new FileInfo(filename));
        foreach (Image image in imageList)
        {
            // NOTE(review): assumes ConvertBitmapToPix returns a fresh Pix owned by
            // the caller; the array keeps its own reference once the Pix is added.
            using (Pix pix = ConvertBitmapToPix(image))
            {
                pixA.Add(pix);
            }
        }

        return pixA;
    }
    catch
    {
        // Do not leak the array when reading or converting an image fails.
        pixA.Dispose();
        throw;
    }
}
/// <summary>
/// Processes each page of the multi-page sample TIFF and checks that the
/// recognised, trimmed text of page N is "Page N" (N starting at 1).
/// </summary>
public void CanProcessMultipageTif()
{
    using (TesseractEngine ocr = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
    using (PixArray tiffPages = PixArray.LoadMultiPageTiffFromFile("./Data/processing/multi-page.tif"))
    {
        int pageNumber = 1;
        foreach (Pix tiffPage in tiffPages)
        {
            using (Page recognised = ocr.Process(tiffPage))
            {
                string actual = recognised.GetText().Trim();
                string expected = String.Format("Page {0}", pageNumber);

                Assert.That(actual, Is.EqualTo(expected));
            }

            pageNumber++;
        }
    }
}
/// <summary>
/// Processes each page of the multi-page sample TIFF with an engine from
/// <c>CreateEngine</c> and checks that the recognised, trimmed text of page N
/// is "Page N" (N starting at 1).
/// </summary>
public void CanParseMultipageTif()
{
    using (var ocr = CreateEngine())
    using (PixArray tiffPages = PixArray.LoadMultiPageTiffFromFile(TestFilePath("./processing/multi-page.tif")))
    {
        int pageNumber = 1;
        foreach (Pix tiffPage in tiffPages)
        {
            using (var recognised = ocr.Process(tiffPage))
            {
                string actual = recognised.GetText().Trim();
                string expected = String.Format("Page {0}", pageNumber);

                Assert.That(actual, Is.EqualTo(expected));
            }

            pageNumber++;
        }
    }
}
/// <summary>
/// OCRs a multi-page TIFF sequentially with a single Tesseract engine and writes
/// the pages, in order, to a PDF in the output directory taken from <c>txbDirSaida</c>.
/// </summary>
/// <param name="pFilePath">Path of the multi-page TIFF to process.</param>
private void ProcessByFile(string pFilePath)
{
    // Data folder path / OCR language.
    using (var ocrEngine = new TesseractEngine(_tessDataPath, "por"))
    {
        string documentName = Path.GetFileNameWithoutExtension(pFilePath);

        // Where the OCR'd PDF is saved; passed without an extension.
        string pdfPath = Path.Combine(txbDirSaida.Text, documentName);

        // Input name given to the engine for every page.
        string inputName = Path.GetFileNameWithoutExtension(documentName);

        using (IResultRenderer pdfRenderer = ResultRenderer.CreatePdfRenderer(pdfPath, _tessDataPath)) // PDF path and font/data path
        using (PixArray tiffPages = PixArray.LoadMultiPageTiffFromFile(pFilePath)) // Loads all pages of the TIFF
        using (pdfRenderer.BeginDocument(documentName)) // Starts the PDF document
        {
            foreach (Pix tiffPage in tiffPages)
            {
                // The page image is released as soon as its page has been rendered.
                using (tiffPage)
                {
                    using (Page ocrPage = ocrEngine.Process(tiffPage, inputName))
                    {
                        pdfRenderer.AddPage(ocrPage); // Adds the page to the PDF
                    }
                }
            }
        }
    }
}
/// <summary>
/// Loads the image at <paramref name="pathToImage"/> - as a single image or as a
/// multi-page TIFF, depending on its extension - and passes every page to
/// <c>FillDocumentPage</c> together with a Tesseract engine for the given language.
/// </summary>
/// <param name="document">Document that receives the page data.</param>
/// <param name="pathToImage">Path of the image file to read.</param>
/// <param name="language">Language passed to the engine and to <c>FillDocumentPage</c>.</param>
/// <exception cref="FormatException">
/// The extension is accepted by neither <c>IsSinglePageImage</c> nor <c>IsMultiPageImage</c>.
/// </exception>
private void FillPagesData(ATAPY.Document.Data.Core.Document document, string pathToImage, string language)
{
    string imageExtension = Path.GetExtension(pathToImage);

    using (var ocrEngine = new TesseractEngine(ENGINE_DATAPATH, language))
    {
        if (IsSinglePageImage(imageExtension))
        {
            using (Pix singlePage = Pix.LoadFromFile(pathToImage))
            {
                FillDocumentPage(document, language, ocrEngine, singlePage);
            }

            return;
        }

        if (!IsMultiPageImage(imageExtension))
        {
            throw new FormatException("Please specify path to the image file");
        }

        using (PixArray allPages = PixArray.LoadMultiPageTiffFromFile(pathToImage))
        {
            foreach (Pix currentPage in allPages)
            {
                FillDocumentPage(document, language, ocrEngine, currentPage);
            }
        }
    }
}
/// <summary>
/// Reads the multi-page TIFF named by <c>ImagePath</c>, OCRs every page and stores
/// the trimmed text of each page, in page order, in <c>TextValues</c>.
/// </summary>
/// <param name="context">Activity context used to read the input and write the output argument.</param>
/// <remarks>
/// <c>TextValues</c> is only set when at least one page was processed. Any exception
/// is logged; the activity is aborted unless <c>ContinueOnError</c> is set.
/// </remarks>
protected override void Execute(NativeActivityContext context)
{
    try
    {
        string filepath = ImagePath.Get(context);
        List<string> result = new List<string>();

        using (var engine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
        using (var pixA = PixArray.LoadMultiPageTiffFromFile(filepath))
        {
            foreach (var pix in pixA)
            {
                using (var page = engine.Process(pix))
                {
                    result.Add(page.GetText().Trim());
                }
            }
        }

        // Leave the output argument untouched when the file produced no pages.
        if (result.Count > 0)
        {
            string[] strvalues = result.ToArray();
            TextValues.Set(context, strvalues);
        }
    }
    catch (Exception ex)
    {
        Log.Logger.LogData(ex.Message + " in activity MultipageTifToText", LogLevel.Error);
        if (!ContinueOnError)
        {
            context.Abort();
        }
    }
}