/// <summary>
/// Runs OCR over every image obtained from <paramref name="filename"/> and hands each
/// recognised page to <paramref name="renderer"/> inside a single document.
/// </summary>
/// <param name="renderer">Renderer that receives the recognised pages.</param>
/// <param name="filename">
/// Path of the image file. Its name without extension is used both as the document
/// title and as the input name given to the engine.
/// </param>
private void ProcessImageFile(IResultRenderer renderer, string filename)
{
    IEnumerable<string> engineConfigs = new List<string> { CONFIGS_FILE };

    // A dedicated engine is created for this call and released when it returns.
    using (var ocrEngine = new TesseractEngine(Datapath, Language, EngineMode, engineConfigs))
    {
        string documentTitle = Path.GetFileNameWithoutExtension(filename);

        using (var images = LoadPixArray(filename))
        using (renderer.BeginDocument(documentTitle))
        {
            foreach (var image in images)
            {
                using (var recognised = ocrEngine.Process(image, documentTitle))
                {
                    // The success flag returned by AddPage is not inspected.
                    renderer.AddPage(recognised);
                }
            }
        }
    }
}
/// <summary>
/// OCRs a multi-page TIFF into a PDF. Pages are recognised in parallel, one Tesseract
/// engine per page, and are then added to the PDF sequentially in page order.
/// </summary>
/// <param name="pFilePath">Path of the multi-page TIFF to process.</param>
/// <remarks>
/// Every engine is kept alive until its page has been rendered, because a
/// <c>Page</c> is backed by the engine that produced it. All engines therefore
/// exist at the same time; memory use grows with the number of pages.
/// </remarks>
private void ProcessByPage(string pFilePath)
{
    string fileNameWithoutExtension = Path.GetFileNameWithoutExtension(pFilePath);
    string caminhoPdf = Path.Combine(txbDirSaida.Text, fileNameWithoutExtension); // Output PDF path, given without an extension

    // NOTE(review): the extension is stripped a second time here, exactly as before;
    // a name such as "scan.v2" becomes "scan" - confirm this is intended.
    string inputName = Path.GetFileNameWithoutExtension(fileNameWithoutExtension);

    using (IResultRenderer render = ResultRenderer.CreatePdfRenderer(caminhoPdf, _tessDataPath)) // PDF path and the folder holding the PDF font data
    using (PixArray pages = PixArray.LoadMultiPageTiffFromFile(pFilePath)) // Loads every page of the TIFF
    using (render.BeginDocument(fileNameWithoutExtension)) // Starts the PDF document
    {
        var lstTasksRunning = new List<Task>();
        var lstProcessedPages = new SortedList<int, Page>();
        var lstEngines = new List<TesseractEngine>();
        object gate = new object(); // Guards the two collections shared between tasks

        try
        {
            int currentPage = 0;
            foreach (Pix page in pages)
            {
                int pageIndex = currentPage;
                lstTasksRunning.Add(Task.Factory.StartNew(() =>
                {
                    // Tessdata folder and OCR language. The engine is registered
                    // before use so it is disposed even if Process throws.
                    var tesseract = new TesseractEngine(_tessDataPath, "por");
                    lock (gate)
                    {
                        lstEngines.Add(tesseract);
                    }

                    Page pagina = tesseract.Process(page, inputName);

                    // SortedList is not safe for concurrent writers.
                    lock (gate)
                    {
                        lstProcessedPages.Add(pageIndex, pagina);
                    }
                }));
                currentPage++;
            }

            Task.WaitAll(lstTasksRunning.ToArray());

            // The sorted keys restore the original page order.
            foreach (var pageKeyValue in lstProcessedPages)
            {
                render.AddPage(pageKeyValue.Value);
            }
        }
        finally
        {
            // Make sure no task is still touching the shared collections before cleanup.
            try
            {
                Task.WaitAll(lstTasksRunning.ToArray());
            }
            catch (AggregateException)
            {
                // Task failures were already raised by the WaitAll above, or are
                // superseded by the exception currently propagating.
            }

            // Pages first, then the engines that back them.
            foreach (Page processed in lstProcessedPages.Values)
            {
                processed.Dispose();
            }

            foreach (TesseractEngine usedEngine in lstEngines)
            {
                usedEngine.Dispose();
            }
        }
    }
}
/// <summary>
/// Test helper: renders every page read from <paramref name="filename"/> through
/// <paramref name="renderer"/> and asserts that the renderer's page number starts at -1
/// and increases by one for each page added.
/// </summary>
/// <param name="renderer">Renderer under test.</param>
/// <param name="filename">Path of the image file; its name without extension is used as the document title.</param>
private void ProcessImageFile(IResultRenderer renderer, string filename)
{
    var imageName = Path.GetFileNameWithoutExtension(filename);
    using (var pixA = ReadImageFileIntoPixArray(filename))
    {
        int expectedPageNumber = -1;
        using (renderer.BeginDocument(imageName))
        {
            // Constraint form keeps actual/expected in the right roles, so a failure
            // message reports them correctly.
            Assert.That(renderer.PageNumber, Is.EqualTo(expectedPageNumber));

            foreach (var pix in pixA)
            {
                using (var page = _engine.Process(pix, imageName))
                {
                    var addedPage = renderer.AddPage(page);
                    expectedPageNumber++;

                    Assert.That(addedPage, Is.True);
                    Assert.That(renderer.PageNumber, Is.EqualTo(expectedPageNumber));
                }
            }
        }

        // Ending the document must not change the page number.
        Assert.That(renderer.PageNumber, Is.EqualTo(expectedPageNumber));
    }
}
/// <summary>
/// Test helper: renders every page of a multi-page TIFF through
/// <paramref name="renderer"/> and asserts that the renderer's page number starts at -1
/// and increases by one for each page added.
/// </summary>
/// <param name="renderer">Renderer under test.</param>
/// <param name="filename">Path of the multi-page TIFF; its name without extension is used as the document title.</param>
private void ProcessMultipageTiff(IResultRenderer renderer, string filename)
{
    var imageName = Path.GetFileNameWithoutExtension(filename);
    using (var pixA = PixArray.LoadMultiPageTiffFromFile(filename))
    {
        var expectedPageNumber = -1;
        using (renderer.BeginDocument(imageName))
        {
            // Assert.AreEqual takes (expected, actual); the order matters for the
            // failure message.
            Assert.AreEqual(expectedPageNumber, renderer.PageNumber);

            foreach (var pix in pixA)
            {
                using (var page = _engine.Process(pix))
                {
                    var addedPage = renderer.AddPage(page);
                    expectedPageNumber++;

                    Assert.IsTrue(addedPage);
                    Assert.AreEqual(expectedPageNumber, renderer.PageNumber);
                }
            }
        }

        // Ending the document must not change the page number.
        Assert.AreEqual(expectedPageNumber, renderer.PageNumber);
    }
}
/// <summary>
/// Creates the application and stores the collaborators it is given.
/// </summary>
/// <param name="argumentParser">Stored in <c>_argumentParser</c>.</param>
/// <param name="fileLoader">Stored in <c>_fileLoader</c>.</param>
/// <param name="featureExtractor">Stored in <c>_featureExtractor</c>.</param>
/// <param name="trxResultsParser">Stored in <c>_trxResultsParser</c>.</param>
/// <param name="resultRenderer">Stored in <c>_resultRenderer</c>.</param>
public PicklerApplication(
    IArgumentParser argumentParser,
    IFileLoader fileLoader,
    IFeatureExtractor featureExtractor,
    ITrxResultsParser trxResultsParser,
    IResultRenderer resultRenderer)
{
    // No validation is performed; the arguments are kept exactly as supplied.
    this._argumentParser = argumentParser;
    this._fileLoader = fileLoader;
    this._featureExtractor = featureExtractor;
    this._trxResultsParser = trxResultsParser;
    this._resultRenderer = resultRenderer;
}
/// <summary>
/// Sends the text of every token to <paramref name="renderer"/>, in order, routing it
/// to the "changed" or "unchanged" sink according to the token's flag, and then
/// signals the end of the output.
/// </summary>
/// <param name="renderer">Receives the token texts followed by the end notification.</param>
public void Render(IResultRenderer renderer)
{
    foreach (var token in _tokens)
    {
        if (token.Changed)
        {
            renderer.AppendChanged(token.Text);
            continue;
        }

        renderer.AppendUnchanged(token.Text);
    }

    // Always called, even when there are no tokens.
    renderer.End();
}
/// <summary>
/// Runs OCR over every image obtained from <paramref name="filename"/>, optionally
/// deskewing each one first, and hands the recognised pages to
/// <paramref name="renderer"/> inside a single document.
/// </summary>
/// <param name="renderer">Renderer that receives the recognised pages.</param>
/// <param name="filename">
/// Path of the image file. Its name without extension is used both as the document
/// title and as the input name given to the engine.
/// </param>
private void ProcessImageFile(IResultRenderer renderer, string filename)
{
    IEnumerable<string> engineConfigs = new List<string> { CONFIGS_FILE };

    using (var ocrEngine = new TesseractEngine(Datapath, Language, EngineMode, engineConfigs))
    {
        ControlParameters(ocrEngine);

        // The configured segmentation mode is held as text and parsed once per call.
        Tesseract.PageSegMode segmentationMode = (PageSegMode)Enum.Parse(typeof(PageSegMode), PageSegMode);
        string documentTitle = Path.GetFileNameWithoutExtension(filename);

        using (var images = LoadPixArray(filename))
        using (renderer.BeginDocument(documentTitle))
        {
            foreach (var image in images)
            {
                Pix deskewed = null;
                if (Deskew)
                {
                    // The measured skew is not needed afterwards.
                    deskewed = image.Deskew(new ScewSweep(range: 45), Pix.DefaultBinarySearchReduction, Pix.DefaultBinaryThreshold, out Scew measuredSkew);
                }

                // `using` tolerates null, so nothing extra is disposed when deskewing is
                // off. The page is released before the deskewed image.
                using (deskewed)
                using (var recognised = ocrEngine.Process(deskewed ?? image, documentTitle, segmentationMode))
                {
                    // The success flag returned by AddPage is not inspected.
                    renderer.AddPage(recognised);
                }
            }
        }
    }
}
/// <summary>
/// Test helper: renders a single-image file through <paramref name="renderer"/> and
/// asserts that the page number is -1 before the page is added and 0 afterwards.
/// </summary>
/// <param name="renderer">Renderer under test.</param>
/// <param name="filename">Path of the image file; its name without extension is used as the document title.</param>
private void ProcessFile(IResultRenderer renderer, string filename)
{
    var imageName = Path.GetFileNameWithoutExtension(filename);
    using (var pix = Pix.LoadFromFile(filename))
    {
        using (renderer.BeginDocument(imageName))
        {
            // Constraint form keeps actual/expected in the right roles, so a failure
            // message reports them correctly.
            Assert.That(renderer.PageNumber, Is.EqualTo(-1));

            using (var page = _engine.Process(pix, imageName))
            {
                var addedPage = renderer.AddPage(page);

                Assert.That(addedPage, Is.True);
                Assert.That(renderer.PageNumber, Is.EqualTo(0));
            }
        }

        // Ending the document must not change the page number.
        Assert.That(renderer.PageNumber, Is.EqualTo(0));
    }
}
/// <summary>
/// OCRs a multi-page TIFF into a PDF using one Tesseract engine, recognising and
/// rendering the pages one after another.
/// </summary>
/// <param name="pFilePath">Path of the multi-page TIFF to process.</param>
private void ProcessByFile(string pFilePath)
{
    // Tessdata folder and OCR language.
    using (TesseractEngine tesseract = new TesseractEngine(_tessDataPath, "por"))
    {
        string fileNameWithoutExtension = Path.GetFileNameWithoutExtension(pFilePath);

        // Output PDF path, given without an extension.
        string caminhoPdf = Path.Combine(txbDirSaida.Text, fileNameWithoutExtension);

        // Input name passed to the engine: the extension is stripped once more from
        // the already extension-less name.
        string inputName = Path.GetFileNameWithoutExtension(fileNameWithoutExtension);

        // PDF path plus the folder holding the PDF font data.
        using (IResultRenderer render = ResultRenderer.CreatePdfRenderer(caminhoPdf, _tessDataPath))
        {
            // Loads every page of the TIFF.
            using (PixArray pages = PixArray.LoadMultiPageTiffFromFile(pFilePath))
            {
                // Starts the PDF document.
                using (render.BeginDocument(fileNameWithoutExtension))
                {
                    foreach (Pix tiffPage in pages)
                    {
                        // Each source image is disposed as soon as its page is rendered.
                        using (tiffPage)
                        {
                            // Recognition result; OCR details could be read from it here.
                            using (Page ocrPage = tesseract.Process(tiffPage, inputName))
                            {
                                render.AddPage(ocrPage); // Appends the page to the PDF
                            }
                        }
                    }
                }
            }
        }
    }
}
/// <summary>
/// Runs OCR over every page read from <paramref name="filename"/> and adds all of them
/// to one document on <paramref name="renderer"/>.
/// </summary>
/// <param name="renderer">Renderer that receives the recognised pages.</param>
/// <param name="filename">Path of the image file; its name without extension is passed to the engine as the input name.</param>
private static void ProcessImageFile(IResultRenderer renderer, string filename)
{
    var fileName = Path.GetFileNameWithoutExtension(filename);

    using (var engine = new TesseractEngine(TesseractData, "eng", EngineMode.Default))
    using (var pixA = ReadImageFileIntoPixArray(filename))
    // The document is begun once around the whole loop. Beginning it per page
    // started and ended a separate document for every page.
    using (renderer.BeginDocument("multipage_tiff_example"))
    {
        foreach (var pix in pixA)
        {
            using (var page = engine.Process(pix, fileName))
            {
                renderer.AddPage(page);
            }
        }
    }
}
/// <summary>
/// Builds a renderer covering the requested output formats. The first recognised
/// format creates the renderer that is returned; a renderer for each later format is
/// inserted into it through the native API.
/// </summary>
/// <param name="outputbase">Output base passed to every renderer that is created.</param>
/// <param name="dataPath">The directory containing the pdf font data, normally same as your tessdata directory.</param>
/// <param name="outputFormats">Formats to render, processed in list order.</param>
/// <returns>
/// The first renderer created, or <c>null</c> when <paramref name="outputFormats"/>
/// contains no recognised format.
/// </returns>
public static IResultRenderer CreateRenderers(string outputbase, string dataPath, List<RenderedFormat> outputFormats)
{
    IResultRenderer head = null;

    foreach (RenderedFormat format in outputFormats)
    {
        if (head == null)
        {
            // Nothing created yet: this format (if recognised) becomes the returned renderer.
            switch (format)
            {
                case RenderedFormat.TEXT:
                    head = CreateTextRenderer(outputbase);
                    break;
                case RenderedFormat.HOCR:
                    head = CreateHOcrRenderer(outputbase);
                    break;
                case RenderedFormat.PDF:
                    head = CreatePdfRenderer(outputbase, dataPath, false);
                    break;
                case RenderedFormat.BOX:
                    head = CreateBoxRenderer(outputbase);
                    break;
                case RenderedFormat.UNLV:
                    head = CreateUnlvRenderer(outputbase);
                    break;
            }

            continue;
        }

        // A renderer already exists: attach one for this format to it.
        switch (format)
        {
            case RenderedFormat.TEXT:
                TessApi.Native.ResultRendererInsert(((ResultRenderer)head).Handle, new TextResultRenderer(outputbase).Handle);
                break;
            case RenderedFormat.HOCR:
                TessApi.Native.ResultRendererInsert(((ResultRenderer)head).Handle, new HOcrResultRenderer(outputbase).Handle);
                break;
            case RenderedFormat.PDF:
                TessApi.Native.ResultRendererInsert(((ResultRenderer)head).Handle, new PdfResultRenderer(outputbase, dataPath, false).Handle);
                break;
            case RenderedFormat.BOX:
                TessApi.Native.ResultRendererInsert(((ResultRenderer)head).Handle, new BoxResultRenderer(outputbase).Handle);
                break;
            case RenderedFormat.UNLV:
                TessApi.Native.ResultRendererInsert(((ResultRenderer)head).Handle, new UnlvResultRenderer(outputbase).Handle);
                break;
        }
    }

    return head;
}
/// <summary>
/// Console entry point: OCRs every file found in the "images" directory and adds each
/// one as a page of a single PDF document written with output base "./output", then
/// waits for a key press.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
public static void Main(string[] args)
{
    try
    {
        using (IResultRenderer renderer = ResultRenderer.CreatePdfRenderer(@"./output", @"./tessdata"))
        using (renderer.BeginDocument("PDF Test"))
        using (TesseractEngine engine = new TesseractEngine(@"./tessdata", "eng", EngineMode.TesseractAndCube))
        {
            foreach (var imagePath in Directory.GetFiles(@"images"))
            {
                using (var bitmap = new Bitmap(imagePath))
                using (var page = engine.Process(bitmap, "test"))
                {
                    renderer.AddPage(page);
                }
            }
        }
    }
    catch (Exception e)
    {
        // Top-level handler: record the failure and show it instead of crashing.
        Trace.TraceError(e.ToString());
        Console.WriteLine("Unexpected Error: " + e.Message);
        Console.WriteLine("Details: ");
        Console.WriteLine(e.ToString());
    }

    Console.Write("Press any key to continue . . . ");
    Console.ReadKey(true);
}