/// <summary>
/// OCRs every page of a multi-page TIFF in parallel (one Tesseract engine per page)
/// and appends the recognised pages, in their original order, to a searchable PDF.
/// </summary>
/// <param name="pFilePath">Path of the multi-page TIFF to process.</param>
/// <remarks>
/// The PDF path is built from <c>txbDirSaida.Text</c> and the input file name without
/// its extension; no extension is passed to the renderer.
/// </remarks>
private void ProcessByPage(string pFilePath)
{
    string fileNameWithoutExtension = Path.GetFileNameWithoutExtension(pFilePath);
    // Output path for the OCR'd PDF, given without an extension.
    string caminhoPdf = Path.Combine(txbDirSaida.Text, fileNameWithoutExtension);

    using (IResultRenderer render = ResultRenderer.CreatePdfRenderer(caminhoPdf, _tessDataPath)) // PDF path and PDF font directory
    using (PixArray pages = PixArray.LoadMultiPageTiffFromFile(pFilePath)) // loads every page of the TIFF
    using (render.BeginDocument(fileNameWithoutExtension)) // opens the PDF document
    {
        // Guards the two collections below, which are touched by the worker tasks.
        object gate = new object();
        List<Task> lstTasksRunning = new List<Task>();
        SortedList<int, Page> lstProcessedPages = new SortedList<int, Page>();
        // Everything that must stay alive until rendering is done: page images, engines, OCR pages.
        List<IDisposable> ownedResources = new List<IDisposable>();

        try
        {
            int currentPage = 0;
            foreach (Pix page in pages)
            {
                int pageIndex = currentPage++;
                Pix pageImage = page;
                lock (gate)
                {
                    ownedResources.Add(pageImage);
                }

                lstTasksRunning.Add(Task.Factory.StartNew(() =>
                {
                    // Tessdata folder and OCR language. The engine is NOT disposed here: the Page
                    // it produces is only usable while its engine is alive, and it is rendered later.
                    TesseractEngine tesseract = new TesseractEngine(_tessDataPath, "por");
                    lock (gate)
                    {
                        ownedResources.Add(tesseract);
                    }

                    Page pagina = tesseract.Process(pageImage, Path.GetFileNameWithoutExtension(fileNameWithoutExtension));
                    lock (gate)
                    {
                        ownedResources.Add(pagina);
                        lstProcessedPages.Add(pageIndex, pagina);
                    }
                }));
            }

            Task.WaitAll(lstTasksRunning.ToArray());

            // SortedList enumerates by key, so pages are added in their original order.
            foreach (var pageKeyValue in lstProcessedPages)
            {
                render.AddPage(pageKeyValue.Value);
            }
        }
        finally
        {
            // Make sure no worker is still using the resources before releasing them.
            try
            {
                Task.WaitAll(lstTasksRunning.ToArray());
            }
            catch (AggregateException)
            {
                // Task failures were either already thrown by the WaitAll above or are
                // superseded by the exception that brought us here.
            }

            // Reverse order: each Page is released before its engine, and both before the page image.
            for (int i = ownedResources.Count - 1; i >= 0; i--)
            {
                ownedResources[i].Dispose();
            }
        }
    }
}
/// <summary>
/// Processes <paramref name="filename"/> and writes the result through the renderer
/// selected by <c>OutputFormat</c> ("text", "hocr" or "pdf").
/// </summary>
/// <param name="filename">Path of the input image; files ending in ".tif" are handled by <c>ProcessTiffFile</c>.</param>
/// <remarks>
/// Any other <c>OutputFormat</c> value adds no renderer, so the aggregate renderer is created empty.
/// </remarks>
public override void ProcessFile(string filename)
{
    List<IResultRenderer> resultRenderers = new List<IResultRenderer>();
    switch (OutputFormat)
    {
        case "text":
            resultRenderers.Add(ResultRenderer.CreateTextRenderer(OutputFile));
            break;
        case "hocr":
            resultRenderers.Add(ResultRenderer.CreateHOcrRenderer(OutputFile));
            break;
        case "pdf":
            // The PDF renderer also needs the tessdata directory under Datapath.
            resultRenderers.Add(ResultRenderer.CreatePdfRenderer(OutputFile, Datapath + "\\tessdata"));
            break;
    }

    using (IResultRenderer renderer = new AggregateResultRenderer(resultRenderers))
    {
        // Ordinal, case-insensitive match: a file extension is not linguistic text, and
        // ".TIF" must take the same path as ".tif".
        if (filename.EndsWith(".tif", System.StringComparison.OrdinalIgnoreCase))
        {
            ProcessTiffFile(renderer, filename);
        }
        else
        {
            ProcessFile(renderer, filename);
        }
    }
}
/// <summary>
/// Produces a searchable PDF from a TIF image by running it through a Tesseract PDF renderer.
/// </summary>
/// <param name="inputFilePath">Path of the TIF file to convert.</param>
/// <param name="outputFilePath">Path handed to the PDF renderer as the output file.</param>
public static void Convert(string inputFilePath, string outputFilePath)
{
    using (var pdfRenderer = ResultRenderer.CreatePdfRenderer(outputFilePath, TesseractData))
    {
        ProcessImageFile(pdfRenderer, inputFilePath);

        // Reported while the renderer is still open, i.e. before it is disposed.
        Console.WriteLine("Conversion completed for file: {0}", inputFilePath);
    }
}
/// <summary>
/// OCRs the page images of a PDF with Tesseract (language "deu") and writes the result
/// to "&lt;name&gt;.ocr.pdf" in the destination directory.
/// </summary>
/// <param name="fullPath">Path of the PDF to scan.</param>
/// <param name="force">When true, the file is scanned even if the output PDF already exists.</param>
/// <remarks>
/// Scanning stops at the first page on which the extractor reports text, and the output
/// PDF is then deleted so that files that already contain text are not duplicated.
/// </remarks>
public void Scan(string fullPath, bool force = false)
{
    var file = System.IO.Path.GetFileNameWithoutExtension(fullPath);
    var dstPath = _destination ?? System.IO.Path.GetDirectoryName(fullPath);
    var dstFile = System.IO.Path.Combine(dstPath, $"{file}.ocr");

    // Nothing to do when the output already exists and a rescan was not requested.
    if (File.Exists(dstFile + ".pdf") && !force)
    {
        return;
    }

    Console.WriteLine($"Scanning {file}");
    using (var pdfReader = new PdfReader(fullPath))
    {
        var parser = new PdfReaderContentParser(pdfReader);
        var extractor = new ImageExtractor(dstPath);
        var dllDir = AppDomain.CurrentDomain.BaseDirectory;
        // One tessdata directory for both the engine and the PDF renderer, so the renderer
        // does not depend on the current working directory.
        var tessDataDir = $"{dllDir}/tessdata";
        bool containsText = false;

        using (var engine = new TesseractEngine(tessDataDir, "deu", EngineMode.Default))
        using (var pdf = ResultRenderer.CreatePdfRenderer(dstFile, tessDataDir))
        using (pdf.BeginDocument(file))
        {
            for (int i = 1; i <= pdfReader.NumberOfPages; i++)
            {
                extractor.Rotation = pdfReader.GetPageRotation(i);
                parser.ProcessContent(i, extractor);
                var tempFile = extractor.TempFile;
                try
                {
                    containsText |= extractor.ContainsText;
                    if (containsText)
                    {
                        break; // Don't process files that contain text.
                    }

                    using (var img = Pix.LoadFromFile(tempFile))
                    {
                        Console.WriteLine($"Scanning page {i}");
                        using (var page = engine.Process(img, $"page-{i}"))
                        {
                            pdf.AddPage(page);
                        }
                    }
                }
                finally
                {
                    // Remove the extracted image even when OCR throws or the loop is left early.
                    if (!string.IsNullOrEmpty(tempFile))
                    {
                        File.Delete(tempFile);
                    }
                }
            }
        }

        // Don't duplicate files that contain text. Runs after the renderer has been disposed.
        if (containsText)
        {
            Console.WriteLine($"Skipping {file}, as it contains text.");
            File.Delete(dstFile + ".pdf");
        }
    }
}
/// <summary>
/// Renders a multi-page TIFF through the PDF renderer and checks that a ".pdf" file exists afterwards.
/// </summary>
public void CanRenderMultiplePageDocumentToPdfFile()
{
    string outputBase = TestResultRunFile(@"ResultRenderers\PDF\multi-page");

    using (var pdfRenderer = ResultRenderer.CreatePdfRenderer(outputBase, DataPath))
    {
        string inputTiff = TestFilePath("processing/multi-page.tif");
        ProcessMultipageTiff(pdfRenderer, inputTiff);
    }

    // The existence check runs only after the renderer has been disposed.
    string pdfPath = Path.ChangeExtension(outputBase, "pdf");
    bool pdfExists = File.Exists(pdfPath);
    Assert.That(pdfExists, $"Expected a PDF file \"{pdfPath}\" to have been created; but none was found.");
}
/// <summary>
/// Renders a single image through the PDF renderer and checks that a ".pdf" file exists afterwards.
/// </summary>
public void CanRenderResultsIntoPdfFile()
{
    string outputBase = TestResultRunFile(@"ResultRenderers\PDF\phototest");

    using (var pdfRenderer = ResultRenderer.CreatePdfRenderer(outputBase, DataPath))
    {
        string inputImage = TestFilePath("Ocr/phototest.tif");
        ProcessFile(pdfRenderer, inputImage);
    }

    // The existence check runs only after the renderer has been disposed.
    string pdfPath = Path.ChangeExtension(outputBase, "pdf");
    bool pdfExists = File.Exists(pdfPath);
    Assert.That(pdfExists, $"Expected a PDF file \"{pdfPath}\" to have been created; but none was found.");
}
/// <summary>
/// Renders a single image through the Box renderer and checks that a ".box" file exists afterwards.
/// </summary>
public void CanRenderResultsIntoBoxFile()
{
    var resultPath = TestResultRunFile(@"ResultRenderers\Box\phototest");

    using (var renderer = ResultRenderer.CreateBoxRenderer(resultPath))
    {
        var examplePixPath = TestFilePath("Ocr/phototest.tif");
        ProcessFile(renderer, examplePixPath);
    }

    var expectedOutputFilename = Path.ChangeExtension(resultPath, "box");

    // Assert.That, like the sibling renderer tests, instead of the classic Assert.IsTrue.
    Assert.That(File.Exists(expectedOutputFilename), $"Expected a Box file \"{expectedOutputFilename}\" to have been created; but none was found.");
}
/// <summary>
/// Renders a multi-page TIFF through an aggregate of the PDF and text renderers and
/// checks that both a ".pdf" and a ".txt" file exist afterwards.
/// </summary>
public void CanRenderMultiplePageDocumentIntoMultipleResultRenderers()
{
    string outputBase = TestResultRunFile(@"ResultRenderers\Aggregate\multi-page");

    using (var aggregate = new AggregateResultRenderer(
        ResultRenderer.CreatePdfRenderer(outputBase, DataPath),
        ResultRenderer.CreateTextRenderer(outputBase)))
    {
        string inputTiff = TestFilePath("processing/multi-page.tif");
        ProcessMultipageTiff(aggregate, inputTiff);
    }

    // { label used in the failure message, file extension }, checked in this order.
    string[][] expectedOutputs =
    {
        new[] { "PDF", "pdf" },
        new[] { "Text", "txt" },
    };

    foreach (string[] expected in expectedOutputs)
    {
        string outputPath = Path.ChangeExtension(outputBase, expected[1]);
        Assert.That(File.Exists(outputPath), $"Expected a {expected[0]} file \"{outputPath}\" to have been created; but non was found.");
    }
}
/// <summary>
/// OCRs a multi-page TIFF page by page with a single Tesseract engine (language "por")
/// and appends each recognised page to a searchable PDF.
/// </summary>
/// <param name="pFilePath">Path of the multi-page TIFF to process.</param>
private void ProcessByFile(string pFilePath)
{
    // Tessdata folder and OCR language.
    using (TesseractEngine ocrEngine = new TesseractEngine(_tessDataPath, "por"))
    {
        string baseName = Path.GetFileNameWithoutExtension(pFilePath);

        // Where the OCR'd PDF is saved; the path is given without an extension.
        string pdfPathNoExt = Path.Combine(txbDirSaida.Text, baseName);

        // PDF path plus the directory holding the PDF font.
        using (IResultRenderer pdfRenderer = ResultRenderer.CreatePdfRenderer(pdfPathNoExt, _tessDataPath))
        {
            // Loads every page of the TIFF.
            using (PixArray tiffPages = PixArray.LoadMultiPageTiffFromFile(pFilePath))
            {
                // Opens the PDF document.
                using (pdfRenderer.BeginDocument(baseName))
                {
                    foreach (Pix tiffPage in tiffPages)
                    {
                        using (tiffPage)
                        {
                            string inputName = Path.GetFileNameWithoutExtension(baseName);

                            // Runs OCR on the page; the result is disposed right after it is rendered.
                            using (Page ocrPage = ocrEngine.Process(tiffPage, inputName))
                            {
                                pdfRenderer.AddPage(ocrPage);
                            }
                        }
                    }
                }
            }
        }
    }
}
/// <summary>
/// Renders a single image into HOCR, PDF and TEXT at once and checks that the ".pdf",
/// ".hocr" and ".txt" files exist afterwards.
/// </summary>
public void CanRenderResultsIntoMultipleOutputFormats()
{
    string outputBase = TestResultRunFile(@"ResultRenderers\PDF\phototest");

    var requestedFormats = new List<RenderedFormat>();
    requestedFormats.Add(RenderedFormat.HOCR);
    requestedFormats.Add(RenderedFormat.PDF);
    requestedFormats.Add(RenderedFormat.TEXT);

    using (var multiRenderer = ResultRenderer.CreateRenderers(outputBase, DataPath, requestedFormats))
    {
        string inputImage = TestFilePath("Ocr/phototest.tif");
        ProcessFile(multiRenderer, inputImage);
    }

    // { label used in the failure message, file extension }, checked in this order.
    string[][] expectedOutputs =
    {
        new[] { "PDF", "pdf" },
        new[] { "HOCR", "hocr" },
        new[] { "TEXT", "txt" },
    };

    foreach (string[] expected in expectedOutputs)
    {
        string outputPath = Path.ChangeExtension(outputBase, expected[1]);
        Assert.That(File.Exists(outputPath), $"Expected a {expected[0]} file \"{outputPath}\" to have been created; but none was found.");
    }
}
/// <summary>
/// Builds the scene: configuration, materials, characters, both banks, the boat,
/// the priest and evil renderers, and the result plane.
/// </summary>
public MainSceneController()
{
    configs = new Configs();
    materials = new Materials(configs);
    characters = new Characters(configs);

    // The bank renderers are constructed but their references are not kept.
    new BankRenderer(configs.LeftBankPos, materials, configs);
    new BankRenderer(configs.RightBankPos, materials, configs);

    boatMove = new LinearMove2D(configs.BoatMoveSpeed);
    boat = new BoatRenderer(configs.RightBoatPos, materials, boatMove.Clone(), this, configs);

    priests = new PriestRenderer[configs.PeopleNum];
    evils = new EvilRenderer[configs.PeopleNum];
    charMove = new ParabolicMove2D(configs.CharMoveSummit, configs.CharMoveSpeed);

    // Priests take RightCharPos entries starting at index 2.
    for (int slot = 0; slot < configs.PeopleNum; ++slot)
    {
        var startPos = configs.RightCharPos[slot + 2];
        priests[slot] = new PriestRenderer(slot, startPos, materials, charMove.Clone(), this, configs);
    }

    // Evils take the entries that follow, offset by a further PeopleNum.
    for (int slot = 0; slot < configs.PeopleNum; ++slot)
    {
        var startPos = configs.RightCharPos[slot + 2 + configs.PeopleNum];
        evils[slot] = new EvilRenderer(slot, startPos, materials, charMove.Clone(), this, configs);
    }

    result = new ResultRenderer(
        configs.ResultPlanePos,
        configs.ResultPlaneRotation,
        configs.ResultPlaneScale,
        materials);
}
/// <summary>
/// Processes <paramref name="filename"/> through the renderer selected by
/// <c>OutputFormat</c> ("text", "hocr" or "pdf").
/// </summary>
/// <param name="filename">Path of the input image file.</param>
/// <remarks>
/// Any other <c>OutputFormat</c> value adds no renderer, so the aggregate renderer is created empty.
/// </remarks>
public override void ProcessFile(string filename)
{
    var renderers = new List<IResultRenderer>();

    // Read the format once, as the original switch did.
    string format = OutputFormat;
    if (format == "text")
    {
        renderers.Add(ResultRenderer.CreateTextRenderer(OutputFile));
    }
    else if (format == "hocr")
    {
        renderers.Add(ResultRenderer.CreateHOcrRenderer(OutputFile));
    }
    else if (format == "pdf")
    {
        renderers.Add(ResultRenderer.CreatePdfRenderer(OutputFile, Datapath, false));
    }

    using (IResultRenderer aggregate = new AggregateResultRenderer(renderers))
    {
        ProcessImageFile(aggregate, filename);
    }
}
/// <summary>
/// Sample entry point: OCRs every file in the "images" directory (English, tessdata in
/// "./tessdata") and appends each one as a page of a single PDF written to "./output".
/// </summary>
/// <param name="args">Command-line arguments; currently ignored.</param>
public static void Main(string[] args)
{
    try
    {
        using (IResultRenderer renderer = ResultRenderer.CreatePdfRenderer(@"./output", @"./tessdata"))
        using (renderer.BeginDocument("PDF Test"))
        using (TesseractEngine engine = new TesseractEngine(@"./tessdata", "eng", EngineMode.TesseractAndCube))
        {
            string[] list = Directory.GetFiles(@"images");

            // Directory.GetFiles does not guarantee an order; sort so the PDF page order is deterministic.
            Array.Sort(list, StringComparer.Ordinal);

            foreach (var item in list)
            {
                using (var tifFile = new Bitmap(item))
                using (var page = engine.Process(tifFile, "test"))
                {
                    renderer.AddPage(page);
                }
            }
        }
    }
    catch (Exception e)
    {
        Trace.TraceError(e.ToString());
        Console.WriteLine("Unexpected Error: " + e.Message);
        Console.WriteLine("Details: ");
        Console.WriteLine(e.ToString());
    }

    Console.Write("Press any key to continue . . . ");
    Console.ReadKey(true);
}