/// <summary>
/// OCR demo entry point: renders a PDF to a multipage TIFF, hands the TIFF to
/// ConvertTiffToJpeg, runs Tesseract (language data "por") over every JPEG found
/// in the image folder and writes the concatenated recognized text to one file.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    const string pdfPath = @"D:\Projetos\OCR_Demo\Resultado\PDF.pdf";
    const string imageDir = @"D:\Projetos\OCR_Demo\Resultado\imagem\";
    const string tiffPath = @"D:\Projetos\OCR_Demo\Resultado\imagem\PDF.tiff";
    const string textPath = @"D:\Projetos\OCR_Demo\Resultado\textos\teste.txt";

    // Convert the PDF into an image.
    SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();
    f.OpenPdf(pdfPath);

    // Without pages there is nothing to render; continuing would run the rest of
    // the pipeline against a missing (or stale) TIFF.
    if (f.PageCount <= 0)
    {
        Console.WriteLine("Erro {0}", f.Exception != null ? f.Exception.Message : "falha ao abrir o PDF");
        return;
    }

    f.ImageOptions.Dpi = 300;

    // The other samples in this file treat a return value of 0 as success.
    if (f.ToMultipageTiff(tiffPath) != 0)
    {
        Console.WriteLine("Erro {0}", "falha ao gerar o TIFF");
        return;
    }

    // ConvertTiffToJpeg is defined elsewhere; presumably it writes one JPEG per
    // TIFF page into the same folder (verify against its implementation).
    ConvertTiffToJpeg(tiffPath);

    // Ordinal, case-insensitive match so the filter does not depend on the current
    // culture and also accepts ".JPG".
    var files = Directory.GetFiles(imageDir)
        .Where(x => x.EndsWith(".jpg", StringComparison.OrdinalIgnoreCase));

    StringBuilder stringBuilder = new StringBuilder();

    try
    {
        // Create the engine once and reuse it for every image instead of
        // reloading the language data for each file.
        using (var engine = new TesseractEngine(@"tessdata", "por", EngineMode.Default))
        {
            foreach (var file in files)
            {
                try
                {
                    using (var img = Pix.LoadFromFile(file))
                    using (var page = engine.Process(img))
                    {
                        stringBuilder.Append(page.GetText());
                    }
                }
                catch (Exception ex)
                {
                    // Best effort: a failure on one image must not stop the others.
                    Console.WriteLine("Erro {0}", ex.Message);
                }
            }
        }
    }
    catch (Exception ex)
    {
        // Engine initialization failed (or enumeration of the folder did);
        // report it and still write whatever text was collected.
        Console.WriteLine("Erro {0}", ex.Message);
    }

    File.WriteAllText(textPath, stringBuilder.ToString());
}
/// <summary>
/// Converts a PDF file to a multipage TIFF (120 DPI) and, when the conversion
/// reports success, opens the resulting file through the operating-system shell.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Convert PDF file to Multipage TIFF file.
    SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();
    // You may download the latest version of SDK here:
    // www.sautinsoft.com/products/pdf-focus/download.php

    string pdfPath = @"d:\Tempos\table.pdf";
    string tiffPath = @"d:\Tempos\table.tiff";

    f.OpenPdf(pdfPath);

    if (f.PageCount <= 0)
    {
        // Report the failure instead of exiting silently; same message format
        // as ConvertPdfToAll in this file.
        System.Console.WriteLine("Error: {0}!", f.Exception != null ? f.Exception.Message : "no pages found");
        return;
    }

    f.ImageOptions.Dpi = 120;

    // A return value of 0 is treated as success.
    if (f.ToMultipageTiff(tiffPath) == 0)
    {
        // UseShellExecute must be set explicitly: it defaults to false on
        // .NET Core / .NET 5+, where starting a document path directly throws.
        System.Diagnostics.Process.Start(
            new System.Diagnostics.ProcessStartInfo(tiffPath) { UseShellExecute = true });
    }
}
/// <summary>
/// Runs each PDF Focus export in turn on one sample PDF: DOCX, RTF, Excel (XLS),
/// HTML, XML, PNG, multipage TIFF and plain text. Every output whose conversion
/// call returns 0 is launched through the operating-system shell. If the PDF
/// yields no pages, the converter's exception message is printed and the method
/// waits for a line of console input.
/// </summary>
public static void ConvertPdfToAll()
{
    SautinSoft.PdfFocus converter = new SautinSoft.PdfFocus();

    string sourcePdf = @"..\..\..\..\simple text.pdf";
    string target = String.Empty;

    // Single place for the "open the produced file" step used after every export.
    Action<string> openInShell = path =>
        System.Diagnostics.Process.Start(
            new System.Diagnostics.ProcessStartInfo(path) { UseShellExecute = true });

    converter.OpenPdf(sourcePdf);

    // Failure path first: nothing to convert.
    if (!(converter.PageCount > 0))
    {
        Console.WriteLine("Error: {0}!", converter.Exception.Message);
        Console.ReadLine();
        return;
    }

    // DOCX
    target = Path.ChangeExtension(sourcePdf, ".docx");
    converter.WordOptions.Format = PdfFocus.CWordOptions.eWordDocument.Docx;
    int status = converter.ToWord(target);
    if (status == 0)
    {
        openInShell(target);
    }

    // RTF
    target = Path.ChangeExtension(sourcePdf, ".rtf");
    converter.WordOptions.Format = PdfFocus.CWordOptions.eWordDocument.Rtf;
    status = converter.ToWord(target);
    if (status == 0)
    {
        openInShell(target);
    }

    // Excel
    target = Path.ChangeExtension(sourcePdf, ".xls");
    converter.ExcelOptions.ConvertNonTabularDataToSpreadsheet = true;
    status = converter.ToExcel(target);
    if (status == 0)
    {
        openInShell(target);
    }

    // HTML (the Excel option is assigned again here, exactly as in the original sample)
    target = Path.ChangeExtension(sourcePdf, ".html");
    converter.ExcelOptions.ConvertNonTabularDataToSpreadsheet = true;
    status = converter.ToHtml(target);
    if (status == 0)
    {
        openInShell(target);
    }

    // XML
    target = Path.ChangeExtension(sourcePdf, ".xml");
    converter.XmlOptions.ConvertNonTabularDataToSpreadsheet = true;
    status = converter.ToXml(target);
    if (status == 0)
    {
        openInShell(target);
    }

    // PNG at 300 DPI; the second ToImage argument (1) is presumably the page number.
    target = Path.ChangeExtension(sourcePdf, ".png");
    converter.ImageOptions.Dpi = 300;
    converter.ImageOptions.ImageFormat = System.Drawing.Imaging.ImageFormat.Png;
    status = converter.ToImage(target, 1);
    if (status == 0)
    {
        openInShell(target);
    }

    // Multipage TIFF, black & white (1 bpp) with CCITT4 compression.
    target = Path.ChangeExtension(sourcePdf, ".tiff");
    converter.ImageOptions.ColorDepth = PdfFocus.CImageOptions.eColorDepth.BlackWhite1bpp;
    status = converter.ToMultipageTiff(target, System.Drawing.Imaging.EncoderValue.CompressionCCITT4);
    if (status == 0)
    {
        openInShell(target);
    }

    // Plain text
    target = Path.ChangeExtension(sourcePdf, ".txt");
    status = converter.ToText(target);
    if (status == 0)
    {
        openInShell(target);
    }
}