Пример #1
0
        /// <summary>
        /// OCR demo: renders a PDF to a multipage TIFF, hands the TIFF to
        /// <c>ConvertTiffToJpeg</c>, runs Tesseract (language "por") over every JPEG found
        /// in the image folder and writes the concatenated text to a single file.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            const string pdfPath = @"D:\Projetos\OCR_Demo\Resultado\PDF.pdf";
            const string imageDir = @"D:\Projetos\OCR_Demo\Resultado\imagem\";
            const string tiffPath = @"D:\Projetos\OCR_Demo\Resultado\imagem\PDF.tiff";
            const string textPath = @"D:\Projetos\OCR_Demo\Resultado\textos\teste.txt";

            // Convert the PDF into an image.
            SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();
            f.OpenPdf(pdfPath);

            // Stop here when the PDF yielded no pages: continuing would run the OCR over
            // a missing TIFF or over images left behind by a previous run.
            if (f.PageCount <= 0)
            {
                Console.WriteLine("Erro {0}", "não foi possível abrir o PDF ou ele não contém páginas.");
                return;
            }

            f.ImageOptions.Dpi = 300;

            // The other callers of ToMultipageTiff treat 0 as success; do the same here
            // instead of ignoring the result.
            int tiffResult = f.ToMultipageTiff(tiffPath);
            if (tiffResult != 0)
            {
                Console.WriteLine("Erro {0}", "falha ao converter o PDF em TIFF (código " + tiffResult + ").");
                return;
            }

            ConvertTiffToJpeg(tiffPath);

            // Ordinal, case-insensitive match on the real extension (".jpg"), so the
            // filter does not depend on the current culture or on the file-name casing.
            // NOTE(review): Directory.GetFiles does not guarantee an order; if page order
            // matters in the output text, sort these paths explicitly.
            var files = Directory.GetFiles(imageDir)
                .Where(x => x.EndsWith(".jpg", StringComparison.OrdinalIgnoreCase));

            StringBuilder stringBuilder = new StringBuilder();

            try
            {
                // Creating the engine loads the language data, so build it once and
                // reuse it for every image instead of once per file.
                using (var engine = new TesseractEngine(@"tessdata", "por", EngineMode.Default))
                {
                    foreach (var file in files)
                    {
                        try
                        {
                            // Each page is disposed before the next Process call.
                            using (var img = Pix.LoadFromFile(file))
                            using (var page = engine.Process(img))
                            {
                                stringBuilder.Append(page.GetText());
                            }
                        }
                        catch (Exception ex)
                        {
                            // Best effort: a failure on one image must not stop the others.
                            Console.WriteLine("Erro {0}", ex.Message);
                        }
                    }
                }
            }
            catch (Exception engineError)
            {
                // Engine could not be created (e.g. tessdata not found); still fall
                // through so the output file is written with whatever was recognized.
                Console.WriteLine("Erro {0}", engineError.Message);
            }

            File.WriteAllText(textPath, stringBuilder.ToString());
        }
        /// <summary>
        /// Converts a PDF file to a multipage TIFF file and, when the conversion
        /// succeeds (return code 0), opens the TIFF with its associated application.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            //Convert PDF file to Multipage TIFF file
            SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();
            // You may download the latest version of SDK here:
            // www.sautinsoft.com/products/pdf-focus/download.php


            string pdfPath  = @"d:\Tempos\table.pdf";
            string tiffPath = @"d:\Tempos\table.tiff";

            f.OpenPdf(pdfPath);

            if (f.PageCount > 0)
            {
                f.ImageOptions.Dpi = 120;
                if (f.ToMultipageTiff(tiffPath) == 0)
                {
                    // Open the document through the OS shell. Process.Start(string) only
                    // works for non-executable files when UseShellExecute is true, which
                    // is not the default on .NET Core / .NET 5+, so request it explicitly.
                    System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(tiffPath)
                    {
                        UseShellExecute = true
                    });
                }
            }
        }
Пример #3
0
        /// <summary>
        /// Converts PDF to DOCX, RTF, HTML, XML, Excel (XLS), PNG, Multipage TIFF, Text.
        /// </summary>
        /// <remarks>
        /// Every output file is written next to the source PDF (same name, different
        /// extension) and is opened with its associated application when the
        /// conversion returns 0. If the PDF yields no pages, an error is printed and
        /// the method waits for Enter before returning.
        /// </remarks>
        public static void ConvertPdfToAll()
        {
            SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();

            string pdfFile = @"..\..\..\..\simple text.pdf";
            string outFile;

            f.OpenPdf(pdfFile);
            if (f.PageCount <= 0)
            {
                // Guard against a missing exception object so the error path itself
                // cannot fail with a NullReferenceException.
                string reason = f.Exception != null
                    ? f.Exception.Message
                    : "the PDF could not be opened or contains no pages";
                Console.WriteLine("Error: {0}!", reason);
                Console.ReadLine();
                return;
            }

            // To Docx.
            outFile = Path.ChangeExtension(pdfFile, ".docx");
            f.WordOptions.Format = PdfFocus.CWordOptions.eWordDocument.Docx;
            OpenIfConverted(f.ToWord(outFile), outFile);

            // To Rtf.
            outFile = Path.ChangeExtension(pdfFile, ".rtf");
            f.WordOptions.Format = PdfFocus.CWordOptions.eWordDocument.Rtf;
            OpenIfConverted(f.ToWord(outFile), outFile);

            // To Excel.
            outFile = Path.ChangeExtension(pdfFile, ".xls");
            f.ExcelOptions.ConvertNonTabularDataToSpreadsheet = true;
            OpenIfConverted(f.ToExcel(outFile), outFile);

            // To HTML.
            outFile = Path.ChangeExtension(pdfFile, ".html");
            OpenIfConverted(f.ToHtml(outFile), outFile);

            // To XML.
            outFile = Path.ChangeExtension(pdfFile, ".xml");
            f.XmlOptions.ConvertNonTabularDataToSpreadsheet = true;
            OpenIfConverted(f.ToXml(outFile), outFile);

            // To Image.
            outFile = Path.ChangeExtension(pdfFile, ".png");
            f.ImageOptions.Dpi = 300;
            f.ImageOptions.ImageFormat = System.Drawing.Imaging.ImageFormat.Png;
            OpenIfConverted(f.ToImage(outFile, 1), outFile);

            // To Multipage Tiff (Black & White).
            outFile = Path.ChangeExtension(pdfFile, ".tiff");
            f.ImageOptions.ColorDepth = PdfFocus.CImageOptions.eColorDepth.BlackWhite1bpp;
            OpenIfConverted(
                f.ToMultipageTiff(outFile, System.Drawing.Imaging.EncoderValue.CompressionCCITT4),
                outFile);

            // To Text.
            outFile = Path.ChangeExtension(pdfFile, ".txt");
            OpenIfConverted(f.ToText(outFile), outFile);
        }

        /// <summary>
        /// Opens <paramref name="outFile"/> with its associated application when
        /// <paramref name="result"/> is 0; otherwise reports the failed conversion.
        /// </summary>
        /// <param name="result">Return code of the conversion call; 0 is treated as success.</param>
        /// <param name="outFile">Path of the file the conversion was asked to produce.</param>
        private static void OpenIfConverted(int result, string outFile)
        {
            if (result != 0)
            {
                // Make a failed conversion visible instead of skipping it silently.
                Console.WriteLine("Error: conversion to \"{0}\" failed (code {1})!", outFile, result);
                return;
            }

            // UseShellExecute lets the OS pick the application registered for the file type.
            System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(outFile)
            {
                UseShellExecute = true
            });
        }