コード例 #1
0
        /// <summary>
        /// Collects the text of every page of the sample PDF into "result.txt" and opens that file.
        /// Pages that already contain searchable text are copied as is; all other pages are
        /// rendered to a temporary PNG and recognized with Tesseract.
        /// </summary>
        public static void Main()
        {
            // NOTE:
            // When used in trial mode, the library imposes some restrictions.
            // Please visit http://bitmiracle.com/pdf-library/trial-restrictions.aspx
            // for more information.

            var documentText = new StringBuilder();

            using (var pdf = new PdfDocument("Sample data/Freedman Scora.pdf"))
            using (var engine = new TesseractEngine(@"tessdata", "eng", EngineMode.Default))
            {
                for (int i = 0; i < pdf.PageCount; ++i)
                {
                    // Separate the text of consecutive pages with an empty line.
                    if (documentText.Length > 0)
                    {
                        documentText.Append("\r\n\r\n");
                    }

                    PdfPage page = pdf.Pages[i];
                    string searchableText = page.GetText();

                    // Simple check if the page contains searchable text.
                    // We do not need to do OCR in that case.
                    // IsNullOrWhiteSpace also tolerates a null result, unlike Trim().
                    if (!string.IsNullOrWhiteSpace(searchableText))
                    {
                        documentText.Append(searchableText);
                        continue;
                    }

                    // Save PDF page as high-resolution image
                    PdfDrawOptions options = PdfDrawOptions.Create();
                    options.BackgroundColor      = new PdfRgbColor(255, 255, 255);
                    options.HorizontalResolution = 600;
                    options.VerticalResolution   = 600;

                    string pageImage = $"page_{i}.png";
                    try
                    {
                        page.Save(pageImage, options);

                        using (var img = Pix.LoadFromFile(pageImage))
                        using (var recognizedPage = engine.Process(img))
                        {
                            var recognizedText = recognizedPage.GetText();
                            Console.WriteLine($"Mean confidence for page #{i}: {recognizedPage.GetMeanConfidence()}");

                            documentText.Append(recognizedText);
                        }
                    }
                    finally
                    {
                        // The rendered image is only an intermediate artifact; remove it
                        // even when rendering or recognition fails.
                        File.Delete(pageImage);
                    }
                }
            }

            const string Result = "result.txt";
            File.WriteAllText(Result, documentText.ToString());

            Process.Start(Result);
        }
コード例 #2
0
        /// <summary>
        /// Renders the sample PDF as bitonal TIFF twice: the first page alone, and the whole
        /// document as one multipage file. Prints the directory that holds the output.
        /// </summary>
        public static void Main()
        {
            // NOTE:
            // The library applies some restrictions while it runs in trial mode.
            // See http://bitmiracle.com/pdf-library/trial-restrictions.aspx for details.

            var singlePageTiff = "SaveAsBitonalTiff_page0.tiff";
            var wholeDocumentTiff = "SaveAsBitonalTiff.tiff";

            using (var document = new PdfDocument(@"..\Sample Data\jfif3.pdf"))
            {
                var drawOptions = PdfDrawOptions.Create();
                drawOptions.HorizontalResolution = 300;
                drawOptions.VerticalResolution = 300;
                drawOptions.BackgroundColor = new PdfRgbColor(255, 255, 255);

                // Request bitonal TIFF as the output compression.
                drawOptions.Compression = ImageCompressionOptions.CreateBitonalTiff();

                // First the single page ...
                PdfPage firstPage = document.Pages[0];
                firstPage.Save(singlePageTiff, drawOptions);

                // ... then every page in one multipage TIFF.
                document.SaveAsTiff(wholeDocumentTiff, drawOptions);
            }

            Console.WriteLine($"The output is located in {Environment.CurrentDirectory}");
        }
コード例 #3
0
        /// <summary>
        /// Configures log4net from the application configuration and then renders the first
        /// page of the sample PDF into memory so that the library has something to log.
        /// </summary>
        public static void Main()
        {
            // NOTE:
            // The library applies some restrictions while it runs in trial mode.
            // See http://bitmiracle.com/pdf-library/trial-restrictions.aspx for details.

            // Docotic.Pdf log messages only reach log4net after log4net has been configured.
            // The call below is the shortest way to do that: it reads the settings from the
            // app.config file (which carries further comments). Other options are described at
            // https://logging.apache.org/log4net/release/manual/configuration.html
            log4net.Config.XmlConfigurator.Configure();

            // Nothing else is required. The rendering below is expected to produce log
            // messages in the console and in log-file.txt next to the application's exe file.
            using (var document = new PdfDocument(@"..\Sample Data\Attachments.pdf"))
            using (var buffer = new MemoryStream())
            {
                PdfDrawOptions drawOptions = PdfDrawOptions.Create();
                document.Pages[0].Save(buffer, drawOptions);
            }
        }
コード例 #4
0
        /// <summary>
        /// Renders a PDF page to an image file and lazily yields the words recognized on it.
        /// </summary>
        /// <param name="page">Page to render and recognize.</param>
        /// <param name="engine">OCR engine that processes the rendered image.</param>
        /// <param name="resolution">Value applied to both the horizontal and the vertical rendering resolution.</param>
        /// <param name="tempFileName">Path the rendered image is saved to; this method does not delete the file.</param>
        /// <returns>
        /// One chunk (text, bounds, confidence) for every word-level iterator position
        /// that reports a bounding box.
        /// </returns>
        private static IEnumerable <RecognizedTextChunk> recognizeWords(PdfPage page, TesseractEngine engine,
                                                                        int resolution, string tempFileName)
        {
            // Render the page on a white background at the requested resolution.
            PdfDrawOptions drawOptions = PdfDrawOptions.Create();
            drawOptions.HorizontalResolution = resolution;
            drawOptions.VerticalResolution = resolution;
            drawOptions.BackgroundColor = new PdfRgbColor(255, 255, 255);
            page.Save(tempFileName, drawOptions);

            using (Pix image = Pix.LoadFromFile(tempFileName))
            using (var ocrPage = engine.Process(image))
            using (ResultIterator words = ocrPage.GetIterator())
            {
                words.Begin();
                do
                {
                    Rect box;
                    if (!words.TryGetBoundingBox(PageIteratorLevel.Word, out box))
                    {
                        // No bounding box at this position: skip it and advance.
                        continue;
                    }

                    string wordText = words.GetText(PageIteratorLevel.Word);
                    float wordConfidence = words.GetConfidence(PageIteratorLevel.Word);

                    yield return new RecognizedTextChunk(wordText, box, wordConfidence);
                }
                while (words.Next(PageIteratorLevel.Word));
            }
        }
コード例 #5
0
        /// <summary>
        /// Saves the first page of the sample PDF as a PNG using zoom draw options created
        /// with the value 400, then prints the directory that holds the output.
        /// </summary>
        public static void Main()
        {
            // NOTE:
            // The library applies some restrictions while it runs in trial mode.
            // See http://bitmiracle.com/pdf-library/trial-restrictions.aspx for details.

            var zoomedImagePath = "DrawZoomedPage.png";

            using (var document = new PdfDocument(@"..\Sample Data\jfif3.pdf"))
            {
                PdfPage firstPage = document.Pages[0];
                firstPage.Save(zoomedImagePath, PdfDrawOptions.CreateZoom(400));
            }

            Console.WriteLine($"The output is located in {Environment.CurrentDirectory}");
        }
コード例 #6
0
        /// <summary>
        /// Saves the first page of the sample PDF as a PNG using zoom draw options created
        /// with the value 400, then opens the resulting image.
        /// </summary>
        public static void Main()
        {
            // NOTE:
            // The library applies some restrictions while it runs in trial mode.
            // See http://bitmiracle.com/pdf-library/trial-restrictions.aspx for details.

            var zoomedImagePath = "DrawZoomedPage.png";

            using (var document = new PdfDocument(@"Sample Data\jfif3.pdf"))
            {
                PdfPage firstPage = document.Pages[0];
                firstPage.Save(zoomedImagePath, PdfDrawOptions.CreateZoom(400));
            }

            // Hand the image over to whatever the system associates with it.
            Process.Start(zoomedImagePath);
        }
コード例 #7
0
        /// <summary>
        /// Recognizes the text of a PDF page that has no readable (searchable) text.
        /// The page is rendered into memory at 200 DPI on a white background and the
        /// resulting image is processed by Tesseract using the "rus+eng" languages.
        /// </summary>
        /// <param name="page">The unreadable PDF page.</param>
        /// <returns>The text recognized on the page.</returns>
        private static string RecognizePageText(PdfPage page)
        {
            var options = PdfDrawOptions.Create();

            options.BackgroundColor      = new PdfRgbColor(255, 255, 255);
            options.HorizontalResolution = 200;
            options.VerticalResolution   = 200;

            using var memoryStream = new MemoryStream();
            page.Save(memoryStream, options);

            // ToArray() returns exactly the bytes that were written. GetBuffer() would hand
            // over the whole internal buffer, including unused capacity past the image data.
            byte[] imageBytes = memoryStream.ToArray();

            // NOTE(review): a new engine is created on every call; if this method is invoked
            // per page, sharing one engine between calls is probably cheaper — confirm.
            using var engine         = new TesseractEngine(@"tessdata\fast", "rus+eng", EngineMode.LstmOnly);
            using var img            = Pix.LoadFromMemory(imageBytes);
            using var recognizedPage = engine.Process(img);

            return recognizedPage.GetText();
        }
コード例 #8
0
        /// <summary>
        /// Creates a thumbnail of the first page of the sample PDF using fit-size draw options
        /// (200 x 200) over a gray background, then prints the directory that holds the output.
        /// </summary>
        public static void Main()
        {
            // NOTE:
            // The library applies some restrictions while it runs in trial mode.
            // See http://bitmiracle.com/pdf-library/trial-restrictions.aspx for details.

            var thumbnailPath = "MakePageThumbnail.png";

            using (var document = new PdfDocument(@"..\Sample Data\jfif3.pdf"))
            {
                var thumbnailSize = new PdfSize(200, 200);
                var drawOptions = PdfDrawOptions.CreateFitSize(thumbnailSize, false);
                drawOptions.BackgroundColor = new PdfGrayColor(100);

                PdfPage firstPage = document.Pages[0];
                firstPage.Save(thumbnailPath, drawOptions);
            }

            Console.WriteLine($"The output is located in {Environment.CurrentDirectory}");
        }
コード例 #9
0
ファイル: PDFTool.cs プロジェクト: OryxLib/Oryx.FastAdmin
        /// <summary>
        /// Saves every page of a PDF document as a separate image file with JPEG compression
        /// on a white background.
        /// </summary>
        /// <param name="path">Path of the PDF document to read.</param>
        /// <param name="savePath">Prefix of the output file names.</param>
        /// <param name="extension">Text appended after the page number, e.g. a file extension.</param>
        /// <remarks>
        /// Output files are named <c>savePath-N + extension</c>, where N starts at 1.
        /// </remarks>
        public static void PDFToImage(string path, string savePath, string extension)
        {
            // Register the license key held in LicenceKey before using the library.
            LicenseManager.AddLicenseData(LicenceKey);

            using (var document = new PdfDocument(path))
            {
                var drawOptions = PdfDrawOptions.Create();
                drawOptions.Compression = ImageCompressionOptions.CreateJpeg();
                drawOptions.BackgroundColor = new PdfRgbColor(255, 255, 255);

                int pageNumber = 0;
                foreach (var pdfPage in document.Pages)
                {
                    pageNumber++;
                    string targetFile = $"{savePath}-{pageNumber}{extension}";
                    pdfPage.Save(targetFile, drawOptions);
                }
            }
        }
コード例 #10
0
ファイル: Program.cs プロジェクト: goddices/dotnet-xuexi
        /// <summary>
        /// Renders every page of the given PDF file to an image file in a fixed output folder.
        /// </summary>
        /// <param name="file">Path of the PDF file to render.</param>
        /// <remarks>
        /// Output files are named after the input file followed by the 1-based page number.
        /// NOTE(review): no compression is set on the draw options, so the library default
        /// image format is used although the files are named ".jpg" — confirm this is intended.
        /// </remarks>
        static void Process(string file)
        {
            using (var pdfDocumentStream = File.OpenRead(file))
            using (var document = new PdfDocument(pdfDocumentStream))
            {
                PdfDrawOptions options = PdfDrawOptions.Create();
                options.BackgroundColor = new PdfRgbColor(255, 255, 255);

                int index = 0;
                foreach (var page in document.Pages)
                {
                    index++;
                    string imagePath = $@"C:\Users\goddi\Desktop\文档暂存\pdfimages\{ Path.GetFileName(file)}-{index}.jpg";

                    // Dispose the stream per page so the image is flushed and the handle released.
                    using (var imageStream = new FileStream(imagePath, FileMode.Create))
                    {
                        page.Save(imageStream, options);
                    }
                }
            }
        }
コード例 #11
0
        /// <summary>
        /// Creates a thumbnail of the first page of the sample PDF using fit-size draw options
        /// (200 x 200) over a gray background, then opens the resulting image.
        /// </summary>
        public static void Main()
        {
            // NOTE:
            // The library applies some restrictions while it runs in trial mode.
            // See http://bitmiracle.com/pdf-library/trial-restrictions.aspx for details.

            var thumbnailPath = "MakePageThumbnail.png";

            using (var document = new PdfDocument(@"Sample Data/jfif3.pdf"))
            {
                var thumbnailSize = new PdfSize(200, 200);
                var drawOptions = PdfDrawOptions.CreateFitSize(thumbnailSize, false);
                drawOptions.BackgroundColor = new PdfGrayColor(100);

                PdfPage firstPage = document.Pages[0];
                firstPage.Save(thumbnailPath, drawOptions);
            }

            // Hand the image over to whatever the system associates with it.
            Process.Start(thumbnailPath);
        }
コード例 #12
0
        /// <summary>
        /// Print-page handler: renders the PDF page that corresponds to the page being
        /// printed into an in-memory image and draws it at the origin of the print surface.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Supplies the target graphics and the index of the page being printed.</param>
        private void printDocument_PrintPage(object sender, PrintPageEventArgs e)
        {
            Graphics target = e.Graphics;

            using (var imageData = new MemoryStream())
            {
                // Render at the DPI reported by the target graphics.
                PdfDrawOptions drawOptions = PdfDrawOptions.Create();
                drawOptions.HorizontalResolution = target.DPI;
                drawOptions.VerticalResolution = target.DPI;

                // The selected range start is shifted down by one before it is combined
                // with the index of the page currently being printed.
                var pageIndex = m_printDocument.PrintSettings.SelectedPageRange.Start - 1 + e.CurrentPage;
                PdfPage pdfPage = m_pdf.Pages[pageIndex];
                pdfPage.Save(imageData, drawOptions);

                // Rewind so the bitmap is decoded from the start of the rendered data.
                imageData.Position = 0;
                using (var rendered = new Bitmap(imageData))
                {
                    target.DrawImage(rendered, 0, 0);
                }
            }
        }
コード例 #13
0
        /// <summary>
        /// Saves the sample PDF as a TIFF file rendered on a white background, then opens
        /// the resulting file.
        /// </summary>
        public static void Main()
        {
            // NOTE:
            // The library applies some restrictions while it runs in trial mode.
            // See http://bitmiracle.com/pdf-library/trial-restrictions.aspx for details.

            var tiffPath = "SaveAsTiff.tiff";

            using (PdfDocument document = new PdfDocument(@"Sample Data\jfif3.pdf"))
            {
                var drawOptions = PdfDrawOptions.Create();
                drawOptions.BackgroundColor = new PdfRgbColor(255, 255, 255);

                document.SaveAsTiff(tiffPath, drawOptions);
            }

            // Hand the file over to whatever the system associates with it.
            Process.Start(tiffPath);
        }
コード例 #14
0
        /// <summary>
        /// Saves the sample PDF as a TIFF file rendered on a white background, then prints
        /// the directory that holds the output.
        /// </summary>
        public static void Main()
        {
            // NOTE:
            // The library applies some restrictions while it runs in trial mode.
            // See http://bitmiracle.com/pdf-library/trial-restrictions.aspx for details.

            var tiffPath = "SaveAsTiff.tiff";

            using (var document = new PdfDocument(@"..\Sample Data\jfif3.pdf"))
            {
                var drawOptions = PdfDrawOptions.Create();
                drawOptions.BackgroundColor = new PdfRgbColor(255, 255, 255);

                document.SaveAsTiff(tiffPath, drawOptions);
            }

            Console.WriteLine($"The output is located in {Environment.CurrentDirectory}");
        }
コード例 #15
0
        /// <summary>
        /// Saves the page at index 1 of the sample PDF as an image with JPEG compression
        /// on a white background, then opens the resulting image.
        /// </summary>
        public static void Main()
        {
            // NOTE:
            // The library applies some restrictions while it runs in trial mode.
            // See http://bitmiracle.com/pdf-library/trial-restrictions.aspx for details.

            var jpegPath = "SavePageAsImage.jpg";

            using (var document = new PdfDocument(@"Sample Data\jfif3.pdf"))
            {
                var drawOptions = PdfDrawOptions.Create();
                drawOptions.Compression = ImageCompressionOptions.CreateJpeg();
                drawOptions.BackgroundColor = new PdfRgbColor(255, 255, 255);

                PdfPage pageToSave = document.Pages[1];
                pageToSave.Save(jpegPath, drawOptions);
            }

            // Hand the image over to whatever the system associates with it.
            Process.Start(jpegPath);
        }
コード例 #16
0
        /// <summary>
        /// Saves the first page of the sample PDF as a PNG with horizontal and vertical
        /// resolution set to 600 on a white background, then opens the resulting image.
        /// </summary>
        public static void Main()
        {
            // NOTE:
            // The library applies some restrictions while it runs in trial mode.
            // See http://bitmiracle.com/pdf-library/trial-restrictions.aspx for details.

            var imagePath = "SavePageCustomResolution.png";

            using (var document = new PdfDocument(@"Sample Data\gmail-cheat-sheet.pdf"))
            {
                var drawOptions = PdfDrawOptions.Create();
                drawOptions.HorizontalResolution = 600;
                drawOptions.VerticalResolution = 600;
                drawOptions.BackgroundColor = new PdfRgbColor(255, 255, 255);

                PdfPage firstPage = document.Pages[0];
                firstPage.Save(imagePath, drawOptions);
            }

            // Hand the image over to whatever the system associates with it.
            Process.Start(imagePath);
        }
コード例 #17
0
        /// <summary>
        /// Renders the first page of the sample PDF into memory so that the library emits
        /// log messages into the NLog loggers configured for the application.
        /// </summary>
        public static void Main()
        {
            // NOTE:
            // The library applies some restrictions while it runs in trial mode.
            // See http://bitmiracle.com/pdf-library/trial-restrictions.aspx for details.

            // Docotic.Pdf log messages only reach NLog after NLog has been configured.
            // The usual place for that configuration is the app.config file; the one that
            // ships with this sample carries further comments.

            // With NLog configured nothing else is required. The rendering below is expected
            // to produce log messages in the console and in log-file.txt next to the
            // application's exe file.
            using (var document = new PdfDocument(@"..\Sample Data\Attachments.pdf"))
            using (var buffer = new MemoryStream())
            {
                PdfDrawOptions drawOptions = PdfDrawOptions.Create();
                document.Pages[0].Save(buffer, drawOptions);
            }
        }
コード例 #18
0
ファイル: PDFTool.cs プロジェクト: OryxLib/Oryx.FastAdmin
        /// <summary>
        /// Renders every page of a PDF document to an in-memory image with JPEG compression
        /// on a white background. The rendering runs on a thread-pool thread.
        /// </summary>
        /// <param name="path">Path of the PDF document to read.</param>
        /// <returns>
        /// One stream per page, in page order. Each stream is positioned at its beginning so
        /// it can be read right away; the caller owns the streams and should dispose them.
        /// </returns>
        public static async Task <List <Stream> > PDFToImage(string path)
        {
            // Register the license key held in LicenceKey before using the library.
            LicenseManager.AddLicenseData(LicenceKey);

            return await Task.Run(() =>
            {
                var streamList = new List <Stream>();
                using (PdfDocument pdf = new PdfDocument(path))
                {
                    PdfDrawOptions options = PdfDrawOptions.Create();
                    options.BackgroundColor = new PdfRgbColor(255, 255, 255);
                    options.Compression = ImageCompressionOptions.CreateJpeg();
                    foreach (var page in pdf.Pages)
                    {
                        var outputStream = new MemoryStream();
                        page.Save(outputStream, options);

                        // Saving leaves the position at the end of the data; rewind so
                        // that consumers reading the stream do not get zero bytes.
                        outputStream.Position = 0;
                        streamList.Add(outputStream);
                    }
                }
                return streamList;
            });
        }
コード例 #19
0
        /// <summary>
        /// Renders the first page of the fixed PDF file to an in-memory image with JPEG
        /// compression at 200 DPI on a white background.
        /// </summary>
        /// <returns>
        /// A stream with the rendered image, positioned at its beginning. The caller owns
        /// the stream and should dispose it.
        /// </returns>
        private async Task <Stream> Inner()
        {
            var filename = "我的简历.pdf";

            using (var pdfDocumentStream = File.OpenRead(filename))
            using (var document = new PdfDocument(pdfDocumentStream))
            {
                var page = document.Pages[0];

                PdfDrawOptions options = PdfDrawOptions.Create();
                options.BackgroundColor      = new PdfRgbColor(255, 255, 255);
                options.Compression          = ImageCompressionOptions.CreateJpeg();
                options.HorizontalResolution = 200;
                options.VerticalResolution   = 200;

                // Not wrapped in "using": the stream is handed to the caller.
                var memoryStream = new MemoryStream();
                page.Save(memoryStream, options);
                memoryStream.Seek(0, SeekOrigin.Begin);

                // There is no real asynchronous work here; awaiting a completed task keeps
                // the method's async contract (failures surface through the returned task).
                return await Task.FromResult(memoryStream);
            }
        }
コード例 #20
0
        /// <summary>
        /// Collects the text of every page of the sample PDF into "result.txt" and prints the
        /// directory that holds it. Pages with searchable text are copied as is; the other
        /// pages are rendered to a temporary PNG, recognized with Tesseract, and the PNG is
        /// deleted afterwards.
        /// </summary>
        public static void Main()
        {
            // NOTE:
            // The library applies some restrictions while it runs in trial mode.
            // See http://bitmiracle.com/pdf-library/trial-restrictions.aspx for details.

            var collectedText = new StringBuilder();

            using (var document = new PdfDocument(@"..\Sample data\Freedman Scora.pdf"))
            {
                // The tessdata folder is looked up next to the executing assembly.
                string assemblyFolder = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location);
                string tessDataFolder = Path.Combine(assemblyFolder, @"tessdata");

                using (var ocrEngine = new TesseractEngine(tessDataFolder, "eng", EngineMode.LstmOnly))
                {
                    for (int pageIndex = 0; pageIndex < document.PageCount; ++pageIndex)
                    {
                        // Separate the text of consecutive pages with an empty line.
                        if (collectedText.Length > 0)
                        {
                            collectedText.Append("\r\n\r\n");
                        }

                        PdfPage currentPage = document.Pages[pageIndex];
                        string embeddedText = currentPage.GetText();

                        if (embeddedText.Trim().Length != 0)
                        {
                            // The page already carries searchable text; OCR is not needed.
                            collectedText.Append(embeddedText);
                        }
                        else
                        {
                            // Not searchable: render the page to an image first.
                            PdfDrawOptions drawOptions = PdfDrawOptions.Create();
                            drawOptions.HorizontalResolution = 200;
                            drawOptions.VerticalResolution = 200;
                            drawOptions.BackgroundColor = new PdfRgbColor(255, 255, 255);

                            string renderedImage = $"page_{pageIndex}.png";
                            currentPage.Save(renderedImage, drawOptions);

                            // Run OCR over the rendered image.
                            using (Pix image = Pix.LoadFromFile(renderedImage))
                            using (Page ocrResult = ocrEngine.Process(image))
                            {
                                Console.WriteLine($"Mean confidence for page #{pageIndex}: {ocrResult.GetMeanConfidence()}");

                                collectedText.Append(ocrResult.GetText());
                            }

                            File.Delete(renderedImage);
                        }
                    }
                }
            }

            const string Result = "result.txt";

            using (var output = new StreamWriter(Result))
            {
                output.Write(collectedText.ToString());
            }

            Console.WriteLine($"The output is located in {Environment.CurrentDirectory}");
        }