/// <summary>
/// Renders the sample PDF as bitonal TIFF twice: once for the first page only
/// and once for the whole document as a multipage TIFF.
/// </summary>
public static void Main()
{
    // NOTE:
    // When used in trial mode, the library imposes some restrictions.
    // Please visit http://bitmiracle.com/pdf-library/trial-restrictions.aspx
    // for more information.

    const string WholeDocumentTiff = "SaveAsBitonalTiff.tiff";
    const string FirstPageTiff = "SaveAsBitonalTiff_page0.tiff";

    using (var document = new PdfDocument(@"..\Sample Data\jfif3.pdf"))
    {
        // White background, resolution 300 in both directions.
        var drawOptions = PdfDrawOptions.Create();
        drawOptions.BackgroundColor = new PdfRgbColor(255, 255, 255);
        drawOptions.HorizontalResolution = 300;
        drawOptions.VerticalResolution = 300;

        // Request bitonal TIFF as the output compression.
        drawOptions.Compression = ImageCompressionOptions.CreateBitonalTiff();

        // First the single page ...
        var firstPage = document.Pages[0];
        firstPage.Save(FirstPageTiff, drawOptions);

        // ... then every page into one multipage TIFF.
        document.SaveAsTiff(WholeDocumentTiff, drawOptions);
    }

    Console.WriteLine($"The output is located in {Environment.CurrentDirectory}");
}
/// <summary>
/// Extracts the text of every page of the sample PDF into "result.txt".
/// Pages that already contain searchable text are copied as-is; all other
/// pages are rendered to a PNG file and recognized with Tesseract.
/// </summary>
public static void Main()
{
    // NOTE:
    // When used in trial mode, the library imposes some restrictions.
    // Please visit http://bitmiracle.com/pdf-library/trial-restrictions.aspx
    // for more information.

    var documentText = new StringBuilder();
    using (var pdf = new PdfDocument("Sample data/Freedman Scora.pdf"))
    using (var engine = new TesseractEngine(@"tessdata", "eng", EngineMode.Default))
    {
        for (int i = 0; i < pdf.PageCount; ++i)
        {
            // Separate the text of consecutive pages with an empty line.
            if (documentText.Length > 0)
            {
                documentText.Append("\r\n\r\n");
            }

            PdfPage page = pdf.Pages[i];
            string searchableText = page.GetText();

            // Simple check if the page contains searchable text.
            // We do not need to do OCR in that case.
            // IsNullOrWhiteSpace also covers a null string, which Trim() would throw on.
            if (!string.IsNullOrWhiteSpace(searchableText))
            {
                documentText.Append(searchableText);
                continue;
            }

            // Save PDF page as high-resolution image
            PdfDrawOptions options = PdfDrawOptions.Create();
            options.BackgroundColor = new PdfRgbColor(255, 255, 255);
            options.HorizontalResolution = 600;
            options.VerticalResolution = 600;

            string pageImage = $"page_{i}.png";
            try
            {
                page.Save(pageImage, options);

                using (var img = Pix.LoadFromFile(pageImage))
                using (var recognizedPage = engine.Process(img))
                {
                    var recognizedText = recognizedPage.GetText();
                    Console.WriteLine($"Mean confidence for page #{i}: {recognizedPage.GetMeanConfidence()}");

                    documentText.Append(recognizedText);
                }
            }
            finally
            {
                // The rendered image is only an intermediate artifact for OCR;
                // remove it even when rendering or recognition fails.
                File.Delete(pageImage);
            }
        }
    }

    const string Result = "result.txt";
    using (var writer = new StreamWriter(Result))
    {
        writer.Write(documentText.ToString());
    }

    Process.Start(Result);
}
/// <summary>
/// Renders a PDF page to an image file, runs it through the given Tesseract
/// engine and lazily yields one chunk per recognized word.
/// </summary>
/// <param name="page">Page to render and recognize.</param>
/// <param name="engine">Engine used to process the rendered image.</param>
/// <param name="resolution">Value used for both the horizontal and the vertical rendering resolution.</param>
/// <param name="tempFileName">Path the rendered image is written to. This method does not delete the file.</param>
/// <returns>
/// A chunk (text, bounding box, confidence) for every word for which a
/// bounding box could be obtained.
/// </returns>
private static IEnumerable<RecognizedTextChunk> recognizeWords(PdfPage page, TesseractEngine engine, int resolution, string tempFileName)
{
    // Render the page on a white background at the requested resolution.
    var drawOptions = PdfDrawOptions.Create();
    drawOptions.BackgroundColor = new PdfRgbColor(255, 255, 255);
    drawOptions.HorizontalResolution = resolution;
    drawOptions.VerticalResolution = resolution;
    page.Save(tempFileName, drawOptions);

    using (var image = Pix.LoadFromFile(tempFileName))
    using (var ocrResult = engine.Process(image))
    using (ResultIterator words = ocrResult.GetIterator())
    {
        words.Begin();
        do
        {
            // Skip positions without a bounding box; `continue` still
            // evaluates the loop condition, so the iterator advances.
            if (!words.TryGetBoundingBox(PageIteratorLevel.Word, out Rect box))
            {
                continue;
            }

            string wordText = words.GetText(PageIteratorLevel.Word);
            float wordConfidence = words.GetConfidence(PageIteratorLevel.Word);
            yield return new RecognizedTextChunk(wordText, box, wordConfidence);
        }
        while (words.Next(PageIteratorLevel.Word));
    }
}
/// <summary>
/// Configures log4net from the application configuration and then renders
/// the first page of the sample PDF into memory so that the library emits
/// log messages.
/// </summary>
public static void Main()
{
    // NOTE:
    // When used in trial mode, the library imposes some restrictions.
    // Please visit http://bitmiracle.com/pdf-library/trial-restrictions.aspx
    // for more information.

    // In order to receive log messages from Docotic.Pdf into a log4net logger,
    // you would need to configure log4net. Here is a simplest one-line
    // way to configure it. You might use any other way described in the docs
    // https://logging.apache.org/log4net/release/manual/configuration.html
    log4net.Config.XmlConfigurator.Configure();

    // The above line configures log4net using properties from app.config file.
    // Take a look into the app.config file, it contains more comments.

    // After log4net is configured, there is nothing else to do, the library
    // will put its log messages into the configured loggers.

    // The following code should produce log messages in console and in
    // log-file.txt file next to application's exe file.
    using (var document = new PdfDocument(@"..\Sample Data\Attachments.pdf"))
    using (var renderTarget = new MemoryStream())
    {
        // The rendered image itself is discarded; only the logging matters here.
        var firstPage = document.Pages[0];
        firstPage.Save(renderTarget, PdfDrawOptions.Create());
    }
}
/// <summary>
/// Recognizes the text of an unreadable PDF page using OCR.
/// </summary>
/// <param name="page">The unreadable PDF page.</param>
/// <returns>The text recognized on the page.</returns>
private static string RecognizePageText(PdfPage page)
{
    // Render the page on a white background with resolution 200 in both directions.
    var options = PdfDrawOptions.Create();
    options.BackgroundColor = new PdfRgbColor(255, 255, 255);
    options.HorizontalResolution = 200;
    options.VerticalResolution = 200;

    using var memoryStream = new MemoryStream();
    page.Save(memoryStream, options);

    // ToArray() returns exactly the bytes that were written. GetBuffer()
    // would expose the whole internal buffer, including the unused capacity
    // after the image data.
    byte[] imageBytes = memoryStream.ToArray();

    using var engine = new TesseractEngine(@"tessdata\fast", "rus+eng", EngineMode.LstmOnly);
    using var img = Pix.LoadFromMemory(imageBytes);
    using var recognizedPage = engine.Process(img);

    return recognizedPage.GetText();
}
/// <summary>
/// Print-page handler: renders the PDF page that corresponds to the page
/// being printed into an in-memory image and draws it at the origin of the
/// print graphics.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Supplies the target graphics and the index of the page being printed.</param>
private void printDocument_PrintPage(object sender, PrintPageEventArgs e)
{
    Graphics target = e.Graphics;

    using (var imageStream = new MemoryStream())
    {
        // Render at the resolution reported by the target graphics.
        var drawOptions = PdfDrawOptions.Create();
        drawOptions.HorizontalResolution = target.DPI;
        drawOptions.VerticalResolution = target.DPI;

        // The selected range start is offset by one to index into Pages;
        // CurrentPage is added as the offset within the range.
        var pageIndex = m_printDocument.PrintSettings.SelectedPageRange.Start - 1 + e.CurrentPage;
        PdfPage pdfPage = m_pdf.Pages[pageIndex];
        pdfPage.Save(imageStream, drawOptions);

        // Rewind so the bitmap is decoded from the first byte.
        imageStream.Position = 0;
        using (var rendered = new Bitmap(imageStream))
        {
            target.DrawImage(rendered, 0, 0);
        }
    }
}
/// <summary>
/// Saves every page of a PDF as a separate JPEG-compressed image file.
/// </summary>
/// <param name="path">Path of the PDF document to open.</param>
/// <param name="savePath">Prefix of every output file name.</param>
/// <param name="extension">Text appended after the page number in every output file name.</param>
/// <remarks>
/// Output files are named <c>savePath-N + extension</c>, with N starting at 1.
/// </remarks>
public static void PDFToImage(string path, string savePath, string extension)
{
    // Register the license data held in LicenceKey before using the library.
    LicenseManager.AddLicenseData(LicenceKey);

    using (var document = new PdfDocument(path))
    {
        var jpegOptions = PdfDrawOptions.Create();
        jpegOptions.BackgroundColor = new PdfRgbColor(255, 255, 255);
        jpegOptions.Compression = ImageCompressionOptions.CreateJpeg();

        int pageNumber = 0;
        foreach (var page in document.Pages)
        {
            pageNumber++;
            string target = $"{savePath}-{pageNumber}{extension}";
            page.Save(target, jpegOptions);
        }
    }
}
/// <summary>
/// Renders every page of the given PDF file to an image file in a fixed
/// output folder. Files are named "&lt;pdf file name&gt;-&lt;page number&gt;.jpg",
/// with page numbers starting at 1.
/// </summary>
/// <param name="file">Path of the PDF file to render.</param>
static void Process(string file)
{
    using (var pdfDocumentStream = File.OpenRead(file))
    using (var document = new PdfDocument(pdfDocumentStream))
    {
        PdfDrawOptions options = PdfDrawOptions.Create();
        options.BackgroundColor = new PdfRgbColor(255, 255, 255);
        // NOTE(review): no Compression is set on the options although the
        // output name ends in ".jpg" - confirm the intended image format.

        int index = 0;
        foreach (var page in document.Pages)
        {
            index++;
            string imagePath = $@"C:\Users\goddi\Desktop\文档暂存\pdfimages\{ Path.GetFileName(file)}-{index}.jpg";

            // Dispose the output stream per page so the data is flushed and
            // the file handle is released before moving on.
            using (var imageStream = new FileStream(imagePath, FileMode.Create))
            {
                page.Save(imageStream, options);
            }
        }
    }
}
/// <summary>
/// Saves the whole sample PDF as a TIFF file on a white background and then
/// opens the result.
/// </summary>
public static void Main()
{
    // NOTE:
    // When used in trial mode, the library imposes some restrictions.
    // Please visit http://bitmiracle.com/pdf-library/trial-restrictions.aspx
    // for more information.

    const string TiffPath = "SaveAsTiff.tiff";

    using (PdfDocument document = new PdfDocument(@"Sample Data\jfif3.pdf"))
    {
        var whiteBackground = PdfDrawOptions.Create();
        whiteBackground.BackgroundColor = new PdfRgbColor(255, 255, 255);

        document.SaveAsTiff(TiffPath, whiteBackground);
    }

    // Hand the produced file to the operating system.
    Process.Start(TiffPath);
}
/// <summary>
/// Saves the whole sample PDF as a TIFF file on a white background and
/// prints the directory that contains the output.
/// </summary>
public static void Main()
{
    // NOTE:
    // When used in trial mode, the library imposes some restrictions.
    // Please visit http://bitmiracle.com/pdf-library/trial-restrictions.aspx
    // for more information.

    const string TiffPath = "SaveAsTiff.tiff";

    using (var document = new PdfDocument(@"..\Sample Data\jfif3.pdf"))
    {
        var whiteBackground = PdfDrawOptions.Create();
        whiteBackground.BackgroundColor = new PdfRgbColor(255, 255, 255);

        document.SaveAsTiff(TiffPath, whiteBackground);
    }

    Console.WriteLine($"The output is located in {Environment.CurrentDirectory}");
}
/// <summary>
/// Saves the page at index 1 of the sample PDF as a JPEG-compressed image on
/// a white background and then opens the result.
/// </summary>
public static void Main()
{
    // NOTE:
    // When used in trial mode, the library imposes some restrictions.
    // Please visit http://bitmiracle.com/pdf-library/trial-restrictions.aspx
    // for more information.

    const string ImagePath = "SavePageAsImage.jpg";

    using (var document = new PdfDocument(@"Sample Data\jfif3.pdf"))
    {
        var jpegOptions = PdfDrawOptions.Create();
        jpegOptions.BackgroundColor = new PdfRgbColor(255, 255, 255);
        jpegOptions.Compression = ImageCompressionOptions.CreateJpeg();

        // Index 1 selects the second page of the document.
        var secondPage = document.Pages[1];
        secondPage.Save(ImagePath, jpegOptions);
    }

    // Hand the produced file to the operating system.
    Process.Start(ImagePath);
}
/// <summary>
/// Saves the first page of the sample PDF as an image with resolution 600 in
/// both directions on a white background and then opens the result.
/// </summary>
public static void Main()
{
    // NOTE:
    // When used in trial mode, the library imposes some restrictions.
    // Please visit http://bitmiracle.com/pdf-library/trial-restrictions.aspx
    // for more information.

    const string ImagePath = "SavePageCustomResolution.png";

    using (var document = new PdfDocument(@"Sample Data\gmail-cheat-sheet.pdf"))
    {
        var highResOptions = PdfDrawOptions.Create();
        highResOptions.BackgroundColor = new PdfRgbColor(255, 255, 255);
        highResOptions.HorizontalResolution = 600;
        highResOptions.VerticalResolution = 600;

        var firstPage = document.Pages[0];
        firstPage.Save(ImagePath, highResOptions);
    }

    // Hand the produced file to the operating system.
    Process.Start(ImagePath);
}
/// <summary>
/// Renders the first page of the sample PDF into memory so that the library
/// emits log messages into the NLog loggers configured for the application.
/// </summary>
public static void Main()
{
    // NOTE:
    // When used in trial mode, the library imposes some restrictions.
    // Please visit http://bitmiracle.com/pdf-library/trial-restrictions.aspx
    // for more information.

    // In order to receive log messages from Docotic.Pdf into a NLog logger,
    // you would need to configure NLog. It is a common practice to put NLog
    // configuration into app.config file.
    // Take a look into the app.config file, it contains more comments.

    // After NLog is configured, there is nothing else to do, the library
    // will put its log messages into the configured loggers.

    // The following code should produce log messages in console and in
    // log-file.txt file next to application's exe file.
    using (var document = new PdfDocument(@"..\Sample Data\Attachments.pdf"))
    using (var renderTarget = new MemoryStream())
    {
        // The rendered image itself is discarded; only the logging matters here.
        var firstPage = document.Pages[0];
        firstPage.Save(renderTarget, PdfDrawOptions.Create());
    }
}
/// <summary>
/// Renders the first page of the PDF file "我的简历.pdf" as a JPEG-compressed
/// image with resolution 200 in both directions on a white background.
/// </summary>
/// <returns>
/// An in-memory stream that holds the rendered image, positioned at its
/// beginning. The caller owns the stream.
/// </returns>
private async Task<Stream> Inner()
{
    var filename = "我的简历.pdf";
    using (var pdfDocumentStream = File.OpenRead(filename))
    using (var document = new PdfDocument(pdfDocumentStream))
    {
        var page = document.Pages[0];

        PdfDrawOptions options = PdfDrawOptions.Create();
        options.BackgroundColor = new PdfRgbColor(255, 255, 255);
        options.Compression = ImageCompressionOptions.CreateJpeg();
        options.HorizontalResolution = 200;
        options.VerticalResolution = 200;

        // The result must outlive this method, so it is intentionally not
        // wrapped in a using statement.
        var memoryStream = new MemoryStream();
        page.Save(memoryStream, options);

        // Rewind so the caller reads the image from the first byte.
        memoryStream.Seek(0, SeekOrigin.Begin);
        return await Task.FromResult(memoryStream);
    }
}
/// <summary>
/// Renders every page of a PDF to a JPEG-compressed image on a white
/// background, on a thread-pool thread.
/// </summary>
/// <param name="path">Path of the PDF document to open.</param>
/// <returns>
/// One in-memory stream per page, in page order. Every stream is positioned
/// at its beginning and is owned by the caller.
/// </returns>
public static async Task<List<Stream>> PDFToImage(string path)
{
    // Register the license data held in LicenceKey before using the library.
    LicenseManager.AddLicenseData(LicenceKey);

    return await Task.Run(() =>
    {
        var streamList = new List<Stream>();
        using (PdfDocument pdf = new PdfDocument(path))
        {
            PdfDrawOptions options = PdfDrawOptions.Create();
            options.BackgroundColor = new PdfRgbColor(255, 255, 255);
            options.Compression = ImageCompressionOptions.CreateJpeg();

            foreach (var page in pdf.Pages)
            {
                var outputStream = new MemoryStream();
                page.Save(outputStream, options);

                // Saving leaves the position at the end of the data; rewind
                // so a consumer that reads the stream gets the image instead
                // of zero bytes.
                outputStream.Position = 0;
                streamList.Add(outputStream);
            }
        }

        return streamList;
    });
}
/// <summary>
/// Extracts the text of every page of the sample PDF into "result.txt".
/// Pages with searchable text are copied as-is; every other page is rendered
/// to a temporary PNG file, recognized with Tesseract, and the temporary
/// file is deleted afterwards.
/// </summary>
public static void Main()
{
    // NOTE:
    // When used in trial mode, the library imposes some restrictions.
    // Please visit http://bitmiracle.com/pdf-library/trial-restrictions.aspx
    // for more information.

    var collectedText = new StringBuilder();

    using (var document = new PdfDocument(@"..\Sample data\Freedman Scora.pdf"))
    {
        // The tessdata folder is looked up next to the executing assembly.
        var assemblyFolder = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location);
        var tessDataFolder = Path.Combine(assemblyFolder, @"tessdata");

        using (var ocrEngine = new TesseractEngine(tessDataFolder, "eng", EngineMode.LstmOnly))
        {
            for (int pageIndex = 0; pageIndex < document.PageCount; ++pageIndex)
            {
                // Separate the text of consecutive pages with an empty line.
                if (collectedText.Length > 0)
                {
                    collectedText.Append("\r\n\r\n");
                }

                PdfPage currentPage = document.Pages[pageIndex];
                string existingText = currentPage.GetText();

                // Simple check if the page contains searchable text.
                // We do not need to perform OCR in that case.
                if (existingText.Trim().Length > 0)
                {
                    collectedText.Append(existingText);
                }
                else
                {
                    // This page is not searchable.
                    // Save PDF page as a high-resolution image.
                    var drawOptions = PdfDrawOptions.Create();
                    drawOptions.BackgroundColor = new PdfRgbColor(255, 255, 255);
                    drawOptions.HorizontalResolution = 200;
                    drawOptions.VerticalResolution = 200;

                    string imagePath = $"page_{pageIndex}.png";
                    currentPage.Save(imagePath, drawOptions);

                    // Perform OCR
                    using (Pix pageImage = Pix.LoadFromFile(imagePath))
                    using (Page ocrPage = ocrEngine.Process(pageImage))
                    {
                        Console.WriteLine($"Mean confidence for page #{pageIndex}: {ocrPage.GetMeanConfidence()}");

                        collectedText.Append(ocrPage.GetText());
                    }

                    // The image was only needed as OCR input.
                    File.Delete(imagePath);
                }
            }
        }
    }

    const string ResultPath = "result.txt";
    using (var output = new StreamWriter(ResultPath))
    {
        output.Write(collectedText.ToString());
    }

    Console.WriteLine($"The output is located in {Environment.CurrentDirectory}");
}