/// <summary>
/// Estimates how long rendering every page of a PDF to an image will take, in
/// milliseconds, by timing the rendering of the first page and (when present)
/// the second page and extrapolating the second-page time to the remaining pages.
/// </summary>
/// <param name="fileName">Path of the PDF file to measure.</param>
/// <returns>
/// The estimate in milliseconds: time(page 1) + time(page 2) * (pageCount - 1).
/// Returns 0 when the document reports no pages.
/// </returns>
/// <remarks>
/// Side effects: stores the page count in <c>pages</c>, stores the estimate in
/// <c>timeForJpeg</c>, writes the sample images to <c>prefixFilePath</c> and leaves
/// the shared stopwatch <c>sw</c> reset.
/// </remarks>
private static long TimeCutPage(string fileName)
{
    SautinSoft.PdfFocus f = new PdfFocus();
    f.OpenPdf(fileName);
    try
    {
        f.ImageOptions.Dpi = 320;

        int pageCount = f.PageCount;
        pages = pageCount;

        // Nothing to render: avoid asking for non-existent pages and avoid the
        // negative estimate the extrapolation formula would produce for 0 pages.
        if (pageCount <= 0)
        {
            timeForJpeg = 0;
            return timeForJpeg;
        }

        // Restart (not Start) so each measurement covers exactly one page,
        // regardless of what state the shared stopwatch was left in.
        sw.Restart();
        f.ToImage(prefixFilePath, 1);
        sw.Stop();
        long firstPageMs = sw.ElapsedMilliseconds;

        long estimateMs = firstPageMs;
        if (pageCount > 1)
        {
            sw.Restart();
            f.ToImage(prefixFilePath, 2);
            sw.Stop();
            long secondPageMs = sw.ElapsedMilliseconds;

            // Page 1 is measured separately; every other page is assumed to
            // cost as much as page 2.
            estimateMs += secondPageMs * (pageCount - 1);
        }

        timeForJpeg = estimateMs;
        return timeForJpeg;
    }
    finally
    {
        // Always release the document and leave the shared stopwatch clean,
        // even when rendering throws.
        f.ClosePdf();
        sw.Reset();
    }
}
/// <summary>
/// Converts a PDF to text: renders the pages to JPEG images in
/// <c>DirectoryTemporary</c>, runs Tesseract OCR (language code "rus") on each
/// page image and writes the recognized text to "textOCR.txt" in that directory.
/// </summary>
/// <param name="path">Path of the PDF file to convert.</param>
/// <returns>Path of the text file that receives the OCR output.</returns>
/// <remarks>
/// OCR failures on individual pages are logged to the console and skipped; the
/// text file is created even when the document has no pages.
/// </remarks>
private string OpenPDF(string path)
{
    string txtFile = DirectoryTemporary + "textOCR.txt";
    int pageCount;

    // Converter used to render the PDF pages to JPEG images.
    PdfFocus f = new PdfFocus();
    f.OpenPdf(path);
    try
    {
        pageCount = f.PageCount;
        Console.WriteLine($"Колличество страниц: {pageCount}");
        if (pageCount > 0)
        {
            f.ImageOptions.ImageFormat = System.Drawing.Imaging.ImageFormat.Jpeg;
            f.ImageOptions.Dpi = 300;
            // Split the document into one image per page.
            // NOTE(review): the OCR loop below assumes the files are named
            // "page1.jpg", "page2.jpg", ... - confirm against the PdfFocus docs.
            f.ToImage(DirectoryTemporary, "page");
        }
    }
    finally
    {
        // The document is no longer needed once the images are on disk.
        f.ClosePdf();
    }

    // One engine is shared by all pages instead of being rebuilt per page.
    // It is created lazily inside the per-page try block so that an
    // initialization failure is still logged per page, as before.
    TesseractEngine engine = null;
    try
    {
        using (FileStream fileStream = File.Open(txtFile, FileMode.Create))
        using (StreamWriter strWrite = new StreamWriter(fileStream))
        {
            for (int i = 1; i <= pageCount; i++)
            {
                string pathPage = DirectoryTemporary + $"page{i}.jpg";
                try
                {
                    if (engine == null)
                    {
                        engine = new TesseractEngine(@"tessdata", "rus", EngineMode.Default);
                    }

                    using (var img = Pix.LoadFromFile(pathPage))
                    using (var page = engine.Process(img))
                    {
                        Console.WriteLine("Качество скана: {0}", page.GetMeanConfidence());
                        strWrite.WriteLine(page.GetText());
                    }
                }
                catch (Exception ex)
                {
                    // Best effort: a page that cannot be read must not abort the rest.
                    Console.WriteLine("Error: {0}", ex.Message);
                }
            }
        }
    }
    finally
    {
        if (engine != null)
        {
            engine.Dispose();
        }
    }

    return txtFile;
}
/// <summary>
/// Converts a range of pages of a PDF document to image files, one file per page.
/// </summary>
/// <param name="pdfInputPath">Path of the PDF file.</param>
/// <param name="imageOutputPath">Directory that receives the images; created if it does not exist.</param>
/// <param name="imageName">File-name prefix of the generated images; the page number and extension are appended.</param>
/// <param name="startPageNum">First page to convert (1-based). Values below 1 are treated as 1.</param>
/// <param name="endPageNum">Last page to convert (inclusive). Values above the page count are treated as the page count.</param>
/// <param name="imageFormat">Desired image format; also used as the file extension.</param>
/// <param name="definition">Desired image sharpness; the larger the value, the sharper the image.</param>
/// <remarks>
/// If <paramref name="startPageNum"/> is greater than <paramref name="endPageNum"/> the two are swapped.
/// NOTE(review): <paramref name="definition"/> is currently not applied to the renderer;
/// the mapping from <c>Definition</c> to a DPI value is not visible here.
/// </remarks>
public static void ConvertPDF2Image(string pdfInputPath, string imageOutputPath, string imageName, int startPageNum, int endPageNum, ImageFormat imageFormat, Definition definition)
{
    PdfFocus pdfFocus = new PdfFocus();
    pdfFocus.OpenPdf(pdfInputPath);
    try
    {
        if (!Directory.Exists(imageOutputPath))
        {
            Directory.CreateDirectory(imageOutputPath);
        }

        // Render in the requested format so the bytes match the file extension.
        pdfFocus.ImageOptions.ImageFormat = imageFormat;

        // Put the bounds in ascending order first (the previous swap assigned
        // startPageNum back to endPageNum and collapsed the range to one page).
        if (startPageNum > endPageNum)
        {
            int tempPageNum = startPageNum;
            startPageNum = endPageNum;
            endPageNum = tempPageNum;
        }

        // Then clamp to the pages that actually exist.
        if (startPageNum <= 0)
        {
            startPageNum = 1;
        }

        int pageCount = pdfFocus.PageCount;
        if (endPageNum > pageCount)
        {
            endPageNum = pageCount;
        }

        string extension = imageFormat.ToString();
        for (int i = startPageNum; i <= endPageNum; i++)
        {
            byte[] img = pdfFocus.ToImage(i);

            // Path.Combine keeps the file inside imageOutputPath whether or not
            // the caller supplied a trailing directory separator.
            string imagePath = Path.Combine(imageOutputPath, imageName + i.ToString() + "." + extension);

            // Throws ArgumentNullException (without leaving an empty file behind)
            // if the page could not be rendered.
            File.WriteAllBytes(imagePath, img);
        }
    }
    finally
    {
        // Release the document even when directory creation or rendering fails.
        pdfFocus.ClosePdf();
    }
}
/// <summary>
/// Renders every page of a PDF to "PDF\&lt;page&gt;.jpeg" and then passes each
/// rendered image to <c>Cutter.Cut</c>, updating the remaining-time display
/// after every step.
/// </summary>
/// <param name="path">Path of the PDF file to convert.</param>
/// <param name="progressBar">Progress bar handed to <c>Cutter</c>; its maximum is set to twice the page count.</param>
/// <param name="time">Remaining-time estimate in milliseconds; the measured duration of every step is subtracted from it.</param>
/// <param name="label">Label handed to <c>TimeCalc.MinuteSeconds</c> together with the remaining time.</param>
public static void ConvertToImg(string path, ref ProgressBar progressBar, ref double time, ref Label label)
{
    const string prefixFilePath = "PDF\\";
    Stopwatch sw = new Stopwatch();
    int pageCount;

    SautinSoft.PdfFocus f = new PdfFocus();
    f.OpenPdf(path);
    try
    {
        pageCount = f.PageCount;
        f.ImageOptions.Dpi = 320;

        // Two steps per page: render, then cut.
        Cutter.setMaximumAndStep(progressBar, pageCount * 2);

        for (int i = 1; i <= pageCount; i++)
        {
            sw.Restart();
            f.ToImage(prefixFilePath + i.ToString() + ".jpeg", i);
            sw.Stop();

            time -= sw.ElapsedMilliseconds;
            TimeCalc.MinuteSeconds(time, label);
        }
    }
    finally
    {
        // Release the document even when rendering a page throws.
        f.ClosePdf();
    }

    for (int i = 1; i <= pageCount; i++)
    {
        sw.Restart();
        Cutter.Cut(prefixFilePath + i.ToString() + ".jpeg", i, progressBar);
        sw.Stop();

        time -= sw.ElapsedMilliseconds;
        TimeCalc.MinuteSeconds(time, label);
    }
}