Beispiel #1
0
        /// <summary>
        /// Estimates, in milliseconds, how long rendering every page of a PDF to an image
        /// will take, by timing the conversion of pages 1 and 2 and extrapolating.
        /// </summary>
        /// <param name="fileName">Path of the PDF file to open.</param>
        /// <returns>
        /// The estimate <c>t1 + t2 * (PageCount - 1)</c>, where <c>t1</c> and <c>t2</c> are the
        /// measured times of the first and second conversion. The value is also stored in
        /// <c>timeForJpeg</c>.
        /// </returns>
        /// <remarks>
        /// <c>sw</c>, <c>pages</c>, <c>prefixFilePath</c> and <c>timeForJpeg</c> are declared
        /// outside this method. NOTE(review): the measurement assumes <c>sw</c> is stopped and
        /// at zero on entry - confirm against the other users of that stopwatch.
        /// </remarks>
        private static long TimeCutPage(string fileName)
        {
            SautinSoft.PdfFocus f = new PdfFocus();

            f.OpenPdf(fileName);
            try
            {
                f.ImageOptions.Dpi = 320;
                pages = f.PageCount;

                sw.Start();
                f.ToImage(prefixFilePath, 1);
                sw.Stop();
                long timeFor1 = sw.ElapsedMilliseconds;

                // The stopwatch is deliberately NOT reset here: the second reading is the
                // cumulative time of both conversions, so (timeFor2 - timeFor1) below is the
                // time taken by page 2 alone.
                sw.Start();
                f.ToImage(prefixFilePath, 2);
                sw.Stop();
                long timeFor2 = sw.ElapsedMilliseconds;

                // First page at its own measured cost, every remaining page at the cost of page 2.
                long allTime = timeFor1 + ((timeFor2 - timeFor1) * (f.PageCount - 1));

                timeForJpeg = allTime;
                return timeForJpeg;
            }
            finally
            {
                // Runs on failure too, so a throwing conversion neither leaves stale time in
                // the stopwatch nor keeps the document open.
                sw.Reset();
                f.ClosePdf();
            }
        }
Beispiel #2
0
        /// <summary>
        /// Renders every page of a PDF to a JPEG image in <c>DirectoryTemporary</c>, runs
        /// Tesseract OCR (language "rus") over each page image and writes the recognised
        /// text to "textOCR.txt" in the same directory.
        /// </summary>
        /// <param name="path">Path of the PDF file to convert.</param>
        /// <returns>
        /// Path of the text file. The file is always (re)created, and stays empty when the
        /// document has no pages or every page fails.
        /// </returns>
        /// <remarks>
        /// Paths are built by plain concatenation with <c>DirectoryTemporary</c>, which is
        /// declared outside this method; NOTE(review): this presumably relies on it ending
        /// with a directory separator - confirm.
        /// </remarks>
        private string OpenPDF(string path)
        {
            string   txtFile = DirectoryTemporary + "textOCR.txt";
            PdfFocus f       = new PdfFocus();

            f.OpenPdf(path);

            // Read once so the OCR loop below does not depend on the document staying open.
            int pageCount = f.PageCount;

            try
            {
                Console.WriteLine($"Колличество страниц: {pageCount}");
                if (pageCount > 0)
                {
                    f.ImageOptions.ImageFormat = System.Drawing.Imaging.ImageFormat.Jpeg;
                    f.ImageOptions.Dpi         = 300;

                    // Split the PDF into one image per page, using "page" as the file name prefix.
                    f.ToImage(DirectoryTemporary, "page");
                }
            }
            finally
            {
                // The document is not needed once the page images exist; close it even if
                // the conversion throws.
                f.ClosePdf();
            }

            using (FileStream fileStream = File.Open(txtFile, FileMode.Create))
            using (StreamWriter strWrite = new StreamWriter(fileStream))
            {
                for (int i = 1; i <= pageCount; i++)
                {
                    string pathPage = DirectoryTemporary + $"page{i}.jpg";
                    try
                    {
                        using (TesseractEngine engine = new TesseractEngine(@"tessdata", "rus", EngineMode.Default))
                        using (var img = Pix.LoadFromFile(pathPage))
                        using (var page = engine.Process(img))
                        {
                            Console.WriteLine("Качество скана: {0}", page.GetMeanConfidence());
                            strWrite.WriteLine(page.GetText());
                        }
                    }
                    catch (Exception ex)
                    {
                        // Best effort: report the failing page and carry on with the next one.
                        Console.WriteLine("Error: {0}", ex.Message);
                    }
                }
            }

            return txtFile;
        }
Beispiel #3
0
    /// <summary>
    /// Converts a range of pages of a PDF document into image files, one file per page.
    /// </summary>
    /// <param name="pdfInputPath">Path of the PDF file.</param>
    /// <param name="imageOutputPath">
    /// Output directory; created when it does not exist. It is joined to the file name by
    /// plain concatenation, so include the trailing directory separator.
    /// </param>
    /// <param name="imageName">Base name of the generated images; the page number is appended.</param>
    /// <param name="startPageNum">First page to convert (1-based); values below 1 are raised to 1.</param>
    /// <param name="endPageNum">Last page to convert; values above the page count are lowered to it.</param>
    /// <param name="imageFormat">
    /// Desired image format. In this method it only determines the file extension
    /// (through <c>ToString()</c>).
    /// </param>
    /// <param name="definition">
    /// Desired image sharpness (larger is sharper). Currently not used by this method.
    /// </param>
    /// <remarks>
    /// When <paramref name="startPageNum"/> is greater than <paramref name="endPageNum"/>
    /// the two are swapped. Nothing is converted when the resulting range is empty.
    /// </remarks>
    public static void ConvertPDF2Image(string pdfInputPath, string imageOutputPath,
                                        string imageName, int startPageNum, int endPageNum, ImageFormat imageFormat, Definition definition)
    {
        PdfFocus pdfFocus = new PdfFocus();

        pdfFocus.OpenPdf(pdfInputPath);
        try
        {
            if (!Directory.Exists(imageOutputPath))
            {
                Directory.CreateDirectory(imageOutputPath);
            }

            // Put the bounds in ascending order first, so that the clamping below always
            // sees the real lower and upper bound.
            if (startPageNum > endPageNum)
            {
                int tempPageNum = startPageNum;
                startPageNum = endPageNum;
                endPageNum   = tempPageNum;
            }

            // Clamp the range to the pages that exist.
            if (startPageNum <= 0)
            {
                startPageNum = 1;
            }

            if (endPageNum > pdfFocus.PageCount)
            {
                endPageNum = pdfFocus.PageCount;
            }

            // NOTE(review): imageFormat is not applied to the converter's image options here,
            // and definition is not used at all - confirm whether both should be wired up.

            // Convert each page and write the returned bytes to its own file.
            for (int i = startPageNum; i <= endPageNum; i++)
            {
                byte[] img = pdfFocus.ToImage(i);
                using (FileStream fs1 = File.Create(imageOutputPath + imageName + i.ToString() + "." + imageFormat.ToString()))
                {
                    fs1.Write(img, 0, img.Length);
                }
            }
        }
        finally
        {
            // Close the document even when a page fails to convert or a file cannot be written.
            pdfFocus.ClosePdf();
        }
    }
Beispiel #4
0
        /// <summary>
        /// Renders every page of a PDF to "PDF\{page}.jpeg" and then passes each rendered
        /// file to <c>Cutter.Cut</c>. After each step the elapsed milliseconds are subtracted
        /// from <paramref name="time"/> and the result is handed to
        /// <c>TimeCalc.MinuteSeconds</c> together with <paramref name="label"/>.
        /// </summary>
        /// <param name="path">Path of the PDF file to convert.</param>
        /// <param name="progressBar">
        /// Progress bar passed to <c>Cutter.setMaximumAndStep</c> (with twice the page count)
        /// and to <c>Cutter.Cut</c>.
        /// </param>
        /// <param name="time">
        /// Running counter in milliseconds that is decreased by the duration of every step.
        /// NOTE(review): presumably the estimated remaining time - confirm against the caller.
        /// </param>
        /// <param name="label">Label passed to <c>TimeCalc.MinuteSeconds</c> after every step.</param>
        public static void ConvertToImg(string path, ref ProgressBar progressBar, ref double time, ref Label label)
        {
            string    prefixFilePath = "PDF\\";
            Stopwatch sw             = new Stopwatch();

            SautinSoft.PdfFocus f = new PdfFocus();
            f.OpenPdf(path);

            // Cached so the second loop can run after the document has been closed.
            int pageCount = f.PageCount;

            try
            {
                f.ImageOptions.Dpi = 320;

                Cutter.setMaximumAndStep(progressBar, pageCount * 2);

                // Pass 1: render each page to its own image file.
                for (int i = 1; i <= pageCount; i++)
                {
                    sw.Restart();
                    f.ToImage(prefixFilePath + i.ToString() + ".jpeg", i);
                    sw.Stop();

                    time -= sw.ElapsedMilliseconds;
                    TimeCalc.MinuteSeconds(time, label);
                }
            }
            finally
            {
                // Close the document even when rendering a page throws.
                f.ClosePdf();
            }

            // Pass 2: hand every rendered image to the cutter.
            for (int i = 1; i <= pageCount; i++)
            {
                sw.Restart();
                Cutter.Cut(prefixFilePath + i.ToString() + ".jpeg", i, progressBar);
                sw.Stop();

                time -= sw.ElapsedMilliseconds;
                TimeCalc.MinuteSeconds(time, label);
            }
        }