Exemple #1
0
        /// <summary>
        /// Lets the user pick a single PDF file, stores the selection and displays its page count.
        /// </summary>
        /// <param name="sender">Control that raised the event.</param>
        /// <param name="e">Event data (unused).</param>
        private void btn_BrowserPDF_Click(object sender, EventArgs e)
        {
            if (string.IsNullOrEmpty(txt_BatchName.Text))
            {
                MessageBox.Show("Vui lòng điền tên batch", "Lỗi", MessageBoxButtons.OK, MessageBoxIcon.Warning);
                return;
            }

            using (OpenFileDialog dlg = new OpenFileDialog())
            {
                dlg.Filter      = "Types PDF|*.pdf";
                dlg.Multiselect = false;

                // Cancelled: keep the current selection and page count instead of
                // trying to open whatever happens to be in the path text box.
                if (dlg.ShowDialog() != DialogResult.OK)
                {
                    return;
                }

                _lFileNames        = dlg.FileNames;
                txt_ImagePath.Text = Path.GetFullPath(dlg.FileName);
            }

            var f = new PdfFocus {
                Serial = "1234567890"
            };

            try
            {
                f.OpenPdf(txt_ImagePath.Text);
                TongSoTrang = f.PageCount;
            }
            finally
            {
                // Only the page count is needed, so release the document right away.
                f.ClosePdf();
            }

            lbl_Page.Text = TongSoTrang + " Pages";
        }
        /// <summary>
        /// Searches a PDF page by page for a piece of text, ignoring case.
        /// </summary>
        /// <param name="fileFullPath">Path of the PDF file to search.</param>
        /// <param name="text">Text to look for.</param>
        /// <param name="cts">Cancellation source checked before each page; the search stops once cancellation is requested.</param>
        /// <returns>
        /// <c>true</c> if any page contains <paramref name="text"/>; <c>false</c> if it was not found,
        /// the search was cancelled, or any exception occurred.
        /// </returns>
        public static bool FindTextInPdfFile(string fileFullPath, string text, ref CancellationTokenSource cts)
        {
            var pdfFocus = new PdfFocus();

            try
            {
                pdfFocus.OpenPdf(fileFullPath);

                // Pages are addressed starting at 1; an empty document simply skips the loop.
                for (var page = 1; page <= pdfFocus.PageCount; page++)
                {
                    if (cts.IsCancellationRequested)
                    {
                        break;
                    }

                    if (pdfFocus.ToText(page, page).IndexOf(text, StringComparison.OrdinalIgnoreCase) >= 0)
                    {
                        // The finally block closes the document; no explicit close is needed here.
                        return true;
                    }
                }

                return false;
            }
            catch (Exception)
            {
                // Best effort: any failure is reported as "not found".
                return false;
            }
            finally
            {
                pdfFocus.ClosePdf();
            }
        }
        /// <summary>
        /// Converts the tables of a PDF file to an XML file written next to it (same name, ".xml" extension).
        /// </summary>
        /// <param name="pdfFile">Path of the PDF file to convert.</param>
        /// <returns>
        /// <c>true</c> when the document has pages and the conversion returned 0;
        /// <c>false</c> otherwise, including when an exception was caught and logged to the console.
        /// </returns>
        public static bool PdfToXMLAsFiles(string pdfFile)
        {
            PdfFocus f = null;

            try
            {
                string pathToXml = Path.ChangeExtension(pdfFile, ".xml");

                f = new PdfFocus();

                // This property is necessary only for registered version.
                // f.Serial = "XXXXXXXXXXX";

                // Convert only tables to XML and skip all textual data.
                f.XmlOptions.ConvertNonTabularDataToSpreadsheet = false;

                f.OpenPdf(pdfFile);

                if (f.PageCount > 0 && f.ToXml(pathToXml) == 0)
                {
                    return true;
                }
            }
            catch (Exception e)
            {
                Console.WriteLine(e.ToString());
            }
            finally
            {
                // Release the document on every path, including failures.
                if (f != null)
                {
                    f.ClosePdf();
                }
            }

            return false;
        }
Exemple #4
0
        /// <summary>
        /// Converts the tables of a PDF file to an Excel workbook written next to it (same name, ".xls" extension).
        /// Exceptions are caught and logged to the console.
        /// </summary>
        /// <param name="pdfFile">Path of the PDF file to convert.</param>
        public static void PdfToExcelAsFiles(string pdfFile)
        {
            PdfFocus f = null;

            try
            {
                string excelFile = Path.ChangeExtension(pdfFile, ".xls");

                f = new PdfFocus();

                // 'true' = Convert all data to spreadsheet (tabular and even textual).
                // 'false' = Skip textual data and convert only tabular (tables) data.
                f.ExcelOptions.ConvertNonTabularDataToSpreadsheet = false;

                // 'true'  = Preserve original page layout.
                // 'false' = Place tables before text.
                f.ExcelOptions.PreservePageLayout = true;

                f.OpenPdf(pdfFile);

                if (f.PageCount > 0)
                {
                    f.ToExcel(excelFile);
                }
            }
            catch (Exception e)
            {
                Console.WriteLine(e.ToString());
            }
            finally
            {
                // Release the document on every path, including failures.
                if (f != null)
                {
                    f.ClosePdf();
                }
            }
        }
    /// <summary>
    /// Opens the uploaded PDF bytes, keeps the opened document in the session under "focus"
    /// and renders it through <c>ShowPdf</c>.
    /// </summary>
    /// <param name="sender">Control that raised the event.</param>
    /// <param name="e">Event data (unused).</param>
    protected void Upload(object sender, EventArgs e)
    {
        lblMessage.Visible = true;

        var uploadedPdf = new PdfFocus();
        uploadedPdf.OpenPdf(uplPDF.FileBytes);

        // The opened document is shared with the other handlers through the session.
        Session["focus"] = uploadedPdf;

        ShowPdf();
    }
Exemple #6
0
        /// <summary>
        /// Downloads the PDF document attached to a Telegram message, converts it to DOCX
        /// and sends the result back to the same chat.
        /// </summary>
        /// <param name="bot">Bot client used to download the file and to reply.</param>
        /// <param name="msg">Incoming message expected to carry a PDF document.</param>
        /// <returns>A task that completes once the reply (result or error text) has been sent.</returns>
        public static async Task ConvertPDFtoWord(TelegramBotClient bot, Message msg)
        {
            // The file name is supplied by a remote user: keep only its last path segment
            // so it cannot point outside the working folder.
            string fileName = msg.Document == null ? null : Path.GetFileName(msg.Document.FileName);

            // Check the real extension (case-insensitively) rather than ".pdf" anywhere in the name.
            if (string.IsNullOrEmpty(fileName) || !fileName.EndsWith(".pdf", StringComparison.OrdinalIgnoreCase))
            {
                await bot.SendTextMessageAsync(msg.Chat.Id, "Файл имеет не верный формат");
                return;
            }

            string path      = String.Format(@"TelegramBotIQ Users File\Convert\{0}.pdf", fileName);
            string pathDocx  = String.Format(@"TelegramBotIQ Users File\Convert\{0}.docx", fileName);
            string errorText = "Возникла ошибка :(" + Environment.NewLine +
                               "Не беспокойтесь, разрабу дадим по шапке";

            try
            {
                // File.Create truncates a stale file of the same name; OpenWrite would leave old trailing bytes.
                using (var filestream = System.IO.File.Create(path))
                {
                    await bot.GetInfoAndDownloadFileAsync(
                        fileId : msg.Document.FileId,
                        destination : filestream
                        );
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine($"Error: {ex}");
                await bot.SendTextMessageAsync(msg.Chat.Id, errorText);

                // Without the downloaded file there is nothing to convert.
                return;
            }

            bool converted = false;
            PdfFocus f     = new PdfFocus();

            try
            {
                f.OpenPdf(path);

                if (f.PageCount > 0)
                {
                    await bot.SendTextMessageAsync(msg.Chat.Id, "Подождите, идет конвертирование!");

                    f.WordOptions.Format = PdfFocus.CWordOptions.eWordDocument.Docx;

                    // A return value of 0 is treated as success.
                    converted = f.ToWord(pathDocx) == 0;
                }
            }
            finally
            {
                f.ClosePdf();
            }

            if (!converted)
            {
                // Do not try to send a DOCX that was never produced (or a stale one from an earlier run).
                await bot.SendTextMessageAsync(msg.Chat.Id, errorText);
                return;
            }

            using (FileStream fileStream = System.IO.File.OpenRead(pathDocx))
            {
                InputOnlineFile file = new InputOnlineFile(fileStream);

                await bot.SendDocumentAsync(msg.Chat.Id, file, caption : "Конвертирование завершено!");
            }
        }
        /// <summary>
        ///     Renders one page of a PDF from the configured test data directory as an image at 300 DPI.
        /// </summary>
        /// <param name="filename">File name of the PDF, appended to the "TestDataDirectory" setting.</param>
        /// <param name="page">1-based page number to render.</param>
        /// <returns>The rendered page, or <c>null</c> when <paramref name="page"/> is outside 1..PageCount.</returns>
        public static Image GetPdfPageAsImage(string filename, int page)
        {
            var testDataDirectory = AppSettings.Get("TestDataDirectory");
            var f = new PdfFocus();

            try
            {
                f.OpenPdf($"{testDataDirectory}{filename}");

                // Pages are numbered 1..PageCount, so the last page (page == PageCount) is valid.
                if (page < 1 || page > f.PageCount)
                {
                    return null;
                }

                f.ImageOptions.Dpi = 300;
                return f.ToDrawingImage(page);
            }
            finally
            {
                f.ClosePdf();
            }
        }
        /// <summary>
        /// Loads a PDF file and saves it as a Word document in the configured output folder.
        /// </summary>
        /// <param name="FilePath">Path of the PDF file to convert.</param>
        /// <returns>Path of the saved Word document.</returns>
        public string ConvertPDFtoWord(string FilePath)
        {
            string SaveToFileLocation = Properties.Settings.Default.FolderPath + "PdfToWrod.docx";

            // NOTE(review): the file is saved with FileFormat.DOC although its name ends in ".docx" -
            // confirm which of the two is intended.
            PdfDocument pdfDoc = new PdfDocument();

            pdfDoc.LoadFromFile(FilePath);
            pdfDoc.SaveToFile(SaveToFileLocation, FileFormat.DOC);

            return SaveToFileLocation;
        }
    /// <summary>
    /// Tells whether a page number is valid for the PDF stored in the session under "focus".
    /// </summary>
    /// <param name="page">1-based page number to check.</param>
    /// <returns>
    /// <c>true</c> when a document is stored and <paramref name="page"/> lies within 1..PageCount;
    /// otherwise <c>false</c>.
    /// </returns>
    protected bool IsPageInRange(int page)
    {
        object stored = Session["focus"];

        if (stored == null)
        {
            return false;
        }

        PdfFocus pdf = (PdfFocus)stored;

        return page > 0 && page <= pdf.PageCount;
    }
        /// <summary>
        /// Converts a PDF supplied as a stream to a Word document held in memory.
        /// </summary>
        /// <param name="file">Stream containing the PDF.</param>
        /// <returns>
        /// A stream over the converted document; an empty stream when the PDF has no pages
        /// or the conversion produced no data.
        /// </returns>
        public Stream getWordStream(Stream file)
        {
            PdfFocus f  = new PdfFocus();
            byte[] docx = null;

            try
            {
                f.OpenPdf(file);

                if (f.PageCount > 0)
                {
                    docx = f.ToWord();
                }
            }
            finally
            {
                f.ClosePdf();
            }

            // MemoryStream rejects a null buffer, so fall back to an empty one.
            return new MemoryStream(docx ?? new byte[0]);
        }
        /// <summary>
        /// Prepares the parser for a PDF file. If a "final.bin" file exists next to the PDF,
        /// the previously saved table is loaded and nothing else is initialised. Otherwise the PDF
        /// is opened, an Excel application object is created, the "Excel Files" folder is ensured
        /// and one <c>Page</c> entry is created per page in the requested range.
        /// </summary>
        /// <param name="PDFLocation">Path of the PDF file; must contain a backslash separating the directory.</param>
        /// <param name="FromPage">First page number of the range (inclusive).</param>
        /// <param name="ToPage">Last page number of the range (inclusive).</param>
        /// <exception cref="Exception">Thrown when the Excel application object is null.</exception>
        public PDFParser(string PDFLocation, int FromPage, int ToPage)
        {
            DirLocation = PDFLocation.Substring(0, PDFLocation.LastIndexOf('\\'));

            if (File.Exists(Path.Combine(DirLocation, "final.bin")))
            {
                Console.WriteLine("Loading Existing Table");
                LoadTable();
                Console.WriteLine("Table Loaded");
                return;
            }

            Console.WriteLine("Loading Parsers");

            pdfFocus = new PdfFocus();
            pdfFocus.OpenPdf(PDFLocation);

            xlApp = new Application();

            if (xlApp == null)
            {
                throw new Exception("Excel not installed!!");
            }

            // Creating Directories (CreateDirectory does nothing when the folder already exists)
            Directory.CreateDirectory(Path.Combine(DirLocation, "Excel Files"));

            // Initialize Pages
            CompleteList     = new Page[ToPage - FromPage + 1];
            Page.PDFLocation = PDFLocation;

            for (int i = FromPage; i <= ToPage; ++i)
            {
                // The statement terminator belongs after the initializer, not after the loop.
                CompleteList[i - FromPage] = new Page {
                    PageNumber = i
                };
            }

            Console.WriteLine("Parsers Loaded");
        }
        /// <summary>
        ///     Renders every page of a PDF from the configured test data directory at 300 DPI
        ///     and saves each one as "{filename}.{page}.png" in the same directory.
        /// </summary>
        /// <param name="filename">File name of the PDF, appended to the "TestDataDirectory" setting.</param>
        public static void SavePdfToImage(string filename)
        {
            var testDataDirectory = AppSettings.Get("TestDataDirectory");
            var f = new PdfFocus();

            try
            {
                f.OpenPdf($"{testDataDirectory}{filename}");

                if (f.PageCount <= 0)
                {
                    return;
                }

                f.ImageOptions.Dpi = 300;

                for (var p = 1; p <= f.PageCount; p++)
                {
                    // Dispose each rendered page once it is written so images do not pile up.
                    using (var img = f.ToDrawingImage(p))
                    {
                        img.Save($"{testDataDirectory}{filename}.{p}.png");
                    }
                }
            }
            finally
            {
                f.ClosePdf();
            }
        }
Exemple #13
0
        /// <summary>
        /// Converts every PDF file found in a fixed relative directory to an HTML file placed next to it,
        /// prints a summary and opens the directory.
        /// </summary>
        static void ConvertMultiplePdfToHtmls()
        {
            // Folder that holds the *.pdf inputs.
            string sourceDirectory = Path.GetFullPath(@"..\..\..\..\..\");
            string[] sources       = Directory.GetFiles(sourceDirectory, "*.pdf");

            PdfFocus converter = new PdfFocus();

            // After purchasing the license, please insert your serial number here to activate the component:
            //converter.Serial = "123456789";

            int converted = 0;
            int attempted = 0;

            foreach (string source in sources)
            {
                Console.WriteLine("Converting {0} ...", Path.GetFileName(source));

                converter.OpenPdf(source);
                attempted += 1;

                if (converter.PageCount <= 0)
                {
                    continue;
                }

                // Existing directory that receives the extracted images (see also "ImageSubFolder").
                converter.HtmlOptions.ImageFolder = Path.GetDirectoryName(source);
                // Plain folder name (no drive letter); the component creates it.
                converter.HtmlOptions.ImageSubFolder = String.Format("{0}_images", Path.GetFileNameWithoutExtension(source));
                // PNG is the recommended type for stored images.
                converter.HtmlOptions.ImageType = PdfFocus.CHtmlOptions.eHtmlImageType.Png;
                // Keep images as separate linked files instead of base64 inside the HTML.
                converter.HtmlOptions.IncludeImageInHtml = false;

                string target = Path.ChangeExtension(source, ".html");
                int    status = converter.ToHtml(target);

                if (status == 0)
                {
                    converted += 1;
                }
            }

            // Summary.
            Console.WriteLine("{0} of {1} files converted successfully!", converted, attempted);
            Console.WriteLine("Press any key ...");
            Console.ReadLine();

            // Show the folder that now contains the HTML files.
            System.Diagnostics.Process.Start(sourceDirectory);
        }
Exemple #14
0
        /// <summary>
        /// Renders each page of a PDF to a JPEG in the temporary directory, runs Tesseract OCR (Russian)
        /// on every page image and writes the recognised text to "textOCR.txt".
        /// </summary>
        /// <param name="path">Path of the PDF file to process.</param>
        /// <returns>Path of the text file holding the OCR output.</returns>
        private string OpenPDF(string path) // PDF conversion method
        {
            string ocrOutputPath = DirectoryTemporary + "textOCR.txt";

            // Converter used to turn the PDF pages into JPEG images.
            PdfFocus pdf = new PdfFocus();
            pdf.OpenPdf(path);

            Console.WriteLine($"Колличество страниц: {pdf.PageCount}");

            if (pdf.PageCount > 0)
            {
                pdf.ImageOptions.ImageFormat = System.Drawing.Imaging.ImageFormat.Jpeg;
                pdf.ImageOptions.Dpi         = 300;

                // Split the PDF into one image per page.
                pdf.ToImage(DirectoryTemporary, "page");
            }

            using (FileStream output = File.Open(ocrOutputPath, FileMode.Create))
            using (StreamWriter writer = new StreamWriter(output))
            {
                for (int pageNumber = 1; pageNumber <= pdf.PageCount; pageNumber++)
                {
                    string pageImagePath = DirectoryTemporary + $"page{pageNumber}.jpg";

                    try
                    {
                        using (TesseractEngine engine = new TesseractEngine(@"tessdata", "rus", EngineMode.Default))
                        using (var image = Pix.LoadFromFile(pageImagePath))
                        using (var recognised = engine.Process(image))
                        {
                            Console.WriteLine("Качество скана: {0}", recognised.GetMeanConfidence());
                            writer.WriteLine(recognised.GetText());
                        }
                    }
                    catch (Exception ex)
                    {
                        // A page that fails OCR is reported and skipped; the remaining pages still run.
                        Console.WriteLine("Error: {0}", ex.Message);
                    }
                }
            }

            return ocrOutputPath;
        }
Exemple #15
0
        /// <summary>
        /// Loads a scanned PDF, performs OCR through the registered Tesseract callback and saves the result as DOCX.
        /// </summary>
        static void LoadScannedPdf()
        {
            // The scanned PDF is expected to contain English, Russian and Vietnamese text;
            // the OCR result is stored as a new DOCX document.
            //
            // Preparation:
            //   1. Download eng.traineddata, rus.traineddata and vie.traineddata from
            //      https://github.com/tesseract-ocr/tessdata_fast (good and fast) or
            //      https://github.com/tesseract-ocr/tessdata_best (best and slow).
            //   2. Copy those files to the "tessdata" folder in the project root.
            //   3. Make sure the "tessdata" folder also contains "pdf.ttf".

            string scannedPdf = @"..\..\scan.pdf";
            string docxPath   = "Result.docx";

            PdfFocus converter = new PdfFocus();

            converter.OCROptions.Mode    = PdfFocus.COCROptions.eOCRMode.AllImages;
            converter.OCROptions.Method += PerformOCRTesseract;

            converter.OpenPdf(scannedPdf);

            // Conversion counts as successful only when there are pages and ToWord returns 0.
            bool succeeded = converter.PageCount > 0 && converter.ToWord(docxPath) == 0;

            if (!succeeded)
            {
                Console.WriteLine("Conversion failed!");
                return;
            }

            // Open the result for demonstration purposes.
            var launch = new System.Diagnostics.ProcessStartInfo(docxPath)
            {
                UseShellExecute = true
            };
            System.Diagnostics.Process.Start(launch);
        }
 /// <summary>
 /// Asks the user for a PDF file and a target path, converts the PDF to a Word document
 /// and reports whether the conversion succeeded.
 /// </summary>
 private void UploadAndConvert()
 {
     openWordDoc.Title  = "Open PDF file to convert";
     openWordDoc.Filter = "PDF Files|*.pdf";
     if (openWordDoc.ShowDialog() != DialogResult.OK)
     {
         return;
     }

     PdfFocus pdfFocus = new PdfFocus();
     try
     {
         pdfFocus.OpenPdf(openWordDoc.FileName);
         saveWordDoc.Title  = "Save Doc file";
         saveWordDoc.Filter = "Word Files|*.docx";
         if (saveWordDoc.ShowDialog() != DialogResult.OK)
         {
             return;
         }

         // Announce success only when the document has pages and ToWord returns 0.
         bool converted = pdfFocus.PageCount > 0 && pdfFocus.ToWord(saveWordDoc.FileName) == 0;
         if (converted)
         {
             MessageBox.Show("Converted Successfully", "Info", MessageBoxButtons.OK, MessageBoxIcon.Information);
         }
         else
         {
             MessageBox.Show("Conversion failed", "Error", MessageBoxButtons.OK, MessageBoxIcon.Error);
         }
     }
     finally
     {
         pdfFocus.ClosePdf();
     }
 }
Exemple #17
0
        /// <summary>
        /// Extracts the text of the in-memory PDF, shows how long loading and conversion took,
        /// displays the text and writes it to a fixed file.
        /// </summary>
        /// <param name="sender">Control that raised the event.</param>
        /// <param name="e">Event data (unused).</param>
        private void ConvertToTextPdfFocusButton_Click(object sender, EventArgs e)
        {
            var pdf = new PdfFocus();
            string text;

            using (var pdfStream = new MemoryStream(_binaryPdfFile))
            {
                var stopwatch = Stopwatch.StartNew();
                pdf.OpenPdf(pdfStream);
                stopwatch.Stop();

                // TotalSeconds is the full elapsed time; Seconds is only the truncated 0-59 component.
                PdfFocusStatsDetails.Text = $"Load Pdf Binary: {stopwatch.Elapsed.TotalSeconds:F2} seconds\r\n\r\n";

                stopwatch.Restart();
                text = pdf.ToText();
                stopwatch.Stop();

                PdfFocusStatsDetails.Text += $"Convert to Text: {stopwatch.Elapsed.TotalSeconds:F2} seconds";
            }

            ExtractedTextTextBox.Text = text;
            File.WriteAllText(@"C:\Base64\PdfFocus.txt", text);
        }
    /// <summary>
    /// Renders the current page of the PDF stored in the session under "focus" as HTML
    /// and updates the page indicator. Does nothing when no document is stored or it has no pages.
    /// </summary>
    private void ShowPdf()
    {
        if (Session["focus"] != null)
        {
            PdfFocus f = (PdfFocus)Session["focus"];

            if (f.PageCount > 0)
            {
                f.HtmlOptions.IncludeImageInHtml = true;
                f.EmbeddedImagesFormat           = PdfFocus.eImageFormat.Png;

                // Unboxing a missing "page" entry would throw; fall back to the first page.
                // NOTE(review): assumes page 1 is the desired default - confirm against where "page" is set.
                int page = (Session["page"] as int?) ?? 1;

                string html = f.ToHtml(page, page);
                htmlLiteral.Text = html;
                txtPage.Text     = String.Format("Page {0} of {1}", page, f.PageCount);
            }
        }
    }
Exemple #19
0
    /// <summary>
    /// Converts a range of pages of a PDF document to image files.
    /// </summary>
    /// <param name="pdfInputPath">Path of the PDF file.</param>
    /// <param name="imageOutputPath">Output path for the images; used as a plain prefix, so it should end with a directory separator.</param>
    /// <param name="imageName">Base name of the generated images; the page number is appended.</param>
    /// <param name="startPageNum">Page at which conversion starts (values below 1 are raised to 1).</param>
    /// <param name="endPageNum">Page at which conversion stops, inclusive (values above the page count are lowered to it).</param>
    /// <param name="imageFormat">Desired image format; in this method it only determines the file extension.</param>
    /// <param name="definition">Requested image sharpness (higher is sharper); not used by this method.</param>
    public static void ConvertPDF2Image(string pdfInputPath, string imageOutputPath,
                                        string imageName, int startPageNum, int endPageNum, ImageFormat imageFormat, Definition definition)
    {
        PdfFocus pdfFocus = new PdfFocus();

        try
        {
            pdfFocus.OpenPdf(pdfInputPath);

            // CreateDirectory does nothing when the folder already exists.
            Directory.CreateDirectory(imageOutputPath);

            // Put the bounds in order first so the clamping below applies to the right ends.
            if (startPageNum > endPageNum)
            {
                int tempPageNum = startPageNum;
                startPageNum = endPageNum;
                endPageNum   = tempPageNum;
            }

            if (startPageNum <= 0)
            {
                startPageNum = 1;
            }

            if (endPageNum > pdfFocus.PageCount)
            {
                endPageNum = pdfFocus.PageCount;
            }

            // Convert each page; nothing runs when the requested range lies outside the document.
            for (int i = startPageNum; i <= endPageNum; i++)
            {
                byte[] img = pdfFocus.ToImage(i);

                if (img == null)
                {
                    // No image data for this page: skip it rather than fail on a null buffer.
                    continue;
                }

                File.WriteAllBytes(imageOutputPath + imageName + i.ToString() + "." + imageFormat.ToString(), img);
            }
        }
        finally
        {
            pdfFocus.ClosePdf();
        }
    }
Exemple #20
0
 /// <summary>
 /// Converts a PDF file to an HTML file inside the given directory. When a file of the same name
 /// already exists, the next free name (from <c>GetNextFileName</c>) is used instead.
 /// </summary>
 /// <param name="file">PDF file to convert.</param>
 /// <param name="writeToDirectory">Directory that receives the HTML file.</param>
 /// <param name="logger">Logger that receives the details of any exception.</param>
 /// <param name="resultFile">The target HTML file; <c>null</c> if the failure happened before it was determined.</param>
 /// <returns><c>true</c> when the conversion returned 0; <c>false</c> on a non-zero result or an exception.</returns>
 public static bool PdfToHtml(this FileInfo file, DirectoryInfo writeToDirectory, ILog logger, out FileInfo resultFile)
 {
     resultFile = null;
     PdfFocus pdf = null;
     try
     {
         pdf = new PdfFocus();
         pdf.OpenPdf(file.FullName);
         string fileName = $"{Path.GetFileNameWithoutExtension(file.FullName)}.html";
         resultFile = new FileInfo(Path.Combine(writeToDirectory.FullName, fileName));
         if (resultFile.Exists)
         {
             resultFile = new FileInfo(resultFile.FullName.GetNextFileName());
         }

         // Report the converter's own result instead of assuming success.
         return pdf.ToHtml(resultFile.FullName) == 0;
     }
     catch (Exception ex)
     {
         logger.Error("An exception occurred: {0}\r\n{1}", ex.Message, ex.StackTrace);
         return false;
     }
     finally
     {
         if (pdf != null)
         {
             pdf.ClosePdf();
         }
     }
 }
Exemple #21
0
        /// <summary>
        /// Converts each PDF file to XML text and returns the results keyed by file path.
        /// Files without pages map to an empty string.
        /// </summary>
        /// <param name="files">Paths of the PDF files to read; each path must be unique.</param>
        /// <returns>Dictionary mapping every input path to its XML content.</returns>
        private static Dictionary <string, string> GetFileContentsThroughSautin(string[] files)
        {
            var xmlByPath = new Dictionary <string, string>();

            foreach (string path in files)
            {
                PdfFocus converter = new PdfFocus();
                converter.XmlOptions.ConvertNonTabularDataToSpreadsheet = true;
                converter.OpenPdf(path);

                string xml = converter.PageCount > 0 ? converter.ToXml() : string.Empty;

                xmlByPath.Add(path, xml);
            }

            return xmlByPath;
        }
Exemple #22
0
        /// <summary>
        /// Renders every page of a PDF to "PDF\{page}.jpeg" at 320 DPI and then passes each image to
        /// <c>Cutter.Cut</c>. After every step the elapsed milliseconds are subtracted from
        /// <paramref name="time"/> and the label is refreshed through <c>TimeCalc.MinuteSeconds</c>.
        /// </summary>
        /// <param name="path">Path of the PDF file.</param>
        /// <param name="progressBar">Progress bar configured for twice the page count and handed to the cutter.</param>
        /// <param name="time">Time value reduced by the duration of each step.</param>
        /// <param name="label">Label updated with the current time value.</param>
        public static void ConvertToImg(string path, ref ProgressBar progressBar, ref double time, ref Label label)
        {
            SautinSoft.PdfFocus pdf = new PdfFocus();
            pdf.OpenPdf(path);

            int    totalPages  = pdf.PageCount;
            string outputStem  = "PDF\\";

            pdf.ImageOptions.Dpi = 320;
            Stopwatch timer = new Stopwatch();

            Cutter.setMaximumAndStep(progressBar, pdf.PageCount * 2);

            // First pass: render each page to a JPEG file.
            for (int pageNo = 1; pageNo <= pdf.PageCount; pageNo++)
            {
                timer.Restart();
                pdf.ToImage(outputStem + pageNo + ".jpeg", pageNo);
                timer.Stop();

                time -= timer.ElapsedMilliseconds;
                TimeCalc.MinuteSeconds(time, label);
            }

            pdf.ClosePdf();

            // Second pass: cut each rendered page; uses the page count captured before closing.
            for (int pageNo = 1; pageNo <= totalPages; pageNo++)
            {
                timer.Restart();
                Cutter.Cut(outputStem + pageNo + ".jpeg", pageNo, progressBar);
                timer.Stop();

                time -= timer.ElapsedMilliseconds;
                TimeCalc.MinuteSeconds(time, label);
            }
        }
Exemple #23
0
        /// <summary>
        /// Estimates the time needed to render all pages of a PDF to images by timing the rendering
        /// of pages 1 and 2 and extrapolating over the page count. Stores the page count in
        /// <c>pages</c> and the estimate in <c>timeForJpeg</c>.
        /// </summary>
        /// <param name="fileName">Path of the PDF file to measure.</param>
        /// <returns>The estimate, in stopwatch milliseconds.</returns>
        private static long TimeCutPage(string fileName)
        {
            // sw, pages, prefixFilePath and timeForJpeg are not declared in this method;
            // they are members defined elsewhere.
            SautinSoft.PdfFocus f = new PdfFocus();
            long timeFor1         = 0;
            long timeFor2         = 0;

            f.OpenPdf(fileName);
            f.ImageOptions.Dpi = 320;
            pages = f.PageCount;
            sw.Start();
            f.ToImage(prefixFilePath, 1);
            sw.Stop();
            // Elapsed time after rendering page 1
            // (NOTE(review): assumes sw was at zero on entry - confirm against callers).
            timeFor1 = sw.ElapsedMilliseconds;
            // The stopwatch is not reset here, so the next reading is cumulative (page 1 + page 2).
            sw.Start();
            // NOTE(review): page 2 is rendered unconditionally - confirm behaviour for single-page PDFs.
            f.ToImage(prefixFilePath, 2);
            sw.Stop();
            timeFor2 = sw.ElapsedMilliseconds;
            // (timeFor2 - timeFor1) is the time of page 2 alone; it stands in for every page after the first.
            long allTime = timeFor1 + ((timeFor2 - timeFor1) * (f.PageCount - 1));

            f.ClosePdf();
            timeForJpeg = allTime;
            // Return the shared stopwatch to zero.
            sw.Reset();
            return(timeForJpeg);
        }
        /// <summary>
        /// Converts a PDF file to an Excel workbook placed next to it (extension "xls") and opens the
        /// workbook when the conversion returns 0.
        /// </summary>
        /// <param name="_PdfPath">Path of the PDF file to convert.</param>
        public void ConvertPdfToExcel(string _PdfPath)
        {
            string workbookPath = System.IO.Path.ChangeExtension(_PdfPath, "xls");

            SautinSoft.PdfFocus converter = new PdfFocus();

            // true  = convert all data (tabular and textual) to the spreadsheet.
            // false = convert tables only and skip textual data.
            converter.ExcelOptions.ConvertNonTabularDataToSpreadsheet = true;
            converter.ExcelOptions.PreservePageLayout = true;

            converter.OpenPdf(_PdfPath);

            if (converter.PageCount <= 0)
            {
                return;
            }

            int status = converter.ToExcel(workbookPath);

            // Open the produced workbook only after a successful conversion.
            if (status == 0)
            {
                System.Diagnostics.Process.Start(workbookPath);
            }
        }
Exemple #25
0
        /// <summary>
        /// Converts every PDF file found in a fixed relative directory and merges the resulting HTML
        /// bodies into a single "Result.html" document, which is then opened.
        /// </summary>
        static void ConvertMultiplePdfToSingleHtml()
        {
            // Folder that holds the *.pdf inputs, and the merged output file.
            string sourceDirectory = Path.GetFullPath(@"..\..\..\..\..\");
            string mergedHtmlPath  = Path.GetFullPath("Result.html");

            string[] sources = Directory.GetFiles(sourceDirectory, "*.pdf");

            // Accumulates the merged HTML document.
            StringBuilder merged = new StringBuilder();
            merged.Append("<html><body>");

            PdfFocus converter = new PdfFocus();
            //converter.Serial = "123456789";

            int converted = 0;
            int attempted = 0;

            foreach (string source in sources)
            {
                Console.WriteLine("Converting {0} ...", Path.GetFileName(source));

                converter.OpenPdf(source);
                attempted += 1;

                if (converter.PageCount <= 0)
                {
                    continue;
                }

                // Existing directory that receives the extracted images (see also "ImageSubFolder").
                converter.HtmlOptions.ImageFolder = Path.GetDirectoryName(mergedHtmlPath);
                // Plain folder name (no drive letter); the component creates it.
                converter.HtmlOptions.ImageSubFolder = "images";
                // PNG is the recommended type for stored images.
                converter.HtmlOptions.ImageType = PdfFocus.CHtmlOptions.eHtmlImageType.Png;
                // Keep images as separate linked files instead of base64 inside the HTML.
                converter.HtmlOptions.IncludeImageInHtml = false;
                // Inline CSS lets the fragments be merged without style clashes.
                converter.HtmlOptions.InlineCSS = true;
                // Only the contents of <body>...</body> are wanted.
                converter.HtmlOptions.ProduceOnlyHtmlBody = true;

                string fragment = converter.ToHtml();

                if (String.IsNullOrEmpty(fragment))
                {
                    continue;
                }

                converted += 1;
                merged.Append(fragment);
            }

            merged.Append("</body></html>");

            // Write the merged document and report.
            File.WriteAllText(mergedHtmlPath, merged.ToString());

            Console.WriteLine("{0} of {1} files converted and merged into {2}!", converted, attempted, Path.GetFileName(mergedHtmlPath));
            Console.WriteLine("Press any key ...");
            Console.ReadLine();

            // Open the single HTML document.
            System.Diagnostics.Process.Start(mergedHtmlPath);
        }
Exemple #26
0
        /// <summary>
        /// Merges all PDF, DOCX and RTF files of the working directory into one PDF document and
        /// converts that PDF into a single XLS workbook.
        /// </summary>
        /// <remarks>
        /// Details: https://sautinsoft.com/products/document/help/net/developer-guide/from-customers-convert-pdf-docx-rtf-to-single-xls-workbook-net-csharp-vb.php
        /// </remarks>
        public static void ConvertToSingleXls()
        {
            // In this example we'll use not only Document .Net component, but also
            // another SautinSoft component - PDF Focus .Net (to perform conversion from PDF to single xls workbook).
            // First of all, please perform "Rebuild Solution" to restore PDF Focus .Net package from NuGet.

            // Our steps:
            // 1. Convert all RTF, DOCX, PDF files into a single PDF document. (by Document .Net).
            // 2. Convert the single PDF into a single XLS workbook. (by PDF Focus .Net).

            byte[] singlePdfBytes = null;

            // This file we need only to show intermediate result.
            string singlePdfFile = "Single.pdf";
            string workingDir    = @"..\..\";
            string singleXlsFile = "Single.xls";

            List <string> supportedFiles = new List <string>();

            foreach (string file in Directory.GetFiles(workingDir, "*.*"))
            {
                string ext = Path.GetExtension(file);

                // Compare extensions case-insensitively without creating lowered copies.
                if (string.Equals(ext, ".pdf", StringComparison.OrdinalIgnoreCase) ||
                    string.Equals(ext, ".docx", StringComparison.OrdinalIgnoreCase) ||
                    string.Equals(ext, ".rtf", StringComparison.OrdinalIgnoreCase))
                {
                    supportedFiles.Add(file);
                }
            }

            // Create single pdf.
            DocumentCore singlePDF = new DocumentCore();

            foreach (string file in supportedFiles)
            {
                DocumentCore dc = DocumentCore.Load(file);

                Console.WriteLine("Adding: {0}...", Path.GetFileName(file));

                // Create import session.
                ImportSession session = new ImportSession(dc, singlePDF, StyleImportingMode.KeepSourceFormatting);

                // Loop through all sections in the source document.
                foreach (Section sourceSection in dc.Sections)
                {
                    // Because we are copying a section from one document to another,
                    // it is required to import the Section into the destination document.
                    // This adjusts any document-specific references to styles, bookmarks, etc.
                    //
                    // Importing an element creates a copy of the original element, but the copy
                    // is ready to be inserted into the destination document.
                    Section importedSection = singlePDF.Import <Section>(sourceSection, true, session);

                    // The first section of each source document starts on a new page.
                    if (dc.Sections.IndexOf(sourceSection) == 0)
                    {
                        importedSection.PageSetup.SectionStart = SectionStart.NewPage;
                    }

                    // Now the new section can be appended to the destination document.
                    singlePDF.Sections.Add(importedSection);
                }
            }

            // Save our single document into PDF format in memory.
            using (MemoryStream Pdf = new MemoryStream())
            {
                singlePDF.Save(Pdf, new PdfSaveOptions()
                {
                    Compliance = PdfCompliance.PDF_A1a
                });
                singlePdfBytes = Pdf.ToArray();
            }

            // Open the intermediate result for demonstration purposes.
            File.WriteAllBytes(singlePdfFile, singlePdfBytes);
            System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(singlePdfFile)
            {
                UseShellExecute = true
            });

            bool converted        = false;
            SautinSoft.PdfFocus f = new PdfFocus();

            try
            {
                f.OpenPdf(singlePdfBytes);

                if (f.PageCount > 0)
                {
                    // A return value of 0 is treated as success.
                    converted = f.ToExcel(singleXlsFile) == 0;
                }
            }
            finally
            {
                f.ClosePdf();
            }

            if (!converted)
            {
                // Do not try to open a workbook that was never written.
                Console.WriteLine("Conversion failed!");
                return;
            }

            // Open the result for demonstration purposes.
            System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(singleXlsFile)
            {
                UseShellExecute = true
            });
        }
        /// <summary>
        /// Grades every submitted evaluation by comparing the text of the student's answer sheet with the
        /// text of the examination's reference answer sheet, then stores the result.
        /// </summary>
        /// <remarks>
        /// For each evaluation the method:
        /// decodes the Base64 <c>AnswerSheet</c> and writes it as a PDF under <c>STUDENT_ANS_SHEET_UPLOAD_PATH</c>;
        /// extracts the text of that PDF and of the reference PDF under <c>REFERENCE_ANS_SHEET_UPLOAD_PATH</c>;
        /// asks the similarity service for a score, reads its <c>normalized_score</c> field and divides it by 5.0;
        /// sets <c>MarksObtained</c> to <c>TotalMarks</c> multiplied by that fraction (truncated to an integer);
        /// replaces <c>AnswerSheet</c> with the download URL of the stored file and persists the evaluation.
        /// Any exception thrown while processing an evaluation propagates to the caller.
        /// </remarks>
        /// <param name="evaluations">Evaluations posted in the request body.</param>
        /// <returns><c>true</c> once every evaluation has been processed.</returns>
        public ActionResult <bool> EvaluateStudents([FromBody] List <Evaluation> evaluations)
        {
            var dao = new EvaluationsDao();

            Directory.CreateDirectory(STUDENT_ANS_SHEET_UPLOAD_PATH);
            foreach (var e in evaluations)
            {
                Console.WriteLine("-------------------------------------------------------");
                Console.WriteLine($"Student Id {e.Student.Id}");
                Console.WriteLine($"DateTime {e.DateTime?.ToString("yyyy-MM-dd")}");
                var date     = e.DateTime?.ToString("yyyy_MM_dd");
                // NOTE(review): CourseCode comes from the request body and is placed in the file name
                // unsanitised; confirm it is validated upstream so it cannot contain path separators.
                var fileName = $"AnswerKey_Student_{e.Student.Id}_{e.Examination.Course.CourseCode}_{date}.pdf";
                var filePath = Path.Combine(STUDENT_ANS_SHEET_UPLOAD_PATH, fileName);
                var bytes    = Convert.FromBase64String(e.AnswerSheet);
                System.IO.File.WriteAllBytes(filePath, bytes);
                Console.WriteLine("File Written");

                // Keep only the final path segment. Path.GetFileName also strips platform-specific
                // separators, so a client-supplied value cannot point outside the reference folder.
                var refSheetName = Path.GetFileName(e.Examination.ReferenceAnswerSheet);

                var refSheetPath = Path.Combine(REFERENCE_ANS_SHEET_UPLOAD_PATH, refSheetName);
                Console.WriteLine($"Ref Sheet Path = {refSheetPath}");

                string studentAnsText;
                var    studentPdf = new PdfFocus();
                Console.WriteLine($"Opening Student Pdf...");
                try
                {
                    studentPdf.OpenPdf(filePath);
                    studentAnsText = studentPdf.ToText();
                    Console.WriteLine("Student pdf read successfully!");
                }
                finally
                {
                    // Release the document even when opening or text extraction fails.
                    studentPdf.ClosePdf();
                }
                Console.WriteLine("Closing student pdf");

                string refAnsText;
                var    refPdf = new PdfFocus();
                Console.WriteLine("Opening reference pdf");
                try
                {
                    refPdf.OpenPdf(refSheetPath);
                    refAnsText = refPdf.ToText();
                    Console.WriteLine("Reference pdf read successfully!");
                }
                finally
                {
                    // Release the document even when opening or text extraction fails.
                    refPdf.ClosePdf();
                }
                Console.WriteLine("Closing reference pdf");

                // NOTE(review): the API key is hard-coded in source; it should be moved to configuration
                // or a secret store and this key rotated.
                paralleldots pd = new paralleldots("AliC73YnPPScR8dJJEMD8qxinhFTTUjFPmJGs5yknY0");
                Console.WriteLine("Calculating score");
                var similarity = pd.similarity(studentAnsText, refAnsText);

                var json  = JsonValue.Parse(similarity);
                // JSON numbers always use '.' as the decimal separator, so parse with the invariant
                // culture; the current culture may expect ',' and would misread or reject the value.
                var score = double.Parse(json["normalized_score"].ToString(),
                                         System.Globalization.CultureInfo.InvariantCulture);
                Console.WriteLine($"Score = {score}");
                // The score is divided by 5.0 to obtain the fraction of the total marks awarded.
                var percent = score / 5.0;
                Console.WriteLine($"Percentage = {percent}");

                e.MarksObtained = (int?)(e.Examination.TotalMarks * percent);

                // Replace the uploaded Base64 payload with the URL of the stored file before persisting.
                e.AnswerSheet = $"/api/examinations/student-ans-sheet/get/{fileName}";

                Console.WriteLine("Writing result to database");
                dao.CreateEvaluation(e);
                Console.WriteLine("Written Successfully!");
            }

            return(true);
        }
Exemple #28
0
        /// <summary>
        /// Converts multiple PDF files into a single HTML document.
        /// </summary>
        /// <remarks>
        /// Every <c>*.pdf</c> file found in <c>..\..\</c> (resolved against the current directory) is
        /// converted to an HTML body fragment with inline CSS and linked images. The fragments are
        /// concatenated into <c>Result.html</c>, a summary is printed to the console, and the result is
        /// opened with the shell's default handler.
        /// </remarks>
        static void ConvertMultiplePdfToSingleHtml()
        {
            // Directory with *.pdf files.
            string pdfDirectory = Path.GetFullPath(@"..\..\");
            string htmlFile     = "Result.html";

            string[] pdfFiles = Directory.GetFiles(pdfDirectory, "*.pdf");

            // Here we'll keep our Html document.
            StringBuilder singleHtml = new StringBuilder();

            singleHtml.Append("<html>\r\n<head>\r\n");
            singleHtml.Append(@"<meta http-equiv = ""Content-Type"" content=""text/html; charset=utf-8"" />");
            singleHtml.Append("\r\n</head>\r\n<body>");

            PdfFocus f = new PdfFocus();
            //f.Serial = "XXXXXXXXXXX";

            int success = 0;
            int total   = 0;

            foreach (string pdfFile in pdfFiles)
            {
                Console.WriteLine("Converting {0} ...", Path.GetFileName(pdfFile));

                try
                {
                    f.OpenPdf(pdfFile);
                    total++;

                    if (f.PageCount > 0)
                    {
                        // How to store images: Inside HTML document as base64 images or as linked separate image files.
                        f.HtmlOptions.IncludeImageInHtml = false;

                        // Create own subfolder for each converted file to store images separately and don't mix up them.
                        f.HtmlOptions.ImageSubFolder = String.Format("{0}_images", Path.GetFileNameWithoutExtension(pdfFile));

                        // A template name for images
                        f.HtmlOptions.ImageFileName = "picture";

                        // Auto - the same image format as in the source PDF;
                        // 'Jpeg' to make the document size less;
                        // 'PNG' to keep the highest quality, but the highest size too.
                        f.EmbeddedImagesFormat = PdfFocus.eImageFormat.Auto;

                        // Let's make our CSS inline to be able merge HTML documents without any problems.
                        f.HtmlOptions.InlineCSS = true;

                        // We need only contents of <body>...</body>.
                        f.HtmlOptions.ProduceOnlyHtmlBody = true;

                        string tempHtml = f.ToHtml();

                        if (!String.IsNullOrEmpty(tempHtml))
                        {
                            success++;
                            // Add tempHtml into a single HTML.
                            singleHtml.Append(tempHtml);
                        }
                    }
                }
                finally
                {
                    // The converter instance is reused for the next file, so release the current
                    // document whether or not the conversion succeeded.
                    f.ClosePdf();
                }
            }
            singleHtml.Append("</body></html>");

            // Show results:
            File.WriteAllText(htmlFile, singleHtml.ToString());

            Console.WriteLine("{0} of {1} files converted and merged into {2}!", success, total, Path.GetFileName(htmlFile));

            // Open the result for demonstration purposes.
            System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(htmlFile)
            {
                UseShellExecute = true
            });
        }
Exemple #29
0
        /// <summary>
        /// Converts multiple PDF files to HTML files.
        /// </summary>
        /// <remarks>
        /// Every <c>*.pdf</c> file found in <c>..\..\</c> (resolved against the current directory) is
        /// converted to an HTML file of the same base name inside the <c>htmls</c> directory, which is
        /// created when missing. Images are written to a per-document sub-folder of that directory.
        /// A summary is printed to the console and the output directory is opened with the shell.
        /// </remarks>
        static void ConvertMultiplePdfToHtmls()
        {
            // Directory with *.pdf files.
            string pdfDirectory = Path.GetFullPath(@"..\..\");

            string[]      pdfFiles      = Directory.GetFiles(pdfDirectory, "*.pdf");
            DirectoryInfo htmlDirectory = new DirectoryInfo(@"htmls");

            if (!htmlDirectory.Exists)
            {
                htmlDirectory.Create();
            }

            PdfFocus f = new PdfFocus();
            // After purchasing the license, please insert your serial number here to activate the component:
            //f.Serial = "XXXXXXXXXXX";

            int success = 0;
            int total   = 0;

            foreach (string pdfFile in pdfFiles)
            {
                Console.WriteLine("Converting {0} ...", Path.GetFileName(pdfFile));

                try
                {
                    f.OpenPdf(pdfFile);
                    total++;

                    if (f.PageCount > 0)
                    {
                        // Path (must exist) to a directory to store images after converting. Notice also to the property "ImageSubFolder".
                        f.HtmlOptions.ImageFolder = htmlDirectory.FullName;

                        // A folder (will be created by the component) without any drive letters, only the folder as "myfolder".
                        f.HtmlOptions.ImageSubFolder = String.Format("{0}_images", Path.GetFileNameWithoutExtension(pdfFile));

                        // A template name for images
                        f.HtmlOptions.ImageFileName = "picture";

                        // Auto - the same image format as in the source PDF;
                        // 'Jpeg' to make the document size less;
                        // 'PNG' to keep the highest quality, but the highest size too.
                        f.EmbeddedImagesFormat = PdfFocus.eImageFormat.Auto;

                        // How to store images: Inside HTML document as base64 images or as linked separate image files.
                        f.HtmlOptions.IncludeImageInHtml = false;

                        string htmlFile     = Path.GetFileNameWithoutExtension(pdfFile) + ".html";
                        string htmlFilePath = Path.Combine(htmlDirectory.FullName, htmlFile);

                        // The conversion is counted as successful when ToHtml returns 0.
                        if (f.ToHtml(htmlFilePath) == 0)
                        {
                            success++;
                        }
                    }
                }
                finally
                {
                    // The converter instance is reused for the next file, so release the current
                    // document whether or not the conversion succeeded.
                    f.ClosePdf();
                }
            }
            // Show results:
            Console.WriteLine("{0} of {1} files converted successfully!", success, total);

            // Open folder with HTML files after converting.
            // Open the result for demonstration purposes.
            System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(htmlDirectory.FullName)
            {
                UseShellExecute = true
            });
        }
Exemple #30
0
        /// <summary>
        /// Extracts the images of the PDF selected in <c>txt_ImagePath</c> and saves each one into the
        /// folder given by <c>txt_FolderSaveImage</c> as <c>{name}_Page{n}.jpg</c>, where <c>{name}</c> is
        /// the name assigned to position <c>n</c> in <c>dataGridView1</c>.
        /// </summary>
        /// <remarks>
        /// Column 2 of a grid row holds a page specification such as <c>3</c>, <c>1-4</c> or
        /// <c>1-4;7;9-10</c>; column 1 holds the name applied to those pages. Empty or malformed entries
        /// and page numbers outside <c>0..TongSoTrang</c> are ignored. Positions with no name
        /// produce a file called <c>_Page{n}.jpg</c>.
        /// </remarks>
        private void ExtractImage()
        {
            // Indexed by page number; slot 0 exists only so that 1-based page numbers can be used directly.
            string[] pageNames = new string[TongSoTrang + 1];

            int rowNumber = 1;
            foreach (DataGridViewRow dr in dataGridView1.Rows)
            {
                // The last row of the grid is never processed.
                // NOTE(review): presumably it is the blank "new row" placeholder — confirm that
                // AllowUserToAddRows is enabled, otherwise the last real row is skipped.
                if (rowNumber < dataGridView1.RowCount)
                {
                    string pageSpec  = dr.Cells[2].Value != null ? dr.Cells[2].Value.ToString() : "";
                    string imageName = dr.Cells[1].Value?.ToString();
                    AssignImageNameToPages(pageNames, pageSpec, imageName);
                }
                rowNumber++;
            }

            var f = new PdfFocus {
                Serial = "1234567890"
            };
            string pdfFile = txt_ImagePath.Text;

            try
            {
                f.OpenPdf(pdfFile);
                if (f.PageCount > 0)
                {
                    List <PdfFocus.PdfImage> pdfImages = f.ExtractImages(1, f.PageCount);

                    if (pdfImages != null && pdfImages.Count > 0)
                    {
                        for (int i = 0; i < pdfImages.Count; i++)
                        {
                            // NOTE(review): the image's position in the extracted list is used as its
                            // page number, which only holds when every page yields exactly one image.
                            int position = i + 1;

                            // There can be more images than pages; such images get no name prefix
                            // instead of indexing past the end of the array.
                            string prefix    = position < pageNames.Length ? pageNames[position] : null;
                            string imageFile = Path.Combine(txt_FolderSaveImage.Text + "\\", prefix + "_Page" + position + ".jpg");
                            pdfImages[i].Picture.Save(imageFile);
                        }
                    }
                }
            }
            finally
            {
                // Release the document even when extraction or saving fails.
                f.ClosePdf();
            }
        }

        /// <summary>
        /// Writes <paramref name="imageName"/> into every slot of <paramref name="pageNames"/> selected by
        /// <paramref name="pageSpec"/>, a <c>;</c>-separated list of page numbers and <c>first-last</c> ranges.
        /// </summary>
        /// <param name="pageNames">Array indexed by page number that receives the names.</param>
        /// <param name="pageSpec">Page specification, e.g. <c>1-4;7</c>. May be null or empty.</param>
        /// <param name="imageName">Name to store for the selected pages. May be null.</param>
        private static void AssignImageNameToPages(string[] pageNames, string pageSpec, string imageName)
        {
            if (string.IsNullOrWhiteSpace(pageSpec))
            {
                return;
            }

            var invariant = System.Globalization.CultureInfo.InvariantCulture;
            const System.Globalization.NumberStyles integerStyle = System.Globalization.NumberStyles.Integer;

            // Splitting a specification without ';' yields the whole text as a single entry, so one
            // loop covers both the single-entry and the multi-entry form.
            foreach (string entry in pageSpec.Split(';'))
            {
                int first;
                int last;

                // A '-' at position 0 is a sign, not a range separator.
                if (entry.IndexOf('-') > 0)
                {
                    string[] bounds = entry.Split('-');
                    if (!int.TryParse(bounds[0], integerStyle, invariant, out first) ||
                        !int.TryParse(bounds[1], integerStyle, invariant, out last))
                    {
                        continue;
                    }
                }
                else
                {
                    if (!int.TryParse(entry, integerStyle, invariant, out first))
                    {
                        continue;
                    }
                    last = first;
                }

                // Clamp to the valid slots so an out-of-range page number cannot overrun the array.
                int from = Math.Max(first, 0);
                int to   = Math.Min(last, pageNames.Length - 1);
                for (int page = from; page <= to; page++)
                {
                    pageNames[page] = imageName;
                }
            }
        }