/// <summary>
/// Lets the user pick a single PDF file, stores its path in the form and
/// displays the number of pages of the chosen document.
/// </summary>
/// <param name="sender">Control that raised the event.</param>
/// <param name="e">Event data (unused).</param>
private void btn_BrowserPDF_Click(object sender, EventArgs e)
{
    if (string.IsNullOrEmpty(txt_BatchName.Text))
    {
        MessageBox.Show("Vui lòng điền tên batch", "Lỗi", MessageBoxButtons.OK, MessageBoxIcon.Warning);
        return;
    }

    using (OpenFileDialog dlg = new OpenFileDialog())
    {
        dlg.Filter = "Types PDF|*.pdf";
        dlg.Multiselect = false;

        // The user cancelled: there is nothing new to open, so leave the
        // current path and page count untouched instead of re-opening
        // whatever happens to be in the text box (possibly an empty path).
        if (dlg.ShowDialog() != DialogResult.OK)
        {
            return;
        }

        _lFileNames = dlg.FileNames;
        txt_ImagePath.Text = Path.GetFullPath(dlg.FileName);
    }

    var f = new PdfFocus { Serial = "1234567890" };
    try
    {
        f.OpenPdf(txt_ImagePath.Text);
        TongSoTrang = f.PageCount;
    }
    finally
    {
        // Only the page count is needed; release the document right away.
        f.ClosePdf();
    }

    lbl_Page.Text = TongSoTrang + " Pages";
}
/// <summary>
/// Searches a PDF file page by page for a piece of text (case-insensitive, ordinal).
/// </summary>
/// <param name="fileFullPath">Path of the PDF file to search.</param>
/// <param name="text">Text to look for.</param>
/// <param name="cts">
/// Cancellation source checked before each page; when cancellation has been
/// requested the search stops and <c>false</c> is returned. May be <c>null</c>,
/// in which case the search simply cannot be cancelled.
/// </param>
/// <returns>
/// <c>true</c> when some page contains <paramref name="text"/>; <c>false</c> when it
/// is not found, the search was cancelled, or any exception occurred.
/// </returns>
public static bool FindTextInPdfFile(string fileFullPath, string text, ref CancellationTokenSource cts)
{
    var pdfFocus = new PdfFocus();
    try
    {
        pdfFocus.OpenPdf(fileFullPath);

        // Pages are addressed 1..PageCount.
        for (var page = 1; page <= pdfFocus.PageCount; page++)
        {
            if (cts != null && cts.IsCancellationRequested)
            {
                break;
            }

            if (pdfFocus.ToText(page, page).IndexOf(text, StringComparison.OrdinalIgnoreCase) >= 0)
            {
                return true;
            }
        }

        return false;
    }
    catch (Exception)
    {
        // Best-effort search: any failure is reported as "not found".
        return false;
    }
    finally
    {
        // Single place where the document is closed (also runs on the early return).
        pdfFocus.ClosePdf();
    }
}
/// <summary>
/// Generates an XML file from a PDF file. The XML is written next to the
/// source file, with the extension changed to ".xml".
/// </summary>
/// <param name="pdfFile">Path of the PDF file to convert.</param>
/// <returns>
/// <c>true</c> when the PDF has at least one page and the conversion returned 0;
/// otherwise <c>false</c> (exceptions are written to the console).
/// </returns>
public static bool PdfToXMLAsFiles(string pdfFile)
{
    PdfFocus f = null;
    try
    {
        string pathToXml = Path.ChangeExtension(pdfFile, ".xml");

        // Convert PDF file to XML file.
        f = new PdfFocus();

        // This property is necessary only for registered version.
        // f.Serial = "XXXXXXXXXXX";

        // Let's convert only tables to XML and skip all textual data.
        f.XmlOptions.ConvertNonTabularDataToSpreadsheet = false;

        f.OpenPdf(pdfFile);

        // The XML document could be shown in the browser here; this code
        // deliberately does not open it:
        // Process.Start(new ProcessStartInfo(pathToXml) { UseShellExecute = true });
        return f.PageCount > 0 && f.ToXml(pathToXml) == 0;
    }
    catch (Exception e)
    {
        Console.WriteLine(e.ToString());
        return false;
    }
    finally
    {
        // Release the source document whether or not the conversion worked.
        if (f != null)
        {
            f.ClosePdf();
        }
    }
}
/// <summary>
/// Generates an Excel workbook from a PDF file. The workbook is written next
/// to the source file, with the extension changed to ".xls".
/// </summary>
/// <param name="pdfFile">Path of the PDF file to convert.</param>
/// <remarks>Exceptions are written to the console and not rethrown.</remarks>
public static void PdfToExcelAsFiles(string pdfFile)
{
    PdfFocus f = null;
    try
    {
        string excelFile = Path.ChangeExtension(pdfFile, ".xls");
        f = new PdfFocus();

        // 'true' = Convert all data to spreadsheet (tabular and even textual).
        // 'false' = Skip textual data and convert only tabular (tables) data.
        f.ExcelOptions.ConvertNonTabularDataToSpreadsheet = false;

        // 'true' = Preserve original page layout.
        // 'false' = Place tables before text.
        f.ExcelOptions.PreservePageLayout = true;

        f.OpenPdf(pdfFile);
        if (f.PageCount > 0)
        {
            f.ToExcel(excelFile);
        }
    }
    catch (Exception e)
    {
        Console.WriteLine(e.ToString());
    }
    finally
    {
        // Release the source document whether or not the conversion worked.
        if (f != null)
        {
            f.ClosePdf();
        }
    }
}
/// <summary>
/// Handles the upload button: opens the uploaded PDF bytes, keeps the opened
/// document in the session under "focus" and renders it via ShowPdf().
/// </summary>
/// <param name="sender">Control that raised the event.</param>
/// <param name="e">Event data (unused).</param>
protected void Upload(object sender, EventArgs e)
{
    lblMessage.Visible = true;

    // A document from an earlier upload is about to be replaced in the
    // session; close it first so it does not stay open for the session's lifetime.
    PdfFocus previous = Session["focus"] as PdfFocus;
    if (previous != null)
    {
        previous.ClosePdf();
    }

    PdfFocus f = new PdfFocus();
    f.OpenPdf(uplPDF.FileBytes);
    Session["focus"] = f;
    ShowPdf();
}
/// <summary>
/// Downloads the PDF document attached to a Telegram message, converts it to
/// DOCX and sends the result back to the same chat.
/// </summary>
/// <param name="bot">Bot client used to download the file and to reply.</param>
/// <param name="msg">Incoming message carrying the document.</param>
/// <returns>A task that completes when the reply has been sent.</returns>
public static async Task ConvertPDFtoWord(TelegramBotClient bot, Message msg)
{
    // Require a real ".pdf" extension (any casing) rather than ".pdf" anywhere in the name.
    if (!msg.Document.FileName.EndsWith(".pdf", StringComparison.OrdinalIgnoreCase))
    {
        await bot.SendTextMessageAsync(msg.Chat.Id, "Файл имеет не верный формат");
        return;
    }

    // The file name comes from the sender; strip any directory part so it
    // cannot point outside the conversion folder.
    string safeName = Path.GetFileName(msg.Document.FileName);
    string path = String.Format(@"TelegramBotIQ Users File\Convert\{0}.pdf", safeName);
    string pathDocx = String.Format(@"TelegramBotIQ Users File\Convert\{0}.docx", safeName);
    string errorText = "Возникла ошибка :(" + Environment.NewLine + "Не беспокойтесь, разрабу дадим по шапке";

    try
    {
        // File.Create truncates an existing file, so a shorter download never
        // leaves stale bytes from a previous upload with the same name.
        using (var filestream = System.IO.File.Create(path))
        {
            await bot.GetInfoAndDownloadFileAsync(
                fileId: msg.Document.FileId,
                destination: filestream
            );
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine($"Error: {ex}");
        await bot.SendTextMessageAsync(msg.Chat.Id, errorText);
        // Without the downloaded file there is nothing to convert.
        return;
    }

    bool converted = false;
    PdfFocus f = new PdfFocus();
    try
    {
        f.OpenPdf(path);
        if (f.PageCount > 0)
        {
            await bot.SendTextMessageAsync(msg.Chat.Id, "Подождите, идет конвертирование!");
            f.WordOptions.Format = PdfFocus.CWordOptions.eWordDocument.Docx;
            // ToWord returns 0 on success.
            converted = f.ToWord(pathDocx) == 0;
        }
    }
    finally
    {
        f.ClosePdf();
    }

    if (!converted)
    {
        // No DOCX was produced; opening it would throw, so report the failure instead.
        await bot.SendTextMessageAsync(msg.Chat.Id, errorText);
        return;
    }

    using (FileStream fileStream = System.IO.File.OpenRead(pathDocx))
    {
        InputOnlineFile file = new InputOnlineFile(fileStream);
        await bot.SendDocumentAsync(msg.Chat.Id, file, caption: "Конвертирование завершено!");
    }
}
/// <summary>
/// Return pdf page as image
/// </summary>
/// <param name="filename">filename of pdf, relative to the "TestDataDirectory" setting</param>
/// <param name="page">1-based page of pdf</param>
/// <returns>
/// The requested page rendered at 300 dpi, or <c>null</c> when
/// <paramref name="page"/> is outside 1..PageCount.
/// </returns>
public static Image GetPdfPageAsImage(string filename, int page)
{
    var testDataDirectory = AppSettings.Get("TestDataDirectory");
    var f = new PdfFocus();
    f.OpenPdf($"{testDataDirectory}{filename}");

    // Pages are 1-based, so the last valid page equals PageCount. The old
    // "PageCount > page" test rejected the last page and let 0/negatives through.
    if (page < 1 || page > f.PageCount)
    {
        return null;
    }

    f.ImageOptions.Dpi = 300;
    // NOTE(review): the document is left open because it is not visible here
    // whether the returned Image stays valid after ClosePdf() - confirm and close if safe.
    return f.ToDrawingImage(page);
}
/// <summary>
/// Converts a PDF file to a Word document stored in the configured output folder.
/// </summary>
/// <param name="FilePath">Path of the PDF file to convert.</param>
/// <returns>Full path of the generated Word document.</returns>
public string ConvertPDFtoWord(string FilePath)
{
    string SaveToFileLocation = Properties.Settings.Default.FolderPath + "PdfToWrod.docx";

    PdfDocument pdfDoc = new PdfDocument();
    try
    {
        pdfDoc.LoadFromFile(FilePath);
        // The target has a .docx extension, so write DOCX content; saving
        // with the DOC format produced a file whose content did not match its name.
        pdfDoc.SaveToFile(SaveToFileLocation, FileFormat.DOCX);
    }
    finally
    {
        // Release the source file once the conversion is finished.
        pdfDoc.Close();
    }

    //System.Diagnostics.Process.Start(SaveToFileLocation);
    return SaveToFileLocation;
}
/// <summary>
/// Tells whether a page number is valid for the document kept in the session.
/// </summary>
/// <param name="page">1-based page number to check.</param>
/// <returns>
/// <c>true</c> when a document is stored under "focus" and
/// <paramref name="page"/> lies within 1..PageCount; otherwise <c>false</c>.
/// </returns>
protected bool IsPageInRange(int page)
{
    object stored = Session["focus"];
    if (stored == null)
    {
        return false;
    }

    PdfFocus document = (PdfFocus)stored;
    return page > 0 && page <= document.PageCount;
}
/// <summary>
/// Converts a PDF supplied as a stream into a Word document held in memory.
/// </summary>
/// <param name="file">Stream containing the PDF document.</param>
/// <returns>
/// A stream over the converted document. The stream is empty when the PDF has
/// no pages or the conversion produced no data.
/// </returns>
public Stream getWordStream(Stream file)
{
    PdfFocus f = new PdfFocus();
    try
    {
        f.OpenPdf(file);

        byte[] docx = f.PageCount > 0 ? f.ToWord() : null;

        // MemoryStream rejects a null buffer; fall back to an empty one so a
        // page-less document no longer ends in an ArgumentNullException.
        return new MemoryStream(docx ?? new byte[0]);
    }
    finally
    {
        f.ClosePdf();
    }
}
/// <summary>
/// Prepares a parser for pages <paramref name="FromPage"/>..<paramref name="ToPage"/>
/// of a PDF file. When a "final.bin" file already exists next to the PDF, the
/// table is loaded through LoadTable() and nothing else is initialised.
/// </summary>
/// <param name="PDFLocation">Path of the PDF file.</param>
/// <param name="FromPage">First page number to parse (inclusive).</param>
/// <param name="ToPage">Last page number to parse (inclusive).</param>
public PDFParser(string PDFLocation, int FromPage, int ToPage)
{
    // Path.GetDirectoryName also handles paths that contain no backslash
    // (bare file name, forward slashes), where Substring(0, LastIndexOf("\\"))
    // threw ArgumentOutOfRangeException. A bare file name maps to the current directory.
    DirLocation = Path.GetDirectoryName(PDFLocation) ?? string.Empty;

    if (File.Exists(Path.Combine(DirLocation, "final.bin")))
    {
        Console.WriteLine("Loading Existing Table");
        LoadTable();
        Console.WriteLine("Table Loaded");
        return;
    }

    Console.WriteLine("Loading Parsers");
    pdfFocus = new PdfFocus();
    pdfFocus.OpenPdf(PDFLocation);
    xlApp = new Application();
    if (xlApp == null)
    {
        throw new Exception("Excel not installed!!");
    }

    // Creating Directories
    string loc1 = Path.Combine(DirLocation, "Excel Files");
    if (!Directory.Exists(loc1))
    {
        Directory.CreateDirectory(loc1);
    }

    // Initialize Pages: one entry per page, stored at index (page - FromPage).
    CompleteList = new Page[ToPage - FromPage + 1];
    Page.PDFLocation = PDFLocation;
    for (int i = FromPage; i <= ToPage; ++i)
    {
        CompleteList[i - FromPage] = new Page { PageNumber = i };
    }

    Console.WriteLine("Parsers Loaded");
}
/// <summary>
/// Save pdf pages as images
/// </summary>
/// <param name="filename">
/// filename of pdf to save as images, relative to the "TestDataDirectory"
/// setting; page p is written to "{filename}.{p}.png" in the same directory
/// </param>
public static void SavePdfToImage(string filename)
{
    var testDataDirectory = AppSettings.Get("TestDataDirectory");
    var f = new PdfFocus();
    try
    {
        f.OpenPdf($"{testDataDirectory}{filename}");
        if (f.PageCount <= 0)
        {
            return;
        }

        f.ImageOptions.Dpi = 300;
        for (var p = 1; p <= f.PageCount; p++)
        {
            // Each rendered page is disposed as soon as it has been written,
            // so a long document does not accumulate 300 dpi bitmaps.
            using (var img = f.ToDrawingImage(p))
            {
                if (img == null)
                {
                    // Rendering produced nothing for this page; skip it.
                    continue;
                }

                img.Save($"{testDataDirectory}{filename}.{p}.png");
            }
        }
    }
    finally
    {
        f.ClosePdf();
    }
}
/// <summary>
/// Converts every PDF file found in the source directory into its own HTML
/// file, written next to the PDF, and reports how many conversions succeeded.
/// </summary>
static void ConvertMultiplePdfToHtmls()
{
    // Folder that is scanned for *.pdf files.
    string pdfDirectory = Path.GetFullPath(@"..\..\..\..\..\");
    string[] sources = Directory.GetFiles(pdfDirectory, "*.pdf");

    PdfFocus converter = new PdfFocus();
    // After purchasing the license, please insert your serial number here to activate the component:
    //converter.Serial = "123456789";

    int converted = 0;
    int attempted = 0;

    for (int index = 0; index < sources.Length; index++)
    {
        string source = sources[index];

        Console.WriteLine("Converting {0} ...", Path.GetFileName(source));
        converter.OpenPdf(source);
        attempted++;

        // Files that report no pages still count as attempted but are not converted.
        if (converter.PageCount <= 0)
        {
            continue;
        }

        // Existing directory that receives the images; see also "ImageSubFolder".
        converter.HtmlOptions.ImageFolder = Path.GetDirectoryName(source);

        // Sub folder (created by the component) - a plain folder name without drive letters.
        converter.HtmlOptions.ImageSubFolder = String.Format("{0}_images", Path.GetFileNameWithoutExtension(source));

        // PNG is the recommended type for storing images.
        converter.HtmlOptions.ImageType = PdfFocus.CHtmlOptions.eHtmlImageType.Png;

        // Keep images as separate linked files rather than base64 inside the HTML.
        converter.HtmlOptions.IncludeImageInHtml = false;

        string target = Path.ChangeExtension(source, ".html");
        int status = converter.ToHtml(target);
        if (status == 0)
        {
            converted++;
        }
    }

    // Show results:
    Console.WriteLine("{0} of {1} files converted successfully!", converted, attempted);
    Console.WriteLine("Press any key ...");
    Console.ReadLine();

    // Open folder with HTML files after converting.
    System.Diagnostics.Process.Start(pdfDirectory);
}
/// <summary>
/// Converts a PDF to text through OCR: every page is exported as a 300 dpi
/// JPEG into the temporary directory, recognised with Tesseract (Russian) and
/// the recognised text is appended to "textOCR.txt".
/// </summary>
/// <param name="path">Path of the PDF file to process.</param>
/// <returns>Path of the text file holding the recognised text.</returns>
private string OpenPDF(string path)
{
    string txtFile = DirectoryTemporary + "textOCR.txt";
    int pageCount;

    PdfFocus f = new PdfFocus();
    try
    {
        f.OpenPdf(path);
        pageCount = f.PageCount;
        Console.WriteLine($"Колличество страниц: {pageCount}");

        if (pageCount > 0)
        {
            f.ImageOptions.ImageFormat = System.Drawing.Imaging.ImageFormat.Jpeg;
            f.ImageOptions.Dpi = 300;

            // Split the PDF into one image per page ("page1.jpg", "page2.jpg", ...).
            f.ToImage(DirectoryTemporary, "page");
        }
    }
    finally
    {
        // The page images are on disk now; the PDF itself is no longer needed.
        f.ClosePdf();
    }

    using (FileStream fileStream = File.Open(txtFile, FileMode.Create))
    using (StreamWriter strWrite = new StreamWriter(fileStream))
    {
        if (pageCount > 0)
        {
            try
            {
                // One engine serves all pages; building it once avoids
                // reloading the language data for every page.
                using (TesseractEngine engine = new TesseractEngine(@"tessdata", "rus", EngineMode.Default))
                {
                    for (int i = 1; i <= pageCount; i++)
                    {
                        string pathPage = DirectoryTemporary + $"page{i}.jpg";
                        try
                        {
                            using (var img = Pix.LoadFromFile(pathPage))
                            using (var page = engine.Process(img))
                            {
                                Console.WriteLine("Качество скана: {0}", page.GetMeanConfidence());
                                var text = page.GetText();
                                strWrite.WriteLine($"{text}");
                            }
                        }
                        catch (Exception pageError)
                        {
                            // A page that fails is logged and skipped; the rest are still processed.
                            Console.WriteLine("Error: {0}", pageError.Message);
                        }
                    }
                }
            }
            catch (Exception engineError)
            {
                // The OCR engine could not be created (e.g. missing language data).
                Console.WriteLine("Error: {0}", engineError.Message);
            }
        }
    }

    return txtFile;
}
/// <summary>
/// Runs OCR (Tesseract, via the PerformOCRTesseract callback) over a scanned
/// PDF document and stores the recognised content as a DOCX document.
/// </summary>
static void LoadScannedPdf()
{
    // The scanned PDF contains English, Russian and Vietnamese text; the OCR
    // result is saved as a new DOCX document.
    //
    // Preparation:
    // 1. Download the language data files eng.traineddata, rus.traineddata
    //    and vie.traineddata, either from
    //    https://github.com/tesseract-ocr/tessdata_fast (good and fast) or
    //    https://github.com/tesseract-ocr/tessdata_best (best and slow).
    // 2. Copy the three files into the folder "tessdata" in the Project root.
    // 3. Make sure the folder "tessdata" also contains the "pdf.ttf" file.

    string inpFile = @"..\..\scan.pdf";
    string outFile = "Result.docx";

    PdfFocus f = new PdfFocus();
    f.OCROptions.Mode = PdfFocus.COCROptions.eOCRMode.AllImages;
    f.OCROptions.Method += PerformOCRTesseract;

    f.OpenPdf(inpFile);

    // Converted only when the document has pages and ToWord reports 0.
    bool succeeded = f.PageCount > 0 && f.ToWord(outFile) == 0;

    if (!succeeded)
    {
        Console.WriteLine("Conversion failed!");
        return;
    }

    // Open the result for demonstration purposes.
    var startInfo = new System.Diagnostics.ProcessStartInfo(outFile) { UseShellExecute = true };
    System.Diagnostics.Process.Start(startInfo);
}
/// <summary>
/// Asks the user for a PDF file and for a target file, converts the PDF to a
/// Word document and reports the outcome in a message box.
/// </summary>
private void UploadAndConvert()
{
    openWordDoc.Title = "Open PDF file to convert";
    openWordDoc.Filter = "PDF Files|*.pdf";
    if (openWordDoc.ShowDialog() != DialogResult.OK)
    {
        return;
    }

    PdfFocus pdfFocus = new PdfFocus();
    try
    {
        pdfFocus.OpenPdf(openWordDoc.FileName);

        saveWordDoc.Title = "Save Doc file";
        saveWordDoc.Filter = "Word Files|*.docx";
        if (saveWordDoc.ShowDialog() != DialogResult.OK)
        {
            return;
        }

        // ToWord returns 0 on success; only then claim the conversion worked.
        bool converted = pdfFocus.PageCount > 0 && pdfFocus.ToWord(saveWordDoc.FileName) == 0;
        if (converted)
        {
            MessageBox.Show("Converted Successfully", "Info", MessageBoxButtons.OK, MessageBoxIcon.Information);
        }
        else
        {
            MessageBox.Show("Conversion failed", "Error", MessageBoxButtons.OK, MessageBoxIcon.Error);
        }
    }
    finally
    {
        // Release the source PDF on every path, including a cancelled save dialog.
        pdfFocus.ClosePdf();
    }
}
/// <summary>
/// Converts the in-memory PDF to text with PdfFocus, shows how long loading
/// and conversion took, displays the text and writes it to a file.
/// </summary>
/// <param name="sender">Control that raised the event.</param>
/// <param name="e">Event data (unused).</param>
private void ConvertToTextPdfFocusButton_Click(object sender, EventArgs e)
{
    const string outputPath = @"C:\Base64\PdfFocus.txt";

    string text;
    var pdf = new PdfFocus();
    using (var input = new MemoryStream(_binaryPdfFile))
    {
        try
        {
            var stopwatch = Stopwatch.StartNew();
            pdf.OpenPdf(input);
            stopwatch.Stop();

            // TotalSeconds is the whole duration; Elapsed.Seconds is only the
            // 0-59 seconds component and drops fractions and minutes.
            PdfFocusStatsDetails.Text = $"Load Pdf Binary: {stopwatch.Elapsed.TotalSeconds:F2} seconds\r\n\r\n";

            stopwatch.Restart();
            text = pdf.ToText();
            stopwatch.Stop();

            PdfFocusStatsDetails.Text += $"Convert to Text: {stopwatch.Elapsed.TotalSeconds:F2} seconds";
        }
        finally
        {
            pdf.ClosePdf();
        }
    }

    ExtractedTextTextBox.Text = text;

    // Make sure the target folder exists; WriteAllText does not create it.
    Directory.CreateDirectory(Path.GetDirectoryName(outputPath));
    File.WriteAllText(outputPath, text);
}
/// <summary>
/// Renders the current page of the document kept in the session as HTML
/// (images embedded as PNG) and updates the "Page x of y" label.
/// Does nothing when no document is stored or it has no pages.
/// </summary>
private void ShowPdf()
{
    PdfFocus f = Session["focus"] as PdfFocus;
    if (f == null || f.PageCount <= 0)
    {
        return;
    }

    f.HtmlOptions.IncludeImageInHtml = true;
    f.EmbeddedImagesFormat = PdfFocus.eImageFormat.Png;

    // Session["page"] may not have been set yet (e.g. right after an upload);
    // start at the first page instead of failing on the unboxing cast.
    int page = (Session["page"] as int?) ?? 1;

    string html = f.ToHtml(page, page);
    htmlLiteral.Text = html;
    txtPage.Text = String.Format("Page {0} of {1}", page, f.PageCount);
}
/// <summary>
/// Converts a range of pages of a PDF document into image files.
/// </summary>
/// <param name="pdfInputPath">Path of the PDF file.</param>
/// <param name="imageOutputPath">Directory the images are written to (created when missing).</param>
/// <param name="imageName">Name prefix of the generated images; the page number is appended.</param>
/// <param name="startPageNum">Page of the PDF document at which the conversion starts.</param>
/// <param name="endPageNum">Page of the PDF document at which the conversion stops.</param>
/// <param name="imageFormat">Desired image format; also used as the file extension.</param>
/// <param name="definition">Image sharpness; a larger number means a sharper image.</param>
public static void ConvertPDF2Image(string pdfInputPath, string imageOutputPath, string imageName, int startPageNum, int endPageNum, ImageFormat imageFormat, Definition definition)
{
    PdfFocus pdfFocus = new PdfFocus();
    try
    {
        pdfFocus.OpenPdf(pdfInputPath);

        if (!Directory.Exists(imageOutputPath))
        {
            Directory.CreateDirectory(imageOutputPath);
        }

        // Put the bounds in ascending order first. The previous swap assigned
        // startPageNum back to endPageNum and therefore lost the original value.
        if (startPageNum > endPageNum)
        {
            int tempPageNum = startPageNum;
            startPageNum = endPageNum;
            endPageNum = tempPageNum;
        }

        // Then clamp both bounds to the pages that actually exist.
        if (startPageNum <= 0)
        {
            startPageNum = 1;
        }
        if (endPageNum > pdfFocus.PageCount)
        {
            endPageNum = pdfFocus.PageCount;
        }

        // Render in the requested format so the content matches the file extension.
        pdfFocus.ImageOptions.ImageFormat = imageFormat;
        // NOTE(review): 'definition' is not applied - its mapping to a DPI value
        // is not visible from here; confirm against the Definition enum.

        // start to convert each page
        for (int i = startPageNum; i <= endPageNum; i++)
        {
            byte[] img = pdfFocus.ToImage(i);
            if (img == null)
            {
                // Nothing was rendered for this page; skip it.
                continue;
            }

            // Path.Combine inserts the separator when imageOutputPath lacks a trailing one.
            string target = Path.Combine(imageOutputPath, imageName + i.ToString() + "." + imageFormat.ToString());
            File.WriteAllBytes(target, img);
        }
    }
    finally
    {
        pdfFocus.ClosePdf();
    }
}
/// <summary>
/// Converts a PDF file to an HTML file inside <paramref name="writeToDirectory"/>.
/// When a file with the target name already exists, the next free name
/// (as provided by GetNextFileName) is used instead.
/// </summary>
/// <param name="file">PDF file to convert.</param>
/// <param name="writeToDirectory">Directory that receives the HTML file.</param>
/// <param name="logger">Logger that receives exception details.</param>
/// <param name="resultFile">Target HTML file; <c>null</c> when it was never determined.</param>
/// <returns>
/// <c>true</c> when the PDF has pages and the conversion returned 0;
/// <c>false</c> otherwise, including when an exception occurred.
/// </returns>
public static bool PdfToHtml(this FileInfo file, DirectoryInfo writeToDirectory, ILog logger, out FileInfo resultFile)
{
    resultFile = null;
    PdfFocus pdf = null;
    try
    {
        pdf = new PdfFocus();
        pdf.OpenPdf(file.FullName);

        // A document without pages (e.g. one that failed to open) cannot be converted.
        if (pdf.PageCount <= 0)
        {
            return false;
        }

        string fileName = $"{Path.GetFileNameWithoutExtension(file.FullName)}.html";
        resultFile = new FileInfo(Path.Combine(writeToDirectory.FullName, fileName));
        if (resultFile.Exists)
        {
            resultFile = new FileInfo(resultFile.FullName.GetNextFileName());
        }

        // ToHtml returns 0 on success; report the real outcome instead of always true.
        return pdf.ToHtml(resultFile.FullName) == 0;
    }
    catch (Exception ex)
    {
        logger.Error("An exception occurred: {0}\r\n{1}", ex.Message, ex.StackTrace);
        return false;
    }
    finally
    {
        if (pdf != null)
        {
            pdf.ClosePdf();
        }
    }
}
/// <summary>
/// Converts each PDF file to XML (tabular and non-tabular data) and returns
/// the XML text per file.
/// </summary>
/// <param name="files">Paths of the PDF files to read; each path must be unique.</param>
/// <returns>
/// Map from file path to its XML content; the content is an empty string for
/// files without pages or without a conversion result.
/// </returns>
private static Dictionary<string, string> GetFileContentsThroughSautin(string[] files)
{
    Dictionary<string, string> contents = new Dictionary<string, string>();

    foreach (var file in files)
    {
        string content = string.Empty;

        PdfFocus f = new PdfFocus();
        try
        {
            f.XmlOptions.ConvertNonTabularDataToSpreadsheet = true;
            f.OpenPdf(file);
            if (f.PageCount > 0)
            {
                content = f.ToXml() ?? string.Empty;
            }
        }
        finally
        {
            // Close every document so a long file list does not keep them all open.
            f.ClosePdf();
        }

        contents.Add(file, content);
    }

    return contents;
}
/// <summary>
/// Renders every page of a PDF to "PDF\{page}.jpeg" at 320 dpi and then passes
/// each image to Cutter.Cut, subtracting the elapsed milliseconds of every
/// step from <paramref name="time"/> and refreshing the label.
/// </summary>
/// <param name="path">Path of the PDF file.</param>
/// <param name="progressBar">Progress bar configured for two steps per page.</param>
/// <param name="time">Remaining time in milliseconds; decreased after every step.</param>
/// <param name="label">Label updated through TimeCalc.MinuteSeconds.</param>
public static void ConvertToImg(string path, ref ProgressBar progressBar, ref double time, ref Label label)
{
    string prefixFilePath = "PDF\\";

    // Make sure the output folder exists before any image is written into it.
    System.IO.Directory.CreateDirectory(prefixFilePath);

    Stopwatch sw = new Stopwatch();
    int pageCount;

    SautinSoft.PdfFocus f = new PdfFocus();
    try
    {
        f.OpenPdf(path);
        pageCount = f.PageCount;
        f.ImageOptions.Dpi = 320;

        Cutter.setMaximumAndStep(progressBar, pageCount * 2);

        for (int i = 1; i <= pageCount; i++)
        {
            sw.Restart();
            f.ToImage(prefixFilePath + i.ToString() + ".jpeg", i);
            sw.Stop();

            time -= sw.ElapsedMilliseconds;
            TimeCalc.MinuteSeconds(time, label);
        }
    }
    finally
    {
        // Close the document even when rendering a page throws.
        f.ClosePdf();
    }

    for (int i = 1; i <= pageCount; i++)
    {
        sw.Restart();
        Cutter.Cut(prefixFilePath + i.ToString() + ".jpeg", i, progressBar);
        sw.Stop();

        time -= sw.ElapsedMilliseconds;
        TimeCalc.MinuteSeconds(time, label);
    }
}
/// <summary>
/// Estimates how long rendering the whole PDF to images will take, by timing
/// the rendering of pages 1 and 2 at 320 dpi and extrapolating to all pages.
/// Also stores the page count in <c>pages</c> and the estimate in <c>timeForJpeg</c>.
/// </summary>
/// <param name="fileName">Path of the PDF file.</param>
/// <returns>The estimated total time in milliseconds.</returns>
private static long TimeCutPage(string fileName)
{
    var document = new PdfFocus();
    document.OpenPdf(fileName);
    document.ImageOptions.Dpi = 320;
    pages = document.PageCount;

    sw.Start();
    document.ToImage(prefixFilePath, 1);
    sw.Stop();
    long afterFirstPage = sw.ElapsedMilliseconds;

    // sw is not reset here, so this second reading also contains the first
    // measurement (assuming sw is a System.Diagnostics.Stopwatch, which accumulates).
    sw.Start();
    document.ToImage(prefixFilePath, 2);
    sw.Stop();
    long afterSecondPage = sw.ElapsedMilliseconds;

    // First page once, plus the difference between the two readings for
    // each of the remaining pages.
    long perFollowingPage = afterSecondPage - afterFirstPage;
    long estimate = afterFirstPage + (perFollowingPage * (document.PageCount - 1));

    document.ClosePdf();
    timeForJpeg = estimate;
    sw.Reset();
    return timeForJpeg;
}
/// <summary>
/// Converts a PDF file to an Excel workbook written next to the PDF (extension
/// changed to "xls") and opens the workbook when the conversion succeeded.
/// </summary>
/// <param name="_PdfPath">Path of the PDF file to convert.</param>
public void ConvertPdfToExcel(string _PdfPath)
{
    string _pathToExcel = System.IO.Path.ChangeExtension(_PdfPath, "xls");
    int result = -1;

    SautinSoft.PdfFocus fs = new PdfFocus();
    try
    {
        // 'true' = Convert all data to spreadsheet (tabular and even textual).
        // 'false' = Skip textual data and convert only tabular (tables) data.
        fs.ExcelOptions.ConvertNonTabularDataToSpreadsheet = true;
        fs.ExcelOptions.PreservePageLayout = true;

        fs.OpenPdf(_PdfPath);
        if (fs.PageCount > 0)
        {
            result = fs.ToExcel(_pathToExcel);
        }
    }
    finally
    {
        // Release the source PDF before handing the workbook to another application.
        fs.ClosePdf();
    }

    // Open the produced Excel workbook (ToExcel returns 0 on success).
    if (result == 0)
    {
        System.Diagnostics.Process.Start(_pathToExcel);
    }
}
/// <summary>
/// Converts every PDF file of the source directory to HTML and merges all
/// results into one HTML document ("Result.html").
/// </summary>
static void ConvertMultiplePdfToSingleHtml()
{
    // Folder that is scanned for *.pdf files.
    string pdfDirectory = Path.GetFullPath(@"..\..\..\..\..\");
    string htmlFile = Path.GetFullPath("Result.html");
    string[] sources = Directory.GetFiles(pdfDirectory, "*.pdf");

    // Accumulates the merged HTML document.
    StringBuilder merged = new StringBuilder();
    merged.Append("<html><body>");

    PdfFocus converter = new PdfFocus();
    //converter.Serial = "123456789";

    int converted = 0;
    int attempted = 0;

    for (int index = 0; index < sources.Length; index++)
    {
        string source = sources[index];

        Console.WriteLine("Converting {0} ...", Path.GetFileName(source));
        converter.OpenPdf(source);
        attempted++;

        // Files that report no pages still count as attempted but add nothing.
        if (converter.PageCount <= 0)
        {
            continue;
        }

        // Existing directory that receives the images; see also "ImageSubFolder".
        converter.HtmlOptions.ImageFolder = Path.GetDirectoryName(htmlFile);

        // Sub folder (created by the component) - a plain folder name without drive letters.
        converter.HtmlOptions.ImageSubFolder = "images";

        // PNG is the recommended type for storing images.
        converter.HtmlOptions.ImageType = PdfFocus.CHtmlOptions.eHtmlImageType.Png;

        // Keep images as separate linked files rather than base64 inside the HTML.
        converter.HtmlOptions.IncludeImageInHtml = false;

        // Inline CSS lets the HTML fragments be merged without style clashes.
        converter.HtmlOptions.InlineCSS = true;

        // Only the contents of <body>...</body> are needed.
        converter.HtmlOptions.ProduceOnlyHtmlBody = true;

        string fragment = converter.ToHtml();
        if (String.IsNullOrEmpty(fragment))
        {
            continue;
        }

        converted++;
        merged.Append(fragment);
    }

    merged.Append("</body></html>");

    // Show results:
    File.WriteAllText(htmlFile, merged.ToString());
    Console.WriteLine("{0} of {1} files converted and merged into {2}!", converted, attempted, Path.GetFileName(htmlFile));
    Console.WriteLine("Press any key ...");
    Console.ReadLine();

    // Open our single HTML document.
    System.Diagnostics.Process.Start(htmlFile);
}
/// <summary>
/// How to convert all files to a single XLS file: every RTF, DOCX and PDF file
/// of the working directory is merged into one PDF, which is then converted
/// into one XLS workbook.
/// </summary>
/// <remarks>
/// Details: https://sautinsoft.com/products/document/help/net/developer-guide/from-customers-convert-pdf-docx-rtf-to-single-xls-workbook-net-csharp-vb.php
/// </remarks>
public static void ConvertToSingleXls()
{
    // In this example we'll use not only Document .Net component, but also
    // another SautinSoft component - PDF Focus .Net (to perform conversion from PDF to single xls workbook).
    // First of all, please perform "Rebuild Solution" to restore PDF Focus .Net package from NuGet.
    // Our steps:
    // 1. Convert all RTF, DOCX, PDF files into a single PDF document. (by Document .Net).
    // 2. Convert the single PDF into a single XLS workbook. (by PDF Focus .Net).

    byte[] singlePdfBytes = null;
    // This file we need only to show intermediate result.
    string singlePdfFile = "Single.pdf";
    string workingDir = @"..\..\";
    string singleXlsFile = "Single.xls";

    List<string> supportedFiles = new List<string>();
    foreach (string file in Directory.GetFiles(workingDir, "*.*"))
    {
        // Invariant lower-casing: extension matching must not depend on the current culture.
        string ext = Path.GetExtension(file).ToLowerInvariant();
        if (ext == ".pdf" || ext == ".docx" || ext == ".rtf")
        {
            supportedFiles.Add(file);
        }
    }

    // Create single pdf.
    DocumentCore singlePDF = new DocumentCore();
    foreach (string file in supportedFiles)
    {
        DocumentCore dc = DocumentCore.Load(file);
        Console.WriteLine("Adding: {0}...", Path.GetFileName(file));

        // Create import session.
        ImportSession session = new ImportSession(dc, singlePDF, StyleImportingMode.KeepSourceFormatting);

        // Loop through all sections in the source document.
        foreach (Section sourceSection in dc.Sections)
        {
            // Because we are copying a section from one document to another,
            // it is required to import the Section into the destination document.
            // This adjusts any document-specific references to styles, bookmarks, etc.
            //
            // Importing a element creates a copy of the original element, but the copy
            // is ready to be inserted into the destination document.
            Section importedSection = singlePDF.Import<Section>(sourceSection, true, session);

            // First section start from new page.
            if (dc.Sections.IndexOf(sourceSection) == 0)
            {
                importedSection.PageSetup.SectionStart = SectionStart.NewPage;
            }

            // Now the new section can be appended to the destination document.
            singlePDF.Sections.Add(importedSection);
        }
    }

    // Save our single document into PDF format in memory.
    using (MemoryStream Pdf = new MemoryStream())
    {
        singlePDF.Save(Pdf, new PdfSaveOptions() { Compliance = PdfCompliance.PDF_A1a });
        singlePdfBytes = Pdf.ToArray();
    }

    // Open the intermediate result for demonstration purposes.
    File.WriteAllBytes(singlePdfFile, singlePdfBytes);
    System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(singlePdfFile) { UseShellExecute = true });

    bool converted = false;
    SautinSoft.PdfFocus f = new PdfFocus();
    try
    {
        f.OpenPdf(singlePdfBytes);
        if (f.PageCount > 0)
        {
            // ToExcel returns 0 on success.
            converted = f.ToExcel(singleXlsFile) == 0;
        }
    }
    finally
    {
        f.ClosePdf();
    }

    if (converted)
    {
        // Open the result for demonstration purposes.
        System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(singleXlsFile) { UseShellExecute = true });
    }
    else
    {
        // The workbook was not produced; launching it would only raise an error.
        Console.WriteLine("Conversion failed!");
    }
}
/// <summary>
/// Evaluates submitted answer sheets: each sheet is stored as a PDF, its text
/// and the text of the reference sheet are extracted, a similarity score is
/// requested and the resulting marks are written to the database.
/// </summary>
/// <param name="evaluations">Evaluations carrying the base64-encoded answer sheets.</param>
/// <returns><c>true</c> once every evaluation has been processed.</returns>
public ActionResult<bool> EvaluateStudents([FromBody] List<Evaluation> evaluations)
{
    var dao = new EvaluationsDao();
    Directory.CreateDirectory(STUDENT_ANS_SHEET_UPLOAD_PATH);

    foreach (var e in evaluations)
    {
        Console.WriteLine("-------------------------------------------------------");
        Console.WriteLine($"Student Id {e.Student.Id}");
        Console.WriteLine($"DateTime {e.DateTime?.ToString("yyyy-MM-dd")}");

        var date = e.DateTime?.ToString("yyyy_MM_dd");
        // The name is built from request data; Path.GetFileName drops any
        // directory part so the file cannot land outside the upload folder.
        var fileName = Path.GetFileName(
            $"AnswerKey_Student_{e.Student.Id}_{e.Examination.Course.CourseCode}_{date}.pdf");
        var filePath = Path.Combine(STUDENT_ANS_SHEET_UPLOAD_PATH, fileName);

        var bytes = Convert.FromBase64String(e.AnswerSheet);
        System.IO.File.WriteAllBytes(filePath, bytes);
        Console.WriteLine("File Written");

        // Keep only the last URL segment, then strip any remaining directory
        // part for the same reason as above.
        var refSheetName = Path.GetFileName(
            e.Examination.ReferenceAnswerSheet.Substring(
                e.Examination.ReferenceAnswerSheet.LastIndexOf("/") + 1));
        var refSheetPath = Path.Combine(REFERENCE_ANS_SHEET_UPLOAD_PATH, refSheetName);
        Console.WriteLine($"Ref Sheet Path = {refSheetPath}");

        Console.WriteLine($"Opening Student Pdf...");
        var studentAnsText = ReadPdfTextAndClose(filePath);
        Console.WriteLine("Student pdf read successfully!");
        Console.WriteLine("Closing student pdf");

        Console.WriteLine("Opening reference pdf");
        var refAnsText = ReadPdfTextAndClose(refSheetPath);
        Console.WriteLine("Reference pdf read successfully!");
        Console.WriteLine("Closing reference pdf");

        // SECURITY(review): the API key is hard-coded in source; move it to
        // configuration / a secret store and rotate it.
        paralleldots pd = new paralleldots("AliC73YnPPScR8dJJEMD8qxinhFTTUjFPmJGs5yknY0");
        Console.WriteLine("Calculating score");
        var similarity = pd.similarity(studentAnsText, refAnsText);
        var json = JsonValue.Parse(similarity);

        // JSON numbers use '.' as decimal separator; parse culture-independently
        // so the score is not misread on servers with a ',' decimal culture.
        var score = double.Parse(
            json["normalized_score"].ToString(),
            System.Globalization.CultureInfo.InvariantCulture);
        Console.WriteLine($"Score = {score}");

        var percent = score / 5.0;
        Console.WriteLine($"Percentage = {percent}");

        e.MarksObtained = (int?)(e.Examination.TotalMarks * percent);
        e.AnswerSheet = $"/api/examinations/student-ans-sheet/get/{fileName}";

        Console.WriteLine("Writing result to database");
        dao.CreateEvaluation(e);
        Console.WriteLine("Written Successfully!");
    }

    return true;
}

/// <summary>
/// Extracts the text of a PDF file and always closes the document afterwards.
/// </summary>
private static string ReadPdfTextAndClose(string pdfPath)
{
    var pdf = new PdfFocus();
    try
    {
        pdf.OpenPdf(pdfPath);
        return pdf.ToText();
    }
    finally
    {
        pdf.ClosePdf();
    }
}
/// <summary>
/// Converts every PDF file of the source directory to HTML and merges all
/// results into one UTF-8 HTML document ("Result.html"), which is then opened.
/// </summary>
static void ConvertMultiplePdfToSingleHtml()
{
    // Folder that is scanned for *.pdf files.
    string pdfDirectory = Path.GetFullPath(@"..\..\");
    string htmlFile = "Result.html";
    string[] sources = Directory.GetFiles(pdfDirectory, "*.pdf");

    // Accumulates the merged HTML document, starting with the head section.
    StringBuilder merged = new StringBuilder();
    merged.Append("<html>\r\n<head>\r\n")
          .Append(@"<meta http-equiv = ""Content-Type"" content=""text/html; charset=utf-8"" />")
          .Append("\r\n</head>\r\n<body>");

    PdfFocus converter = new PdfFocus();
    //converter.Serial = "XXXXXXXXXXX";

    int converted = 0;
    int attempted = 0;

    for (int index = 0; index < sources.Length; index++)
    {
        string source = sources[index];

        Console.WriteLine("Converting {0} ...", Path.GetFileName(source));
        converter.OpenPdf(source);
        attempted++;

        // Files that report no pages still count as attempted but add nothing.
        if (converter.PageCount <= 0)
        {
            continue;
        }

        // Keep images as separate linked files rather than base64 inside the HTML.
        converter.HtmlOptions.IncludeImageInHtml = false;

        // One image sub folder per converted file, so images are not mixed up.
        converter.HtmlOptions.ImageSubFolder = String.Format("{0}_images", Path.GetFileNameWithoutExtension(source));

        // A template name for images
        converter.HtmlOptions.ImageFileName = "picture";

        // Auto - the same image format as in the source PDF;
        // 'Jpeg' to make the document size less;
        // 'PNG' to keep the highest quality, but the highest size too.
        converter.EmbeddedImagesFormat = PdfFocus.eImageFormat.Auto;

        // Inline CSS lets the HTML fragments be merged without style clashes.
        converter.HtmlOptions.InlineCSS = true;

        // Only the contents of <body>...</body> are needed.
        converter.HtmlOptions.ProduceOnlyHtmlBody = true;

        string fragment = converter.ToHtml();
        if (String.IsNullOrEmpty(fragment))
        {
            continue;
        }

        converted++;
        merged.Append(fragment);
    }

    merged.Append("</body></html>");

    // Show results:
    File.WriteAllText(htmlFile, merged.ToString());
    Console.WriteLine("{0} of {1} files converted and merged into {2}!", converted, attempted, Path.GetFileName(htmlFile));

    // Open the result for demonstration purposes.
    var startInfo = new System.Diagnostics.ProcessStartInfo(htmlFile) { UseShellExecute = true };
    System.Diagnostics.Process.Start(startInfo);
}
/// <summary>
/// Converts every PDF file of the source directory into its own HTML file
/// inside the "htmls" folder and opens that folder afterwards.
/// </summary>
static void ConvertMultiplePdfToHtmls()
{
    // Folder that is scanned for *.pdf files.
    string pdfDirectory = Path.GetFullPath(@"..\..\");
    string[] sources = Directory.GetFiles(pdfDirectory, "*.pdf");

    // Output folder for the HTML files; created on first use.
    DirectoryInfo htmlDirectory = new DirectoryInfo(@"htmls");
    if (!htmlDirectory.Exists)
    {
        htmlDirectory.Create();
    }

    PdfFocus converter = new PdfFocus();
    // After purchasing the license, please insert your serial number here to activate the component:
    //converter.Serial = "XXXXXXXXXXX";

    int converted = 0;
    int attempted = 0;

    for (int index = 0; index < sources.Length; index++)
    {
        string source = sources[index];
        string baseName = Path.GetFileNameWithoutExtension(source);

        Console.WriteLine("Converting {0} ...", Path.GetFileName(source));
        converter.OpenPdf(source);
        attempted++;

        // Files that report no pages still count as attempted but are not converted.
        if (converter.PageCount <= 0)
        {
            continue;
        }

        // Existing directory that receives the images; see also "ImageSubFolder".
        converter.HtmlOptions.ImageFolder = htmlDirectory.FullName;

        // Sub folder (created by the component) - a plain folder name without drive letters.
        converter.HtmlOptions.ImageSubFolder = String.Format("{0}_images", baseName);

        // A template name for images
        converter.HtmlOptions.ImageFileName = "picture";

        // Auto - the same image format as in the source PDF;
        // 'Jpeg' to make the document size less;
        // 'PNG' to keep the highest quality, but the highest size too.
        converter.EmbeddedImagesFormat = PdfFocus.eImageFormat.Auto;

        // Keep images as separate linked files rather than base64 inside the HTML.
        converter.HtmlOptions.IncludeImageInHtml = false;

        string htmlFilePath = Path.Combine(htmlDirectory.FullName, baseName + ".html");
        int status = converter.ToHtml(htmlFilePath);
        if (status == 0)
        {
            converted++;
        }
    }

    // Show results:
    Console.WriteLine("{0} of {1} files converted successfully!", converted, attempted);

    // Open the folder with the HTML files for demonstration purposes.
    var startInfo = new System.Diagnostics.ProcessStartInfo(htmlDirectory.FullName) { UseShellExecute = true };
    System.Diagnostics.Process.Start(startInfo);
}
/// <summary>
/// Extracts the images of the selected PDF and saves each one as
/// "{name}_Page{n}.jpg" in the chosen folder. The name of every page comes
/// from the grid: column 1 holds the name, column 2 the pages it applies to
/// (single pages and "from-to" ranges, separated by ';').
/// </summary>
private void ExtractImage()
{
    // Index = page number, value = name assigned to that page by the grid.
    string[] ImageName = new string[TongSoTrang + 1];

    int rowNumber = 1;
    foreach (DataGridViewRow dr in dataGridView1.Rows)
    {
        // The last grid row is not evaluated (same rule as before: row number < RowCount).
        if (rowNumber < dataGridView1.RowCount)
        {
            string pageSpec = dr.Cells[2].Value != null ? dr.Cells[2].Value.ToString() : "";
            string name = dr.Cells[1].Value != null ? dr.Cells[1].Value.ToString() : "";
            AssignPageNames(ImageName, pageSpec, name);
        }
        rowNumber++;
    }

    var f = new PdfFocus { Serial = "1234567890" };
    try
    {
        f.OpenPdf(txt_ImagePath.Text);
        if (f.PageCount <= 0)
        {
            return;
        }

        List<PdfFocus.PdfImage> pdfImages = f.ExtractImages(1, f.PageCount);
        if (pdfImages == null)
        {
            return;
        }

        // Save all extracted images.
        for (int i = 0; i < pdfImages.Count; i++)
        {
            // There may be more images than pages; images beyond the name
            // table are saved without a name prefix instead of failing.
            string name = i + 1 < ImageName.Length ? ImageName[i + 1] : null;
            string imageFile = Path.Combine(txt_FolderSaveImage.Text + "\\", name + "_Page" + (i + 1) + ".jpg");
            pdfImages[i].Picture.Save(imageFile);
        }
    }
    finally
    {
        f.ClosePdf();
    }
}

/// <summary>
/// Writes <paramref name="name"/> into <paramref name="names"/> for every page
/// listed in <paramref name="pageSpec"/> (e.g. "1;3-5;8"). Empty or malformed
/// entries and pages outside the table are ignored.
/// </summary>
private static void AssignPageNames(string[] names, string pageSpec, string name)
{
    foreach (string part in pageSpec.Split(';'))
    {
        string entry = part.Trim();
        if (entry.Length == 0)
        {
            continue;
        }

        int first;
        int last;
        if (entry.IndexOf("-", StringComparison.Ordinal) > 0)
        {
            // Range "from-to".
            string[] bounds = entry.Split('-');
            if (!int.TryParse(bounds[0], out first) || !int.TryParse(bounds[1], out last))
            {
                continue;
            }
        }
        else
        {
            // Single page.
            if (!int.TryParse(entry, out first))
            {
                continue;
            }
            last = first;
        }

        // Stay inside the table even when the grid mentions pages the PDF does not have.
        for (int page = Math.Max(first, 0); page <= last && page < names.Length; page++)
        {
            names[page] = name;
        }
    }
}