/// <summary>
/// Downloads the PDF document attached to <paramref name="msg"/>, converts it to DOCX
/// with PdfFocus and sends the converted file back to the same chat.
/// </summary>
/// <param name="bot">Bot client used to download the attachment and to reply.</param>
/// <param name="msg">Incoming message; its <c>Document</c> is the file to convert.</param>
/// <returns>A task that completes once the reply (result or error text) has been sent.</returns>
/// <remarks>
/// Download errors are logged to the console and reported to the user. A conversion that
/// produces no output is reported the same way. Exceptions thrown by the converter itself
/// or by the final upload are not caught here.
/// </remarks>
public static async Task ConvertPDFtoWord(TelegramBotClient bot, Message msg)
{
    // Guard: no attachment, or an attachment whose name does not END with ".pdf".
    // (A plain Contains(".pdf") would accept "a.pdf.exe" and reject "A.PDF".)
    string fileName = msg.Document?.FileName;
    if (string.IsNullOrEmpty(fileName) || !fileName.EndsWith(".pdf", StringComparison.OrdinalIgnoreCase))
    {
        await bot.SendTextMessageAsync(msg.Chat.Id, "Файл имеет не верный формат");
        return;
    }

    // The file name is supplied by the sender; keep only its last path segment so it
    // cannot point outside the working folder.
    string safeName = System.IO.Path.GetFileName(fileName);
    string path = String.Format(@"TelegramBotIQ Users File\Convert\{0}.pdf", safeName);
    string pathDocx = String.Format(@"TelegramBotIQ Users File\Convert\{0}.docx", safeName);
    string errorText = "Возникла ошибка :(" + Environment.NewLine + "Не беспокойтесь, разрабу дадим по шапке";

    try
    {
        // File.Create truncates an existing file; File.OpenWrite would leave stale
        // trailing bytes behind if a shorter file with the same name is uploaded again.
        using (var filestream = System.IO.File.Create(path))
        {
            await bot.GetInfoAndDownloadFileAsync(
                fileId: msg.Document.FileId,
                destination: filestream
            );
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine($"Error: {ex}");
        await bot.SendTextMessageAsync(msg.Chat.Id, errorText);
        // Nothing usable was downloaded, so there is nothing to convert.
        return;
    }

    bool converted = false;
    PdfFocus f = new PdfFocus();
    try
    {
        f.OpenPdf(path);
        if (f.PageCount > 0)
        {
            await bot.SendTextMessageAsync(msg.Chat.Id, "Подождите, идет конвертирование!");
            f.WordOptions.Format = PdfFocus.CWordOptions.eWordDocument.Docx;
            // ToWord(path) reports success with a return value of 0.
            converted = f.ToWord(pathDocx) == 0;
        }
    }
    finally
    {
        // Release the converter's hold on the source PDF even if conversion throws.
        f.ClosePdf();
    }

    if (!converted)
    {
        // No DOCX was produced; opening pathDocx would throw FileNotFoundException.
        Console.WriteLine($"Error: PDF to DOCX conversion failed for '{path}'");
        await bot.SendTextMessageAsync(msg.Chat.Id, errorText);
        return;
    }

    using (FileStream fileStream = System.IO.File.OpenRead(pathDocx))
    {
        InputOnlineFile file = new InputOnlineFile(fileStream);
        await bot.SendDocumentAsync(msg.Chat.Id, file, caption: "Конвертирование завершено!");
    }
}
/// <summary>
/// Converts a PDF supplied as a stream into a Word document and returns the result
/// as an in-memory stream.
/// </summary>
/// <param name="file">Stream containing the source PDF.</param>
/// <returns>A <see cref="MemoryStream"/> over the converted document bytes.</returns>
/// <exception cref="InvalidOperationException">
/// The PDF reports no pages, or the converter returned no data.
/// </exception>
public Stream getWordStream(Stream file)
{
    PdfFocus f = new PdfFocus();
    try
    {
        f.OpenPdf(file);

        byte[] docx = null;
        if (f.PageCount > 0)
        {
            docx = f.ToWord();
        }

        // Without this check a failed conversion surfaced as an opaque
        // ArgumentNullException from the MemoryStream constructor.
        if (docx == null)
        {
            throw new InvalidOperationException("The PDF document could not be converted to Word.");
        }

        return new MemoryStream(docx);
    }
    finally
    {
        // Always release the converter's reference to the source document.
        f.ClosePdf();
    }
}
/// <summary>
/// Asks the user for a PDF to open and a DOCX path to save to, converts the PDF with
/// PdfFocus and reports the outcome in a message box.
/// </summary>
/// <remarks>
/// Does nothing further if either dialog is dismissed without confirming.
/// </remarks>
private void UploadAndConvert()
{
    openWordDoc.Title = "Open PDF file to convert";
    openWordDoc.Filter = "PDF Files|*.pdf";
    if (openWordDoc.ShowDialog() != DialogResult.OK)
    {
        return;
    }

    saveWordDoc.Title = "Save Doc file";
    saveWordDoc.Filter = "Word Files|*.docx";
    if (saveWordDoc.ShowDialog() != DialogResult.OK)
    {
        return;
    }

    // Open the PDF only once a target has been confirmed, so a cancelled save dialog
    // does not leave a loaded document behind.
    bool converted = false;
    PdfFocus pdfFocus = new PdfFocus();
    try
    {
        pdfFocus.OpenPdf(openWordDoc.FileName);
        if (pdfFocus.PageCount > 0)
        {
            // ToWord(path) reports success with a return value of 0.
            converted = pdfFocus.ToWord(saveWordDoc.FileName) == 0;
        }
    }
    finally
    {
        pdfFocus.ClosePdf();
    }

    if (converted)
    {
        MessageBox.Show("Converted Successfully", "Info", MessageBoxButtons.OK, MessageBoxIcon.Information);
    }
    else
    {
        // Previously success was reported regardless of the conversion result.
        MessageBox.Show("Conversion failed!", "Error", MessageBoxButtons.OK, MessageBoxIcon.Error);
    }
}
/// <summary>
/// Runs OCR over a scanned PDF (via the Tesseract-based <c>PerformOCRTesseract</c> handler)
/// and writes the recognised content to a DOCX file.
/// </summary>
/// <remarks>
/// Prerequisites for the sample document, which contains English, Russian and Vietnamese text:
/// 1. Download eng.traineddata, rus.traineddata and vie.traineddata from
///    https://github.com/tesseract-ocr/tessdata_fast (good and fast) or
///    https://github.com/tesseract-ocr/tessdata_best (best and slow).
/// 2. Copy those files into the "tessdata" folder in the project root.
/// 3. Make sure the "tessdata" folder also contains "pdf.ttf".
/// </remarks>
static void LoadScannedPdf()
{
    const string scannedPdfPath = @"..\..\scan.pdf";
    const string docxPath = "Result.docx";

    // Route every image in the PDF through the OCR callback.
    PdfFocus converter = new PdfFocus();
    converter.OCROptions.Mode = PdfFocus.COCROptions.eOCRMode.AllImages;
    converter.OCROptions.Method += PerformOCRTesseract;
    converter.OpenPdf(scannedPdfPath);

    // Success means the PDF has pages and ToWord returned 0.
    bool succeeded = converter.PageCount > 0 && converter.ToWord(docxPath) == 0;
    if (!succeeded)
    {
        Console.WriteLine("Conversion failed!");
        return;
    }

    // Open the result for demonstration purposes.
    var startInfo = new System.Diagnostics.ProcessStartInfo(docxPath) { UseShellExecute = true };
    System.Diagnostics.Process.Start(startInfo);
}