/// <summary>
/// Extracts the text of every page of a Base64-encoded PDF and returns a CRC32
/// hash of that text. Despite its name, this method does NOT return the text
/// itself: the extracted text is only written to the log.
/// </summary>
/// <param name="sBinaryContent">The PDF file, Base64-encoded.</param>
/// <param name="logger">Logger that receives the progress messages.</param>
/// <param name="LogId">Identifier passed to every log call.</param>
/// <returns>
/// The value of <c>Crc32.Get</c> over the UTF-8 bytes of the extracted text
/// (tabs, line feeds and carriage returns removed), formatted as upper-case hexadecimal.
/// </returns>
/// <exception cref="System.ArgumentNullException"><paramref name="sBinaryContent"/> is null.</exception>
/// <exception cref="System.FormatException"><paramref name="sBinaryContent"/> is not valid Base64.</exception>
public string extractTextFromPdf(string sBinaryContent, Util.LOLIB logger, string LogId)
{
    // No try/catch here on purpose: the previous "catch (Exception ex) { throw ex; }"
    // only reset the stack trace. Exceptions now propagate with their original trace.
    byte[] pdfBytes = Convert.FromBase64String(sBinaryContent);
    logger.WriteOnLog(LogId, "Letti i byte del file da processare", 3);

    using (PdfReader reader = new PdfReader(pdfBytes))
    {
        logger.WriteOnLog(LogId, "Oggetto pdf generato", 3);

        // Accumulate page text in a StringBuilder instead of repeated string
        // concatenation, which is quadratic in the number of pages.
        StringBuilder sbPageText = new StringBuilder();
        for (int i = 1; i <= reader.NumberOfPages; i++) // PDF page numbers are 1-based
        {
            logger.WriteOnLog(LogId, "Processo la pagina: " + i, 3);
            sbPageText.Append(PdfTextExtractor.GetTextFromPage(reader, i));
            // NOTE(review): this message repeats the one logged before the reader is
            // opened; kept byte-for-byte because log consumers may depend on it.
            logger.WriteOnLog(LogId, "Letti i byte del file da processare", 3);
        }

        // Strip tabs and line breaks so the hash does not depend on layout whitespace.
        string sTextContentPdf = Regex.Replace(sbPageText.ToString(), @"\t|\n|\r", "");
        logger.WriteOnLog(LogId, "Elimino il carattere invio dal testo", 3);

        logger.WriteOnLog(LogId, "Dati Letti: " + sTextContentPdf, 3);

        var crc32 = new Crc32();
        string sHashNow = crc32.Get(Encoding.UTF8.GetBytes(sTextContentPdf)).ToString("X").ToUpper();
        logger.WriteOnLog(LogId, "Hash CRC32b: " + sHashNow, 3);

        return sHashNow;
    }
}
/// <summary>
/// Converts a document to PDF, choosing the print job by the (upper-cased) extension
/// of <c>oMainDoc.Filename</c>.
/// </summary>
/// <remarks>
/// The previous implementation tested <c>"BMP,GIF,...".IndexOf(extension)</c>, but
/// <see cref="System.IO.Path.GetExtension(string)"/> returns the extension WITH its leading
/// period, so no specialised branch could ever match (and a file with no extension
/// matched the image branch, because <c>IndexOf("")</c> is 0). Extensions are now
/// matched exactly, including the period.
/// NOTE(review): the extension lists are reproduced from the original; formats such as
/// .JPG, .DOC, .XLS and .PPT are not listed and therefore use the generic conversion -
/// confirm whether that is intended.
/// </remarks>
/// <param name="oMainDoc">Source document; <c>Filename</c> supplies the extension and
/// <c>BinaryContent</c> the Base64-encoded file bytes.</param>
/// <param name="logger">Logger that receives the progress messages.</param>
/// <param name="sWorkingFolder">Folder in which the file-based conversions (EML, XLSX, CSV)
/// create their temporary input and output files.</param>
/// <param name="LogId">Identifier passed to every log call.</param>
/// <param name="oMainDocPdf">Receives a new document whose <c>Filename</c> is the original
/// file name with a ".pdf" extension and whose <c>BinaryContent</c> is the Base64-encoded PDF.</param>
/// <returns>The Base64-encoded PDF (the same value stored in <c>oMainDocPdf.BinaryContent</c>).</returns>
public string ConvertMainDoc(MainDocument oMainDoc, Util.LOLIB logger, string sWorkingFolder, string LogId, out MainDocument oMainDocPdf)
{
    // No try/catch here on purpose: the previous "catch (Exception ex) { throw ex; }"
    // only reset the stack trace. Exceptions now propagate with their original trace.
    oMainDocPdf = new MainDocument();
    byte[] sResult;

    using (Printer oPrinter = new Printer())
    {
        // Invariant upper-casing: culture-sensitive ToUpper() maps "i" to a dotted
        // capital I under Turkish cultures, which would break ".tif", ".gif", etc.
        string sFileExtension = Path.GetExtension(oMainDoc.Filename).ToUpperInvariant();
        logger.WriteOnLog(LogId, "Estensione: " + sFileExtension, 3);

        byte[] abContent = Convert.FromBase64String(oMainDoc.BinaryContent);

        // The print calls below receive the extension exactly as before (upper case,
        // leading period), i.e. the value the original code passed to PrintOut3.
        switch (sFileExtension)
        {
            case ".BMP":
            case ".GIF":
            case ".JPEG":
            case ".PNG":
            case ".TIFF":
            case ".TIF":
            case ".WMF":
            case ".EMF":
            {
                logger.WriteOnLog(LogId, "Entro conversione image: " + sFileExtension, 3);
                var oPDFSetting = oPrinter.PrinterSetting;
                oPDFSetting.LayoutPaperSize = (int)prnPaperSize.PRN_PAPER_A4;
                oPDFSetting.FontEmbedding = prnFontEmbedding.PRN_FONT_EMBED_NONE;
                oPDFSetting.Save();
                oPrinter.PrintJob.PDFSetting.StandardPdfAConformance = prnPdfAConformance.PRN_PDFA_CONFORM_1B_TC1;
                oPrinter.PrintJob.PDFSetting.StandardPdfXConformance = prnPdfXConformance.PRN_PDFX_CONFORM_NONE;
                sResult = oPrinter.PrintJob.PrintOut3(abContent, sFileExtension);
                break;
            }

            case ".PPTX":
            case ".PPS":
            {
                logger.WriteOnLog(LogId, "Entro conversione power point: " + sFileExtension, 3);
                PowerPointPrintJobEx oPrintJob = oPrinter.PowerPointPrintJobEx;
                oPrintJob.FrameSlides = true;
                sResult = oPrintJob.PrintOut3(abContent, sFileExtension);
                break;
            }

            case ".HTML":
            case ".HTM":
            case ".XHTML":
            case ".XML":
            {
                logger.WriteOnLog(LogId, "Entro conversione HTML: " + sFileExtension, 3);
                IEExtendedPrintJob oPrintJob = oPrinter.IEExtendedPrintJob;
                IEExtendedSetting oIESetting = oPrintJob.IEExtendedSetting;
                oIESetting.DisableScriptDebugger = true;
                oIESetting.DisplayErrorDialogOnEveryError = false;
                oIESetting.Save();
                oPrintJob.PageWidth = 11.93;
                oPrintJob.PageHeight = 15.98;
                sResult = oPrintJob.PrintOut3(abContent, sFileExtension);
                break;
            }

            case ".EML":
            {
                logger.WriteOnLog(LogId, "Entro conversione eml: " + sFileExtension, 3);
                PrintJob oPrintJob = oPrinter.PrintJob;
                // NOTE(review): the 5 s pause before printing comes from the original
                // code; its purpose is not visible here - confirm before removing.
                sResult = ConvertMainDocViaTempFiles(sWorkingFolder, sFileExtension, abContent, 5000,
                    (sInputPath, sOutputPath) => oPrintJob.PrintOut(sInputPath, sOutputPath), logger, LogId);
                break;
            }

            case ".DOCX":
            case ".RTF":
            {
                logger.WriteOnLog(LogId, "Entro conversione word: " + sFileExtension, 3);
                WordPrintJobEx oPrintJob = oPrinter.WordPrintJobEx;
                oPrintJob.NativeOfficePDF = true;
                oPrintJob.NativeOfficeStandardPDFA = true;
                sResult = oPrintJob.PrintOut3(abContent, sFileExtension);
                break;
            }

            case ".ODT":
            case ".SWX":
            case ".WPD":
            case ".ODS":
            case ".SXC":
            case ".ODP":
            case ".SXI":
            case ".ODG":
            case ".SXD":
            {
                logger.WriteOnLog(LogId, "Entro conversione Open Offices: " + sFileExtension, 3);
                OpenOfficePrintJob oPrintJob = oPrinter.OpenOfficePrintJob;
                oPrintJob.ConvertBookmarks = true;
                sResult = oPrintJob.PrintOut3(abContent, sFileExtension);
                break;
            }

            case ".XLSX":
            case ".CSV":
            {
                logger.WriteOnLog(LogId, "Entro conversione excel: " + sFileExtension, 3);
                ExcelPrintJobEx oPrintJob = oPrinter.ExcelPrintJobEx;
                oPrintJob.PrintAllSheets = true;
                oPrintJob.NativeOfficePDF = true;
                sResult = ConvertMainDocViaTempFiles(sWorkingFolder, sFileExtension, abContent, 0,
                    (sInputPath, sOutputPath) => oPrintJob.PrintOut(sInputPath, sOutputPath), logger, LogId);
                break;
            }

            default:
            {
                logger.WriteOnLog(LogId, "Entro conversione generica: " + sFileExtension, 3);
                var oPrintJob = oPrinter.PrintJob;
                var oPDFSetting = oPrintJob.PDFSetting;
                oPDFSetting.FontEmbedding = prnFontEmbedding.PRN_FONT_EMBED_NONE;
                oPDFSetting.StandardPdfAConformance = prnPdfAConformance.PRN_PDFA_CONFORM_NONE;
                oPDFSetting.StandardPdfXConformance = prnPdfXConformance.PRN_PDFX_CONFORM_NONE;
                oPDFSetting.Save();
                sResult = oPrintJob.PrintOut3(abContent, sFileExtension);
                break;
            }
        }

        // Encode once and reuse for both the return value and the out document.
        string sFileContentPdf = Convert.ToBase64String(sResult);
        oMainDocPdf.Filename = Path.GetFileNameWithoutExtension(oMainDoc.Filename) + ".pdf";
        oMainDocPdf.BinaryContent = sFileContentPdf;
        return sFileContentPdf;
    }
}

/// <summary>
/// File-based conversion used by ConvertMainDoc: writes the input bytes to
/// "test&lt;extension&gt;" in the working folder, runs the supplied print call to
/// produce "test.pdf" in the same folder, and returns the bytes of that PDF.
/// </summary>
/// <remarks>
/// NOTE(review): the file names are fixed and the files are not deleted afterwards
/// (as in the original), so concurrent conversions sharing a working folder would
/// overwrite each other, and a "test.pdf" left by an earlier run could be read back
/// if the print call fails without throwing - confirm how callers use the folder.
/// </remarks>
/// <param name="sWorkingFolder">Folder that receives the temporary files.</param>
/// <param name="sFileExtension">Extension of the input file, including the leading period.</param>
/// <param name="abContent">Raw bytes of the input file.</param>
/// <param name="nDelayBeforePrintMs">Milliseconds to wait between writing the input file and printing; 0 for none.</param>
/// <param name="printOut">Print call; receives the input path and the output PDF path.</param>
/// <param name="logger">Logger that receives the progress messages.</param>
/// <param name="LogId">Identifier passed to every log call.</param>
/// <returns>The bytes of the generated PDF file.</returns>
private static byte[] ConvertMainDocViaTempFiles(string sWorkingFolder, string sFileExtension, byte[] abContent, int nDelayBeforePrintMs, Action<string, string> printOut, Util.LOLIB logger, string LogId)
{
    // sFileExtension already starts with '.', so "test" + extension gives "test.EML"
    // (the original "\\test." + extension would have produced "test..EML").
    string sPathFileTemp = Path.Combine(sWorkingFolder, "test" + sFileExtension);
    logger.WriteOnLog(LogId, "path file di input: " + sPathFileTemp, 3);
    string sPathPDFTemp = Path.Combine(sWorkingFolder, "test.pdf");
    logger.WriteOnLog(LogId, "path file di output: " + sPathPDFTemp, 3);

    File.WriteAllBytes(sPathFileTemp, abContent);
    // Logs the INPUT path (the original Excel branch logged the PDF path here by mistake).
    logger.WriteOnLog(LogId, "File di input creato : " + sPathFileTemp, 3);

    if (nDelayBeforePrintMs > 0)
    {
        System.Threading.Thread.Sleep(nDelayBeforePrintMs);
    }

    printOut(sPathFileTemp, sPathPDFTemp);
    logger.WriteOnLog(LogId, "File di output creato : " + sPathPDFTemp, 3);

    return File.ReadAllBytes(sPathPDFTemp);
}