Example #1
0
        /// <summary>
        /// Decodes a Base64-encoded PDF, concatenates the text extracted from every page,
        /// removes tab, line-feed and carriage-return characters, and returns the CRC32 of
        /// the UTF-8 bytes of that text as an upper-case hexadecimal string.
        /// </summary>
        /// <param name="sBinaryContent">Base64 representation of the PDF file.</param>
        /// <param name="logger">Logger that receives the progress messages (always called with 3 as last argument).</param>
        /// <param name="LogId">Identifier forwarded to every log call.</param>
        /// <returns>The CRC32 of the extracted text, formatted with the "X" format specifier.</returns>
        /// <remarks>
        /// Nothing is caught here: any exception raised while decoding, reading or hashing
        /// reaches the caller with its original stack trace.
        /// </remarks>
        public string extractTextFromPdf(string sBinaryContent, Util.LOLIB logger, string LogId)
        {
            byte[] pdfBytes = Convert.FromBase64String(sBinaryContent);
            logger.WriteOnLog(LogId, "Letti i byte del file da processare", 3);

            using (PdfReader reader = new PdfReader(pdfBytes))
            {
                logger.WriteOnLog(LogId, "Oggetto pdf generato", 3);

                // Pages are numbered from 1. A StringBuilder avoids re-copying the whole
                // accumulated text on every page.
                StringBuilder extractedText = new StringBuilder();
                for (int pageNumber = 1; pageNumber <= reader.NumberOfPages; pageNumber++)
                {
                    logger.WriteOnLog(LogId, "Processo la pagina: " + pageNumber, 3);
                    extractedText.Append(PdfTextExtractor.GetTextFromPage(reader, pageNumber));
                    // NOTE(review): this message repeats the one logged after decoding; it is
                    // kept as-is because the log text may be relied upon - confirm and reword.
                    logger.WriteOnLog(LogId, "Letti i byte del file da processare", 3);
                }

                // The hash is computed on the text with every tab / LF / CR removed.
                string sTextContentPdf = Regex.Replace(extractedText.ToString(), @"\t|\n|\r", "");
                logger.WriteOnLog(LogId, "Elimino il carattere invio dal testo", 3);

                logger.WriteOnLog(LogId, "Dati Letti: " + sTextContentPdf, 3);

                var crc32 = new Crc32();
                // Invariant casing keeps the hexadecimal output independent of the thread culture.
                string sHashNow = crc32.Get(Encoding.UTF8.GetBytes(sTextContentPdf)).ToString("X").ToUpperInvariant();
                logger.WriteOnLog(LogId, "Hash CRC32b: " + sHashNow, 3);

                return sHashNow;
            }
        }
Example #2
0
        /// <summary>
        /// Converts the document carried by <paramref name="oMainDoc"/> to PDF, choosing the
        /// print job and its settings from the upper-cased file extension, and returns the
        /// PDF as a Base64 string.
        /// </summary>
        /// <param name="oMainDoc">Source document; its Filename and Base64 BinaryContent are read.</param>
        /// <param name="logger">Logger that receives the progress messages (always called with 3 as last argument).</param>
        /// <param name="sWorkingFolder">Folder used for the temporary files of the EML and XLSX/CSV conversions.</param>
        /// <param name="LogId">Identifier forwarded to every log call.</param>
        /// <param name="oMainDocPdf">
        /// Receives a new document whose Filename is the source file name with a ".pdf"
        /// extension and whose BinaryContent is the Base64 PDF.
        /// </param>
        /// <returns>The Base64-encoded PDF (same value as <c>oMainDocPdf.BinaryContent</c>).</returns>
        /// <remarks>
        /// Nothing is caught here: any exception raised by decoding or by the print job
        /// reaches the caller with its original stack trace.
        /// </remarks>
        public string ConvertMainDoc(MainDocument oMainDoc, Util.LOLIB logger, string sWorkingFolder, string LogId, out MainDocument oMainDocPdf)
        {
            oMainDocPdf = new MainDocument();
            byte[] pdfBytes;

            using (Printer oPrinter = new Printer())
            {
                // Invariant casing so the extension matches the lists below under any culture.
                string sFileExtension = Path.GetExtension(oMainDoc.Filename).ToUpperInvariant();
                logger.WriteOnLog(LogId, "Estensione: " + sFileExtension, 3);

                // Path.GetExtension returns the extension WITH its leading period (".DOCX"),
                // while the lists below hold bare names, so the period is stripped for matching.
                // The dotted form is still what gets passed to PrintOut3.
                string sExtensionKey = sFileExtension.TrimStart('.');

                // Decode once, before any printer setting is touched, so invalid input fails early.
                byte[] inputBytes = Convert.FromBase64String(oMainDoc.BinaryContent);

                if (IsListedExtension("BMP,GIF,JPEG,PNG,TIFF,TIF,WMF,EMF", sExtensionKey))
                {
                    logger.WriteOnLog(LogId, "Entro conversione image: " + sFileExtension, 3);
                    var oPDFSetting = oPrinter.PrinterSetting;
                    oPDFSetting.LayoutPaperSize = (int)prnPaperSize.PRN_PAPER_A4;
                    oPDFSetting.FontEmbedding   = prnFontEmbedding.PRN_FONT_EMBED_NONE;
                    oPDFSetting.Save();
                    oPrinter.PrintJob.PDFSetting.StandardPdfAConformance = prnPdfAConformance.PRN_PDFA_CONFORM_1B_TC1;
                    oPrinter.PrintJob.PDFSetting.StandardPdfXConformance = prnPdfXConformance.PRN_PDFX_CONFORM_NONE;
                    pdfBytes = oPrinter.PrintJob.PrintOut3(inputBytes, sFileExtension);
                }
                else if (IsListedExtension("PPTX,PPS", sExtensionKey))
                {
                    logger.WriteOnLog(LogId, "Entro conversione power point: " + sFileExtension, 3);
                    PowerPointPrintJobEx oPrintJob = oPrinter.PowerPointPrintJobEx;
                    oPrintJob.FrameSlides = true;
                    pdfBytes = oPrintJob.PrintOut3(inputBytes, sFileExtension);
                }
                else if (IsListedExtension("HTML,HTM,XHTML,XML", sExtensionKey))
                {
                    logger.WriteOnLog(LogId, "Entro conversione HTML: " + sFileExtension, 3);
                    IEExtendedPrintJob oPrintJob  = oPrinter.IEExtendedPrintJob;
                    IEExtendedSetting  oIESetting = oPrintJob.IEExtendedSetting;

                    oIESetting.DisableScriptDebugger          = true;
                    oIESetting.DisplayErrorDialogOnEveryError = false;
                    oIESetting.Save();

                    oPrintJob.PageWidth  = 11.93;
                    oPrintJob.PageHeight = 15.98;
                    pdfBytes = oPrintJob.PrintOut3(inputBytes, sFileExtension);
                }
                else if (IsListedExtension("EML", sExtensionKey))
                {
                    logger.WriteOnLog(LogId, "Entro conversione eml: " + sFileExtension, 3);
                    PrintJob oPrintJob = oPrinter.PrintJob;
                    // NOTE(review): the fixed 5 s pause between writing the input file and
                    // printing is carried over unchanged; its reason is not visible here -
                    // confirm whether it is still needed.
                    pdfBytes = PrintThroughTempFiles(inputBytes, sExtensionKey, sWorkingFolder, logger, LogId, 5000,
                        (sInputPath, sOutputPath) => { oPrintJob.PrintOut(sInputPath, sOutputPath); });
                }
                else if (IsListedExtension("DOCX,RTF", sExtensionKey))
                {
                    logger.WriteOnLog(LogId, "Entro conversione word: " + sFileExtension, 3);
                    WordPrintJobEx oPrintJob = oPrinter.WordPrintJobEx;
                    oPrintJob.NativeOfficePDF          = true;
                    oPrintJob.NativeOfficeStandardPDFA = true;
                    pdfBytes = oPrintJob.PrintOut3(inputBytes, sFileExtension);
                }
                else if (IsListedExtension("ODT,SWX,WPD,ODS,SXC,ODP,SXI,ODG,SXD", sExtensionKey))
                {
                    logger.WriteOnLog(LogId, "Entro conversione Open Offices: " + sFileExtension, 3);
                    OpenOfficePrintJob oPrintJob = oPrinter.OpenOfficePrintJob;
                    oPrintJob.ConvertBookmarks = true;
                    pdfBytes = oPrintJob.PrintOut3(inputBytes, sFileExtension);
                }
                else if (IsListedExtension("XLSX,CSV", sExtensionKey))
                {
                    logger.WriteOnLog(LogId, "Entro conversione excel: " + sFileExtension, 3);
                    ExcelPrintJobEx oPrintJob = oPrinter.ExcelPrintJobEx;
                    oPrintJob.PrintAllSheets  = true;
                    oPrintJob.NativeOfficePDF = true;
                    pdfBytes = PrintThroughTempFiles(inputBytes, sExtensionKey, sWorkingFolder, logger, LogId, 0,
                        (sInputPath, sOutputPath) => { oPrintJob.PrintOut(sInputPath, sOutputPath); });
                }
                else
                {
                    // Any extension not listed above (including a missing one) is handled here.
                    logger.WriteOnLog(LogId, "Entro conversione generica: " + sFileExtension, 3);
                    var oPrintJob   = oPrinter.PrintJob;
                    var oPDFSetting = oPrintJob.PDFSetting;
                    oPDFSetting.FontEmbedding           = prnFontEmbedding.PRN_FONT_EMBED_NONE;
                    oPDFSetting.StandardPdfAConformance = prnPdfAConformance.PRN_PDFA_CONFORM_NONE;
                    oPDFSetting.StandardPdfXConformance = prnPdfXConformance.PRN_PDFX_CONFORM_NONE;
                    oPDFSetting.Save();
                    pdfBytes = oPrintJob.PrintOut3(inputBytes, sFileExtension);
                }
            }

            // Encode once and share the value between the return value and the out document.
            string sFileContentPdf = Convert.ToBase64String(pdfBytes);
            oMainDocPdf.Filename      = Path.GetFileNameWithoutExtension(oMainDoc.Filename) + ".pdf";
            oMainDocPdf.BinaryContent = sFileContentPdf;
            return sFileContentPdf;
        }

        /// <summary>
        /// Tells whether <paramref name="sExtensionKey"/> is exactly one of the comma-separated
        /// entries of <paramref name="sExtensionList"/> (ordinal, whole-entry comparison).
        /// </summary>
        private static bool IsListedExtension(string sExtensionList, string sExtensionKey)
        {
            // Whole-entry matching: an empty key or a fragment such as "ML" never matches.
            return Array.IndexOf(sExtensionList.Split(','), sExtensionKey) >= 0;
        }

        /// <summary>
        /// Writes the input bytes to a temporary file in the working folder, runs the supplied
        /// file-to-file print action, reads the produced PDF back and removes both files.
        /// </summary>
        private static byte[] PrintThroughTempFiles(byte[] inputBytes, string sExtensionKey, string sWorkingFolder, Util.LOLIB logger, string LogId, int delayBeforePrintMs, Action<string, string> printOut)
        {
            // A per-call unique stem keeps overlapping conversions from reading or
            // overwriting each other's files.
            string sUniqueStem   = "test_" + Guid.NewGuid().ToString("N");
            string sPathFileTemp = sWorkingFolder + "\\" + sUniqueStem + "." + sExtensionKey;
            logger.WriteOnLog(LogId, "path file di input: " + sPathFileTemp, 3);
            string sPathPDFTemp  = sWorkingFolder + "\\" + sUniqueStem + ".pdf";
            logger.WriteOnLog(LogId, "path file di output: " + sPathPDFTemp, 3);

            try
            {
                File.WriteAllBytes(sPathFileTemp, inputBytes);
                logger.WriteOnLog(LogId, "File di input creato : " + sPathFileTemp, 3);

                if (delayBeforePrintMs > 0)
                {
                    System.Threading.Thread.Sleep(delayBeforePrintMs);
                }

                printOut(sPathFileTemp, sPathPDFTemp);
                logger.WriteOnLog(LogId, "File di output creato : " + sPathPDFTemp, 3);

                return File.ReadAllBytes(sPathPDFTemp);
            }
            finally
            {
                DeleteTempFileQuietly(sPathFileTemp, logger, LogId);
                DeleteTempFileQuietly(sPathPDFTemp, logger, LogId);
            }
        }

        /// <summary>
        /// Deletes a temporary file; a failure to delete is logged instead of thrown so that it
        /// cannot hide an exception already in flight from the conversion itself.
        /// </summary>
        private static void DeleteTempFileQuietly(string sPath, Util.LOLIB logger, string LogId)
        {
            try
            {
                File.Delete(sPath);
            }
            catch (IOException ex)
            {
                logger.WriteOnLog(LogId, "Impossibile eliminare il file temporaneo " + sPath + ": " + ex.Message, 3);
            }
            catch (UnauthorizedAccessException ex)
            {
                logger.WriteOnLog(LogId, "Impossibile eliminare il file temporaneo " + sPath + ": " + ex.Message, 3);
            }
        }