/// <summary>
/// Overwrites four export options on the supplied PDF settings object:
/// export mode (image-on-text), picture resolution (120), quality (70)
/// and the "use original paper size" flag (enabled).
/// </summary>
/// <param name="pFormat">The PDF export settings instance to modify in place.</param>
private void applyPDFFormatSettings(PDFExportSettings pFormat)
{
    const int pictureResolution = 120;
    const int quality = 70;

    pFormat.PDFExportMode = PDFExportModeEnum.PEM_ImageOnText;
    pFormat.PictureResolution = pictureResolution;
    pFormat.Quality = quality;
    pFormat.UseOriginalPaperSize = true;
}
/// <summary>
/// Stores the conversion job in the database, starts a background task that sends the
/// job's file to the OCR server, and returns the job id without waiting for that task.
/// </summary>
/// <remarks>
/// The background task resolves the configured workflow, builds the export settings for
/// the requested target format, processes the ticket, and finally writes the result
/// (content, MIME type, extension, status, finish time) back to the job. Every failure
/// path inside the task marks the job as faulted via
/// <c>RoboBrailleProcessor.SetJobFaulted</c> before throwing.
/// NOTE(review): the task is never awaited, so exceptions thrown inside it are not
/// observed by the caller; the faulted job status appears to be the only signal.
/// NOTE(review): <c>_context</c> is used from the background task as well as from this
/// method; if it is an Entity Framework context it is not thread-safe - confirm lifetime.
/// </remarks>
/// <param name="accessibleJob">The job to persist and convert.</param>
/// <returns>The value of <c>accessibleJob.Id</c>.</returns>
/// <exception cref="DbEntityValidationException">
/// Saving the job failed entity validation. The message is the "; "-joined list of
/// validation error messages; the original exception is attached as the inner exception.
/// </exception>
public async Task<Guid> SubmitWorkItem(AccessibleConversionJob accessibleJob)
{
    // The method contains no await; the async modifier is kept so that exceptions from
    // the save below keep being delivered through the returned task, as before.
    try
    {
        _context.Jobs.Add(accessibleJob);
        _context.SaveChanges();
    }
    catch (DbEntityValidationException ex)
    {
        string errorMessages = string.Join(
            "; ",
            ex.EntityValidationErrors.SelectMany(x => x.ValidationErrors).Select(x => x.ErrorMessage));

        // Keep the validation results and the original exception for diagnostics.
        throw new DbEntityValidationException(errorMessages, ex.EntityValidationErrors, ex);
    }

    // send the job to OCR server
    var task = Task.Factory.StartNew(j =>
    {
        var job = (AccessibleConversionJob)j;
        string wsUri = WebConfigurationManager.AppSettings["AbbyyOCRServer"];
        string wName = WebConfigurationManager.AppSettings["RBWorkflowName"];

        RSSoapServiceSoapClient wsClient;
        string flowName;
        try
        {
            wsClient = new RSSoapServiceSoapClient();

            // enumerate all workflows and pick the configured one
            var workflows = wsClient.GetWorkflows(wsUri);
            flowName = workflows.FirstOrDefault(w => w.Equals(wName));
        }
        catch (Exception e)
        {
            Trace.WriteLine(e);
            RoboBrailleProcessor.SetJobFaulted(job, _context);
            throw; // rethrow without resetting the stack trace
        }

        if (string.IsNullOrWhiteSpace(flowName))
        {
            RoboBrailleProcessor.SetJobFaulted(job, _context);
            throw new Exception("The conversion workflow does not exist!");
        }

        var fileContainer = new FileContainer { FileContents = job.FileContent };
        var ticket = wsClient.CreateTicket(wsUri, flowName);
        var infile = new InputFile { FileData = fileContainer };
        var fileformat = (OutputFileFormatEnum)Enum.Parse(
            typeof(OutputFileFormatEnum),
            Convert.ToString(job.TargetDocumentFormat));

        OutputFormatSettings ofs = CreateOutputFormatSettings(fileformat);

        ticket.ExportParams.Formats = new[] { ofs };
        ticket.InputFiles = new[] { infile };
        ticket.Priority = job.Priority;

        XmlResult result;
        try
        {
            result = wsClient.ProcessTicket(wsUri, flowName, ticket);
        }
        catch (Exception)
        {
            RoboBrailleProcessor.SetJobFaulted(job, _context);
            throw;
        }

        if (result.IsFailed)
        {
            RoboBrailleProcessor.SetJobFaulted(job, _context);
            throw new Exception("Conversion job failed!");
        }

        bool isInternalFormat = ofs.FileFormat == OutputFileFormatEnum.OFF_InternalFormat;

        // For ordinary formats the last output file wins; for the internal format
        // every output file is collected and zipped below.
        byte[] contents = null;
        var internalFormatParts = new List<byte[]>();
        foreach (var ifile in result.InputFiles)
        {
            foreach (var odoc in ifile.OutputDocuments)
            {
                foreach (var ofile in odoc.Files)
                {
                    contents = ofile.FileContents;
                    if (isInternalFormat)
                    {
                        internalFormatParts.Add(ofile.FileContents);
                    }
                }
            }
        }

        if (isInternalFormat)
        {
            string filePath = ConfigurationManager.AppSettings.Get("FileDirectory") + @"Temp\" + job.Id;
            string zipPath = filePath + ".zip";
            try
            {
                Directory.CreateDirectory(filePath);
                for (int i = 0; i < internalFormatParts.Count; i++)
                {
                    File.WriteAllBytes(filePath + @"\ocrData" + i, internalFormatParts[i]);
                }

                ZipFile.CreateFromDirectory(filePath, zipPath);
                contents = File.ReadAllBytes(zipPath);
            }
            catch (Exception)
            {
                RoboBrailleProcessor.SetJobFaulted(job, _context);
                throw;
            }
            finally
            {
                // The directory holds the ocrData files, so it must be removed
                // recursively; the non-recursive overload throws IOException on a
                // non-empty directory.
                if (Directory.Exists(filePath))
                {
                    Directory.Delete(filePath, true);
                }

                if (File.Exists(zipPath))
                {
                    File.Delete(zipPath);
                }
            }
        }

        if (contents == null)
        {
            RoboBrailleProcessor.SetJobFaulted(job, _context);
            throw new Exception("Job result is null!");
        }

        string mime;
        string fileExtension;
        GetResultFileInfo(fileformat, out mime, out fileExtension);

        try
        {
            job.DownloadCounter = 0;
            job.ResultFileExtension = fileExtension;
            job.ResultMimeType = mime;
            job.ResultContent = contents;
            job.Status = JobStatus.Done;
            job.FinishTime = DateTime.Now;

            _context.Jobs.Attach(job);
            _context.Entry(job).State = EntityState.Modified;
            _context.SaveChanges();
        }
        catch (Exception)
        {
            RoboBrailleProcessor.SetJobFaulted(job, _context);
            throw;
        }
    }, accessibleJob);

    return accessibleJob.Id;
}

/// <summary>
/// Builds the OCR export settings object for the requested output format.
/// Formats without a dedicated case fall back to UTF-8 plain-text settings.
/// </summary>
/// <param name="fileformat">The requested output format.</param>
/// <returns>A newly created settings object for that format.</returns>
private static OutputFormatSettings CreateOutputFormatSettings(OutputFileFormatEnum fileformat)
{
    switch (fileformat)
    {
        case OutputFileFormatEnum.OFF_PDF:
            return new PDFExportSettings
            {
                PDFExportMode = PDFExportModeEnum.PEM_TextOnly,
                PictureResolution = -1,
                Quality = 70,
                UseOriginalPaperSize = true,
                WriteTaggedPdf = true,
                IsEncryptionRequested = false,
                FileFormat = OutputFileFormatEnum.OFF_PDF
            };

        case OutputFileFormatEnum.OFF_PDFA:
            return new PDFAExportSettings
            {
                Write1ACompliant = true,
                UseOriginalPaperSize = true,
                Quality = 70,
                PictureResolution = -1,
                PDFExportMode = PDFExportModeEnum.PEM_ImageOnText,
                FileFormat = OutputFileFormatEnum.OFF_PDFA
            };

        case OutputFileFormatEnum.OFF_RTF:
            return new RTFExportSettings
            {
                ForceFixedPageSize = false,
                HighlightErrorsWithBackgroundColor = false,
                PaperHeight = 16834,
                PaperWidth = 11909,
                RTFSynthesisMode = RTFSynthesisModeEnum.RSM_EditableCopy,
                WritePictures = true,
                FileFormat = OutputFileFormatEnum.OFF_RTF
            };

        case OutputFileFormatEnum.OFF_Text:
            return new TextExportSettings
            {
                ExportParagraphsAsOneLine = true,
                EncodingType = TextEncodingTypeEnum.TET_Simple,
                KeepOriginalHeadersFooters = true,
                FileFormat = OutputFileFormatEnum.OFF_Text
            };

        // The UTF-8 / UTF-16 variants are plain-text exports that differ only in encoding.
        case OutputFileFormatEnum.OFF_UTF8:
            return new TextExportSettings
            {
                ExportParagraphsAsOneLine = true,
                EncodingType = TextEncodingTypeEnum.TET_UTF8,
                KeepOriginalHeadersFooters = true,
                FileFormat = OutputFileFormatEnum.OFF_Text
            };

        case OutputFileFormatEnum.OFF_UTF16:
            return new TextExportSettings
            {
                ExportParagraphsAsOneLine = true,
                EncodingType = TextEncodingTypeEnum.TET_UTF16,
                KeepOriginalHeadersFooters = true,
                FileFormat = OutputFileFormatEnum.OFF_Text
            };

        case OutputFileFormatEnum.OFF_MSWord:
            return new MSWordExportSettings
            {
                ForceFixedPageSize = false,
                HighlightErrorsWithBackgroundColor = false,
                RTFSynthesisMode = RTFSynthesisModeEnum.RSM_EditableCopy,
                WritePictures = true,
                PaperHeight = 16834,
                PaperWidth = 11909,
                FileFormat = OutputFileFormatEnum.OFF_MSWord
            };

        case OutputFileFormatEnum.OFF_HTML:
            return new HTMLExportSettings
            {
                EncodingType = TextEncodingTypeEnum.TET_UTF8,
                HTMLSynthesisMode = HTMLSynthesisModeEnum.HSM_PageLayout,
                AllowCss = true,
                CodePage = CodePageEnum.CP_Latin,
                WritePictures = true,
                FileFormat = OutputFileFormatEnum.OFF_HTML
            };

        case OutputFileFormatEnum.OFF_CSV:
            return new CSVExportSettings
            {
                CodePage = CodePageEnum.CP_Latin,
                EncodingType = TextEncodingTypeEnum.TET_UTF8,
                IgnoreTextOutsideTables = false,
                TabSeparator = ",",
                UsePageBreaks = false,
                FileFormat = OutputFileFormatEnum.OFF_CSV
            };

        case OutputFileFormatEnum.OFF_DOCX:
            return new DOCXExportSettings
            {
                ForceFixedPageSize = false,
                HighlightErrorsWithBackgroundColor = false,
                PaperHeight = 16834,
                PaperWidth = 11909,
                RTFSynthesisMode = RTFSynthesisModeEnum.RSM_EditableCopy,
                WritePictures = true,
                FileFormat = OutputFileFormatEnum.OFF_DOCX
            };

        case OutputFileFormatEnum.OFF_EPUB:
            return new EpubExportSettings
            {
                PictureResolution = -1,
                Quality = 70,
                FileFormat = OutputFileFormatEnum.OFF_EPUB
            };

        case OutputFileFormatEnum.OFF_MSExcel:
            return new XLExportSettings
            {
                ConvertNumericValuesToNumbers = true,
                IgnoreTextOutsideTables = false,
                FileFormat = OutputFileFormatEnum.OFF_MSExcel
            };

        case OutputFileFormatEnum.OFF_XLSX:
            return new XLSXExportSettings
            {
                ConvertNumericValuesToNumbers = true,
                IgnoreTextOutsideTables = false,
                FileFormat = OutputFileFormatEnum.OFF_XLSX
            };

        case OutputFileFormatEnum.OFF_XML:
            return new XMLExportSettings
            {
                PagesPerFile = 512,
                WriteCharactersFormatting = false,
                WriteCharAttributes = false,
                WriteExtendedCharAttributes = false,
                WriteNonDeskewedCoordinates = false,
                FileFormat = OutputFileFormatEnum.OFF_XML
            };

        case OutputFileFormatEnum.OFF_TIFF:
            return new TiffExportSettings
            {
                ColorMode = ImageColorModeEnum.ICM_AsIs,
                Compression = ImageCompressionTypeEnum.ICT_Jpeg,
                Resolution = -1,
                FileFormat = OutputFileFormatEnum.OFF_TIFF
            };

        case OutputFileFormatEnum.OFF_JPG:
            return new JpegExportSettings
            {
                ColorMode = ImageColorModeEnum.ICM_AsIs,
                Quality = 70,
                Resolution = -1,
                FileFormat = OutputFileFormatEnum.OFF_JPG
            };

        case OutputFileFormatEnum.OFF_InternalFormat:
            return new InternalFormatSettings
            {
                FileFormat = OutputFileFormatEnum.OFF_InternalFormat
            };

        default:
            return new TextExportSettings
            {
                EncodingType = TextEncodingTypeEnum.TET_UTF8,
                FileFormat = OutputFileFormatEnum.OFF_Text
            };
    }
}

/// <summary>
/// Maps the requested output format to the MIME type and file extension stored on the
/// finished job. Formats without a dedicated case map to "text/plain" / ".txt".
/// </summary>
/// <param name="fileformat">The requested output format.</param>
/// <param name="mime">Receives the MIME type of the result.</param>
/// <param name="fileExtension">Receives the file extension, including the leading dot.</param>
private static void GetResultFileInfo(OutputFileFormatEnum fileformat, out string mime, out string fileExtension)
{
    switch (fileformat)
    {
        case OutputFileFormatEnum.OFF_PDF:
        case OutputFileFormatEnum.OFF_PDFA:
            mime = "application/pdf";
            fileExtension = ".pdf";
            break;
        case OutputFileFormatEnum.OFF_RTF:
            mime = "text/rtf";
            fileExtension = ".rtf";
            break;
        case OutputFileFormatEnum.OFF_Text:
            mime = "text/plain";
            fileExtension = ".txt";
            break;
        case OutputFileFormatEnum.OFF_MSWord:
            mime = "application/msword";
            fileExtension = ".doc";
            break;
        case OutputFileFormatEnum.OFF_HTML:
            mime = "text/html";
            fileExtension = ".html";
            break;
        case OutputFileFormatEnum.OFF_CSV:
            mime = "text/csv";
            fileExtension = ".csv";
            break;
        case OutputFileFormatEnum.OFF_DOCX:
            mime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
            fileExtension = ".docx";
            break;
        case OutputFileFormatEnum.OFF_EPUB:
            mime = "application/epub+zip";
            fileExtension = ".epub";
            break;
        case OutputFileFormatEnum.OFF_MSExcel:
            mime = "application/vnd.ms-excel";
            fileExtension = ".xls";
            break;
        case OutputFileFormatEnum.OFF_XLSX:
            mime = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
            fileExtension = ".xlsx";
            break;
        case OutputFileFormatEnum.OFF_XML:
            mime = "application/xml";
            fileExtension = ".xml";
            break;
        case OutputFileFormatEnum.OFF_JPG:
            mime = "image/jpeg";
            fileExtension = ".jpg";
            break;
        case OutputFileFormatEnum.OFF_TIFF:
            mime = "image/tiff";
            fileExtension = ".tiff";
            break;
        case OutputFileFormatEnum.OFF_InternalFormat:
            mime = "application/zip";
            fileExtension = ".zip";
            break;
        default:
            mime = "text/plain";
            fileExtension = ".txt";
            break;
    }
}