/// <summary>
/// Downloads the blob of the job, converts it to PDF through <c>ConvertFile</c> and,
/// when the conversion reports success, attaches the PDF to the document.
/// </summary>
/// <param name="parameters">Parameters of the job being processed.</param>
/// <param name="workingFolder">Working folder handed to <c>DownloadBlob</c>.</param>
/// <returns>
/// The result returned by <c>ConvertFile</c>, or a failed result when <c>ConvertFile</c> returned null.
/// </returns>
protected async override Task<ProcessResult> OnPolling(PollerJobParameters parameters, string workingFolder)
{
    Logger.InfoFormat("Delegating conversion of file {0} to Office automation", parameters.JobId);

    // libreofficeconversion is registered per tenant (comment kept from the original code).
    string sourcePath = await DownloadBlob(
        parameters.TenantId,
        parameters.JobId,
        parameters.FileName,
        workingFolder).ConfigureAwait(false);
    Logger.DebugFormat("Downloaded file {0} to be converted to pdf", sourcePath);

    // The PDF is produced next to the source, same name with a .pdf extension.
    string pdfPath = Path.ChangeExtension(sourcePath, ".pdf");
    ProcessResult outcome = ConvertFile(sourcePath, pdfPath);

    if (outcome == null)
    {
        return ProcessResult.Fail("Unknown error during office conversion");
    }

    if (outcome.Result)
    {
        Logger.InfoFormat("File {0} correctly converted to PDF with office automation", parameters.FileName);
        var pdfFormat = new Client.Model.DocumentFormat(DocumentFormats.Pdf);
        await AddFormatToDocumentFromFile(
            parameters.TenantId,
            parameters.JobId,
            pdfFormat,
            pdfPath,
            new Dictionary<string, object>()).ConfigureAwait(false);
    }

    // A failed conversion is returned as-is so the caller sees the converter's own outcome.
    return outcome;
}
/// <summary>
/// Adds each of the given files as a "content_zip" attachment of the job handle,
/// skipping files whose extension is not in the permitted list.
/// </summary>
/// <param name="parameters">Parameters of the job being processed.</param>
/// <param name="permittedExtension">
/// Allowed extensions (compared without dots, ignoring case); null disables the filter.
/// </param>
/// <param name="unzippingDirectory">
/// Directory path whose length is stripped from the start of each file path to obtain the relative name.
/// </param>
/// <param name="files">Paths of the candidate files.</param>
/// <returns>The number of attachments that were added.</returns>
private async Task<Int32> UploadAttachmentListToDocumentStore(
    PollerJobParameters parameters,
    string[] permittedExtension,
    string unzippingDirectory,
    IEnumerable<string> files)
{
    Int32 uploaded = 0;

    foreach (string candidate in files)
    {
        string extension = Path.GetExtension(candidate).Trim('.');
        bool isPermitted = permittedExtension == null
            || permittedExtension.Contains(extension, StringComparer.OrdinalIgnoreCase);

        if (isPermitted)
        {
            // Relative name = path of the file with the unzip directory prefix removed.
            string relativeName = candidate.Substring(unzippingDirectory.Length);
            await AddAttachmentToHandle(
                parameters.TenantId,
                parameters.JobId,
                candidate,
                "content_zip",
                relativeName,
                new Dictionary<string, object>());
            uploaded++;
        }
        else
        {
            Logger.DebugFormat(
                "job: {0} File {1} attachment is discharded because extension {2} is not permitted",
                parameters.JobId,
                candidate,
                extension);
        }
    }

    return uploaded;
}
/// <summary>
/// Downloads the blob of the job, makes it available under its real file name and converts it
/// to PDF with <c>HtmlToPdfConverterFromDiskFile</c>, attaching the PDF to the document.
/// </summary>
/// <param name="parameters">Parameters of the job being processed.</param>
/// <param name="workingFolder">Working folder handed to <c>DownloadBlob</c>.</param>
/// <returns><c>ProcessResult.Ok</c> once the PDF format has been added.</returns>
protected async override Task<ProcessResult> OnPolling(PollerJobParameters parameters, string workingFolder)
{
    string pathToFile = await DownloadBlob(
        parameters.TenantId,
        parameters.JobId,
        parameters.FileName,
        workingFolder).ConfigureAwait(false);

    String fileName = Path.Combine(Path.GetDirectoryName(pathToFile), parameters.All[JobKeys.FileName]);

    // When the downloaded blob already sits at the real file name, deleting the "destination"
    // would delete the source and the subsequent copy would fail: in that case there is nothing to do.
    Boolean alreadyInPlace = String.Equals(
        Path.GetFullPath(pathToFile),
        Path.GetFullPath(fileName),
        StringComparison.OrdinalIgnoreCase);

    if (alreadyInPlace)
    {
        // Use the path that is known to exist on disk.
        fileName = pathToFile;
    }
    else
    {
        Logger.DebugFormat("Move blob id {0} to real filename {1}", pathToFile, fileName);
        if (File.Exists(fileName))
        {
            File.Delete(fileName);
        }
        File.Copy(pathToFile, fileName);
    }

    var converter = new HtmlToPdfConverterFromDiskFile(fileName, base.JobsHostConfiguration)
    {
        Logger = Logger
    };
    var pdfConvertedFileName = converter.Run(parameters.TenantId, parameters.JobId);

    await AddFormatToDocumentFromFile(
        parameters.TenantId,
        parameters.JobId,
        new DocumentFormat(DocumentFormats.Pdf),
        pdfConvertedFileName,
        new Dictionary<string, object>()).ConfigureAwait(false);

    return ProcessResult.Ok;
}
/// <summary>
/// Tells whether the job carries the <c>JobKeys.Force</c> parameter set to "true" (case is ignored).
/// </summary>
/// <param name="parameters">Parameters of the job being processed.</param>
/// <returns>True only when the parameter is present and equals "true"; false otherwise.</returns>
protected Boolean IsForced(PollerJobParameters parameters)
{
    String rawValue;
    return parameters.All.TryGetValue(JobKeys.Force, out rawValue)
        && "true".Equals(rawValue, StringComparison.OrdinalIgnoreCase);
}
/// <summary>
/// Tells whether the <c>JobKeys.PipelineId</c> parameter of the job matches the given pipeline
/// (case is ignored).
/// </summary>
/// <param name="parameters">Parameters of the job being processed.</param>
/// <param name="pipeline">Pipeline identifier to compare against.</param>
/// <returns>True only when the parameter is present and equal to <paramref name="pipeline"/>.</returns>
protected Boolean FromPipelineId(PollerJobParameters parameters, String pipeline)
{
    String jobPipeline;
    return parameters.All.TryGetValue(JobKeys.PipelineId, out jobPipeline)
        && pipeline.Equals(jobPipeline, StringComparison.OrdinalIgnoreCase);
}
/// <summary>
/// Adds the <c>DocumentFormats.Content</c> format to the job's document using
/// <c>DocumentContent.NullContent</c> as the content object.
/// </summary>
/// <param name="parameters">Parameters of the job being processed.</param>
/// <param name="contentFileName">File name passed along with the content object.</param>
/// <returns>The value returned by <c>AddFormatToDocumentFromObject</c>.</returns>
private async Task<bool> AddNullContentFormat(PollerJobParameters parameters, string contentFileName)
{
    var contentFormat = new DocumentFormat(DocumentFormats.Content);
    var emptyDictionary = new Dictionary<string, object>();

    bool added = await AddFormatToDocumentFromObject(
        parameters.TenantId,
        this.QueueName,
        parameters.JobId,
        contentFormat,
        DocumentContent.NullContent,
        contentFileName,
        emptyDictionary);

    return added;
}
/// <summary>
/// Builds a <c>PollerJobParameters</c> from a queued job, reading the well-known values through
/// <c>SafeGetParameter</c> and keeping the whole parameter dictionary in <c>All</c>.
/// </summary>
/// <param name="nextJob">Queued job to read from.</param>
/// <returns>The populated parameters object.</returns>
private static PollerJobParameters ExtractJobParameters(QueuedJobDto nextJob)
{
    // Members are initialized in the same order as the original sequence of assignments.
    return new PollerJobParameters
    {
        FileExtension = SafeGetParameter(nextJob, JobKeys.FileExtension),
        FileName = SafeGetParameter(nextJob, JobKeys.FileName),
        InputDocumentFormat = new DocumentFormat(SafeGetParameter(nextJob, JobKeys.Format)),
        JobId = nextJob.Id,
        TenantId = SafeGetParameter(nextJob, JobKeys.TenantId),
        All = nextJob.Parameters
    };
}
/// <summary>
/// Downloads the blob of the job, opens it as a <c>SolidEdgeDocument</c>, saves its thumbnail
/// to disk and attaches it to the document as <c>DocumentFormats.RasterImage</c>.
/// </summary>
/// <remarks>
/// Only "png" and "bmp" are handled; any other (or missing) requested format is treated as "png",
/// for both the image encoding and the file extension.
/// </remarks>
/// <param name="parameters">Parameters of the job being processed.</param>
/// <param name="workingFolder">Working folder handed to <c>DownloadBlob</c>.</param>
/// <returns>
/// <c>ProcessResult.Ok</c> when the thumbnail file was produced and attached,
/// a failed result otherwise.
/// </returns>
protected async override Task<ProcessResult> OnPolling(PollerJobParameters parameters, string workingFolder)
{
    // Invariant lowering: the value is a machine-readable key, not user-facing text.
    String format = parameters.All.GetOrDefault(JobKeys.ThumbnailFormat)?.ToLowerInvariant() ?? "png";
    ImageFormat imageFormat;
    switch (format)
    {
        case "bmp":
            imageFormat = ImageFormat.Bmp;
            break;
        default:
            // Unsupported formats fall back to png: normalize the name too, otherwise the file
            // would contain png data under a different extension.
            format = "png";
            imageFormat = ImageFormat.Png;
            break;
    }

    Logger.DebugFormat("Conversion for jobId {0} in format {1} starting", parameters.JobId, format);

    string pathToFile = await DownloadBlob(
        parameters.TenantId,
        parameters.JobId,
        parameters.FileName,
        workingFolder).ConfigureAwait(false);

    using (SolidEdgeDocument document = SolidEdgeDocument.Open(pathToFile))
    {
        Logger.DebugFormat("ClassId: '{0}'", document.ClassId);
        Logger.DebugFormat("CreatedVersion: '{0}'", document.CreatedVersion);
        Logger.DebugFormat("LastSavedVersion: '{0}'", document.LastSavedVersion);
        Logger.DebugFormat("Created: '{0}'", document.Created);
        Logger.DebugFormat("LastModified: '{0}'", document.LastModified);
        Logger.DebugFormat("Status: '{0}'", document.Status);

        String thumbFileName = Path.ChangeExtension(pathToFile, "." + format);
        using (Bitmap bitmap = document.GetThumbnail())
        {
            bitmap.Save(thumbFileName, imageFormat);
        }

        if (File.Exists(thumbFileName))
        {
            await AddFormatToDocumentFromFile(
                parameters.TenantId,
                parameters.JobId,
                new DocumentFormat(DocumentFormats.RasterImage),
                thumbFileName,
                new Dictionary<string, object>()).ConfigureAwait(false);

            Logger.DebugFormat("Conversion of {0} in format {1} done", parameters.JobId, format);
            return ProcessResult.Ok;
        }
    }

    return ProcessResult.Fail("Unable to extract thumbnail");
}
/// <summary>
/// Downloads the blob of the job, expands it to HTML first when it is an MHT/MHTML archive,
/// runs it through <c>SafeHtmlConverter</c> and converts the result to PDF, attaching the PDF
/// to the document.
/// </summary>
/// <param name="parameters">Parameters of the job being processed.</param>
/// <param name="workingFolder">
/// Working folder handed to <c>DownloadBlob</c> and used as the MHTML parser output directory.
/// </param>
/// <returns><c>ProcessResult.Ok</c> once the PDF format has been added.</returns>
protected async override Task<ProcessResult> OnPolling(PollerJobParameters parameters, string workingFolder)
{
    string downloadedPath = await DownloadBlob(
        parameters.TenantId,
        parameters.JobId,
        parameters.FileName,
        workingFolder).ConfigureAwait(false);

    // The step that copied the blob to its real file name was commented out in the original code:
    // the blob is processed directly at the downloaded path.
    if (Logger.IsDebugEnabled)
    {
        Logger.DebugFormat("Conversion of HtmlZip to PDF: file {0}", downloadedPath);
    }

    bool isMhtmlArchive =
        downloadedPath.EndsWith(".mht", StringComparison.OrdinalIgnoreCase)
        || downloadedPath.EndsWith(".mhtml", StringComparison.OrdinalIgnoreCase);

    string htmlPath = downloadedPath;
    if (isMhtmlArchive)
    {
        // Expand the archive into a plain .html file next to the original.
        MHTMLParser archiveParser = new MHTMLParser(File.ReadAllText(downloadedPath));
        archiveParser.OutputDirectory = workingFolder;
        archiveParser.DecodeImageData = true;

        htmlPath = Path.ChangeExtension(downloadedPath, ".html");
        File.WriteAllText(htmlPath, archiveParser.getHTMLText());
    }

    var htmlSanitizer = new SafeHtmlConverter(htmlPath);
    htmlSanitizer.Logger = Logger;
    string sanitizedPath = htmlSanitizer.Run(parameters.JobId);

    var pdfConverter = new HtmlToPdfConverterFromDiskFile(sanitizedPath, base.JobsHostConfiguration);
    pdfConverter.Logger = Logger;
    var pdfPath = pdfConverter.Run(parameters.TenantId, parameters.JobId);

    await AddFormatToDocumentFromFile(
        parameters.TenantId,
        parameters.JobId,
        new DocumentFormat(DocumentFormats.Pdf),
        pdfPath,
        new Dictionary<string, object>()).ConfigureAwait(false);

    return ProcessResult.Ok;
}
/// <summary>
/// Downloads the blob of the job and produces one resized image for every size listed in the
/// <c>JobKeys.Sizes</c> parameter, attaching each one to the document as format
/// "thumb.&lt;size name&gt;".
/// </summary>
/// <param name="parameters">Parameters of the job being processed.</param>
/// <param name="workingFolder">
/// Working folder handed to <c>DownloadBlob</c>; resized images are written here.
/// </param>
/// <returns><c>ProcessResult.Ok</c> once every size has been processed.</returns>
protected async override Task<ProcessResult> OnPolling(PollerJobParameters parameters, string workingFolder)
{
    var fileExtension = parameters.All[JobKeys.ThumbnailFormat];
    ImageFormat format = GetFormatFromExtension(fileExtension);
    var sizesAsString = parameters.All[JobKeys.Sizes];
    var imageSizes = SizeInfoHelper.Deserialize(sizesAsString);

    Logger.DebugFormat("Starting resize job for {0} - {1}", parameters.JobId, sizesAsString);

    string pathToFile = await DownloadBlob(
        parameters.TenantId,
        parameters.JobId,
        parameters.FileName,
        workingFolder).ConfigureAwait(false);

    using (var pageStream = new MemoryStream())
    {
        // Load the source once in memory and release the file handle right away.
        using (var sourceStream = File.OpenRead(pathToFile))
        {
            sourceStream.CopyTo(pageStream);
        }

        foreach (var size in imageSizes)
        {
            Logger.DebugFormat("Resize job for {0} - {1}", parameters.JobId, size.Name);
            pageStream.Seek(0, SeekOrigin.Begin);

            var fileFormat = new Client.Model.DocumentFormat("thumb." + size.Name);
            string resizeImagePath = Path.Combine(
                workingFolder,
                String.Format(
                    "{0}.{1}.{2}",
                    Path.GetFileNameWithoutExtension(parameters.FileName),
                    size.Name,
                    fileExtension));
            resizeImagePath = SanitizeFileNameForLength(resizeImagePath);

            // File.Create truncates an existing file; File.OpenWrite would leave stale trailing
            // bytes behind whenever a previous, larger file exists at the same path.
            using (var outStream = File.Create(resizeImagePath))
            {
                Logger.DebugFormat("Resizing {0}", parameters.JobId);
                ImageResizer.Shrink(pageStream, outStream, size.Width, size.Height, format);
            }

            await AddFormatToDocumentFromFile(
                parameters.TenantId,
                parameters.JobId,
                fileFormat,
                resizeImagePath,
                new Dictionary<string, object>()).ConfigureAwait(false);
        }
    }

    Logger.DebugFormat("Ended resize job for {0} - {1}", parameters.JobId, sizesAsString);
    return ProcessResult.Ok;
}
/// <summary>
/// Downloads the blob of the job and runs the PDF-to-image task on it; every produced page is
/// handed to <c>Write</c>.
/// </summary>
/// <remarks>
/// The work is skipped when the handle already has a raster image, unless the job is forced or
/// comes from the "office" pipeline: office PDFs can be re-generated after conversion errors
/// (LibreOffice or MS Office), so their images must be rebuilt.
/// </remarks>
/// <param name="parameters">Parameters of the job being processed.</param>
/// <param name="workingFolder">
/// Working folder handed to <c>DownloadBlob</c> and to <c>Write</c>.
/// </param>
/// <returns><c>ProcessResult.Ok</c> both when the work is skipped and when it completes.</returns>
protected async override Task<ProcessResult> OnPolling(PollerJobParameters parameters, string workingFolder)
{
    bool mayReuseExistingImage = !IsForced(parameters) && !FromPipelineId(parameters, "office");
    if (mayReuseExistingImage)
    {
        var existingFormats = GetFormats(parameters.TenantId, parameters.JobId);
        if (existingFormats.Any(f => f == DocumentFormats.RasterImage))
        {
            return ProcessResult.Ok;
        }
    }

    String format = parameters.All[JobKeys.ThumbnailFormat];
    Logger.DebugFormat("Conversion for jobId {0} in format {1} starting", parameters.JobId, format);

    var imageTask = _taskFactory();
    foreach (var knownPassword in ClientPasswordSet.GetPasswordFor(parameters.FileName))
    {
        imageTask.Passwords.Add(knownPassword);
    }

    string pdfPath = await DownloadBlob(
        parameters.TenantId,
        parameters.JobId,
        parameters.FileName,
        workingFolder).ConfigureAwait(false);

    // Assigned one by one, in the same order the original initializer evaluated them.
    var taskParams = new CreatePdfImageTaskParams();
    taskParams.Dpi = parameters.GetIntOrDefault(JobKeys.Dpi, 150);
    taskParams.FromPage = parameters.GetIntOrDefault(JobKeys.PagesFrom, 1);
    taskParams.Pages = parameters.GetIntOrDefault(JobKeys.PagesCount, 1);
    taskParams.Format = (CreatePdfImageTaskParams.ImageFormat)Enum.Parse(
        typeof(CreatePdfImageTaskParams.ImageFormat),
        format,
        true);

    // The callback binds folder, parameters and format so the task only supplies index and stream.
    await imageTask.Run(
        pdfPath,
        taskParams,
        (pageIndex, pageStream) => Write(workingFolder, parameters, format, pageIndex, pageStream)
    ).ConfigureAwait(false);

    Logger.DebugFormat("Conversion of {0} in format {1} done", parameters.JobId, format);
    return ProcessResult.Ok;
}
/// <summary>
/// Saves a page image stream to a file in the worker folder and attaches that file to the
/// document as <c>DocumentFormats.RasterImage</c>.
/// </summary>
/// <param name="workerFolder">Folder where the page file is written.</param>
/// <param name="parameters">Parameters of the job being processed.</param>
/// <param name="format">Used as the extension of the written file.</param>
/// <param name="pageIndex">Index of the page, embedded in the file name as ".page_N".</param>
/// <param name="stream">Stream holding the page image; it is copied synchronously.</param>
/// <returns>Always true.</returns>
public async Task<Boolean> Write(String workerFolder, PollerJobParameters parameters, String format, int pageIndex, Stream stream)
{
    var rawFileName = Path.Combine(
        workerFolder,
        Path.GetFileNameWithoutExtension(parameters.FileName) + ".page_" + pageIndex + "." + format);
    rawFileName = SanitizeFileNameForLength(rawFileName);

    // File.Create truncates an existing file; File.OpenWrite would leave stale trailing bytes
    // behind whenever a previous, larger file exists at the same path.
    using (var outStream = File.Create(rawFileName))
    {
        stream.CopyTo(outStream);
    }

    await AddFormatToDocumentFromFile(
        parameters.TenantId,
        parameters.JobId,
        new DocumentFormat(DocumentFormats.RasterImage),
        rawFileName,
        new Dictionary<string, object>()).ConfigureAwait(false);

    return true;
}
/// <summary>
/// Creates a thumbnail of the job blob through the VLC command line and, when one is produced,
/// attaches it to the document as <c>DocumentFormats.RasterImage</c>.
/// </summary>
/// <remarks>
/// When the VLC executable cannot be found the method logs the error, waits for a key press on
/// the console and then throws.
/// </remarks>
/// <param name="parameters">Parameters of the job being processed.</param>
/// <param name="workingFolder">Folder passed to the thumbnail creator.</param>
/// <returns><c>ProcessResult.Ok</c>, also when no thumbnail was produced.</returns>
/// <exception cref="ApplicationException">The VLC executable does not exist.</exception>
protected async override Task<ProcessResult> OnPolling(PollerJobParameters parameters, string workingFolder)
{
    String format = parameters.All.GetOrDefault(JobKeys.ThumbnailFormat) ?? "png";
    String rawOffset = parameters.All.GetOrDefault("thumb_seconds_offset") ?? "10";
    Int32 secondsOffset = Int32.Parse(rawOffset);
    Logger.DebugFormat("Conversion for jobId {0} in format {1} starting", parameters.JobId, format);

    String vlcPath = Helper.GetExecutableLocation();
    bool vlcAvailable = File.Exists(vlcPath);
    if (!vlcAvailable)
    {
        String error = "Unable to find VLC.exe executable in standard folders. You can specify VLC directory with 'vlc_location' job parameter or with 'vlc_location' app config configuration";
        Logger.ErrorFormat(error);
        Console.WriteLine("Unable to start converter, press a key to close.");
        Console.ReadKey();
        throw new ApplicationException(error);
    }

    var thumbnailCreator = new VlcCommandLineThumbnailCreator(vlcPath, format, Logger);
    String blobUri = base.GetBlobUriForJobBlob(parameters.TenantId, parameters.JobId);
    String thumbnailPath = thumbnailCreator.CreateThumbnail(blobUri, workingFolder, secondsOffset);

    if (!String.IsNullOrEmpty(thumbnailPath))
    {
        await AddFormatToDocumentFromFile(
            parameters.TenantId,
            parameters.JobId,
            new DocumentFormat(DocumentFormats.RasterImage),
            thumbnailPath,
            new Dictionary<string, object>());
        Logger.DebugFormat("Conversion of {0} in format {1} done", parameters.JobId, format);
    }
    else
    {
        // A missing thumbnail is only a warning: the job is still reported as successful.
        Logger.WarnFormat("Conversion returned no thumbnail for file {0} - job {1}", parameters.FileName, parameters.JobId);
    }

    return ProcessResult.Ok;
}
/// <summary>
/// Downloads the blob of the job, expands it to HTML first when it is an MHT/MHTML archive, and
/// converts it to PDF with <c>HtmlToPdfConverterFromDiskFileOld</c>, attaching the PDF to the document.
/// </summary>
/// <param name="parameters">Parameters of the job being processed.</param>
/// <param name="workingFolder">
/// Working folder handed to <c>DownloadBlob</c> and used as the MHTML parser output directory.
/// </param>
/// <returns><c>ProcessResult.Ok</c> once the PDF format has been added.</returns>
protected async override Task<ProcessResult> OnPolling(PollerJobParameters parameters, string workingFolder)
{
    string pathToFile = await DownloadBlob(
        parameters.TenantId,
        parameters.JobId,
        parameters.FileName,
        workingFolder).ConfigureAwait(false);

    if (Logger.IsDebugEnabled)
    {
        Logger.DebugFormat("Conversion of HtmlZip to PDF: file {0}", pathToFile);
    }

    var file = pathToFile;

    // Ordinal, case-insensitive extension check: no lower-cased copy of the path and no
    // dependency on the current culture.
    if (pathToFile.EndsWith(".mht", StringComparison.OrdinalIgnoreCase)
        || pathToFile.EndsWith(".mhtml", StringComparison.OrdinalIgnoreCase))
    {
        string mhtml = File.ReadAllText(pathToFile);
        MHTMLParser parser = new MHTMLParser(mhtml)
        {
            OutputDirectory = workingFolder,
            DecodeImageData = true
        };

        // The archive is expanded into a plain .html file next to the original.
        var outFile = Path.ChangeExtension(pathToFile, ".html");
        File.WriteAllText(outFile, parser.getHTMLText());
        file = outFile;
    }

    var converter = new HtmlToPdfConverterFromDiskFileOld(file, base.JobsHostConfiguration)
    {
        Logger = Logger
    };
    var pdfConvertedFileName = converter.Run(parameters.JobId);

    await AddFormatToDocumentFromFile(
        parameters.TenantId,
        parameters.JobId,
        new DocumentFormat(DocumentFormats.Pdf),
        pdfConvertedFileName,
        new Dictionary<string, object>()).ConfigureAwait(false);

    return ProcessResult.Ok;
}
/// <summary>
/// Downloads the blob of the job, converts it to PDF through <c>_conversion</c> and attaches the
/// resulting file to the document as <c>DocumentFormats.Pdf</c>.
/// </summary>
/// <param name="parameters">Parameters of the job being processed.</param>
/// <param name="workingFolder">Working folder handed to <c>DownloadBlob</c>.</param>
/// <returns><c>ProcessResult.Ok</c> once the PDF format has been added.</returns>
protected async override Task<ProcessResult> OnPolling(PollerJobParameters parameters, string workingFolder)
{
    Logger.DebugFormat("Delegating conversion of file {0} to libreoffice", parameters.JobId);

    // libreofficeconversion is registered per tenant.
    // Both awaits use ConfigureAwait(false): nothing in this method needs the captured context.
    string pathToFile = await DownloadBlob(
        parameters.TenantId,
        parameters.JobId,
        parameters.FileName,
        workingFolder).ConfigureAwait(false);
    Logger.DebugFormat("Downloaded file {0} to be converted to pdf", pathToFile);

    var outputFile = _conversion.Run(pathToFile, "pdf");

    await AddFormatToDocumentFromFile(
        parameters.TenantId,
        parameters.JobId,
        new DocumentFormat(DocumentFormats.Pdf),
        outputFile,
        new Dictionary<string, object>()).ConfigureAwait(false);

    return ProcessResult.Ok;
}
/// <summary>
/// Downloads the blob of the job, converts it with <c>MailMessageToHtmlConverterTask</c> and
/// attaches the file returned by the converter to the document as <c>DocumentFormats.Email</c>.
/// </summary>
/// <param name="parameters">Parameters of the job being processed.</param>
/// <param name="workingFolder">
/// Working folder handed to <c>DownloadBlob</c> and to the converter.
/// </param>
/// <returns><c>ProcessResult.Ok</c> once the format has been added.</returns>
protected async override Task<ProcessResult> OnPolling(PollerJobParameters parameters, string workingFolder)
{
    // The converter is created before the download, as in the original sequence.
    var mailConverter = new MailMessageToHtmlConverterTask();
    mailConverter.Logger = Logger;

    string downloadedMail = await DownloadBlob(
        parameters.TenantId,
        parameters.JobId,
        parameters.FileName,
        workingFolder).ConfigureAwait(false);

    var convertedZip = mailConverter.Convert(parameters.JobId, downloadedMail, workingFolder);
    var emailFormat = new DocumentFormat(DocumentFormats.Email);

    await AddFormatToDocumentFromFile(
        parameters.TenantId,
        parameters.JobId,
        emailFormat,
        convertedZip,
        new Dictionary<string, object>()).ConfigureAwait(false);

    return ProcessResult.Ok;
}
/// <summary>
/// Extracts the textual content of the job blob with the configured analyzers and attaches it to
/// the document as <c>DocumentFormats.Content</c> (when some text was extracted) and as
/// <c>DocumentFormats.Tika</c> (always).
/// </summary>
/// <remarks>
/// Null content is attached, and nothing else is done, when the file extension is not among the
/// supported formats or when the filter manager decides that the file must not be analyzed.
/// Analyzers are tried in ordinal order: when one throws, the next one returned by
/// <c>BuildAnalyzer</c> is used, until one succeeds or no analyzer is left.
/// </remarks>
/// <param name="parameters">Parameters of the job being processed.</param>
/// <param name="workingFolder">
/// Working folder handed to <c>DownloadBlob</c> and <c>ProcessFile</c>; the ".tika.html" file
/// is written here.
/// </param>
/// <returns>
/// A result wrapping the outcome of <c>AddNullContentFormat</c> for skipped files,
/// <c>ProcessResult.Ok</c> otherwise.
/// </returns>
protected async override Task<ProcessResult> OnPolling(
    PollerJobParameters parameters,
    String workingFolder)
{
    var contentFileName = Path.ChangeExtension(parameters.FileName, ".content");

    if (!_formats.Contains(parameters.FileExtension))
    {
        Logger.DebugFormat("Document for job Id {0} has an extension not supported, setting null content", parameters.JobId);
        return new ProcessResult(await AddNullContentFormat(parameters, contentFileName));
    }

    Logger.DebugFormat("Starting tika on job: {0}, file extension {1}", parameters.JobId, parameters.FileExtension);
    Logger.DebugFormat("Downloading blob for job: {0}, on local path {1}", parameters.JobId, workingFolder);

    string pathToFile = await DownloadBlob(parameters.TenantId, parameters.JobId, parameters.FileName, workingFolder);
    pathToFile = ProcessFile(pathToFile, workingFolder);

    Boolean shouldAnalyze = _filterManager.ShouldAnalyze(parameters.FileName, pathToFile);
    if (!shouldAnalyze)
    {
        Logger.InfoFormat("File {0} for job {1} was discharded!", parameters.FileName, parameters.JobId);
        return new ProcessResult(await AddNullContentFormat(parameters, contentFileName));
    }

    Logger.DebugFormat("Search for password JobId:{0}", parameters.JobId);
    var passwords = ClientPasswordSet.GetPasswordFor(parameters.FileName).ToArray();

    String content = "";
    Int32 analyzerOrdinal = 0;
    Boolean success = false;
    var analyzer = BuildAnalyzer(analyzerOrdinal);
    do
    {
        try
        {
            if (passwords.Any())
            {
                // Try every known password: the first one that opens the file wins.
                // NOTE(review): failures are handled here, so with passwords the analyzer is
                // considered successful even when no password worked and the fallback analyzers
                // are never tried - confirm this is intended.
                foreach (var password in passwords)
                {
                    try
                    {
                        content = analyzer.GetHtmlContent(pathToFile, password) ?? "";
                        break;
                    }
                    catch (Exception passwordEx)
                    {
                        // Keep the exception in the log: without it the reason of the failure is lost.
                        Logger.ErrorFormat(passwordEx, "Error opening file {0} with password", parameters.FileName);
                    }
                }
            }
            else
            {
                // Simply analyze the file without password.
                Logger.DebugFormat("Analyze content JobId: {0} -> Path: {1}", parameters.JobId, pathToFile);
                content = analyzer.GetHtmlContent(pathToFile, "") ?? "";
            }
            success = true;
        }
        catch (Exception ex)
        {
            Logger.ErrorFormat(
                ex,
                "Error extracting tika with analyzer {0} on file {1} - job {2}",
                analyzer.Describe(),
                parameters.FileName,
                parameters.JobId);

            // Move to the next analyzer; null means there is nothing left to try.
            analyzer = BuildAnalyzer(++analyzerOrdinal);
            if (analyzer != null)
            {
                Logger.InfoFormat("Retry job {0} with analyzer {1}", parameters.JobId, analyzer.Describe());
            }
        }
    } while (analyzer != null && !success);

    Logger.DebugFormat("Finished tika on job: {0}, charsNum {1}", parameters.JobId, content.Length);

    String sanitizedContent = content;
    if (!string.IsNullOrWhiteSpace(content))
    {
        var resultContent = _builder.CreateFromTikaPlain(content);
        var documentContent = resultContent.Content;
        sanitizedContent = resultContent.SanitizedTikaContent;

        // Language is detected on the second page when there is more than one page, on the only
        // page otherwise.
        // NOTE(review): with several pages the first page is never used, not even when the
        // second one yields no language - confirm this is intended.
        var pages = documentContent.Pages.Count();
        string lang = null;
        if (pages > 1)
        {
            lang = LanguageDetector.GetLanguage(documentContent.Pages[1].Content);
        }
        if (lang == null && pages == 1)
        {
            lang = LanguageDetector.GetLanguage(documentContent.Pages[0].Content);
        }
        if (lang != null)
        {
            documentContent.AddMetadata(DocumentContent.MedatataLanguage, lang);
        }

        Boolean contentAdded = await AddFormatToDocumentFromObject(
            parameters.TenantId,
            this.QueueName,
            parameters.JobId,
            new DocumentFormat(DocumentFormats.Content),
            documentContent,
            contentFileName,
            new Dictionary<string, object>());
        Logger.DebugFormat("Added format {0} to jobId {1}, result: {2}", DocumentFormats.Content, parameters.JobId, contentAdded);
    }

    // The tika format is written even when no text was extracted (the file is then empty).
    var tikaFileName = Path.Combine(workingFolder, Path.GetFileNameWithoutExtension(parameters.FileName) + ".tika.html");
    tikaFileName = SanitizeFileNameForLength(tikaFileName);
    File.WriteAllText(tikaFileName, sanitizedContent);

    Boolean tikaAdded = await AddFormatToDocumentFromFile(
        parameters.TenantId,
        parameters.JobId,
        new DocumentFormat(DocumentFormats.Tika),
        tikaFileName,
        new Dictionary<string, object>());
    Logger.DebugFormat("Added format {0} to jobId {1}, result: {2}", DocumentFormats.Tika, parameters.JobId, tikaAdded);

    return ProcessResult.Ok;
}
/// <summary>
/// Processes a single job; each concrete job supplies its own implementation.
/// </summary>
/// <param name="parameters">Parameters of the job to process.</param>
/// <param name="workingFolder">
/// Folder made available for the job (presumably a per-job local scratch folder - confirm with the caller).
/// </param>
/// <returns>A task yielding the <c>ProcessResult</c> that describes the outcome of the job.</returns>
protected abstract Task <ProcessResult> OnPolling( PollerJobParameters parameters, String workingFolder);