/// <summary>
/// Downloads the job blob and runs the PDF-to-image task on it, handing every
/// produced page stream to <see cref="Write"/>.
/// </summary>
/// <param name="parameters">Job parameters; <c>JobKeys.ThumbnailFormat</c> must be present in <c>All</c>.</param>
/// <param name="workingFolder">Folder where the blob is downloaded and page images are written.</param>
/// <returns><c>ProcessResult.Ok</c> once the task has completed.</returns>
protected async override Task<ProcessResult> OnPolling(PollerJobParameters parameters, string workingFolder)
{
    String format = parameters.All[JobKeys.ThumbnailFormat];
    Logger.DebugFormat("Conversion for jobId {0} in format {1} starting", parameters.JobId, format);

    var task = _taskFactory();

    // Register every known password for this file before the conversion starts.
    var knownPasswords = ClientPasswordSet.GetPasswordFor(parameters.FileName);
    foreach (var knownPassword in knownPasswords)
    {
        task.Passwords.Add(knownPassword);
    }

    string pathToFile = await DownloadBlob(
        parameters.TenantId,
        parameters.JobId,
        parameters.FileName,
        workingFolder);

    // Properties are assigned in the same order as the original initializer.
    var convertParams = new CreatePdfImageTaskParams();
    convertParams.Dpi = parameters.GetIntOrDefault(JobKeys.Dpi, 150);
    convertParams.FromPage = parameters.GetIntOrDefault(JobKeys.PagesFrom, 1);
    convertParams.Pages = parameters.GetIntOrDefault(JobKeys.PagesCount, 1);
    convertParams.Format = (CreatePdfImageTaskParams.ImageFormat)Enum.Parse(
        typeof(CreatePdfImageTaskParams.ImageFormat),
        format,
        true);

    // The callback closes over folder, parameters and format so the task only
    // has to supply the page index and the page stream.
    await task.Run(
        pathToFile,
        convertParams,
        (pageIndex, pageStream) => Write(workingFolder, parameters, format, pageIndex, pageStream));

    Logger.DebugFormat("Conversion of {0} in format {1} done", parameters.JobId, format);
    return ProcessResult.Ok;
}
/// <summary>
/// Downloads the job blob, unpacks it to plain HTML first when it is an MHT/MHTML
/// archive, converts the HTML file to PDF and adds the PDF as a new format.
/// </summary>
/// <param name="parameters">Job parameters (tenant, job id, file name).</param>
/// <param name="workingFolder">Folder used for the download and for the parser output.</param>
/// <returns><c>ProcessResult.Ok</c> after the PDF format has been added.</returns>
protected async override Task<ProcessResult> OnPolling(PollerJobParameters parameters, string workingFolder)
{
    string pathToFile = await DownloadBlob(
        parameters.TenantId,
        parameters.JobId,
        parameters.FileName,
        workingFolder);

    if (Logger.IsDebugEnabled)
    {
        Logger.DebugFormat("Conversion of HtmlZip to PDF: file {0}", pathToFile);
    }

    var file = pathToFile;

    // Ordinal, case-insensitive comparison: file extensions are not linguistic
    // text, so the check must not depend on the current culture (and it avoids
    // allocating two lower-cased copies of the path).
    bool isMhtmlArchive =
        pathToFile.EndsWith(".mht", StringComparison.OrdinalIgnoreCase) ||
        pathToFile.EndsWith(".mhtml", StringComparison.OrdinalIgnoreCase);

    if (isMhtmlArchive)
    {
        string mhtml = File.ReadAllText(pathToFile);
        MHTMLParser parser = new MHTMLParser(mhtml);
        parser.OutputDirectory = workingFolder;
        parser.DecodeImageData = true;

        // The converter is pointed at the extracted .html file instead of the archive.
        var outFile = Path.ChangeExtension(pathToFile, ".html");
        File.WriteAllText(outFile, parser.getHTMLText());
        file = outFile;
    }

    var converter = new HtmlToPdfConverterFromDiskFileOld(file, base.JobsHostConfiguration)
    {
        Logger = Logger
    };

    var pdfConvertedFileName = converter.Run(parameters.TenantId, parameters.JobId);

    await AddFormatToDocumentFromFile(
        parameters.TenantId,
        parameters.JobId,
        new DocumentFormat(DocumentFormats.Pdf),
        pdfConvertedFileName,
        new Dictionary<string, object>());

    return ProcessResult.Ok;
}
/// <summary>
/// Saves one rendered page to the worker folder and adds it to the document as a
/// raster-image format.
/// </summary>
/// <param name="workerFolder">Folder where the page image file is written.</param>
/// <param name="parameters">Job parameters; the file name is used to build the image file name.</param>
/// <param name="format">Image format, used as the file extension.</param>
/// <param name="pageIndex">Index of the page, embedded in the file name as <c>.page_N</c>.</param>
/// <param name="stream">Stream holding the rendered page; it is copied from its current position.</param>
/// <returns>Always <c>true</c>.</returns>
public async Task<Boolean> Write(String workerFolder, PollerJobParameters parameters, String format, int pageIndex, Stream stream)
{
    var rawFileName = Path.Combine(
        workerFolder,
        Path.GetFileNameWithoutExtension(parameters.FileName) + ".page_" + pageIndex + "." + format);
    rawFileName = SanitizeFileNameForLength(rawFileName);

    // File.Create truncates an existing file. File.OpenWrite would keep the old
    // length, leaving stale trailing bytes if a longer file with the same name
    // was left behind by a previous run.
    using (var outStream = File.Create(rawFileName))
    {
        stream.CopyTo(outStream);
    }

    await AddFormatToDocumentFromFile(
        parameters.TenantId,
        parameters.JobId,
        new DocumentFormat(DocumentFormats.RasterImage),
        rawFileName,
        new Dictionary<string, object>());

    return true;
}
/// <summary>
/// Downloads the source image and, for every requested size, writes a shrunk
/// copy to the working folder and adds it to the document as a
/// <c>thumb.&lt;size name&gt;</c> format.
/// </summary>
/// <param name="parameters">
/// Job parameters; <c>JobKeys.ThumbnailFormat</c> and <c>JobKeys.Sizes</c> must be present in <c>All</c>.
/// </param>
/// <param name="workingFolder">Folder used for the download and for the resized images.</param>
/// <returns><c>ProcessResult.Ok</c> after all sizes have been processed.</returns>
protected async override Task<ProcessResult> OnPolling(PollerJobParameters parameters, string workingFolder)
{
    var fileExtension = parameters.All[JobKeys.ThumbnailFormat];
    ImageFormat format = GetFormatFromExtension(fileExtension);
    var sizesAsString = parameters.All[JobKeys.Sizes];
    var imageSizes = SizeInfoHelper.Deserialize(sizesAsString);

    Logger.DebugFormat("Starting resize job for {0} - {1}", parameters.JobId, sizesAsString);

    string pathToFile = await DownloadBlob(
        parameters.TenantId,
        parameters.JobId,
        parameters.FileName,
        workingFolder);

    using (var sourceStream = File.OpenRead(pathToFile))
    using (var pageStream = new MemoryStream())
    {
        // Buffer the source once so it can be re-read for every size.
        sourceStream.CopyTo(pageStream);

        foreach (var size in imageSizes)
        {
            Logger.DebugFormat("Resize job for {0} - {1}", parameters.JobId, size.Name);
            pageStream.Seek(0, SeekOrigin.Begin);

            var fileFormat = new Client.Model.DocumentFormat("thumb." + size.Name);

            string resizeImagePath = Path.Combine(
                workingFolder,
                String.Format(
                    "{0}.{1}.{2}",
                    Path.GetFileNameWithoutExtension(parameters.FileName),
                    size.Name,
                    fileExtension));
            resizeImagePath = SanitizeFileNameForLength(resizeImagePath);

            // File.Create truncates an existing file; File.OpenWrite would leave
            // stale trailing bytes if a larger file with this name already exists.
            using (var outStream = File.Create(resizeImagePath))
            {
                Logger.DebugFormat("Resizing {0}", parameters.JobId);
                ImageResizer.Shrink(pageStream, outStream, size.Width, size.Height, format);
            }

            await AddFormatToDocumentFromFile(
                parameters.TenantId,
                parameters.JobId,
                fileFormat,
                resizeImagePath,
                new Dictionary<string, object>());
        }
    }

    Logger.DebugFormat("Ended resize job for {0} - {1}", parameters.JobId, sizesAsString);
    return ProcessResult.Ok;
}
/// <summary>
/// Downloads the job blob, copies it next to itself under the file name given in
/// the job parameters, converts that copy from HTML to PDF and adds the PDF as a
/// new format.
/// </summary>
/// <param name="parameters">Job parameters; <c>JobKeys.FileName</c> must be present in <c>All</c>.</param>
/// <param name="workingFolder">Folder the blob is downloaded into.</param>
/// <returns><c>ProcessResult.Ok</c> after the PDF format has been added.</returns>
protected async override Task<ProcessResult> OnPolling(PollerJobParameters parameters, string workingFolder)
{
    string pathToFile = await DownloadBlob(
        parameters.TenantId,
        parameters.JobId,
        parameters.FileName,
        workingFolder);

    String fileName = Path.Combine(Path.GetDirectoryName(pathToFile), parameters.All[JobKeys.FileName]);
    Logger.DebugFormat("Move blob id {0} to real filename {1}", pathToFile, fileName);

    // When the download already has the target name there is nothing to copy.
    // The previous delete-then-copy sequence would delete the source itself in
    // that case and then fail in File.Copy.
    bool sameFile = String.Equals(
        Path.GetFullPath(pathToFile),
        Path.GetFullPath(fileName),
        StringComparison.OrdinalIgnoreCase);

    if (!sameFile)
    {
        // Overwrite any stale file left by a previous run.
        File.Copy(pathToFile, fileName, true);
    }

    var converter = new HtmlToPdfConverterFromDiskFile(fileName, base.JobsHostConfiguration)
    {
        Logger = Logger
    };

    var pdfConvertedFileName = converter.Run(parameters.TenantId, parameters.JobId);

    await AddFormatToDocumentFromFile(
        parameters.TenantId,
        parameters.JobId,
        new DocumentFormat(DocumentFormats.Pdf),
        pdfConvertedFileName,
        new Dictionary<string, object>());

    return ProcessResult.Ok;
}
/// <summary>
/// Downloads the job blob, asks the injected conversion to produce a "pdf"
/// output from it and adds the result to the document as the PDF format.
/// </summary>
/// <param name="parameters">Job parameters (tenant, job id, file name).</param>
/// <param name="workingFolder">Folder the blob is downloaded into.</param>
/// <returns><c>ProcessResult.Ok</c> after the PDF format has been added.</returns>
protected async override Task<ProcessResult> OnPolling(PollerJobParameters parameters, string workingFolder)
{
    Logger.DebugFormat(
        "Delegating conversion of file {0} to libreoffice",
        parameters.JobId);

    // The libreoffice conversion is registered per tenant.
    string downloadedFile = await DownloadBlob(
        parameters.TenantId,
        parameters.JobId,
        parameters.FileName,
        workingFolder);

    Logger.DebugFormat("Downloaded file {0} to be converted to pdf", downloadedFile);

    var pdfFile = _conversion.Run(downloadedFile, "pdf");

    await AddFormatToDocumentFromFile(
        parameters.TenantId,
        parameters.JobId,
        new DocumentFormat(DocumentFormats.Pdf),
        pdfFile,
        new Dictionary<string, object>());

    return ProcessResult.Ok;
}
/// <summary>
/// Creates a thumbnail for the job blob through the VLC command line and, when
/// one is produced, adds it to the document as a raster-image format.
/// </summary>
/// <param name="parameters">
/// Job parameters; the thumbnail format defaults to "png" and the
/// "thumb_seconds_offset" value defaults to "10" when absent.
/// </param>
/// <param name="workingFolder">Folder handed to the thumbnail creator.</param>
/// <returns><c>ProcessResult.Ok</c>, whether or not a thumbnail was produced.</returns>
/// <exception cref="ApplicationException">The VLC executable cannot be found.</exception>
protected async override Task<ProcessResult> OnPolling(PollerJobParameters parameters, string workingFolder)
{
    String format = parameters.All.GetOrDefault(JobKeys.ThumbnailFormat) ?? "png";
    String rawOffset = parameters.All.GetOrDefault("thumb_seconds_offset") ?? "10";
    Int32 secondsOffset = Int32.Parse(rawOffset);

    Logger.DebugFormat("Conversion for jobId {0} in format {1} starting", parameters.JobId, format);

    String vlcExecutable = Helper.GetExecutableLocation();
    bool vlcAvailable = File.Exists(vlcExecutable);
    if (!vlcAvailable)
    {
        String error = "Unable to find VLC.exe executable in standard folders. You can specify VLC directory with 'vlc_location' job parameter or with 'vlc_location' app config configuration";
        Logger.ErrorFormat(error);

        // Keep the console open so the operator can read the message before the failure propagates.
        Console.WriteLine("Unable to start converter, press a key to close.");
        Console.ReadKey();
        throw new ApplicationException(error);
    }

    var thumbnailCreator = new VlcCommandLineThumbnailCreator(vlcExecutable, format, Logger);

    // The creator is given the blob URI rather than a locally downloaded file.
    String networkStream = base.GetBlobUriForJob(parameters.TenantId, parameters.JobId);
    String thumbNail = thumbnailCreator.CreateThumbnail(networkStream, workingFolder, secondsOffset);

    if (!String.IsNullOrEmpty(thumbNail))
    {
        await AddFormatToDocumentFromFile(
            parameters.TenantId,
            parameters.JobId,
            new DocumentFormat(DocumentFormats.RasterImage),
            thumbNail,
            new Dictionary<string, object>());

        Logger.DebugFormat("Conversion of {0} in format {1} done", parameters.JobId, format);
    }
    else
    {
        Logger.WarnFormat("Conversion returned no thumbnail for file {0} - job {1}", parameters.FileName, parameters.JobId);
    }

    return ProcessResult.Ok;
}
/// <summary>
/// Downloads the mail message blob, converts it with
/// <c>MailMessageToHtmlConverterTask</c> and adds the converter output to the
/// document as the Email format.
/// </summary>
/// <param name="parameters">Job parameters (tenant, job id, file name).</param>
/// <param name="workingFolder">Folder used for the download and passed to the converter.</param>
/// <returns><c>ProcessResult.Ok</c> after the format has been added.</returns>
protected async override Task<ProcessResult> OnPolling(PollerJobParameters parameters, string workingFolder)
{
    var converterTask = new MailMessageToHtmlConverterTask();
    converterTask.Logger = Logger;

    string downloadedMail = await DownloadBlob(
        parameters.TenantId,
        parameters.JobId,
        parameters.FileName,
        workingFolder);

    var convertedZip = converterTask.Convert(parameters.JobId, downloadedMail, workingFolder);

    await AddFormatToDocumentFromFile(
        parameters.TenantId,
        parameters.JobId,
        new DocumentFormat(DocumentFormats.Email),
        convertedZip,
        new Dictionary<string, object>());

    return ProcessResult.Ok;
}
/// <summary>
/// Uploads each file as a "content_zip" attachment of the job handle, skipping
/// files whose extension is not in the permitted list.
/// </summary>
/// <param name="parameters">Job parameters (tenant and job id).</param>
/// <param name="permittedExtension">
/// Allowed extensions without the leading dot, compared case-insensitively;
/// <c>null</c> allows every file.
/// </param>
/// <param name="unzippingDirectory">
/// Directory prefix stripped from each file path to obtain the relative name.
/// </param>
/// <param name="files">Paths of the candidate files.</param>
/// <returns>Number of files actually uploaded.</returns>
private async Task<Int32> UploadAttachmentListToDocumentStore(PollerJobParameters parameters, string[] permittedExtension, string unzippingDirectory, IEnumerable<string> files)
{
    Int32 uploaded = 0;

    foreach (string candidate in files)
    {
        var extension = Path.GetExtension(candidate).Trim('.');

        bool isPermitted =
            permittedExtension == null ||
            permittedExtension.Contains(extension, StringComparer.OrdinalIgnoreCase);

        if (isPermitted)
        {
            // The name relative to the unzip folder is whatever follows that prefix.
            var relativeFileName = candidate.Substring(unzippingDirectory.Length);

            await AddAttachmentToHandle(
                parameters.TenantId,
                parameters.JobId,
                candidate,
                "content_zip",
                relativeFileName,
                new Dictionary<string, object>());

            uploaded++;
        }
        else
        {
            Logger.DebugFormat("job: {0} File {1} attachment is discharded because extension {2} is not permitted", parameters.JobId, candidate, extension);
        }
    }

    return uploaded;
}
/// <summary>
/// Job-specific processing step that each concrete poller job must implement.
/// </summary>
/// <param name="parameters">Parameters of the job being processed.</param>
/// <param name="workingFolder">
/// Folder available to the job for local files.
/// NOTE(review): presumably a per-job temporary folder; confirm against the caller.
/// </param>
/// <returns>A task producing the <c>ProcessResult</c> of the job.</returns>
protected abstract Task<ProcessResult> OnPolling( PollerJobParameters parameters, String workingFolder);
/// <summary>
/// Builds a <c>PollerJobParameters</c> from a queued job: the well-known values
/// are read through <c>SafeGetParameter</c>, and the job's full parameter
/// collection is exposed through <c>All</c>.
/// </summary>
/// <param name="nextJob">Queued job to read the id and parameters from.</param>
/// <returns>The populated parameters object.</returns>
private static PollerJobParameters ExtractJobParameters(QueuedJobDto nextJob)
{
    // Members are initialized in the same order as the original assignments.
    return new PollerJobParameters
    {
        FileExtension = SafeGetParameter(nextJob, JobKeys.FileExtension),
        FileName = SafeGetParameter(nextJob, JobKeys.FileName),
        InputDocumentFormat = new DocumentFormat(SafeGetParameter(nextJob, JobKeys.Format)),
        JobId = nextJob.Id,
        TenantId = SafeGetParameter(nextJob, JobKeys.TenantId),
        All = nextJob.Parameters
    };
}
/// <summary>
/// Extracts the HTML content of the job blob with the available analyzers and
/// adds it to the document as the Content format (when non-blank) and as the
/// Tika format.
/// </summary>
/// <remarks>
/// Files with an unsupported extension, or rejected by the filter manager, get a
/// null Content format instead. When an analyzer throws, the next analyzer (by
/// ordinal) is tried until one succeeds or no analyzer is left.
/// </remarks>
/// <param name="parameters">Job parameters (tenant, job id, file name and extension).</param>
/// <param name="workingFolder">Folder used for the download and for the generated tika file.</param>
/// <returns>
/// The result of adding the null content for skipped files, otherwise <c>ProcessResult.Ok</c>.
/// </returns>
protected async override Task<ProcessResult> OnPolling(
    PollerJobParameters parameters,
    String workingFolder)
{
    Boolean result;
    var contentFileName = Path.ChangeExtension(parameters.FileName, ".content");

    if (!_formats.Contains(parameters.FileExtension))
    {
        Logger.DebugFormat("Document for job Id {0} has an extension not supported, setting null content", parameters.JobId);
        return new ProcessResult(await AddNullContentFormat(parameters, contentFileName));
    }

    Logger.DebugFormat("Starting tika on job: {0}, file extension {1}", parameters.JobId, parameters.FileExtension);
    Logger.DebugFormat("Downloading blob for job: {0}, on local path {1}", parameters.JobId, workingFolder);

    string pathToFile = await DownloadBlob(parameters.TenantId, parameters.JobId, parameters.FileName, workingFolder);
    pathToFile = ProcessFile(pathToFile, workingFolder);

    Boolean shouldAnalyze = _filterManager.ShouldAnalyze(parameters.FileName, pathToFile);
    if (!shouldAnalyze)
    {
        Logger.InfoFormat("File {0} for job {1} was discharded!", parameters.FileName, parameters.JobId);
        return new ProcessResult(await AddNullContentFormat(parameters, contentFileName));
    }

    Logger.DebugFormat("Search for password JobId:{0}", parameters.JobId);
    var passwords = ClientPasswordSet.GetPasswordFor(parameters.FileName).ToArray();

    String content = "";
    Int32 analyzerOrdinal = 0;
    Boolean success = false;
    var analyzer = BuildAnalyzer(analyzerOrdinal);
    do
    {
        try
        {
            if (passwords.Any())
            {
                // Try every known password; the first one that opens the file wins.
                foreach (var password in passwords)
                {
                    try
                    {
                        content = analyzer.GetHtmlContent(pathToFile, password) ?? "";
                        break;
                    }
                    catch (Exception ex)
                    {
                        // Keep going with the next password, but record why this one
                        // failed instead of dropping the exception.
                        Logger.ErrorFormat(ex, "Error opening file {0} with password", parameters.FileName);
                    }
                }
            }
            else
            {
                // No password known: analyze the file as-is.
                Logger.DebugFormat("Analyze content JobId: {0} -> Path: {1}", parameters.JobId, pathToFile);
                content = analyzer.GetHtmlContent(pathToFile, "") ?? "";
            }
            success = true;
        }
        catch (Exception ex)
        {
            // The job id was already passed as an argument but never rendered;
            // the format string now includes it.
            Logger.ErrorFormat(ex, "Error extracting tika with analyzer {0} on file {1} for job {2}", analyzer.Describe(), parameters.FileName, parameters.JobId);

            // Fall back to the next analyzer; a null analyzer ends the loop.
            analyzer = BuildAnalyzer(++analyzerOrdinal);
            if (analyzer != null)
            {
                Logger.InfoFormat("Retry job {0} with analyzer {1}", parameters.JobId, analyzer.Describe());
            }
        }
    } while (analyzer != null && success == false);

    Logger.DebugFormat("Finished tika on job: {0}, charsNum {1}", parameters.JobId, content.Length);

    String sanitizedContent = content;
    if (!string.IsNullOrWhiteSpace(content))
    {
        var resultContent = _builder.CreateFromTikaPlain(content);
        var documentContent = resultContent.Content;
        sanitizedContent = resultContent.SanitizedTikaContent;

        var pages = documentContent.Pages.Count();
        string lang = null;

        // With more than one page the second page is used for detection;
        // a single-page document uses its only page.
        if (pages > 1)
        {
            lang = LanguageDetector.GetLanguage(documentContent.Pages[1].Content);
        }

        if (lang == null && pages == 1)
        {
            lang = LanguageDetector.GetLanguage(documentContent.Pages[0].Content);
        }

        if (lang != null)
        {
            documentContent.AddMetadata(DocumentContent.MedatataLanguage, lang);
        }

        result = await AddFormatToDocumentFromObject(
            parameters.TenantId,
            this.QueueName,
            parameters.JobId,
            new DocumentFormat(DocumentFormats.Content),
            documentContent,
            contentFileName,
            new Dictionary<string, object>());

        Logger.DebugFormat("Added format {0} to jobId {1}, result: {2}", DocumentFormats.Content, parameters.JobId, result);
    }

    // The tika format is always written, even when the content is empty.
    var tikaFileName = Path.Combine(workingFolder, Path.GetFileNameWithoutExtension(parameters.FileName) + ".tika.html");
    tikaFileName = SanitizeFileNameForLength(tikaFileName);
    File.WriteAllText(tikaFileName, sanitizedContent);

    result = await AddFormatToDocumentFromFile(
        parameters.TenantId,
        parameters.JobId,
        new DocumentFormat(DocumentFormats.Tika),
        tikaFileName,
        new Dictionary<string, object>());

    Logger.DebugFormat("Added format {0} to jobId {1}, result: {2}", DocumentFormats.Tika, parameters.JobId, result);
    return ProcessResult.Ok;
}
/// <summary>
/// Adds <c>DocumentContent.NullContent</c> to the document as its Content format.
/// </summary>
/// <param name="parameters">Job parameters (tenant and job id).</param>
/// <param name="contentFileName">File name to associate with the content format.</param>
/// <returns>The result reported by <c>AddFormatToDocumentFromObject</c>.</returns>
private async Task<bool> AddNullContentFormat(
    PollerJobParameters parameters,
    string contentFileName)
{
    bool added = await AddFormatToDocumentFromObject(
        parameters.TenantId,
        this.QueueName,
        parameters.JobId,
        new DocumentFormat(DocumentFormats.Content),
        DocumentContent.NullContent,
        contentFileName,
        new Dictionary<string, object>());

    return added;
}