protected async override Task <ProcessResult> OnPolling(PollerJobParameters parameters, string workingFolder)
        {
            // Downloads the job blob, asks ConvertFile to produce a PDF next to it and, when the
            // conversion reports success, attaches that PDF to the document.
            Logger.InfoFormat("Delegating conversion of file {0} to Office automation", parameters.JobId);

            // NOTE: per the original author's remark, the conversion is registered per tenant.
            string sourcePath = await DownloadBlob(parameters.TenantId, parameters.JobId, parameters.FileName, workingFolder).ConfigureAwait(false);
            Logger.DebugFormat("Downloaded file {0} to be converted to pdf", sourcePath);

            // The PDF is written beside the downloaded file, same name with a .pdf extension.
            string pdfPath = Path.ChangeExtension(sourcePath, ".pdf");
            ProcessResult outcome = ConvertFile(sourcePath, pdfPath);

            if (outcome == null)
            {
                return ProcessResult.Fail("Unknown error during office conversion");
            }

            if (!outcome.Result)
            {
                // Failed conversions are handed back untouched so the caller sees the original outcome.
                return outcome;
            }

            Logger.InfoFormat("File {0} correctly converted to PDF with office automation", parameters.FileName);
            await AddFormatToDocumentFromFile(
                parameters.TenantId,
                parameters.JobId,
                new Client.Model.DocumentFormat(DocumentFormats.Pdf),
                pdfPath,
                new Dictionary <string, object>()).ConfigureAwait(false);

            return outcome;
        }
Exemple #2
0
        /// <summary>
        /// Uploads each file of <paramref name="files"/> as an attachment of the job, skipping files
        /// whose extension is not listed in <paramref name="permittedExtension"/>.
        /// </summary>
        /// <param name="parameters">Job parameters supplying tenant id and job id.</param>
        /// <param name="permittedExtension">Allowed extensions without the dot, compared ignoring case; null disables filtering.</param>
        /// <param name="unzippingDirectory">Directory prefix stripped from each file path to build the relative name.</param>
        /// <param name="files">Paths of the candidate files.</param>
        /// <returns>The number of files that were uploaded.</returns>
        private async Task <Int32> UploadAttachmentListToDocumentStore(PollerJobParameters parameters, string[] permittedExtension, string unzippingDirectory, IEnumerable <string> files)
        {
            Int32   uploaded          = 0;
            Boolean filterByExtension = permittedExtension != null;

            foreach (string candidate in files)
            {
                string  extension = Path.GetExtension(candidate).Trim('.');
                Boolean allowed   = !filterByExtension ||
                                    permittedExtension.Contains(extension, StringComparer.OrdinalIgnoreCase);

                if (allowed)
                {
                    // The relative name is whatever follows the unzip directory prefix in the full path.
                    string relativeName = candidate.Substring(unzippingDirectory.Length);
                    await AddAttachmentToHandle(
                        parameters.TenantId,
                        parameters.JobId,
                        candidate,
                        "content_zip",
                        relativeName,
                        new Dictionary <string, object>());

                    uploaded++;
                }
                else
                {
                    Logger.DebugFormat("job: {0} File {1} attachment is discharded because extension {2} is not permitted",
                                       parameters.JobId, candidate, extension);
                }
            }

            return uploaded;
        }
Exemple #3
0
        /// <summary>
        /// Downloads the job blob, copies it into the same directory under the file name stored in
        /// the job parameters, converts that copy to PDF and attaches the PDF to the document.
        /// </summary>
        /// <param name="parameters">Job parameters; <c>All[JobKeys.FileName]</c> supplies the real file name.</param>
        /// <param name="workingFolder">Folder the blob is downloaded into.</param>
        /// <returns><c>ProcessResult.Ok</c> once the PDF has been attached.</returns>
        protected async override Task <ProcessResult> OnPolling(PollerJobParameters parameters, string workingFolder)
        {
            string pathToFile = await DownloadBlob(parameters.TenantId, parameters.JobId, parameters.FileName, workingFolder).ConfigureAwait(false);

            String fileName = Path.Combine(Path.GetDirectoryName(pathToFile), parameters.All[JobKeys.FileName]);

            Logger.DebugFormat("Move blob id {0} to real filename {1}", pathToFile, fileName);

            // When the blob was already downloaded under its real name, deleting the target first
            // would remove the only copy of the file and the following copy would fail. In that
            // case the file is already where the converter expects it, so nothing has to be copied.
            Boolean alreadyInPlace = String.Equals(
                Path.GetFullPath(pathToFile),
                Path.GetFullPath(fileName),
                StringComparison.Ordinal);

            if (!alreadyInPlace)
            {
                // overwrite: true replaces a stale file left by a previous run.
                File.Copy(pathToFile, fileName, true);
            }

            var converter = new HtmlToPdfConverterFromDiskFile(fileName, base.JobsHostConfiguration)
            {
                Logger = Logger
            };

            var pdfConvertedFileName = converter.Run(parameters.TenantId, parameters.JobId);

            await AddFormatToDocumentFromFile(
                parameters.TenantId,
                parameters.JobId,
                new DocumentFormat(DocumentFormats.Pdf),
                pdfConvertedFileName,
                new Dictionary <string, object>()).ConfigureAwait(false);

            return(ProcessResult.Ok);
        }
Exemple #4
0
        /// <summary>
        /// Tells whether the job carries the <c>JobKeys.Force</c> parameter with the value "true"
        /// (compared ignoring case). A missing parameter counts as not forced.
        /// </summary>
        protected Boolean IsForced(PollerJobParameters parameters)
        {
            String  rawFlag;
            Boolean hasFlag = parameters.All.TryGetValue(JobKeys.Force, out rawFlag);

            return hasFlag && "true".Equals(rawFlag, StringComparison.OrdinalIgnoreCase);
        }
Exemple #5
0
        /// <summary>
        /// Tells whether the job's <c>JobKeys.PipelineId</c> parameter equals
        /// <paramref name="pipeline"/>, ignoring case. A missing parameter yields false.
        /// </summary>
        protected Boolean FromPipelineId(PollerJobParameters parameters, String pipeline)
        {
            String jobPipeline;

            // pipeline is only dereferenced when the job actually carries a pipeline id.
            return parameters.All.TryGetValue(JobKeys.PipelineId, out jobPipeline)
                ? pipeline.Equals(jobPipeline, StringComparison.OrdinalIgnoreCase)
                : false;
        }
Exemple #6
0
 /// <summary>
 /// Attaches <c>DocumentContent.NullContent</c> to the job's document as its Content format,
 /// using <paramref name="contentFileName"/> as the file name.
 /// </summary>
 /// <returns>The boolean produced by <c>AddFormatToDocumentFromObject</c>.</returns>
 private async Task <bool> AddNullContentFormat(
     PollerJobParameters parameters, string contentFileName)
 {
     var pending = AddFormatToDocumentFromObject(
         parameters.TenantId,
         this.QueueName,
         parameters.JobId,
         new DocumentFormat(DocumentFormats.Content),
         DocumentContent.NullContent,
         contentFileName,
         new Dictionary <string, object>());

     bool added = await pending;
     return added;
 }
Exemple #7
0
        /// <summary>
        /// Builds a <c>PollerJobParameters</c> from a queued job: the well-known values are read
        /// through <c>SafeGetParameter</c>, while the raw parameter collection is exposed as <c>All</c>.
        /// </summary>
        private static PollerJobParameters ExtractJobParameters(QueuedJobDto nextJob)
        {
            return new PollerJobParameters
            {
                FileExtension       = SafeGetParameter(nextJob, JobKeys.FileExtension),
                FileName            = SafeGetParameter(nextJob, JobKeys.FileName),
                InputDocumentFormat = new DocumentFormat(SafeGetParameter(nextJob, JobKeys.Format)),
                JobId               = nextJob.Id,
                TenantId            = SafeGetParameter(nextJob, JobKeys.TenantId),
                All                 = nextJob.Parameters
            };
        }
Exemple #8
0
        /// <summary>
        /// Opens the downloaded blob as a Solid Edge document, saves its thumbnail as an image file
        /// and attaches that file to the document as a raster image.
        /// </summary>
        /// <param name="parameters">Job parameters; <c>JobKeys.ThumbnailFormat</c> selects "png" or "bmp" (default "png").</param>
        /// <param name="workingFolder">Folder the blob is downloaded into.</param>
        /// <returns><c>ProcessResult.Ok</c> when the thumbnail file was produced, a failure otherwise.</returns>
        protected async override Task <ProcessResult> OnPolling(PollerJobParameters parameters, string workingFolder)
        {
            // Invariant lower-casing: the value is a machine identifier, not user-facing text.
            String      format = parameters.All.GetOrDefault(JobKeys.ThumbnailFormat)?.ToLowerInvariant() ?? "png";
            ImageFormat imageFormat;

            switch (format)
            {
            case "png":
                imageFormat = ImageFormat.Png;
                break;

            case "bmp":
                imageFormat = ImageFormat.Bmp;
                break;

            default:
                // Unsupported formats are encoded as PNG. The format name is reset as well so the
                // extension of the thumbnail file matches the bytes that are actually written.
                imageFormat = ImageFormat.Png;
                format      = "png";
                break;
            }
            Logger.DebugFormat("Conversion for jobId {0} in format {1} starting", parameters.JobId, format);
            string pathToFile = await DownloadBlob(parameters.TenantId, parameters.JobId, parameters.FileName, workingFolder).ConfigureAwait(false);

            using (SolidEdgeDocument document = SolidEdgeDocument.Open(pathToFile))
            {
                // Diagnostic dump of the document properties.
                Logger.Debug(String.Format("ClassId: '{0}'", document.ClassId));
                Logger.Debug(String.Format("CreatedVersion: '{0}'", document.CreatedVersion));
                Logger.Debug(String.Format("LastSavedVersion: '{0}'", document.LastSavedVersion));
                Logger.Debug(String.Format("Created: '{0}'", document.Created));
                Logger.Debug(String.Format("LastModified: '{0}'", document.LastModified));
                Logger.Debug(String.Format("Status: '{0}'", document.Status));

                // The thumbnail is written beside the downloaded file, with the format as extension.
                String thumbFileName = Path.ChangeExtension(pathToFile, "." + format);
                using (Bitmap bitmap = document.GetThumbnail())
                {
                    bitmap.Save(thumbFileName, imageFormat);
                }
                if (File.Exists(thumbFileName))
                {
                    await AddFormatToDocumentFromFile(
                        parameters.TenantId,
                        parameters.JobId,
                        new DocumentFormat(DocumentFormats.RasterImage),
                        thumbFileName,
                        new Dictionary <string, object>());

                    Logger.DebugFormat("Conversion of {0} in format {1} done", parameters.JobId, format);
                    return(ProcessResult.Ok);
                }
            }

            return(ProcessResult.Fail("Unable to extract thumbnail"));
        }
        /// <summary>
        /// Downloads the job blob, unpacks it to plain HTML when it is an MHT/MHTML archive, runs
        /// the HTML through <c>SafeHtmlConverter</c>, converts the result to PDF and attaches the PDF.
        /// </summary>
        protected async override Task <ProcessResult> OnPolling(PollerJobParameters parameters, string workingFolder)
        {
            string downloadedPath = await DownloadBlob(parameters.TenantId, parameters.JobId, parameters.FileName, workingFolder).ConfigureAwait(false);

            if (Logger.IsDebugEnabled)
            {
                Logger.DebugFormat("Conversion of HtmlZip to PDF: file {0}", downloadedPath);
            }

            Boolean isMhtmlArchive =
                downloadedPath.EndsWith(".mht", StringComparison.OrdinalIgnoreCase) ||
                downloadedPath.EndsWith(".mhtml", StringComparison.OrdinalIgnoreCase);

            string htmlPath = downloadedPath;

            if (isMhtmlArchive)
            {
                // The archive is parsed with the working folder as output directory and its HTML
                // part is written beside the original under a .html extension.
                var archiveParser = new MHTMLParser(File.ReadAllText(downloadedPath))
                {
                    OutputDirectory = workingFolder,
                    DecodeImageData = true
                };

                string extractedHtml = Path.ChangeExtension(downloadedPath, ".html");
                File.WriteAllText(extractedHtml, archiveParser.getHTMLText());
                htmlPath = extractedHtml;
            }

            var htmlSanitizer = new SafeHtmlConverter(htmlPath)
            {
                Logger = Logger
            };
            htmlPath = htmlSanitizer.Run(parameters.JobId);

            var pdfConverter = new HtmlToPdfConverterFromDiskFile(htmlPath, base.JobsHostConfiguration)
            {
                Logger = Logger
            };
            var pdfPath = pdfConverter.Run(parameters.TenantId, parameters.JobId);

            await AddFormatToDocumentFromFile(
                parameters.TenantId,
                parameters.JobId,
                new DocumentFormat(DocumentFormats.Pdf),
                pdfPath,
                new Dictionary <string, object>()).ConfigureAwait(false);

            return ProcessResult.Ok;
        }
Exemple #10
0
        /// <summary>
        /// Produces one resized copy of the job's image for every size listed in the
        /// <c>JobKeys.Sizes</c> parameter and attaches each copy to the document as a
        /// "thumb." + size name format.
        /// </summary>
        /// <param name="parameters">Job parameters; <c>JobKeys.ThumbnailFormat</c> and <c>JobKeys.Sizes</c> must be present.</param>
        /// <param name="workingFolder">Folder used for the downloaded blob and the resized files.</param>
        /// <returns><c>ProcessResult.Ok</c> once every size has been processed.</returns>
        protected async override Task <ProcessResult> OnPolling(PollerJobParameters parameters, string workingFolder)
        {
            var         fileExtension = parameters.All[JobKeys.ThumbnailFormat];
            ImageFormat format        = GetFormatFromExtension(fileExtension);
            var         sizesAsString = parameters.All[JobKeys.Sizes];
            var         imageSizes    = SizeInfoHelper.Deserialize(sizesAsString);

            Logger.DebugFormat("Starting resize job for {0} - {1}", parameters.JobId, sizesAsString);

            string pathToFile = await DownloadBlob(parameters.TenantId, parameters.JobId, parameters.FileName, workingFolder).ConfigureAwait(false);

            using (var sourceStream = File.OpenRead(pathToFile))
            using (var pageStream = new MemoryStream())
            {
                // The source image is buffered once in memory and rewound for every size.
                sourceStream.CopyTo(pageStream);

                foreach (var size in imageSizes)
                {
                    Logger.DebugFormat("Resize job for {0} - {1}", parameters.JobId, size.Name);
                    pageStream.Seek(0, SeekOrigin.Begin);
                    var fileFormat = new Client.Model.DocumentFormat("thumb." + size.Name);

                    string resizeImagePath = Path.Combine(
                        workingFolder,
                        String.Format("{0}.{1}.{2}", Path.GetFileNameWithoutExtension(parameters.FileName), size.Name, fileExtension));
                    resizeImagePath = SanitizeFileNameForLength(resizeImagePath);

                    // File.Create truncates an existing file. File.OpenWrite would keep the old
                    // length and leave stale trailing bytes after a shorter image is written.
                    using (var outStream = File.Create(resizeImagePath))
                    {
                        Logger.DebugFormat("Resizing {0}", parameters.JobId);
                        ImageResizer.Shrink(pageStream, outStream, size.Width, size.Height, format);
                    }
                    await AddFormatToDocumentFromFile(
                        parameters.TenantId,
                        parameters.JobId,
                        fileFormat,
                        resizeImagePath, new Dictionary <string, object>()).ConfigureAwait(false);
                }
            }

            Logger.DebugFormat("Ended resize job for {0} - {1}", parameters.JobId, sizesAsString);
            return(ProcessResult.Ok);
        }
Exemple #11
0
        /// <summary>
        /// Renders pages of the job's PDF to images through the task created by <c>_taskFactory</c>;
        /// every rendered page is handed to <c>Write</c>.
        /// </summary>
        protected async override Task <ProcessResult> OnPolling(PollerJobParameters parameters, string workingFolder)
        {
            // An existing raster image may be reused only when the job is neither forced nor coming
            // from the "office" pipeline: PDFs generated from office files can be regenerated after
            // a faulty conversion (LibreOffice or MS Office), so their images must be rebuilt.
            Boolean mayReuseExisting = !(IsForced(parameters) || FromPipelineId(parameters, "office"));
            if (mayReuseExisting)
            {
                var existingFormats = GetFormats(parameters.TenantId, parameters.JobId);
                if (existingFormats.Any(existing => existing == DocumentFormats.RasterImage))
                {
                    return ProcessResult.Ok;
                }
            }

            String format = parameters.All[JobKeys.ThumbnailFormat];
            Logger.DebugFormat("Conversion for jobId {0} in format {1} starting", parameters.JobId, format);

            // Every password known for this file name is registered on the task.
            var renderTask = _taskFactory();
            foreach (var knownPassword in ClientPasswordSet.GetPasswordFor(parameters.FileName))
            {
                renderTask.Passwords.Add(knownPassword);
            }

            string pdfPath = await DownloadBlob(parameters.TenantId, parameters.JobId, parameters.FileName, workingFolder).ConfigureAwait(false);

            var renderOptions = new CreatePdfImageTaskParams();
            renderOptions.Dpi      = parameters.GetIntOrDefault(JobKeys.Dpi, 150);
            renderOptions.FromPage = parameters.GetIntOrDefault(JobKeys.PagesFrom, 1);
            renderOptions.Pages    = parameters.GetIntOrDefault(JobKeys.PagesCount, 1);
            renderOptions.Format   = (CreatePdfImageTaskParams.ImageFormat)Enum.Parse(
                typeof(CreatePdfImageTaskParams.ImageFormat), format, true);

            // The callback binds folder, parameters and format so the task only supplies page index and stream.
            await renderTask.Run(
                pdfPath,
                renderOptions,
                (pageIndex, pageStream) => Write(workingFolder, parameters, format, pageIndex, pageStream)
                ).ConfigureAwait(false);

            Logger.DebugFormat("Conversion of {0} in format {1} done", parameters.JobId, format);
            return ProcessResult.Ok;
        }
Exemple #12
0
        /// <summary>
        /// Saves the content of <paramref name="stream"/> into the worker folder as
        /// "&lt;file name&gt;.page_&lt;pageIndex&gt;.&lt;format&gt;" and attaches that file to the
        /// document as a raster image.
        /// </summary>
        /// <param name="workerFolder">Folder the page file is written into.</param>
        /// <param name="parameters">Job parameters supplying tenant id, job id and source file name.</param>
        /// <param name="format">Extension used for the page file.</param>
        /// <param name="pageIndex">Index embedded in the page file name.</param>
        /// <param name="stream">Stream whose content is copied to the page file.</param>
        /// <returns>Always true once the file has been attached.</returns>
        public async Task <Boolean> Write(String workerFolder, PollerJobParameters parameters, String format, int pageIndex, Stream stream)
        {
            var rawFileName = Path.Combine(workerFolder, Path.GetFileNameWithoutExtension(parameters.FileName) + ".page_" + pageIndex + "." + format);

            rawFileName = SanitizeFileNameForLength(rawFileName);

            // File.Create truncates an existing file. File.OpenWrite would keep the old length and
            // leave stale trailing bytes when a shorter page replaces a file from an earlier run.
            using (var outStream = File.Create(rawFileName))
            {
                stream.CopyTo(outStream);
            }

            await AddFormatToDocumentFromFile(
                parameters.TenantId,
                parameters.JobId,
                new DocumentFormat(DocumentFormats.RasterImage),
                rawFileName,
                new Dictionary <string, object>()).ConfigureAwait(false);

            return(true);
        }
Exemple #13
0
        /// <summary>
        /// Creates a thumbnail for the job's blob with <c>VlcCommandLineThumbnailCreator</c>, feeding
        /// it the blob URI, and attaches the resulting file as a raster image when one is produced.
        /// </summary>
        /// <exception cref="ApplicationException">The VLC executable cannot be found.</exception>
        protected async override Task <ProcessResult> OnPolling(PollerJobParameters parameters, string workingFolder)
        {
            String format        = parameters.All.GetOrDefault(JobKeys.ThumbnailFormat) ?? "png";
            String rawOffset     = parameters.All.GetOrDefault("thumb_seconds_offset") ?? "10";
            Int32  secondsOffset = Int32.Parse(rawOffset);

            Logger.DebugFormat("Conversion for jobId {0} in format {1} starting", parameters.JobId, format);

            String vlcPath = Helper.GetExecutableLocation();
            if (!File.Exists(vlcPath))
            {
                String missingVlc = "Unable to find VLC.exe executable in standard folders. You can specify VLC directory with 'vlc_location' job parameter or with 'vlc_location' app config configuration";
                Logger.ErrorFormat(missingVlc);

                // The host waits for a key press before the failure is raised.
                Console.WriteLine("Unable to start converter, press a key to close.");
                Console.ReadKey();
                throw new ApplicationException(missingVlc);
            }

            var thumbnailCreator = new VlcCommandLineThumbnailCreator(vlcPath, format, Logger);

            // The blob is not downloaded here: the thumbnail creator receives its URI.
            String blobUri       = base.GetBlobUriForJobBlob(parameters.TenantId, parameters.JobId);
            String thumbnailPath = thumbnailCreator.CreateThumbnail(blobUri, workingFolder, secondsOffset);

            if (!String.IsNullOrEmpty(thumbnailPath))
            {
                await AddFormatToDocumentFromFile(
                    parameters.TenantId,
                    parameters.JobId,
                    new DocumentFormat(DocumentFormats.RasterImage),
                    thumbnailPath,
                    new Dictionary <string, object>());

                Logger.DebugFormat("Conversion of {0} in format {1} done", parameters.JobId, format);
            }
            else
            {
                // A missing thumbnail is only logged; the job still completes with Ok.
                Logger.WarnFormat("Conversion returned no thumbnail for file {0} - job {1}", parameters.FileName, parameters.JobId);
            }

            return ProcessResult.Ok;
        }
Exemple #14
0
        /// <summary>
        /// Downloads the job blob, unpacks it to plain HTML when it is an MHT/MHTML archive,
        /// converts it to PDF with <c>HtmlToPdfConverterFromDiskFileOld</c> and attaches the PDF.
        /// </summary>
        /// <param name="parameters">Job parameters supplying tenant id, job id and file name.</param>
        /// <param name="workingFolder">Folder the blob is downloaded into and the archive is unpacked to.</param>
        /// <returns><c>ProcessResult.Ok</c> once the PDF has been attached.</returns>
        protected async override Task <ProcessResult> OnPolling(PollerJobParameters parameters, string workingFolder)
        {
            string pathToFile = await DownloadBlob(parameters.TenantId, parameters.JobId, parameters.FileName, workingFolder).ConfigureAwait(false);

            if (Logger.IsDebugEnabled)
            {
                Logger.DebugFormat("Conversion of HtmlZip to PDF: file {0}", pathToFile);
            }

            var file = pathToFile;

            // File extensions are identifiers, not linguistic text: compare them ordinally and
            // case-insensitively instead of lower-casing with the current culture.
            Boolean isMhtmlArchive =
                pathToFile.EndsWith(".mht", StringComparison.OrdinalIgnoreCase) ||
                pathToFile.EndsWith(".mhtml", StringComparison.OrdinalIgnoreCase);

            if (isMhtmlArchive)
            {
                string      mhtml  = File.ReadAllText(pathToFile);
                MHTMLParser parser = new MHTMLParser(mhtml)
                {
                    OutputDirectory = workingFolder,
                    DecodeImageData = true
                };

                // The HTML part is written beside the archive under a .html extension.
                var outFile = Path.ChangeExtension(pathToFile, ".html");
                File.WriteAllText(outFile, parser.getHTMLText());
                file = outFile;
            }

            var converter = new HtmlToPdfConverterFromDiskFileOld(file, base.JobsHostConfiguration)
            {
                Logger = Logger
            };

            var pdfConvertedFileName = converter.Run(parameters.JobId);

            await AddFormatToDocumentFromFile(
                parameters.TenantId,
                parameters.JobId,
                new DocumentFormat(DocumentFormats.Pdf),
                pdfConvertedFileName,
                new Dictionary <string, object>()).ConfigureAwait(false);

            return(ProcessResult.Ok);
        }
        /// <summary>
        /// Downloads the job blob, runs <c>_conversion</c> on it with "pdf" as target and attaches
        /// the file it returns to the document as the PDF format.
        /// </summary>
        protected async override Task <ProcessResult> OnPolling(PollerJobParameters parameters, string workingFolder)
        {
            Logger.DebugFormat("Delegating conversion of file {0} to libreoffice", parameters.JobId);

            // NOTE: per the original author's remark, the LibreOffice conversion is registered per tenant.
            string sourcePath = await DownloadBlob(parameters.TenantId, parameters.JobId, parameters.FileName, workingFolder);
            Logger.DebugFormat("Downloaded file {0} to be converted to pdf", sourcePath);

            var pdfPath   = _conversion.Run(sourcePath, "pdf");
            var pdfFormat = new DocumentFormat(DocumentFormats.Pdf);

            await AddFormatToDocumentFromFile(
                parameters.TenantId,
                parameters.JobId,
                pdfFormat,
                pdfPath,
                new Dictionary <string, object>()).ConfigureAwait(false);

            return ProcessResult.Ok;
        }
Exemple #16
0
        /// <summary>
        /// Downloads the job blob, converts it with <c>MailMessageToHtmlConverterTask</c> and
        /// attaches the file returned by the conversion to the document as the Email format.
        /// </summary>
        protected async override Task <ProcessResult> OnPolling(PollerJobParameters parameters, string workingFolder)
        {
            var mailConverter = new MailMessageToHtmlConverterTask();
            mailConverter.Logger = Logger;

            string downloadedMail = await DownloadBlob(parameters.TenantId, parameters.JobId, parameters.FileName, workingFolder).ConfigureAwait(false);

            // The conversion output is produced from the downloaded file, with the working folder passed along.
            var convertedArchive = mailConverter.Convert(parameters.JobId, downloadedMail, workingFolder);
            var emailFormat      = new DocumentFormat(DocumentFormats.Email);

            await AddFormatToDocumentFromFile(
                parameters.TenantId,
                parameters.JobId,
                emailFormat,
                convertedArchive,
                new Dictionary <string, object>()).ConfigureAwait(false);

            return ProcessResult.Ok;
        }
Exemple #17
0
        /// <summary>
        /// Extracts the textual content of the job's file through an analyzer and attaches two
        /// formats to the document: the structured Content (only when some text was extracted) and
        /// the sanitized analyzer output as a ".tika.html" file.
        /// Files whose extension is not in <c>_formats</c>, or that the filter manager rejects, get
        /// the null content placeholder instead.
        /// </summary>
        /// <param name="parameters">Job parameters supplying tenant id, job id, file name and extension.</param>
        /// <param name="workingFolder">Folder used for the downloaded blob and the generated tika file.</param>
        /// <returns>
        /// The outcome of <c>AddNullContentFormat</c> on the two early exits; otherwise
        /// <c>ProcessResult.Ok</c>, regardless of the values returned by the upload calls.
        /// </returns>
        protected async override Task <ProcessResult> OnPolling(
            PollerJobParameters parameters,
            String workingFolder)
        {
            Boolean result;
            var     contentFileName = Path.ChangeExtension(parameters.FileName, ".content");

            // Early exit 1: unsupported extension, register the null content placeholder.
            if (!_formats.Contains(parameters.FileExtension))
            {
                Logger.DebugFormat("Document for job Id {0} has an extension not supported, setting null content", parameters.JobId);
                return(new ProcessResult(await AddNullContentFormat(parameters, contentFileName)));
            }

            Logger.DebugFormat("Starting tika on job: {0}, file extension {1}", parameters.JobId, parameters.FileExtension);

            Logger.DebugFormat("Downloading blob for job: {0}, on local path {1}", parameters.JobId, workingFolder);
            string pathToFile = await DownloadBlob(parameters.TenantId, parameters.JobId, parameters.FileName, workingFolder);

            // ProcessFile may return a different path; from here on that path is the one analyzed.
            pathToFile = ProcessFile(pathToFile, workingFolder);

            Boolean shouldAnalyze = _filterManager.ShouldAnalyze(parameters.FileName, pathToFile);

            // Early exit 2: the filter manager rejected the file, register the null content placeholder.
            if (!shouldAnalyze)
            {
                Logger.InfoFormat("File {0} for job {1} was discharded!", parameters.FileName, parameters.JobId);
                return(new ProcessResult(await AddNullContentFormat(parameters, contentFileName)));
            }
            Logger.DebugFormat("Search for password JobId:{0}", parameters.JobId);
            var     passwords       = ClientPasswordSet.GetPasswordFor(parameters.FileName).ToArray();
            String  content         = "";
            Int32   analyzerOrdinal = 0;
            Boolean success         = false;

            var analyzer = BuildAnalyzer(analyzerOrdinal);

            // Retry loop: an exception escaping the analyzer moves on to the analyzer with the next
            // ordinal; the loop stops on success or when BuildAnalyzer returns null.
            do
            {
                try
                {
                    if (passwords.Any())
                    {
                        // Try every known password in turn.
                        // NOTE(review): failures are logged and swallowed here, so when every
                        // password fails, content stays empty and success is still set to true
                        // below - no other analyzer is tried. Confirm this is intended.
                        foreach (var password in passwords)
                        {
                            try
                            {
                                content = analyzer.GetHtmlContent(pathToFile, password) ?? "";
                                break; // the first password that opens the file ends the search
                            }
                            catch (Exception)
                            {
                                Logger.ErrorFormat("Error opening file {0} with password", parameters.FileName);
                            }
                        }
                    }
                    else
                    {
                        // No password known: analyze the file with an empty password.
                        Logger.DebugFormat("Analyze content JobId: {0} -> Path: {1}", parameters.JobId, pathToFile);
                        content = analyzer.GetHtmlContent(pathToFile, "") ?? "";
                    }
                    success = true;
                }
                catch (Exception ex)
                {
                    // NOTE(review): the format string has two placeholders but three arguments are
                    // passed; parameters.JobId is not part of the message.
                    Logger.ErrorFormat(ex, "Error extracting tika with analyzer {0} on file {1}", analyzer.Describe(), parameters.FileName, parameters.JobId);
                    analyzer = BuildAnalyzer(++analyzerOrdinal);
                    if (analyzer != null)
                    {
                        Logger.InfoFormat("Retry job  {0} with analyzer {1}", parameters.JobId, analyzer.Describe());
                    }
                }
            } while (analyzer != null && success == false);

            Logger.DebugFormat("Finished tika on job: {0}, charsNum {1}", parameters.JobId, content.Count());
            // Falls back to the raw content when nothing was extracted (the block below is skipped).
            String sanitizedContent = content;

            if (!string.IsNullOrWhiteSpace(content))
            {
                var resultContent   = _builder.CreateFromTikaPlain(content);
                var documentContent = resultContent.Content;
                sanitizedContent = resultContent.SanitizedTikaContent;
                var    pages = documentContent.Pages.Count();
                string lang  = null;
                // Language detection uses the second page when there is more than one page...
                if (pages > 1)
                {
                    lang = LanguageDetector.GetLanguage(documentContent.Pages[1].Content);
                }

                // ...and the only page when there is exactly one.
                // NOTE(review): with several pages, a null result from the second page is not
                // retried on the first one - confirm this is intended.
                if (lang == null && pages == 1)
                {
                    lang = LanguageDetector.GetLanguage(documentContent.Pages[0].Content);
                }

                if (lang != null)
                {
                    documentContent.AddMetadata(DocumentContent.MedatataLanguage, lang);
                }

                result = await AddFormatToDocumentFromObject(
                    parameters.TenantId,
                    this.QueueName,
                    parameters.JobId,
                    new DocumentFormat(DocumentFormats.Content),
                    documentContent,
                    contentFileName,
                    new Dictionary <string, object>());

                Logger.DebugFormat("Added format {0} to jobId {1}, result: {2}", DocumentFormats.Content, parameters.JobId, result);
            }

            // The sanitized analyzer output is always written and uploaded, even when it is empty.
            var tikaFileName = Path.Combine(workingFolder, Path.GetFileNameWithoutExtension(parameters.FileName) + ".tika.html");

            tikaFileName = SanitizeFileNameForLength(tikaFileName);
            File.WriteAllText(tikaFileName, sanitizedContent);
            result = await AddFormatToDocumentFromFile(
                parameters.TenantId,
                parameters.JobId,
                new DocumentFormat(DocumentFormats.Tika),
                tikaFileName,
                new Dictionary <string, object>());

            // result is only logged; it does not influence the returned ProcessResult.
            Logger.DebugFormat("Added format {0} to jobId {1}, result: {2}", DocumentFormats.Tika, parameters.JobId, result);

            return(ProcessResult.Ok);
        }
Exemple #18
0
 /// <summary>
 /// Abstract processing hook: derived classes implement the work for one job here.
 /// </summary>
 /// <param name="parameters">Parameters of the job to process.</param>
 /// <param name="workingFolder">
 /// Path of a folder for the job's files. NOTE(review): presumably a per-job scratch
 /// directory - confirm against the caller.
 /// </param>
 /// <returns>A task producing the <c>ProcessResult</c> of the job.</returns>
 protected abstract Task <ProcessResult> OnPolling(
     PollerJobParameters parameters,
     String workingFolder);