/// <summary>
/// Converts the given files in parallel, merges files that were split for processing,
/// updates the metadata for the converted files and deletes originals that were
/// replaced by a differently named converted file.
/// </summary>
/// <param name="id">Archive record id or order id; only used in the log messages.</param>
/// <param name="files">The files to convert. A null or empty list results in a no-op.</param>
/// <param name="paket">Package that is handed to <c>MetadataXmlUpdater.UpdateFile</c>.</param>
/// <param name="rootFolder">Root folder that is handed to <c>MetadataXmlUpdater.UpdateFile</c>.</param>
/// <param name="tempFolder">Folder that is handed to <c>pdfManipulator.ConvertToConversionFiles</c>.</param>
/// <param name="context">Job context that is passed on to the conversion of each single file.</param>
private async Task ConvertFiles(string id, List<RepositoryFile> files, PaketDIP paket, string rootFolder, string tempFolder, JobContext context)
{
    // Skip missing or empty collections
    if (files == null || files.Count == 0)
    {
        return;
    }

    // Create the list with conversion files.
    // This list will contain the split file names for processing.
    // This list does not contain files that didn't have the flag exported or should be skipped.
    var conversionFiles = pdfManipulator.ConvertToConversionFiles(files.ToList(), tempFolder, true);

    var sw = Stopwatch.StartNew();
    var parallelism = Settings.Default.DocumentTransformParallelism;
    Log.Information("Starting parallel document transform for-each-loop with parallelism of {parallelism} for {Count} files of archiveRecordId or orderId {id}", parallelism, files.Count, id);

    // Fetched once up front so the parallel conversions can share the result.
    var supportedFileTypesForRendering = await renderEngine.GetSupportedFileTypes();

    // NOTE(review): the trailing 'true' presumably makes the loop break on the first exception -
    // confirm against the ParallelForEachAsync overload in use.
    await conversionFiles.ParallelForEachAsync(async conversionFile =>
    {
        var file = new FileInfo(conversionFile.FullName);
        Log.Information("Start conversion for file: {file} for archive record or order id {id}", file, id);
        conversionFile.ConvertedFile = await ConvertFile(file, supportedFileTypesForRendering, context);
    }, parallelism, true);

    // Now stitch back files that were possibly split
    pdfManipulator.MergeSplittedFiles(conversionFiles);

    // Update the metadata.xml for all the converted files.
    // As speed is not an issue, we're not doing it in parallel.
    foreach (var conversionFile in conversionFiles)
    {
        var file = new FileInfo(conversionFile.FullName);

        // Only entries without a parent get a metadata update.
        if (string.IsNullOrEmpty(conversionFile.ParentId))
        {
            MetadataXmlUpdater.UpdateFile(file, new FileInfo(conversionFile.ConvertedFile), paket, rootFolder);
        }

        // Delete the original file if it exists and the converted file is not the same as the original file.
        // In case of PDF the name of the original and converted file could be the same --> PDF to PDF with OCR.
        // The paths are compared ignoring case: on a case-insensitive file system a path that differs
        // only in casing refers to the same file, and deleting it would remove the converted result.
        var convertedIsOriginal = string.Equals(conversionFile.ConvertedFile, file.FullName, System.StringComparison.OrdinalIgnoreCase);
        if (file.Exists && !convertedIsOriginal)
        {
            file.Delete();
        }
    }

    sw.Stop();
    Log.Information("Finished parallel document transform for-each-loop with parallelism of {parallelism} for {Count} files of archiveRecordId or orderId {id} in {TotalSeconds}", parallelism, files.Count, id, sw.Elapsed.TotalSeconds);
}
/// <summary>
/// Converts a single file through the render engine when its extension is one of the
/// file types the render engine reports as supported, and updates the metadata for it.
/// </summary>
/// <param name="file">The file to convert.</param>
/// <param name="paket">Package that is handed to <c>MetadataXmlUpdater.UpdateFile</c>.</param>
/// <param name="tempFolder">Folder that is handed to <c>MetadataXmlUpdater.UpdateFile</c>.</param>
/// <returns>
/// The path returned by the render engine, or the full name of <paramref name="file"/>
/// when its extension is not among the supported file types.
/// </returns>
/// <exception cref="FileNotFoundException">Thrown when <paramref name="file"/> does not exist.</exception>
private async Task<string> ConvertFile(FileInfo file, PaketDIP paket, string tempFolder)
{
    if (file.Exists == false)
    {
        throw new FileNotFoundException($"Unable to find file {file.FullName}", file.FullName);
    }

    var renderableTypes = await renderEngine.GetSupportedFileTypes();

    // The supported types are matched against the lower-cased extension without its dot.
    var normalizedExtension = file.Extension.Replace(".", "").ToLowerInvariant();
    var canBeRendered = renderableTypes.Contains(normalizedExtension);

    if (canBeRendered)
    {
        var extensionToProduce = GetTargetExtension(file);
        var resultPath = await renderEngine.ConvertFile(file.FullName, extensionToProduce);
        MetadataXmlUpdater.UpdateFile(file, new FileInfo(resultPath), paket, tempFolder);
        return resultPath;
    }

    // Nothing to convert: hand back the untouched original.
    return file.FullName;
}