Exemple #1
0
        /// <summary>
        /// Converts a set of repository files: builds the list of conversion files, converts each of them
        /// in parallel, merges files that were split for processing, updates the metadata.xml and
        /// deletes the originals that were replaced by a differently named converted file.
        /// </summary>
        /// <param name="id">Archive record id or order id; used in the log messages.</param>
        /// <param name="files">The files to process. The method returns immediately when the list is empty.</param>
        /// <param name="paket">Package object that is handed to the metadata.xml updater.</param>
        /// <param name="rootFolder">Folder that is handed to the metadata.xml updater.</param>
        /// <param name="tempFolder">Folder that is handed to the PDF manipulator when the conversion list is built.</param>
        /// <param name="context">Job context that is forwarded to the conversion of every single file.</param>
        private async Task ConvertFiles(string id, List <RepositoryFile> files, PaketDIP paket, string rootFolder, string tempFolder, JobContext context)
        {
            if (files.Count == 0)
            {
                // Nothing to convert
                return;
            }

            // Build the work list for the conversion.
            // It holds the names of the split files that need processing and leaves out
            // files that are not flagged as exported or that should be skipped.
            var workItems = pdfManipulator.ConvertToConversionFiles(files.ToList(), tempFolder, true);

            var stopwatch = Stopwatch.StartNew();
            var degreeOfParallelism = Settings.Default.DocumentTransformParallelism;

            Log.Information("Starting parallel document transform for-each-loop with parallelism of {parallelism} for {Count} files of archiveRecordId or orderId {id}",
                            degreeOfParallelism, files.Count, id);
            var renderableFileTypes = await renderEngine.GetSupportedFileTypes();

            await workItems.ParallelForEachAsync(async workItem =>
            {
                var source = new FileInfo(workItem.FullName);
                Log.Information("Start conversion for file: {file} for archive record or order id {id}", source, id);
                workItem.ConvertedFile = await ConvertFile(source, renderableFileTypes, context);
            }, degreeOfParallelism, true);

            // Put the files that may have been split back together
            pdfManipulator.MergeSplittedFiles(workItems);

            // Bring the metadata.xml up to date for the converted files.
            // Done sequentially on purpose, speed does not matter here.
            foreach (var processedItem in workItems)
            {
                var original = new FileInfo(processedItem.FullName);

                // Only entries without a parent id get their own metadata update
                var hasNoParent = string.IsNullOrEmpty(processedItem.ParentId);
                if (hasNoParent)
                {
                    MetadataXmlUpdater.UpdateFile(original, new FileInfo(processedItem.ConvertedFile), paket, rootFolder);
                }

                // Remove the original only when the conversion produced a file with a different name.
                // For PDF the original and the converted file can share the same name (PDF to PDF with OCR),
                // in which case the file must stay.
                if (original.Exists)
                {
                    if (processedItem.ConvertedFile != original.FullName)
                    {
                        original.Delete();
                    }
                }
            }

            stopwatch.Stop();
            Log.Information("Finished parallel document transform for-each-loop with parallelism of {parallelism} for {Count} files of archiveRecordId or orderId {id} in {TotalSeconds}",
                            degreeOfParallelism, files.Count, id, stopwatch.Elapsed.TotalSeconds);
        }
        /// <summary>
        /// Converts a single file with the render engine, provided its extension is among the
        /// file types the render engine reports as supported, and updates the metadata.xml afterwards.
        /// </summary>
        /// <param name="file">The file to convert.</param>
        /// <param name="paket">Package object that is handed to the metadata.xml updater.</param>
        /// <param name="tempFolder">Folder that is handed to the metadata.xml updater.</param>
        /// <returns>
        /// The value returned by the render engine for the converted file, or the full name of
        /// <paramref name="file"/> when its extension is not supported for rendering.
        /// </returns>
        /// <exception cref="FileNotFoundException">Thrown when <paramref name="file"/> does not exist.</exception>
        private async Task <string> ConvertFile(FileInfo file, PaketDIP paket, string tempFolder)
        {
            if (!file.Exists)
            {
                throw new FileNotFoundException($"Unable to find file {file.FullName}", file.FullName);
            }

            var renderableFileTypes = await renderEngine.GetSupportedFileTypes();

            // Compare on the lower-cased extension without the dot
            var normalizedExtension = file.Extension.Replace(".", "").ToLowerInvariant();
            var canBeRendered = renderableFileTypes.Contains(normalizedExtension);
            if (!canBeRendered)
            {
                // Unsupported type: the file is handed back unchanged
                return file.FullName;
            }

            var convertedPath = await renderEngine.ConvertFile(file.FullName, GetTargetExtension(file));

            MetadataXmlUpdater.UpdateFile(file, new FileInfo(convertedPath), paket, tempFolder);
            return convertedPath;
        }