public void Empty_Zusatzdaten_are_not_serialized()
        {
            // Arrange
            var sut = new PaketDIP
            {
                Generierungsdatum = DateTime.Today,
                SchemaVersion     = SchemaVersion.Item41
            };

            // Act
            // The only dossier in the package is created without any zusatzDaten.
            var dossierWithoutZusatzdaten = new DossierDIP { Id = "myDossierId" };
            var singlePosition = new OrdnungssystempositionDIP
            {
                Dossier = new List <DossierDIP> { dossierWithoutZusatzdaten }
            };
            var ordnungssystem = new OrdnungssystemDIP
            {
                Ordnungssystemposition = new List <OrdnungssystempositionDIP> { singlePosition }
            };

            sut.Ablieferung = new AblieferungDIP
            {
                AblieferndeStelle = "ablieferndeStelle",
                Ordnungssystem    = ordnungssystem
            };

            var xml = sut.Serialize();

            // Assert
            // No zusatzDaten element may show up in the serialized output.
            xml.Should().NotContain("<zusatzDaten");
        }
        /// <summary>
        ///     Looks up the dossier in the DIP package whose Id equals the Id of the given folder.
        /// </summary>
        /// <param name="folder">The folder whose Id is searched for.</param>
        /// <param name="dip">The package to search in.</param>
        /// <returns>The first dossier with a matching Id, or the default value (null) if there is none.</returns>
        public DossierDIP FindDossierInPackage(FolderInfo folder, PaketDIP dip)
        {
            Log.Verbose("Trying to find dossier in dip package for folder with Id {Id}", folder.Id);

            // Step 1: all positions reachable from the top level (Traverse presumably walks the hierarchy recursively)
            var positions = dip.Ablieferung.Ordnungssystem.Ordnungssystemposition.Traverse(p => p.Ordnungssystemposition);

            // Step 2: the dossiers attached directly to those positions
            var directDossiers = positions.SelectMany(p => p.Dossier);

            // Step 3: extend the search to the dossiers nested below them
            var candidates = directDossiers.Traverse(d => d.Dossier);

            var match = candidates.FirstOrDefault(d => d.Id == folder.Id);

            Log.Verbose("Found this item: {found}", JsonConvert.SerializeObject(match));
            return match;
        }
Beispiel #3
0
        /// <summary>
        ///     Converts the given files in parallel, merges files that were split for processing and
        ///     updates the package metadata for the converted files.
        /// </summary>
        /// <param name="id">The archive record id or order id; only used for logging.</param>
        /// <param name="files">The files to convert. An empty list is a no-op.</param>
        /// <param name="paket">The package whose metadata is updated.</param>
        /// <param name="rootFolder">Folder passed to the metadata updater.</param>
        /// <param name="tempFolder">Folder passed to the pdf manipulator when building the conversion list.</param>
        /// <param name="context">The job context handed through to the single file conversion.</param>
        private async Task ConvertFiles(string id, List <RepositoryFile> files, PaketDIP paket, string rootFolder, string tempFolder, JobContext context)
        {
            // Nothing to convert
            if (files.Count == 0)
            {
                return;
            }

            // Build the list of conversion files.
            // It contains the names of the split files for processing and leaves out
            // files that do not have the exported flag or that should be skipped.
            var conversionFiles = pdfManipulator.ConvertToConversionFiles(files.ToList(), tempFolder, true);

            var stopwatch   = Stopwatch.StartNew();
            var parallelism = Settings.Default.DocumentTransformParallelism;

            Log.Information("Starting parallel document transform for-each-loop with parallelism of {parallelism} for {Count} files of archiveRecordId or orderId {id}",
                            parallelism, files.Count, id);
            var supportedFileTypesForRendering = await renderEngine.GetSupportedFileTypes();

            await conversionFiles.ParallelForEachAsync(async conversionFile =>
            {
                var sourceFile = new FileInfo(conversionFile.FullName);
                Log.Information("Start conversion for file: {file} for archive record or order id {id}", sourceFile, id);
                conversionFile.ConvertedFile = await ConvertFile(sourceFile, supportedFileTypesForRendering, context);
            }, parallelism, true);

            // Stitch the files back together that were possibly split
            pdfManipulator.MergeSplittedFiles(conversionFiles);

            // Update the metadata.xml for all converted files.
            // Done sequentially, as speed is not an issue here.
            foreach (var conversionFile in conversionFiles)
            {
                var originalFile = new FileInfo(conversionFile.FullName);
                var hasNoParent  = string.IsNullOrEmpty(conversionFile.ParentId);
                if (hasNoParent)
                {
                    MetadataXmlUpdater.UpdateFile(originalFile, new FileInfo(conversionFile.ConvertedFile), paket, rootFolder);
                }

                // Keep the original if it is gone already or if the converted file has the very same name.
                // The latter can happen for PDF --> PDF with OCR.
                if (!originalFile.Exists || conversionFile.ConvertedFile == originalFile.FullName)
                {
                    continue;
                }

                originalFile.Delete();
            }

            stopwatch.Stop();
            Log.Information("Finished parallel document transform for-each-loop with parallelism of {parallelism} for {Count} files of archiveRecordId or orderId {id} in {TotalSeconds}",
                            parallelism, files.Count, id, stopwatch.Elapsed.TotalSeconds);
        }
        /// <summary>
        ///     Rebuilds the file and folder lists of the repository package from the table of contents of the DIP
        ///     package, so that the repository package reflects the actual contents of the package.
        /// </summary>
        /// <param name="repositoryPackage">The repository package whose files and folders are replaced.</param>
        /// <param name="paket">The DIP package whose table of contents is the source of truth.</param>
        private void UpdateRepositoryPackage(RepositoryPackage repositoryPackage, PaketDIP paket)
        {
            // Start from scratch
            repositoryPackage.Files.Clear();
            repositoryPackage.Folders.Clear();

            var topLevelFolders = paket.Inhaltsverzeichnis.Ordner;
            Debug.Assert(topLevelFolders.Count == 1, "There should be only one folder at the content level");

            // Everything is taken from the first (and expected to be only) folder at the content level
            var contentRoot = topLevelFolders.First();
            repositoryPackage.Files.AddRange(ConvertToRepositoryFiles(contentRoot.Datei));
            repositoryPackage.Folders.AddRange(ConvertToRepositoryFolders(contentRoot.Ordner));
        }
        /// <summary>
        ///     Finds the entry of a physical file within the table of contents of the package.
        /// </summary>
        /// <param name="file">The physical file to look up.</param>
        /// <param name="paket">The package whose table of contents is searched.</param>
        /// <param name="tempFolder">
        ///     The leading part of the full file name that is cut off (by its length) to get the path inside the package.
        /// </param>
        /// <param name="ordnerOderInhaltsverzeichnis">
        ///     Receives the folder or table of contents that contains a file with the searched name.
        ///     If the method stops early because a folder of the path was not found, it still holds the table of contents.
        /// </param>
        /// <returns>The matching file entry, or null if no folder or file matched.</returns>
        /// <exception cref="InvalidOperationException">More than one file entry matched the path.</exception>
        public static DateiDIP GetDatei(FileInfo file, PaketDIP paket, string tempFolder, out object ordnerOderInhaltsverzeichnis)
        {
            var relativePath = file.FullName.Remove(0, tempFolder.Length);
            var pathParts    = relativePath.Split(new[] { '\\' }, StringSplitOptions.RemoveEmptyEntries);

            // The search starts at the table of contents of the package
            var containers = new List <object> {
                paket.Inhaltsverzeichnis
            };

            ordnerOderInhaltsverzeichnis = containers.FirstOrDefault();

            // Walk down the folder parts; the last part is the file name and is handled afterwards
            var folderPartCount = pathParts.Length - 1;
            for (var index = 0; index < folderPartCount; index++)
            {
                containers = FindOrdner(pathParts[index], containers).Cast <object>().ToList();

                // Without a folder of that name there cannot be a file either --> stop here
                if (!containers.Any())
                {
                    return null;
                }
            }

            var fileName = pathParts.Last();
            var matches  = FindDateiInOrdnerList(fileName, containers);

            // Only one file could/should be found. Anything else indicates inconsistent data.
            if (matches.Count > 1)
            {
                throw new InvalidOperationException(
                          $"Found more than one file with the name {fileName} with the same path <{relativePath}>. This is not possible.");
            }

            // Tells whether the given container holds a file with the searched name (case-insensitive)
            bool ContainsSearchedFile(object container)
            {
                if (container is InhaltsverzeichnisDIP inhaltsverzeichnis)
                {
                    return inhaltsverzeichnis.Datei.Any(d => d.Name.Equals(fileName, StringComparison.InvariantCultureIgnoreCase));
                }

                if (container is OrdnerDIP ordner)
                {
                    return ordner.Datei.Any(d => d.Name.Equals(fileName, StringComparison.InvariantCultureIgnoreCase));
                }

                return false;
            }

            // Report the folder or root in which the file was found
            ordnerOderInhaltsverzeichnis = containers.FirstOrDefault(ContainsSearchedFile);

            return matches.FirstOrDefault();
        }
        public void Enum_values_are_correctly_serialized()
        {
            // Arrange
            const string schemaLocation = "http://bar.admin.ch/gebrauchskopie/v1 gebrauchskopie.xsd";
            var sut = new PaketDIP
            {
                Generierungsdatum = DateTime.Today,
                SchemaVersion     = SchemaVersion.Item41,
                SchemaLocation    = schemaLocation
            };

            // Act
            var xml = sut.Serialize();

            // Assert
            // The enum member Item41 has to be written as its schema value "4.1"
            xml.Should().Contain("schemaVersion=\"4.1\"");
            xml.Should().Contain("schemaLocation=\"http://bar.admin.ch/gebrauchskopie/v1 gebrauchskopie.xsd\"");
        }
Beispiel #7
0
        /// <summary>
        ///     <para>
        ///         Converts single page JPEG 2000 scans found within the package into (multi-paged) pdf documents.
        ///         One pdf is created per document or dossier (with direct dateiRef's). The metadata information in the
        ///         package is updated to reflect the changes made.
        ///     </para>
        ///     <para>The following assumptions are made:</para>
        ///     <list type="bullet">
        ///         <item>JPEG 2000 files have the extension .jp2</item>
        ///         <item>
        ///             A .jp2 file may be accompanied by a premis xml file. The premis filename is
        ///             "[jpeg2000Filename]_premis.xml"
        ///         </item>
        ///         <item>
        ///             Within one document or (dossier with dateiRef) only .jp2 files are allowed.
        ///             If other file types are mixed in (except for the premis files), the conversion silently fails
        ///             for that document.
        ///         </item>
        ///         <item>The premis files are removed after the pdf creation took place.</item>
        ///     </list>
        /// </summary>
        /// <param name="paket">The package to be converted</param>
        /// <param name="folder">The root folder where the files can be found.</param>
        public void ConvertSingleJpeg2000ScansToPdfDocuments(PaketDIP paket, string folder)
        {
            // Remember the inputs in the instance fields
            paketToConvert = paket;
            rootFolder     = folder;

            // Default settings for the image conversion: a single parameter holding the configured jpeg quality
            encoderParameters          = new EncoderParameters(1);
            encoderParameters.Param[0] = new EncoderParameter(Encoder.Quality, settings.JpegQualitaetInProzent);
            BitmapImageCreator.Register();

            // Process every top level position of the package
            var topLevelPositions = paket.Ablieferung.Ordnungssystem.Ordnungssystemposition;
            foreach (var position in topLevelPositions)
            {
                ProcessOrdnungssystemPosition(position);
            }
        }
        public void Zusatzdaten_are_correctly_serialized()
        {
            // Arrange
            var sut = new PaketDIP
            {
                Generierungsdatum = DateTime.Today,
                SchemaVersion     = SchemaVersion.Item41
            };

            // Act
            // The only dossier in the package carries two zusatzDaten entries.
            var merkmale = new List <ZusatzDatenMerkmal>
            {
                new ZusatzDatenMerkmal { Name = "propName1", Value = "Value1" },
                new ZusatzDatenMerkmal { Name = "propName2", Value = "Value2" }
            };
            var dossierWithZusatzdaten = new DossierDIP
            {
                Id          = "myDossierId",
                zusatzDaten = merkmale
            };
            var singlePosition = new OrdnungssystempositionDIP
            {
                Dossier = new List <DossierDIP> { dossierWithZusatzdaten }
            };

            sut.Ablieferung = new AblieferungDIP
            {
                AblieferndeStelle = "ablieferndeStelle",
                Ordnungssystem    = new OrdnungssystemDIP
                {
                    Ordnungssystemposition = new List <OrdnungssystempositionDIP> { singlePosition }
                }
            };

            var xml = sut.Serialize();

            // Assert
            // Both the container element and the entries with their name attribute must be present
            xml.Should().Contain("<zusatzDaten>");
            xml.Should().Contain("<merkmal name=\"");
        }
        /// <summary>
        ///     Updates the metadata entry of <paramref name="file" /> so that it describes <paramref name="newFile" />:
        ///     the name and MD5 checksum are replaced and the existing properties of the entry are cleared.
        /// </summary>
        /// <param name="file">The original file whose metadata entry is looked up.</param>
        /// <param name="newFile">The file that replaces the original one.</param>
        /// <param name="paket">The package that contains the metadata.</param>
        /// <param name="tempFolder">The folder prefix that is cut off from the full file name for the lookup.</param>
        /// <exception cref="InvalidOperationException">
        ///     No metadata entry exists for <paramref name="file" /> and the package is not marked as a test-system
        ///     package with deliberately mismatching metadata.
        /// </exception>
        public static void UpdateFile(FileInfo file, FileInfo newFile, PaketDIP paket, string tempFolder)
        {
            // Remark that marks packages whose metadata.xml intentionally does not match the content (test systems)
            const string mismatchAllowedRemark = "Metadata.xml das nicht zum Inhalt passt für Testsysteme";

            var datei = GetDatei(file, paket, tempFolder, out _);

            if (datei == null)
            {
                if (paket.Ablieferung.Bemerkung != mismatchAllowedRemark)
                {
                    // A specific exception type instead of the bare System.Exception; callers catching Exception still work
                    throw new InvalidOperationException($"Im Metadata.xml wurde für die Datei '{file.FullName}' kein Eintrag gefunden.");
                }

                // Test-system package: a missing entry is tolerated
                return;
            }

            datei.Name             = newFile.Name;
            datei.Pruefsumme       = CalculateMd5(newFile);
            datei.Pruefalgorithmus = Pruefalgorithmus.MD5;
            datei.Eigenschaft.Clear();
        }
        /// <summary>
        ///     Converts a single file with the render engine, if its extension is supported, and updates the
        ///     package metadata for the converted file.
        /// </summary>
        /// <param name="file">The file to convert.</param>
        /// <param name="paket">The package whose metadata is updated after a conversion.</param>
        /// <param name="tempFolder">The folder passed on to the metadata updater.</param>
        /// <returns>
        ///     The name of the converted file, or the full name of the original file when its extension is not
        ///     supported by the render engine.
        /// </returns>
        /// <exception cref="FileNotFoundException">The file does not exist.</exception>
        private async Task <string> ConvertFile(FileInfo file, PaketDIP paket, string tempFolder)
        {
            if (!file.Exists)
            {
                throw new FileNotFoundException($"Unable to find file {file.FullName}", file.FullName);
            }

            var renderableExtensions = await renderEngine.GetSupportedFileTypes();

            // The supported types are compared against the lower-cased extension without any dot
            var extensionWithoutDot = file.Extension.Replace(".", "").ToLowerInvariant();
            var isRenderable        = renderableExtensions.Contains(extensionWithoutDot);
            if (!isRenderable)
            {
                return file.FullName;
            }

            var resultFile = await renderEngine.ConvertFile(file.FullName, GetTargetExtension(file));

            MetadataXmlUpdater.UpdateFile(file, new FileInfo(resultFile), paket, tempFolder);
            return resultFile;
        }
        /// <summary>
        ///     Removes the metadata entry of the given file from the package: the entry is removed from the file list
        ///     of its containing folder (or table of contents) and the references to it are removed from the
        ///     ordnungssystem positions.
        /// </summary>
        /// <param name="file">The file whose metadata entry is removed.</param>
        /// <param name="paket">The package that contains the metadata.</param>
        /// <param name="tempFolder">The folder prefix that is cut off from the full file name for the lookup.</param>
        /// <returns>
        ///     The former parents of the entry: the containing folder or table of contents and, if a reference was
        ///     removed, the dossier or document that referenced it.
        /// </returns>
        /// <exception cref="InvalidOperationException">No metadata entry exists for <paramref name="file" />.</exception>
        public static DateiParents RemoveFile(FileInfo file, PaketDIP paket, string tempFolder)
        {
            var datei = GetDatei(file, paket, tempFolder, out var ordnerOderInhaltverzeinis);

            // GetDatei returns null when the file is unknown. Fail with a clear message
            // instead of running into a NullReferenceException on datei.Id further down.
            if (datei == null)
            {
                throw new InvalidOperationException($"Im Metadata.xml wurde für die Datei '{file.FullName}' kein Eintrag gefunden.");
            }

            var parents = new DateiParents
            {
                OrdnerOderInhaltverzeinis = ordnerOderInhaltverzeinis
            };

            GetDateiList(ordnerOderInhaltverzeinis).Remove(datei);

            // Every top level position is visited; if several report a hit, the last one is kept
            foreach (var ordnungssystemposition in paket.Ablieferung.Ordnungssystem.Ordnungssystemposition)
            {
                var dossierOderDokument = RemoveDateiRef(ordnungssystemposition, datei.Id);
                if (dossierOderDokument != null)
                {
                    parents.DossierOderDokument = dossierOderDokument;
                }
            }

            return parents;
        }
Beispiel #12
0
        /// <summary>
        ///     Test helper: writes a small text file into the given folder below "content" and registers it in the
        ///     package metadata.
        /// </summary>
        /// <param name="sampleFileName">Name of the file to create.</param>
        /// <param name="targetFolderInsideContent">
        ///     Folder below "content"; also used to pick the document (by title) and the folder (by name) the file is added to.
        /// </param>
        /// <param name="paket">The package the file is added to.</param>
        /// <param name="rootFolder">The root folder of the unpacked package.</param>
        private void AddFileToPackage(string sampleFileName, string targetFolderInsideContent, PaketDIP paket, string rootFolder)
        {
            var targetPath = Path.Combine(rootFolder, "content", targetFolderInsideContent, sampleFileName);

            using (var writer = new StreamWriter(targetPath))
            {
                writer.Write("just a file");
            }

            var createdFile = new FileInfo(targetPath);

            // The test package is expected to have three nested position levels with the dossier on the innermost one
            var dossier = paket.Ablieferung.Ordnungssystem.Ordnungssystemposition[0].Ordnungssystemposition[0].Ordnungssystemposition[0]
                          .Dossier[0];
            var parents = new DateiParents
            {
                DossierOderDokument       = dossier.Dokument.FirstOrDefault(d => d.Titel == targetFolderInsideContent),
                OrdnerOderInhaltverzeinis = paket.Inhaltsverzeichnis.Ordner[0].Ordner.FirstOrDefault(o => o.Name == targetFolderInsideContent)
            };

            MetadataXmlUpdater.AddFile(createdFile, parents);
        }
        /// <summary>
        ///     Creates the metadata.xml for the given package in <paramref name="folderName" />.
        ///     For an Alfresco repository a fixed file from the resources is written instead.
        /// </summary>
        /// <param name="folderName">The target folder; it is created if it does not exist.</param>
        /// <param name="package">The repository package whose folders and files are described.</param>
        /// <param name="filesToIgnore">Files handed on to the folder processing to be left out of the output.</param>
        /// <exception cref="InvalidOperationException">
        ///     The root folder for the export could not be found, or the index did not return any archive records.
        /// </exception>
        public async Task CreateMetadataXml(string folderName, RepositoryPackage package, List <RepositoryFile> filesToIgnore)
        {
            // Make sure the folder exists; CreateDirectory does nothing for an existing directory
            Directory.CreateDirectory(folderName);

            CopyXsdFiles(folderName);

            // Read the ordered item and all its children from the Elastic index.
            var getArchiveRecordsForPackageRequest = new GetArchiveRecordsForPackageRequest {
                PackageId = package.PackageId
            };
            var response = await indexClient.GetResponse <GetArchiveRecordsForPackageResponse>(getArchiveRecordsForPackageRequest);

            var indexRecords = response.Message?.Result;

            // Constant message template with properties: the serialized JSON contains braces
            // and must not become part of the template itself.
            Log.Debug("Found the following archive records for packageId {PackageId}: {Records}",
                      package.PackageId, JsonConvert.SerializeObject(indexRecords));

            // If using the Alfresco Repository, then we simply return a "hard coded" file
            if (repositoryAccess.GetRepositoryName().StartsWith("Alfresco", StringComparison.InvariantCultureIgnoreCase))
            {
                var defaultMetadata = GetFileFromRessource();
                File.WriteAllText(Path.Combine(folderName, "metadata.xml"), defaultMetadata);
                return;
            }

            // Read all folder objects from the repository: from the entry point up to the Ablieferung
            // and down to the deepest level.
            InitFolders(package);

            var dip = new PaketDIP
            {
                SchemaVersion      = SchemaVersion.Item41,
                Generierungsdatum  = DateTime.Today,
                Bestellinformation = null // Information only used for the usage copy of Vecteur
            };

            // Write the data into the schema structure, starting at the topmost folder, the Ablieferung.
            var root = FoldersTreeList.Find(f => f.Parent == null);

            if (root == null)
            {
                throw new InvalidOperationException("Unable to find root folder for exporting metadata.");
            }

            // The result is read with a null-conditional above, so it can be missing.
            // Fail with a clear message instead of a NullReferenceException.
            if (indexRecords == null)
            {
                throw new InvalidOperationException($"No archive records were returned from the index for packageId {package.PackageId}.");
            }

            AddFolderData(root, null, dip, indexRecords.ToList(), package.PackageId, filesToIgnore);


            // Finally generate the table of contents
            var contentRoot = new OrdnerDIP
            {
                Id           = $"contentRoot{DateTime.Now.Ticks}",
                Name         = "content",
                OriginalName = "content"
            };

            dip.Inhaltsverzeichnis.Ordner.Add(contentRoot);
            foreach (var folder in package.Folders)
            {
                ProcessFolder(contentRoot.Ordner, folder);
            }

            ProcessFiles(contentRoot.Datei, package.Files);


            // Save data to disk
            dip.SchemaLocation = "http://bar.admin.ch/gebrauchskopie/v1 gebrauchskopie.xsd";
            ((Paket)dip).SaveToFile(Path.Combine(folderName, "metadata.xml"));
        }
        /// <summary>
        ///     Looks up the ordnungssystem position in the DIP package whose Id equals the Id of the given folder.
        /// </summary>
        /// <param name="folder">The folder whose Id is searched for.</param>
        /// <param name="dip">The package to search in.</param>
        /// <returns>The first position with a matching Id, or the default value (null) if there is none.</returns>
        public OrdnungssystempositionDIP FindOrdnungssystemPositionInPackage(FolderInfo folder, PaketDIP dip)
        {
            Log.Verbose("Trying to find ordnungssystem position in dip package for folder with Id {Id}", folder.Id);

            // Collect the positions of all levels first, then search the materialized list
            var topLevelPositions = dip.Ablieferung.Ordnungssystem.Ordnungssystemposition;
            var candidates        = topLevelPositions.Traverse(p => p.Ordnungssystemposition).ToList();
            var match             = candidates.FirstOrDefault(position => position.Id == folder.Id);

            Log.Verbose("Found this item: {found}", JsonConvert.SerializeObject(match));
            return match;
        }
        /// <summary>
        ///     Adds the folder data to the DIP package and then recursively does the same for all
        ///     children of the folder. After a position, dossier or document was added, its Id is
        ///     written back to <paramref name="folder" /> so that the children can look up their parent by Id.
        /// </summary>
        /// <param name="folder">The folder to add.</param>
        /// <param name="parent">
        ///     The parent under which the new folder should be added.
        ///     It is only read for folder types other than Ablieferung.
        /// </param>
        /// <param name="dip">The existing dip object where the data is added.</param>
        /// <param name="indexRecords">
        ///     A list with the metadata information from the elastic index containing the ordered archive record and all its
        ///     children.
        ///     The first record in the collection is the ordered archive record
        /// </param>
        /// <param name="packageId">The packageId of the ordered item</param>
        /// <param name="filesToIgnore">A list of files that should not be included in the output</param>
        /// <exception cref="InvalidOperationException">
        ///     The folder type is not allowed below the folder type of <paramref name="parent" />.
        /// </exception>
        /// <exception cref="ArgumentOutOfRangeException">The folder type of <paramref name="folder" /> is not handled.</exception>
        private void AddFolderData(FolderInfo folder, FolderInfo parent, PaketDIP dip, List <ElasticArchiveRecord> indexRecords, string packageId,
                                   List <RepositoryFile> filesToIgnore)
        {
            var extensions = folder.CmisFolder.GetExtensions(ExtensionLevel.Object);

            Log.Verbose("Adding folder data to DIP Package for {FolderType}: {data} with parent {parent}", folder.FolderType,
                        JsonConvert.SerializeObject(extensions), parent);
            switch (folder.FolderType)
            {
            case PackageFolderType.Ablieferung:
                // The ordered record is the one whose primary data link equals the package id
                var orderedRecord = indexRecords.FirstOrDefault(i => i.PrimaryDataLink == packageId);
                AddAblieferungData(dip.Ablieferung, extensions, orderedRecord);
                break;

            case PackageFolderType.OrdnungssystemPosition:
                OrdnungssystempositionDIP position;
                // NOTE(review): parent is dereferenced here without a null check; the root call passes null,
                // so this relies on the root folder being of type Ablieferung - confirm.
                switch (parent.FolderType)
                {
                case PackageFolderType.Ablieferung:
                    // Top level position: goes directly into the ordnungssystem
                    position = AddOrdnungssystemPositionData(dip.Ablieferung.Ordnungssystem.Ordnungssystemposition, extensions);
                    break;

                case PackageFolderType.OrdnungssystemPosition:
                    // Nested position: goes below the parent position that was added before
                    var parentPosition = FindOrdnungssystemPositionInPackage(parent, dip);
                    position = AddOrdnungssystemPositionData(parentPosition.Ordnungssystemposition, extensions);
                    break;

                default:
                    throw new InvalidOperationException(
                              "A <Ordnungssystemposition> can only be added to a <Ordnungssystem> or another <Ordnungssystemposition>.");
                }

                // Remember the Id, so children of this folder can find the position in the package
                folder.Id = position.Id;
                break;

            case PackageFolderType.Dossier:
                DossierDIP dossier;
                var        dossierRecord = GetArchiveRecordFromDossier(folder, indexRecords);
                switch (parent.FolderType)
                {
                case PackageFolderType.OrdnungssystemPosition:
                    var parentPosition = FindOrdnungssystemPositionInPackage(parent, dip);
                    dossier = AddDossierData(parentPosition.Dossier, extensions, dossierRecord,
                                             folder.IsOrderedItem || folder.IsChildOfOrderedItem, filesToIgnore);
                    break;

                case PackageFolderType.Dossier:
                    // Sub-dossier: goes below the parent dossier that was added before
                    var parentDossier = FindDossierInPackage(parent, dip);
                    dossier = AddDossierData(parentDossier.Dossier, extensions, dossierRecord,
                                             folder.IsOrderedItem || folder.IsChildOfOrderedItem, filesToIgnore);
                    break;

                default:
                    throw new InvalidOperationException("A <Dossier> can only be added to a <Ordnungssystemposition> or another <Dossier>.");
                }

                // Remember the Id, so children of this folder can find the dossier in the package
                folder.Id = dossier.Id;
                break;

            case PackageFolderType.Dokument:
                DokumentDIP dokument;
                var         documentRecord = GetArchiveRecordFromDocument(folder, indexRecords);
                switch (parent.FolderType)
                {
                case PackageFolderType.Dossier:
                    var parentDossier = FindDossierInPackage(parent, dip);
                    dokument = AddDokumentData(parentDossier, extensions, documentRecord, filesToIgnore);
                    break;

                default:
                    throw new InvalidOperationException("A <Dokument> can only be added to a <Dossier>.");
                }

                folder.Id = dokument.Id;
                break;

            default:
                throw new ArgumentOutOfRangeException();
            }

            // Recurse into the children; the current folder becomes their parent
            foreach (var child in FoldersTreeList.GetChildren(folder))
            {
                AddFolderData(child, folder, dip, indexRecords, packageId, filesToIgnore);
            }
        }
Beispiel #16
0
 /// <summary>
 ///     Converts the files of the given folders and, recursively, of all their sub folders.
 ///     The folders are processed one after the other.
 /// </summary>
 /// <param name="id">The archive record id or order id that is passed on to the file conversion.</param>
 /// <param name="folders">The folders to process.</param>
 /// <param name="paket">The package that is passed on to the file conversion.</param>
 /// <param name="rootFolder">The root folder that is passed on unchanged.</param>
 /// <param name="tempFolder">The folder to which the physical name of each folder is appended.</param>
 /// <param name="context">The job context that is passed on to the file conversion.</param>
 private async Task ConvertFolders(string id, List <RepositoryFolder> folders, PaketDIP paket, string rootFolder, string tempFolder, JobContext context)
 {
     foreach (var currentFolder in folders)
     {
         // The folder's own path becomes the temp folder for its files and sub folders
         var folderPath = Path.Combine(tempFolder, currentFolder.PhysicalName);

         // First the files of the folder itself, then descend into the sub folders
         await ConvertFiles(id, currentFolder.Files, paket, rootFolder, folderPath, context);
         await ConvertFolders(id, currentFolder.Folders, paket, rootFolder, folderPath, context);
     }
 }