public void Empty_Zusatzdaten_are_not_serialized()
{
    // Arrange
    var sut = new PaketDIP();

    // Act
    sut.Generierungsdatum = DateTime.Today;
    sut.SchemaVersion = SchemaVersion.Item41;
    sut.Ablieferung = new AblieferungDIP
    {
        AblieferndeStelle = "ablieferndeStelle",
        Ordnungssystem = new OrdnungssystemDIP
        {
            Ordnungssystemposition = new List<OrdnungssystempositionDIP>
            {
                new OrdnungssystempositionDIP
                {
                    Dossier = new List<DossierDIP>
                    {
                        new DossierDIP
                        {
                            Id = "myDossierId"
                        }
                    }
                }
            }
        }
    };
    var serialized = sut.Serialize();

    // Assert
    serialized.Should().NotContain("<zusatzDaten");
}
public DossierDIP FindDossierInPackage(FolderInfo folder, PaketDIP dip)
{
    Log.Verbose("Trying to find dossier in dip package for folder with Id {Id}", folder.Id);
    var allDossiers = dip.Ablieferung.Ordnungssystem.Ordnungssystemposition
        .Traverse(p => p.Ordnungssystemposition)
        .SelectMany(p => p.Dossier)
        .Traverse(p => p.Dossier);
    var found = allDossiers.FirstOrDefault(p => p.Id == folder.Id);
    Log.Verbose("Found this item: {found}", JsonConvert.SerializeObject(found));
    return found;
}
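// Note: Traverse is used above (and in FindOrdnungssystemPositionInPackage below) to flatten
// the position/dossier hierarchy, but it is not part of this section. A minimal sketch of such
// an extension method, assuming a depth-first walk that yields each element followed by all of
// its descendants:
public static IEnumerable<T> Traverse<T>(this IEnumerable<T> source, Func<T, IEnumerable<T>> childSelector)
{
    foreach (var item in source)
    {
        yield return item;

        var children = childSelector(item);
        if (children == null)
        {
            continue;
        }

        foreach (var descendant in children.Traverse(childSelector))
        {
            yield return descendant;
        }
    }
}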
private async Task ConvertFiles(string id, List<RepositoryFile> files, PaketDIP paket, string rootFolder, string tempFolder, JobContext context)
{
    // Skip empty collections
    if (files.Count == 0)
    {
        return;
    }

    // Create the list with conversion files.
    // This list contains the split file names for processing.
    // It does not contain files that lack the exported flag or that should be skipped.
    var conversionFiles = pdfManipulator.ConvertToConversionFiles(files.ToList(), tempFolder, true);

    var sw = new Stopwatch();
    sw.Start();
    var parallelism = Settings.Default.DocumentTransformParallelism;
    Log.Information("Starting parallel document transform for-each-loop with parallelism of {parallelism} for {Count} files of archiveRecordId or orderId {id}",
        parallelism, files.Count, id);
    var supportedFileTypesForRendering = await renderEngine.GetSupportedFileTypes();

    await conversionFiles.ParallelForEachAsync(async conversionFile =>
    {
        var file = new FileInfo(conversionFile.FullName);
        Log.Information("Start conversion for file: {file} for archive record or order id {id}", file, id);
        conversionFile.ConvertedFile = await ConvertFile(file, supportedFileTypesForRendering, context);
    }, parallelism, true);

    // Now stitch back together the files that were split
    pdfManipulator.MergeSplittedFiles(conversionFiles);

    // Update the metadata.xml for all the converted files.
    // As speed is not an issue, we're not doing it in parallel.
    foreach (var conversionFile in conversionFiles)
    {
        var file = new FileInfo(conversionFile.FullName);
        if (string.IsNullOrEmpty(conversionFile.ParentId))
        {
            MetadataXmlUpdater.UpdateFile(file, new FileInfo(conversionFile.ConvertedFile), paket, rootFolder);
        }

        // Delete the original file if the converted file exists and is not the same as the original file.
        // In the case of PDF, the original and converted file can share the same name --> PDF to PDF with OCR.
        if (file.Exists && conversionFile.ConvertedFile != file.FullName)
        {
            file.Delete();
        }
    }

    sw.Stop();
    Log.Information("Finished parallel document transform for-each-loop with parallelism of {parallelism} for {Count} files of archiveRecordId or orderId {id} in {TotalSeconds}",
        parallelism, files.Count, id, sw.Elapsed.TotalSeconds);
}
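// Note: ParallelForEachAsync is not part of this section; libraries such as Dasync.Collections
// provide an extension with this shape. A minimal SemaphoreSlim-based sketch (an assumption,
// not the project's actual implementation; Task.WhenAll already faults on the first exception,
// so the break-on-exception flag is not modeled separately here):
public static async Task ParallelForEachAsync<T>(this IEnumerable<T> source, Func<T, Task> body, int maxDegreeOfParallelism, bool breakOnException)
{
    using (var throttler = new SemaphoreSlim(maxDegreeOfParallelism))
    {
        var tasks = source.Select(async item =>
        {
            // Throttle: at most maxDegreeOfParallelism bodies run concurrently
            await throttler.WaitAsync();
            try
            {
                await body(item);
            }
            finally
            {
                throttler.Release();
            }
        }).ToList();

        await Task.WhenAll(tasks);
    }
}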
/// <summary>
/// This procedure updates the repository package so it correctly reflects the contents of the package
/// </summary>
/// <param name="repositoryPackage"></param>
/// <param name="paket"></param>
private void UpdateRepositoryPackage(RepositoryPackage repositoryPackage, PaketDIP paket)
{
    repositoryPackage.Files.Clear();
    repositoryPackage.Folders.Clear();

    var contentFolder = paket.Inhaltsverzeichnis.Ordner;
    Debug.Assert(contentFolder.Count == 1, "There should be only one folder at the content level");

    repositoryPackage.Files.AddRange(ConvertToRepositoryFiles(contentFolder.First().Datei));
    repositoryPackage.Folders.AddRange(ConvertToRepositoryFolders(contentFolder.First().Ordner));
}
public static DateiDIP GetDatei(FileInfo file, PaketDIP paket, string tempFolder, out object ordnerOderInhaltsverzeichnis)
{
    var fileWithShortPath = file.FullName.Remove(0, tempFolder.Length);
    var fileNameParts = fileWithShortPath.Split(new[] { '\\' }, StringSplitOptions.RemoveEmptyEntries);

    var ordnerOderInhaltsverzeichnisList = new List<object> { paket.Inhaltsverzeichnis };
    ordnerOderInhaltsverzeichnis = ordnerOderInhaltsverzeichnisList.FirstOrDefault();

    for (var i = 0; i < fileNameParts.Length - 1; i++)
    {
        var fileNamePart = fileNameParts[i];
        ordnerOderInhaltsverzeichnisList = FindOrdner(fileNamePart, ordnerOderInhaltsverzeichnisList).Cast<object>().ToList();

        // If we can't find a folder with the given name, we certainly won't find the file --> we can stop here
        if (!ordnerOderInhaltsverzeichnisList.Any())
        {
            return null;
        }
    }

    var dateien = FindDateiInOrdnerList(fileNameParts.Last(), ordnerOderInhaltsverzeichnisList);

    // Only one file could/should be found. But guard against duplicates, just in case.
    if (dateien.Count > 1)
    {
        throw new InvalidOperationException(
            $"Found more than one file with the name {fileNameParts.Last()} with the same path <{fileWithShortPath}>. This is not possible.");
    }

    // As an output, we have to set the folder or root where the file was found
    ordnerOderInhaltsverzeichnis = ordnerOderInhaltsverzeichnisList.FirstOrDefault(o =>
    {
        switch (o)
        {
            case InhaltsverzeichnisDIP inhaltsVerzeichnis:
                return inhaltsVerzeichnis.Datei.Any(d => d.Name.Equals(fileNameParts.Last(), StringComparison.InvariantCultureIgnoreCase));
            case OrdnerDIP ordner:
                return ordner.Datei.Any(d => d.Name.Equals(fileNameParts.Last(), StringComparison.InvariantCultureIgnoreCase));
            default:
                return false;
        }
    });

    return dateien.FirstOrDefault();
}
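// Note: FindOrdner and FindDateiInOrdnerList are not part of this section. A minimal sketch of
// FindOrdner, assuming InhaltsverzeichnisDIP and OrdnerDIP both expose an Ordner collection (as
// the other snippets here suggest) and mirroring the case-insensitive name comparison used above:
private static List<OrdnerDIP> FindOrdner(string name, List<object> parents)
{
    var result = new List<OrdnerDIP>();
    foreach (var parent in parents)
    {
        switch (parent)
        {
            case InhaltsverzeichnisDIP inhaltsverzeichnis:
                result.AddRange(inhaltsverzeichnis.Ordner.Where(o => o.Name.Equals(name, StringComparison.InvariantCultureIgnoreCase)));
                break;
            case OrdnerDIP ordner:
                result.AddRange(ordner.Ordner.Where(o => o.Name.Equals(name, StringComparison.InvariantCultureIgnoreCase)));
                break;
        }
    }

    return result;
}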
public void Enum_values_are_correctly_serialized()
{
    // Arrange
    var sut = new PaketDIP();

    // Act
    sut.Generierungsdatum = DateTime.Today;
    sut.SchemaVersion = SchemaVersion.Item41;
    sut.SchemaLocation = "http://bar.admin.ch/gebrauchskopie/v1 gebrauchskopie.xsd";
    var serialized = sut.Serialize();

    // Assert
    serialized.Should().Contain("schemaVersion=\"4.1\"");
    serialized.Should().Contain("schemaLocation=\"http://bar.admin.ch/gebrauchskopie/v1 gebrauchskopie.xsd\"");
}
/// <summary>
/// <para>
/// Converts single-page JPEG 2000 scans found within the package into (multi-page) PDF documents.
/// One PDF is created per document or per dossier (with direct dateiRef's). The metadata information in the package is
/// updated to reflect the changes made.
/// </para>
/// <para>The following assumptions are made:</para>
/// <list type="bullet">
/// <item>JPEG 2000 files have the extension .jp2</item>
/// <item>A .jp2 file may be accompanied by a premis xml file. The premis filename is "[jpeg2000Filename]_premis.xml"</item>
/// <item>
/// Within one document (or dossier with dateiRef) only .jp2 files are allowed.
/// If other file types are mixed in (except for the premis files), the conversion silently fails for that
/// document.
/// </item>
/// <item>The premis files are removed after the pdf creation has taken place.</item>
/// </list>
/// </summary>
/// <param name="paket">The package to be converted</param>
/// <param name="folder">The root folder where the files can be found.</param>
public void ConvertSingleJpeg2000ScansToPdfDocuments(PaketDIP paket, string folder)
{
    rootFolder = folder;
    paketToConvert = paket;

    // Default settings for image conversion
    encoderParameters = new EncoderParameters(1);
    var encoderParameter = new EncoderParameter(Encoder.Quality, settings.JpegQualitaetInProzent);
    encoderParameters.Param[0] = encoderParameter;

    BitmapImageCreator.Register();

    foreach (var ordnungssystemposition in paket.Ablieferung.Ordnungssystem.Ordnungssystemposition)
    {
        ProcessOrdnungssystemPosition(ordnungssystemposition);
    }
}
public void Zusatzdaten_are_correctly_serialized()
{
    // Arrange
    var sut = new PaketDIP();

    // Act
    sut.Generierungsdatum = DateTime.Today;
    sut.SchemaVersion = SchemaVersion.Item41;
    sut.Ablieferung = new AblieferungDIP
    {
        AblieferndeStelle = "ablieferndeStelle",
        Ordnungssystem = new OrdnungssystemDIP
        {
            Ordnungssystemposition = new List<OrdnungssystempositionDIP>
            {
                new OrdnungssystempositionDIP
                {
                    Dossier = new List<DossierDIP>
                    {
                        new DossierDIP
                        {
                            Id = "myDossierId",
                            zusatzDaten = new List<ZusatzDatenMerkmal>
                            {
                                new ZusatzDatenMerkmal { Name = "propName1", Value = "Value1" },
                                new ZusatzDatenMerkmal { Name = "propName2", Value = "Value2" }
                            }
                        }
                    }
                }
            }
        }
    };
    var serialized = sut.Serialize();

    // Assert
    serialized.Should().Contain("<zusatzDaten>");
    serialized.Should().Contain("<merkmal name=\"");
}
public static void UpdateFile(FileInfo file, FileInfo newFile, PaketDIP paket, string tempFolder)
{
    var datei = GetDatei(file, paket, tempFolder, out var _);
    if (datei == null)
    {
        if (paket.Ablieferung.Bemerkung != "Metadata.xml das nicht zum Inhalt passt für Testsysteme")
        {
            throw new Exception($"Im Metadata.xml wurde für die Datei '{file.FullName}' kein Eintrag gefunden.");
        }

        return;
    }

    datei.Name = newFile.Name;
    datei.Pruefsumme = CalculateMd5(newFile);
    datei.Pruefalgorithmus = Pruefalgorithmus.MD5;
    datei.Eigenschaft.Clear();
}
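// Note: CalculateMd5 is not part of this section. A minimal sketch of such a helper using the
// framework's MD5 class; rendering the digest as a lowercase hex string is an assumption about
// the format the schema expects:
private static string CalculateMd5(FileInfo file)
{
    using (var md5 = System.Security.Cryptography.MD5.Create())
    using (var stream = file.OpenRead())
    {
        var hash = md5.ComputeHash(stream);
        return BitConverter.ToString(hash).Replace("-", string.Empty).ToLowerInvariant();
    }
}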
private async Task<string> ConvertFile(FileInfo file, PaketDIP paket, string tempFolder)
{
    if (!file.Exists)
    {
        throw new FileNotFoundException($"Unable to find file {file.FullName}", file.FullName);
    }

    var supportedFileTypesForRendering = await renderEngine.GetSupportedFileTypes();
    if (!supportedFileTypesForRendering.Contains(file.Extension.Replace(".", "").ToLowerInvariant()))
    {
        return file.FullName;
    }

    var targetExtension = GetTargetExtension(file);
    var convertedFile = await renderEngine.ConvertFile(file.FullName, targetExtension);
    MetadataXmlUpdater.UpdateFile(file, new FileInfo(convertedFile), paket, tempFolder);
    return convertedFile;
}
public static DateiParents RemoveFile(FileInfo file, PaketDIP paket, string tempFolder)
{
    var datei = GetDatei(file, paket, tempFolder, out var ordnerOderInhaltverzeinis);
    var parents = new DateiParents
    {
        OrdnerOderInhaltverzeinis = ordnerOderInhaltverzeinis
    };

    GetDateiList(ordnerOderInhaltverzeinis).Remove(datei);

    foreach (var ordnungssystemposition in paket.Ablieferung.Ordnungssystem.Ordnungssystemposition)
    {
        var dossierOderDokument = RemoveDateiRef(ordnungssystemposition, datei.Id);
        if (dossierOderDokument != null)
        {
            parents.DossierOderDokument = dossierOderDokument;
        }
    }

    return parents;
}
private void AddFileToPackage(string sampleFileName, string targetFolderInsideContent, PaketDIP paket, string rootFolder)
{
    var newFile = Path.Combine(rootFolder, "content", targetFolderInsideContent, sampleFileName);
    using (var file = new StreamWriter(newFile))
    {
        file.Write("just a file");
    }

    MetadataXmlUpdater.AddFile(new FileInfo(newFile), new DateiParents
    {
        DossierOderDokument = paket.Ablieferung.Ordnungssystem.Ordnungssystemposition[0].Ordnungssystemposition[0].Ordnungssystemposition[0]
            .Dossier[0].Dokument.FirstOrDefault(d => d.Titel == targetFolderInsideContent),
        OrdnerOderInhaltverzeinis = paket.Inhaltsverzeichnis.Ordner[0].Ordner.FirstOrDefault(o => o.Name == targetFolderInsideContent)
    });
}
public async Task CreateMetadataXml(string folderName, RepositoryPackage package, List<RepositoryFile> filesToIgnore)
{
    // Make sure the folder exists
    if (!Directory.Exists(folderName))
    {
        Directory.CreateDirectory(folderName);
    }

    CopyXsdFiles(folderName);

    // Read the ordered item and all of its children from the Elastic index.
    var getArchiveRecordsForPackageRequest = new GetArchiveRecordsForPackageRequest { PackageId = package.PackageId };
    var response = await indexClient.GetResponse<GetArchiveRecordsForPackageResponse>(getArchiveRecordsForPackageRequest);
    var indexRecords = response.Message?.Result;
    Log.Debug($"Found the following archive records for packageId {package.PackageId}: {JsonConvert.SerializeObject(indexRecords)}");

    // If using the Alfresco repository, we simply return a "hard coded" file
    if (repositoryAccess.GetRepositoryName().StartsWith("Alfresco", StringComparison.InvariantCultureIgnoreCase))
    {
        var defaultMetadata = GetFileFromRessource();
        File.WriteAllText(Path.Combine(folderName, "metadata.xml"), defaultMetadata);
        return;
    }

    // Read all folder objects from the DIR: upwards from the entry point to the Ablieferung,
    // and downwards to the deepest level.
    InitFolders(package);

    var dip = new PaketDIP
    {
        SchemaVersion = SchemaVersion.Item41,
        Generierungsdatum = DateTime.Today,
        Bestellinformation = null // Only relevant for the Benutzungskopie created by Vecteur
    };

    // Write the data into the schema structure, starting from the "topmost" folder, the Ablieferung.
    var root = FoldersTreeList.Find(f => f.Parent == null);
    if (root == null)
    {
        throw new InvalidOperationException("Unable to find root folder for exporting metadata.");
    }

    AddFolderData(root, null, dip, indexRecords.ToList(), package.PackageId, filesToIgnore);

    // Generate the table of contents (Inhaltsverzeichnis)
    var contentRoot = new OrdnerDIP
    {
        Id = $"contentRoot{DateTime.Now.Ticks}",
        Name = "content",
        OriginalName = "content"
    };
    dip.Inhaltsverzeichnis.Ordner.Add(contentRoot);

    foreach (var folder in package.Folders)
    {
        ProcessFolder(contentRoot.Ordner, folder);
    }

    ProcessFiles(contentRoot.Datei, package.Files);

    // Save data to disk
    dip.SchemaLocation = "http://bar.admin.ch/gebrauchskopie/v1 gebrauchskopie.xsd";
    ((Paket)dip).SaveToFile(Path.Combine(folderName, "metadata.xml"));
}
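// Note: GetFileFromRessource is not part of this section; the Alfresco branch above suggests it
// returns a default metadata.xml. A minimal sketch, assuming the file ships as an embedded
// resource (the resource name below is hypothetical):
private static string GetFileFromRessource()
{
    var assembly = System.Reflection.Assembly.GetExecutingAssembly();
    using (var stream = assembly.GetManifestResourceStream("DefaultMetadata.metadata.xml"))
    using (var reader = new StreamReader(stream))
    {
        return reader.ReadToEnd();
    }
}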
public OrdnungssystempositionDIP FindOrdnungssystemPositionInPackage(FolderInfo folder, PaketDIP dip)
{
    Log.Verbose("Trying to find ordnungssystem position in dip package for folder with Id {Id}", folder.Id);
    var allPositions = dip.Ablieferung.Ordnungssystem.Ordnungssystemposition.Traverse(p => p.Ordnungssystemposition).ToList();
    var found = allPositions.FirstOrDefault(p => p.Id == folder.Id);
    Log.Verbose("Found this item: {found}", JsonConvert.SerializeObject(found));
    return found;
}
/// <summary>
/// Adds the folder data to the DIP package.
/// </summary>
/// <param name="folder">The folder to add.</param>
/// <param name="parent">The parent under which the new folder should be added.</param>
/// <param name="dip">The existing dip object where the data is added.</param>
/// <param name="indexRecords">
/// A list with the metadata information from the elastic index containing the ordered archive record and all its
/// children. The first record in the collection is the ordered archive record.
/// </param>
/// <param name="packageId">The packageId of the ordered item</param>
/// <param name="filesToIgnore">A list of files that should not be included in the output</param>
/// <exception cref="InvalidOperationException"></exception>
/// <exception cref="ArgumentOutOfRangeException"></exception>
private void AddFolderData(FolderInfo folder, FolderInfo parent, PaketDIP dip, List<ElasticArchiveRecord> indexRecords, string packageId,
    List<RepositoryFile> filesToIgnore)
{
    var extensions = folder.CmisFolder.GetExtensions(ExtensionLevel.Object);
    Log.Verbose("Adding folder data to DIP Package for {FolderType}: {data} with parent {parent}", folder.FolderType,
        JsonConvert.SerializeObject(extensions), parent);

    switch (folder.FolderType)
    {
        case PackageFolderType.Ablieferung:
            var orderedRecord = indexRecords.FirstOrDefault(i => i.PrimaryDataLink == packageId);
            AddAblieferungData(dip.Ablieferung, extensions, orderedRecord);
            break;
        case PackageFolderType.OrdnungssystemPosition:
            OrdnungssystempositionDIP position;
            switch (parent.FolderType)
            {
                case PackageFolderType.Ablieferung:
                    position = AddOrdnungssystemPositionData(dip.Ablieferung.Ordnungssystem.Ordnungssystemposition, extensions);
                    break;
                case PackageFolderType.OrdnungssystemPosition:
                    var parentPosition = FindOrdnungssystemPositionInPackage(parent, dip);
                    position = AddOrdnungssystemPositionData(parentPosition.Ordnungssystemposition, extensions);
                    break;
                default:
                    throw new InvalidOperationException(
                        "A <Ordnungssystemposition> can only be added to a <Ordnungssystem> or another <Ordnungssystemposition>.");
            }

            folder.Id = position.Id;
            break;
        case PackageFolderType.Dossier:
            DossierDIP dossier;
            var dossierRecord = GetArchiveRecordFromDossier(folder, indexRecords);
            switch (parent.FolderType)
            {
                case PackageFolderType.OrdnungssystemPosition:
                    var parentPosition = FindOrdnungssystemPositionInPackage(parent, dip);
                    dossier = AddDossierData(parentPosition.Dossier, extensions, dossierRecord,
                        folder.IsOrderedItem || folder.IsChildOfOrderedItem, filesToIgnore);
                    break;
                case PackageFolderType.Dossier:
                    var parentDossier = FindDossierInPackage(parent, dip);
                    dossier = AddDossierData(parentDossier.Dossier, extensions, dossierRecord,
                        folder.IsOrderedItem || folder.IsChildOfOrderedItem, filesToIgnore);
                    break;
                default:
                    throw new InvalidOperationException("A <Dossier> can only be added to a <Ordnungssystemposition> or another <Dossier>.");
            }

            folder.Id = dossier.Id;
            break;
        case PackageFolderType.Dokument:
            DokumentDIP dokument;
            var documentRecord = GetArchiveRecordFromDocument(folder, indexRecords);
            switch (parent.FolderType)
            {
                case PackageFolderType.Dossier:
                    var parentDossier = FindDossierInPackage(parent, dip);
                    dokument = AddDokumentData(parentDossier, extensions, documentRecord, filesToIgnore);
                    break;
                default:
                    throw new InvalidOperationException("A <Dokument> can only be added to a <Dossier>.");
            }

            folder.Id = dokument.Id;
            break;
        default:
            throw new ArgumentOutOfRangeException();
    }

    foreach (var child in FoldersTreeList.GetChildren(folder))
    {
        AddFolderData(child, folder, dip, indexRecords, packageId, filesToIgnore);
    }
}
private async Task ConvertFolders(string id, List<RepositoryFolder> folders, PaketDIP paket, string rootFolder, string tempFolder, JobContext context)
{
    foreach (var repositoryFolder in folders)
    {
        var newPath = Path.Combine(tempFolder, repositoryFolder.PhysicalName);
        await ConvertFiles(id, repositoryFolder.Files, paket, rootFolder, newPath, context);
        await ConvertFolders(id, repositoryFolder.Folders, paket, rootFolder, newPath, context);
    }
}