// Converts the jp2_OK sample package and verifies that every Dokument/Umschlag folder is left with
// exactly one file entry and that no xml files remain in the matching directories on disk.
public void A_valid_file_is_happily_converted()
{
    // Arrange
    var metadataFile = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestDataCopy\jp2_OK\header\metadata.xml");
    var rootFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestDataCopy\jp2_OK");
    var paket = (PaketDIP)Paket.LoadFromFile(metadataFile);
    var settings = new ScansZusammenfassenSettings { GroesseInProzent = 100, DefaultAufloesungInDpi = 300, JpegQualitaetInProzent = 80 };
    var processor = new ScanProcessor(new FileResolution(settings), settings);

    // Act
    processor.ConvertSingleJpeg2000ScansToPdfDocuments(paket, rootFolder);

    // Assert
    // Every Dokument and Umschlag got converted
    var contentFolder = paket.Inhaltsverzeichnis.Ordner[0];

    // The folder lookup is asserted explicitly: a null-conditional call chain would silently skip
    // the count assertion when the folder is missing and let the test pass without checking anything.
    void AssertFileCount(string folderName, int expectedCount)
    {
        var folder = contentFolder.Ordner.FirstOrDefault(o => o.Name == folderName);
        folder.Should().NotBeNull("the folder {0} must exist in the package", folderName);
        folder.Datei.Count.Should().Be(expectedCount);
    }

    AssertFileCount("D_o_k_u_m_e_n_t_0000001", 1); // Just the pdf
    AssertFileCount("D_o_k_u_m_e_n_t_0000002", 1); // Just the pdf
    AssertFileCount("U_m_s_c_h_l_a_g_0000001", 1); // Just the pdf

    // Premis files deleted?
    var dokument1 = new DirectoryInfo(Path.Combine(rootFolder, "content", "D_o_k_u_m_e_n_t_0000001"));
    var dokument2 = new DirectoryInfo(Path.Combine(rootFolder, "content", "D_o_k_u_m_e_n_t_0000002"));
    var umschlagDirectory = new DirectoryInfo(Path.Combine(rootFolder, "content", "U_m_s_c_h_l_a_g_0000001"));
    umschlagDirectory.GetFiles("*.xml").Length.Should().Be(0);
    dokument1.GetFiles("*.xml").Length.Should().Be(0);
    dokument2.GetFiles("*.xml").Length.Should().Be(0);
}
// Runs the conversion on the jp2_NOK sample package and verifies that Dokument 1 and 2 keep their
// eight file entries while Umschlag 1 is reduced to a single entry without xml files on disk.
public void A_messed_up_metadata_file_results_in_unchanged_document_1_and_2()
{
    // Arrange
    // This metadata file has data that has wrongly named jp2/premis pairs, so it does not line up
    var metadataFile = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestDataCopy\jp2_NOK\header\metadata.xml");
    var rootFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestDataCopy\jp2_NOK");
    var paket = (PaketDIP)Paket.LoadFromFile(metadataFile);
    var settings = new ScansZusammenfassenSettings { GroesseInProzent = 100, DefaultAufloesungInDpi = 300, JpegQualitaetInProzent = 80 };
    var processor = new ScanProcessor(new FileResolution(settings), settings);

    // Act
    processor.ConvertSingleJpeg2000ScansToPdfDocuments(paket, rootFolder);

    // Assert
    // Document 1 and 2 are the same
    // Umschlag 1 got converted to pdf
    var contentFolder = paket.Inhaltsverzeichnis.Ordner[0];

    // The folder lookup is asserted explicitly: a null-conditional call chain would silently skip
    // the count assertion when the folder is missing and let the test pass without checking anything.
    void AssertFileCount(string folderName, int expectedCount)
    {
        var folder = contentFolder.Ordner.FirstOrDefault(o => o.Name == folderName);
        folder.Should().NotBeNull("the folder {0} must exist in the package", folderName);
        folder.Datei.Count.Should().Be(expectedCount);
    }

    AssertFileCount("D_o_k_u_m_e_n_t_0000001", 8); // The original jp2 and premis
    AssertFileCount("D_o_k_u_m_e_n_t_0000002", 8); // The original jp2 and premis
    AssertFileCount("U_m_s_c_h_l_a_g_0000001", 1); // Just the pdf

    // Premis files deleted?
    var umschlagDirectory = new DirectoryInfo(Path.Combine(rootFolder, "content", "U_m_s_c_h_l_a_g_0000001"));
    umschlagDirectory.GetFiles("*.xml").Length.Should().Be(0);
}
// Entry point of the tester tool.
// args[0]: directory containing a sample DIP package (required, must exist).
// args[1]: optional JPEG quality; falls back to 80 when missing or not an integer.
// args[2]: optional new size in percent of the original image; falls back to 100 when missing or not an integer.
private static void Main(string[] args)
{
    ConfigureLogging();
    Log.Information("CMI.Manager.Asset.TransformJp2ToPdfTester starting");

    if (args.Length == 0 || !Directory.Exists(args[0]))
    {
        Console.WriteLine(
            "You need to provide a directory with a sample DIP package to process as an argument. As a second argument the JPEG quality can be provided. A third parameter sets the new size in percent of the original image");
        Console.ReadLine();
        return;
    }

    // Read source folder
    var sourceFolder = args[0];

    // The length checks use ">=" so that the quality is still honoured when the size
    // (third argument) is supplied as well; an exact "== 2" would drop it in that case.
    var jpegQuality = 80; // Default
    if (args.Length >= 2 && int.TryParse(args[1], out var quality))
    {
        jpegQuality = quality;
    }

    var sizeInPercent = 100; // Default
    if (args.Length >= 3 && int.TryParse(args[2], out var size))
    {
        sizeInPercent = size;
    }

    try
    {
        var transformEngine = new TransformEngine(new Xsl2Processor());
        ConvertAreldaMetadataXml(sourceFolder, transformEngine);

        var metadataFile = Path.Combine(sourceFolder, "header", "metadata.xml");
        var paket = (PaketDIP)Paket.LoadFromFile(metadataFile);

        // Create pdf documents from scanned jpeg 2000 scans.
        var scanProcessor = new ScanProcessor();
        scanProcessor.ConvertSingleJpeg2000ScansToPdfDocuments(paket, sourceFolder,
            new ScansZusammenfassenSettings
            {
                DefaultAufloesungInDpi = 300,
                GroesseInProzent = sizeInPercent,
                JpegQualitaetInProzent = jpegQuality
            });
    }
    catch (Exception ex)
    {
        // Any failure is logged only; the tool then simply ends.
        Log.Error(ex, $"Unexpected error. {ex.Message}");
    }
}
// Looks up a dossier by id in the dipTestdata1.xml package and expects the item with that id back.
public void Finding_nested_dossier_returns_correct_item()
{
    // Arrange
    const string dossierId = "EkYg";
    var handler = new PackageHandler(null, null, null);
    var testDataPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "dipTestdata1.xml");
    var package = (PaketDIP)Paket.LoadFromFile(testDataPath);
    var searchedFolder = new FolderInfo { Id = dossierId };

    // Act
    var foundDossier = handler.FindDossierInPackage(searchedFolder, package);

    // Assert
    foundDossier.Id.Should().Be(dossierId);
}
// Asks MetadataXmlUpdater.GetDatei for a file (dummy.pdf) that the test expects not to be found:
// both the returned file and the out folder must be null.
public void Find_file_that_does_not_exist_returns_null()
{
    // Arrange
    var testDirectory = TestContext.CurrentContext.TestDirectory;
    var package = (PaketDIP)Paket.LoadFromFile(Path.Combine(testDirectory, @"TestData\metadata.xml"));
    var repositoryRoot = @"D:\localdata\repository\aezg240y.nxz\";
    var missingFile = new FileInfo(
        @"D:\localdata\repository\aezg240y.nxz\content\Besuche aus dem Ausland 2008\Besuch aus Tschechien\Besuch aus Tschechien_ engültiges Programm\dummy.pdf");

    // Act
    var foundFile = MetadataXmlUpdater.GetDatei(missingFile, package, repositoryRoot, out var parentFolder);

    // Assert
    foundFile.Should().BeNull();
    parentFolder.Should().BeNull();
}
// Looks up a file located directly in the package root and expects the matching entry,
// with the out parameter being of type InhaltsverzeichnisDIP.
public void Find_file_in_root_must_return_correct_file()
{
    // Arrange
    var testDirectory = TestContext.CurrentContext.TestDirectory;
    var package = (PaketDIP)Paket.LoadFromFile(Path.Combine(testDirectory, @"TestData\metadata.xml"));
    var repositoryRoot = @"D:\localdata\repository\aezg240y.nxz\";
    var rootFile = new FileInfo(@"D:\localdata\repository\aezg240y.nxz\p999999.pdf");

    // Act
    var foundFile = MetadataXmlUpdater.GetDatei(rootFile, package, repositoryRoot, out var parentFolder);

    // Assert
    foundFile.Should().NotBeNull();
    foundFile.Name.Should().Be(rootFile.Name);
    parentFolder.Should().BeOfType<InhaltsverzeichnisDIP>();
}
// Removes one file reference from the loaded jp2_OK package and expects the conversion
// to fail with an InvalidOperationException.
public void File_not_found_in_content_structure_results_in_exception()
{
    // Arrange
    var testDirectory = TestContext.CurrentContext.TestDirectory;
    var packageRoot = Path.Combine(testDirectory, @"TestDataCopy\jp2_OK");
    var package = (PaketDIP)Paket.LoadFromFile(Path.Combine(testDirectory, @"TestDataCopy\jp2_OK\header\metadata.xml"));

    // Invalidate the package: drop the first file reference of the first sub folder so that
    // the conversion runs into the exception.
    var firstSubFolder = package.Inhaltsverzeichnis.Ordner[0].Ordner[0];
    firstSubFolder.Datei.RemoveAt(0);

    var scanProcessor = new ScanProcessor();

    // Act
    Action convert = () => scanProcessor.ConvertSingleJpeg2000ScansToPdfDocuments(package, packageRoot, new ScansZusammenfassenSettings());

    // Assert
    convert.Should().Throw<InvalidOperationException>();
}
// Looks up a file whose path contains two nested folders starting with the same name and expects
// the matching entry together with the OrdnerDIP carrying the id COO.2080.100.2.2142784_D.
public void Find_file_in_xml_that_has_two_parent_folders_with_the_same_name()
{
    // Arrange
    var testDirectory = TestContext.CurrentContext.TestDirectory;
    var package = (PaketDIP)Paket.LoadFromFile(Path.Combine(testDirectory, @"TestData\metadata.xml"));
    var repositoryRoot = @"D:\localdata\repository\aezg240y.nxz\";
    var nestedFile = new FileInfo(
        @"D:\localdata\repository\aezg240y.nxz\content\Besuche aus dem Ausland 2008\Besuch aus Tschechien\Besuch aus Tschechien_ engültiges Programm\p000075.pdf");

    // Act
    var foundFile = MetadataXmlUpdater.GetDatei(nestedFile, package, repositoryRoot, out var parentFolder);

    // Assert
    foundFile.Should().NotBeNull();
    foundFile.Name.Should().Be(nestedFile.Name);
    parentFolder.Should().BeOfType<OrdnerDIP>();
    var typedFolder = (OrdnerDIP)parentFolder;
    typedFolder.Id.Should().Be("COO.2080.100.2.2142784_D");
}
// Loads header/metadata.xml from the package's temp folder, lets the scan processor turn the
// jpeg 2000 scans into pdf documents, writes the metadata back and refreshes the repository package.
// Never throws: any exception is logged and reported through a failed PreprocessingResult.
private Task<PreprocessingResult> ConvertSingleJp2ToPdf(PrepareForTransformationMessage message)
{
    // Remark text (German: "Metadata.xml that does not match the content, for test systems").
    // When the Ablieferung carries it, the repository package is refreshed from disk instead of from the metadata.
    const string mismatchRemark = "Metadata.xml das nicht zum Inhalt passt für Testsysteme";
    const string failureText = "Unexpected error while converting single jpeg 2000 to pdf.";

    PreprocessingResult outcome;
    try
    {
        var workFolder = GetTempFolder(message.RepositoryPackage);
        var metadataPath = Path.Combine(workFolder, "header", "metadata.xml");
        var dip = (PaketDIP)Paket.LoadFromFile(metadataPath);

        // Create pdf documents from scanned jpeg 2000 scans.
        scanProcessor.ConvertSingleJpeg2000ScansToPdfDocuments(dip, workFolder);

        // Save the changed info to the metadata file
        ((Paket)dip).SaveToFile(metadataPath);

        // As we changed files we need to update the RepositoryPackage
        if (dip.Ablieferung.Bemerkung == mismatchRemark)
        {
            UpdateRepositoryPackageFromDisk(message.RepositoryPackage, workFolder);
        }
        else
        {
            UpdateRepositoryPackage(message.RepositoryPackage, dip);
        }

        outcome = new PreprocessingResult { Success = true };
    }
    catch (Exception ex)
    {
        Log.Error(ex, failureText);
        outcome = new PreprocessingResult { Success = false, ErrorMessage = failureText };
    }

    return Task.FromResult(outcome);
}
// Adds an extra txt file to each Dokument/Umschlag folder of the jp2_OK sample package, runs the
// conversion and verifies that the file counts in the metadata and the jp2/xml files on disk are untouched.
public void Files_linked_to_document_that_are_not_jp2_files_and_not_premis_result_in_unchanged_package()
{
    // Arrange
    var metadataFile = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestDataCopy\jp2_OK\header\metadata.xml");
    var rootFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestDataCopy\jp2_OK");
    var paket = (PaketDIP)Paket.LoadFromFile(metadataFile);
    var settings = new ScansZusammenfassenSettings { GroesseInProzent = 100, DefaultAufloesungInDpi = 300, JpegQualitaetInProzent = 80 };
    var processor = new ScanProcessor(new FileResolution(settings), settings);

    // Add some weird files to the package
    AddFileToPackage("test01.txt", "D_o_k_u_m_e_n_t_0000001", paket, rootFolder);
    AddFileToPackage("test02.txt", "D_o_k_u_m_e_n_t_0000002", paket, rootFolder);
    AddFileToPackage("test03.txt", "U_m_s_c_h_l_a_g_0000001", paket, rootFolder);

    // Act
    processor.ConvertSingleJpeg2000ScansToPdfDocuments(paket, rootFolder);

    // Assert
    // Nothing should be changed
    var contentFolder = paket.Inhaltsverzeichnis.Ordner[0];

    // The folder lookup is asserted explicitly: a null-conditional call chain would silently skip
    // the count assertion when the folder is missing and let the test pass without checking anything.
    void AssertFileCount(string folderName, int expectedCount)
    {
        var folder = contentFolder.Ordner.FirstOrDefault(o => o.Name == folderName);
        folder.Should().NotBeNull("the folder {0} must exist in the package", folderName);
        folder.Datei.Count.Should().Be(expectedCount);
    }

    AssertFileCount("D_o_k_u_m_e_n_t_0000001", 9);
    AssertFileCount("D_o_k_u_m_e_n_t_0000002", 9);
    AssertFileCount("U_m_s_c_h_l_a_g_0000001", 5);

    // All files present?
    var dokument1 = new DirectoryInfo(Path.Combine(rootFolder, "content", "D_o_k_u_m_e_n_t_0000001"));
    var dokument2 = new DirectoryInfo(Path.Combine(rootFolder, "content", "D_o_k_u_m_e_n_t_0000002"));
    var umschlagDirectory = new DirectoryInfo(Path.Combine(rootFolder, "content", "U_m_s_c_h_l_a_g_0000001"));
    umschlagDirectory.GetFiles("*.xml").Length.Should().Be(2);
    umschlagDirectory.GetFiles("*.jp2").Length.Should().Be(2);
    dokument1.GetFiles("*.xml").Length.Should().Be(4);
    dokument1.GetFiles("*.jp2").Length.Should().Be(4);
    dokument2.GetFiles("*.xml").Length.Should().Be(4);
    dokument2.GetFiles("*.jp2").Length.Should().Be(4);
}
// Transforms the given metadata xml with the areldaConvert.xsl stylesheet, loads the result as a
// PaketDIP and prints its generation date to the console, then waits for a key press.
// Returns silently when the stylesheet or the source file does not exist.
private static void ConvertAreldaMetadataXml(string sourceFile)
{
    // Stylesheet shipped next to the executable
    var transformationFile = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "Html", "Xslt", "areldaConvert.xsl");

    // If one of the files does not exist there is nothing to transform.
    if (!File.Exists(transformationFile) || !File.Exists(sourceFile))
    {
        return;
    }

    // Do transformation
    var result = transformEngine.TransformXml(sourceFile, transformationFile, null);

    // Path.GetTempFileName creates a real file on disk, so it has to be removed again
    // once the package was loaded; otherwise every run leaves a file behind in the temp folder.
    var tempFile = Path.GetTempFileName();
    try
    {
        File.WriteAllText(tempFile, result);
        var paket = (PaketDIP)Paket.LoadFromFile(tempFile);

        Console.WriteLine($"Paket generiert am: {paket.Generierungsdatum.ToShortDateString()}");
        Console.ReadLine();
    }
    finally
    {
        File.Delete(tempFile);
    }
}
/// <summary>
///     Converts an already unzipped package to a usage copy: converts its files and folders, rewrites
///     metadata.xml, adds the readme, design and index files and creates the final zip file.
///     The unzipped working folder is deleted afterwards, whether the conversion succeeded or not.
/// </summary>
/// <param name="id">ArchiveRecordId or OrderItemId</param>
/// <param name="assetType">The asset type.</param>
/// <param name="protectWithPassword">Passed on unchanged to CreateZipFile; presumably controls password protection of the zip (confirm there).</param>
/// <param name="package">The package to convert</param>
/// <returns>A result whose Valid flag is false (with an ErrorMessage) when the zip file is missing or the conversion failed.</returns>
/// <exception cref="InvalidOperationException">A Gebrauchskopie is requested without a package id, or the package file has no directory.</exception>
public async Task<PackageConversionResult> ConvertPackage(string id, AssetType assetType, bool protectWithPassword, RepositoryPackage package)
{
    var result = new PackageConversionResult { Valid = true };
    var packageFileName = Path.Combine(Settings.Default.PickupPath, package.PackageFileName);
    var zipInfo = new FileInfo(packageFileName);

    // Make sure Gebrauchskopien have a packageId
    var isUsageCopyWithoutId = assetType == AssetType.Gebrauchskopie && string.IsNullOrEmpty(package.PackageId);
    if (isUsageCopyWithoutId)
    {
        throw new InvalidOperationException("Assets of type <Gebrauchskopie> require a packageId");
    }

    // Without the zip file there is nothing to convert
    if (!File.Exists(zipInfo.FullName))
    {
        Log.Warning("Unable to find the zip file {packageFileName} for conversion.", packageFileName);
        result.Valid = false;
        result.ErrorMessage = $"Unable to find the zip file {packageFileName} for conversion.";
        return result;
    }

    Log.Information("Found zip file {Name}. File is already unzipped.", zipInfo.Name);

    // The working folder sits next to the zip file and is named like the zip file without its extension
    var parentDirectory = zipInfo.DirectoryName ?? throw new InvalidOperationException();
    var workFolder = Path.Combine(parentDirectory, zipInfo.Name.Substring(0, zipInfo.Name.Length - zipInfo.Extension.Length));

    try
    {
        var metadataPath = Path.Combine(workFolder, "header", "metadata.xml");
        var dip = (PaketDIP)Paket.LoadFromFile(metadataPath);
        var contentPath = Path.Combine(workFolder, "content");
        var jobContext = new JobContext
        {
            ArchiveRecordId = package.ArchiveRecordId,
            PackageId = package.PackageId
        };

        await ConvertFiles(id, package.Files, dip, workFolder, contentPath, jobContext);
        await ConvertFolders(id, package.Folders, dip, workFolder, contentPath, jobContext);

        dip.Generierungsdatum = DateTime.Today;
        ((Paket)dip).SaveToFile(metadataPath);

        AddReadmeFile(workFolder);
        AddDesignFiles(workFolder);
        CreateIndexHtml(workFolder, package.PackageId);

        // Create zip file with the name of the archive
        var targetZipFolder = Path.Combine(parentDirectory, assetType.ToString(), id);
        var targetZipFile = targetZipFolder + ".zip";
        CreateZipFile(targetZipFolder, targetZipFile, workFolder, protectWithPassword, id);
        result.FileName = targetZipFile;

        // Reaching this point means every step succeeded
        Log.Information("Successfully processed (converted formats) zip file {Name}", zipInfo.Name);
    }
    catch (Exception ex)
    {
        Log.Error(ex, "Unexpected exception while converting the package.");
        result.Valid = false;
        result.ErrorMessage = $"Unexpected exception while converting the package.\nException:\n{ex}";
    }
    finally
    {
        // Delete the temp files
        if (Directory.Exists(workFolder))
        {
            Directory.Delete(workFolder, true);
        }
    }

    return result;
}
/// <summary>
///     Converts a zipped package to a usage copy: extracts the zip file, merges jpeg 2000 scans into pdf
///     documents, converts every file below the content folder, rewrites metadata.xml, adds the readme,
///     design and index files and creates the final zip file.
///     The extraction folder is deleted afterwards, whether the conversion succeeded or not.
/// </summary>
/// <param name="id">ArchiveRecordId or OrderItemId</param>
/// <param name="assetType">The asset type.</param>
/// <param name="protectWithPassword">Passed on unchanged to CreateZipFile; presumably controls password protection of the zip (confirm there).</param>
/// <param name="fileName">Name of the package file to convert.</param>
/// <param name="packageId">The id of the ordered package</param>
/// <returns>A result whose Valid flag is false (with an ErrorMessage) when the zip file is missing or the conversion failed.</returns>
/// <exception cref="InvalidOperationException">A Gebrauchskopie is requested without a package id.</exception>
public async Task<PackageConversionResult> ConvertPackage(string id, AssetType assetType, bool protectWithPassword, string fileName, string packageId)
{
    var result = new PackageConversionResult { Valid = true };
    var packageFileName = Path.Combine(Settings.Default.PickupPath, fileName);
    var zipInfo = new FileInfo(packageFileName);

    // Make sure Gebrauchskopien have a packageId
    var isUsageCopyWithoutId = assetType == AssetType.Gebrauchskopie && string.IsNullOrEmpty(packageId);
    if (isUsageCopyWithoutId)
    {
        throw new InvalidOperationException("Assets of type <Gebrauchskopie> require a packageId");
    }

    // Without the zip file there is nothing to convert
    if (!File.Exists(zipInfo.FullName))
    {
        Log.Warning("Unable to find the zip file {packageFileName} for conversion.", packageFileName);
        result.Valid = false;
        result.ErrorMessage = $"Unable to find the zip file {packageFileName} for conversion.";
        return result;
    }

    Log.Information("Found zip file {Name}. Starting to extract...", zipInfo.Name);

    // The extraction folder sits next to the zip file and is named like the zip file without its extension
    var parentDirectory = zipInfo.DirectoryName;
    var workFolder = Path.Combine(parentDirectory, zipInfo.Name.Substring(0, zipInfo.Name.Length - zipInfo.Extension.Length));

    try
    {
        // Extract zip file to disk
        ZipFile.ExtractToDirectory(packageFileName, workFolder);

        if (assetType == AssetType.Benutzungskopie)
        {
            ConvertAreldaMetadataXml(workFolder);
        }

        var metadataPath = Path.Combine(workFolder, "header", "metadata.xml");
        var dip = (PaketDIP)Paket.LoadFromFile(metadataPath);

        // Create pdf documents from scanned jpeg 2000 scans.
        var scanSettings = parameterHelper.GetSetting<ScansZusammenfassenSettings>();
        scanProcessor.ConvertSingleJpeg2000ScansToPdfDocuments(dip, workFolder, scanSettings);

        // Get all the files from the subdirectory "content" in the extracted directory
        var contentDirectory = new DirectoryInfo(Path.Combine(workFolder, "content"));
        foreach (var sourceFile in contentDirectory.GetFiles("*.*", SearchOption.AllDirectories))
        {
            Log.Information("Start extracting text for file: {file} for archive record or order id {id}", sourceFile, id);
            var convertedPath = await ConvertFile(sourceFile, dip, workFolder);

            // Remove the original only when a converted file exists under a different name.
            // For PDF the original and converted name can be identical (PDF to PDF with OCR).
            var hasSeparateConvertedFile = !string.IsNullOrEmpty(convertedPath)
                                           && File.Exists(convertedPath)
                                           && convertedPath != sourceFile.FullName;
            if (hasSeparateConvertedFile)
            {
                sourceFile.Delete();
            }
        }

        dip.Generierungsdatum = DateTime.Today;
        ((Paket)dip).SaveToFile(metadataPath);

        AddReadmeFile(workFolder);
        AddDesignFiles(workFolder);
        CreateIndexHtml(workFolder, packageId);

        // Create zip file with the name of the archive
        var targetZipFolder = Path.Combine(parentDirectory, assetType.ToString(), id);
        var targetZipFile = targetZipFolder + ".zip";
        CreateZipFile(targetZipFolder, targetZipFile, workFolder, protectWithPassword, id);
        result.FileName = targetZipFile;

        // Reaching this point means every step succeeded
        Log.Information("Successfully processed (converted formats) zip file {Name}", zipInfo.Name);
    }
    catch (Exception ex)
    {
        Log.Error(ex, "Unexpected exception while converting the package.");
        result.Valid = false;
        result.ErrorMessage = $"Unexpected exception while converting the package.\nException:\n{ex}";
    }
    finally
    {
        // Delete the temp files
        if (Directory.Exists(workFolder))
        {
            Directory.Delete(workFolder, true);
        }
    }

    return result;
}