Example #1
0
        /// <summary>
        ///     Converts the jp2_OK sample package and checks that each Dokument/Umschlag folder
        ///     holds exactly one file entry afterwards and that no xml files remain on disk.
        /// </summary>
        public void A_valid_file_is_happily_converted()
        {
            // Arrange
            var metadataFile = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestDataCopy\jp2_OK\header\metadata.xml");
            var rootFolder   = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestDataCopy\jp2_OK");
            var paket        = (PaketDIP)Paket.LoadFromFile(metadataFile);
            var settings     = new ScansZusammenfassenSettings {
                GroesseInProzent = 100, DefaultAufloesungInDpi = 300, JpegQualitaetInProzent = 80
            };
            var processor = new ScanProcessor(new FileResolution(settings), settings);

            // Act
            processor.ConvertSingleJpeg2000ScansToPdfDocuments(paket, rootFolder);

            // Assert
            // Every Dokument and Umschlag got converted
            var contentFolder = paket.Inhaltsverzeichnis.Ordner[0];

            // Resolve each folder and assert that it exists before checking its file count.
            // A null-conditional chain (?.) would silently skip the assertion for a missing folder.
            var dokument1Folder = contentFolder.Ordner.FirstOrDefault(o => o.Name == "D_o_k_u_m_e_n_t_0000001");
            var dokument2Folder = contentFolder.Ordner.FirstOrDefault(o => o.Name == "D_o_k_u_m_e_n_t_0000002");
            var umschlagFolder  = contentFolder.Ordner.FirstOrDefault(o => o.Name == "U_m_s_c_h_l_a_g_0000001");

            dokument1Folder.Should().NotBeNull();
            dokument2Folder.Should().NotBeNull();
            umschlagFolder.Should().NotBeNull();

            dokument1Folder.Datei.Count.Should().Be(1); // Just the pdf
            dokument2Folder.Datei.Count.Should().Be(1); // Just the pdf
            umschlagFolder.Datei.Count.Should().Be(1);  // Just the pdf

            // Have the premis (xml) files been deleted?
            var dokument1         = new DirectoryInfo(Path.Combine(rootFolder, "content", "D_o_k_u_m_e_n_t_0000001"));
            var dokument2         = new DirectoryInfo(Path.Combine(rootFolder, "content", "D_o_k_u_m_e_n_t_0000002"));
            var umschlagDirectory = new DirectoryInfo(Path.Combine(rootFolder, "content", "U_m_s_c_h_l_a_g_0000001"));

            umschlagDirectory.GetFiles("*.xml").Length.Should().Be(0);
            dokument1.GetFiles("*.xml").Length.Should().Be(0);
            dokument2.GetFiles("*.xml").Length.Should().Be(0);
        }
Example #2
0
        /// <summary>
        ///     Converts the jp2_NOK sample package and checks that Dokument 1 and 2 keep their
        ///     eight file entries while the Umschlag folder ends up with a single entry and no xml files.
        /// </summary>
        public void A_messed_up_metadata_file_results_in_unchanged_document_1_and_2()
        {
            // Arrange
            // This metadata file has data that has wrongly named jp2/premis pairs, so it does not line up
            var metadataFile = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestDataCopy\jp2_NOK\header\metadata.xml");
            var rootFolder   = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestDataCopy\jp2_NOK");
            var paket        = (PaketDIP)Paket.LoadFromFile(metadataFile);
            var settings     = new ScansZusammenfassenSettings {
                GroesseInProzent = 100, DefaultAufloesungInDpi = 300, JpegQualitaetInProzent = 80
            };
            var processor = new ScanProcessor(new FileResolution(settings), settings);

            // Act
            processor.ConvertSingleJpeg2000ScansToPdfDocuments(paket, rootFolder);

            // Assert
            // Document 1 and 2 are the same
            // Umschlag 1 got converted to pdf
            var contentFolder = paket.Inhaltsverzeichnis.Ordner[0];

            // Resolve each folder and assert that it exists before checking its file count.
            // A null-conditional chain (?.) would silently skip the assertion for a missing folder.
            var dokument1Folder = contentFolder.Ordner.FirstOrDefault(o => o.Name == "D_o_k_u_m_e_n_t_0000001");
            var dokument2Folder = contentFolder.Ordner.FirstOrDefault(o => o.Name == "D_o_k_u_m_e_n_t_0000002");
            var umschlagFolder  = contentFolder.Ordner.FirstOrDefault(o => o.Name == "U_m_s_c_h_l_a_g_0000001");

            dokument1Folder.Should().NotBeNull();
            dokument2Folder.Should().NotBeNull();
            umschlagFolder.Should().NotBeNull();

            dokument1Folder.Datei.Count.Should().Be(8); // The original jp2 and premis
            dokument2Folder.Datei.Count.Should().Be(8); // The original jp2 and premis
            umschlagFolder.Datei.Count.Should().Be(1);  // Just the pdf

            // Have the premis (xml) files been deleted?
            var umschlagDirectory = new DirectoryInfo(Path.Combine(rootFolder, "content", "U_m_s_c_h_l_a_g_0000001"));

            umschlagDirectory.GetFiles("*.xml").Length.Should().Be(0);
        }
Example #3
0
        /// <summary>
        ///     Entry point of the tester: transforms the metadata of a sample DIP package and
        ///     converts its jpeg 2000 scans to pdf documents.
        /// </summary>
        /// <param name="args">
        ///     args[0]: directory of the sample DIP package (required).
        ///     args[1]: JPEG quality (optional, defaults to 80).
        ///     args[2]: new size in percent of the original image (optional, defaults to 100).
        /// </param>
        private static void Main(string[] args)
        {
            ConfigureLogging();

            Log.Information("CMI.Manager.Asset.TransformJp2ToPdfTester starting");

            if (args.Length == 0 || !Directory.Exists(args[0]))
            {
                Console.WriteLine(
                    "You need to provide a directory with a sample DIP package to process as an argument. As a second argument the JPEG quality can be provided. A third parameter sets the new size in percent of the original image");
                Console.ReadLine();
                return;
            }

            // Read source folder
            var sourceFolder = args[0];
            var jpegQuality  = 80; // Default

            // ">=" (not "==") so the quality is still honoured when the size argument is supplied as well.
            if (args.Length >= 2 && int.TryParse(args[1], out var quality))
            {
                jpegQuality = quality;
            }

            var sizeInPercent = 100; // Default

            // ">=" so that additional trailing arguments do not disable the size argument.
            if (args.Length >= 3 && int.TryParse(args[2], out var size))
            {
                sizeInPercent = size;
            }

            try
            {
                var transformEngine = new TransformEngine(new Xsl2Processor());
                ConvertAreldaMetadataXml(sourceFolder, transformEngine);

                var metadataFile = Path.Combine(sourceFolder, "header", "metadata.xml");
                var paket        = (PaketDIP)Paket.LoadFromFile(metadataFile);

                // Create pdf documents from scanned jpeg 2000 scans.
                var scanProcessor = new ScanProcessor();
                scanProcessor.ConvertSingleJpeg2000ScansToPdfDocuments(paket, sourceFolder,
                                                                       new ScansZusammenfassenSettings
                {
                    DefaultAufloesungInDpi = 300, GroesseInProzent = sizeInPercent, JpegQualitaetInProzent = jpegQuality
                });
            }
            catch (Exception ex)
            {
                // Any failure is logged; the process then ends normally.
                Log.Error(ex, $"Unexpected error. {ex.Message}");
            }
        }
        /// <summary>
        ///     FindDossierInPackage must return the item whose id matches the requested folder id
        ///     when searching the package loaded from dipTestdata1.xml.
        /// </summary>
        public void Finding_nested_dossier_returns_correct_item()
        {
            // Arrange
            var handler      = new PackageHandler(null, null, null);
            var testDataPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "dipTestdata1.xml");
            var package      = (PaketDIP)Paket.LoadFromFile(testDataPath);
            var searchFolder = new FolderInfo {
                Id = "EkYg"
            };

            // Act
            var foundDossier = handler.FindDossierInPackage(searchFolder, package);

            // Assert
            foundDossier.Id.Should().Be("EkYg");
        }
        /// <summary>
        ///     GetDatei must return null for both the file and the parent folder when the
        ///     requested file is not listed in the metadata.
        /// </summary>
        public void Find_file_that_does_not_exist_returns_null()
        {
            // Arrange
            var metadataPath = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\metadata.xml");
            var package      = (PaketDIP)Paket.LoadFromFile(metadataPath);
            var missingFile  = new FileInfo(
                @"D:\localdata\repository\aezg240y.nxz\content\Besuche aus dem Ausland 2008\Besuch aus Tschechien\Besuch aus Tschechien_ engültiges Programm\dummy.pdf");
            var repositoryRoot = "D:\\localdata\\repository\\aezg240y.nxz\\";

            // Act
            var foundFile = MetadataXmlUpdater.GetDatei(missingFile, package, repositoryRoot, out var parentFolder);

            // Assert
            foundFile.Should().BeNull();
            parentFolder.Should().BeNull();
        }
        /// <summary>
        ///     GetDatei must find a file located directly in the package root and report
        ///     an InhaltsverzeichnisDIP as its parent.
        /// </summary>
        public void Find_file_in_root_must_return_correct_file()
        {
            // Arrange
            var metadataPath   = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\metadata.xml");
            var package        = (PaketDIP)Paket.LoadFromFile(metadataPath);
            var rootFile       = new FileInfo(@"D:\localdata\repository\aezg240y.nxz\p999999.pdf");
            var repositoryRoot = "D:\\localdata\\repository\\aezg240y.nxz\\";

            // Act
            var foundFile = MetadataXmlUpdater.GetDatei(rootFile, package, repositoryRoot, out var parentFolder);

            // Assert
            foundFile.Should().NotBeNull();
            foundFile.Name.Should().Be(rootFile.Name);

            parentFolder.Should().BeOfType <InhaltsverzeichnisDIP>();
        }
        /// <summary>
        ///     Removing a file reference from the first folder of the loaded package must make
        ///     the conversion throw an InvalidOperationException.
        /// </summary>
        public void File_not_found_in_content_structure_results_in_exception()
        {
            // Arrange
            var testDirectory = TestContext.CurrentContext.TestDirectory;
            var metadataPath  = Path.Combine(testDirectory, @"TestDataCopy\jp2_OK\header\metadata.xml");
            var packageRoot   = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestDataCopy\jp2_OK");
            var package       = (PaketDIP)Paket.LoadFromFile(metadataPath);

            // Drop the first file reference of the first sub folder so the metadata no longer lines up.
            var firstSubFolder = package.Inhaltsverzeichnis.Ordner[0].Ordner[0];
            firstSubFolder.Datei.RemoveAt(0);
            var scanProcessor = new ScanProcessor();

            // Act (deferred, so the assertion below can observe the exception)
            Action convert = () => scanProcessor.ConvertSingleJpeg2000ScansToPdfDocuments(package, packageRoot, new ScansZusammenfassenSettings());

            // Assert
            convert.Should().Throw <InvalidOperationException>();
        }
        /// <summary>
        ///     GetDatei must resolve a file below nested folders and report the OrdnerDIP
        ///     with id COO.2080.100.2.2142784_D as its parent.
        /// </summary>
        public void Find_file_in_xml_that_has_two_parent_folders_with_the_same_name()
        {
            // Arrange
            var metadataPath = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\metadata.xml");
            var package      = (PaketDIP)Paket.LoadFromFile(metadataPath);
            var nestedFile   = new FileInfo(
                @"D:\localdata\repository\aezg240y.nxz\content\Besuche aus dem Ausland 2008\Besuch aus Tschechien\Besuch aus Tschechien_ engültiges Programm\p000075.pdf");
            var repositoryRoot = "D:\\localdata\\repository\\aezg240y.nxz\\";

            // Act
            var foundFile = MetadataXmlUpdater.GetDatei(nestedFile, package, repositoryRoot, out var parentFolder);

            // Assert
            foundFile.Should().NotBeNull();
            foundFile.Name.Should().Be(nestedFile.Name);

            parentFolder.Should().BeOfType <OrdnerDIP>();
            var typedParent = (OrdnerDIP)parentFolder;
            typedParent.Id.Should().Be("COO.2080.100.2.2142784_D");
        }
        /// <summary>
        ///     Loads header/metadata.xml from the temp folder of the message's repository package,
        ///     runs the scan processor over it, writes the metadata back and refreshes the repository package.
        /// </summary>
        /// <param name="message">The message carrying the RepositoryPackage to process.</param>
        /// <returns>
        ///     An already completed task. Success is true when no exception occurred;
        ///     otherwise Success is false and ErrorMessage holds a generic error text.
        /// </returns>
        private Task <PreprocessingResult> ConvertSingleJp2ToPdf(PrepareForTransformationMessage message)
        {
            PreprocessingResult outcome;

            try
            {
                var workingFolder = GetTempFolder(message.RepositoryPackage);
                var metadataPath  = Path.Combine(workingFolder, "header", "metadata.xml");
                var dip           = (PaketDIP)Paket.LoadFromFile(metadataPath);

                // Turn the scanned jpeg 2000 files into pdf documents.
                scanProcessor.ConvertSingleJpeg2000ScansToPdfDocuments(dip, workingFolder);

                // Persist the modified metadata.
                ((Paket)dip).SaveToFile(metadataPath);

                // Files changed, so the RepositoryPackage must be refreshed. Packages carrying the
                // marker remark below are refreshed from disk instead of from the metadata.
                var refreshFromMetadata = dip.Ablieferung.Bemerkung != "Metadata.xml das nicht zum Inhalt passt für Testsysteme";
                if (refreshFromMetadata)
                {
                    UpdateRepositoryPackage(message.RepositoryPackage, dip);
                }
                else
                {
                    UpdateRepositoryPackageFromDisk(message.RepositoryPackage, workingFolder);
                }

                outcome = new PreprocessingResult {
                    Success = true
                };
            }
            catch (Exception ex)
            {
                var errorText = "Unexpected error while converting single jpeg 2000 to pdf.";
                Log.Error(ex, errorText);
                outcome = new PreprocessingResult {
                    Success = false, ErrorMessage = errorText
                };
            }

            return Task.FromResult(outcome);
        }
Example #10
0
        /// <summary>
        ///     Adds a txt file to each Dokument/Umschlag folder of the jp2_OK sample package and
        ///     checks that the conversion leaves the file entries and the xml/jp2 files on disk untouched.
        /// </summary>
        public void Files_linked_to_document_that_are_not_jp2_files_and_not_premis_result_in_unchanged_package()
        {
            // Arrange
            var metadataFile = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestDataCopy\jp2_OK\header\metadata.xml");
            var rootFolder   = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestDataCopy\jp2_OK");
            var paket        = (PaketDIP)Paket.LoadFromFile(metadataFile);
            var settings     = new ScansZusammenfassenSettings {
                GroesseInProzent = 100, DefaultAufloesungInDpi = 300, JpegQualitaetInProzent = 80
            };
            var processor = new ScanProcessor(new FileResolution(settings), settings);

            // Add some weird files to the package
            AddFileToPackage("test01.txt", "D_o_k_u_m_e_n_t_0000001", paket, rootFolder);
            AddFileToPackage("test02.txt", "D_o_k_u_m_e_n_t_0000002", paket, rootFolder);
            AddFileToPackage("test03.txt", "U_m_s_c_h_l_a_g_0000001", paket, rootFolder);

            // Act
            processor.ConvertSingleJpeg2000ScansToPdfDocuments(paket, rootFolder);

            // Assert
            // Nothing should be changed
            var contentFolder = paket.Inhaltsverzeichnis.Ordner[0];

            // Resolve each folder and assert that it exists before checking its file count.
            // A null-conditional chain (?.) would silently skip the assertion for a missing folder.
            var dokument1Folder = contentFolder.Ordner.FirstOrDefault(o => o.Name == "D_o_k_u_m_e_n_t_0000001");
            var dokument2Folder = contentFolder.Ordner.FirstOrDefault(o => o.Name == "D_o_k_u_m_e_n_t_0000002");
            var umschlagFolder  = contentFolder.Ordner.FirstOrDefault(o => o.Name == "U_m_s_c_h_l_a_g_0000001");

            dokument1Folder.Should().NotBeNull();
            dokument2Folder.Should().NotBeNull();
            umschlagFolder.Should().NotBeNull();

            dokument1Folder.Datei.Count.Should().Be(9);
            dokument2Folder.Datei.Count.Should().Be(9);
            umschlagFolder.Datei.Count.Should().Be(5);

            // Are all files still present?
            var dokument1         = new DirectoryInfo(Path.Combine(rootFolder, "content", "D_o_k_u_m_e_n_t_0000001"));
            var dokument2         = new DirectoryInfo(Path.Combine(rootFolder, "content", "D_o_k_u_m_e_n_t_0000002"));
            var umschlagDirectory = new DirectoryInfo(Path.Combine(rootFolder, "content", "U_m_s_c_h_l_a_g_0000001"));

            umschlagDirectory.GetFiles("*.xml").Length.Should().Be(2);
            umschlagDirectory.GetFiles("*.jp2").Length.Should().Be(2);
            dokument1.GetFiles("*.xml").Length.Should().Be(4);
            dokument1.GetFiles("*.jp2").Length.Should().Be(4);
            dokument2.GetFiles("*.xml").Length.Should().Be(4);
            dokument2.GetFiles("*.jp2").Length.Should().Be(4);
        }
        /// <summary>
        ///     Transforms the given metadata file with Html/Xslt/areldaConvert.xsl (relative to the
        ///     application base directory), loads the result as a PaketDIP and prints its generation date.
        /// </summary>
        /// <param name="sourceFile">Path of the xml file to transform.</param>
        private static void ConvertAreldaMetadataXml(string sourceFile)
        {
            // Locate the xsl transformation file
            var transformationFile = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "Html", "Xslt", "areldaConvert.xsl");

            // If one of the files does not exist there is nothing to transform; return silently.
            if (!File.Exists(transformationFile) || !File.Exists(sourceFile))
            {
                return;
            }

            // Do transformation
            var result   = transformEngine.TransformXml(sourceFile, transformationFile, null);
            var tempFile = Path.GetTempFileName();

            try
            {
                File.WriteAllText(tempFile, result);

                var paket = (PaketDIP)Paket.LoadFromFile(tempFile);

                Console.WriteLine($"Paket generiert am: {paket.Generierungsdatum.ToShortDateString()}");
            }
            finally
            {
                // Path.GetTempFileName creates a real file on disk; remove it so temp files do not pile up.
                File.Delete(tempFile);
            }

            Console.ReadLine();
        }
Example #12
0
        /// <summary>
        ///     Converts an already unzipped package to a usage copy and zips the result.
        /// </summary>
        /// <param name="id">ArchiveRecordId or OrderItemId</param>
        /// <param name="assetType">The asset type.</param>
        /// <param name="protectWithPassword">Passed through unchanged to CreateZipFile.</param>
        /// <param name="package">The package to convert</param>
        /// <returns>
        ///     A PackageConversionResult. Valid is false and ErrorMessage is set when the zip file
        ///     is missing or the conversion threw; otherwise FileName holds the created zip file.
        /// </returns>
        public async Task <PackageConversionResult> ConvertPackage(string id, AssetType assetType, bool protectWithPassword, RepositoryPackage package)
        {
            var result = new PackageConversionResult {
                Valid = true
            };
            var zipPath = Path.Combine(Settings.Default.PickupPath, package.PackageFileName);
            var zipInfo = new FileInfo(zipPath);

            // A Gebrauchskopie cannot be produced without a packageId.
            if (assetType == AssetType.Gebrauchskopie && string.IsNullOrEmpty(package.PackageId))
            {
                throw new InvalidOperationException("Assets of type <Gebrauchskopie> require a packageId");
            }

            // Guard: without the zip file there is nothing to convert.
            if (!File.Exists(zipInfo.FullName))
            {
                Log.Warning("Unable to find the zip file {packageFileName} for conversion.", zipPath);
                result.Valid        = false;
                result.ErrorMessage = $"Unable to find the zip file {zipPath} for conversion.";
                return result;
            }

            Log.Information("Found zip file {Name}. File is already unzipped.", zipInfo.Name);

            // The working folder sits next to the zip file and is named like it, minus the extension.
            var parentDirectory = zipInfo.DirectoryName ?? throw new InvalidOperationException();
            var workingFolder   = Path.Combine(parentDirectory, zipInfo.Name.Remove(zipInfo.Name.Length - zipInfo.Extension.Length));

            try
            {
                var metadataPath = Path.Combine(workingFolder, "header", "metadata.xml");
                var dip          = (PaketDIP)Paket.LoadFromFile(metadataPath);

                var contentPath = Path.Combine(workingFolder, "content");
                var jobContext  = new JobContext {
                    ArchiveRecordId = package.ArchiveRecordId, PackageId = package.PackageId
                };
                await ConvertFiles(id, package.Files, dip, workingFolder, contentPath, jobContext);
                await ConvertFolders(id, package.Folders, dip, workingFolder, contentPath, jobContext);

                // Stamp and persist the updated metadata.
                dip.Generierungsdatum = DateTime.Today;
                ((Paket)dip).SaveToFile(metadataPath);

                AddReadmeFile(workingFolder);
                AddDesignFiles(workingFolder);
                CreateIndexHtml(workingFolder, package.PackageId);

                // The zip file is named after the id and placed in a sub folder named after the asset type.
                var zipTargetBase = Path.Combine(parentDirectory, assetType.ToString(), id);
                var zipTargetFile = zipTargetBase + ".zip";
                CreateZipFile(zipTargetBase, zipTargetFile, workingFolder, protectWithPassword, id);

                result.FileName = zipTargetFile;

                // Reaching this point means the conversion went through.
                Log.Information("Successfully processed (converted formats) zip file {Name}", zipInfo.Name);
            }
            catch (Exception ex)
            {
                Log.Error(ex, "Unexpected exception while converting the package.");
                result.Valid        = false;
                result.ErrorMessage = $"Unexpected exception while converting the package.\nException:\n{ex}";
            }
            finally
            {
                // Remove the working folder regardless of the outcome.
                if (Directory.Exists(workingFolder))
                {
                    Directory.Delete(workingFolder, true);
                }
            }

            return result;
        }
        /// <summary>
        ///     Converts a package to a usage copy: extracts the zip file, converts its content
        ///     and creates a new zip file from the result.
        /// </summary>
        /// <param name="id">ArchiveRecordId or OrderItemId</param>
        /// <param name="assetType">The asset type.</param>
        /// <param name="protectWithPassword">Passed through unchanged to CreateZipFile.</param>
        /// <param name="fileName">Name of the package file to convert (combined with the configured PickupPath).</param>
        /// <param name="packageId">The id of the ordered package</param>
        /// <returns>
        ///     A PackageConversionResult. Valid is false and ErrorMessage is set when the zip file
        ///     is missing or the conversion threw; otherwise FileName holds the created zip file.
        /// </returns>
        public async Task <PackageConversionResult> ConvertPackage(string id, AssetType assetType, bool protectWithPassword, string fileName,
                                                                   string packageId)
        {
            var retVal = new PackageConversionResult {
                Valid = true
            };
            var packageFileName = Path.Combine(Settings.Default.PickupPath, fileName);
            var fi = new FileInfo(packageFileName);

            // Make sure Gebrauchskopien have a packageId
            if (assetType == AssetType.Gebrauchskopie && string.IsNullOrEmpty(packageId))
            {
                throw new InvalidOperationException("Assets of type <Gebrauchskopie> require a packageId");
            }

            if (File.Exists(fi.FullName))
            {
                Log.Information("Found zip file {Name}. Starting to extract...", fi.Name);
                // The temp folder sits next to the zip file and is named like it, minus the extension.
                var tempFolder = Path.Combine(fi.DirectoryName, fi.Name.Remove(fi.Name.Length - fi.Extension.Length));
                try
                {
                    // Extract zip file to disk
                    ZipFile.ExtractToDirectory(packageFileName, tempFolder);

                    // Only Benutzungskopie packages get their metadata transformed first.
                    if (assetType == AssetType.Benutzungskopie)
                    {
                        ConvertAreldaMetadataXml(tempFolder);
                    }

                    var metadataFile = Path.Combine(tempFolder, "header", "metadata.xml");
                    var paket        = (PaketDIP)Paket.LoadFromFile(metadataFile);

                    // Create pdf documents from scanned jpeg 2000 scans.
                    scanProcessor.ConvertSingleJpeg2000ScansToPdfDocuments(paket, tempFolder,
                                                                           parameterHelper.GetSetting <ScansZusammenfassenSettings>());

                    // Get all the files from the subdirectory "content" in the extracted directory
                    // (GetFiles returns an array, so deleting files inside the loop does not affect the iteration)
                    var files = new DirectoryInfo(Path.Combine(tempFolder, "content")).GetFiles("*.*", SearchOption.AllDirectories);
                    foreach (var file in files)
                    {
                        Log.Information("Start extracting text for file: {file} for archive record or order id {id}", file, id);
                        var convertedFile = await ConvertFile(file, paket, tempFolder);

                        // Delete the original file, if the convertedFile exists and is not the same as the original file.
                        // In case of PDF the name of the original and converted file could be the same. --> PDF to PDF with OCR
                        if (!string.IsNullOrEmpty(convertedFile) && File.Exists(convertedFile) && convertedFile != file.FullName)
                        {
                            file.Delete();
                        }
                    }

                    // Stamp the metadata with today's date and persist it
                    paket.Generierungsdatum = DateTime.Today;
                    ((Paket)paket).SaveToFile(metadataFile);

                    AddReadmeFile(tempFolder);
                    AddDesignFiles(tempFolder);
                    CreateIndexHtml(tempFolder, packageId);

                    // Create zip file with the name of the archive
                    // (placed in a sub folder named after the asset type, next to the original zip file)
                    var finalZipFolder = Path.Combine(fi.DirectoryName, assetType.ToString(), id);
                    var finalZipFile   = finalZipFolder + ".zip";
                    CreateZipFile(finalZipFolder, finalZipFile, tempFolder, protectWithPassword, id);

                    retVal.FileName = finalZipFile;

                    // if we are here everything is groovy
                    Log.Information("Successfully processed (converted formats) zip file {Name}", fi.Name);
                }
                catch (Exception ex)
                {
                    // Any failure is reported through the result object instead of being rethrown.
                    Log.Error(ex, "Unexpected exception while converting the package.");
                    retVal.Valid        = false;
                    retVal.ErrorMessage = $"Unexpected exception while converting the package.\nException:\n{ex}";
                    return(retVal);
                }
                finally
                {
                    // Delete the temp files
                    if (Directory.Exists(tempFolder))
                    {
                        Directory.Delete(tempFolder, true);
                    }
                }
            }
            else
            {
                // Without the zip file there is nothing to convert; report it through the result object.
                Log.Warning("Unable to find the zip file {packageFileName} for conversion.", packageFileName);
                retVal.Valid        = false;
                retVal.ErrorMessage = $"Unable to find the zip file {packageFileName} for conversion.";
                return(retVal);
            }

            return(retVal);
        }