/// <summary>
        ///     Extracts the fulltext for every primary data package of the archive record that has a package file name.
        ///     Each package zip is looked up in <c>Settings.Default.PickupPath</c>, unpacked into a sibling temp folder,
        ///     and its files and folders are handed to <c>ProcessFiles</c> / <c>ProcessFolders</c>.
        ///     The temp folder and the zip file are removed afterwards, whether processing succeeded or not.
        /// </summary>
        /// <param name="mutationId">The mutation identifier. Not used inside this method.</param>
        /// <param name="archiveRecord">The archive record whose <c>PrimaryData</c> packages are processed.</param>
        /// <param name="primaerdatenAuftragStatusId">
        ///     Identifier written to <c>PrimaerdatenAuftragId</c> of the status update that is sent once a zip is unpacked.
        /// </param>
        /// <returns>
        ///     <c>true</c> if every package was processed; <c>false</c> as soon as a zip file is missing
        ///     or processing a package throws.
        /// </returns>
        public async Task <bool> ExtractFulltext(long mutationId, ArchiveRecord archiveRecord, int primaerdatenAuftragStatusId)
        {
            var packages = archiveRecord.PrimaryData;

            // NOTE(review): this value is never reset between packages, so the duration stored for a later
            // package also contains the amounts added for earlier ones - confirm that this is intended.
            var processingTimeForMissingFiles = 0L;

            foreach (var repositoryPackage in packages.Where(p => !string.IsNullOrEmpty(p.PackageFileName)))
            {
                var packageFileName = Path.Combine(Settings.Default.PickupPath, repositoryPackage.PackageFileName);
                var fi    = new FileInfo(packageFileName);
                var watch = Stopwatch.StartNew();

                if (!File.Exists(fi.FullName))
                {
                    Log.Warning("Unable to find the zip file {packageFileName}. No text was extracted.", packageFileName);
                    return false;
                }

                Log.Information("Found zip file {Name}. Starting to extract...", fi.Name);

                // The temp folder sits next to the zip and is named like the zip without its extension.
                // GetFileNameWithoutExtension also copes with names that have no extension at all.
                var tempFolder = Path.Combine(fi.DirectoryName, Path.GetFileNameWithoutExtension(fi.Name));
                try
                {
                    ZipFile.ExtractToDirectory(packageFileName, tempFolder);
                    var sizeInBytesOnDisk = Directory.EnumerateFiles(tempFolder, "*.*", SearchOption.AllDirectories)
                                                     .Sum(f => new FileInfo(f).Length);

                    var status = new UpdatePrimaerdatenAuftragStatus
                    {
                        PrimaerdatenAuftragId = primaerdatenAuftragStatusId,
                        Service = AufbereitungsServices.AssetService,
                        Status  = AufbereitungsStatusEnum.ZipEntpackt
                    };
                    await UpdatePrimaerdatenAuftragStatus(status);

                    var contentFolder = Path.Combine(tempFolder, "content");
                    await ProcessFiles(repositoryPackage.Files, contentFolder, archiveRecord.ArchiveRecordId);
                    await ProcessFolders(repositoryPackage.Folders, contentFolder, archiveRecord.ArchiveRecordId);

                    // if we are here everything is okay
                    Log.Information("Successfully processed (fulltext extracted) zip file {Name}", fi.Name);

                    // The difference between the declared package size and what was found on disk is passed on
                    // as the size of the files that were not part of the extracted content.
                    processingTimeForMissingFiles += GetProcessingTimeOfIgnoredFilesInTicks(repositoryPackage.SizeInBytes - sizeInBytesOnDisk);
                }
                catch (Exception ex)
                {
                    Log.Error(ex, "Unexpected error while extracting full text. Error Message is: {Message}", ex.Message);
                    return false;
                }
                finally
                {
                    // Delete the temp files.
                    // Extraction can fail before the temp folder was ever created (e.g. unreadable archive).
                    // Deleting a missing folder would throw out of this finally block, which would replace the
                    // method's result with an exception and leave the zip file behind.
                    if (Directory.Exists(tempFolder))
                    {
                        Directory.Delete(tempFolder, true);
                    }

                    File.Delete(packageFileName);
                }

                // NOTE(review): ElapsedTicks is measured in Stopwatch ticks; presumably the helper above returns
                // the same unit - verify, otherwise watch.Elapsed.Ticks would be the matching value.
                repositoryPackage.FulltextExtractionDuration = watch.ElapsedTicks + processingTimeForMissingFiles;
            }

            return true;
        }
        // Example no. 2 (0)
        /// <summary>
        ///     Unzips the package named in the argument. The zip is looked up in <c>Settings.Default.PickupPath</c>
        ///     and extracted into a folder next to it that is named like the zip file without its extension.
        ///     When the argument carries a PrimaerdatenAuftragId greater than zero, a <c>ZipEntpackt</c> status update is sent.
        /// </summary>
        /// <param name="extractZipArgument">Supplies the package file name and the PrimaerdatenAuftragId.</param>
        /// <returns>
        ///     <c>true</c> if the package was unzipped; <c>false</c> if the zip file does not exist
        ///     or an exception occurred while unzipping or updating the status.
        /// </returns>
        public async Task <bool> ExtractZipFile(ExtractZipArgument extractZipArgument)
        {
            var auftragId = extractZipArgument.PrimaerdatenAuftragId;
            var zipPath   = Path.Combine(Settings.Default.PickupPath, extractZipArgument.PackageFileName);
            var zipInfo   = new FileInfo(zipPath);

            // Nothing to do without the package file.
            if (!File.Exists(zipInfo.FullName))
            {
                Log.Warning("Unable to find the zip file for {packageFileName}. Nothing was unzipped.", zipPath);
                return false;
            }

            Log.Information("Found zip file {Name}. Starting to extract...", zipInfo.Name);

            // Target folder: same directory as the zip, zip name with the extension cut off.
            var parentFolder = zipInfo.DirectoryName ?? throw new InvalidOperationException();
            var folderName   = zipInfo.Name.Remove(zipInfo.Name.Length - zipInfo.Extension.Length);
            var targetFolder = Path.Combine(parentFolder, folderName);

            try
            {
                ZipFile.ExtractToDirectory(zipPath, targetFolder);

                // Primaerdatenauftrag could be 0 if we have a Benutzungskopie
                if (auftragId > 0)
                {
                    await UpdatePrimaerdatenAuftragStatus(new UpdatePrimaerdatenAuftragStatus
                    {
                        PrimaerdatenAuftragId = auftragId,
                        Service = AufbereitungsServices.AssetService,
                        Status  = AufbereitungsStatusEnum.ZipEntpackt
                    });
                }

                return true;
            }
            catch (Exception ex)
            {
                Log.Error(ex, "Unexpected error while unzipping package {packageFileName}. Error Message is: {Message}", zipPath, ex.Message);
                return false;
            }
        }