/// <summary>
/// Extracts the fulltext and adds the resulting text to the ArchiveRecord.
/// </summary>
/// <remarks>
/// For every primary data package that has a package file name, the zip file is looked up
/// in the configured pickup path, unpacked into a temporary folder next to it, and its
/// files and folders are processed. The temporary folder and the zip file are deleted
/// afterwards, regardless of whether processing succeeded. Processing stops at the first
/// package that is missing or fails.
/// </remarks>
/// <param name="mutationId">The mutation identifier. Not used by the body of this method.</param>
/// <param name="archiveRecord">The archive record.</param>
/// <param name="primaerdatenAuftragStatusId">
/// The identifier assigned to <c>PrimaerdatenAuftragId</c> of the status update that is sent
/// once a zip file has been unpacked.
/// </param>
/// <returns><c>true</c> if successful, <c>false</c> otherwise.</returns>
public async Task<bool> ExtractFulltext(long mutationId, ArchiveRecord archiveRecord, int primaerdatenAuftragStatusId)
{
    var packages = archiveRecord.PrimaryData;

    // Declared outside the loop, so the value carries over from one package to the next.
    // NOTE(review): confirm that accumulating across packages is intended.
    var processingTimeForMissingFiles = 0L;

    foreach (var repositoryPackage in packages.Where(p => !string.IsNullOrEmpty(p.PackageFileName)))
    {
        var packageFileName = Path.Combine(Settings.Default.PickupPath, repositoryPackage.PackageFileName);
        var fi = new FileInfo(packageFileName);
        var watch = Stopwatch.StartNew();

        if (!File.Exists(fi.FullName))
        {
            Log.Warning("Unable to find the zip file {packageFileName}. No text was extracted.", packageFileName);
            return false;
        }

        Log.Information("Found zip file {Name}. Starting to extract...", fi.Name);

        // The temporary folder sits next to the zip file and is named like the zip file without its extension.
        var tempFolder = Path.Combine(fi.DirectoryName, fi.Name.Remove(fi.Name.Length - fi.Extension.Length));
        try
        {
            ZipFile.ExtractToDirectory(packageFileName, tempFolder);
            var sizeInBytesOnDisk = Directory.GetFiles(tempFolder, "*.*", SearchOption.AllDirectories)
                .Sum(f => new FileInfo(f).Length);

            var status = new UpdatePrimaerdatenAuftragStatus
            {
                PrimaerdatenAuftragId = primaerdatenAuftragStatusId,
                Service = AufbereitungsServices.AssetService,
                Status = AufbereitungsStatusEnum.ZipEntpackt
            };
            await UpdatePrimaerdatenAuftragStatus(status);

            var contentFolder = Path.Combine(tempFolder, "content");
            await ProcessFiles(repositoryPackage.Files, contentFolder, archiveRecord.ArchiveRecordId);
            await ProcessFolders(repositoryPackage.Folders, contentFolder, archiveRecord.ArchiveRecordId);

            // if we are here everything is okay
            Log.Information("Successfully processed (fulltext extracted) zip file {Name}", fi.Name);

            // The difference between the declared package size and the size found on disk is
            // converted into an additional processing time by the helper.
            processingTimeForMissingFiles += GetProcessingTimeOfIgnoredFilesInTicks(repositoryPackage.SizeInBytes - sizeInBytesOnDisk);
        }
        catch (Exception ex)
        {
            Log.Error(ex, "Unexpected error while extracting full text. Error Message is: {Message}", ex.Message);
            return false;
        }
        finally
        {
            // Delete the temp files. The folder may not exist when the extraction failed before
            // anything was written; deleting it unconditionally would throw from this finally
            // block, hide the 'false' result of the catch block and skip the zip file cleanup.
            if (Directory.Exists(tempFolder))
            {
                Directory.Delete(tempFolder, true);
            }

            File.Delete(packageFileName);
        }

        repositoryPackage.FulltextExtractionDuration = watch.ElapsedTicks + processingTimeForMissingFiles;
    }

    return true;
}
/// <summary>
/// Unpacks the zip file named in the argument into a folder located next to the zip file.
/// </summary>
/// <remarks>
/// The zip file is looked up in the configured pickup path. The target folder carries the
/// name of the zip file without its extension. When the argument holds a
/// <c>PrimaerdatenAuftragId</c> greater than zero, a status update is sent after unpacking.
/// This method does not delete the zip file or the extracted folder.
/// </remarks>
/// <param name="extractZipArgument">Holds the package file name and the PrimaerdatenAuftrag identifier.</param>
/// <returns>
/// <c>true</c> if the zip file was unpacked (and the status update, if any, was sent);
/// <c>false</c> if the zip file was not found or an exception occurred.
/// </returns>
public async Task<bool> ExtractZipFile(ExtractZipArgument extractZipArgument)
{
    var auftragId = extractZipArgument.PrimaerdatenAuftragId;
    var zipPath = Path.Combine(Settings.Default.PickupPath, extractZipArgument.PackageFileName);
    var zipInfo = new FileInfo(zipPath);

    // Guard: without the zip file there is nothing to do.
    if (!File.Exists(zipInfo.FullName))
    {
        Log.Warning("Unable to find the zip file for {packageFileName}. Nothing was unzipped.", zipPath);
        return false;
    }

    Log.Information("Found zip file {Name}. Starting to extract...", zipInfo.Name);

    // Target folder: same directory as the zip, named like the zip minus its extension.
    var parentDirectory = zipInfo.DirectoryName ?? throw new InvalidOperationException();
    var folderName = zipInfo.Name.Remove(zipInfo.Name.Length - zipInfo.Extension.Length);
    var targetFolder = Path.Combine(parentDirectory, folderName);

    try
    {
        ZipFile.ExtractToDirectory(zipPath, targetFolder);

        // Primaerdatenauftrag could be 0 if we have a Benutzungskopie
        if (auftragId > 0)
        {
            await UpdatePrimaerdatenAuftragStatus(new UpdatePrimaerdatenAuftragStatus
            {
                PrimaerdatenAuftragId = auftragId,
                Service = AufbereitungsServices.AssetService,
                Status = AufbereitungsStatusEnum.ZipEntpackt
            });
        }

        return true;
    }
    catch (Exception ex)
    {
        Log.Error(ex, "Unexpected error while unzipping package {packageFileName}. Error Message is: {Message}", zipPath, ex.Message);
        return false;
    }
}