/// <summary>
/// Generates a PDF manifest describing the supplied files and uploads it
/// under the given destination.
/// </summary>
/// <param name="filesToPublish">
/// Files to list; each contributes one manifest entry made of its name,
/// human-readable length and eDiscovery path key.
/// </param>
/// <param name="manifestDestination">
/// Location the manifest file is created under and whose path is written
/// into the new file's metadata.
/// </param>
/// <param name="packageName">
/// Package name. Used to build the manifest file name, passed to the
/// manifest generator, and used to tag the uploaded file.
/// </param>
/// <param name="packageDate">Not referenced by this method.</param>
public async Task GenerateManifest(List<FileModel> filesToPublish, PathIdentifier manifestDestination, string packageName, string packageDate)
{
    // "/" is swapped for "-" and ":" is removed before the name is used as a file name.
    var fileSafePackageName = packageName.Replace("/", "-").Replace(":", "");
    var manifestName = $"Compliance Report-{fileSafePackageName}.pdf";

    using (var pdfStream = new MemoryStream())
    {
        // One manifest row per published file.
        var entries = filesToPublish.ConvertAll(source => new ManifestEntry
        {
            Name = source.Name,
            Size = source.LengthForHumans,
            Path = source.MetaEDiscoveryPathIdentifierRead()?.PathKey,
        });

        await ManifestGenerator.Generate(pdfStream, packageName, entries);

        // Rewind so the upload below starts reading from the first byte.
        pdfStream.Seek(0, SeekOrigin.Begin);

        // Describe the manifest file before sending it to the API.
        var manifestFile = new FileModel
        {
            Identifier = new FileIdentifier(manifestDestination as FolderIdentifier, null),
            Name = manifestName,
            Length = pdfStream.Length,
            Created = DateTime.UtcNow,
            Modified = DateTime.UtcNow,
            MimeType = "application/pdf"
        };

        manifestFile.InitializeEmptyMetadata();
        manifestFile.MetaPathIdentifierWrite(manifestDestination);
        UpdateShareState(manifestFile, EDiscoveryShareState.Published);
        TagSharePackage(manifestFile, packageName);

        // Create the file record first, then push the PDF content for it.
        FileModel registered = await this.connection.File.PostAsync(manifestFile);
        await this.connection.File.UploadAsync(registered, pdfStream);
    }
}
/// <summary>
/// Handles a completed transcription callback: resolves the top-most file
/// the callback belongs to, writes an audit log entry, uploads the WebVTT
/// transcript as a hidden child file, registers it as an alternative view
/// on the original file, and finally deletes the media through the
/// VoiceBase client.
/// </summary>
protected override async Task Process()
{
    // The callback response file references the file it was produced for via CHILDOF.
    var callbackFile = await API.File.GetAsync(CurrentMessage.FileIdentifier);
    var rootIdentifier = callbackFile.Read<FileIdentifier>(MetadataKeyConstants.CHILDOF);
    var rootFile = await API.File.GetAsync(rootIdentifier);

    // Keep following CHILDOF links upward until a file has none,
    // following at most 10 links.
    var hopsRemaining = 10;
    for (var ancestor = rootFile.Read<FileIdentifier>(MetadataKeyConstants.CHILDOF);
         ancestor != null && hopsRemaining-- > 0;
         ancestor = rootFile.Read<FileIdentifier>(MetadataKeyConstants.CHILDOF))
    {
        rootIdentifier = ancestor;
        rootFile = await API.File.GetAsync(ancestor);
    }

    // Download the callback response file and deserialize it.
    var mediaResponse = await API.File.DownloadAsAsync<MediaResponse>(CurrentMessage.FileIdentifier);

    var description = $"Transcription of file {rootFile.Name} completed";
    var auditEntry = new AuditLogEntryModel
    {
        Identifier = new AuditLogEntryIdentifier(rootIdentifier),
        FileIdentifier = rootIdentifier,
        ActionType = "Transcription",
        Description = description,
        Details = JsonConvert.SerializeObject(new
        {
            mediaResponse.MediaID,
            mediaResponse.Status,
            mediaResponse.Length
        }),
        // Fall back to the API's own user when the file carries no requester.
        InitiatorUserIdentifier = rootFile.Read<UserIdentifier>("attribute.requestedBy")
            ?? API.UserIdentifier,
        Generated = DateTime.UtcNow,
        UserAgent = API.UserAgent
    };
    await API.Log.PostAsync(auditEntry);

    // Pick the WebVTT rendition of the transcript; its payload is base64 encoded.
    var webVtt = mediaResponse.Transcript.AlternateFormats.First(format => format.Format == "webvtt");
    var vttBytes = Convert.FromBase64String(webVtt.Data);

    // Upload the transcript as a hidden child of the original file.
    var transcriptFile = new FileModel
    {
        Identifier = new FileIdentifier(
            rootIdentifier as FolderIdentifier,
            Guid.NewGuid().ToString()
        ),
        Name = $"{Path.GetFileNameWithoutExtension(rootFile.Name)}.vtt",
        MimeType = "text/vtt"
    };

    transcriptFile
        .InitializeEmptyMetadata()
        .Write(MetadataKeyConstants.CHILDOF, rootIdentifier)
        .Write(MetadataKeyConstants.HIDDEN, true);

    transcriptFile = await API.File.UploadAsync(transcriptFile, Encoding.UTF8.GetString(vttBytes));

    await API.ConcurrencyRetryBlock(async () =>
    {
        // Re-read the original on every attempt so the update is applied to fresh data.
        var latest = await API.File.GetAsync(rootIdentifier);

        var alternativeViews = latest.Read(
            MetadataKeyConstants.ALTERNATIVE_VIEWS,
            defaultValue: new List<AlternativeView>());

        alternativeViews.Add(new AlternativeView
        {
            FileIdentifier = transcriptFile.Identifier,
            MimeType = "text/vtt",
            Name = "Voicebase WebVTT",
        });

        latest.Write(MetadataKeyConstants.ALTERNATIVE_VIEWS, alternativeViews);
        latest.Write("attributes.voicebase.status", "complete");

        await API.File.PutAsync(latest);
    });

    // The transcript is stored; remove the media via the VoiceBase client.
    using (var voicebase = new VoiceBaseClient(new Uri(Configuration.VoicebaseURL), Configuration.VoicebaseToken))
    {
        await voicebase.DeleteMediaAsync(mediaResponse.MediaID);
    }
}
/// <summary>
/// Runs the extractor and, when it produced text, uploads that text as a
/// hidden "extracted text" child file and tags it as an alternative view
/// named "text". When no text was produced and PDF OCR is enabled, queues
/// the file for OCR instead.
/// </summary>
/// <param name="api">Connection used to upload the text file and to enqueue OCR work.</param>
/// <param name="fileModel">File the text is being extracted from.</param>
/// <param name="executable">Passed through to <c>ExtractExecute</c>.</param>
/// <param name="input">Passed through to <c>ExtractExecute</c>; also the base of the output name.</param>
/// <param name="arguments">Passed through to <c>ExtractExecute</c>.</param>
/// <param name="tag">Not referenced by this method.</param>
/// <param name="extension">Appended to <paramref name="input"/> (after a dot) to form the output name.</param>
/// <param name="contentType">Not referenced by this method.</param>
/// <returns>The text returned by <c>ExtractExecute</c>, unchanged.</returns>
private async Task<string> ExtractAsync(
    Connection api,
    FileModel fileModel,
    string executable,
    string input,
    string arguments,
    string tag,
    string extension,
    string contentType
)
{
    var outputName = $"{input}.{extension}";
    var extracted = ExtractExecute(executable, input, outputName, arguments);

    if (string.IsNullOrWhiteSpace(extracted))
    {
        // The extractor found no text. If this is a PDF that is not itself a
        // child artifact (i.e. a user uploaded file rather than a searchable
        // output), hand it to OCR.
        if (Configuration.OCRPDFsIfNoText
            && fileModel.Extension == "pdf"
            && fileModel.Read<FileIdentifier>(MetadataKeyConstants.CHILDOF) == null)
        {
            await api.Queue.EnqueueAsync("PDFOCR", new FileBasedMessage(fileModel.Identifier));
        }

        return extracted;
    }

    using (var buffer = new MemoryStream())
    using (var writer = new StreamWriter(buffer))
    {
        writer.Write(extracted);
        await writer.FlushAsync();

        if (buffer.Length > 0)
        {
            buffer.Seek(0, SeekOrigin.Begin);

            var textFile = new FileModel
            {
                Identifier = new FileIdentifier(fileModel.Identifier as FolderIdentifier, Guid.NewGuid().ToString()),
                Created = DateTime.UtcNow,
                Modified = DateTime.UtcNow,
                Length = buffer.Length,
                MimeType = "text/plain",
                Name = "extracted text",
                FilePrivileges = fileModel.FilePrivileges
            };

            // When the source file is itself a child, the result is attached
            // to that parent rather than to the source file.
            var ownerIdentifier = fileModel.Read<FileIdentifier>("_childof")
                ?? fileModel.Identifier;

            textFile.InitializeEmptyMetadata();
            textFile.Write(MetadataKeyConstants.CHILDOF, ownerIdentifier);
            textFile.Write(MetadataKeyConstants.HIDDEN, true);

            FileModel uploaded = await api.File.UploadAsync(textFile, buffer);

            await TagAlternativeView(
                ownerIdentifier,
                uploaded.Identifier,
                new Documents.API.Common.Models.MetadataModels.AlternativeView
                {
                    FileIdentifier = uploaded.Identifier,
                    MimeType = uploaded.MimeType,
                    Name = "text"
                });

            /*await api.Queue.EnqueueAsync("Index", new IndexMessage
             * {
             *     Identifier = childOfFileIdentifier,
             *     Action = IndexMessage.IndexActions.IndexFile
             * });*/
        }
    }

    Console.WriteLine("Done");

    return extracted;
}
/// <summary>
/// Runs the EXIF tool against a file and, when it produced a report:
/// writes the parsed report fields onto the source file, uploads the report
/// as a hidden "EXIF-{name}.txt" text file, and tags it as an alternative
/// view named "EXIF".
/// </summary>
/// <param name="api">Connection used to read/update the source file and upload the report.</param>
/// <param name="fileModel">File the EXIF report is produced for.</param>
/// <param name="executable">Passed through to <c>ExifToolExecute</c>.</param>
/// <param name="input">Passed through to <c>ExifToolExecute</c>.</param>
/// <param name="arguments">Passed through to <c>ExifToolExecute</c>.</param>
/// <remarks>
/// Does nothing when the tool returns null, empty or whitespace-only output.
/// </remarks>
private async Task ExifToolAsync(
    Connection api,
    FileModel fileModel,
    string executable,
    string input,
    string arguments
)
{
    var stdout = ExifToolExecute(executable, input, arguments);

    // Check for "no output" BEFORE normalising: Regex.Replace throws
    // ArgumentNullException on a null input, which would bypass the
    // null/blank handling this method is meant to have. Normalisation only
    // rewrites newline characters, so testing first gives the same answer
    // for every non-null value.
    if (string.IsNullOrWhiteSpace(stdout))
    {
        return;
    }

    // Normalise every line-ending flavour to CRLF.
    stdout = Regex.Replace(stdout, @"\r\n|\n\r|\n|\r", "\r\n");

    using (var ms = new MemoryStream())
    using (var sw = new StreamWriter(ms))
    {
        sw.Write(stdout);
        await sw.FlushAsync();

        if (ms.Length > 0)
        {
            ms.Seek(0, SeekOrigin.Begin);

            var textModel = new FileModel
            {
                Identifier = new FileIdentifier(fileModel.Identifier as FolderIdentifier, Guid.NewGuid().ToString()),
                Created = DateTime.UtcNow,
                Modified = DateTime.UtcNow,
                Length = ms.Length,
                MimeType = "text/plain",
                Name = $"EXIF-{fileModel.NameWithoutExtension()}.txt",
                FilePrivileges = fileModel.FilePrivileges
            };

            // If the file we were extracting from was a child itself, attach
            // our results to its parent.
            var childOfFileIdentifier = fileModel.Read<FileIdentifier>("_childof")
                ?? fileModel.Identifier;

            textModel.InitializeEmptyMetadata();
            textModel.Write(MetadataKeyConstants.CHILDOF, childOfFileIdentifier);
            textModel.Write(MetadataKeyConstants.HIDDEN, true);

            var reportFields = ParseReport(stdout);

            // Re-read the source file inside the retry block so each attempt
            // applies the parsed fields to a fresh copy before saving it.
            await api.ConcurrencyRetryBlock(async () =>
            {
                var original = await api.File.GetAsync(fileModel.Identifier);
                ExtractAttributes(original, reportFields);
                await api.File.PutAsync(original);
            });

            textModel = await api.File.UploadAsync(textModel, ms);

            await TagAlternativeView(
                childOfFileIdentifier,
                textModel.Identifier,
                new Documents.API.Common.Models.MetadataModels.AlternativeView
                {
                    FileIdentifier = textModel.Identifier,
                    MimeType = textModel.MimeType,
                    Name = "EXIF"
                });
        }
    }
}