Пример #1
0
        /// <summary>
        /// Writes a manifest for the supplied files through <c>ManifestGenerator.Generate</c> and
        /// uploads the result as an <c>application/pdf</c> file under the manifest destination.
        /// </summary>
        /// <param name="filesToPublish">Files to list; each contributes its Name, LengthForHumans and the PathKey of its eDiscovery path identifier (null when that identifier is absent).</param>
        /// <param name="manifestDestination">Used to build the new file's identifier and written into its metadata as the path identifier.</param>
        /// <param name="packageName">Passed to the generator, used to tag the new file, and embedded in the manifest file name.</param>
        /// <param name="packageDate">Not used by this method.</param>
        public async Task GenerateManifest(List <FileModel> filesToPublish, PathIdentifier manifestDestination, string packageName, string packageDate)
        {
            // For the file name only: every "/" becomes "-" and every ":" is dropped.
            var safePackageName = packageName.Replace("/", "-").Replace(":", "");
            var manifestName = $"Compliance Report-{safePackageName}.pdf";

            using (var buffer = new MemoryStream())
            {
                // One manifest row per published file, in the same order as the input list.
                var entries = filesToPublish.ConvertAll(source => new ManifestEntry
                {
                    Name = source.Name,
                    Size = source.LengthForHumans,
                    Path = source.MetaEDiscoveryPathIdentifierRead()?.PathKey,
                });

                await ManifestGenerator.Generate(buffer, packageName, entries);

                // Rewind so the upload below reads the manifest from its first byte.
                buffer.Position = 0;

                // Describe the manifest as a new file; the identifier is built with a null second argument.
                var manifestFile = new FileModel
                {
                    Identifier = new FileIdentifier(manifestDestination as FolderIdentifier, null),
                    Name       = manifestName,
                    Length     = buffer.Length,
                    Created    = DateTime.UtcNow,
                    Modified   = DateTime.UtcNow,
                    MimeType   = "application/pdf"
                };

                manifestFile.InitializeEmptyMetadata();
                manifestFile.MetaPathIdentifierWrite(manifestDestination);

                // Mark the manifest as published and tag it with the package name.
                UpdateShareState(manifestFile, EDiscoveryShareState.Published);
                TagSharePackage(manifestFile, packageName);

                // Post the file record first, then upload the content against the returned model.
                manifestFile = await this.connection.File.PostAsync(manifestFile);
                await this.connection.File.UploadAsync(manifestFile, buffer);
            }
        }
Пример #2
0
        /// <summary>
        /// Handles a transcription callback for the current message: resolves the file the callback
        /// belongs to by following CHILDOF links, posts a "Transcription" audit log entry, uploads the
        /// WebVTT transcript as a hidden child file, registers that file as an alternative view on the
        /// resolved file, and finally deletes the media from VoiceBase.
        /// </summary>
        protected override async Task Process()
        {
            // The message identifies the callback response file; its CHILDOF metadata names the file it was produced for.
            var responseFile = await API.File.GetAsync(CurrentMessage.FileIdentifier);

            var sourceIdentifier = responseFile.Read<FileIdentifier>(MetadataKeyConstants.CHILDOF);
            var sourceFile       = await API.File.GetAsync(sourceIdentifier);

            // Keep following CHILDOF links upward; the hop counter caps the walk at 10 fetches.
            var ancestor = sourceFile.Read<FileIdentifier>(MetadataKeyConstants.CHILDOF);
            for (var hopsLeft = 10; ancestor != null && hopsLeft > 0; hopsLeft--)
            {
                sourceIdentifier = ancestor;
                sourceFile       = await API.File.GetAsync(ancestor);

                ancestor = sourceFile.Read<FileIdentifier>(MetadataKeyConstants.CHILDOF);
            }

            // Download the callback response file and deserialize it.
            var response = await API.File.DownloadAsAsync<MediaResponse>(CurrentMessage.FileIdentifier);

            // NOTE(review): the requester is read from "attribute.requestedBy" while the status further
            // down is written under "attributes.voicebase.status" - confirm the differing prefixes are intended.
            var auditEntry = new AuditLogEntryModel
            {
                Identifier     = new AuditLogEntryIdentifier(sourceIdentifier),
                FileIdentifier = sourceIdentifier,
                ActionType     = "Transcription",
                Description    = $"Transcription of file {sourceFile.Name} completed",
                Details        = JsonConvert.SerializeObject(new
                {
                    response.MediaID,
                    response.Status,
                    response.Length
                }),
                InitiatorUserIdentifier = sourceFile.Read<UserIdentifier>("attribute.requestedBy") ?? API.UserIdentifier,
                Generated = DateTime.UtcNow,
                UserAgent = API.UserAgent
            };
            await API.Log.PostAsync(auditEntry);

            // Pick the WebVTT rendition of the transcript; its Data is base64 encoded.
            var webVtt   = response.Transcript.AlternateFormats.First(format => format.Format == "webvtt");
            var vttBytes = Convert.FromBase64String(webVtt.Data);

            // Describe the transcript as a new hidden file that is a child of the resolved file.
            var transcriptFile = new FileModel
            {
                Identifier = new FileIdentifier(
                    sourceIdentifier as FolderIdentifier,
                    Guid.NewGuid().ToString()
                    ),
                Name     = Path.GetFileNameWithoutExtension(sourceFile.Name) + ".vtt",
                MimeType = "text/vtt"
            };

            transcriptFile
            .InitializeEmptyMetadata()
            .Write(MetadataKeyConstants.CHILDOF, sourceIdentifier)
            .Write(MetadataKeyConstants.HIDDEN, true);

            // The decoded bytes are uploaded as UTF-8 text.
            var vttText = Encoding.UTF8.GetString(vttBytes);
            transcriptFile = await API.File.UploadAsync(transcriptFile, vttText);

            await API.ConcurrencyRetryBlock(async () =>
            {
                // Re-read the file on every attempt so the view list is built from its latest state.
                var latest = await API.File.GetAsync(sourceIdentifier);

                var views = latest.Read(MetadataKeyConstants.ALTERNATIVE_VIEWS, defaultValue: new List<AlternativeView>());
                views.Add(new AlternativeView
                {
                    FileIdentifier = transcriptFile.Identifier,
                    MimeType       = "text/vtt",
                    Name           = "Voicebase WebVTT",
                });

                latest.Write(MetadataKeyConstants.ALTERNATIVE_VIEWS, views);
                latest.Write("attributes.voicebase.status", "complete");

                await API.File.PutAsync(latest);
            });

            // Remove the media from VoiceBase once everything above has succeeded.
            using (var voicebase = new VoiceBaseClient(new Uri(Configuration.VoicebaseURL), Configuration.VoicebaseToken))
            {
                await voicebase.DeleteMediaAsync(response.MediaID);
            }
        }
Пример #3
0
        /// <summary>
        /// Runs the extractor through <c>ExtractExecute</c>. When it produces non-blank output, that
        /// output is uploaded as a hidden "extracted text" file and registered as the "text" alternative
        /// view. When it produces nothing, a PDF with no CHILDOF link is queued for "PDFOCR" if
        /// <c>Configuration.OCRPDFsIfNoText</c> is set.
        /// </summary>
        /// <param name="api">Connection used for the upload and for queueing.</param>
        /// <param name="fileModel">File being processed; its identifier, privileges and CHILDOF metadata are read.</param>
        /// <param name="executable">Executable handed to ExtractExecute.</param>
        /// <param name="input">Input value handed to ExtractExecute; also the base of the output value.</param>
        /// <param name="arguments">Arguments handed to ExtractExecute.</param>
        /// <param name="tag">Not used by this method.</param>
        /// <param name="extension">Appended to <paramref name="input"/> after a "." to form the output value.</param>
        /// <param name="contentType">Not used by this method.</param>
        /// <returns>The string returned by ExtractExecute, unchanged.</returns>
        private async Task <string> ExtractAsync(
            Connection api,
            FileModel fileModel,
            string executable,
            string input,
            string arguments,
            string tag,
            string extension,
            string contentType
            )
        {
            var output = input + "." + extension;
            var stdout = ExtractExecute(executable, input, output, arguments);

            if (string.IsNullOrWhiteSpace(stdout))
            {
                // The extractor found no text. Only a PDF that is not itself a child
                // (i.e. not an already generated artifact) is sent to OCR.
                var ocrCandidate = Configuration.OCRPDFsIfNoText && fileModel.Extension == "pdf";
                if (ocrCandidate && fileModel.Read<FileIdentifier>(MetadataKeyConstants.CHILDOF) == null)
                {
                    await api.Queue.EnqueueAsync("PDFOCR", new FileBasedMessage(fileModel.Identifier));
                }

                return stdout;
            }

            using (var buffer = new MemoryStream())
            {
                using (var writer = new StreamWriter(buffer))
                {
                    // Flush before reading Length/rewinding so the buffer holds the full text.
                    writer.Write(stdout);
                    await writer.FlushAsync();

                    if (buffer.Length > 0)
                    {
                        buffer.Seek(0, SeekOrigin.Begin);

                        var extracted = new FileModel
                        {
                            Identifier     = new FileIdentifier(fileModel.Identifier as FolderIdentifier, Guid.NewGuid().ToString()),
                            Created        = DateTime.UtcNow,
                            Modified       = DateTime.UtcNow,
                            Length         = buffer.Length,
                            MimeType       = "text/plain",
                            Name           = "extracted text",
                            FilePrivileges = fileModel.FilePrivileges
                        };

                        // When the processed file is itself a child, the result is attached to
                        // that file's parent; otherwise to the processed file.
                        var attachTo = fileModel.Read<FileIdentifier>("_childof") ?? fileModel.Identifier;

                        extracted.InitializeEmptyMetadata();
                        extracted.Write(MetadataKeyConstants.CHILDOF, attachTo);
                        extracted.Write(MetadataKeyConstants.HIDDEN, true);

                        extracted = await api.File.UploadAsync(extracted, buffer);

                        var textView = new Documents.API.Common.Models.MetadataModels.AlternativeView
                        {
                            FileIdentifier = extracted.Identifier,
                            MimeType       = extracted.MimeType,
                            Name           = "text"
                        };
                        await TagAlternativeView(attachTo, extracted.Identifier, textView);

                        // NOTE: enqueueing an "Index" message (IndexFile action) for attachTo is
                        // currently disabled here.
                    }
                }
            }

            Console.WriteLine("Done");

            return stdout;
        }
Пример #4
0
        /// <summary>
        /// Runs ExifTool through <c>ExifToolExecute</c>. When it produces non-blank output, the report's
        /// line breaks are normalised to CRLF, the parsed report fields are applied to the processed file
        /// via <c>ExtractAttributes</c>, and the report is uploaded as a hidden "EXIF-*.txt" file
        /// registered as the "EXIF" alternative view. Null or blank output is a no-op.
        /// </summary>
        /// <param name="api">Connection used to read, update and upload files.</param>
        /// <param name="fileModel">File being processed; its identifier, name, privileges and "_childof" metadata are read.</param>
        /// <param name="executable">Executable handed to ExifToolExecute.</param>
        /// <param name="input">Input value handed to ExifToolExecute.</param>
        /// <param name="arguments">Arguments handed to ExifToolExecute.</param>
        private async Task ExifToolAsync
        (
            Connection api,
            FileModel fileModel,
            string executable,
            string input,
            string arguments
        )
        {
            var stdout = ExifToolExecute(executable, input, arguments);

            // Guard BEFORE normalising: Regex.Replace throws ArgumentNullException for a null input,
            // which would bypass the "no output" handling. Normalisation only swaps whitespace for
            // whitespace, so testing for blank output first gives the same answer as testing after.
            if (string.IsNullOrWhiteSpace(stdout))
            {
                return;
            }

            // Normalise every line-break flavour to CRLF.
            stdout = Regex.Replace(stdout, @"\r\n|\n\r|\n|\r", "\r\n");

            using (var ms = new MemoryStream())
            {
                using (var sw = new StreamWriter(ms))
                {
                    // Flush before reading Length/rewinding so the stream holds the full report.
                    sw.Write(stdout);
                    await sw.FlushAsync();

                    if (ms.Length > 0)
                    {
                        ms.Seek(0, SeekOrigin.Begin);

                        var textModel = new FileModel
                        {
                            Identifier     = new FileIdentifier(fileModel.Identifier as FolderIdentifier, Guid.NewGuid().ToString()),
                            Created        = DateTime.UtcNow,
                            Modified       = DateTime.UtcNow,
                            Length         = ms.Length,
                            MimeType       = "text/plain",
                            Name           = $"EXIF-{fileModel.NameWithoutExtension()}.txt",
                            FilePrivileges = fileModel.FilePrivileges
                        };

                        // When the processed file is itself a child, the report is attached to
                        // that file's parent; otherwise to the processed file.
                        var childOfFileIdentifier = fileModel.Read <FileIdentifier>("_childof") ?? fileModel.Identifier;

                        textModel.InitializeEmptyMetadata();
                        textModel.Write(MetadataKeyConstants.CHILDOF, childOfFileIdentifier);
                        textModel.Write(MetadataKeyConstants.HIDDEN, true);

                        // Apply the parsed report to a freshly fetched copy of the processed file on
                        // every attempt, so a retry starts from the file's latest state.
                        var reportFields = ParseReport(stdout);
                        await api.ConcurrencyRetryBlock(async () =>
                        {
                            var original = await api.File.GetAsync(fileModel.Identifier);
                            ExtractAttributes(original, reportFields);
                            await api.File.PutAsync(original);
                        });

                        textModel = await api.File.UploadAsync(textModel, ms);

                        await TagAlternativeView(childOfFileIdentifier, textModel.Identifier, new Documents.API.Common.Models.MetadataModels.AlternativeView
                        {
                            FileIdentifier = textModel.Identifier,
                            MimeType       = textModel.MimeType,
                            Name           = "EXIF"
                        });
                    }
                }
            }
        }