Ejemplo n.º 1
0
        /// <summary>
        /// Handles an <c>INDEXING_STARTED</c> progress message: looks up the transcription job
        /// named in the message and, if the job has completed, stores the transcript URI in the
        /// indexing-status table. Messages of any other type are logged and skipped.
        /// </summary>
        /// <param name="message">
        /// Progress message whose <c>Message</c> payload is JSON for an <c>IndexingStartedMessage</c>.
        /// </param>
        /// <exception cref="LambdaRetriableException">
        /// Thrown when the job is still queued or in progress, so the message is failed and can be retried.
        /// </exception>
        /// <exception cref="InvalidOperationException">
        /// Thrown when the job reports a status this handler does not recognise.
        /// </exception>
        public override async Task ProcessMessageAsync(ProgressMessage message)
        {
            if (message.Type != MessageType.INDEXING_STARTED)
            {
                LogInfo($"Skipping message type: {message.Type}");
                return;
            }

            var indexingMessage = JsonConvert.DeserializeObject<IndexingStartedMessage>(message.Message);
            var jobName         = indexingMessage.Job.TranscriptionJob.TranscriptionJobName;

            // NOTE(review): assumes every job name starts with "transcribe-"; the prefix is
            // stripped by length only and is not verified here.
            var videoEtag = jobName.Substring("transcribe-".Length);

            LogInfo($"Received job: {jobName}");

            var response = await _transcribe.GetTranscriptionJobAsync(new GetTranscriptionJobRequest {
                TranscriptionJobName = jobName
            }, CancellationToken.None);

            // check the job status
            var status = response.TranscriptionJob.TranscriptionJobStatus.Value;
            switch (status)
            {
            case "COMPLETED":

                var transciptUri = response.TranscriptionJob.Transcript.TranscriptFileUri;
                LogInfo($"Job {jobName} has completed successfully. The results are here: {transciptUri}");

                // update the transcription uri in the DB
                await _table.UpdateRowAsync(new IndexingStatus {
                    VideoEtag          = videoEtag,
                    TranscriptionS3Key = transciptUri
                });

                break;

            case "FAILED":
                LogWarn($"Job {jobName} has failed");
                break;

            case "QUEUED":

                // a queued job has not started yet; treat it like an in-progress job and retry later
                throw new LambdaRetriableException($"Job {jobName} is still queued");

            case "IN_PROGRESS":

                // if the job has not yet finished, let's fail the message so that it stays in the queue
                throw new LambdaRetriableException($"Job {jobName} is still in progress");

            default:
                throw new InvalidOperationException($"Unknown TranscriptionJobStatus: {status}");
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Click handler that uploads the selected file to S3, starts an Amazon Transcribe job
        /// for it, polls until the job reaches a terminal state, then downloads the transcript
        /// document and shows its text in the transcription control.
        /// </summary>
        /// <remarks>
        /// <c>async void</c> is intentional: this is a UI event handler. All exceptions are caught
        /// and written to the status log, and the button is re-enabled in the <c>finally</c> block.
        /// </remarks>
        private async void BeginTranscription_Click(object sender, RoutedEventArgs e)
        {
            // prevent a second run from starting while this one is in flight
            this._ctlBeginTranscription.IsEnabled = false;
            try
            {
                this._ctlStatusLog.Text     = "";
                this._ctlTranscription.Text = "";

                var filepath = this._ctlFilepath.Text;
                if (string.IsNullOrEmpty(filepath))
                {
                    MessageBox.Show($"An mp4 file must be selected first before transcribing", "Error", MessageBoxButton.OK, MessageBoxImage.Error);
                    return;
                }
                if (!System.IO.File.Exists(filepath))
                {
                    MessageBox.Show($"File {filepath} does not exist", "Error", MessageBoxButton.OK, MessageBoxImage.Error);
                    return;
                }

                var s3Key    = System.IO.Path.GetFileName(filepath);
                var s3Bucket = _ctlS3Bucket.Text;

                var            chain = new CredentialProfileStoreChain();
                AWSCredentials credentials;
                if (!chain.TryGetAWSCredentials(this._ctlProfile.Text, out credentials))
                {
                    MessageBox.Show($"Profile {this._ctlProfile.Text} was not found", "Error", MessageBoxButton.OK, MessageBoxImage.Error);
                    return;
                }

                var region = RegionEndpoint.GetBySystemName(this._ctlRegion.Text);

                // the GUID suffix keeps job names unique across repeated runs on the same file
                var transcriptionJobName = $"{s3Key}-{Guid.NewGuid().ToString()}";

                using (var s3Client = new AmazonS3Client(credentials, region))
                using (var transcribeClient = new AmazonTranscribeServiceClient(credentials, region))
                using (var httpClient = new HttpClient()) // Http Client to download the transcription once complete
                {
                    AppendStatusLine("Ensuring S3 bucket exists");
                    await s3Client.PutBucketAsync(s3Bucket);

                    AppendStatusLine("Starting upload");

                    // TransferUtility is IDisposable; scope it to the upload
                    using (var transferUtility = new TransferUtility(s3Client))
                    {
                        var uploadRequest = new TransferUtilityUploadRequest
                        {
                            FilePath   = filepath,
                            BucketName = s3Bucket,
                            Key        = s3Key
                        };

                        uploadRequest.UploadProgressEvent += ProgressUploadStatus;

                        await transferUtility.UploadAsync(uploadRequest);
                    }

                    var mediaFileUri = $"https://s3.{region.SystemName}.amazonaws.com/{s3Bucket}/{s3Key}";
                    AppendStatusLine($"Upload Complete to: {mediaFileUri}");

                    await transcribeClient.StartTranscriptionJobAsync(new StartTranscriptionJobRequest
                    {
                        LanguageCode = LanguageCode.EnUS,
                        Media        = new Media
                        {
                            MediaFileUri = mediaFileUri
                        },
                        MediaFormat          = MediaFormat.Mp4,
                        TranscriptionJobName = transcriptionJobName
                    });

                    AppendStatusLine($"Started transcription job: {transcriptionJobName}");

                    var request = new GetTranscriptionJobRequest {
                        TranscriptionJobName = transcriptionJobName
                    };
                    GetTranscriptionJobResponse response;
                    TranscriptionJobStatus      status;

                    // Poll until the job is in a terminal state. Checking only for IN_PROGRESS would
                    // treat any other non-terminal status (e.g. a queued job) as finished and then
                    // dereference a transcript that does not exist yet.
                    do
                    {
                        AppendStatusLine($"... {DateTime.Now} Waiting for transcription job to complete");
                        await Task.Delay(TimeSpan.FromSeconds(2));

                        response = await transcribeClient.GetTranscriptionJobAsync(request);
                        status   = response.TranscriptionJob.TranscriptionJobStatus;
                    } while (status != TranscriptionJobStatus.COMPLETED && status != TranscriptionJobStatus.FAILED);

                    if (status == TranscriptionJobStatus.FAILED)
                    {
                        AppendStatusLine($"Transcription job failed: {response.TranscriptionJob.FailureReason}");
                        return;
                    }

                    AppendStatusLine("Job Done");

                    var transcriptionDocument = await httpClient.GetStringAsync(response.TranscriptionJob.Transcript.TranscriptFileUri);

                    // JObject.Parse throws on a non-object document instead of yielding a null root
                    var root = JObject.Parse(transcriptionDocument);

                    var sb = new StringBuilder();
                    foreach (JObject transcriptionNode in root["results"]["transcripts"])
                    {
                        // separate consecutive transcripts with blank lines
                        if (sb.Length != 0)
                        {
                            sb.AppendLine("\n\n");
                        }

                        sb.Append(transcriptionNode["transcript"]);
                    }

                    this._ctlTranscription.Text = sb.ToString();
                }
            }
            catch (Exception ex)
            {
                AppendStatusLine($"Unknown error: {ex.Message}");
            }
            finally
            {
                this._ctlBeginTranscription.IsEnabled = true;
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Runs one round of a text-to-speech-to-text loop: synthesizes <c>message.Text</c> with a
        /// randomly chosen Polly voice, uploads the audio to S3, transcribes it with Amazon
        /// Transcribe, and publishes the resulting transcription to the SNS topic with
        /// <c>Iterations</c> decremented by one.
        /// </summary>
        /// <param name="message">Text to speak and the number of iterations left; a negative count ends the loop.</param>
        /// <param name="context">Lambda invocation context (not used by this method).</param>
        public override async Task ProcessMessageAsync(Message message, ILambdaContext context)
        {
            if (message.Iterations < 0)
            {
                return;
            }

            // timestamp suffix used in both the audio object key and the transcription job name
            var suffix   = DateTime.UtcNow.ToString("yyyyMMddHHmmss");
            var audioKey = $"polly_{suffix}.mp3";

            // Initiate describe voices request (no filter, so every available voice is a candidate).
            var describeVoiceResponse = await _pollyClient.DescribeVoicesAsync(new DescribeVoicesRequest());

            var randomIndex = _rand.Next(describeVoiceResponse.Voices.Count);
            var randomVoice = describeVoiceResponse.Voices[randomIndex];

            LogInfo($"Selected random voice '{randomVoice.Name}' in {randomVoice.LanguageName}");

            // Initiate speech synthesis request.
            var synthesizeResponse = await _pollyClient.SynthesizeSpeechAsync(new SynthesizeSpeechRequest {
                VoiceId      = randomVoice.Id,
                OutputFormat = OutputFormat.Mp3,
                Text         = message.Text
            });

            // Buffer the audio in memory and rewind it before uploading; both streams are
            // disposed once the upload has finished.
            using (var pollyAudio = synthesizeResponse.AudioStream)
            using (var audioStream = new MemoryStream())
            {
                await pollyAudio.CopyToAsync(audioStream);

                audioStream.Position = 0;

                // Ensure audio file is in an s3 bucket for Transcribe to consume.
                await _s3Client.PutObjectAsync(new PutObjectRequest {
                    BucketName  = _audioBucket,
                    Key         = audioKey,
                    InputStream = audioStream
                });
            }

            // Initiate Transcription Job
            var transcriptionName = $"transcribe_{suffix}";

            // NOTE(review): the region is hard-coded to us-east-1 — confirm the audio bucket lives there.
            var mediaUri = $"https://s3-us-east-1.amazonaws.com/{_audioBucket}/{audioKey}";
            await _transcribeClient.StartTranscriptionJobAsync(new StartTranscriptionJobRequest {
                LanguageCode = "en-US",
                MediaFormat  = MediaFormat.Mp3,
                Media        = new Media {
                    MediaFileUri = mediaUri
                },
                OutputBucketName     = _textBucket,
                TranscriptionJobName = transcriptionName
            });

            // Wait for transcription job to complete
            while (true)
            {
                await Task.Delay(TimeSpan.FromSeconds(3));

                var transcriptionJobStatusReponse = await _transcribeClient.GetTranscriptionJobAsync(new GetTranscriptionJobRequest {
                    TranscriptionJobName = transcriptionName
                });
                var jobStatus = transcriptionJobStatusReponse.TranscriptionJob.TranscriptionJobStatus;

                if (jobStatus == TranscriptionJobStatus.FAILED)
                {
                    LogWarn("Transcribe job failed");
                    return;
                }
                if (jobStatus == TranscriptionJobStatus.COMPLETED)
                {
                    LogInfo("Job Completed!!");
                    break;
                }
                LogInfo("Checking job status... again.");
            }

            // Retrieve text file from s3 and extract the text
            string text;
            using (var s3Response = await _s3Client.GetObjectAsync(new GetObjectRequest {
                BucketName = _textBucket,
                Key        = $"{transcriptionName}.json"
            }))
            using (var reader = new StreamReader(s3Response.ResponseStream))
            {
                text = await reader.ReadToEndAsync();
            }

            LogInfo(text);
            var json          = JObject.Parse(text);
            var transcription = json["results"]["transcripts"][0]["transcript"].ToString();

            LogInfo(transcription);

            // Iterations is known to be >= 0 here (guarded at the top), so only an empty
            // transcription stops the loop at this point.
            if (transcription.Length > 0)
            {
                await _snsClient.PublishAsync(new PublishRequest {
                    TopicArn = _topic,
                    Message  = JsonConvert.SerializeObject(new Message {
                        Iterations = message.Iterations - 1,
                        Text       = transcription,
                        Link       = mediaUri
                    })
                });
            }
        }