/// <summary>
/// Converts the request's text to MP3 speech with Amazon Polly, stores the audio in the
/// configured S3 bucket under the requested file name, and publishes an SNS notification.
/// </summary>
/// <param name="request">Carries the text to convert (<c>Content</c>) and the S3 object key (<c>FileName</c>).</param>
/// <returns>A response containing the file name the audio was stored under.</returns>
/// <exception cref="ArgumentNullException">The request is null, or its content is null or empty.</exception>
/// <exception cref="Exception">Polly or S3 did not answer with HTTP 200.</exception>
public async Task<ConvertTextResponse> AddTextToSpeechFileToBucket(ConvertTextRequest request)
{
    if (request == null)
    {
        throw new ArgumentNullException(nameof(request));
    }

    if (string.IsNullOrEmpty(request.Content))
    {
        // Two-argument overload on purpose: the single-string overload treats its
        // argument as the parameter name, not as the message.
        throw new ArgumentNullException(nameof(request.Content), "Content cannot be empty");
    }

    _logger.Info($"Content requested to convert: {request.Content}");

    var pollyRequest = new SynthesizeSpeechRequest
    {
        OutputFormat = OutputFormat.Mp3,
        Text = request.Content,
        TextType = "text",
        VoiceId = VoiceId.Amy, // https://docs.aws.amazon.com/sdkfornet/v3/apidocs/items/Polly/TVoiceId.html
        LanguageCode = LanguageCode.EnUS // https://docs.aws.amazon.com/sdkfornet/v3/apidocs/items/Polly/TLanguageCode.html
    };

    // Send request to Amazon Polly to convert text to audio.
    var synthesizeSpeechResponse = await _pollyClient.SynthesizeSpeechAsync(pollyRequest);
    if (synthesizeSpeechResponse == null || synthesizeSpeechResponse.HttpStatusCode != HttpStatusCode.OK)
    {
        throw new Exception("Text could not be converted to audio.");
    }

    _logger.Info($"Requested characters was: {synthesizeSpeechResponse.RequestCharacters}");

    // Buffer the audio from Amazon Polly, then store it in S3 under the file name from the request.
    PutObjectResponse s3Response;
    using (var audioStream = synthesizeSpeechResponse.AudioStream)
    using (var memoryStream = new MemoryStream())
    {
        await audioStream.CopyToAsync(memoryStream);

        // Rewind so the upload starts at the first byte regardless of SDK stream-reset settings.
        memoryStream.Position = 0;

        s3Response = await _s3Client.PutObjectAsync(new PutObjectRequest
        {
            BucketName = _s3BucketName,
            Key = request.FileName,
            InputStream = memoryStream
        });
    }

    if (s3Response == null || s3Response.HttpStatusCode != HttpStatusCode.OK)
    {
        throw new Exception("Unable to save audio file to s3");
    }

    // Publish to a SNS topic that a new audio file has been saved.
    var publishRequest = new PublishRequest
    {
        TopicArn = _notificationSnsTopic,
        Message = "An audio file was saved",
        Subject = "New MP3 File"
    };
    await _snsClient.PublishAsync(publishRequest);

    return new ConvertTextResponse { FileName = request.FileName };
}
/// <summary>
/// Asks Amazon Polly to synthesize the supplied text as MP3 audio using the Joanna voice.
/// </summary>
/// <param name="client">The Amazon Polly client that performs the request.</param>
/// <param name="text">The text to be spoken.</param>
/// <returns>The response returned by <c>SynthesizeSpeechAsync</c>, including its audio stream.</returns>
private static async Task<SynthesizeSpeechResponse> PollySynthesizeSpeech(IAmazonPolly client, string text)
{
    var speechRequest = new SynthesizeSpeechRequest();
    speechRequest.OutputFormat = OutputFormat.Mp3;
    speechRequest.VoiceId = VoiceId.Joanna;
    speechRequest.Text = text;

    return await client.SynthesizeSpeechAsync(speechRequest);
}
/// <summary>
/// Synthesizes <paramref name="text"/> to 8 kHz MP3 audio with Amazon Polly (Joanna voice),
/// uploads it to S3 through <c>S3Util</c>, and returns a pre-signed URL for the uploaded object.
/// </summary>
/// <param name="pollyClient">Amazon Polly client used for synthesis.</param>
/// <param name="S3Client">Amazon S3 client handed to <c>S3Util</c> for the upload and URL generation.</param>
/// <param name="text">Plain text to convert to speech.</param>
/// <returns>The pre-signed URL, or <c>null</c> when Polly raised an <c>AmazonPollyException</c>.</returns>
public static async Task<string> PollyDemo(IAmazonPolly pollyClient, IAmazonS3 S3Client, string text)
{
    string result = null;

    var synthesizeRequest = new SynthesizeSpeechRequest
    {
        LanguageCode = LanguageCode.EnUS,
        OutputFormat = "mp3",
        SampleRate = "8000",
        Text = text,
        TextType = "text",
        VoiceId = "Joanna"
    };

    try
    {
        SynthesizeSpeechResponse synthesizeResponse = await pollyClient.SynthesizeSpeechAsync(synthesizeRequest);
        Console.WriteLine(synthesizeResponse.ContentType);
        Console.WriteLine(synthesizeResponse.RequestCharacters);

        using (var audioStream = synthesizeResponse.AudioStream)
        using (var ms = new MemoryStream())
        {
            await audioStream.CopyToAsync(ms);
            Console.WriteLine(ms.Length);

            // The copy leaves the cursor at end-of-stream; rewind so the uploader
            // reads the audio from the first byte.
            ms.Position = 0;

            // Upload audio to S3 bucket
            string bucketName = "reinvent-indiamazones";
            string key = "pollytest";

            // NOTE(review): S3Util.UploadToS3 is not visible here; Task.Run is kept because it may be synchronous.
            await Task.Run(() => S3Util.UploadToS3(S3Client, bucketName, key, ms));

            // TODO : need to check the file exists in S3
            result = S3Util.GetPresignedURL(S3Client, bucketName, key);
        }
    }
    catch (AmazonPollyException pollyException)
    {
        // Print the message verbatim: passing it as a composite format string would throw
        // FormatException if it contained braces, and would never show the inner exception.
        Console.WriteLine(pollyException.Message);
        if (pollyException.InnerException != null)
        {
            Console.WriteLine(pollyException.InnerException);
        }
    }

    return result;
}
// Invokes the Amazon Polly SynthesizeSpeech operation on the given client: the synchronous
// API on DESKTOP builds, the async API awaited synchronously on CORECLR builds.
// An AmazonServiceException whose inner exception is a WebException is rethrown as an
// Exception with the message produced by Utils.Common.FormatNameResolutionFailureMessage;
// any other AmazonServiceException propagates unchanged.
private Amazon.Polly.Model.SynthesizeSpeechResponse CallAWSServiceOperation(IAmazonPolly client, Amazon.Polly.Model.SynthesizeSpeechRequest request)
{
    Utils.Common.WriteVerboseEndpointMessage(this, client.Config, "Amazon Polly", "SynthesizeSpeech");

    try
    {
#if DESKTOP
        return client.SynthesizeSpeech(request);
#elif CORECLR
        return client.SynthesizeSpeechAsync(request).GetAwaiter().GetResult();
#else
        #error "Unknown build edition"
#endif
    }
    catch (AmazonServiceException serviceError)
    {
        var networkFailure = serviceError.InnerException as System.Net.WebException;
        if (networkFailure == null)
        {
            // No web exception underneath: let the original exception propagate as-is.
            throw;
        }

        var failureMessage = Utils.Common.FormatNameResolutionFailureMessage(client.Config, networkFailure.Message);
        throw new Exception(failureMessage, networkFailure);
    }
}
/// <summary>
/// Handles an SNS notification carrying a post ID: loads the post from DynamoDB, synthesizes
/// its text with Amazon Polly in chunks, uploads the combined MP3 to S3 with a public-read ACL,
/// and records the audio URL and the "UPDATED" status on the post.
/// </summary>
/// <param name="event">The SNS event; the first record's message is used as the post ID.</param>
/// <param name="context">Lambda context, used for logging.</param>
/// <returns>A task that completes when the post has been updated.</returns>
/// <exception cref="InvalidOperationException">No post with the given ID exists in the table.</exception>
public async Task FunctionHandler(SNSEvent @event, ILambdaContext context)
{
    var logger = context.Logger;
    var postId = @event.Records[0].Sns.Message;
    logger.LogLine($"Text to Speech function. Post ID in DynamoDB: {postId}");

    // Retrieving information about the post from DynamoDB table
    var tableName = Environment.GetEnvironmentVariable("DB_TABLE_NAME");
    var queryResult = await _dbClient.QueryAsync(new QueryRequest
    {
        TableName = tableName,
        KeyConditions = new Dictionary<string, Condition>
        {
            {
                "id",
                new Condition()
                {
                    ComparisonOperator = ComparisonOperator.EQ,
                    AttributeValueList = new List<AttributeValue> { new AttributeValue(postId) }
                }
            }
        }
    });

    if (queryResult.Items == null || queryResult.Items.Count == 0)
    {
        throw new InvalidOperationException($"Post '{postId}' was not found in table '{tableName}'.");
    }

    var post = queryResult.Items[0];
    var text = post["text"].S;
    var voice = post["voice"].S;

    // Chunk and Synthesize text into speech.
    var chunkSize = 1000;
    var textBlocks = text.Chunk(chunkSize).ToArray();
    var path = System.IO.Path.Combine("/tmp/", postId);
    var bucketName = Environment.GetEnvironmentVariable("BUCKET_NAME");

    // The file is opened in Append mode below, so leftovers from an earlier (failed or retried)
    // invocation for the same post would otherwise end up at the front of the new audio.
    System.IO.File.Delete(path);

    try
    {
        for (var i = 0; i < textBlocks.Length; i++)
        {
            var textBlock = textBlocks[i];
            var pollyResponse = await _pollyClient.SynthesizeSpeechAsync(new SynthesizeSpeechRequest
            {
                OutputFormat = "mp3",
                Text = textBlock,
                VoiceId = voice
            });

            if (pollyResponse?.AudioStream != null)
            {
                using (var audioStream = pollyResponse.AudioStream)
                using (var fileStream = System.IO.File.Open(path, System.IO.FileMode.Append))
                {
                    await audioStream.CopyToAsync(fileStream);
                }

                logger.LogLine($"Finished synthesizing text chunck {i + 1}");
            }
        }

        // Put synthesized audio into s3 bucket
        await _s3Client.UploadObjectFromFilePathAsync(bucketName, $"{postId}.mp3", path, null);
    }
    finally
    {
        // Remove the scratch file whether or not the upload succeeded; cleanup is best-effort
        // and must not mask the exception that got us here.
        try
        {
            System.IO.File.Delete(path);
        }
        catch (System.IO.IOException cleanupError)
        {
            logger.LogLine($"Could not delete temporary file {path}: {cleanupError.Message}");
        }
    }

    await _s3Client.PutACLAsync(new PutACLRequest
    {
        BucketName = bucketName,
        Key = $"{postId}.mp3",
        CannedACL = S3CannedACL.PublicRead
    });

    var bucketLocation = await _s3Client.GetBucketLocationAsync(new GetBucketLocationRequest
    {
        BucketName = bucketName
    });
    var region = bucketLocation.Location;

    // Update DynamoDB with the audio file url and the "UPDATED" status
    var urlBeginning = region == null || string.IsNullOrWhiteSpace(region.Value)
        ? "https://s3.amazonaws.com/"
        : $"https://s3-{region.Value}.amazonaws.com/";
    var url = string.Concat(urlBeginning, bucketName, "/", postId, ".mp3");

    await _dbClient.UpdateItemAsync(new UpdateItemRequest
    {
        TableName = tableName,
        Key = new Dictionary<string, AttributeValue>
        {
            { "id", new AttributeValue(postId) }
        },
        UpdateExpression = "SET #statusAtt = :statusValue, #urlAtt = :urlValue",
        ExpressionAttributeValues = new Dictionary<string, AttributeValue>
        {
            { ":statusValue", new AttributeValue("UPDATED") },
            { ":urlValue", new AttributeValue(url) }
        },
        // "status" and "url" are referenced through placeholders rather than directly in the expression.
        ExpressionAttributeNames = new Dictionary<string, string>
        {
            { "#statusAtt", "status" },
            { "#urlAtt", "url" }
        }
    });

    logger.LogLine($"Finished updating post ,ID: {postId}, in table {tableName}");
}
/// <summary>
/// Synthesizes <paramref name="text"/> with Amazon Polly (Salli voice, MP3), decodes it to PCM
/// through a temporary WAV file, re-encodes it as 8 kHz mono G.711 mu-law, and appends one
/// second of silence.
/// </summary>
/// <param name="text">The text to convert to speech.</param>
/// <returns>The mu-law audio bytes, tagged with <c>Codec.G711U</c>.</returns>
public AudioInfo Synthesize(string text)
{
    // G.711 mu-law stores samples bit-inverted: 0xFF decodes to zero amplitude, while 0x00
    // decodes to a full-scale sample. Padding must therefore use 0xFF, not zero bytes.
    const byte MuLawSilence = 0xFF;
    const int SilenceSeconds = 1;
    const int ReadBlockSize = 160;

    var ulawFormat = WaveFormat.CreateMuLawFormat(8000, 1);
    string tempFile = Path.GetTempFileName();

    try
    {
        var sreq = new SynthesizeSpeechRequest
        {
            Text = text,
            OutputFormat = OutputFormat.Mp3,
            VoiceId = VoiceId.Salli
        };

        // This method is synchronous, so the async SDK call is blocked on here.
        SynthesizeSpeechResponse sres = _amazonPolly.SynthesizeSpeechAsync(sreq).GetAwaiter().GetResult();

        // Buffer the MP3 in memory, then decode it to a PCM WAV file on disk.
        using (var pollyAudio = sres.AudioStream)
        using (var pollyMemoryStream = new MemoryStream())
        {
            pollyAudio.CopyTo(pollyMemoryStream);
            pollyMemoryStream.Position = 0;

            using (var reader = new Mp3FileReader(pollyMemoryStream, wave => new DmoMp3FrameDecompressor(wave)))
            using (WaveStream pcmStream = WaveFormatConversionStream.CreatePcmStream(reader))
            {
                WaveFileWriter.CreateWaveFile(tempFile, pcmStream);
            }
        }

        // Convert in two steps: first to 8 kHz 16-bit mono PCM, then to mu-law.
        var pcmFormat = new WaveFormat(8000, 16, 1);

        using (var encoded = new MemoryStream())
        using (var pcmStm = new WaveFormatConversionStream(pcmFormat, new WaveFileReader(tempFile)))
        using (var ulawStm = new WaveFormatConversionStream(ulawFormat, pcmStm))
        {
            byte[] buffer = new byte[ReadBlockSize];
            int bytesRead;
            while ((bytesRead = ulawStm.Read(buffer, 0, buffer.Length)) > 0)
            {
                encoded.Write(buffer, 0, bytesRead);
            }

            var silentBytes = new byte[ulawStm.WaveFormat.AverageBytesPerSecond * SilenceSeconds];
            for (int i = 0; i < silentBytes.Length; i++)
            {
                silentBytes[i] = MuLawSilence;
            }
            encoded.Write(silentBytes, 0, silentBytes.Length);

            return new AudioInfo()
            {
                AudioData = encoded.ToArray(),
                Codec = Codec.G711U
            };
        }
    }
    finally
    {
        try
        {
            File.Delete(tempFile);
        }
        catch
        {
            // Best-effort cleanup: a leftover temp file must not replace the result
            // or mask an exception thrown by the conversion above.
        }
    }
}
/// <summary>
/// Handles an API Gateway proxy request by synthesizing a fixed sentence with Amazon Polly
/// (neural Joanna voice, 24 kHz MP3) and storing the audio in the target S3 bucket under a
/// timestamped key in the "audios/" prefix.
/// </summary>
/// <param name="request">The incoming API Gateway request; its contents are not read.</param>
/// <param name="context">Lambda context, used for logging.</param>
/// <returns>
/// A 200 response on success, or a 500 plain-text response carrying the exception message
/// when synthesis, buffering, or the upload fails. Both include the CORS origin header.
/// </returns>
public async Task<APIGatewayProxyResponse> FunctionHandler(APIGatewayProxyRequest request, ILambdaContext context)
{
    // Single place that logs a failure and shapes the 500 response used by every error path.
    Func<Exception, APIGatewayProxyResponse> fail = error =>
    {
        context.Logger.LogLine(error.Message);
        return new APIGatewayProxyResponse()
        {
            StatusCode = 500,
            Body = error.Message,
            Headers = new Dictionary<string, string>()
            {
                { "Content-Type", "text/plain" },
                { "Access-Control-Allow-Origin", ALLOWED_ORIGIN }
            }
        };
    };

    SynthesizeSpeechResponse synthesizeSpeechResponse;
    try
    {
        synthesizeSpeechResponse = await _pollyClient.SynthesizeSpeechAsync(new SynthesizeSpeechRequest()
        {
            Engine = Engine.Neural,
            LanguageCode = LanguageCode.EnUS,
            OutputFormat = OutputFormat.Mp3,
            SampleRate = "24000",
            Text = "Hi, my name is Szilard. I am from Brasov and I work at Siemens.",
            TextType = TextType.Text,
            VoiceId = VoiceId.Joanna
        });
    }
    catch (AmazonPollyException ex)
    {
        return fail(ex);
    }

    //
    // put audio stream into S3
    //
    using (var audioStream = synthesizeSpeechResponse.AudioStream)
    using (var stream = new MemoryStream())
    {
        try
        {
            await audioStream.CopyToAsync(stream);
        }
        catch (Exception ex)
        {
            return fail(ex);
        }

        // Rewind so the upload starts at the first byte regardless of SDK stream-reset settings.
        stream.Position = 0;

        try
        {
            await _s3Client.PutObjectAsync(new PutObjectRequest()
            {
                InputStream = stream,
                BucketName = TARGET_BUCKET,
                // S3 keys always use '/'; Path.Combine would insert the OS separator instead.
                Key = "audios/" + DateTime.Now.ToString("dd-MM-yyyy_HH-mm-ss-ffff") + ".mp3"
            });
        }
        catch (AmazonS3Exception ex)
        {
            return fail(ex);
        }
    }

    //
    // return successful response
    //
    return new APIGatewayProxyResponse()
    {
        StatusCode = 200,
        Headers = new Dictionary<string, string>()
        {
            { "Access-Control-Allow-Origin", ALLOWED_ORIGIN }
        }
    };
}