/// <summary>
/// Counts words in the transcription of a summarized episode and persists the result
/// through <c>_wordCountStorage</c>.
/// </summary>
/// <param name="episodeId">Episode identifier; used as the storage key and in log messages.</param>
/// <param name="storedEpisode">Stored episode whose <c>Episode</c> is handed to <c>GetEpisodeDuration</c>.</param>
/// <param name="summarizedEpisode">Summarized episode that supplies the transcription and the copied metadata.</param>
private async Task CountWords(int episodeId, SrStoredEpisode storedEpisode, SrStoredSummarizedEpisode summarizedEpisode)
{
    _logger.LogInformation($"Counting words for episode {episodeId}...");

    // A missing transcription is treated as empty text.
    var text = summarizedEpisode.Transcription ?? string.Empty;
    var counted = GetWordsCount(text, _words);

    var record = new SrStoredWordCountEpisode
    {
        EpisodeId = episodeId,
        EpisodeAudioUrl = summarizedEpisode.OriginalAudioUrl,
        EpisodeAudioLocale = summarizedEpisode.AudioLocale,
        EpisodeAudioDurationInSeconds = GetEpisodeDuration(storedEpisode.Episode),
        EpisodeTitle = summarizedEpisode.Title,
        EpisodeUrl = summarizedEpisode.Url,
        EpisodePublishDateUtc = summarizedEpisode.PublishDateUtc,
        ProgramId = summarizedEpisode.ProgramId,
        ProgramName = summarizedEpisode.ProgramName,
        // The transcription is passed through GetMaxLengthForTableStorage before being stored.
        EpisodeAudioTranscription = GetMaxLengthForTableStorage(text),
        WordCount = counted
    };

    await _wordCountStorage.StoreWordCounterEpisode(episodeId, record);

    _logger.LogInformation($"Counted words on episode {episodeId}...");
}
/// <summary>
/// Creates synthesized speech for the English and Swedish transcriptions of an enriched episode
/// (when present) and stores the resulting blob identifiers and URLs.
/// </summary>
/// <param name="episodeId">Episode identifier; used as the storage key and in log messages.</param>
/// <param name="storedEpisode">Stored episode forwarded to <c>CreateAndUploadSpeech</c>.</param>
/// <param name="enrichedEpisode">Enriched episode providing <c>Transcription_EN</c> and <c>Transcription_SV</c>.</param>
private async Task GenerateSpeak(int episodeId, SrStoredEpisode storedEpisode, SrStoredEnrichedEpisode enrichedEpisode)
{
    _logger.LogInformation($"Generating speaker for episode {episodeId}...");

    var speech = new SrStoredEpisodeSpeech();

    // English: only attempted when an English transcription exists.
    var englishTranscription = enrichedEpisode.Transcription_EN;
    if (englishTranscription != null)
    {
        var englishUpload = await CreateAndUploadSpeech(episodeId, storedEpisode, englishTranscription.Text, "en-US", EnUsVoice);

        // A null result leaves the English speech fields unset.
        if (englishUpload.HasValue)
        {
            var uploaded = englishUpload.Value;
            speech.SpeechBlobIdenitifier_EN = uploaded.Key;
            speech.SpeechUrl_EN = uploaded.Value;
        }
    }

    // Swedish: same procedure with the Swedish voice.
    var swedishTranscription = enrichedEpisode.Transcription_SV;
    if (swedishTranscription != null)
    {
        var swedishUpload = await CreateAndUploadSpeech(episodeId, storedEpisode, swedishTranscription.Text, "sv-SE", SvSeVoice);

        if (swedishUpload.HasValue)
        {
            var uploaded = swedishUpload.Value;
            speech.SpeechBlobIdenitifier_SV = uploaded.Key;
            speech.SpeechUrl_SV = uploaded.Value;
        }
    }

    // The speech record is stored even when neither language produced audio.
    await _storage.StoreEpisodeSpeech(episodeId, speech);

    _logger.LogInformation($"Generated speaker for episode {episodeId}...");
}
/// <summary>
/// Runs the transcription pipeline for one episode: starts a transcription, waits for it,
/// transfers the result, deletes the transcription via the batch client, and stores the
/// transcription result.
/// </summary>
/// <param name="storedEpisode">Episode to transcribe.</param>
/// <param name="speechBatchClient">Batch client used to create, poll, and delete the transcription.</param>
private async Task TranscribeAndPersist(SrStoredEpisode storedEpisode, SpeechBatchClient speechBatchClient)
{
    var episodeId = storedEpisode.Episode.Id;

    _logger.LogInformation($"Transcribing episode {episodeId}...");
    var transcriptionId = await TranscribeEpisode(storedEpisode, speechBatchClient);
    var finished = await WaitForTranscription(transcriptionId, storedEpisode, speechBatchClient);
    _logger.LogInformation($"Transcribed episode {episodeId}...");

    // Nothing to transfer when no transcription came back.
    if (finished == null)
    {
        return;
    }

    _logger.LogInformation($"Transfer transcribed episode {episodeId}...");
    var transferred = await TransferTranscribedEpisode(finished, storedEpisode);

    // The transcription is deleted through the batch client once its results have been transferred.
    await speechBatchClient.DeleteTranscriptionAsync(transcriptionId);

    // Only the channel 0 result blob is read back here.
    var channel0Result = await GetTranscriptionResult(transferred, transferred.TranscriptionResultChannel0BlobIdentifier);
    await StoreTranscriptionResult(storedEpisode, channel0Result, transferred);

    _logger.LogInformation($"Transfered transcribed episode {episodeId}...");
}
/// <summary>
/// Copies the display text of the first combined result of the first audio file onto the
/// stored transcription and persists it.
/// </summary>
/// <param name="storedEpisode">Episode whose id is used as the storage key.</param>
/// <param name="transcriptionResult">Transcription result to read the combined display text from.</param>
/// <param name="storedEpisodeTranscription">Transcription record that is updated and stored.</param>
private async Task StoreTranscriptionResult(SrStoredEpisode storedEpisode, TranscriptionResult transcriptionResult, SrStoredEpisodeTranscription storedEpisodeTranscription)
{
    var firstAudioFile = transcriptionResult.AudioFileResults.FirstOrDefault();
    var firstCombined = firstAudioFile?.CombinedResults.FirstOrDefault();

    // Falls back to an empty string when there is no audio file, no combined result, or no display text.
    storedEpisodeTranscription.CombinedDisplayResult = firstCombined?.Display ?? string.Empty;

    await _storage.StoreTranscription(storedEpisode.Episode.Id, storedEpisodeTranscription);
}
/// <summary>
/// Uploads synthesized speech audio to the speaker container as an MP3 block blob.
/// </summary>
/// <param name="storedEpisode">Episode used to derive the blob name.</param>
/// <param name="stream">Stream holding the audio; it is rewound to the start before uploading.</param>
/// <param name="voice">Voice name; becomes part of the blob name suffix.</param>
/// <returns>A pair whose key is the blob identifier and whose value is the blob URI as a string.</returns>
private async Task<KeyValuePair<string, string>> UploadSpeech(SrStoredEpisode storedEpisode, MemoryStream stream, string voice)
{
    var episode = storedEpisode.Episode;
    var blobName = GetBlobName(episode.Program.Id, episode, "mp3", $"Speaker_{voice}");
    var blob = _speakerContainer.GetBlockBlobReference(blobName);

    // Rewind so the upload starts at the beginning of the stream.
    stream.Position = 0;
    blob.Properties.ContentType = "audio/mpeg";

    await blob.UploadFromStreamAsync(stream);

    return new KeyValuePair<string, string>(blobName, blob.Uri.ToString());
}
/// <summary>
/// Analyzes the title, description, and transcription of an episode for Swedish and English,
/// fills the "original" fields according to the episode's audio locale, and stores the
/// enriched episode.
/// </summary>
/// <param name="episodeId">Episode identifier; used as the storage key and in log messages.</param>
/// <param name="storedEpisode">Stored episode supplying title, description, and audio locale.</param>
/// <param name="storedEpisodeTranscription">Transcription supplying the combined display text.</param>
private async Task Enrich(int episodeId, SrStoredEpisode storedEpisode, SrStoredEpisodeTranscription storedEpisodeTranscription)
{
    _logger.LogInformation($"Enriching episode {episodeId}...");

    var enriched = new SrStoredEnrichedEpisode
    {
        OriginalLocale = storedEpisode.AudioLocale
    };

    var episode = storedEpisode.Episode;
    var title = episode.Title;
    var description = episode.Description;
    var transcription = storedEpisodeTranscription.CombinedDisplayResult;
    var locale = storedEpisode.AudioLocale;

    // Swedish texts.
    _logger.LogInformation($"Analyzing episode {episodeId} texts for sv-SE...");
    var sv = await AnalyzeTexts(episodeId, title, description, transcription, locale, "sv-SE");
    enriched.Title_SV = sv.Title;
    enriched.Description_SV = sv.Description;
    enriched.Transcription_SV = sv.Transcription;

    // English texts.
    _logger.LogInformation($"Analyzing episode {episodeId} texts for en-US...");
    var en = await AnalyzeTexts(episodeId, title, description, transcription, locale, "en-US");
    enriched.Title_EN = en.Title;
    enriched.Description_EN = en.Description;
    enriched.Transcription_EN = en.Transcription;

    // The "original" fields reuse the Swedish or English analysis when the audio locale is one
    // of those two; any other locale gets its own analysis with the locale as both source and target.
    if (locale == "sv-SE")
    {
        enriched.Title_Original = enriched.Title_SV;
        enriched.Description_Original = enriched.Description_SV;
        enriched.Transcription_Original = enriched.Transcription_SV;
    }
    else if (locale == "en-US")
    {
        enriched.Title_Original = enriched.Title_EN;
        enriched.Description_Original = enriched.Description_EN;
        enriched.Transcription_Original = enriched.Transcription_EN;
    }
    else
    {
        var original = await AnalyzeTexts(episodeId, title, description, transcription, locale, locale);
        enriched.Title_Original = original.Title;
        enriched.Description_Original = original.Description;
        enriched.Transcription_Original = original.Transcription;
    }

    await _storage.StoreEnrichedEpisode(episodeId, enriched);

    _logger.LogInformation($"Enriched episode {episodeId}...");
}
/// <summary>
/// Posts a transcription job for the episode's audio and returns the identifier extracted
/// from the location the batch client reports.
/// </summary>
/// <param name="storedEpisode">Episode supplying the audio URL, audio locale, and id.</param>
/// <param name="speechBatchClient">Batch client the transcription definition is posted to.</param>
/// <returns>The transcription identifier produced by <c>GetTranscriptionGuid</c>.</returns>
private async Task<Guid> TranscribeEpisode(SrStoredEpisode storedEpisode, SpeechBatchClient speechBatchClient)
{
    // The audio URL is passed through RemoveQueryString before it is handed to the service.
    var cleanAudioUrl = RemoveQueryString(storedEpisode.AudioUrl);

    var definition = TranscriptionDefinition.Create(
        $"RadioText - Episode {storedEpisode.Episode.Id}",
        "RadioText",
        storedEpisode.AudioLocale,
        new Uri(cleanAudioUrl));

    var location = await speechBatchClient.PostTranscriptionAsync(definition);

    return GetTranscriptionGuid(location);
}
/// <summary>
/// Builds a summarized episode from the stored episode, its transcription, the enriched
/// texts, and the generated speech URLs, then stores it in summary storage.
/// </summary>
/// <param name="episodeId">Episode identifier; used as the storage key and in log messages.</param>
/// <param name="storedEpisode">Stored episode supplying audio and episode/program metadata.</param>
/// <param name="storedEpisodeTranscription">Transcription supplying the combined display text.</param>
/// <param name="storedEnrichedEpisode">Enriched episode supplying original, English, and Swedish texts.</param>
/// <param name="episodeSpeech">Speech record supplying the English and Swedish speech URLs.</param>
private async Task Summarize(int episodeId, SrStoredEpisode storedEpisode, SrStoredEpisodeTranscription storedEpisodeTranscription, SrStoredEnrichedEpisode storedEnrichedEpisode, SrStoredEpisodeSpeech episodeSpeech)
{
    _logger.LogInformation($"Summarizing episode {episodeId}...");

    var episode = storedEpisode.Episode;
    var program = episode.Program;

    var summary = new SrStoredSummarizedEpisode
    {
        EpisodeId = episodeId,

        // Audio
        OriginalAudioUrl = storedEpisode.OriginalAudioUrl,
        AudioUrl = storedEpisode.AudioUrl,
        AudioLocale = storedEpisode.AudioLocale,

        // Episode and program metadata
        Title = episode.Title,
        Description = episode.Description,
        Url = episode.Url,
        PublishDateUtc = episode.PublishDateUtc,
        ImageUrl = episode.ImageUrl,
        ProgramId = program.Id,
        ProgramName = program.Name,

        // Raw transcription text
        Transcription = storedEpisodeTranscription.CombinedDisplayResult,

        // Texts in the original locale
        Title_Original = storedEnrichedEpisode.Title_Original,
        Description_Original = storedEnrichedEpisode.Description_Original,
        Transcription_Original = storedEnrichedEpisode.Transcription_Original,

        // English
        Title_EN = storedEnrichedEpisode.Title_EN,
        Description_EN = storedEnrichedEpisode.Description_EN,
        Transcription_EN = storedEnrichedEpisode.Transcription_EN,
        SpeechUrl_EN = episodeSpeech.SpeechUrl_EN,

        // Swedish
        Title_SV = storedEnrichedEpisode.Title_SV,
        Description_SV = storedEnrichedEpisode.Description_SV,
        Transcription_SV = storedEnrichedEpisode.Transcription_SV,
        SpeechUrl_SV = episodeSpeech.SpeechUrl_SV
    };

    await _summaryStorage.StoreSummarizedEpisode(episodeId, summary);

    _logger.LogInformation($"Summarized episode {episodeId}...");
}
/// <summary>
/// Synthesizes <paramref name="text"/> to MP3 audio with the given language and voice and,
/// when synthesis completes, uploads the audio via <c>UploadSpeech</c>.
/// </summary>
/// <param name="episodeId">Episode identifier; used in log messages.</param>
/// <param name="storedEpisode">Stored episode forwarded to <c>UploadSpeech</c>.</param>
/// <param name="text">Text to synthesize.</param>
/// <param name="language">Speech synthesis language set on the speech config.</param>
/// <param name="voice">Speech synthesis voice name set on the speech config.</param>
/// <returns>
/// The uploaded blob's identifier/URL pair when synthesis completed, or <c>null</c> when
/// synthesis was canceled.
/// </returns>
/// <exception cref="Exception">Thrown when the result reason is neither completed nor canceled.</exception>
private async Task<KeyValuePair<string, string>?> CreateAndUploadSpeech(int episodeId, SrStoredEpisode storedEpisode, string text, string language, string voice)
{
    var config = _speechConfigFactory.Get();
    config.SpeechSynthesisLanguage = language;
    config.SpeechSynthesisVoiceName = voice;
    config.SetSpeechSynthesisOutputFormat(SpeechSynthesisOutputFormat.Audio24Khz160KBitRateMonoMp3);

    // The synthesizer is configured with a push stream whose callback is constructed over this MemoryStream.
    using var stream = new MemoryStream();
    using var pushStream = AudioOutputStream.CreatePushStream(new AudioPushAudioOutputStreamCallback(stream));
    using var audioConfig = AudioConfig.FromStreamOutput(pushStream);
    using var synthesizer = new SpeechSynthesizer(config, audioConfig);

    var result = await synthesizer.SpeakTextAsync(text);

    switch (result.Reason)
    {
        case ResultReason.SynthesizingAudioCompleted:
        {
            _logger.LogInformation($"Created speech for episode {episodeId}");
            var uploadedBlob = await UploadSpeech(storedEpisode, stream, voice);
            _logger.LogInformation($"Uploaded speech for episode {episodeId}");
            return uploadedBlob;
        }

        case ResultReason.Canceled:
        {
            var details = SpeechSynthesisCancellationDetails.FromResult(result);
            _logger.LogError($"Error creating speech for episode {episodeId}: Reason={details.Reason}");

            if (details.Reason == CancellationReason.Error)
            {
                // Expect some texts to be too long etc.
                _logger.LogError(
                    $"Error creating speech for episode {episodeId}: ErrorCode={details.ErrorCode}; ErrorDetails=[{details.ErrorDetails}]");
            }

            // Cancellation is reported to the caller as "no speech" rather than as an exception.
            return null;
        }

        default:
            throw new Exception($"Unknown result status for speech: {result.Reason}");
    }
}
/// <summary>
/// Transfers the transcription result of one audio channel into the transcriptions container,
/// if the transcription has a result URL for that channel.
/// </summary>
/// <param name="storedEpisode">Episode whose <c>AudioBlobIdentifier</c> prefixes the target blob name.</param>
/// <param name="transcription">Transcription whose <c>ResultsUrls</c> is searched for the channel.</param>
/// <param name="channel">Channel name; looked up under the key <c>channel_{channel}</c>.</param>
/// <returns>
/// The target blob identifier and the URI returned by the storage transfer, or <c>null</c>
/// when the transcription has no result URL for the channel.
/// </returns>
private async Task<(string blobIdentifier, Uri blobUri)?> TransferResultForChannel(SrStoredEpisode storedEpisode, Azure.SpeechBatchClient.Transcription transcription, string channel)
{
    // Single lookup: TryGetValue replaces the ContainsKey + indexer pair, which searched the
    // dictionary twice and built the key string twice.
    if (!transcription.ResultsUrls.TryGetValue($"channel_{channel}", out var resultsUri))
    {
        return null;
    }

    var targetBlobIdentifier = $"{storedEpisode.AudioBlobIdentifier}__Transcription_{channel}.json";

    var targetBlobUrl = await _storageTransfer.TransferBlockBlobIfNotExists(
        _transcriptionsContainerName,
        targetBlobIdentifier,
        resultsUri);

    return (targetBlobIdentifier, targetBlobUrl);
}
/// <summary>
/// Transfers the results for channels "0" and "1" of a transcription and returns a stored
/// transcription record describing where they ended up.
/// </summary>
/// <param name="transcription">Transcription whose channel results are transferred.</param>
/// <param name="storedEpisode">Episode the transcription belongs to.</param>
/// <returns>
/// A record with status <c>StatusTranscribed</c>; blob identifiers and URLs are empty strings
/// for a channel that had no result.
/// </returns>
private async Task<SrStoredEpisodeTranscription> TransferTranscribedEpisode(Azure.SpeechBatchClient.Transcription transcription, SrStoredEpisode storedEpisode)
{
    // Channels are transferred one after the other, channel 0 first.
    var first = await TransferResultForChannel(storedEpisode, transcription, "0");
    var second = await TransferResultForChannel(storedEpisode, transcription, "1");

    var firstIdentifier = first?.blobIdentifier ?? string.Empty;
    var firstUrl = first?.blobUri.ToString() ?? string.Empty;
    var secondIdentifier = second?.blobIdentifier ?? string.Empty;
    var secondUrl = second?.blobUri.ToString() ?? string.Empty;

    return new SrStoredEpisodeTranscription
    {
        Status = StatusTranscribed,
        TranscriptionResultChannel0BlobIdentifier = firstIdentifier,
        TranscriptionResultChannel0Url = firstUrl,
        TranscriptionResultChannel1BlobIdentifier = secondIdentifier,
        TranscriptionResultChannel1Url = secondUrl,
    };
}
/// <summary>
/// Polls the batch client for a transcription until it reports <c>Succeeded</c>, waiting
/// <c>WaitBetweenStatusCheck</c> between polls.
/// </summary>
/// <param name="transcriptionId">Identifier of the transcription to poll.</param>
/// <param name="storedEpisode">Episode the transcription belongs to; its id appears in log and error messages.</param>
/// <param name="speechBatchClient">Batch client used to fetch the transcription.</param>
/// <returns>The transcription once its status is <c>Succeeded</c>.</returns>
/// <exception cref="Exception">Thrown when the status is an empty string or <c>Failed</c>.</exception>
private async Task<Transcription?> WaitForTranscription(Guid transcriptionId, SrStoredEpisode storedEpisode, SpeechBatchClient speechBatchClient)
{
    while (true)
    {
        var current = await speechBatchClient.GetTranscriptionAsync(transcriptionId);
        var episodeId = storedEpisode.Episode.Id;

        _logger.LogTrace($"Transcribing status for {episodeId} is {current.Status}");

        switch (current.Status)
        {
            case "":
            case "Failed":
            {
                // The same text is logged and carried by the exception.
                var failure = $"Error transcribing {episodeId}: {current.StatusMessage}";
                _logger.LogError(failure);
                throw new Exception(failure);
            }

            case "Succeeded":
                _logger.LogInformation($"Transcribed {episodeId}");
                return current;

            // Still in progress. Any status not listed in this switch also falls out here
            // and is polled again after the delay.
            case "NotStarted":
            case "Running":
                break;
        }

        await Task.Delay(WaitBetweenStatusCheck);
    }
}