public async Task StartSpeechRecognitionAsync()
{
    SpeechTranslationConfig config = GetRecognizerConfig();
    if (config == null)
    {
        return;
    }

    ResetState();
    DisposeRecognizer();

    DeviceInformation microphoneInput = await Util.GetDeviceInformation(DeviceClass.AudioCapture, SettingsHelper.Instance.MicrophoneName);
    using (AudioConfig audioConfig = AudioConfig.FromMicrophoneInput(microphoneInput.Id))
    {
        translationRecognizer = audioConfig != null
            ? new TranslationRecognizer(config, audioConfig)
            : new TranslationRecognizer(config);

        translationRecognizer.Recognizing += OnTranslateRecognizing;
        translationRecognizer.Recognized += OnTranslateRecognized;
        translationRecognizer.Canceled += OnTranslateCanceled;
        translationRecognizer.SessionStarted += (s, e) =>
        {
            recognizeCancellationTokenSource = new CancellationTokenSource();
        };

        await translationRecognizer.StartContinuousRecognitionAsync();
    }
}
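// A minimal sketch of the matching teardown for the snippet above; it is not part of
// the original sample. It assumes the same translationRecognizer and
// recognizeCancellationTokenSource fields, and the method name is hypothetical.
public async Task StopSpeechRecognitionAsync()
{
    if (translationRecognizer != null)
    {
        // Cancel any per-session work before stopping the recognizer.
        recognizeCancellationTokenSource?.Cancel();
        await translationRecognizer.StopContinuousRecognitionAsync();
        translationRecognizer.Dispose();
        translationRecognizer = null;
    }
}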
protected async Task StartRecognition()
{
    if (speechRecognizer != null)
    {
        await speechRecognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);
    }

    if (translationRecognizer != null)
    {
        await translationRecognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);
    }
}
private async Task StartSpeechTranscriptionEngine(string recognitionLanguage, string targetLanguage)
{
    _translationConfig.SpeechRecognitionLanguage = recognitionLanguage;
    _translationConfig.AddTargetLanguage(targetLanguage);

    _speechConfig.SpeechRecognitionLanguage = targetLanguage;
    _speechConfig.SpeechSynthesisLanguage = targetLanguage;

    _synthesizer = new SpeechSynthesizer(_speechConfig, _output);
    _recognizer = new TranslationRecognizer(_translationConfig, _audioInput);
    _recognizer.Recognized += RecognizerRecognized;

    await _recognizer.StartContinuousRecognitionAsync();
}
/// <summary>
/// Initiate continuous speech recognition from the default microphone, including live translation.
/// </summary>
private async void StartContinuousTranslation()
{
    Debug.Log("Starting Continuous Translation Recognition.");
    CreateTranslationRecognizer();

    if (translator != null)
    {
        Debug.Log("Starting Speech Translator.");
        await translator.StartContinuousRecognitionAsync().ConfigureAwait(false);
        recognizedString = "Speech Translator is now running.";
        Debug.Log("Speech Translator is now running.");
    }

    Debug.Log("Start Continuous Speech Translation exit");
}
public async void BeginTranslating()
{
    if (micPermissionGranted)
    {
        CreateTranslationRecognizer();
        if (translator != null)
        {
            await translator.StartContinuousRecognitionAsync().ConfigureAwait(false);
        }
    }
    else
    {
        recognizedString = "This app cannot function without access to the microphone.";
    }
}
void ListenForSpeech()
{
    try
    {
        translationReco = factory.CreateTranslationRecognizer("en-US", new List<string> { "de" }, "de-DE-Hedda");

        translationReco.SynthesisResult += (s, e) =>
        {
            Log.Info("SpeechSDKDemo", $"Synthesis Result {e.Value.SynthesisStatus}");
            if (e.Value.SynthesisStatus == SynthesisStatus.Success)
            {
                PlayWay(e.Value.GetAudio());
            }
        };

        translationReco.FinalResult += (s, e) =>
        {
            Log.Info("SpeechSDKDemo", $"Final result {e.Value.Text}");
            if (!string.IsNullOrWhiteSpace(e.Value.Text))
            {
                RunOnUiThread(() => FullOutput.Text += $"{e.Value.Translations["de"]} ");
            }
        };

        translationReco.IntermediateResult += (s, e) =>
        {
            Log.Info("SpeechSDKDemo", $"Translation intermediate result {e.Value.Text}");
            if (!string.IsNullOrWhiteSpace(e.Value.Text))
            {
                RunOnUiThread(() => IntermediateOutput.Text = $"{e.Value.Text} - {e.Value.Translations["de"]} ");
            }
        };

        translationReco.RecognitionError += (s, e) =>
        {
            Log.Info("SpeechSDKDemo", $"Error result {e.Value?.Name()}");
        };

        // Note: this sample (older factory-based SDK) does not await the returned task.
        translationReco.StartContinuousRecognitionAsync();
    }
    catch (Exception ex)
    {
        Log.Error("SpeechSDKDemo", $"unexpected {ex.Message}");
    }
}
public async void OnPointerDown()
{
    if (!Microphone.IsRecording(Microphone.devices[0]))
    {
        Debug.Log("Microphone.Start: " + Microphone.devices[0]);
        _audioSource.clip = Microphone.Start(Microphone.devices[0], true, 200, 16000);
        Debug.Log("audioSource.clip channels: " + _audioSource.clip.channels);
        Debug.Log("audioSource.clip frequency: " + _audioSource.clip.frequency);
    }

    await _recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);

    lock (_threadLocker)
    {
        _recognitionStarted = true;
        Debug.Log("RecognitionStarted: " + _recognitionStarted.ToString());
    }
}
public async Task SpeechRecognitionFromFileAsync(StorageFile file)
{
    SpeechTranslationConfig config = GetRecognizerConfig();
    if (config == null)
    {
        return;
    }

    ResetState();
    stopRecognitionTaskCompletionSource = new TaskCompletionSource<int>();

    using (var audioInput = AudioConfig.FromWavFileInput(file.Path))
    {
        using (var recognizer = new TranslationRecognizer(config, audioInput))
        {
            recognizer.Recognizing += OnTranslateRecognizing;
            recognizer.Recognized += OnTranslateRecognized;
            recognizer.Canceled += OnTranslateCanceled;
            recognizer.SessionStarted += (s, e) =>
            {
                recognizeCancellationTokenSource = new CancellationTokenSource();
            };
            recognizer.SessionStopped += (s, e) =>
            {
                if (recognizeCancellationTokenSource != null && recognizeCancellationTokenSource.Token.CanBeCanceled)
                {
                    recognizeCancellationTokenSource.Cancel();
                }

                stopRecognitionTaskCompletionSource.TrySetResult(0);
            };

            // Starts continuous recognition.
            await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);

            // Waits for completion.
            await stopRecognitionTaskCompletionSource.Task.ConfigureAwait(false);

            // Stops recognition.
            await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);
        }
    }
}
private async void StartRecognition()
{
    if (isRecognitionStarted)
    {
        return;
    }

    Debug.Log("start recognition");

    string fromLang;
    string toLang;
    if (modeList.value == 0)
    {
        fromLang = "ja-JP";
        toLang = "en";
    }
    else
    {
        fromLang = "en-US";
        toLang = "ja";
    }

    Debug.Log("mode : " + fromLang + " -> " + toLang);

    var config = SpeechTranslationConfig.FromSubscription(apiKeyInputField.text, apiRegionInputField.text);
    config.SpeechRecognitionLanguage = fromLang;
    config.AddTargetLanguage(toLang);

    recognizer = new TranslationRecognizer(config);
    recognizer.Canceled += CanceledHandler;
    recognizer.SessionStarted += SessionStartedHandler;
    recognizer.SessionStopped += SessionStoppedHandler;
    recognizer.SpeechStartDetected += SpeechStartDetectedHandler;
    recognizer.SpeechEndDetected += SpeechEndDetectedHandler;
    recognizer.Recognizing += RecognizingHandler;
    recognizer.Recognized += RecognizedHandler;

    await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);

    isRecognitionStarted = true;
    isRecognitionStateChanged = true;
}
public async Task SpeechTranslationWithMicrophoneAsync()
{
    // Translation source language.
    var selectedRecordingLanguage = RecordingLanguagesPicker.SelectedItem as RecordingLanguage;
    string fromLanguage = selectedRecordingLanguage.Locale; // e.g. "ja-JP"

    // Creates an instance of a speech translation config with the specified subscription key and service region.
    string speechSubscriptionKey = AppSettingsManager.Settings["SpeechSubscriptionKey"];
    string speechSubscriptionRegion = AppSettingsManager.Settings["SpeechSubscriptionRegion"];
    var config = SpeechTranslationConfig.FromSubscription(speechSubscriptionKey, speechSubscriptionRegion);
    config.SpeechRecognitionLanguage = fromLanguage;

    // Translation target language(s).
    config.AddTargetLanguage("en-US");

    // Creates a translation recognizer using the microphone as audio input.
    using (var recognizer = new TranslationRecognizer(config))
    {
        // Subscribes to events.
        recognizer.Recognizing += (s, e) =>
        {
            Console.WriteLine($"RECOGNIZING in '{fromLanguage}': Text={e.Result.Text}");
            foreach (var element in e.Result.Translations)
            {
                Console.WriteLine($"    TRANSLATING into '{element.Key}': {element.Value}");
                UpdateRecognizingText(element.Value);
            }
        };

        recognizer.Recognized += (s, e) =>
        {
            if (e.Result.Reason == ResultReason.TranslatedSpeech)
            {
                foreach (var element in e.Result.Translations)
                {
                    Console.WriteLine($"    TRANSLATED into '{element.Key}': {element.Value}");
                    UpdateRecognizedText(element.Value);
                }
            }
            /*
            // Triggered when text is recognized but cannot be translated.
            else if (e.Result.Reason == ResultReason.RecognizedSpeech)
            {
                Console.WriteLine($"RECOGNIZED: Text={e.Result.Text}");
                Console.WriteLine($"    Speech not translated.");
            }
            else if (e.Result.Reason == ResultReason.NoMatch)
            {
                Console.WriteLine($"NOMATCH: Speech could not be recognized.");
            }
            */
        };

        recognizer.Canceled += (s, e) =>
        {
            Console.WriteLine($"CANCELED: Reason={e.Reason}");
            if (e.Reason == CancellationReason.Error)
            {
                Console.WriteLine($"CANCELED: ErrorCode={e.ErrorCode}");
                Console.WriteLine($"CANCELED: ErrorDetails={e.ErrorDetails}");
                Console.WriteLine($"CANCELED: Did you update the subscription info?");
            }
        };

        recognizer.SessionStarted += (s, e) => { Console.WriteLine("\nSession started event."); };
        recognizer.SessionStopped += (s, e) => { Console.WriteLine("\nSession stopped event."); };

        // Starts continuous recognition. Use StopContinuousRecognitionAsync() to stop recognition.
        await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);

        // Recognizes for as long as 'Recording' is set; note this loop busy-waits
        // until the stop button clears the flag.
        Recording = true;
        do
        {
            // Loop until the stop button is pressed.
        } while (Recording != false);

        // Stops continuous recognition.
        await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);
    }
}
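// The do/while above busy-waits on the Recording flag, pinning a CPU core until the
// stop button clears it. A sketch of a gentler alternative, assuming a hypothetical
// stop-button handler that completes the TaskCompletionSource:
var stopPressed = new TaskCompletionSource<int>();
// In the stop button's click handler: stopPressed.TrySetResult(0);

await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);
await stopPressed.Task.ConfigureAwait(false);
await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);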
// Translation using file input.
public static async Task TranslationWithFileAsync()
{
    // <TranslationWithFileAsync>
    // Translation source language.
    // Replace with a language of your choice.
    string fromLanguage = "en-US";

    // Creates an instance of a speech translation config with specified subscription key and service region.
    // Replace with your own subscription key and service region (e.g., "westus").
    var config = SpeechTranslationConfig.FromSubscription("YourSubscriptionKey", "YourServiceRegion");
    config.SpeechRecognitionLanguage = fromLanguage;

    // Translation target language(s).
    // Replace with language(s) of your choice.
    config.AddTargetLanguage("de");
    config.AddTargetLanguage("fr");

    var stopTranslation = new TaskCompletionSource<int>();

    // Creates a translation recognizer using a file as audio input.
    // Replace with your own audio file name.
    using (var audioInput = AudioConfig.FromWavFileInput(@"whatstheweatherlike.wav"))
    {
        using (var recognizer = new TranslationRecognizer(config, audioInput))
        {
            // Subscribes to events.
            recognizer.Recognizing += (s, e) =>
            {
                Console.WriteLine($"RECOGNIZING in '{fromLanguage}': Text={e.Result.Text}");
                foreach (var element in e.Result.Translations)
                {
                    Console.WriteLine($"    TRANSLATING into '{element.Key}': {element.Value}");
                }
            };

            recognizer.Recognized += (s, e) =>
            {
                if (e.Result.Reason == ResultReason.TranslatedSpeech)
                {
                    Console.WriteLine($"RECOGNIZED in '{fromLanguage}': Text={e.Result.Text}");
                    foreach (var element in e.Result.Translations)
                    {
                        Console.WriteLine($"    TRANSLATED into '{element.Key}': {element.Value}");
                    }
                }
                else if (e.Result.Reason == ResultReason.RecognizedSpeech)
                {
                    Console.WriteLine($"RECOGNIZED: Text={e.Result.Text}");
                    Console.WriteLine($"    Speech not translated.");
                }
                else if (e.Result.Reason == ResultReason.NoMatch)
                {
                    Console.WriteLine($"NOMATCH: Speech could not be recognized.");
                }
            };

            recognizer.Canceled += (s, e) =>
            {
                Console.WriteLine($"CANCELED: Reason={e.Reason}");
                if (e.Reason == CancellationReason.Error)
                {
                    Console.WriteLine($"CANCELED: ErrorDetails={e.ErrorDetails}");
                    Console.WriteLine($"CANCELED: Did you update the subscription info?");
                }
                stopTranslation.TrySetResult(0);
            };

            recognizer.SpeechStartDetected += (s, e) => { Console.WriteLine("\nSpeech start detected event."); };
            recognizer.SpeechEndDetected += (s, e) => { Console.WriteLine("\nSpeech end detected event."); };
            recognizer.SessionStarted += (s, e) => { Console.WriteLine("\nSession started event."); };
            recognizer.SessionStopped += (s, e) =>
            {
                Console.WriteLine("\nSession stopped event.");
                Console.WriteLine($"\nStop translation.");
                stopTranslation.TrySetResult(0);
            };

            // Starts continuous recognition. Uses StopContinuousRecognitionAsync() to stop recognition.
            Console.WriteLine("Start translation...");
            await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);

            // Waits for completion.
            // Use Task.WaitAny to keep the task rooted.
            Task.WaitAny(new[] { stopTranslation.Task });

            // Stops translation.
            await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);
        }
    }
    // </TranslationWithFileAsync>
}
public static async Task TranslationWithFileAsync(this SpeechTranslationConfig config, byte[] wavBytes, string fromLanguage, IEnumerable<string> targetLanguages, Voice voice, string outputFilename)
{
    var synthesizingWriter = new SynthesizingWriter(outputFilename);

    config.SpeechRecognitionLanguage = fromLanguage;
    config.VoiceName = voice.ToString();
    targetLanguages.ToList().ForEach(config.AddTargetLanguage);

    using var audioInput = AudioConfig.FromStreamInput(
        AudioInputStream.CreatePullStream(
            new BinaryAudioStreamReader(
                new MemoryStream(wavBytes))));
    using var recognizer = new TranslationRecognizer(config, audioInput);

    var stopTranslation = new TaskCompletionSource<int>();

    recognizer.Recognizing += (s, e) =>
    {
        Console.WriteLine($"RECOGNIZING in '{fromLanguage}': Text={e.Result.Text}");
        foreach (var element in e.Result.Translations)
        {
            Console.WriteLine($"    TRANSLATING into '{element.Key}': {element.Value}");
        }
    };

    recognizer.Recognized += (s, e) =>
    {
        if (e.Result.Reason == ResultReason.TranslatedSpeech)
        {
            Console.WriteLine($"RECOGNIZED in '{fromLanguage}': Text={e.Result.Text}");
            foreach (var element in e.Result.Translations)
            {
                Console.WriteLine($"    TRANSLATED into '{element.Key}': {element.Value}");
            }
        }
        else if (e.Result.Reason == ResultReason.RecognizedSpeech)
        {
            Console.WriteLine($"RECOGNIZED: Text={e.Result.Text}");
            Console.WriteLine($"    Speech not translated.");
        }
        else if (e.Result.Reason == ResultReason.NoMatch)
        {
            Console.WriteLine($"NOMATCH: Speech could not be recognized.");
        }
    };

    recognizer.Canceled += (s, e) =>
    {
        Console.WriteLine($"CANCELED: Reason={e.Reason}");
        if (e.Reason == CancellationReason.Error)
        {
            Console.WriteLine($"CANCELED: ErrorCode={e.ErrorCode}");
            Console.WriteLine($"CANCELED: ErrorDetails={e.ErrorDetails}");
            Console.WriteLine($"CANCELED: Did you update the subscription info?");
        }
        stopTranslation.TrySetResult(0);
    };

    recognizer.SpeechStartDetected += (s, e) => { Console.WriteLine("\nSpeech start detected event."); };
    recognizer.SpeechEndDetected += (s, e) => { Console.WriteLine("\nSpeech end detected event."); };
    recognizer.SessionStarted += (s, e) => { Console.WriteLine("\nSession started event."); };
    recognizer.SessionStopped += (s, e) =>
    {
        Console.WriteLine("\nSession stopped event.");
        Console.WriteLine($"\nStop translation.");
        stopTranslation.TrySetResult(0);
    };

    recognizer.Synthesizing += synthesizingWriter.Synthesizing;

    // Starts continuous recognition. Uses StopContinuousRecognitionAsync() to stop recognition.
    await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);
    await Task.WhenAny(new[] { stopTranslation.Task });

    // Stops translation.
    await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(continueOnCapturedContext: false);
}
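// A hypothetical call site for the extension method above. Voice is a project-local
// type whose members are not shown in the sample, so default(Voice) stands in here;
// the WAV bytes are expected to be PCM audio the Speech SDK can consume.
var config = SpeechTranslationConfig.FromSubscription("YourSubscriptionKey", "YourServiceRegion");
byte[] wavBytes = File.ReadAllBytes("input.wav");
await config.TranslationWithFileAsync(
    wavBytes,
    fromLanguage: "en-US",
    targetLanguages: new[] { "de" },
    voice: default(Voice), // replace with a real member of the project's Voice type
    outputFilename: "translated.wav");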
public static async Task TranslationContinuousRecognitionAsync()
{
    // Creates an instance of a speech translation config with specified subscription key and service region.
    // Replace with your own subscription key and service region (e.g., "westus").
    var config = SpeechTranslationConfig.FromSubscription(subscriptionKey, region);

    // Sets source and target languages.
    string fromLanguage = "de-DE";
    config.SpeechRecognitionLanguage = fromLanguage;
    config.AddTargetLanguage("en");
    config.AddTargetLanguage("ko");

    // Sets voice name of synthesis output.
    // https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support
    const string Voice = "en-US-JessaRUS";
    config.VoiceName = Voice;

    var stopRecognition = new TaskCompletionSource<int>();

    // Creates a translation recognizer using a WAV file as audio input.
    // Replace with your own audio file path.
    using (var audioInput = AudioConfig.FromWavFileInput(@"audio file"))
    {
        using (var recognizer = new TranslationRecognizer(config, audioInput))
        {
            // Subscribes to events.
            recognizer.Recognizing += (s, e) =>
            {
                Console.WriteLine($"RECOGNIZING in '{fromLanguage}': Text={e.Result.Text}");
                foreach (var element in e.Result.Translations)
                {
                    Console.WriteLine($"    TRANSLATING into '{element.Key}': {element.Value}");
                }
            };

            recognizer.Recognized += (s, e) =>
            {
                if (e.Result.Reason == ResultReason.TranslatedSpeech)
                {
                    Console.WriteLine($"\nFinal result: Reason: {e.Result.Reason.ToString()}, recognized text in {fromLanguage}: {e.Result.Text}.");
                    foreach (var element in e.Result.Translations)
                    {
                        Console.WriteLine($"    TRANSLATING into '{element.Key}': {element.Value}");
                    }
                }
            };

            recognizer.Synthesizing += (s, e) =>
            {
                var audio = e.Result.GetAudio();
                Console.WriteLine(audio.Length != 0
                    ? $"AudioSize: {audio.Length}"
                    : $"AudioSize: {audio.Length} (end of synthesis data)");
            };

            recognizer.Canceled += (s, e) =>
            {
                Console.WriteLine($"\nRecognition canceled. Reason: {e.Reason}; ErrorDetails: {e.ErrorDetails}");
                stopRecognition.TrySetResult(0);
            };

            recognizer.SessionStarted += (s, e) => { Console.WriteLine("\nSession started event."); };
            recognizer.SessionStopped += (s, e) =>
            {
                Console.WriteLine("\nSession stopped event.");
                stopRecognition.TrySetResult(0);
            };

            // A second Synthesizing handler plays the synthesized audio as it arrives.
            recognizer.Synthesizing += (s, e) =>
            {
                var audio = e.Result.GetAudio();
                if (audio.Length > 0)
                {
                    using (var m = new MemoryStream(audio))
                    {
                        SoundPlayer simpleSound = new SoundPlayer(m);
                        simpleSound.Play();
                    }
                }
            };

            // Starts continuous recognition. Uses StopContinuousRecognitionAsync() to stop recognition.
            await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);
            Task.WaitAny(new[] { stopRecognition.Task });

            // Stops continuous recognition.
            await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);
            Console.ReadLine();
        }
    }
}
// Translation using multi-lingual file input.
public static async Task TranslationWithMultiLingualFileAsync_withLanguageDetectionEnabled()
{
    // <TranslationWithFileAsync>
    // Official v2 endpoint with service region.
    // Please replace the service region with your region.
    var v2EndpointInString = String.Format("wss://{0}.stt.speech.microsoft.com/speech/universal/v2", "YourServiceRegion");
    var v2EndpointUrl = new Uri(v2EndpointInString);

    // Creates an instance of a speech translation config with the specified subscription key and endpoint.
    // Please replace the subscription key with your own.
    var config = SpeechTranslationConfig.FromEndpoint(v2EndpointUrl, "YourSubscriptionKey");

    // Sets the source language.
    // The source language will be detected by the language detection feature.
    // However, SpeechRecognitionLanguage still needs to be set to a locale string; it will not be used as the source language.
    // This will be fixed in a future version of the Speech SDK.
    string fromLanguage = "en-US";
    config.SpeechRecognitionLanguage = fromLanguage;

    // Translation target language(s).
    // Replace with language(s) of your choice.
    config.AddTargetLanguage("de");
    config.AddTargetLanguage("fr");

    // Sets the language id property.
    // Please refer to the documentation of language id for the different modes.
    config.SetProperty(PropertyId.SpeechServiceConnection_ContinuousLanguageIdPriority, "Latency");
    var autoDetectSourceLanguageConfig = AutoDetectSourceLanguageConfig.FromLanguages(new string[] { "en-US", "zh-CN" });

    var stopTranslation = new TaskCompletionSource<int>();

    // Creates a translation recognizer using a file as audio input.
    // Replace with your own audio file name.
    using (var audioInput = AudioConfig.FromWavFileInput(@"en-us_zh-cn.wav"))
    {
        using (var recognizer = new TranslationRecognizer(config, autoDetectSourceLanguageConfig, audioInput))
        {
            // Subscribes to events.
            recognizer.Recognizing += (s, e) =>
            {
                // Note: the detected language result is only available with the v2 endpoint.
                var lidResult = e.Result.Properties.GetProperty(PropertyId.SpeechServiceConnection_AutoDetectSourceLanguageResult);
                Console.WriteLine($"RECOGNIZING in '{lidResult}': Text={e.Result.Text}");
                foreach (var element in e.Result.Translations)
                {
                    Console.WriteLine($"    TRANSLATING into '{element.Key}': {element.Value}");
                }
            };

            recognizer.Recognized += (s, e) =>
            {
                if (e.Result.Reason == ResultReason.TranslatedSpeech)
                {
                    var lidResult = e.Result.Properties.GetProperty(PropertyId.SpeechServiceConnection_AutoDetectSourceLanguageResult);
                    Console.WriteLine($"RECOGNIZED in '{lidResult}': Text={e.Result.Text}");
                    foreach (var element in e.Result.Translations)
                    {
                        Console.WriteLine($"    TRANSLATED into '{element.Key}': {element.Value}");
                    }
                }
                else if (e.Result.Reason == ResultReason.RecognizedSpeech)
                {
                    Console.WriteLine($"RECOGNIZED: Text={e.Result.Text}");
                    Console.WriteLine($"    Speech not translated.");
                }
                else if (e.Result.Reason == ResultReason.NoMatch)
                {
                    Console.WriteLine($"NOMATCH: Speech could not be recognized.");
                }
            };

            recognizer.Canceled += (s, e) =>
            {
                Console.WriteLine($"CANCELED: Reason={e.Reason}");
                if (e.Reason == CancellationReason.Error)
                {
                    Console.WriteLine($"CANCELED: ErrorCode={e.ErrorCode}");
                    Console.WriteLine($"CANCELED: ErrorDetails={e.ErrorDetails}");
                    Console.WriteLine($"CANCELED: Did you update the subscription info?");
                }
                stopTranslation.TrySetResult(0);
            };

            recognizer.SpeechStartDetected += (s, e) => { Console.WriteLine("\nSpeech start detected event."); };
            recognizer.SpeechEndDetected += (s, e) => { Console.WriteLine("\nSpeech end detected event."); };
            recognizer.SessionStarted += (s, e) => { Console.WriteLine("\nSession started event."); };
            recognizer.SessionStopped += (s, e) =>
            {
                Console.WriteLine("\nSession stopped event.");
                Console.WriteLine($"\nStop translation.");
                stopTranslation.TrySetResult(0);
            };

            // Starts continuous recognition. Uses StopContinuousRecognitionAsync() to stop recognition.
            Console.WriteLine("Start translation...");
            await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);

            // Waits for completion.
            // Use Task.WaitAny to keep the task rooted.
            Task.WaitAny(new[] { stopTranslation.Task });

            // Stops translation.
            await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);
        }
    }
    // </TranslationWithFileAsync>
}
private async Task<MSTResult> performRecognitionAsync(string logId, string filePath, SpeechTranslationConfig speechConfig, TimeSpan restartOffset, string sourceLanguage, Dictionary<string, List<Caption>> captions, Dictionary<string, TimeSpan> startAfterMap)
{
    using (var audioInput = WavHelper.OpenWavFile(filePath))
    {
        var logOnce = new HashSet<string>();
        var stopRecognition = new TaskCompletionSource<int>();
        bool verboseLogging = false;
        TimeSpan lastSuccessfulTime = TimeSpan.Zero;
        string errorCode = "";

        using (var recognizer = new TranslationRecognizer(speechConfig, audioInput))
        {
            recognizer.Recognized += (s, e) =>
            {
                if (e.Result.Reason == ResultReason.TranslatedSpeech)
                {
                    JObject jObject = JObject.Parse(e.Result.Properties.GetProperty(PropertyId.SpeechServiceResponse_JsonResult));
                    var wordLevelCaptions = jObject["Words"]
                        .ToObject<List<MSTWord>>()
                        .OrderBy(w => w.Offset)
                        .ToList();

                    if (e.Result.Text == "" && wordLevelCaptions.Count == 0)
                    {
                        if (verboseLogging)
                        {
                            TimeSpan _offset = new TimeSpan(e.Result.OffsetInTicks);
                            TimeSpan _end = e.Result.Duration.Add(_offset);
                            _logger.LogInformation($"{logId}: Empty String: Begin={_offset.Minutes}:{_offset.Seconds},{_offset.Milliseconds}, End={_end.Minutes}:{_end.Seconds},{_end.Milliseconds}");
                        }
                        return;
                    }

                    if (wordLevelCaptions.Any())
                    {
                        // TODO/TOREVIEW: Is this a bug fix or redefinition? Could this change in later versions of the SDK?
                        long offsetDifference = e.Result.OffsetInTicks - wordLevelCaptions.FirstOrDefault().Offset;
                        wordLevelCaptions.ForEach(w => w.Offset += offsetDifference);
                    }

                    var sentenceLevelCaptions = MSTWord.WordLevelTimingsToSentenceLevelTimings(e.Result.Text, wordLevelCaptions);

                    // Convert back to time in the original untrimmed video.
                    // These timings are used to check whether we should be adding any captions.
                    // They are then used directly for sentence-level translations,
                    // but not for the word-level timings of the primary language.
                    TimeSpan begin = (new TimeSpan(e.Result.OffsetInTicks)).Add(restartOffset);
                    TimeSpan end = e.Result.Duration.Add(begin);
                    if (verboseLogging)
                    {
                        _logger.LogInformation($"{logId}: Begin={begin.Minutes}:{begin.Seconds},{begin.Milliseconds}", begin);
                        _logger.LogInformation($"{logId}: End={end.Minutes}:{end.Seconds},{end.Milliseconds}");
                    }

                    // TODO/TOREVIEW:
                    // ToCaptionEntitiesWithWordTiming vs ToCaptionEntitiesInterpolate:
                    // can this code be simplified to use a single function?
                    // Also: Caution - it is possible that word timing data from MS may depend on the SDK version.
                    var newCaptions = MSTWord.ToCaptionEntitiesWithWordTiming(captions[sourceLanguage].Count, restartOffset, sentenceLevelCaptions);

                    if (begin >= startAfterMap[sourceLanguage])
                    {
                        captions[sourceLanguage].AddRange(newCaptions);
                        if (logOnce.Add("AddingMain"))
                        {
                            _logger.LogInformation($"{logId}: Adding Primary Language captions");
                        }
                    }
                    else
                    {
                        if (logOnce.Add("SkippingMain"))
                        {
                            _logger.LogInformation($"{logId}: Skipping Main captions because {begin} < {startAfterMap[sourceLanguage]}");
                        }
                    }

                    foreach (var element in e.Result.Translations)
                    {
                        var language = element.Key;
                        var startAfter = startAfterMap[language];
                        if (begin >= startAfter)
                        {
                            // Translations don't have word-level timing, so
                            // interpolate between start and end.
                            newCaptions = Caption.ToCaptionEntitiesInterpolate(captions[language].Count, begin, end, element.Value);
                            captions[element.Key].AddRange(newCaptions);
                            if (logOnce.Add($"AddingTranslated {language}"))
                            {
                                _logger.LogInformation($"{logId}: Adding translation ({language}) captions");
                            }
                        }
                        else
                        {
                            if (logOnce.Add($"SkippingTranslated {language}"))
                            {
                                _logger.LogInformation($"{logId}: Skipping ({language}) captions because {begin} < {startAfter}");
                            }
                        }
                    }
                }
                else if (e.Result.Reason == ResultReason.NoMatch)
                {
                    _logger.LogInformation($"{logId}: NOMATCH: Speech could not be recognized.");
                }
            };

            recognizer.Canceled += (s, e) =>
            {
                errorCode = e.ErrorCode.ToString();
                _logger.LogInformation($"{logId}: CANCELED: ErrorCode={e.ErrorCode} Reason={e.Reason}");
                if (e.Reason == CancellationReason.Error)
                {
                    _logger.LogError($"{logId}: CANCELED: ErrorCode={e.ErrorCode} Reason={e.Reason}");
                    if (e.ErrorCode == CancellationErrorCode.ServiceTimeout ||
                        e.ErrorCode == CancellationErrorCode.ServiceUnavailable ||
                        e.ErrorCode == CancellationErrorCode.ConnectionFailure)
                    {
                        TimeSpan lastTime = TimeSpan.Zero;
                        if (captions.Count != 0)
                        {
                            var lastCaption = captions[sourceLanguage].OrderBy(c => c.End).TakeLast(1).ToList().First();
                            lastTime = lastCaption.End;
                        }
                        _logger.LogInformation($"{logId}: Retrying, LastSuccessTime={lastTime}");
                        lastSuccessfulTime = lastTime;
                    }
                    else if (e.ErrorCode != CancellationErrorCode.NoError)
                    {
                        _logger.LogInformation($"{logId}: CANCELED: ErrorCode={e.ErrorCode} Reason={e.Reason}");
                        _slackLogger.PostErrorAsync(new Exception($"{logId}: Transcription Failure"), "Transcription Failure").GetAwaiter().GetResult();
                    }
                }
                stopRecognition.TrySetResult(0);
            };

            recognizer.SessionStarted += (s, e) => { _logger.LogInformation($"{logId}: Session started event."); };
            recognizer.SessionStopped += (s, e) =>
            {
                _logger.LogInformation($"{logId}: Session stopped event. Stopping recognition.");
                stopRecognition.TrySetResult(0);
            };

            // Starts continuous recognition. Uses StopContinuousRecognitionAsync() to stop recognition.
            await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);

            // Waits for completion.
            // Use Task.WaitAny to keep the task rooted.
            Task.WaitAny(new[] { stopRecognition.Task });

            // Stops recognition.
            await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);

            _logger.LogInformation($"{logId}: Returning {captions.Count} languages, ErrorCode = {errorCode}, LastSuccessTime = {lastSuccessfulTime}");
            return new MSTResult
            {
                Captions = captions,
                ErrorCode = errorCode,
                LastSuccessTime = lastSuccessfulTime
            };
        }
    }
}
public async Task<string> TranslationWithAudioStreamAsync(Stream audioStream, string fromLanguage = "en-US", string targetLanguage = "en-US")
{
    // Creates an instance of a speech translation config with specified subscription key and service region.
    // Replace with your own subscription key and service region (e.g., "westus").
    var config = SpeechTranslationConfig.FromSubscription(this.subscriptionKey, this.region);
    config.SpeechRecognitionLanguage = fromLanguage;

    // Translation target language(s).
    // Replace with language(s) of your choice.
    config.AddTargetLanguage(targetLanguage);

    var stopTranslation = new TaskCompletionSource<int>();
    string translateResult = null;

    // Creates an audio stream from a wav file.
    using (var audioInput = OpenWavFile(audioStream))
    {
        // Creates a translation recognizer using the audio stream as input.
        using (var recognizer = new TranslationRecognizer(config, audioInput))
        {
            // Subscribes to events.
            recognizer.Recognizing += (s, e) =>
            {
                log.LogInformation($"RECOGNIZING in '{fromLanguage}': Text = {e.Result.Text}");
                foreach (var element in e.Result.Translations)
                {
                    log.LogInformation($"    TRANSLATING into '{element.Key}': {element.Value}");
                }
            };

            recognizer.Recognized += (s, e) =>
            {
                if (e.Result.Reason == ResultReason.TranslatedSpeech)
                {
                    log.LogInformation($"RECOGNIZED in '{fromLanguage}': Text={e.Result.Text}");
                    foreach (var element in e.Result.Translations)
                    {
                        log.LogInformation($"    TRANSLATED into '{element.Key}': {element.Value}");
                        translateResult = element.Value;
                    }
                }
                else if (e.Result.Reason == ResultReason.RecognizedSpeech)
                {
                    log.LogInformation($"RECOGNIZED: Text={e.Result.Text}");
                    log.LogInformation($"    Speech not translated.");
                }
                else if (e.Result.Reason == ResultReason.NoMatch)
                {
                    log.LogInformation($"NOMATCH: Speech could not be recognized.");
                }
            };

            recognizer.Canceled += (s, e) =>
            {
                log.LogInformation($"CANCELED: Reason={e.Reason}");
                if (e.Reason == CancellationReason.Error)
                {
                    log.LogInformation($"CANCELED: ErrorCode={e.ErrorCode}");
                    log.LogInformation($"CANCELED: ErrorDetails={e.ErrorDetails}");
                    log.LogInformation($"CANCELED: Did you update the subscription info?");
                }
                stopTranslation.TrySetResult(0);
            };

            recognizer.SpeechStartDetected += (s, e) => { log.LogInformation("\nSpeech start detected event."); };
            recognizer.SpeechEndDetected += (s, e) => { log.LogInformation("\nSpeech end detected event."); };
            recognizer.SessionStarted += (s, e) => { log.LogInformation("\nSession started event."); };
            recognizer.SessionStopped += (s, e) =>
            {
                log.LogInformation($"\nSession stopped event.");
                log.LogInformation($"\nStop translation.");
                stopTranslation.TrySetResult(0);
            };

            // Starts continuous recognition. Uses StopContinuousRecognitionAsync() to stop recognition.
            log.LogInformation("Start translation...");
            await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);

            // Waits for completion.
            // Use Task.WaitAny to keep the task rooted.
            Task.WaitAny(new[] { stopTranslation.Task });

            // Stops translation.
            await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);

            return translateResult;
        }
    }
}
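// The OpenWavFile helper is not shown in the sample. A minimal sketch of one way to
// implement it with the SDK's pull-stream callback (PullAudioInputStreamCallback from
// Microsoft.CognitiveServices.Speech.Audio), assuming the stream carries 16 kHz,
// 16-bit, mono PCM behind a canonical 44-byte WAV header:
private static AudioConfig OpenWavFile(Stream stream)
{
    stream.Seek(44, SeekOrigin.Begin); // skip the canonical WAV header
    return AudioConfig.FromStreamInput(new StreamPullCallback(stream));
}

private sealed class StreamPullCallback : PullAudioInputStreamCallback
{
    private readonly Stream _stream;

    public StreamPullCallback(Stream stream) => _stream = stream;

    // Fills dataBuffer with up to 'size' bytes; returning 0 signals end of stream.
    public override int Read(byte[] dataBuffer, uint size) => _stream.Read(dataBuffer, 0, (int)size);

    public override void Close() => _stream.Dispose();
}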
public static async Task TranslateSpeechAsync()
{
    // Translation source language.
    // Replace with a language of your choice.
    string fromLanguage = "en-US";

    // Voice name of synthesis output.
    const string GermanVoice = "de-DE-Hedda";

    // Creates an instance of a speech translation config with specified subscription key and service region.
    // Replace with your own subscription key and service region (e.g., "westus").
    var config = SpeechTranslationConfig.FromSubscription("YourSubscriptionKey", "westus");
    config.SpeechRecognitionLanguage = fromLanguage;
    config.VoiceName = GermanVoice;

    // Translation target language(s).
    // Replace with language(s) of your choice.
    config.AddTargetLanguage("de"); // change to another language if desired

    // Creates a translation recognizer using the microphone as audio input.
    using (var recognizer = new TranslationRecognizer(config))
    {
        // Subscribes to events.
        recognizer.Recognizing += (s, e) =>
        {
            Console.WriteLine($"RECOGNIZING in '{fromLanguage}': Text={e.Result.Text}");
            foreach (var element in e.Result.Translations)
            {
                Console.WriteLine($"    TRANSLATING into '{element.Key}': {element.Value}");
            }
        };

        recognizer.Recognized += (s, e) =>
        {
            if (e.Result.Reason == ResultReason.TranslatedSpeech)
            {
                Console.WriteLine($"RECOGNIZED in '{fromLanguage}': Text={e.Result.Text}");
                foreach (var element in e.Result.Translations)
                {
                    Console.WriteLine($"    TRANSLATED into '{element.Key}': {element.Value}");
                }
            }
            else if (e.Result.Reason == ResultReason.RecognizedSpeech)
            {
                Console.WriteLine($"RECOGNIZED: Text={e.Result.Text}");
                Console.WriteLine($"    Speech not translated.");
            }
            else if (e.Result.Reason == ResultReason.NoMatch)
            {
                Console.WriteLine($"NOMATCH: Speech could not be recognized.");
            }
        };

        recognizer.Synthesizing += (s, e) =>
        {
            var audio = e.Result.GetAudio();
            Console.WriteLine(audio.Length != 0
                ? $"AudioSize: {audio.Length}"
                : $"AudioSize: {audio.Length} (end of synthesis data)");

            if (audio.Length > 0)
            {
#if NET461
                using (var m = new MemoryStream(audio))
                {
                    SoundPlayer simpleSound = new SoundPlayer(m);
                    simpleSound.PlaySync();
                }
#endif
            }
        };

        recognizer.Canceled += (s, e) =>
        {
            Console.WriteLine($"CANCELED: Reason={e.Reason}");
            if (e.Reason == CancellationReason.Error)
            {
                Console.WriteLine($"CANCELED: ErrorDetails={e.ErrorDetails}");
                Console.WriteLine($"CANCELED: Did you update the subscription info?");
            }
        };

        recognizer.SessionStarted += (s, e) => { Console.WriteLine("\nSession started event."); };
        recognizer.SessionStopped += (s, e) => { Console.WriteLine("\nSession stopped event."); };

        // Starts continuous recognition. Uses StopContinuousRecognitionAsync() to stop recognition.
        Console.WriteLine("Say something...");
        await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);

        while (true)
        {
            Console.WriteLine("Press Q to quit...");
            string input = Console.ReadLine();
            if (input.ToLower() == "q")
            {
                break;
            }
        }

        // Stops continuous recognition.
        await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);
    }
}
public static async Task TranslationContinuousRecognitionAsync()
{
    // Creates an instance of a speech translation config with specified subscription key and service region.
    // Replace with your own subscription key and service region (e.g., "westus").
    var config = SpeechTranslationConfig.FromSubscription(SubscriptionKey, Region);

    InputFilePath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "SourceVideo", "ignite.wav");
    VttOutputPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "Transcript_{0}.vtt");

    // Sets source and target languages.
    string fromLanguage = "en-US";
    config.SpeechRecognitionLanguage = fromLanguage;
    config.AddTargetLanguage("de");
    config.AddTargetLanguage("ar");
    config.AddTargetLanguage("fr");

    // Creates VTT files.
    CreateTranscriptFile(fromLanguage);
    foreach (var targetLang in config.TargetLanguages)
    {
        CreateTranscriptFile(targetLang);
    }

    using var audioConfig = AudioConfig.FromWavFileInput(InputFilePath);
    using (var recognizer = new TranslationRecognizer(config, audioConfig))
    {
        // Subscribes to events.
        recognizer.Recognizing += (s, e) =>
        {
            Console.WriteLine($"RECOGNIZING in '{fromLanguage}': Text={e.Result.Text}");
            foreach (var element in e.Result.Translations)
            {
                Console.WriteLine($"    TRANSLATING into '{element.Key}': {element.Value}");
            }
        };

        recognizer.Recognized += (s, e) =>
        {
            if (e.Result.Reason == ResultReason.TranslatedSpeech)
            {
                WriteVttLine(fromLanguage, FormatTime(e.Result.OffsetInTicks) + " --> " + FormatTime(e.Result.OffsetInTicks + e.Result.Duration.Ticks) + Environment.NewLine);
                WriteVttLine(fromLanguage, e.Result.Text + Environment.NewLine + Environment.NewLine);
                Console.WriteLine($"\nFinal result: Reason: {e.Result.Reason.ToString()}, recognized text in {fromLanguage}: {e.Result.Text}.");
                foreach (var element in e.Result.Translations)
                {
                    Console.WriteLine($"    TRANSLATING into '{element.Key}': {element.Value}");
                    WriteVttLine(element.Key, FormatTime(e.Result.OffsetInTicks) + " --> " + FormatTime(e.Result.OffsetInTicks + e.Result.Duration.Ticks) + Environment.NewLine);
                    WriteVttLine(element.Key, element.Value + Environment.NewLine + Environment.NewLine);
                }
            }
        };

        recognizer.Synthesizing += (s, e) =>
        {
            var audio = e.Result.GetAudio();
            Console.WriteLine(audio.Length != 0
                ? $"AudioSize: {audio.Length}"
                : $"AudioSize: {audio.Length} (end of synthesis data)");
        };

        recognizer.Canceled += (s, e) =>
        {
            Console.WriteLine($"\nRecognition canceled. Reason: {e.Reason}; ErrorDetails: {e.ErrorDetails}");
        };

        recognizer.SessionStarted += (s, e) => { Console.WriteLine("\nSession started event."); };
        recognizer.SessionStopped += (s, e) => { Console.WriteLine("\nSession stopped event."); };

        // Starts continuous recognition. Uses StopContinuousRecognitionAsync() to stop recognition.
        Console.WriteLine("Say something...");
        await recognizer.StartContinuousRecognitionAsync();

        do
        {
            Console.WriteLine("Press Enter to stop");
        } while (Console.ReadKey().Key != ConsoleKey.Enter);

        // Stops continuous recognition.
        await recognizer.StopContinuousRecognitionAsync();
    }
}
public async Task TranslateAsync(YouTubeVideo youTubeVideo, string fromLanguage, IEnumerable<string> toLanguages)
{
    // Declare the necessary directory and file variables.
    var outputPath = Path.Combine("Output", Guid.NewGuid().ToString());
    var downloadFilePath = Path.Combine(outputPath, "input.mp4");

    // StringBuilders for data to be passed to event subscribers.
    var tsb = new StringBuilder();
    var osb = new StringBuilder();
    var info = new StringBuilder();

    var config = SpeechTranslationConfig.FromSubscription(
        configuration["AzureSpeechTranslation:SubscriptionKey"],
        configuration["AzureSpeechTranslation:Region"]);
    config.SpeechRecognitionLanguage = fromLanguage;
    foreach (var language in toLanguages)
    {
        config.AddTargetLanguage(language);
    }

    var vidBytes = await youTubeVideo.GetBytesAsync();

    // Before saving the video, create the directory.
    CreateOutputDirectory(outputPath);

    // Save the video.
    await File.WriteAllBytesAsync(downloadFilePath, vidBytes);

    // Extract the audio from the video to work on it.
    var wavAudioFile = await ExtractingWavAudioAsync(downloadFilePath);

    var stopTranslation = new TaskCompletionSource<int>();
    var lineCount = 1;

    using (var audioInput = AudioConfig.FromWavFileInput(wavAudioFile))
    {
        using (var recognizer = new TranslationRecognizer(config, audioInput))
        {
            // Subscribes to events.
            recognizer.Recognized += (s, e) =>
            {
                if (e.Result.Reason == ResultReason.TranslatedSpeech)
                {
                    foreach (var element in e.Result.Translations)
                    {
                        var fromTime = TimeSpan.FromTicks(e.Result.OffsetInTicks);
                        var toTime = fromTime.Add(e.Result.Duration);

                        osb.AppendLine($"{lineCount}");
                        osb.AppendLine($"{fromTime.ToString(@"hh\:mm\:ss\.fff")} --> {toTime.ToString(@"hh\:mm\:ss\.fff")}");
                        osb.AppendLine(e.Result.Text);
                        osb.AppendLine();

                        tsb.AppendLine($"{lineCount}");
                        tsb.AppendLine($"{fromTime.ToString(@"hh\:mm\:ss\.fff")} --> {toTime.ToString(@"hh\:mm\:ss\.fff")}");
                        tsb.AppendLine(element.Value);
                        tsb.AppendLine();

                        var speechServicesEventArgs = SetSpeechServicesInformationArgs(fromLanguage, element.Key, osb.ToString(), tsb.ToString());
                        osb.Clear();
                        tsb.Clear();
                        SpeechRecognized?.Invoke(this, speechServicesEventArgs);
                    }
                    lineCount++;
                }
                else if (e.Result.Reason == ResultReason.RecognizedSpeech)
                {
                    info.AppendLine($"RECOGNIZED: Text={e.Result.Text}");
                    info.AppendLine($"    Speech not translated.");
                    var speechServicesEventArgs = SetSpeechServicesInformationArgs(fromLanguage, information: info.ToString());
                    info.Clear();
                    SpeechRecognized?.Invoke(this, speechServicesEventArgs);
                }
                else if (e.Result.Reason == ResultReason.NoMatch)
                {
                    info.AppendLine($"NOMATCH: Speech could not be recognized.");
                    var speechServicesEventArgs = SetSpeechServicesInformationArgs(fromLanguage, information: info.ToString());
                    info.Clear();
                    SpeechRecognized?.Invoke(this, speechServicesEventArgs);
                }
            };

            recognizer.Canceled += (s, e) =>
            {
                info.AppendLine($"CANCELED: Reason={e.Reason}");
                if (e.Reason == CancellationReason.Error)
                {
                    info.AppendLine($"CANCELED: ErrorCode={e.ErrorCode}");
                    info.AppendLine($"CANCELED: ErrorDetails={e.ErrorDetails}");
                    info.AppendLine($"CANCELED: Did you update the subscription info?");
                }
                var speechServicesEventArgs = SetSpeechServicesInformationArgs(fromLanguage, information: info.ToString());
                info.Clear();
                SpeechCanceled?.Invoke(this, speechServicesEventArgs);
                stopTranslation.TrySetResult(0);
            };

            recognizer.SpeechStartDetected += (s, e) =>
            {
                info.AppendLine("Speech start detected event.");
                var speechServicesEventArgs = SetSpeechServicesInformationArgs(fromLanguage, information: info.ToString());
                info.Clear();
                SpeechStartDetected?.Invoke(this, speechServicesEventArgs);
            };

            recognizer.SpeechEndDetected += (s, e) =>
            {
                info.AppendLine("Speech end detected event.");
                var speechServicesEventArgs = SetSpeechServicesInformationArgs(fromLanguage, information: info.ToString());
                info.Clear();
                SpeechEndDetected?.Invoke(this, speechServicesEventArgs);
            };

            recognizer.SessionStarted += (s, e) =>
            {
                info.AppendLine("Start translation...");
                info.AppendLine("Session started event.");
                var speechServicesEventArgs = SetSpeechServicesInformationArgs(fromLanguage, information: info.ToString());
                info.Clear();
                SpeechSessionStarted?.Invoke(this, speechServicesEventArgs);
            };

            recognizer.SessionStopped += (s, e) =>
            {
                info.AppendLine("Session stopped event.");
                info.AppendLine("Stop translation.");
                var speechServicesEventArgs = SetSpeechServicesInformationArgs(fromLanguage, information: info.ToString());
                info.Clear();
                SpeechSessionStopped?.Invoke(this, speechServicesEventArgs);
                stopTranslation.TrySetResult(0);
            };

            // Starts continuous recognition. Uses StopContinuousRecognitionAsync() to stop recognition.
            await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);

            // Waits for completion.
            // Use Task.WaitAny to keep the task rooted.
            Task.WaitAny(new[] { stopTranslation.Task });

            // Stops translation.
            await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);
        }
    }

    // Housekeeping.
    Directory.Delete(outputPath, true);
}
public static async Task TranslateAsync(string from, string to, string voice, string inFile, string outFile)
{
    var config = SpeechTranslationConfig.FromSubscription(_key, _region);
    config.SpeechRecognitionLanguage = from;
    config.VoiceName = voice;
    config.AddTargetLanguage(to);

    var translationCompleted = new TaskCompletionSource<int>();

    using (var audioInput = AudioConfig.FromWavFileInput(inFile))
    {
        using (var recognizer = new TranslationRecognizer(config, audioInput))
        {
            recognizer.Recognized += (s, e) =>
            {
                if (e.Result.Reason == ResultReason.TranslatedSpeech)
                {
                    WriteLine($"Recognized: {e.Result.Text}");
                    foreach (var element in e.Result.Translations)
                    {
                        WriteLine($"Translated: {element.Value}");
                    }
                }
            };

            recognizer.Synthesizing += (sender, args) =>
            {
                if (args.Result.Reason == ResultReason.SynthesizingAudio)
                {
                    var bytes = args.Result.GetAudio();
                    if (bytes.Length > 0)
                    {
                        using (var fileStream = File.Create(outFile))
                        {
                            fileStream.Write(bytes, 0, bytes.Length);
                        }
                        new SoundPlayer(outFile).Play();
                        WriteLine($"Audio translation can be found here: {outFile}");
                    }
                }
            };

            recognizer.Canceled += (s, e) =>
            {
                WriteLine($"Cancelled: {e.Reason}");
                if (e.Reason == CancellationReason.Error)
                {
                    WriteLine($"Error: {e.ErrorDetails}");
                }
                translationCompleted.TrySetResult(0);
            };

            await recognizer.StartContinuousRecognitionAsync();
            await translationCompleted.Task;
            await recognizer.StopContinuousRecognitionAsync();
        }
    }
}
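// A hypothetical invocation of the helper above; the short voice-name format matches
// the "de-DE-Hedda" style used elsewhere in these samples, and the file names are placeholders.
await TranslateAsync(
    from: "en-US",
    to: "de",
    voice: "de-DE-Hedda",
    inFile: "question.wav",
    outFile: "answer-de.wav");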
public static async Task TranslationContinuousRecognitionAsync()
{
    // Creates an instance of a speech translation config with specified subscription key and service region.
    // Replace with your own subscription key and service region (e.g., "westus").
    var config = SpeechTranslationConfig.FromSubscription("YourSubscriptionKey", "YourServiceRegion");

    // Sets source and target languages.
    string fromLanguage = "en-US";
    config.SpeechRecognitionLanguage = fromLanguage;
    config.AddTargetLanguage("de");

    // Sets voice name of synthesis output.
    const string GermanVoice = "de-DE-Hedda";
    config.VoiceName = GermanVoice;

    // Creates a translation recognizer using the microphone as audio input.
    using (var recognizer = new TranslationRecognizer(config))
    {
        // Subscribes to events.
        recognizer.Recognizing += (s, e) =>
        {
            Console.WriteLine($"RECOGNIZING in '{fromLanguage}': Text={e.Result.Text}");
            foreach (var element in e.Result.Translations)
            {
                Console.WriteLine($"    TRANSLATING into '{element.Key}': {element.Value}");
            }
        };

        recognizer.Recognized += (s, e) =>
        {
            if (e.Result.Reason == ResultReason.TranslatedSpeech)
            {
                Console.WriteLine($"\nFinal result: Reason: {e.Result.Reason.ToString()}, recognized text in {fromLanguage}: {e.Result.Text}.");
                foreach (var element in e.Result.Translations)
                {
                    Console.WriteLine($"    TRANSLATING into '{element.Key}': {element.Value}");
                }
            }
        };

        recognizer.Synthesizing += (s, e) =>
        {
            var audio = e.Result.GetAudio();
            Console.WriteLine(audio.Length != 0
                ? $"AudioSize: {audio.Length}"
                : $"AudioSize: {audio.Length} (end of synthesis data)");
        };

        recognizer.Canceled += (s, e) =>
        {
            Console.WriteLine($"\nRecognition canceled. Reason: {e.Reason}; ErrorDetails: {e.ErrorDetails}");
        };

        recognizer.SessionStarted += (s, e) => { Console.WriteLine("\nSession started event."); };
        recognizer.SessionStopped += (s, e) => { Console.WriteLine("\nSession stopped event."); };

        // Starts continuous recognition. Uses StopContinuousRecognitionAsync() to stop recognition.
        Console.WriteLine("Say something...");
        await recognizer.StartContinuousRecognitionAsync();

        do
        {
            Console.WriteLine("Press Enter to stop");
        } while (Console.ReadKey().Key != ConsoleKey.Enter);

        // Stops continuous recognition.
        await recognizer.StopContinuousRecognitionAsync();
    }
}
private async Task InitializeRecognizer(string languageCode1, string languageCode2, string voice)
{
    try
    {
        // Creates an instance of a speech translation config with specified subscription key and service region.
        var config = SpeechTranslationConfig.FromSubscription((string)Application.Current.Properties["SubscriptionKey"], (string)Application.Current.Properties["Region"]);

        // Sets source and target languages.
        string fromLanguage = languageCode1;
        string toLanguage = languageCode2;
        config.SpeechRecognitionLanguage = fromLanguage;
        config.AddTargetLanguage(toLanguage);

        // Sets the synthesis output voice name.
        // Replace with the languages of your choice, from the list found here: https://aka.ms/speech/tts-languages
        config.VoiceName = voice;

        var stopRecognition = new TaskCompletionSource<int>();

        using (recognizer = new TranslationRecognizer(config))
        {
            // Subscribes to events.
            recognizer.Recognized += (s, e) =>
            {
                if (e.Result.Reason == ResultReason.TranslatedSpeech)
                {
                    foreach (var element in e.Result.Translations.Where(x => !string.IsNullOrWhiteSpace(x.Value)))
                    {
                        if (!string.IsNullOrWhiteSpace(element.Value))
                        {
                            TextResults.Enqueue(new KeyValuePair<string, string>(e.Result.Text, element.Value));
                        }
                        else
                        {
                            var x = "WHY?"; // debug marker from the original sample
                        }
                        var options = new SpeechOptions();
                    }
                }
            };

            recognizer.Synthesizing += async (s, e) =>
            {
                var audio = e.Result.GetAudio();
                if (audio.Length > 0)
                {
                    try
                    {
                        AudioResults.Enqueue(audio);
                    }
                    catch (Exception ex)
                    {
                        Log("error Synthesizing " + ex.Message);
                        Crashes.TrackError(ex);
                    }
                }
            };

            recognizer.Canceled += (s, e) =>
            {
                Log($"CANCELED: Reason={e.Reason}");
                if (e.Reason == CancellationReason.Error)
                {
                    Log($"CANCELED: ErrorCode={e.ErrorCode}");
                    Log($"CANCELED: ErrorDetails={e.ErrorDetails}");
                    Log($"CANCELED: Did you update the subscription info?");
                }
                stopRecognition.TrySetResult(0);
            };

            recognizer.SessionStarted += (s, e) => { Log("Session started event."); };

            recognizer.SessionStopped += async (s, e) =>
            {
                Device.BeginInvokeOnMainThread(async () =>
                {
                    Log("Session stopped event.");
                    Log("Return results now.");

                    var sourceText = string.Empty;
                    var targetText = string.Empty;
                    KeyValuePair<string, string> result;
                    while (TextResults.Count > 0)
                    {
                        result = TextResults.Dequeue();
                        sourceText = sourceText + " " + result.Key;
                        targetText = targetText + " " + result.Value;
                    }

                    if (!string.IsNullOrWhiteSpace(targetText))
                    {
                        if (languageCode1 == (string)Application.Current.Properties["LanguageCode1"])
                        {
                            UpdateUI(LayoutOptions.Start, sourceText, targetText, (string)Application.Current.Properties["LanguageCode2"], true);
                        }
                        else
                        {
                            UpdateUI(LayoutOptions.End, sourceText, targetText, (string)Application.Current.Properties["LanguageCode1"], false);
                        }

                        service = DependencyService.Get<IAudioService>();
                        service.PlaySound(AudioResults);
                        stopRecognition.TrySetResult(0);
                    }
                    else
                    {
                        var x = "WHY?"; // debug marker from the original sample
                    }
                });
            };

            // Starts continuous recognition. Uses StopContinuousRecognitionAsync() to stop recognition.
            await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);

            // Waits for completion.
            // Use Task.WaitAny to keep the task rooted.
            Task.WaitAny(new[] { stopRecognition.Task });

            // Stops recognition.
            await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);
        }
    }
    catch (Exception ex)
    {
        Log("From Pressed Error " + ex.Message);
        Crashes.TrackError(ex);
    }
}
private async void SpeechTranslationFromMicrophone_ButtonClicked(object sender, RoutedEventArgs e)
{
    // Creates an instance of a speech config with specified subscription key and service region.
    // Replace with your own subscription key and service region (e.g., "westus").
    var config = SpeechTranslationConfig.FromSubscription("YourSubscriptionKey", "YourServiceRegion");

    // Sets source and target languages.
    string fromLanguage = "en-US";
    config.SpeechRecognitionLanguage = fromLanguage;
    config.AddTargetLanguage("de");

    try
    {
        // Creates a speech recognizer using the microphone as audio input.
        using (var recognizer = new TranslationRecognizer(config))
        {
            // The TaskCompletionSource to stop recognition.
            var stopRecognition = new TaskCompletionSource<int>();

            // Subscribes to events.
            recognizer.Recognizing += (s, ee) =>
            {
                NotifyUser($"RECOGNIZING in '{fromLanguage}': Text={ee.Result.Text}", NotifyType.StatusMessage);
                foreach (var element in ee.Result.Translations)
                {
                    NotifyUser($"    TRANSLATING into '{element.Key}': {element.Value}", NotifyType.StatusMessage);
                }
            };

            recognizer.Recognized += (s, ee) =>
            {
                if (ee.Result.Reason == ResultReason.TranslatedSpeech)
                {
                    NotifyUser($"\nFinal result: Reason: {ee.Result.Reason.ToString()}, recognized text in {fromLanguage}: {ee.Result.Text}.", NotifyType.StatusMessage);
                    foreach (var element in ee.Result.Translations)
                    {
                        NotifyUser($"    TRANSLATING into '{element.Key}': {element.Value}", NotifyType.StatusMessage);
                    }
                }
            };

            recognizer.Canceled += (s, ee) =>
            {
                NotifyUser($"\nRecognition canceled. Reason: {ee.Reason}; ErrorDetails: {ee.ErrorDetails}", NotifyType.StatusMessage);
            };

            recognizer.SessionStarted += (s, ee) => { NotifyUser("\nSession started event.", NotifyType.StatusMessage); };
            recognizer.SessionStopped += (s, ee) =>
            {
                NotifyUser("\nSession stopped event.", NotifyType.StatusMessage);
                stopRecognition.TrySetResult(0);
            };

            // Starts continuous recognition. Uses StopContinuousRecognitionAsync() to stop recognition.
            await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);

            // Waits for completion.
            // Use Task.WaitAny to keep the task rooted.
            Task.WaitAny(new[] { stopRecognition.Task });

            // Stops continuous recognition.
            await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);
        }
    }
    catch (Exception ex)
    {
        NotifyUser($"{ex.ToString()}", NotifyType.ErrorMessage);
    }
}
public Task StartContinuousRecognitionAsync()
{
    return recognized.StartContinuousRecognitionAsync();
}
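// The one-line forwarder above presumably lives in a wrapper around a
// TranslationRecognizer field named 'recognized'. A sketch of that surrounding
// adapter, under that assumption (the class and member names are hypothetical):
public sealed class RecognizerAdapter
{
    private readonly TranslationRecognizer recognized;

    public RecognizerAdapter(TranslationRecognizer inner) => recognized = inner;

    public Task StartContinuousRecognitionAsync() => recognized.StartContinuousRecognitionAsync();

    public Task StopContinuousRecognitionAsync() => recognized.StopContinuousRecognitionAsync();
}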
private async Task StartSpeechTranslation()
{
    try
    {
        if (isTranslationListening || string.IsNullOrEmpty(settings.SpeechKey))
        {
            return;
        }

        isTranslationListening = true;

        // Creates an instance of a speech translation config with specified subscription key and service region.
        // Replace with your own subscription key and service region (e.g., "westus").
        var config = SpeechTranslationConfig.FromSubscription(settings.SpeechKey, settings.SpeechRegion);
        config.SpeechRecognitionLanguage = "en-US";

        translationStopRecognition = new TaskCompletionSource<int>();

        // Picks a random target language from the configured set.
        Random rand = new Random();
        string language = textLanguges.ElementAt(rand.Next(textLanguges.Keys.Count())).Key;
        config.AddTargetLanguage(language);

        using (var recognizer = new TranslationRecognizer(config))
        {
            // Subscribes to events.
            recognizer.Recognizing += (s, e) =>
            {
                try
                {
                    Debug.WriteLine($"Message received {e.Result.Text}");
                    string languageLong = textLanguges[e.Result.Translations.First().Key];
                    UpdateTranslationUI($"English: {e.Result.Text}", $"{languageLong}: {e.Result.Translations.First().Value}");
                }
                catch (Exception)
                {
                    // let it go
                }
            };

            recognizer.Recognized += (s, e) => { var result = e.Result; };

            recognizer.Canceled += (s, e) =>
            {
                //NotifyUser($"An error occurred. Please step in front of camera to reactivate.");
                isTranslationListening = false;
                translationStopRecognition.TrySetResult(0);
            };

            recognizer.SessionStopped += (s, e) =>
            {
                //NotifyUser($"\n Session event. Event: {e.EventType.ToString()}.");
                // Stops recognition when session stop is detected.
                //NotifyUser($"\nStop recognition.");
                isTranslationListening = false;
                translationStopRecognition.TrySetResult(0);
            };

            // Starts continuous recognition. Uses StopContinuousRecognitionAsync() to stop recognition.
            await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);

            UpdateTranslationUI($"Warming Up Translation", "");
            await Task.Delay(3500);
            UpdateTranslationUI($"Say Hi!", "");

            // Waits for completion.
            // Use Task.WaitAny to keep the task rooted.
            Task.WaitAny(new[] { translationStopRecognition.Task });

            //NotifyUser($"Stopped listening");
            isTranslationListening = false;

            // Stops recognition.
            await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);
        }
    }
    catch (Exception ex)
    {
        // Exception caught; let it go.
    }
}
// Translation from microphone.
public static async Task TranslationWithMicrophoneAsync_withLanguageDetectionEnabled()
{
    // <TranslationWithMicrophoneAsync>
    // Translation source language.
    // Replace with a language of your choice.
    string fromLanguage = "en-US";

    // Voice name of synthesis output.
    const string GermanVoice = "Microsoft Server Speech Text to Speech Voice (de-DE, Hedda)";

    // Creates an instance of a speech translation config with specified subscription key and service region.
    // Replace with your own subscription key and service region (e.g., "westus").
    var config = SpeechTranslationConfig.FromSubscription("YourSubscriptionKey", "YourServiceRegion");

    // This is required, even when language id is enabled.
    config.SpeechRecognitionLanguage = fromLanguage;
    config.VoiceName = GermanVoice;

    // Translation target language(s).
    // Replace with language(s) of your choice.
    config.AddTargetLanguage("de");

    // Sets the language detection property.
    // Please refer to the documentation of language id for the different modes.
    config.SetProperty(PropertyId.SpeechServiceConnection_SingleLanguageIdPriority, "Latency");
    var autoDetectSourceLanguageConfig = AutoDetectSourceLanguageConfig.FromLanguages(new string[] { "en-US", "de-DE" });

    // Creates a translation recognizer using the microphone as audio input.
    using (var recognizer = new TranslationRecognizer(config, autoDetectSourceLanguageConfig))
    {
        // Subscribes to events.
        recognizer.Recognizing += (s, e) =>
        {
            Console.WriteLine($"RECOGNIZING Text={e.Result.Text}");
            foreach (var element in e.Result.Translations)
            {
                Console.WriteLine($"    TRANSLATING into '{element.Key}': {element.Value}");
            }
        };

        recognizer.Recognized += (s, e) =>
        {
            if (e.Result.Reason == ResultReason.TranslatedSpeech)
            {
                Console.WriteLine($"RECOGNIZED Text={e.Result.Text}");
                foreach (var element in e.Result.Translations)
                {
                    Console.WriteLine($"    TRANSLATED into '{element.Key}': {element.Value}");
                }
            }
            else if (e.Result.Reason == ResultReason.RecognizedSpeech)
            {
                Console.WriteLine($"RECOGNIZED: Text={e.Result.Text}");
                Console.WriteLine($"    Speech not translated.");
            }
            else if (e.Result.Reason == ResultReason.NoMatch)
            {
                Console.WriteLine($"NOMATCH: Speech could not be recognized.");
            }
        };

        recognizer.Synthesizing += (s, e) =>
        {
            var audio = e.Result.GetAudio();
            Console.WriteLine(audio.Length != 0
                ? $"AudioSize: {audio.Length}"
                : $"AudioSize: {audio.Length} (end of synthesis data)");

            if (audio.Length > 0)
            {
#if NET461
                using (var m = new MemoryStream(audio))
                {
                    SoundPlayer simpleSound = new SoundPlayer(m);
                    simpleSound.PlaySync();
                }
#endif
            }
        };

        recognizer.Canceled += (s, e) =>
        {
            Console.WriteLine($"CANCELED: Reason={e.Reason}");
            if (e.Reason == CancellationReason.Error)
            {
                Console.WriteLine($"CANCELED: ErrorCode={e.ErrorCode}");
                Console.WriteLine($"CANCELED: ErrorDetails={e.ErrorDetails}");
                Console.WriteLine($"CANCELED: Did you update the subscription info?");
            }
        };

        recognizer.SessionStarted += (s, e) => { Console.WriteLine("\nSession started event."); };
        recognizer.SessionStopped += (s, e) => { Console.WriteLine("\nSession stopped event."); };

        // Starts continuous recognition. Uses StopContinuousRecognitionAsync() to stop recognition.
        Console.WriteLine("Say something...");
        await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);

        do
        {
            Console.WriteLine("Press Enter to stop");
        } while (Console.ReadKey().Key != ConsoleKey.Enter);

        // Stops continuous recognition.
        await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);
    }
    // </TranslationWithMicrophoneAsync>
}
public async Task<TranscriptUtterance> SpeechToTranslatedTextAsync(string audioUrl, string sourceLanguage, string targetLanguage)
{
    Transcripts.Clear();
    TranscriptUtterance utterance = null;

    var config = SpeechTranslationConfig.FromSubscription(_subscriptionKey, _region);
    config.SpeechRecognitionLanguage = sourceLanguage;
    config.AddTargetLanguage(targetLanguage);

    var stopTranslation = new TaskCompletionSource<int>();

    using (var audioInput = await AudioUtils.DownloadWavFileAsync(audioUrl))
    using (var recognizer = new TranslationRecognizer(config, audioInput))
    {
        // Subscribes to events.
        recognizer.Recognized += (s, e) =>
        {
            if (e.Result.Reason == ResultReason.TranslatedSpeech)
            {
                utterance = new TranscriptUtterance
                {
                    Recognition = e.Result.Text,
                    Translation = e.Result.Translations.FirstOrDefault().Value,
                };
            }
            else if (e.Result.Reason == ResultReason.NoMatch)
            {
                Trace.TraceError("NOMATCH: Speech could not be translated.");
            }
        };

        recognizer.Canceled += (s, e) =>
        {
            if (e.Reason == CancellationReason.Error)
            {
                Trace.TraceError($"Failed to decode incoming text message: {e.ErrorDetails}");
            }
            stopTranslation.TrySetResult(0);
        };

        recognizer.SessionStopped += (s, e) =>
        {
            Trace.TraceInformation("Session stopped event.");
            stopTranslation.TrySetResult(0);
        };

        await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);

        // Waits for completion.
        // Use Task.WaitAny to keep the task rooted.
        Task.WaitAny(new[] { stopTranslation.Task });

        // Stops translation.
        await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);

        return utterance;
    }
}
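// AudioUtils.DownloadWavFileAsync is a project-specific helper the source does not show.
// A minimal sketch of what it could look like, under the assumption that it fetches the
// WAV to a temp file and wraps it in an AudioConfig (requires System.Net.Http, System.IO,
// and Microsoft.CognitiveServices.Speech.Audio):
public static class AudioUtils
{
    private static readonly HttpClient http = new HttpClient();

    public static async Task<AudioConfig> DownloadWavFileAsync(string audioUrl)
    {
        string tempPath = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName() + ".wav");
        byte[] bytes = await http.GetByteArrayAsync(audioUrl).ConfigureAwait(false);
        File.WriteAllBytes(tempPath, bytes);

        // AudioConfig.FromWavFileInput expects a WAV file on disk; AudioConfig is
        // IDisposable, matching the using block in the caller above.
        return AudioConfig.FromWavFileInput(tempPath);
    }
}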
public async Task TranslationContinuousRecognitionAsync(
    string fromLanguage = "en-US",
    string targetLanguage = "nl",
    string voice = "nl-NL-HannaRUS")
{
    var config = GetSpeechClient();
    config.SpeechRecognitionLanguage = fromLanguage;
    config.AddTargetLanguage(targetLanguage);
    config.VoiceName = voice;

    using var recognizer = new TranslationRecognizer(config);

    recognizer.Recognizing += (s, e) =>
    {
        Console.WriteLine($"RECOGNIZING in '{fromLanguage}': Text={e.Result.Text}");
        foreach (var element in e.Result.Translations)
        {
            Console.WriteLine($"    TRANSLATING into '{element.Key}': {element.Value}");
        }
    };

    recognizer.Recognized += (s, e) =>
    {
        if (e.Result.Reason == ResultReason.TranslatedSpeech)
        {
            Console.WriteLine($"\nFinal result: Reason: {e.Result.Reason}, recognized text in {fromLanguage}: {e.Result.Text}.");
            foreach (var element in e.Result.Translations)
            {
                Console.WriteLine($"    TRANSLATING into '{element.Key}': {element.Value}");
                // Note: .Wait() blocks the event-handler thread until playback finishes;
                // acceptable in a console sample, but avoid in UI code.
                SynthesisToSpeakerAsync(element.Value).Wait();
            }
        }
    };

    recognizer.Synthesizing += (s, e) =>
    {
        var audio = e.Result.GetAudio();
        Console.WriteLine(audio.Length != 0
            ? $"AudioSize: {audio.Length}"
            : $"AudioSize: {audio.Length} (end of synthesis data)");
    };

    recognizer.Canceled += (s, e) =>
    {
        Console.WriteLine($"\nRecognition canceled. Reason: {e.Reason}; ErrorDetails: {e.ErrorDetails}");
    };

    recognizer.SessionStarted += (s, e) =>
    {
        Console.WriteLine("\nSession started event.");
    };

    recognizer.SessionStopped += (s, e) =>
    {
        Console.WriteLine("\nSession stopped event.");
    };

    Console.WriteLine("Say something...");
    await recognizer.StartContinuousRecognitionAsync();

    do
    {
        Console.WriteLine("Press Enter to stop");
    } while (Console.ReadKey().Key != ConsoleKey.Enter);

    await recognizer.StopContinuousRecognitionAsync();
}
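// GetSpeechClient() and SynthesisToSpeakerAsync() are helpers the source does not show.
// A plausible sketch under those assumptions: the first builds the translation config
// from stored credentials (_subscriptionKey and _region are assumed fields), and the
// second speaks a string through the default speaker.
private SpeechTranslationConfig GetSpeechClient()
{
    return SpeechTranslationConfig.FromSubscription(_subscriptionKey, _region);
}

private async Task SynthesisToSpeakerAsync(string text)
{
    // With no AudioConfig argument, SpeechSynthesizer plays to the default speaker.
    var speechConfig = SpeechConfig.FromSubscription(_subscriptionKey, _region);
    using var synthesizer = new SpeechSynthesizer(speechConfig);
    await synthesizer.SpeakTextAsync(text);
}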
// Translation from microphone.
public static async Task TranslationWithMicrophoneAsync()
{
    // <TranslationWithMicrophoneAsync>
    // Translation source language.
    // Replace with a language of your choice.
    string fromLanguage = "en-US";

    // Voice name of synthesis output.
    const string GermanVoice = "Microsoft Server Speech Text to Speech Voice (de-DE, Hedda)";

    // Creates an instance of a speech translation config with specified subscription key and service region.
    // Replace with your own subscription key and service region (e.g., "westus").
    var config = SpeechTranslationConfig.FromSubscription("", "westus");
    config.SpeechRecognitionLanguage = fromLanguage;
    config.VoiceName = GermanVoice;

    // Translation target language(s).
    // Replace with language(s) of your choice.
    config.AddTargetLanguage("de");

    // Creates a translation recognizer using microphone as audio input.
    using (var recognizer = new TranslationRecognizer(config))
    {
        // Subscribes to events. Intermediate and synthesis output is muted (commented out)
        // so that only final translated results are printed.
        recognizer.Recognizing += (s, e) =>
        {
            //Console.WriteLine($"RECOGNIZING in '{fromLanguage}': Text={e.Result.Text}");
            //foreach (var element in e.Result.Translations)
            //{
            //    Console.WriteLine($"    TRANSLATING into '{element.Key}': {element.Value}");
            //}
        };

        recognizer.Recognized += (s, e) =>
        {
            if (e.Result.Reason == ResultReason.TranslatedSpeech)
            {
                Console.WriteLine($"\nRECOGNIZED in '{fromLanguage}': Text={e.Result.Text}");
                foreach (var element in e.Result.Translations)
                {
                    Console.WriteLine($"TRANSLATED into '{element.Key}': {element.Value}");
                }
            }
            else if (e.Result.Reason == ResultReason.RecognizedSpeech)
            {
                //Console.WriteLine($"\nText={e.Result.Text}");
                //Console.WriteLine($"    Speech not translated.");
            }
            else if (e.Result.Reason == ResultReason.NoMatch)
            {
                //Console.WriteLine($"NOMATCH: Speech could not be recognized.");
            }
        };

        recognizer.Synthesizing += (s, e) =>
        {
            //var audio = e.Result.GetAudio();
            //Console.WriteLine(audio.Length != 0
            //    ? $"AudioSize: {audio.Length}"
            //    : $"AudioSize: {audio.Length} (end of synthesis data)");

            //if (audio.Length > 0)
            //{
            //    #if NET461
            //    using (var m = new MemoryStream(audio))
            //    {
            //        SoundPlayer simpleSound = new SoundPlayer(m);
            //        simpleSound.PlaySync();
            //    }
            //    #endif
            //}
        };

        recognizer.Canceled += (s, e) =>
        {
            Console.WriteLine($"CANCELED: Reason={e.Reason}");
            if (e.Reason == CancellationReason.Error)
            {
                Console.WriteLine($"CANCELED: ErrorCode={e.ErrorCode}");
                Console.WriteLine($"CANCELED: ErrorDetails={e.ErrorDetails}");
                Console.WriteLine($"CANCELED: Did you update the subscription info?");
            }
        };

        recognizer.SessionStarted += (s, e) =>
        {
            Console.WriteLine("\nSession started event.");
        };

        recognizer.SessionStopped += (s, e) =>
        {
            Console.WriteLine("\nSession stopped event.");
        };

        // Starts continuous recognition. Uses StopContinuousRecognitionAsync() to stop recognition.
        Console.WriteLine("Say something...");
        await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);

        do
        {
            Console.WriteLine("Press Enter to stop");
        } while (Console.ReadKey().Key != ConsoleKey.Enter);

        // Stops continuous recognition.
        await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);
    }
    // </TranslationWithMicrophoneAsync>
}
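// The muted Synthesizing handler above only plays audio on .NET Framework 4.6.1 via
// SoundPlayer. A portable alternative is to save the synthesized audio instead. This is
// a sketch, assuming each non-empty payload is a complete playable WAV (as the
// per-chunk SoundPlayer usage in the samples suggests); requires System.IO:
int chunkIndex = 0;
recognizer.Synthesizing += (s, e) =>
{
    byte[] audio = e.Result.GetAudio();
    if (audio.Length > 0)
    {
        // An empty payload signals end of synthesis data; non-empty payloads are saved.
        File.WriteAllBytes($"translation-output-{chunkIndex++}.wav", audio);
    }
};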
public static async Task TranslationContinuousRecognitionAsync()
{
    var config = SpeechTranslationConfig.FromSubscription("key", "region");

    string fromLanguage = "en-US";
    config.SpeechRecognitionLanguage = fromLanguage;
    config.AddTargetLanguage("de");

    const string GermanVoice = "de-DE-Hedda";
    config.VoiceName = GermanVoice;

    using (var recognizer = new TranslationRecognizer(config))
    {
        recognizer.Recognizing += (s, e) =>
        {
            Console.WriteLine($"RECOGNIZING in '{fromLanguage}': Text={e.Result.Text}");
            foreach (var element in e.Result.Translations)
            {
                Console.WriteLine($"    TRANSLATING into '{element.Key}': {element.Value}");
            }
        };

        recognizer.Recognized += (s, e) =>
        {
            if (e.Result.Reason == ResultReason.TranslatedSpeech)
            {
                Console.WriteLine($"\nFinal result: Reason: {e.Result.Reason}, recognized text in {fromLanguage}: {e.Result.Text}.");
                foreach (var element in e.Result.Translations)
                {
                    Console.WriteLine($"    TRANSLATING into '{element.Key}': {element.Value}");
                }
            }
        };

        recognizer.Synthesizing += (s, e) =>
        {
            var audio = e.Result.GetAudio();
            Console.WriteLine(audio.Length != 0
                ? $"AudioSize: {audio.Length}"
                : $"AudioSize: {audio.Length} (end of synthesis data)");
        };

        recognizer.Canceled += (s, e) =>
        {
            Console.WriteLine($"\nRecognition canceled. Reason: {e.Reason}; ErrorDetails: {e.ErrorDetails}");
        };

        recognizer.SessionStarted += (s, e) =>
        {
            Console.WriteLine("\nSession started event.");
        };

        recognizer.SessionStopped += (s, e) =>
        {
            Console.WriteLine("\nSession stopped event.");
        };

        Console.WriteLine("Say something...");
        await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);

        do
        {
            Console.WriteLine("Press Enter to stop");
        } while (Console.ReadKey().Key != ConsoleKey.Enter);

        await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);
    }
}
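// A minimal entry point for trying the snippet above. This is an assumption; the original
// source does not show how the method is invoked. Requires the
// Microsoft.CognitiveServices.Speech NuGet package and valid subscription values in place
// of "key" and "region".
public static async Task Main()
{
    await TranslationContinuousRecognitionAsync();
}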