// Handles the Record button click: runs one-shot speech recognition with
// automatic language detection (fr-FR vs hi-IN) and passes the detected
// language on to the OptionsMenu form.
// NOTE(review): async void is acceptable only because this is a top-level
// WinForms event handler.
private async void btnRecord_Click(object sender, EventArgs e)
{
    btnRecord.BackColor = Color.LightGreen;

    // Other fun language codes: fr-FR, ja-JP, hi-IN, de-DE.
    var autoDetectSourceLanguageConfig = AutoDetectSourceLanguageConfig.FromLanguages(new string[] { "fr-FR", "hi-IN" });

    // SECURITY(review): the subscription key is hard-coded in source; move it
    // to configuration or a secret store before shipping.
    using (var recognizer = new SpeechRecognizer(SpeechConfig.FromSubscription("cb35ce20eade4be2a74a36ab2e9d0ac1", "eastus"), autoDetectSourceLanguageConfig))
    {
        var speechRecognitionResult = await recognizer.RecognizeOnceAsync();
        if (speechRecognitionResult.Reason == ResultReason.Canceled)
        {
            var cancellation = CancellationDetails.FromResult(speechRecognitionResult);
            MessageBox.Show("Error: " + cancellation);
            this.Close();
            return;
        }

        var autoDetectSourceLanguageResult = AutoDetectSourceLanguageResult.FromResult(speechRecognitionResult);
        var detectedLanguage = autoDetectSourceLanguageResult.Language;
        btnRecord.BackColor = default(Color);

        // detectedLanguage passed on to the OptionsMenu form
        formOptionsMenu = new OptionsMenu(detectedLanguage);
        formOptionsMenu.Show(); // pop up Options Menu
    }

    // BUG FIX: the original ended with
    //     btnRecord.Click += new EventHandler(this.btnRecord_Click);
    // which re-subscribed this handler on every invocation, so each click ran
    // the handler one more time than the last. The designer already wires the
    // handler once; do not add it again.
}
/// <summary>
/// Detects the dominant spoken language in <paramref name="audioBytes"/>,
/// choosing between the two candidate locales. Streams the audio through a
/// push stream, collects per-utterance detections, and returns the most
/// frequent one; falls back to <paramref name="locale1"/> when detection is
/// inconclusive or yields an unexpected locale.
/// </summary>
/// <param name="audioBytes">Raw audio payload (converted to WAV internally).</param>
/// <param name="fileExtension">Extension of the original audio format.</param>
/// <param name="locale1">First candidate locale; also the fallback result.</param>
/// <param name="locale2">Second candidate locale.</param>
/// <returns>The detected locale, or <paramref name="locale1"/> as fallback.</returns>
/// <exception cref="TimeoutException">No detection results arrived in time.</exception>
public async Task <string> DetectLanguage(byte[] audioBytes, string fileExtension, string locale1, string locale2)
{
    var wavBytes = ConvertToWaveBytes(audioBytes, fileExtension);
    var autoDetectSourceLanguageConfig = AutoDetectSourceLanguageConfig.FromLanguages(new string[] { locale1, locale2 });
    var config = SpeechConfig.FromSubscription(SubscriptionKey, SubscriptionRegion);
    // RunContinuationsAsynchronously keeps SDK callback threads from running
    // our continuation inline when the TCS completes.
    var stopRecognition = new TaskCompletionSource <int>(TaskCreationOptions.RunContinuationsAsynchronously);
    var detected = new List <string>();
    using var pushStream = AudioInputStream.CreatePushStream();
    using (var audioInput = AudioConfig.FromStreamInput(pushStream))
    {
        using var recognizer = new SpeechRecognizer(config, autoDetectSourceLanguageConfig, audioInput);
        // Feed the whole clip, then close so the service sees end-of-stream.
        pushStream.Write(wavBytes);
        pushStream.Close();
        recognizer.Recognized += (s, e) =>
        {
            var autoDetectSourceLanguageResult = AutoDetectSourceLanguageResult.FromResult(e.Result);
            detected.Add(autoDetectSourceLanguageResult.Language);
            // Stop early once we have enough utterances to vote on.
            if (detected.Count > UtteranceCount)
            {
                stopRecognition.TrySetResult(0);
            }
        };
        recognizer.SessionStopped += (s, e) => { stopRecognition.TrySetResult(0); };
        await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);
        // Time-box the recognition; the timeout helper also completes stopRecognition.
        var t = Task.Factory.StartNew(async() => { await SetTimeOutForRecognition(stopRecognition).ConfigureAwait(false); }, CancellationToken.None, TaskCreationOptions.None, TaskScheduler.Default);
        // FIX: await instead of Task.WaitAny — blocking a thread-pool thread
        // inside an async method risks starvation/deadlock.
        await stopRecognition.Task.ConfigureAwait(false);
        // BUG FIX: the original called StopKeywordRecognitionAsync(), but
        // recognition was started with StartContinuousRecognitionAsync(); the
        // matching stop call is StopContinuousRecognitionAsync().
        await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);
    }
    if (detected.Count == 0)
    {
        throw new TimeoutException("Did not get any language identification results back in time.");
    }
    // Majority vote over the per-utterance detections.
    var detectedByCount = detected.GroupBy(i => i);
    var mostFreq = detectedByCount.OrderBy(g => g.Count()).LastOrDefault().Key;
    if (string.IsNullOrEmpty(mostFreq) || (!mostFreq.Equals(locale1, StringComparison.OrdinalIgnoreCase) && !mostFreq.Equals(locale2, StringComparison.OrdinalIgnoreCase)))
    {
        return locale1;
    }
    return mostFreq;
}
// Builds a SpeechRecognizer from the user configuration, optionally enabling
// automatic language identification and a phrase-list grammar.
static SpeechRecognizer user_config_to_speech_recognizer(SpeechConfig speech_config, AudioConfig audio_config, USER_CONFIG user_config)
{
    SpeechRecognizer speech_recognizer;
    if (user_config.language_id_enabled)
    {
        /* Note: Continuous language identification is supported only in C#, C++, and Python.
         * See:
         * https://docs.microsoft.com/azure/cognitive-services/speech-service/how-to-automatic-language-detection?pivots=programming-language-cpp#language-identification-with-speech-to-text
         */
        var detect_language_config = AutoDetectSourceLanguageConfig.FromLanguages(user_config.language_id_languages);
        speech_recognizer = new SpeechRecognizer(speech_config, detect_language_config, audio_config);
    }
    else
    {
        speech_recognizer = new SpeechRecognizer(speech_config, audio_config);
    }

    if (user_config.phrase_list_enabled)
    {
        // Bias recognition toward the configured phrase.
        PhraseListGrammar.FromRecognizer(speech_recognizer).AddPhrase(user_config.phrase_list);
    }
    return speech_recognizer;
}
// Performs one-shot speech recognition with automatic language detection
// among en-US / de-DE / pl-PL, storing the transcript in `text` and the
// detected locale in `language`. Both fields read "Error" until overwritten.
public async Task RecognizeSpeechAsync()
{
    text = "Error";
    language = "Error";

    var speechConfig = SpeechConfig.FromSubscription("54e5c11f4ba84a95a282d180905efeb1", "westus");
    var languageCandidates = AutoDetectSourceLanguageConfig.FromLanguages(new string[] { "en-US", "de-DE", "pl-PL" });

    using var recognizer = new SpeechRecognizer(speechConfig, languageCandidates);
    var result = await recognizer.RecognizeOnceAsync();

    // Record the detected locale regardless of the recognition outcome.
    language = AutoDetectSourceLanguageResult.FromResult(result).Language;

    if (result.Reason == ResultReason.RecognizedSpeech)
    {
        text = result.Text;
    }
    else if (result.Reason == ResultReason.NoMatch)
    {
        text = $"NOMATCH: Rozpoznanie nie udało się.";
    }
    else if (result.Reason == ResultReason.Canceled)
    {
        var cancellation = CancellationDetails.FromResult(result);
        if (cancellation.Reason == CancellationReason.Error)
        {
            Debug.WriteLine($"CANCELED: ErrorCode={cancellation.ErrorCode}");
            Debug.WriteLine($"CANCELED: ErrorDetails={cancellation.ErrorDetails}");
            Debug.WriteLine($"CANCELED: Did you update the subscription info?");
        }
        text = $"CANCELED: Reason={cancellation.Reason}";
    }
}
// One-shot speech recognition from the default microphone with automatic
// language detection between en-US and ru-RU; prints the transcript and the
// detected locale to the console.
public static async Task RecognizeLng()
{
    SpeechConfig speechConfig = SpeechConfig.FromEndpoint(new System.Uri(ConfigurationManager.AppSettings.Get("SpeechEndpoint")), ConfigurationManager.AppSettings.Get("TTSKey"));
    // BUG FIX: the original used AudioConfig.FromDefaultSpeakerOutput(),
    // which is an *output* (synthesis) configuration. A SpeechRecognizer
    // needs an input device — the default microphone here, consistent with
    // the "Say something..." prompt below.
    AudioConfig audioConfig = AudioConfig.FromDefaultMicrophoneInput();
    AutoDetectSourceLanguageConfig autoDetectSourceLanguageConfig = AutoDetectSourceLanguageConfig
        .FromLanguages(new string[] { "en-US", "ru-RU" });
    using (var recognizer = new SpeechRecognizer(speechConfig, autoDetectSourceLanguageConfig, audioConfig))
    {
        Console.WriteLine("Say something...");
        var speechRecognitionResult = await recognizer.RecognizeOnceAsync();
        var autoDetectSourceLanguageResult = AutoDetectSourceLanguageResult.FromResult(speechRecognitionResult);
        var detectedLng = autoDetectSourceLanguageResult.Language;
        Console.WriteLine("I recognized " + speechRecognitionResult.Text + " in " + detectedLng);
    }
}
// Wires up the voice-recognition handler: resolves the microphone service,
// grabs the BLE and driving handler singletons, stores the UI widgets, and
// lazily creates the speech recognizer with automatic language detection.
public VRHandler(Label label, ActivityIndicator indicator)
{
    micService = DependencyService.Resolve <IMicrophoneService>();
    bleHandler = BLEHandler.GetHandler();
    drivingHandler = DrivingHandler.GetHandler();
    lText = label;
    this.indicator = indicator;

    // Initialize the speech recognizer only if it does not exist yet.
    if (recognizer == null)
    {
        var speechConfig = SpeechConfig.FromSubscription(Constants.CognitiveServicesApiKey, Constants.CognitiveServicesRegion);
        // Candidate languages come from Constants.GetLanguages
        // (e.g. "cs-CZ", "en-US", "en-GB").
        var languageConfig = AutoDetectSourceLanguageConfig.FromLanguages(Constants.GetLanguages);
        recognizer = new SpeechRecognizer(speechConfig, languageConfig);
        recognizer.Recognized += (obj, args) => Process(args.Result.Text);
    }
}
//
// Create a SpeechRecognizer from the stored user configuration, optionally
// with automatic language identification and a phrase-list grammar.
//
private SpeechRecognizer SpeechRecognizerFromUserConfig()
{
    AudioConfig audioConfig = AudioConfigFromUserConfig();
    SpeechConfig speechConfig = SpeechConfigFromUserConfig();

    // Enable language identification only when candidate languages were given.
    SpeechRecognizer speechRecognizer =
        userConfig.languageIDLanguages is string[] candidateLanguages
            ? new SpeechRecognizer(speechConfig, AutoDetectSourceLanguageConfig.FromLanguages(candidateLanguages), audioConfig)
            : new SpeechRecognizer(speechConfig, audioConfig);

    if (this.userConfig.phraseList is string configuredPhrase)
    {
        PhraseListGrammar.FromRecognizer(speechRecognizer).AddPhrase(configuredPhrase);
    }
    return speechRecognizer;
}
/// <summary>
/// Streams recognition results from the audio source until cancelled or the
/// recognition session stops. Source languages (plus "en-GB") are
/// auto-detected; optional phrases bias the recogniser.
/// </summary>
/// <param name="audioSource">PCM audio provider (resampled to 24 kHz mono 16-bit).</param>
/// <param name="cancellation">Stops the enumeration when signalled.</param>
/// <param name="sourceLangs">Optional candidate source languages.</param>
/// <param name="phrases">Optional extra phrase hints.</param>
public async IAsyncEnumerable <RecognitionWord> ContinuousRecognition(IWaveProvider audioSource, [EnumeratorCancellation] CancellationToken cancellation, IAsyncEnumerable <string>?sourceLangs, IAsyncEnumerable <string>?phrases)
{
    var config = SpeechConfig.FromSubscription(_key, _region);
    var audioConfig = AudioConfig.FromStreamInput(new PullAdapter(audioSource, 24000), AudioStreamFormat.GetWaveFormatPCM(24000, 16, 1));
    using var recogniser = new SpeechRecognizer(
        config,
        AutoDetectSourceLanguageConfig.FromLanguages(await(sourceLangs ?? Array.Empty <string>().ToAsyncEnumerable()).Append("en-GB").ToArrayAsync(cancellation)),
        audioConfig
    );

    // Add some likely words to the phrase dictionary
    var phraseList = PhraseListGrammar.FromRecognizer(recogniser);
    phraseList.AddPhrase("mute");
    phraseList.AddPhrase("discord");
    phraseList.AddPhrase("stop");
    if (phrases != null)
    {
        await foreach (var phrase in phrases.WithCancellation(cancellation))
        {
            phraseList.AddPhrase(phrase);
        }
    }

    // Subscribe to recogniser results; events fire on SDK threads, so use a
    // concurrent queue to hand results to the enumerator loop below.
    var results = new ConcurrentQueue <RecognitionWord>();
    recogniser.Recognized += (_, e) =>
    {
        if (e.Result.Reason == ResultReason.RecognizedSpeech)
        {
            results.Enqueue(new RecognitionWord(e.Result.Text));
        }
        else if (e.Result.Reason == ResultReason.NoMatch)
        {
            results.Enqueue(new RecognitionWord(null));
        }
    };
    recogniser.Canceled += (s, e) =>
    {
        Console.WriteLine($"CANCELED: Reason={e.Reason}");
        if (e.Reason == CancellationReason.Error)
        {
            results.Enqueue(new RecognitionWord($"CANCELED: ErrorCode={e.ErrorCode}"));
            results.Enqueue(new RecognitionWord($"CANCELED: ErrorDetails={e.ErrorDetails}"));
            results.Enqueue(new RecognitionWord($"CANCELED: Did you update the subscription info?"));
        }
    };
    recogniser.SessionStarted += (_, e) => { results.Enqueue(new RecognitionWord("Session_started_event.")); };
    var stopped = false;
    recogniser.SessionStopped += (_, e) =>
    {
        results.Enqueue(new RecognitionWord("Session_stopped_event."));
        stopped = true;
    };

    // Return recognised results until cancelled
    await recogniser.StartContinuousRecognitionAsync();
    while (!cancellation.IsCancellationRequested && !stopped)
    {
        if (results.TryDequeue(out var r))
        {
            yield return(r);
        }
        else
        {
            // BUG FIX: the original spun in a hot loop when the queue was
            // empty, pinning a CPU core for the whole session. Yield briefly
            // instead; CancellationToken.None so a caller cancel surfaces as
            // a clean loop exit rather than an exception here.
            await Task.Delay(10, CancellationToken.None);
        }
    }

    // Stop receiving further results
    await recogniser.StopContinuousRecognitionAsync();

    // Finish sending remaining results
    foreach (var result in results)
    {
        yield return(result);
    }
}
// Speech recognition with auto detection for source language.
// Runs continuous recognition on a WAV file with two candidate languages
// (the current SDK limit) and prints recognized text plus the detected
// language until the session stops or is canceled.
public static async Task RecognitionWithAutoDetectSourceLanguageAsync()
{
    // Creates an instance of a speech config with specified subscription key and service region.
    // Replace with your own subscription key and service region (e.g., "westus").
    var config = SpeechConfig.FromSubscription("YourSubscriptionKey", "YourServiceRegion");

    // Creates an instance of AutoDetectSourceLanguageConfig with the 2 source language candidates.
    // Currently this feature only supports 2 different language candidates.
    // Replace the languages with your candidates; see
    // https://docs.microsoft.com/azure/cognitive-services/speech-service/language-support
    // for all supported languages.
    var autoDetectSourceLanguageConfig = AutoDetectSourceLanguageConfig.FromLanguages(new string[] { "de-DE", "fr-FR" });

    // RunContinuationsAsynchronously so the SDK callback that completes the
    // TCS does not run our continuation inline on its event thread.
    var stopRecognition = new TaskCompletionSource <int>(TaskCreationOptions.RunContinuationsAsynchronously);

    // Creates a speech recognizer using the auto detect source language config, and the file as audio input.
    // Replace with your own audio file name.
    using (var audioInput = AudioConfig.FromWavFileInput(@"whatstheweatherlike.wav"))
    {
        using (var recognizer = new SpeechRecognizer(config, autoDetectSourceLanguageConfig, audioInput))
        {
            // Subscribes to events.
            recognizer.Recognizing += (s, e) =>
            {
                if (e.Result.Reason == ResultReason.RecognizingSpeech)
                {
                    Console.WriteLine($"RECOGNIZING: Text={e.Result.Text}");
                    // Retrieve the detected language
                    var autoDetectSourceLanguageResult = AutoDetectSourceLanguageResult.FromResult(e.Result);
                    Console.WriteLine($"DETECTED: Language={autoDetectSourceLanguageResult.Language}");
                }
            };
            recognizer.Recognized += (s, e) =>
            {
                if (e.Result.Reason == ResultReason.RecognizedSpeech)
                {
                    Console.WriteLine($"RECOGNIZED: Text={e.Result.Text}");
                    // Retrieve the detected language
                    var autoDetectSourceLanguageResult = AutoDetectSourceLanguageResult.FromResult(e.Result);
                    Console.WriteLine($"DETECTED: Language={autoDetectSourceLanguageResult.Language}");
                }
                else if (e.Result.Reason == ResultReason.NoMatch)
                {
                    Console.WriteLine($"NOMATCH: Speech could not be recognized.");
                }
            };
            recognizer.Canceled += (s, e) =>
            {
                Console.WriteLine($"CANCELED: Reason={e.Reason}");
                if (e.Reason == CancellationReason.Error)
                {
                    Console.WriteLine($"CANCELED: ErrorCode={e.ErrorCode}");
                    Console.WriteLine($"CANCELED: ErrorDetails={e.ErrorDetails}");
                    Console.WriteLine($"CANCELED: Did you update the subscription info?");
                }
                stopRecognition.TrySetResult(0);
            };
            recognizer.SessionStarted += (s, e) =>
            {
                Console.WriteLine("\n Session started event.");
            };
            recognizer.SessionStopped += (s, e) =>
            {
                Console.WriteLine("\n Session stopped event.");
                Console.WriteLine("\nStop recognition.");
                stopRecognition.TrySetResult(0);
            };

            // Starts continuous recognition. Uses StopContinuousRecognitionAsync() to stop recognition.
            await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);

            // BUG FIX: the original used Task.WaitAny, which blocks a
            // thread-pool thread inside an async method (deadlock/starvation
            // risk). Awaiting the completion source is the non-blocking
            // equivalent and still keeps the task rooted.
            await stopRecognition.Task.ConfigureAwait(false);

            // Stops recognition.
            await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);
        }
    }
}
// Event Grid trigger: when a WAV blob lands in the "audiorecordings"
// container, download it, identify its language (en-US vs es-MX), transcribe
// it, translate Spanish transcripts to English, and persist the result to
// Cosmos DB.
public static async Task RunAsync([EventGridTrigger] EventGridEvent eventGridEvent, ILogger log)
{
    // Extracting content type and url of the blob triggering the function
    var jsondata = JsonConvert.SerializeObject(eventGridEvent.Data);
    var tmp = new { contentType = "", url = "" };
    var data = JsonConvert.DeserializeAnonymousType(jsondata, tmp);

    // Checking if the trigger was initiated for a WAV File.
    if (data.contentType == "audio/wav")
    {
        var audioUrl = data.url;
        string blobName = audioUrl.Split('/').Last();

        string contosoStorageConnectionString = System.Environment.GetEnvironmentVariable("ContosoStorageConnectionString", EnvironmentVariableTarget.Process);
        string speechRegion = System.Environment.GetEnvironmentVariable("SpeechRegion", EnvironmentVariableTarget.Process);
        string speechKey = System.Environment.GetEnvironmentVariable("SpeechKey", EnvironmentVariableTarget.Process);
        string translatorKey = System.Environment.GetEnvironmentVariable("TranslatorKey", EnvironmentVariableTarget.Process);
        string translatorEndpoint = System.Environment.GetEnvironmentVariable("TranslatorEndpoint", EnvironmentVariableTarget.Process);
        string translatorLocation = System.Environment.GetEnvironmentVariable("TranslatorLocation", EnvironmentVariableTarget.Process);
        string cosmosEndpointUrl = System.Environment.GetEnvironmentVariable("CosmosDBEndpointUrl", EnvironmentVariableTarget.Process);
        string cosmosPrimaryKey = System.Environment.GetEnvironmentVariable("CosmosDBPrimaryKey", EnvironmentVariableTarget.Process);
        string textAnalyticsKey = System.Environment.GetEnvironmentVariable("TextAnalyticsKey", EnvironmentVariableTarget.Process);
        string textAnalyticsEndpoint = System.Environment.GetEnvironmentVariable("TextAnalyticsEndpoint", EnvironmentVariableTarget.Process);

        // Download audio file to a local temp directory
        var tempPath = System.IO.Path.GetTempFileName();
        BlobContainerClient container = new BlobContainerClient(contosoStorageConnectionString, "audiorecordings");
        BlobClient blob = container.GetBlobClient(blobName);
        await blob.DownloadToAsync(tempPath);

        var speechConfig = SpeechConfig.FromSubscription(speechKey, speechRegion);
        speechConfig.SetProperty(PropertyId.SpeechServiceConnection_SingleLanguageIdPriority, "Latency");

        // Audio Language Identification.
        // Considering only two languages: English and Spanish.
        // Languages supported for language detection:
        // https://docs.microsoft.com/azure/cognitive-services/speech-service/language-support
        var autoDetectSourceLanguageConfig = AutoDetectSourceLanguageConfig.FromLanguages(new string[] { "en-US", "es-MX" });
        string languageDetected = "en-US";
        using (var audioInput = AudioConfig.FromWavFileInput(tempPath))
        {
            using (var recognizer = new SourceLanguageRecognizer(speechConfig, autoDetectSourceLanguageConfig, audioInput))
            {
                var result = await recognizer.RecognizeOnceAsync().ConfigureAwait(false);
                if (result.Reason == ResultReason.RecognizedSpeech)
                {
                    var lidResult = AutoDetectSourceLanguageResult.FromResult(result);
                    languageDetected = lidResult.Language;
                }
            }
        }
        speechConfig.SpeechRecognitionLanguage = languageDetected;

        // Audio Transcription
        StringBuilder sb = new StringBuilder();
        using (var audioConfig = AudioConfig.FromWavFileInput(tempPath))
        using (var recognizer = new SpeechRecognizer(speechConfig, audioConfig))
        {
            var stopRecognition = new TaskCompletionSource <int>(TaskCreationOptions.RunContinuationsAsynchronously);
            recognizer.SessionStopped += (s, e) => { stopRecognition.TrySetResult(0); };
            recognizer.Canceled += (s, e) => { stopRecognition.TrySetResult(0); };
            recognizer.Recognized += (s, e) =>
            {
                if (e.Result.Reason == ResultReason.RecognizedSpeech)
                {
                    sb.Append(e.Result.Text);
                }
                else if (e.Result.Reason == ResultReason.NoMatch)
                {
                    log.LogInformation($"NOMATCH: Speech could not be recognized.");
                }
            };
            await recognizer.StartContinuousRecognitionAsync();
            // FIX: await instead of Task.WaitAny — blocking a thread-pool
            // thread inside an async Azure Function risks starvation.
            await stopRecognition.Task;
        }
        string transcribedText = sb.ToString();

        // If transcription is in Spanish we will translate it to English
        if (!languageDetected.Contains("en"))
        {
            string route = $"/translate?api-version=3.0&to=en";
            string textToTranslate = sb.ToString();
            object[] body = new object[] { new { Text = textToTranslate } };
            var requestBody = JsonConvert.SerializeObject(body);
            // NOTE(review): a new HttpClient per invocation risks socket
            // exhaustion under load — consider a static/shared instance.
            using (var client = new HttpClient())
            using (var request = new HttpRequestMessage())
            {
                request.Method = HttpMethod.Post;
                request.RequestUri = new Uri(translatorEndpoint + route);
                request.Content = new StringContent(requestBody, Encoding.UTF8, "application/json");
                request.Headers.Add("Ocp-Apim-Subscription-Key", translatorKey);
                request.Headers.Add("Ocp-Apim-Subscription-Region", translatorLocation);
                HttpResponseMessage response = await client.SendAsync(request).ConfigureAwait(false);
                var responseBody = await response.Content.ReadAsStringAsync();
                List <Model.TranslatorService.Root> translatedDocuments = JsonConvert.DeserializeObject <List <Model.TranslatorService.Root> >(responseBody);
                transcribedText = translatedDocuments.FirstOrDefault().Translations.FirstOrDefault().Text;
            }
        }

        //TODO:Azure Text Analytics for Healthcare

        //Insert documents into CosmosDB
        var cosmosClient = new CosmosClient(cosmosEndpointUrl, cosmosPrimaryKey);
        var cosmosDatabase = (await cosmosClient.CreateDatabaseIfNotExistsAsync("Contoso")).Database;
        var cosmosContainer = (await cosmosDatabase.CreateContainerIfNotExistsAsync("Transcriptions", "/id")).Container;

        Model.Transcription newTranscription = new Model.Transcription();
        newTranscription.Id = Guid.NewGuid().ToString();
        // NOTE(review): assumes blob names start with yyyyMMdd — confirm with
        // the uploader; int.Parse will throw otherwise.
        newTranscription.DocumentDate = new DateTime(int.Parse(blobName.Substring(0, 4)), int.Parse(blobName.Substring(4, 2)), int.Parse(blobName.Substring(6, 2)));
        newTranscription.FileName = blobName;
        newTranscription.TranscribedText = transcribedText;

        // BUG FIX: the original iterated `healthcareResult.Entities`, but
        // `healthcareResult` is never declared anywhere in this function (the
        // healthcare-analysis step above is still a TODO), so the code did
        // not compile. Re-enable once that step produces a result:
        // foreach (var item in healthcareResult.Entities)
        // {
        //     newTranscription.HealthcareEntities.Add(new Model.HealthcareEntity() { Category = item.Category, Text = item.Text });
        // }

        try
        {
            ItemResponse <Model.Transcription> cosmosResponse = await cosmosContainer.CreateItemAsync(newTranscription, new PartitionKey(newTranscription.Id));
        }
        catch (CosmosException ex) when(ex.StatusCode == System.Net.HttpStatusCode.Conflict)
        {
            //Conflicting documents are silently ignored for demo purposes.
        }

        System.IO.File.Delete(tempPath);
        log.LogInformation(eventGridEvent.Data.ToString());
    }
}
/// <summary>
/// Speech recognition with auto detection for source language with the
/// universal v2 endpoint. Multi-lingual continuous recognition is only
/// supported on the universal v2 endpoint.
/// </summary>
public static async Task MultiLingualRecognitionWithUniversalV2Endpiont()
{
    // Official v2 endpoint — replace the region with your service region.
    var v2EndpointUrl = new Uri(String.Format("wss://{0}.stt.speech.microsoft.com/speech/universal/v2", "YourServiceRegion"));

    // Speech config from the endpoint; replace with your subscription key.
    var speechConfig = SpeechConfig.FromEndpoint(v2EndpointUrl, "YourSubscriptionKey");

    // Please refer to the documentation of language id with different modes.
    speechConfig.SetProperty(PropertyId.SpeechServiceConnection_ContinuousLanguageIdPriority, "Latency");

    // Two source-language candidates (the current SDK limit). See
    // https://docs.microsoft.com/azure/cognitive-services/speech-service/language-support
    // for all supported languages.
    var languageCandidates = AutoDetectSourceLanguageConfig.FromLanguages(new string[] { "en-US", "zh-CN" });

    var stopRecognition = new TaskCompletionSource <int>();

    // Recognize from a file; replace with your own audio file name.
    using (var audioInput = AudioConfig.FromWavFileInput(@"en-us_zh-cn.wav"))
    using (var recognizer = new SpeechRecognizer(speechConfig, languageCandidates, audioInput))
    {
        // Event subscriptions.
        recognizer.Recognizing += (s, e) =>
        {
            if (e.Result.Reason == ResultReason.RecognizingSpeech)
            {
                Console.WriteLine($"RECOGNIZING: Text={e.Result.Text}");
                // Retrieve the detected language.
                var lid = AutoDetectSourceLanguageResult.FromResult(e.Result);
                Console.WriteLine($"DETECTED: Language={lid.Language}");
            }
        };
        recognizer.Recognized += (s, e) =>
        {
            if (e.Result.Reason == ResultReason.RecognizedSpeech)
            {
                Console.WriteLine($"RECOGNIZED: Text={e.Result.Text}");
                // Retrieve the detected language.
                var lid = AutoDetectSourceLanguageResult.FromResult(e.Result);
                Console.WriteLine($"DETECTED: Language={lid.Language}");
            }
            else if (e.Result.Reason == ResultReason.NoMatch)
            {
                Console.WriteLine($"NOMATCH: Speech could not be recognized.");
            }
        };
        recognizer.Canceled += (s, e) =>
        {
            Console.WriteLine($"CANCELED: Reason={e.Reason}");
            if (e.Reason == CancellationReason.Error)
            {
                Console.WriteLine($"CANCELED: ErrorCode={e.ErrorCode}");
                Console.WriteLine($"CANCELED: ErrorDetails={e.ErrorDetails}");
                Console.WriteLine($"CANCELED: Did you update the subscription info?");
            }
            stopRecognition.TrySetResult(0);
        };
        recognizer.SessionStarted += (s, e) => Console.WriteLine("\n Session started event.");
        recognizer.SessionStopped += (s, e) =>
        {
            Console.WriteLine("\n Session stopped event.");
            Console.WriteLine("\nStop recognition.");
            stopRecognition.TrySetResult(0);
        };

        // Starts continuous recognition. Uses StopContinuousRecognitionAsync() to stop recognition.
        await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);

        // Waits for completion; Task.WaitAny keeps the task rooted.
        Task.WaitAny(new[] { stopRecognition.Task });

        // Stops recognition.
        await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);
    }
}
// Starts continuous speech recognition with the given options against the
// universal v2 endpoint. On quota/auth failure the Canceled handler rotates
// to the next credentials set and restarts itself fire-and-forget. On
// success, stores the recognizer and registers it for disposal.
public async Task Start(SpeechRecognitionOptions options)
{
    SpeechRecognizer recognizer = null;
    try
    {
        Logger.LogInformation("Starting speech recognition");
        var credentials = this.Credentials;
        // Universal v2 websocket endpoint, built from the credentials' region.
        var speechConfig = SpeechConfig.FromEndpoint(new Uri($"wss://{credentials.Region}.stt.speech.microsoft.com/speech/universal/v2"), credentials.SubscriptionKey);
        speechConfig.SetProfanity(ProfanityOption.Raw);
        if (options.Languages.Count > 1)
        {
            //enable continuous language detection when we have more than 1 language
            //this seems kind of buggy though, at times the speech recognition just simply doesn't work at all when this is enabled
            speechConfig.SetProperty(PropertyId.SpeechServiceConnection_ContinuousLanguageIdPriority, "Latency");
        }
        // Normalize two-letter language codes into five-letter locales before
        // building the auto-detect config.
        var languageConfig = AutoDetectSourceLanguageConfig.FromLanguages(options.Languages.Select(lang =>
        {
            //convert language selections
            if (lang.Length == 2)
            {
                //two-letter code. select some default five-letter code instead.
                if (lang == "en")
                {
                    lang = "en-US";
                }
                else
                {
                    // e.g. "de" -> "de-DE".
                    lang = lang + "-" + lang.ToUpperInvariant();
                }
            }
            return(lang);
        }).ToArray());
        recognizer = new SpeechRecognizer(speechConfig, languageConfig, AudioConfig);
        //set up the special phrases if any
        if (options.Phrases?.Count > 0)
        {
            var phrases = PhraseListGrammar.FromRecognizer(recognizer);
            foreach (var phrase in options.Phrases)
            {
                phrases.AddPhrase(phrase);
            }
        }
        //prepare events
        recognizer.Canceled += (sender, e) =>
        {
            // The recognizer session is over once Canceled fires: drop the
            // shared reference and release per-session disposables.
            SpeechRecognizer = null;
            Dispose(Disposables);
            if (e.ErrorCode == CancellationErrorCode.Forbidden || e.ErrorCode == CancellationErrorCode.AuthenticationFailure)
            {
                //out of quota (or invalid key, try the next one anyway)
                int credentialsIndexCurrent = CredentialsIndex;
                if (NextCredentials())
                {
                    Logger.LogInformation($"Out of quota for credentials {credentialsIndexCurrent}. Restarting with {CredentialsIndex}");
                    // Fire-and-forget restart with the next credentials set;
                    // we are on an SDK callback thread here, so no await.
                    Threading.Tasks.FireAndForget(() => Start(options));
                    return;
                }
            }
            // EndOfStream / user cancellation are expected shutdown paths —
            // only warn on everything else.
            if (e.Reason != CancellationReason.EndOfStream && e.Reason != CancellationReason.CancelledByUser)
            {
                Logger.LogWarning($"Recognition stopped. reason={e.Reason}, erroCode={e.ErrorCode}, details={e.ErrorDetails}");
            }
            Stopped?.Invoke(this, new SpeechRecognitionStoppedEvent() { Message = $"{e.ErrorCode}: {e.ErrorDetails}" });
        };
        recognizer.Recognizing += (sender, e) => { OnSpeechEvent(e, false); };
        recognizer.Recognized += (sender, e) => { OnSpeechEvent(e, true); };
        recognizer.SpeechEndDetected += (sender, e) => { StreamAudioNoiseGate?.OnAudioStop(); };
        //start recognizing
        await recognizer.StartContinuousRecognitionAsync();
        //start our audio source
        if (!IsRunning && StreamAudioSource != null)
        {
            await StreamAudioSource.Start();
        }
    }
    catch (Exception e)
    {
        // Clean up the half-constructed recognizer before propagating.
        Logger.LogError(e, "Could not start continuous recognition");
        recognizer?.Dispose();
        throw;
    }
    SpeechRecognizer = recognizer;
    IsRunning = true;
    Disposables.Add(recognizer);
}
// Translation using multi-lingual file input with language detection enabled.
public static async Task TranslationWithMultiLingualFileAsync_withLanguageDetectionEnabled()
{
    // <TranslationWithFileAsync>
    // Official v2 endpoint with service region — replace with your region.
    var v2EndpointUrl = new Uri(String.Format("wss://{0}.stt.speech.microsoft.com/speech/universal/v2", "YourServiceRegion"));

    // Translation config from the endpoint; replace with your subscription key.
    var config = SpeechTranslationConfig.FromEndpoint(v2EndpointUrl, "YourSubscriptionKey");

    // The source language is detected by language identification, but
    // SpeechRecognitionLanguage still needs a locale string (it will not be
    // used as the source language). To be fixed in a future SDK version.
    string fromLanguage = "en-US";
    config.SpeechRecognitionLanguage = fromLanguage;

    // Translation target language(s); replace with language(s) of your choice.
    config.AddTargetLanguage("de");
    config.AddTargetLanguage("fr");

    // Language-id property setup; see the language id documentation for modes.
    config.SetProperty(PropertyId.SpeechServiceConnection_ContinuousLanguageIdPriority, "Latency");
    var autoDetectSourceLanguageConfig = AutoDetectSourceLanguageConfig.FromLanguages(new string[] { "en-US", "zh-CN" });

    var stopTranslation = new TaskCompletionSource <int>();

    // Translate from a file; replace with your own audio file name.
    using (var audioInput = AudioConfig.FromWavFileInput(@"en-us_zh-cn.wav"))
    using (var recognizer = new TranslationRecognizer(config, autoDetectSourceLanguageConfig, audioInput))
    {
        // Event subscriptions.
        recognizer.Recognizing += (s, e) =>
        {
            // Note: the detected language result is only available in the v2 endpoint.
            var lidResult = e.Result.Properties.GetProperty(PropertyId.SpeechServiceConnection_AutoDetectSourceLanguageResult);
            Console.WriteLine($"RECOGNIZING in '{lidResult}': Text={e.Result.Text}");
            foreach (var element in e.Result.Translations)
            {
                Console.WriteLine($" TRANSLATING into '{element.Key}': {element.Value}");
            }
        };
        recognizer.Recognized += (s, e) =>
        {
            if (e.Result.Reason == ResultReason.TranslatedSpeech)
            {
                var lidResult = e.Result.Properties.GetProperty(PropertyId.SpeechServiceConnection_AutoDetectSourceLanguageResult);
                Console.WriteLine($"RECOGNIZED in '{lidResult}': Text={e.Result.Text}");
                foreach (var element in e.Result.Translations)
                {
                    Console.WriteLine($" TRANSLATED into '{element.Key}': {element.Value}");
                }
            }
            else if (e.Result.Reason == ResultReason.RecognizedSpeech)
            {
                Console.WriteLine($"RECOGNIZED: Text={e.Result.Text}");
                Console.WriteLine($" Speech not translated.");
            }
            else if (e.Result.Reason == ResultReason.NoMatch)
            {
                Console.WriteLine($"NOMATCH: Speech could not be recognized.");
            }
        };
        recognizer.Canceled += (s, e) =>
        {
            Console.WriteLine($"CANCELED: Reason={e.Reason}");
            if (e.Reason == CancellationReason.Error)
            {
                Console.WriteLine($"CANCELED: ErrorCode={e.ErrorCode}");
                Console.WriteLine($"CANCELED: ErrorDetails={e.ErrorDetails}");
                Console.WriteLine($"CANCELED: Did you update the subscription info?");
            }
            stopTranslation.TrySetResult(0);
        };
        recognizer.SpeechStartDetected += (s, e) => Console.WriteLine("\nSpeech start detected event.");
        recognizer.SpeechEndDetected += (s, e) => Console.WriteLine("\nSpeech end detected event.");
        recognizer.SessionStarted += (s, e) => Console.WriteLine("\nSession started event.");
        recognizer.SessionStopped += (s, e) =>
        {
            Console.WriteLine("\nSession stopped event.");
            Console.WriteLine($"\nStop translation.");
            stopTranslation.TrySetResult(0);
        };

        // Starts continuous recognition. Uses StopContinuousRecognitionAsync() to stop recognition.
        Console.WriteLine("Start translation...");
        await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);

        // Waits for completion; Task.WaitAny keeps the task rooted.
        Task.WaitAny(new[] { stopTranslation.Task });

        // Stops translation.
        await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);
    }
    // </TranslationWithFileAsync>
}
// Translation from the microphone with language detection enabled.
public static async Task TranslationWithMicrophoneAsync_withLanguageDetectionEnabled()
{
    // <TranslationWithMicrophoneAsync>
    // Translation source language; replace with a language of your choice.
    string fromLanguage = "en-US";

    // Voice name of synthesis output.
    const string GermanVoice = "Microsoft Server Speech Text to Speech Voice (de-DE, Hedda)";

    // Translation config; replace with your own subscription key and service
    // region (e.g., "westus").
    var config = SpeechTranslationConfig.FromSubscription("YourSubscriptionKey", "YourServiceRegion");
    // This is required, even when language id is enabled.
    config.SpeechRecognitionLanguage = fromLanguage;
    config.VoiceName = GermanVoice;

    // Translation target language(s); replace with language(s) of your choice.
    config.AddTargetLanguage("de");

    // Language-detection property setup; see the language id documentation
    // for the available modes.
    config.SetProperty(PropertyId.SpeechServiceConnection_SingleLanguageIdPriority, "Latency");
    var autoDetectSourceLanguageConfig = AutoDetectSourceLanguageConfig.FromLanguages(new string[] { "en-US", "de-DE" });

    // Creates a translation recognizer using microphone as audio input.
    using (var recognizer = new TranslationRecognizer(config, autoDetectSourceLanguageConfig))
    {
        // Event subscriptions.
        recognizer.Recognizing += (s, e) =>
        {
            Console.WriteLine($"RECOGNIZING Text={e.Result.Text}");
            foreach (var element in e.Result.Translations)
            {
                Console.WriteLine($" TRANSLATING into '{element.Key}': {element.Value}");
            }
        };
        recognizer.Recognized += (s, e) =>
        {
            if (e.Result.Reason == ResultReason.TranslatedSpeech)
            {
                Console.WriteLine($"RECOGNIZED Text={e.Result.Text}");
                foreach (var element in e.Result.Translations)
                {
                    Console.WriteLine($" TRANSLATED into '{element.Key}': {element.Value}");
                }
            }
            else if (e.Result.Reason == ResultReason.RecognizedSpeech)
            {
                Console.WriteLine($"RECOGNIZED: Text={e.Result.Text}");
                Console.WriteLine($" Speech not translated.");
            }
            else if (e.Result.Reason == ResultReason.NoMatch)
            {
                Console.WriteLine($"NOMATCH: Speech could not be recognized.");
            }
        };
        recognizer.Synthesizing += (s, e) =>
        {
            var audio = e.Result.GetAudio();
            Console.WriteLine(audio.Length != 0
                ? $"AudioSize: {audio.Length}"
                : $"AudioSize: {audio.Length} (end of synthesis data)");
            if (audio.Length > 0)
            {
#if NET461
                // Playback of the synthesized translation (desktop .NET only).
                using (var m = new MemoryStream(audio))
                {
                    SoundPlayer simpleSound = new SoundPlayer(m);
                    simpleSound.PlaySync();
                }
#endif
            }
        };
        recognizer.Canceled += (s, e) =>
        {
            Console.WriteLine($"CANCELED: Reason={e.Reason}");
            if (e.Reason == CancellationReason.Error)
            {
                Console.WriteLine($"CANCELED: ErrorCode={e.ErrorCode}");
                Console.WriteLine($"CANCELED: ErrorDetails={e.ErrorDetails}");
                Console.WriteLine($"CANCELED: Did you update the subscription info?");
            }
        };
        recognizer.SessionStarted += (s, e) => Console.WriteLine("\nSession started event.");
        recognizer.SessionStopped += (s, e) => Console.WriteLine("\nSession stopped event.");

        // Starts continuous recognition. Uses StopContinuousRecognitionAsync() to stop recognition.
        Console.WriteLine("Say something...");
        await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);

        do
        {
            Console.WriteLine("Press Enter to stop");
        } while (Console.ReadKey().Key != ConsoleKey.Enter);

        // Stops continuous recognition.
        await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);
    }
    // </TranslationWithMicrophoneAsync>
}
/// <summary>
/// Wraps the Speech SDK factory that builds an automatic language-detection
/// configuration from a set of candidate locales (e.g. "en-US", "es-MX").
/// </summary>
/// <param name="languagesToDetect">Candidate BCP-47 locale codes to detect among.</param>
/// <returns>An <see cref="AutoDetectSourceLanguageConfig"/> covering the given locales.</returns>
private AutoDetectSourceLanguageConfig AutoMaticLanguageDetection(string[] languagesToDetect)
{
    return AutoDetectSourceLanguageConfig.FromLanguages(languagesToDetect);
}
/// <summary>
/// Continuously recognizes speech from the default microphone, auto-detecting the
/// spoken language among en-US, es-MX, pt-BR and fr-FR, and echoes each result to
/// the console. Stops when an intent is recognized, the session stops, or an error
/// cancels recognition.
/// </summary>
private static async Task RecognizeSpeechAsync()
{
    // NOTE(review): this points at the STS token-issue endpoint with a placeholder
    // key ("MySuscriptionKey") — confirm the real endpoint and credentials.
    var config = SpeechConfig.FromEndpoint(
        new Uri("https://eastus2.api.cognitive.microsoft.com/sts/v1.0/issuetoken"),
        "MySuscriptionKey");

    var autoDetectSourceLanguageConfig = AutoDetectSourceLanguageConfig.FromLanguages(
        new string[] { "en-US", "es-MX", "pt-BR", "fr-FR" });

    using var inputConfig = AudioConfig.FromDefaultMicrophoneInput();
    using var speechRecognition = new SpeechRecognizer(config, autoDetectSourceLanguageConfig, inputConfig);

    // Completed by whichever event ends recognition first. RunContinuationsAsynchronously
    // keeps the SDK's event-callback thread from running our continuation inline.
    var endRecognition = new TaskCompletionSource<int>(TaskCreationOptions.RunContinuationsAsynchronously);

    speechRecognition.Recognized += (s, e) =>
    {
        switch (e.Result.Reason)
        {
            case ResultReason.NoMatch:
                if (!endRecognition.Task.IsCompleted)
                {
                    Console.WriteLine($"::{e.Result.Text}");
                }
                break;
            case ResultReason.Canceled:
                Console.WriteLine($":x:{e.Result.Text}");
                break;
            case ResultReason.RecognizingSpeech:
                Console.WriteLine($":..:{e.Result.Text}");
                break;
            case ResultReason.RecognizedSpeech:
                Console.WriteLine($">:{e.Result.Text}");
                break;
            case ResultReason.RecognizedIntent:
                // An intent ends the session.
                Console.WriteLine($"#:{e.Result.Text}");
                Console.WriteLine($"Saliendo ....");
                endRecognition.TrySetResult(0);
                break;
            default:
                Console.WriteLine($"*:{e.Result.Reason}");
                break;
        }
    };

    speechRecognition.Canceled += (s, e) =>
    {
        if (e.Reason == CancellationReason.Error)
        {
            Console.WriteLine($"ocurrió un error:{e.ErrorCode} => {e.ErrorDetails}");
        }
        endRecognition.TrySetResult(0);
    };

    speechRecognition.SessionStopped += (s, e) =>
    {
        Console.WriteLine("Deteniendo");
        endRecognition.TrySetResult(0);
    };

    await speechRecognition.StartContinuousRecognitionAsync().ConfigureAwait(false);

    // BUG FIX: the original blocked the calling thread with Task.WaitAny inside an
    // async method (deadlock/thread-starvation risk); await the signal instead.
    await endRecognition.Task.ConfigureAwait(false);

    await speechRecognition.StopContinuousRecognitionAsync().ConfigureAwait(false);
}
/// <summary>
/// Transcribes a Telegram voice/audio message and replies with the text.
/// Pipeline: download the Telegram file, convert it to WAV through the Zamzar API,
/// then run one-shot Azure speech recognition with automatic language detection.
/// </summary>
/// <param name="message">Incoming Telegram message carrying a Voice or Audio payload.</param>
public async Task SpeechToText(Message message)
{
    string fileId = message.Voice == null ? message.Audio.FileId : message.Voice.FileId;
    var file = await TelegramBotClient.GetFileAsync(fileId);
    string pathToAudio = GetFilePath(file.FileUniqueId, file.FilePath);

    // BUG FIX: the original blocked with .Wait() and never disposed the stream on
    // failure; await the download inside a using block instead.
    using (var saveFileStream = File.Open(pathToAudio, FileMode.Create))
    {
        await TelegramBotClient.DownloadFileAsync(file.FilePath, saveFileStream);
    }

    string fileType = message.Type == MessageType.Audio ? "file" : "voice";
    await TelegramBotClient.SendTextMessageAsync(
        chatId : message.Chat.Id,
        text : $"We started processing your {fileType} wait couple minute"
    );

    // Telegram voice notes arrive without an extension; normalize to .ogg so the
    // converter recognizes the container.
    if (message.Type == MessageType.Voice && File.Exists(pathToAudio) && !File.Exists(Path.ChangeExtension(pathToAudio, ".ogg")))
    {
        File.Move(pathToAudio, Path.ChangeExtension(pathToAudio, ".ogg"));
        pathToAudio = Path.ChangeExtension(pathToAudio, ".ogg");
    }

    string jobEndpoint = ZamzarSettings.BaseAPIUrl + "jobs";
    // BUG FIX: await instead of blocking on .Result (async-over-sync deadlock risk).
    var zamzarJob = await ZamzarHelper.Upload<ZamzarJobResponseModel>(ZamzarSettings.SecretKey, jobEndpoint, pathToAudio, "wav");

    string getStatusConvertingEndpoint = $"{jobEndpoint}/{zamzarJob.Id}";
    // BUG FIX: the original busy-polled with .Result, no delay, and spun forever if
    // the job never became "successful". Poll asynchronously with a pause and bail
    // out when Zamzar reports the job failed ("failed" per the Zamzar jobs API).
    while (true)
    {
        zamzarJob = await ZamzarHelper.GetSimpleResponse<ZamzarJobResponseModel>(ZamzarSettings.SecretKey, getStatusConvertingEndpoint);
        if (zamzarJob.Status == "successful")
        {
            break;
        }
        if (zamzarJob.Status == "failed")
        {
            await TelegramBotClient.SendTextMessageAsync(
                chatId : message.Chat.Id,
                text : "Audio conversion failed, please try again later"
            );
            return;
        }
        await Task.Delay(TimeSpan.FromSeconds(1));
    }

    string downloadConvertedFileEndpoint = $"{ZamzarSettings.BaseAPIUrl}files/{zamzarJob.TargetFiles.First().Id}/content";
    pathToAudio = Path.ChangeExtension(pathToAudio, ".wav");
    await ZamzarHelper.Download(ZamzarSettings.SecretKey, downloadConvertedFileEndpoint, pathToAudio);

    var config = SpeechConfig.FromSubscription(AzureSpeechToText.SubscriptionKey, AzureSpeechToText.Region);
    var autoDetectSourceLanguageConfig = AutoDetectSourceLanguageConfig.FromLanguages(ZamzarSettings.Languages);
    using var audioInput = AudioConfig.FromWavFileInput(pathToAudio);
    using var recognizer = new SpeechRecognizer(config, autoDetectSourceLanguageConfig, audioInput);

    var speechRecognitionResult = await recognizer.RecognizeOnceAsync();
    string result;
    switch (speechRecognitionResult.Reason)
    {
        case ResultReason.RecognizedSpeech:
            result = speechRecognitionResult.Text;
            break;
        case ResultReason.NoMatch:
            result = "Speech could not be recognized";
            break;
        case ResultReason.Canceled:
            var cancellation = CancellationDetails.FromResult(speechRecognitionResult);
            result = cancellation.ErrorDetails;
            break;
        default:
            // BUG FIX: the original left result null for unexpected reasons and
            // passed null as the Telegram message text.
            result = $"Recognition ended with reason {speechRecognitionResult.Reason}";
            break;
    }

    await TelegramBotClient.SendTextMessageAsync(
        chatId : message.Chat.Id,
        text : result
    );
}
/// <summary>
/// Continuously recognizes speech from a local WAV recording, auto-detecting the
/// spoken language among en-US, es-MX, fr-FR and pt-BR, and echoes each result to
/// the console. Word-level timestamps and audio logging are enabled on the config.
/// Stops when an intent is recognized, the session stops, or an error cancels
/// recognition.
/// </summary>
private static async Task RecognizeRecordFileAsync()
{
    // NOTE(review): this points at the STS token-issue endpoint with a placeholder
    // key ("MySuscriptionKey") — confirm the real endpoint and credentials.
    var config = SpeechConfig.FromEndpoint(
        new Uri("https://eastus2.api.cognitive.microsoft.com/sts/v1.0/issuetoken"),
        "MySuscriptionKey");
    config.RequestWordLevelTimestamps();
    config.EnableAudioLogging();

    var autoDetectSourceLanguageConfig = AutoDetectSourceLanguageConfig.FromLanguages(
        new string[] { "en-US", "es-MX", "fr-FR", "pt-BR" });

    using var inputConfig = AudioConfig.FromWavFileInput(@"D:\Downloads\Llamada con Jorge y 2 más (1).wav");
    using var speechRecognition = new SpeechRecognizer(config, autoDetectSourceLanguageConfig, inputConfig);

    // Completed by whichever event ends recognition first. RunContinuationsAsynchronously
    // keeps the SDK's event-callback thread from running our continuation inline.
    var endRecognition = new TaskCompletionSource<int>(TaskCreationOptions.RunContinuationsAsynchronously);

    speechRecognition.Recognized += (s, e) =>
    {
        switch (e.Result.Reason)
        {
            case ResultReason.NoMatch:
                if (!endRecognition.Task.IsCompleted)
                {
                    Console.WriteLine($"::{e.Result.Text}");
                }
                break;
            case ResultReason.Canceled:
                Console.WriteLine($":x:{e.Result.Text}");
                break;
            case ResultReason.RecognizingSpeech:
                Console.WriteLine($":..:{e.Result.Text}");
                break;
            case ResultReason.RecognizedSpeech:
                Console.WriteLine($">:{e.Result.Text}");
                break;
            case ResultReason.RecognizedIntent:
                // An intent ends the session.
                Console.WriteLine($"#:{e.Result.Text}");
                Console.WriteLine($"Saliendo ....");
                endRecognition.TrySetResult(0);
                break;
            default:
                Console.WriteLine($"*:{e.Result.Reason}");
                break;
        }
    };

    speechRecognition.Canceled += (s, e) =>
    {
        if (e.Reason == CancellationReason.Error)
        {
            Console.WriteLine($"ocurrió un error:{e.ErrorCode} => {e.ErrorDetails}");
        }
        endRecognition.TrySetResult(0);
    };

    speechRecognition.SessionStopped += (s, e) =>
    {
        Console.WriteLine("Deteniendo");
        endRecognition.TrySetResult(0);
    };

    await speechRecognition.StartContinuousRecognitionAsync().ConfigureAwait(false);

    // BUG FIX: the original blocked the calling thread with Task.WaitAny inside an
    // async method (deadlock/thread-starvation risk); await the signal instead.
    // (Also removed ~40 lines of commented-out dead code from the original.)
    await endRecognition.Task.ConfigureAwait(false);

    await speechRecognition.StopContinuousRecognitionAsync().ConfigureAwait(false);
}