Example #1
        public async Task <string> SynthesisToSpeakerAsync(string text)
        {
            var config = GetClient();
            var sb     = new StringBuilder();


            using var synthesizer = new SpeechSynthesizer(config,
                                                          AutoDetectSourceLanguageConfig.FromOpenRange(),
                                                          AudioConfig.FromDefaultSpeakerOutput());

            using var result = await synthesizer.SpeakTextAsync(text);

            if (result.Reason == ResultReason.SynthesizingAudioCompleted)
            {
                sb.AppendLine($"Speech synthesized to speaker for text [{text}]");
            }
            else if (result.Reason == ResultReason.Canceled)
            {
                var cancellation = SpeechSynthesisCancellationDetails.FromResult(result);
                sb.AppendLine($"CANCELED: Reason={cancellation.Reason}");

                if (cancellation.Reason == CancellationReason.Error)
                {
                    sb.AppendLine($"CANCELED: ErrorCode={cancellation.ErrorCode}");
                    sb.AppendLine($"CANCELED: ErrorDetails=[{cancellation.ErrorDetails}]");
                    sb.AppendLine($"CANCELED: Did you update the subscription info?");
                }
            }
            return(sb.ToString());
        }
Example #2
 public SpeechToText(string[] languages)
 {
     _speechConfig = SpeechConfig.FromSubscription(Settings.AzureSpeech.KEY, Settings.AzureSpeech.REGION);
     _speechConfig.RequestWordLevelTimestamps();
     _languagesToDetect = AutoMaticLanguageDetection(languages);
     _stopRecognition   = new TaskCompletionSource <int>();
 }
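The constructor above delegates to an AutoMaticLanguageDetection helper that is not part of the snippet. A minimal sketch of what it might look like, assuming it simply wraps the caller-supplied locale candidates in the SDK's auto-detection config (hypothetical reconstruction, not the original implementation):

 private static AutoDetectSourceLanguageConfig AutoMaticLanguageDetection(string[] languages)
 {
     // Wrap the locale candidates (e.g. "en-US", "de-DE") in an
     // AutoDetectSourceLanguageConfig for the recognizer. (Sketch only;
     // the original helper is not shown.)
     return AutoDetectSourceLanguageConfig.FromLanguages(languages);
 }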
Example #3
        private static async Task SynthesizeSpeechAsync(IConfiguration config, FileInfo textFile)
        {
            var text = await File.ReadAllLinesAsync(textFile.FullName);

            int continueWith = 0;
            int counter      = continueWith;

            foreach (var page in text.Page(25).Skip(continueWith))
            {
                var outputFile = Path.Combine(textFile.DirectoryName, $"{Path.GetFileNameWithoutExtension(textFile.Name)}{counter:D4}.wav");

                Log($"synthesizing page {counter}", ConsoleColor.Gray);

                var speechConfig = SpeechConfig.FromSubscription(config["SubscriptionKey"], config["Region"]);
                using var speech = new SpeechSynthesizer(speechConfig,
                                                         AutoDetectSourceLanguageConfig.FromOpenRange(),
                                                         AudioConfig.FromWavFileOutput(outputFile));

                string textToConvert = string.Join(Environment.NewLine, page);
                var    result        = await speech.SpeakTextAsync(textToConvert);

                if (result.Reason != ResultReason.SynthesizingAudioCompleted)
                {
                    throw new Exception(result.Reason.ToString());
                }

                counter++;
            }
        }
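Example #3 pages through the input with a Page(25) extension method that is not included in the snippet. A plausible implementation, assuming it is a plain batching helper over IEnumerable<T> (hypothetical; the original may differ):

        public static class EnumerableExtensions
        {
            // Splits a sequence into consecutive pages of at most pageSize elements,
            // so the synthesizer above processes 25 lines per output file.
            public static IEnumerable<IReadOnlyList<T>> Page<T>(this IEnumerable<T> source, int pageSize)
            {
                var page = new List<T>(pageSize);
                foreach (var item in source)
                {
                    page.Add(item);
                    if (page.Count == pageSize)
                    {
                        yield return page;
                        page = new List<T>(pageSize);
                    }
                }
                if (page.Count > 0)
                {
                    yield return page;
                }
            }
        }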
Example #4
        public async Task <string> DetectLanguage(byte[] audioBytes, string fileExtension, string locale1, string locale2)
        {
            var wavBytes = ConvertToWaveBytes(audioBytes, fileExtension);

            var autoDetectSourceLanguageConfig = AutoDetectSourceLanguageConfig.FromLanguages(new string[] { locale1, locale2 });

            var config          = SpeechConfig.FromSubscription(SubscriptionKey, SubscriptionRegion);
            var stopRecognition = new TaskCompletionSource <int>();
            var detected        = new List <string>();

            using var pushStream = AudioInputStream.CreatePushStream();
            using (var audioInput = AudioConfig.FromStreamInput(pushStream))
            {
                using var recognizer = new SpeechRecognizer(
                          config,
                          autoDetectSourceLanguageConfig,
                          audioInput);
                pushStream.Write(wavBytes);
                pushStream.Close();

                recognizer.Recognized += (s, e) =>
                {
                    var autoDetectSourceLanguageResult = AutoDetectSourceLanguageResult.FromResult(e.Result);
                    var detectedLanguage = autoDetectSourceLanguageResult.Language;
                    detected.Add(detectedLanguage);
                    if (detected.Count > UtteranceCount)
                    {
                        stopRecognition.TrySetResult(0);
                    }
                };

                recognizer.SessionStopped += (s, e) =>
                {
                    stopRecognition.TrySetResult(0);
                };

                await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);

                _ = Task.Run(() => SetTimeOutForRecognition(stopRecognition)); // fire-and-forget timeout watchdog

                Task.WaitAny(new[] { stopRecognition.Task });

                await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);
            }

            if (detected.Count == 0)
            {
                throw new TimeoutException("Did not get any language identification results back in time.");
            }

            var detectedByCount = detected.GroupBy(i => i);
            var mostFreq        = detectedByCount.OrderBy(t => t.Count()).LastOrDefault().Key;

            if (string.IsNullOrEmpty(mostFreq) || (!mostFreq.Equals(locale1, StringComparison.OrdinalIgnoreCase) && !mostFreq.Equals(locale2, StringComparison.OrdinalIgnoreCase)))
            {
                return(locale1);
            }

            return(mostFreq);
        }
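DetectLanguage also relies on helpers that are not shown: ConvertToWaveBytes, SetTimeOutForRecognition, and an UtteranceCount field. A minimal sketch of the timeout helper, assuming it just completes the stop signal after a fixed delay so the method cannot wait forever (the 30-second value is an assumption):

        private static async Task SetTimeOutForRecognition(TaskCompletionSource <int> stopRecognition)
        {
            // Bound the language-identification wait; TrySetResult is a no-op
            // if recognition already produced enough results or the session stopped.
            await Task.Delay(TimeSpan.FromSeconds(30)).ConfigureAwait(false);
            stopRecognition.TrySetResult(0);
        }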
Example #5
        private async void btnRecord_Click(object sender, EventArgs e)
        {
            btnRecord.BackColor = Color.LightGreen;
            // other fun language codes:
            //fr-FR
            //ja-JP
            //hi-IN
            //de-DE
            var autoDetectSourceLanguageConfig = AutoDetectSourceLanguageConfig.FromLanguages(new string[] { "fr-FR", "hi-IN" });

            using (var recognizer = new SpeechRecognizer(SpeechConfig.FromSubscription("cb35ce20eade4be2a74a36ab2e9d0ac1", "eastus"), autoDetectSourceLanguageConfig))
            {
                var speechRecognitionResult = await recognizer.RecognizeOnceAsync();

                if (speechRecognitionResult.Reason == ResultReason.Canceled)
                {
                    var cancellation = CancellationDetails.FromResult(speechRecognitionResult);
                    MessageBox.Show($"Error: {cancellation.Reason} {cancellation.ErrorDetails}");
                    this.Close();
                    return;
                }
                var autoDetectSourceLanguageResult = AutoDetectSourceLanguageResult.FromResult(speechRecognitionResult);
                var detectedLanguage = autoDetectSourceLanguageResult.Language;
                btnRecord.BackColor = default(Color);
                // detectedLanguage passed on to the OptionsMenu form
                formOptionsMenu = new OptionsMenu(detectedLanguage);
                //pop up Options Menu
                formOptionsMenu.Show();
            }
        }
Example #6
        static SpeechRecognizer user_config_to_speech_recognizer(SpeechConfig speech_config, AudioConfig audio_config, USER_CONFIG user_config)
        {
            SpeechRecognizer speech_recognizer;

            if (true == user_config.language_id_enabled)
            {
/* Note: Continuous language identification is supported only in C#, C++, and Python.
 * See:
 * https://docs.microsoft.com/azure/cognitive-services/speech-service/how-to-automatic-language-detection?pivots=programming-language-cpp#language-identification-with-speech-to-text
 */
                AutoDetectSourceLanguageConfig detect_language_config = AutoDetectSourceLanguageConfig.FromLanguages(user_config.language_id_languages);
                speech_recognizer = new SpeechRecognizer(speech_config, detect_language_config, audio_config);
            }
            else
            {
                speech_recognizer = new SpeechRecognizer(speech_config, audio_config);
            }

            if (true == user_config.phrase_list_enabled)
            {
                PhraseListGrammar grammar = PhraseListGrammar.FromRecognizer(speech_recognizer);
                grammar.AddPhrase(user_config.phrase_list);
            }

            return(speech_recognizer);
        }
Example #7
        async Task <byte[]> SynthesizeAudioAsync(string msg)
        {
            var response         = new byte[] { };
            var autoDetectConfig = AutoDetectSourceLanguageConfig.FromOpenRange();

            using var synthesizer = new SpeechSynthesizer(_speechConfig, autoDetectConfig, AudioConfig.FromDefaultSpeakerOutput());
            using var result      = await synthesizer.SpeakTextAsync(msg);

            if (result.Reason == ResultReason.SynthesizingAudioCompleted)
            {
                response = result.AudioData;
            }
            return(response);
        }
Example #8
        // Speech synthesis with auto detection for source language
        // Note: this is a preview feature, which might be updated in future versions.
        public static async Task SynthesisWithAutoDetectSourceLanguageAsync()
        {
            // Creates an instance of a speech config with specified subscription key and service region.
            // Replace with your own subscription key and service region (e.g., "westus").
            // The default language is "en-us".
            var config = SpeechConfig.FromSubscription("YourSubscriptionKey", "YourServiceRegion");

            // Creates an instance of AutoDetectSourceLanguageConfig with open languages range
            var autoDetectSourceLanguageConfig = AutoDetectSourceLanguageConfig.FromOpenRange();

            // Creates a speech synthesizer with auto detection for source language, using the default speaker as audio output.
            using (var synthesizer = new SpeechSynthesizer(config, autoDetectSourceLanguageConfig,
                                                           AudioConfig.FromDefaultSpeakerOutput()))
            {
                while (true)
                {
                    // Reads multilingual text from console input and synthesizes it to the speaker.
                    // For example, you can input "Bonjour le monde. Hello world.", then you will hear "Bonjour le monde."
                    // spoken in a French voice and "Hello world." in an English voice.
                    Console.WriteLine("Enter some text that you want to speak, or enter empty text to exit.");
                    Console.Write("> ");
                    string text = Console.ReadLine();
                    if (string.IsNullOrEmpty(text))
                    {
                        break;
                    }

                    using (var result = await synthesizer.SpeakTextAsync(text))
                    {
                        if (result.Reason == ResultReason.SynthesizingAudioCompleted)
                        {
                            Console.WriteLine($"Speech synthesized to speaker for text [{text}]");
                        }
                        else if (result.Reason == ResultReason.Canceled)
                        {
                            var cancellation = SpeechSynthesisCancellationDetails.FromResult(result);
                            Console.WriteLine($"CANCELED: Reason={cancellation.Reason}");

                            if (cancellation.Reason == CancellationReason.Error)
                            {
                                Console.WriteLine($"CANCELED: ErrorCode={cancellation.ErrorCode}");
                                Console.WriteLine($"CANCELED: ErrorDetails=[{cancellation.ErrorDetails}]");
                                Console.WriteLine($"CANCELED: Did you update the subscription info?");
                            }
                        }
                    }
                }
            }
        }
Example #9
        public async Task <(ResultReason, string)> ListenAsync()
        {
            var sourceLanguageConfigs = new SourceLanguageConfig[]
            {
                SourceLanguageConfig.FromLanguage("en-US"),
                SourceLanguageConfig.FromLanguage("it-IT")
            };
            var config = SpeechTranslationConfig.FromSubscription(Config.Key, Config.Region);
            var autoDetectSourceLanguageConfig = AutoDetectSourceLanguageConfig.FromSourceLanguageConfigs(sourceLanguageConfigs);

            using var recognizer = new SpeechRecognizer(config, autoDetectSourceLanguageConfig);
            var result = await recognizer.RecognizeOnceAsync();

            return(result.Reason switch
            {
                ResultReason.RecognizedSpeech => (ResultReason.RecognizedSpeech, result.Text),
                _ => (ResultReason.NoMatch, null)
            });
        }
Example #10
        public async Task RecognizeSpeechAsync()
        {
            text     = "Error";
            language = "Error";

            var config =
                SpeechConfig.FromSubscription("54e5c11f4ba84a95a282d180905efeb1", "westus");

            var autoDetectSourceLanguageConfig = AutoDetectSourceLanguageConfig.FromLanguages(new string[] { "en-US", "de-DE", "pl-PL" });

            using var recognizer = new SpeechRecognizer(config, autoDetectSourceLanguageConfig);

            var result = await recognizer.RecognizeOnceAsync();

            var autoDetectSourceLanguageResult = AutoDetectSourceLanguageResult.FromResult(result);
            var detectedLanguage = autoDetectSourceLanguageResult.Language;

            language = detectedLanguage;

            switch (result.Reason)
            {
            case ResultReason.RecognizedSpeech:
                text = result.Text;
                break;

            case ResultReason.NoMatch:
                text = $"NOMATCH: Rozpoznanie nie udało się.";
                break;

            case ResultReason.Canceled:
                var cancellation = CancellationDetails.FromResult(result);

                if (cancellation.Reason == CancellationReason.Error)
                {
                    Debug.WriteLine($"CANCELED: ErrorCode={cancellation.ErrorCode}");
                    Debug.WriteLine($"CANCELED: ErrorDetails={cancellation.ErrorDetails}");
                    Debug.WriteLine($"CANCELED: Did you update the subscription info?");
                }
                text = $"CANCELED: Reason={cancellation.Reason}";
                break;
            }
        }
Example #11
        public static async Task RecognizeLng()
        {
            SpeechConfig speechConfig = SpeechConfig.FromEndpoint(new System.Uri(ConfigurationManager.AppSettings.Get("SpeechEndpoint")), ConfigurationManager.AppSettings.Get("TTSKey"));
            AudioConfig  audioConfig  = AudioConfig.FromDefaultMicrophoneInput();
            AutoDetectSourceLanguageConfig autoDetectSourceLanguageConfig = AutoDetectSourceLanguageConfig
                                                                            .FromLanguages(new string[] { "en-US", "ru-RU" });

            using (var recognizer = new SpeechRecognizer(
                       speechConfig,
                       autoDetectSourceLanguageConfig,
                       audioConfig))
            {
                Console.WriteLine("Say something...");
                var speechRecognitionResult = await recognizer.RecognizeOnceAsync();

                var autoDetectSourceLanguageResult =
                    AutoDetectSourceLanguageResult.FromResult(speechRecognitionResult);
                var detectedLng = autoDetectSourceLanguageResult.Language;
                Console.WriteLine("I recognized " + speechRecognitionResult.Text + " in " + detectedLng);
            }
        }
Example #12
        public VRHandler(Label label, ActivityIndicator indicator)
        {
            micService = DependencyService.Resolve <IMicrophoneService>();

            this.bleHandler     = BLEHandler.GetHandler();
            this.drivingHandler = DrivingHandler.GetHandler();

            this.lText     = label;
            this.indicator = indicator;

            // initialize speech recognizer
            if (recognizer == null)
            {
                var config = SpeechConfig.FromSubscription(Constants.CognitiveServicesApiKey, Constants.CognitiveServicesRegion);
                //string[] languages = { "cs-CZ", "en-US", "en-GB" };
                AutoDetectSourceLanguageConfig lang_config = AutoDetectSourceLanguageConfig.FromLanguages(Constants.GetLanguages);
                recognizer             = new SpeechRecognizer(config, lang_config);
                recognizer.Recognized += (obj, args) =>
                {
                    Process(args.Result.Text);
                };
            }
        }
Example #13
        //
        // Create SpeechRecognizer
        //
        private SpeechRecognizer SpeechRecognizerFromUserConfig()
        {
            AudioConfig      audioConfig  = AudioConfigFromUserConfig();
            SpeechConfig     speechConfig = SpeechConfigFromUserConfig();
            SpeechRecognizer speechRecognizer;

            if (userConfig.languageIDLanguages is string[] languageIDLanguagesValue)
            {
                var autoDetectSourceLanguageConfig = AutoDetectSourceLanguageConfig.FromLanguages(languageIDLanguagesValue);
                speechRecognizer = new SpeechRecognizer(speechConfig, autoDetectSourceLanguageConfig, audioConfig);
            }
            else
            {
                speechRecognizer = new SpeechRecognizer(speechConfig, audioConfig);
            }

            if (this.userConfig.phraseList is string phraseListValue)
            {
                var grammar = PhraseListGrammar.FromRecognizer(speechRecognizer);
                grammar.AddPhrase(phraseListValue);
            }

            return(speechRecognizer);
        }
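Example #13 builds on AudioConfigFromUserConfig and SpeechConfigFromUserConfig, which are not shown. A sketch of the audio half, assuming userConfig exposes an optional input-file path the same way it exposes languageIDLanguages (the inputFile property name is hypothetical):

        private AudioConfig AudioConfigFromUserConfig()
        {
            // Prefer a WAV file when the user supplied one; otherwise use the default microphone.
            return this.userConfig.inputFile is string inputFileValue
                ? AudioConfig.FromWavFileInput(inputFileValue)
                : AudioConfig.FromDefaultMicrophoneInput();
        }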
Example #14
        // Speech recognition with auto detection for source language and custom model
        public static async Task RecognitionWithAutoDetectSourceLanguageAndCustomModelAsync()
        {
            // Creates an instance of a speech config with specified subscription key and service region.
            // Replace with your own subscription key and service region (e.g., "westus").
            var config = SpeechConfig.FromSubscription("YourSubscriptionKey", "YourServiceRegion");

            var sourceLanguageConfigs = new SourceLanguageConfig[]
            {
                // The endpoint id is optional; if not specified, the service will use the default model for en-US.
                // Replace the language with your source language candidate. See https://docs.microsoft.com/azure/cognitive-services/speech-service/language-support for all supported languages.
                SourceLanguageConfig.FromLanguage("en-US"),

                // Replace the id with the CRIS endpoint id of your customized model. If the speech is in fr-FR, the service will use the corresponding customized model for speech recognition
                SourceLanguageConfig.FromLanguage("fr-FR", "YourEndpointId"),
            };

            // Creates an instance of AutoDetectSourceLanguageConfig with the 2 source language configurations
            // Currently this feature only supports 2 different language candidates
            var autoDetectSourceLanguageConfig = AutoDetectSourceLanguageConfig.FromSourceLanguageConfigs(sourceLanguageConfigs);

            var stopRecognition = new TaskCompletionSource <int>();

            // Creates a speech recognizer using the auto detect source language config, and the file as audio input.
            // Replace with your own audio file name.
            using (var audioInput = AudioConfig.FromWavFileInput(@"whatstheweatherlike.wav"))
            {
                using (var recognizer = new SpeechRecognizer(config, autoDetectSourceLanguageConfig, audioInput))
                {
                    recognizer.Recognizing += (s, e) =>
                    {
                        if (e.Result.Reason == ResultReason.RecognizingSpeech)
                        {
                            Console.WriteLine($"RECOGNIZING: Text={e.Result.Text}");
                            // Retrieve the detected language
                            var autoDetectSourceLanguageResult = AutoDetectSourceLanguageResult.FromResult(e.Result);
                            Console.WriteLine($"DETECTED: Language={autoDetectSourceLanguageResult.Language}");
                        }
                    };

                    recognizer.Recognized += (s, e) =>
                    {
                        if (e.Result.Reason == ResultReason.RecognizedSpeech)
                        {
                            Console.WriteLine($"RECOGNIZED: Text={e.Result.Text}");
                            // Retrieve the detected language
                            var autoDetectSourceLanguageResult = AutoDetectSourceLanguageResult.FromResult(e.Result);
                            Console.WriteLine($"DETECTED: Language={autoDetectSourceLanguageResult.Language}");
                        }
                        else if (e.Result.Reason == ResultReason.NoMatch)
                        {
                            Console.WriteLine($"NOMATCH: Speech could not be recognized.");
                        }
                    };

                    recognizer.Canceled += (s, e) =>
                    {
                        Console.WriteLine($"CANCELED: Reason={e.Reason}");

                        if (e.Reason == CancellationReason.Error)
                        {
                            Console.WriteLine($"CANCELED: ErrorCode={e.ErrorCode}");
                            Console.WriteLine($"CANCELED: ErrorDetails={e.ErrorDetails}");
                            Console.WriteLine($"CANCELED: Did you update the subscription info?");
                        }

                        stopRecognition.TrySetResult(0);
                    };

                    recognizer.SessionStarted += (s, e) =>
                    {
                        Console.WriteLine("\n    Session started event.");
                    };

                    recognizer.SessionStopped += (s, e) =>
                    {
                        Console.WriteLine("\n    Session stopped event.");
                        Console.WriteLine("\nStop recognition.");
                        stopRecognition.TrySetResult(0);
                    };

                    // Starts continuous recognition. Uses StopContinuousRecognitionAsync() to stop recognition.
                    await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);

                    // Waits for completion.
                    // Use Task.WaitAny to keep the task rooted.
                    Task.WaitAny(new[] { stopRecognition.Task });

                    // Stops recognition.
                    await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);
                }
            }
        }
Example #15
        public static async Task RunAsync([EventGridTrigger] EventGridEvent eventGridEvent, ILogger log)
        {
            //Extract the content type and URL of the blob that triggered the function
            var jsondata = JsonConvert.SerializeObject(eventGridEvent.Data);
            var tmp      = new { contentType = "", url = "" };
            var data     = JsonConvert.DeserializeAnonymousType(jsondata, tmp);

            //Check whether the trigger was initiated for a WAV file.
            if (data.contentType == "audio/wav")
            {
                var    audioUrl = data.url;
                string blobName = audioUrl.Split('/').Last();

                string contosoStorageConnectionString = System.Environment.GetEnvironmentVariable("ContosoStorageConnectionString", EnvironmentVariableTarget.Process);
                string speechRegion          = System.Environment.GetEnvironmentVariable("SpeechRegion", EnvironmentVariableTarget.Process);
                string speechKey             = System.Environment.GetEnvironmentVariable("SpeechKey", EnvironmentVariableTarget.Process);
                string translatorKey         = System.Environment.GetEnvironmentVariable("TranslatorKey", EnvironmentVariableTarget.Process);
                string translatorEndpoint    = System.Environment.GetEnvironmentVariable("TranslatorEndpoint", EnvironmentVariableTarget.Process);
                string translatorLocation    = System.Environment.GetEnvironmentVariable("TranslatorLocation", EnvironmentVariableTarget.Process);
                string cosmosEndpointUrl     = System.Environment.GetEnvironmentVariable("CosmosDBEndpointUrl", EnvironmentVariableTarget.Process);
                string cosmosPrimaryKey      = System.Environment.GetEnvironmentVariable("CosmosDBPrimaryKey", EnvironmentVariableTarget.Process);
                string textAnalyticsKey      = System.Environment.GetEnvironmentVariable("TextAnalyticsKey", EnvironmentVariableTarget.Process);
                string textAnalyticsEndpoint = System.Environment.GetEnvironmentVariable("TextAnalyticsEndpoint", EnvironmentVariableTarget.Process);

                // Download audio file to a local temp directory
                var tempPath = System.IO.Path.GetTempFileName();
                BlobContainerClient container = new BlobContainerClient(contosoStorageConnectionString, "audiorecordings");
                BlobClient          blob      = container.GetBlobClient(blobName);
                await blob.DownloadToAsync(tempPath);

                var speechConfig = SpeechConfig.FromSubscription(speechKey, speechRegion);
                speechConfig.SetProperty(PropertyId.SpeechServiceConnection_SingleLanguageIdPriority, "Latency");

                // Audio Language Identification
                // Considering only two languages: English and Spanish
                // Languages supported for language detection : https://docs.microsoft.com/azure/cognitive-services/speech-service/language-support
                var    autoDetectSourceLanguageConfig = AutoDetectSourceLanguageConfig.FromLanguages(new string[] { "en-US", "es-MX" });
                string languageDetected = "en-US";
                using (var audioInput = AudioConfig.FromWavFileInput(tempPath))
                {
                    using (var recognizer = new SourceLanguageRecognizer(speechConfig, autoDetectSourceLanguageConfig, audioInput))
                    {
                        var result = await recognizer.RecognizeOnceAsync().ConfigureAwait(false);

                        if (result.Reason == ResultReason.RecognizedSpeech)
                        {
                            var lidResult = AutoDetectSourceLanguageResult.FromResult(result);
                            languageDetected = lidResult.Language;
                        }
                    }
                }
                speechConfig.SpeechRecognitionLanguage = languageDetected;

                // Audio Transcription
                StringBuilder sb = new StringBuilder();
                using (var audioConfig = AudioConfig.FromWavFileInput(tempPath))
                {
                    using (var recognizer = new SpeechRecognizer(speechConfig, audioConfig))
                    {
                        var stopRecognition = new TaskCompletionSource <int>();
                        recognizer.SessionStopped += (s, e) =>
                        {
                            stopRecognition.TrySetResult(0);
                        };
                        recognizer.Canceled += (s, e) =>
                        {
                            stopRecognition.TrySetResult(0);
                        };
                        recognizer.Recognized += (s, e) =>
                        {
                            if (e.Result.Reason == ResultReason.RecognizedSpeech)
                            {
                                sb.Append(e.Result.Text);
                            }
                            else if (e.Result.Reason == ResultReason.NoMatch)
                            {
                                log.LogInformation($"NOMATCH: Speech could not be recognized.");
                            }
                        };
                        await recognizer.StartContinuousRecognitionAsync();

                        Task.WaitAny(new[] { stopRecognition.Task });
                    }
                }
                string transcribedText = sb.ToString();

                // If transcription is in Spanish we will translate it to English
                if (!languageDetected.Contains("en"))
                {
                    string   route           = $"/translate?api-version=3.0&to=en";
                    string   textToTranslate = sb.ToString();
                    object[] body            = new object[] { new { Text = textToTranslate } };
                    var      requestBody     = JsonConvert.SerializeObject(body);

                    using (var client = new HttpClient())
                        using (var request = new HttpRequestMessage())
                        {
                            request.Method     = HttpMethod.Post;
                            request.RequestUri = new Uri(translatorEndpoint + route);
                            request.Content    = new StringContent(requestBody, Encoding.UTF8, "application/json");
                            request.Headers.Add("Ocp-Apim-Subscription-Key", translatorKey);
                            request.Headers.Add("Ocp-Apim-Subscription-Region", translatorLocation);

                            HttpResponseMessage response = await client.SendAsync(request).ConfigureAwait(false);

                            var responseBody = await response.Content.ReadAsStringAsync();

                            List <Model.TranslatorService.Root> translatedDocuments = JsonConvert.DeserializeObject <List <Model.TranslatorService.Root> >(responseBody);
                            transcribedText = translatedDocuments.FirstOrDefault().Translations.FirstOrDefault().Text;
                        }
                }

                //TODO:Azure Text Analytics for Healthcare


                //Insert documents into CosmosDB
                var cosmosClient    = new CosmosClient(cosmosEndpointUrl, cosmosPrimaryKey);
                var cosmosDatabase  = (await cosmosClient.CreateDatabaseIfNotExistsAsync("Contoso")).Database;
                var cosmosContainer = (await cosmosDatabase.CreateContainerIfNotExistsAsync("Transcriptions", "/id")).Container;

                Model.Transcription newTranscription = new Model.Transcription();
                newTranscription.Id           = Guid.NewGuid().ToString();
                newTranscription.DocumentDate = new DateTime(int.Parse(blobName.Substring(0, 4)),
                                                             int.Parse(blobName.Substring(4, 2)), int.Parse(blobName.Substring(6, 2)));
                newTranscription.FileName        = blobName;
                newTranscription.TranscribedText = transcribedText;
                // The following block depends on healthcareResult from the Text Analytics
                // for Health step above, which is still a TODO; it stays commented out
                // until that step populates healthcareResult.
                //foreach (var item in healthcareResult.Entities)
                //{
                //    newTranscription.HealthcareEntities.Add(new Model.HealthcareEntity()
                //    {
                //        Category = item.Category, Text = item.Text
                //    });
                //}

                try
                {
                    ItemResponse <Model.Transcription> cosmosResponse = await
                                                                        cosmosContainer.CreateItemAsync(newTranscription, new PartitionKey(newTranscription.Id));
                }
                catch (CosmosException ex) when(ex.StatusCode == System.Net.HttpStatusCode.Conflict)
                {
                    //Conflicting documents are silently ignored for demo purposes.
                }

                System.IO.File.Delete(tempPath);
                log.LogInformation(eventGridEvent.Data.ToString());
            }
        }
        /// <summary>
        /// Speech recognition with auto detection for source language with universal v2 endpoint
        /// We only support multi-lingual continuous recognition in universal v2 endpoint
        /// </summary>
        public static async Task MultiLingualRecognitionWithUniversalV2Endpiont()
        {
            // Official v2 endpoint
            // Replace the region with your service region
            var v2EndpointInString = String.Format("wss://{0}.stt.speech.microsoft.com/speech/universal/v2", "YourServiceRegion");
            var v2EndpointUrl      = new Uri(v2EndpointInString);

            // Creates an instance of a speech config with specified subscription key.
            // Replace the subscription key with your subscription key
            var config = SpeechConfig.FromEndpoint(v2EndpointUrl, "YourSubscriptionKey");

            // Please refer to the documentation of language id with different modes
            config.SetProperty(PropertyId.SpeechServiceConnection_ContinuousLanguageIdPriority, "Latency");

            // Creates an instance of AutoDetectSourceLanguageConfig with the 2 source language candidates
            // Currently this feature only supports 2 different language candidates
            // Replace the languages with the language candidates for your speech. See https://docs.microsoft.com/azure/cognitive-services/speech-service/language-support for all supported languages
            var autoDetectSourceLanguageConfig = AutoDetectSourceLanguageConfig.FromLanguages(new string[] { "en-US", "zh-CN" });

            var stopRecognition = new TaskCompletionSource <int>();

            // Creates a speech recognizer using the auto detect source language config, and the file as audio input.
            // Replace with your own audio file name.
            using (var audioInput = AudioConfig.FromWavFileInput(@"en-us_zh-cn.wav"))
            {
                using (var recognizer = new SpeechRecognizer(config, autoDetectSourceLanguageConfig, audioInput))
                {
                    // Subscribes to events.
                    recognizer.Recognizing += (s, e) =>
                    {
                        if (e.Result.Reason == ResultReason.RecognizingSpeech)
                        {
                            Console.WriteLine($"RECOGNIZING: Text={e.Result.Text}");
                            // Retrieve the detected language
                            var autoDetectSourceLanguageResult = AutoDetectSourceLanguageResult.FromResult(e.Result);
                            Console.WriteLine($"DETECTED: Language={autoDetectSourceLanguageResult.Language}");
                        }
                    };

                    recognizer.Recognized += (s, e) =>
                    {
                        if (e.Result.Reason == ResultReason.RecognizedSpeech)
                        {
                            Console.WriteLine($"RECOGNIZED: Text={e.Result.Text}");
                            // Retrieve the detected language
                            var autoDetectSourceLanguageResult = AutoDetectSourceLanguageResult.FromResult(e.Result);
                            Console.WriteLine($"DETECTED: Language={autoDetectSourceLanguageResult.Language}");
                        }
                        else if (e.Result.Reason == ResultReason.NoMatch)
                        {
                            Console.WriteLine($"NOMATCH: Speech could not be recognized.");
                        }
                    };

                    recognizer.Canceled += (s, e) =>
                    {
                        Console.WriteLine($"CANCELED: Reason={e.Reason}");

                        if (e.Reason == CancellationReason.Error)
                        {
                            Console.WriteLine($"CANCELED: ErrorCode={e.ErrorCode}");
                            Console.WriteLine($"CANCELED: ErrorDetails={e.ErrorDetails}");
                            Console.WriteLine($"CANCELED: Did you update the subscription info?");
                        }

                        stopRecognition.TrySetResult(0);
                    };

                    recognizer.SessionStarted += (s, e) =>
                    {
                        Console.WriteLine("\n    Session started event.");
                    };

                    recognizer.SessionStopped += (s, e) =>
                    {
                        Console.WriteLine("\n    Session stopped event.");
                        Console.WriteLine("\nStop recognition.");
                        stopRecognition.TrySetResult(0);
                    };

                    // Starts continuous recognition. Uses StopContinuousRecognitionAsync() to stop recognition.
                    await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);

                    // Waits for completion.
                    // Use Task.WaitAny to keep the task rooted.
                    Task.WaitAny(new[] { stopRecognition.Task });

                    // Stops recognition.
                    await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);
                }
            }
        }
Example #17
        public async Task Start(SpeechRecognitionOptions options)
        {
            SpeechRecognizer recognizer = null;

            try
            {
                Logger.LogInformation("Starting speech recognition");

                var credentials = this.Credentials;

                var speechConfig = SpeechConfig.FromEndpoint(new Uri($"wss://{credentials.Region}.stt.speech.microsoft.com/speech/universal/v2"), credentials.SubscriptionKey);
                speechConfig.SetProfanity(ProfanityOption.Raw);

                if (options.Languages.Count > 1)
                {
                    //enable continuous language detection when we have more than 1 language
                    //this seems kind of buggy though, at times the speech recognition just simply doesn't work at all when this is enabled
                    speechConfig.SetProperty(PropertyId.SpeechServiceConnection_ContinuousLanguageIdPriority, "Latency");
                }

                var languageConfig = AutoDetectSourceLanguageConfig.FromLanguages(options.Languages.Select(lang =>
                {
                    //convert language selections
                    if (lang.Length == 2)
                    {
                        //two-letter code. select some default five-letter code instead.
                        if (lang == "en")
                        {
                            lang = "en-US";
                        }
                        else
                        {
                            lang = lang + "-" + lang.ToUpperInvariant();
                        }
                    }
                    return(lang);
                }).ToArray());

                recognizer = new SpeechRecognizer(speechConfig, languageConfig, AudioConfig);

                //set up the special phrases if any
                if (options.Phrases?.Count > 0)
                {
                    var phrases = PhraseListGrammar.FromRecognizer(recognizer);
                    foreach (var phrase in options.Phrases)
                    {
                        phrases.AddPhrase(phrase);
                    }
                }

                //prepare events
                recognizer.Canceled += (sender, e) =>
                {
                    SpeechRecognizer = null;
                    Dispose(Disposables);

                    if (e.ErrorCode == CancellationErrorCode.Forbidden || e.ErrorCode == CancellationErrorCode.AuthenticationFailure)
                    {
                        //out of quota (or invalid key, try the next one anyway)
                        int credentialsIndexCurrent = CredentialsIndex;
                        if (NextCredentials())
                        {
                            Logger.LogInformation($"Out of quota for credentials {credentialsIndexCurrent}. Restarting with {CredentialsIndex}");

                            Threading.Tasks.FireAndForget(() => Start(options));
                            return;
                        }
                    }

                    if (e.Reason != CancellationReason.EndOfStream && e.Reason != CancellationReason.CancelledByUser)
                    {
                        Logger.LogWarning($"Recognition stopped. reason={e.Reason}, errorCode={e.ErrorCode}, details={e.ErrorDetails}");
                    }

                    Stopped?.Invoke(this, new SpeechRecognitionStoppedEvent()
                    {
                        Message = $"{e.ErrorCode}: {e.ErrorDetails}"
                    });
                };
                recognizer.Recognizing += (sender, e) =>
                {
                    OnSpeechEvent(e, false);
                };
                recognizer.Recognized += (sender, e) =>
                {
                    OnSpeechEvent(e, true);
                };
                recognizer.SpeechEndDetected += (sender, e) =>
                {
                    StreamAudioNoiseGate?.OnAudioStop();
                };

                //start recognizing
                await recognizer.StartContinuousRecognitionAsync();

                //start our audio source
                if (!IsRunning && StreamAudioSource != null)
                {
                    await StreamAudioSource.Start();
                }
            }
            catch (Exception e)
            {
                Logger.LogError(e, "Could not start continuous recognition");

                recognizer?.Dispose();
                throw;
            }

            SpeechRecognizer = recognizer;
            IsRunning        = true;

            Disposables.Add(recognizer);
        }
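The Canceled handler above rotates to another subscription via NextCredentials(), which is not shown. A minimal sketch, assuming the credentials are kept in an array indexed by CredentialsIndex (the AllCredentials field is hypothetical; only Credentials and CredentialsIndex appear in the snippet):

        private bool NextCredentials()
        {
            // Advance to the next key/region pair if one remains; the caller
            // restarts recognition with the new credentials. (Sketch only.)
            if (CredentialsIndex + 1 >= AllCredentials.Length)
            {
                return false;
            }
            CredentialsIndex++;
            return true;
        }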
Example #18
        // Translation using multi-lingual file input.
        public static async Task TranslationWithMultiLingualFileAsync_withLanguageDetectionEnabled()
        {
            // <TranslationWithFileAsync>
            // Official v2 endpoint with service region
            // Please replace the service region with your region
            var v2EndpointInString = String.Format("wss://{0}.stt.speech.microsoft.com/speech/universal/v2", "YourServiceRegion");
            var v2EndpointUrl      = new Uri(v2EndpointInString);

            // Creates an instance of a speech translation config with specified subscription key and service region.
            // Please replace the service subscription key with your subscription key
            var config = SpeechTranslationConfig.FromEndpoint(v2EndpointUrl, "YourSubscriptionKey");

            // Sets source languages
            // The source language will be detected by the language detection feature.
            // However, SpeechRecognitionLanguage still needs to be set to a locale string, even though it will not be used as the source language.
            // This will be fixed in a future version of Speech SDK.
            string fromLanguage = "en-US";

            config.SpeechRecognitionLanguage = fromLanguage;

            // Translation target language(s).
            // Replace with language(s) of your choice.
            config.AddTargetLanguage("de");
            config.AddTargetLanguage("fr");

            // Setup Language id property
            // Please refer to the documentation of language id with different modes
            config.SetProperty(PropertyId.SpeechServiceConnection_ContinuousLanguageIdPriority, "Latency");
            var autoDetectSourceLanguageConfig = AutoDetectSourceLanguageConfig.FromLanguages(new string[] { "en-US", "zh-CN" });

            var stopTranslation = new TaskCompletionSource <int>();

            // Creates a translation recognizer using file as audio input.
            // Replace with your own audio file name.
            using (var audioInput = AudioConfig.FromWavFileInput(@"en-us_zh-cn.wav"))
            {
                using (var recognizer = new TranslationRecognizer(config, autoDetectSourceLanguageConfig, audioInput))
                {
                    // Subscribes to events.
                    recognizer.Recognizing += (s, e) =>
                    {
                        // Note: the detected language result is only available with the v2 endpoint
                        var lidResult = e.Result.Properties.GetProperty(PropertyId.SpeechServiceConnection_AutoDetectSourceLanguageResult);

                        Console.WriteLine($"RECOGNIZING in '{lidResult}': Text={e.Result.Text}");
                        foreach (var element in e.Result.Translations)
                        {
                            Console.WriteLine($"    TRANSLATING into '{element.Key}': {element.Value}");
                        }
                    };

                    recognizer.Recognized += (s, e) => {
                        if (e.Result.Reason == ResultReason.TranslatedSpeech)
                        {
                            var lidResult = e.Result.Properties.GetProperty(PropertyId.SpeechServiceConnection_AutoDetectSourceLanguageResult);

                            Console.WriteLine($"RECOGNIZED in '{lidResult}': Text={e.Result.Text}");
                            foreach (var element in e.Result.Translations)
                            {
                                Console.WriteLine($"    TRANSLATED into '{element.Key}': {element.Value}");
                            }
                        }
                        else if (e.Result.Reason == ResultReason.RecognizedSpeech)
                        {
                            Console.WriteLine($"RECOGNIZED: Text={e.Result.Text}");
                            Console.WriteLine($"    Speech not translated.");
                        }
                        else if (e.Result.Reason == ResultReason.NoMatch)
                        {
                            Console.WriteLine($"NOMATCH: Speech could not be recognized.");
                        }
                    };

                    recognizer.Canceled += (s, e) =>
                    {
                        Console.WriteLine($"CANCELED: Reason={e.Reason}");

                        if (e.Reason == CancellationReason.Error)
                        {
                            Console.WriteLine($"CANCELED: ErrorCode={e.ErrorCode}");
                            Console.WriteLine($"CANCELED: ErrorDetails={e.ErrorDetails}");
                            Console.WriteLine($"CANCELED: Did you update the subscription info?");
                        }

                        stopTranslation.TrySetResult(0);
                    };

                    recognizer.SpeechStartDetected += (s, e) => {
                        Console.WriteLine("\nSpeech start detected event.");
                    };

                    recognizer.SpeechEndDetected += (s, e) => {
                        Console.WriteLine("\nSpeech end detected event.");
                    };

                    recognizer.SessionStarted += (s, e) => {
                        Console.WriteLine("\nSession started event.");
                    };

                    recognizer.SessionStopped += (s, e) => {
                        Console.WriteLine("\nSession stopped event.");
                        Console.WriteLine($"\nStop translation.");
                        stopTranslation.TrySetResult(0);
                    };

                    // Starts continuous recognition. Uses StopContinuousRecognitionAsync() to stop recognition.
                    Console.WriteLine("Start translation...");
                    await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);

                    // Waits for completion.
                    // Use Task.WaitAny to keep the task rooted.
                    Task.WaitAny(new[] { stopTranslation.Task });

                    // Stops translation.
                    await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);
                }
            }
            // </TranslationWithFileAsync>
        }
Example #19
        public async Task SpeechToText(Message message)
        {
            string fileId = message.Voice == null ? message.Audio.FileId : message.Voice.FileId;
            var    file   = await TelegramBotClient.GetFileAsync(fileId);

            string pathToAudio    = GetFilePath(file.FileUniqueId, file.FilePath);
            var    saveFileStream = File.Open(pathToAudio, FileMode.Create);

            await TelegramBotClient.DownloadFileAsync(file.FilePath, saveFileStream);
            saveFileStream.Close();

            string fileType = message.Type == MessageType.Audio ? "file" : "voice";
            await TelegramBotClient.SendTextMessageAsync(
                chatId : message.Chat.Id,
                text : $"We started processing your {fileType} wait couple minute"
                );

            if (message.Type == MessageType.Voice && File.Exists(pathToAudio) && !File.Exists(Path.ChangeExtension(pathToAudio, ".ogg")))
            {
                File.Move(pathToAudio, Path.ChangeExtension(pathToAudio, ".ogg"));
                pathToAudio = Path.ChangeExtension(pathToAudio, ".ogg");
            }

            string jobEndpoint = ZamzarSettings.BaseAPIUrl + "jobs";

            var zamzarJob = ZamzarHelper.Upload <ZamzarJobResponseModel>(ZamzarSettings.SecretKey, jobEndpoint, pathToAudio, "wav").Result;

            string getStatusConvertingEndpoint = $"{jobEndpoint}/{zamzarJob.Id}";

            while (true)
            {
                zamzarJob = ZamzarHelper.GetSimpleResponse <ZamzarJobResponseModel>(ZamzarSettings.SecretKey, getStatusConvertingEndpoint).Result;
                if (zamzarJob.Status == "successful")
                {
                    break;
                }
                await Task.Delay(TimeSpan.FromSeconds(1)); // avoid hammering the conversion API while polling
            }

            string downloadConvertedFileEndpoint = $"{ZamzarSettings.BaseAPIUrl}files/{zamzarJob.TargetFiles.First().Id}/content";

            pathToAudio = Path.ChangeExtension(pathToAudio, ".wav");

            await ZamzarHelper.Download(ZamzarSettings.SecretKey, downloadConvertedFileEndpoint, pathToAudio);

            var config = SpeechConfig.FromSubscription(AzureSpeechToText.SubscriptionKey, AzureSpeechToText.Region);
            var autoDetectSourceLanguageConfig = AutoDetectSourceLanguageConfig.FromLanguages(ZamzarSettings.Languages);

            using var audioInput = AudioConfig.FromWavFileInput(pathToAudio);
            using var recognizer = new SpeechRecognizer(config, autoDetectSourceLanguageConfig, audioInput);

            var speechRecognitionResult = await recognizer.RecognizeOnceAsync();

            string result = null;

            switch (speechRecognitionResult.Reason)
            {
            case ResultReason.RecognizedSpeech:
                result = speechRecognitionResult.Text;
                break;

            case ResultReason.NoMatch:
                result = "Speech could not be recognized";
                break;

            case ResultReason.Canceled:
                var cancellation = CancellationDetails.FromResult(speechRecognitionResult);
                result = cancellation.ErrorDetails;
                break;
            }

            await TelegramBotClient.SendTextMessageAsync(
                chatId : message.Chat.Id,
                text : result
                );
        }
Example #20
        private async static Task RecognizeSpeechAsync()
        {
            var config = SpeechConfig.FromEndpoint(
                new Uri("https://eastus2.api.cognitive.microsoft.com/sts/v1.0/issuetoken"),
                "MySuscriptionKey");
            //config.SetProperty("ConversationTranscriptionInRoomAndOnline", "true");
            //config.RequestWordLevelTimestamps();
            //config.EnableAudioLogging();
            //config.SpeechSynthesisLanguage = "en-US";
            //config.SpeechRecognitionLanguage = "en-US";
            var autoDetectSourceLanguageConfig = AutoDetectSourceLanguageConfig.FromLanguages(
                new string[] { "en-US", "es-MX", "pt-BR", "fr-FR" });

            using var inputConfig       = AudioConfig.FromDefaultMicrophoneInput();
            using var speechRecognition = new SpeechRecognizer(config, autoDetectSourceLanguageConfig, inputConfig);

            var endRecognition = new TaskCompletionSource <int>();

            speechRecognition.Recognized += (s, e) =>
            {
                switch (e.Result.Reason)
                {
                case ResultReason.NoMatch:
                    if (!endRecognition.Task.IsCompleted)
                    {
                        Console.WriteLine($"::{e.Result.Text}");
                    }
                    break;

                case ResultReason.Canceled:
                    Console.WriteLine($":x:{e.Result.Text}");
                    break;

                case ResultReason.RecognizingSpeech:
                    Console.WriteLine($":..:{e.Result.Text}");
                    break;

                case ResultReason.RecognizedSpeech:
                    Console.WriteLine($">:{e.Result.Text}");
                    break;

                case ResultReason.RecognizedIntent:
                    Console.WriteLine($"#:{e.Result.Text}");
                    Console.WriteLine($"Saliendo ....");
                    endRecognition.TrySetResult(0);
                    break;

                default:
                    Console.WriteLine($"*:{e.Result.Reason}");
                    break;
                }
            };

            speechRecognition.Canceled += (s, e) =>
            {
                if (e.Reason == CancellationReason.Error)
                {
                    Console.WriteLine($"ocurrió un error:{e.ErrorCode} => {e.ErrorDetails}");
                }

                endRecognition.TrySetResult(0);
            };

            speechRecognition.SessionStopped += (s, e) =>
            {
                Console.WriteLine("Deteniendo");
                endRecognition.TrySetResult(0);
            };

            await speechRecognition.StartContinuousRecognitionAsync().ConfigureAwait(false);

            Task.WaitAny(new[] { endRecognition.Task });
            await speechRecognition.StopContinuousRecognitionAsync().ConfigureAwait(false);
        }
Example #21
        internal async Task <StringBuilder> AzSpeechtoText(string filePath)
        {
            StringBuilder sb        = new StringBuilder();
            StringBuilder sbConsole = new StringBuilder();
            var           config    = SpeechConfig.FromEndpoint(
                new Uri("https://eastus2.api.cognitive.microsoft.com/sts/v1.0/issuetoken"),
                "MySuscriptionKey");

            config.EnableDictation();
            config.RequestWordLevelTimestamps();
            config.EnableAudioLogging();

            /* var autoDetectSourceLanguageConfig = AutoDetectSourceLanguageConfig.FromLanguages(
             *  new string[] { "en-US", "es-ES", "fr-FR", "pt-BR" }); */

            var sourceLanguageConfigs = new SourceLanguageConfig[]
            {
                SourceLanguageConfig.FromLanguage("en-US"),
                SourceLanguageConfig.FromLanguage("fr-FR", "The Endpoint Id for custom model of fr-FR"),
                SourceLanguageConfig.FromLanguage("es-ES"),
                SourceLanguageConfig.FromLanguage("pt-BR", "The Endpoint Id for custom model of pt-BR")
            };

            var autoDetectSourceLanguageConfig = AutoDetectSourceLanguageConfig.FromSourceLanguageConfigs(sourceLanguageConfigs);

            using var inputConfig       = AudioConfig.FromWavFileInput(filePath);
            using var speechRecognition = new SpeechRecognizer(config, autoDetectSourceLanguageConfig, inputConfig);

            var endRecognition = new TaskCompletionSource <int>();

            speechRecognition.Recognized += (s, e) =>
            {
                switch (e.Result.Reason)
                {
                case ResultReason.NoMatch:
                    if (!endRecognition.Task.IsCompleted)
                    {
                        sbConsole.AppendLine(e.Result.Text);
                    }
                    break;

                case ResultReason.Canceled:
                    sbConsole.AppendLine(e.Result.Text);
                    break;

                case ResultReason.RecognizingSpeech:
                    sb.AppendLine(e.Result.Text);
                    break;

                case ResultReason.RecognizedSpeech:
                    sb.AppendLine(e.Result.Text);
                    break;

                case ResultReason.RecognizedIntent:
                    sbConsole.AppendLine(e.Result.Text);
                    endRecognition.TrySetResult(0);
                    break;

                default:
                    sbConsole.AppendLine(e.Result.Text);
                    break;
                }
            };

            speechRecognition.Canceled += (s, e) =>
            {
                if (e.Reason == CancellationReason.Error)
                {
                    sbConsole.AppendLine($"ocurrió un error:{e.ErrorCode} => {e.ErrorDetails}");
                }

                endRecognition.TrySetResult(0);
            };

            speechRecognition.SessionStopped += (s, e) =>
            {
                sbConsole.AppendLine("##End Transcript##");
                endRecognition.TrySetResult(0);
            };

            await speechRecognition.StartContinuousRecognitionAsync().ConfigureAwait(false);

            await endRecognition.Task.ConfigureAwait(false);
            await speechRecognition.StopContinuousRecognitionAsync().ConfigureAwait(false);

            sb.Append(sbConsole);
            return(sb);
        }
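
Since FromEndpoint is pointed at the token-issuing URL above, a common alternative is to exchange the subscription key for a short-lived authorization token and build the config from that. A minimal sketch, assuming the same region and placeholder key (System.Net.Http is required):

            // Minimal sketch: POST to the sts/issuetoken endpoint to obtain a token,
            // then create the SpeechConfig from the token instead of the raw key.
            using var http = new HttpClient();
            http.DefaultRequestHeaders.Add("Ocp-Apim-Subscription-Key", "MySubscriptionKey");
            var response = await http.PostAsync(
                "https://eastus2.api.cognitive.microsoft.com/sts/v1.0/issuetoken", null);
            var token  = await response.Content.ReadAsStringAsync();
            var config = SpeechConfig.FromAuthorizationToken(token, "eastus2");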
Beispiel #22
0
        private async static Task RecognizeRecordFileAsync()
        {
            // As in the previous example, the sts/issuetoken URL is a placeholder,
            // not a recognition endpoint.
            var config = SpeechConfig.FromEndpoint(
                new Uri("https://eastus2.api.cognitive.microsoft.com/sts/v1.0/issuetoken"),
                "MySubscriptionKey");

            //config.SpeechRecognitionLanguage = "en-US";
            //config.EnableDictation();
            config.RequestWordLevelTimestamps();
            config.EnableAudioLogging();

            var autoDetectSourceLanguageConfig = AutoDetectSourceLanguageConfig.FromLanguages(
                new string[] { "en-US", "es-MX", "fr-FR", "pt-BR" });

            // A single-shot (RecognizeOnceAsync) alternative, including how to read
            // the detected language from the result, is sketched after this example.

            using var inputConfig       = AudioConfig.FromWavFileInput(@"D:\Downloads\Llamada con Jorge y 2 más (1).wav");
            using var speechRecognition = new SpeechRecognizer(config, autoDetectSourceLanguageConfig, inputConfig);

            var endRecognition = new TaskCompletionSource <int>();

            speechRecognition.Recognized += (s, e) =>
            {
                switch (e.Result.Reason)
                {
                case ResultReason.NoMatch:
                    if (!endRecognition.Task.IsCompleted)
                    {
                        Console.WriteLine($"::{e.Result.Text}");
                    }
                    break;

                case ResultReason.Canceled:
                    Console.WriteLine($":x:{e.Result.Text}");
                    break;

                case ResultReason.RecognizingSpeech:
                    Console.WriteLine($":..:{e.Result.Text}");
                    break;

                case ResultReason.RecognizedSpeech:
                    Console.WriteLine($">:{e.Result.Text}");
                    break;

                case ResultReason.RecognizedIntent:
                    Console.WriteLine($"#:{e.Result.Text}");
                    Console.WriteLine($"Saliendo ....");
                    endRecognition.TrySetResult(0);
                    break;

                default:
                    Console.WriteLine($"*:{e.Result.Reason}");
                    break;
                }
            };

            speechRecognition.Canceled += (s, e) =>
            {
                if (e.Reason == CancellationReason.Error)
                {
                    Console.WriteLine($"ocurrió un error:{e.ErrorCode} => {e.ErrorDetails}");
                }

                endRecognition.TrySetResult(0);
            };

            speechRecognition.SessionStopped += (s, e) =>
            {
                Console.WriteLine("Deteniendo");
                endRecognition.TrySetResult(0);
            };

            await speechRecognition.StartContinuousRecognitionAsync().ConfigureAwait(false);

            await endRecognition.Task.ConfigureAwait(false);
            await speechRecognition.StopContinuousRecognitionAsync().ConfigureAwait(false);
        }
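
Here is the single-shot variant referenced at the top of this example: one RecognizeOnceAsync call, with the detected language read back through AutoDetectSourceLanguageResult, using the same recognizer setup as above.

            // Single-shot alternative: recognize once, then inspect which source
            // language the auto-detect configuration settled on.
            var result = await speechRecognition.RecognizeOnceAsync();
            if (result.Reason == ResultReason.RecognizedSpeech)
            {
                var detected = AutoDetectSourceLanguageResult.FromResult(result);
                Console.WriteLine($">:{result.Text} [{detected.Language}]");
            }
            else
            {
                Console.WriteLine($"*:{result.Reason}");
            }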
Beispiel #23
0
        private AutoDetectSourceLanguageConfig AutoMaticLanguageDetection(string[] languagesToDetect)
            => AutoDetectSourceLanguageConfig.FromLanguages(languagesToDetect);

        public async IAsyncEnumerable <RecognitionWord> ContinuousRecognition(IWaveProvider audioSource, [EnumeratorCancellation] CancellationToken cancellation, IAsyncEnumerable <string>?sourceLangs, IAsyncEnumerable <string>?phrases)
        {
            var config      = SpeechConfig.FromSubscription(_key, _region);
            var audioConfig = AudioConfig.FromStreamInput(new PullAdapter(audioSource, 24000), AudioStreamFormat.GetWaveFormatPCM(24000, 16, 1));

            // Always include en-GB alongside any caller-supplied candidate languages.
            var languages = await (sourceLangs ?? Array.Empty<string>().ToAsyncEnumerable())
                            .Append("en-GB")
                            .ToArrayAsync(cancellation);

            using var recogniser = new SpeechRecognizer(config,
                                                        AutoDetectSourceLanguageConfig.FromLanguages(languages),
                                                        audioConfig);

            // Add some likely words to the phrase dictionary
            var phraseList = PhraseListGrammar.FromRecognizer(recogniser);

            phraseList.AddPhrase("mute");
            phraseList.AddPhrase("discord");
            phraseList.AddPhrase("stop");
            if (phrases != null)
            {
                await foreach (var phrase in phrases.WithCancellation(cancellation))
                {
                    phraseList.AddPhrase(phrase);
                }
            }

            // Subscribe to recogniser results
            var results = new ConcurrentQueue <RecognitionWord>();

            recogniser.Recognized += (_, e) =>
            {
                if (e.Result.Reason == ResultReason.RecognizedSpeech)
                {
                    results.Enqueue(new RecognitionWord(e.Result.Text));
                }
                else if (e.Result.Reason == ResultReason.NoMatch)
                {
                    results.Enqueue(new RecognitionWord(null));
                }
            };

            recogniser.Canceled += (s, e) =>
            {
                Console.WriteLine($"CANCELED: Reason={e.Reason}");

                if (e.Reason == CancellationReason.Error)
                {
                    results.Enqueue(new RecognitionWord($"CANCELED: ErrorCode={e.ErrorCode}"));
                    results.Enqueue(new RecognitionWord($"CANCELED: ErrorDetails={e.ErrorDetails}"));
                    results.Enqueue(new RecognitionWord($"CANCELED: Did you update the subscription info?"));
                }
            };

            recogniser.SessionStarted += (_, e) =>
            {
                results.Enqueue(new RecognitionWord("Session_started_event."));
            };

            var stopped = false;

            recogniser.SessionStopped += (_, e) =>
            {
                results.Enqueue(new RecognitionWord("Session_stopped_event."));
                stopped = true;
            };

            // Return recognised results until cancelled
            await recogniser.StartContinuousRecognitionAsync();

            while (!cancellation.IsCancellationRequested && !stopped)
            {
                if (results.TryDequeue(out var r))
                {
                    yield return(r);
                }
                else
                {
                    // Yield briefly to avoid a tight busy-wait while the queue is empty.
                    await Task.Delay(25);
                }
            }

            // Stop receiving further results
            await recogniser.StopContinuousRecognitionAsync();

            // Finish sending remaining results
            foreach (var result in results)
            {
                yield return(result);
            }
        }
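
A hypothetical call site for the iterator above; the instance name, the wave source, and the assumption that RecognitionWord exposes its text as a Text property are placeholders, not part of the original:

            // Hypothetical usage: drain recognized words until the token is cancelled.
            // speechToText and waveProvider are placeholder names; waveProvider is any
            // NAudio IWaveProvider, and the language/phrase streams may be null.
            using var cts = new CancellationTokenSource(TimeSpan.FromMinutes(1));
            await foreach (var word in speechToText.ContinuousRecognition(waveProvider, cts.Token, null, null))
            {
                Console.WriteLine(word.Text ?? "<no match>");
            }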
        // Translation from microphone.
        public static async Task TranslationWithMicrophoneAsync_withLanguageDetectionEnabled()
        {
            // <TranslationWithMicrophoneAsync>
            // Translation source language.
            // Replace with a language of your choice.
            string fromLanguage = "en-US";

            // Voice name of synthesis output.
            const string GermanVoice = "Microsoft Server Speech Text to Speech Voice (de-DE, Hedda)";

            // Creates an instance of a speech translation config with specified subscription key and service region.
            // Replace with your own subscription key and service region (e.g., "westus").
            var config = SpeechTranslationConfig.FromSubscription("YourSubscriptionKey", "YourServiceRegion");

            // This is required, even when language id is enabled.
            config.SpeechRecognitionLanguage = fromLanguage;
            config.VoiceName = GermanVoice;

            // Translation target language(s).
            // Replace with language(s) of your choice.
            config.AddTargetLanguage("de");

            // Set the language detection priority property.
            // Refer to the language identification documentation for the available modes.
            config.SetProperty(PropertyId.SpeechServiceConnection_SingleLanguageIdPriority, "Latency");
            var autoDetectSourceLanguageConfig = AutoDetectSourceLanguageConfig.FromLanguages(new string[] { "en-US", "de-DE" });

            // Creates a translation recognizer using microphone as audio input.
            using (var recognizer = new TranslationRecognizer(config, autoDetectSourceLanguageConfig))
            {
                // Subscribes to events.
                recognizer.Recognizing += (s, e) =>
                {
                    Console.WriteLine($"RECOGNIZING Text={e.Result.Text}");
                    foreach (var element in e.Result.Translations)
                    {
                        Console.WriteLine($" TRANSLATING into '{element.Key}': {element.Value}");
                    }
                };

                recognizer.Recognized += (s, e) =>
                {
                    if (e.Result.Reason == ResultReason.TranslatedSpeech)
                    {
                        Console.WriteLine($"RECOGNIZED Text={e.Result.Text}");
                        foreach (var element in e.Result.Translations)
                        {
                            Console.WriteLine($"    TRANSLATED into '{element.Key}': {element.Value}");
                        }
                    }
                    else if (e.Result.Reason == ResultReason.RecognizedSpeech)
                    {
                        Console.WriteLine($"RECOGNIZED: Text={e.Result.Text}");
                        Console.WriteLine($"    Speech not translated.");
                    }
                    else if (e.Result.Reason == ResultReason.NoMatch)
                    {
                        Console.WriteLine($"NOMATCH: Speech could not be recognized.");
                    }
                };

                recognizer.Synthesizing += (s, e) =>
                {
                    var audio = e.Result.GetAudio();
                    Console.WriteLine(audio.Length != 0
                        ? $"AudioSize: {audio.Length}"
                        : $"AudioSize: {audio.Length} (end of synthesis data)");

                    if (audio.Length > 0)
                    {
#if NET461
                        using (var m = new MemoryStream(audio))
                        {
                            SoundPlayer simpleSound = new SoundPlayer(m);
                            simpleSound.PlaySync();
                        }
#endif
                    }
                };

                recognizer.Canceled += (s, e) =>
                {
                    Console.WriteLine($"CANCELED: Reason={e.Reason}");

                    if (e.Reason == CancellationReason.Error)
                    {
                        Console.WriteLine($"CANCELED: ErrorCode={e.ErrorCode}");
                        Console.WriteLine($"CANCELED: ErrorDetails={e.ErrorDetails}");
                        Console.WriteLine($"CANCELED: Did you update the subscription info?");
                    }
                };

                recognizer.SessionStarted += (s, e) =>
                {
                    Console.WriteLine("\nSession started event.");
                };

                recognizer.SessionStopped += (s, e) =>
                {
                    Console.WriteLine("\nSession stopped event.");
                };

                // Starts continuous recognition. Uses StopContinuousRecognitionAsync() to stop recognition.
                Console.WriteLine("Say something...");
                await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);

                Console.WriteLine("Press Enter to stop");
                while (Console.ReadKey(intercept: true).Key != ConsoleKey.Enter)
                {
                }

                // Stops continuous recognition.
                await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);
            }
            // </TranslationWithMicrophoneAsync>
        }