Example 1
        public static async Task TranslateSpeechToText()
        {
            // Sets source and target languages.
            // Replace with the languages of your choice, from list found here: https://aka.ms/speech/sttt-languages
            string fromLanguage = "en-US";
            string toLanguage   = "de";

            // 'config' is assumed to be a class-level SpeechTranslationConfig,
            // e.g. created with SpeechTranslationConfig.FromSubscription(key, region).
            config.SpeechRecognitionLanguage = fromLanguage;
            config.AddTargetLanguage(toLanguage);

            // Creates a translation recognizer using the default microphone audio input device.
            using (var recognizer = new TranslationRecognizer(config))
            {
                // Starts translation, and returns after a single utterance is recognized. The end of a
                // single utterance is determined by listening for silence at the end, or until a maximum
                // of 15 seconds of audio is processed. The task returns the recognized text as well as
                // the translation.
                // Note: Since RecognizeOnceAsync() returns only a single utterance, it is suitable only
                // for single-shot recognition such as a command or query.
                // For long-running multi-utterance recognition, use StartContinuousRecognitionAsync() instead.
                Console.WriteLine("Say something...");
                var result = await recognizer.RecognizeOnceAsync();

                // Checks result.
                if (result.Reason == ResultReason.TranslatedSpeech)
                {
                    Console.WriteLine($"RECOGNIZED '{fromLanguage}': {result.Text}");
                    Console.WriteLine($"TRANSLATED into '{toLanguage}': {result.Translations[toLanguage]}");
                }
                else if (result.Reason == ResultReason.RecognizedSpeech)
                {
                    Console.WriteLine($"RECOGNIZED '{fromLanguage}': {result.Text} (text could not be translated)");
                }
                else if (result.Reason == ResultReason.NoMatch)
                {
                    Console.WriteLine($"NOMATCH: Speech could not be recognized.");
                }
                else if (result.Reason == ResultReason.Canceled)
                {
                    var cancellation = CancellationDetails.FromResult(result);
                    Console.WriteLine($"CANCELED: Reason={cancellation.Reason}");

                    if (cancellation.Reason == CancellationReason.Error)
                    {
                        Console.WriteLine($"CANCELED: ErrorCode={cancellation.ErrorCode}");
                        Console.WriteLine($"CANCELED: ErrorDetails={cancellation.ErrorDetails}");
                        Console.WriteLine($"CANCELED: Did you update the subscription info?");
                    }
                }
            }
        }
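The snippet above assumes a class-level SpeechTranslationConfig named config. A minimal sketch of the surrounding program, with placeholder key and region values (assumptions, not part of the original snippet):

        using System;
        using System.Threading.Tasks;
        using Microsoft.CognitiveServices.Speech;
        using Microsoft.CognitiveServices.Speech.Translation;

        public class Program
        {
            // Placeholder credentials; substitute your own Speech resource key and region.
            static SpeechTranslationConfig config =
                SpeechTranslationConfig.FromSubscription("<your-speech-key>", "<your-region>");

            static async Task Main()
            {
                await TranslateSpeechToText();
            }

            // ... TranslateSpeechToText() as shown above ...
        }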
Example 2
        static async Task Main(string[] args)
        {
            try
            {
                // Get config settings from AppSettings
                IConfigurationBuilder builder       = new ConfigurationBuilder().AddJsonFile("appsettings.json");
                IConfigurationRoot    configuration = builder.Build();
                string cogSvcKey    = configuration["CognitiveServiceKey"];
                string cogSvcRegion = configuration["CognitiveServiceRegion"];


                // Set a dictionary of supported voices
                var voices = new Dictionary<string, string>
                {
                    ["fr"] = "fr-FR-Julie",
                    ["es"] = "es-ES-Laura",
                    ["hi"] = "hi-IN-Kalpana"
                };

                // Configure translation ('translationConfig' is assumed to be a
                // class-level SpeechTranslationConfig field)
                translationConfig = SpeechTranslationConfig.FromSubscription(cogSvcKey, cogSvcRegion);
                translationConfig.SpeechRecognitionLanguage = "en-US";
                Console.WriteLine("Ready to translate from " + translationConfig.SpeechRecognitionLanguage);


                string targetLanguage = "";
                while (targetLanguage != "quit")
                {
                    Console.WriteLine("\nEnter a target language\n fr = French\n es = Spanish\n hi = Hindi\n Enter anything else to stop\n");
                    targetLanguage = Console.ReadLine().ToLower();
                    // Check if the user has requested a language that this app supports
                    if (voices.ContainsKey(targetLanguage))
                    {
                        // Because the synthesized speech event only supports 1:1 translation, remove any
                        // target languages already in the translationConfig. Iterate over a snapshot of the
                        // collection, since we mutate the config while looping (requires System.Linq).
                        if (translationConfig.TargetLanguages.Count > 0)
                        {
                            foreach (string language in translationConfig.TargetLanguages.ToList())
                            {
                                translationConfig.RemoveTargetLanguage(language);
                            }
                        }

                        // and add the requested one in
                        translationConfig.AddTargetLanguage(targetLanguage);
                        translationConfig.VoiceName = voices[targetLanguage];
                        await Translate(targetLanguage);
                    }
                    else
                    {
                        targetLanguage = "quit";
                    }
                }
            }
            catch (Exception ex) { Console.WriteLine(ex.Message); }
        }
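The loop above calls a Translate method that is not shown. A hypothetical sketch of what it might look like, given the translationConfig and voices set up in Main (the Synthesizing wiring and the System.IO output file are assumptions, not the original implementation):

        static async Task Translate(string targetLanguage)
        {
            // Translate speech from the default microphone.
            using (var audioConfig = AudioConfig.FromDefaultMicrophoneInput())
            using (var translator = new TranslationRecognizer(translationConfig, audioConfig))
            {
                // Because translationConfig.VoiceName is set, the service also streams
                // synthesized audio of the translation through the Synthesizing event.
                translator.Synthesizing += (_, e) =>
                {
                    byte[] audio = e.Result.GetAudio();
                    if (audio.Length > 0)
                    {
                        File.WriteAllBytes("translation.wav", audio);
                    }
                };

                Console.WriteLine("Speak now...");
                var result = await translator.RecognizeOnceAsync();
                if (result.Reason == ResultReason.TranslatedSpeech)
                {
                    Console.WriteLine($"'{result.Text}' -> '{result.Translations[targetLanguage]}'");
                }
            }
        }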
Example 3
 private async Task StartSpeechTranscriptionEngine(string recognitionLanguage, string targetLanguage)
 {
     // The translation config drives recognition and translation; the separate
     // speech config drives synthesis of the translated text in the target language.
     _translationConfig.SpeechRecognitionLanguage = recognitionLanguage;
     _translationConfig.AddTargetLanguage(targetLanguage);
     _speechConfig.SpeechRecognitionLanguage = targetLanguage;
     _speechConfig.SpeechSynthesisLanguage   = targetLanguage;
     _synthesizer            = new SpeechSynthesizer(_speechConfig, _output);
     _recognizer             = new TranslationRecognizer(_translationConfig, _audioInput);
     _recognizer.Recognized += RecognizerRecognized;
     await _recognizer.StartContinuousRecognitionAsync();
 }
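The RecognizerRecognized handler wired above is not shown. A hypothetical sketch that speaks each translation through the class-level _synthesizer created in StartSpeechTranscriptionEngine:

 // Hypothetical handler; the original implementation is not shown above.
 private async void RecognizerRecognized(object sender, TranslationRecognitionEventArgs e)
 {
     if (e.Result.Reason == ResultReason.TranslatedSpeech)
     {
         foreach (var translation in e.Result.Translations)
         {
             // With a single target language this loop runs once.
             await _synthesizer.SpeakTextAsync(translation.Value);
         }
     }
 }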
    /// <summary>
    /// Creates a class-level Translation Recognizer for a specific language using Azure credentials
    /// and hooks up lifecycle and recognition events. Translation can be enabled with one or more
    /// target languages, all translated simultaneously.
    /// </summary>
    void CreateTranslationRecognizer()
    {
        Debug.Log("Creating Translation Recognizer.");
        recognizedString = "Initializing speech recognition with translation, please wait...";

        if (translator == null)
        {
            SpeechTranslationConfig config = SpeechTranslationConfig.FromSubscription(SpeechServiceAPIKey, SpeechServiceRegion);
            config.SpeechRecognitionLanguage = fromLanguage;
            if (Languages1.captionText.text.Length > 0)
            {
                config.AddTargetLanguage(ExtractLanguageCode(Languages1.captionText.text));
            }
            if (Languages2.captionText.text.Length > 0)
            {
                config.AddTargetLanguage(ExtractLanguageCode(Languages2.captionText.text));
            }
            if (Languages3.captionText.text.Length > 0)
            {
                config.AddTargetLanguage(ExtractLanguageCode(Languages3.captionText.text));
            }
            translator = new TranslationRecognizer(config);

            if (translator != null)
            {
                translator.Recognizing         += RecognizingTranslationHandler;
                translator.Recognized          += RecognizedTranslationHandler;
                translator.SpeechStartDetected += SpeechStartDetectedHandler;
                translator.SpeechEndDetected   += SpeechEndDetectedHandler;
                translator.Canceled            += CanceledTranslationHandler;
                translator.SessionStarted      += SessionStartedHandler;
                translator.SessionStopped      += SessionStoppedHandler;
            }
        }
        Debug.Log("CreateTranslationRecognizer exit");
    }
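The ExtractLanguageCode helper used above is not shown. A hypothetical sketch, assuming the dropdown captions look like "German (de)":

    private string ExtractLanguageCode(string caption)
    {
        // Pull the code out of a trailing "(...)"; fall back to the raw caption
        // if it is already a bare language code.
        int open  = caption.LastIndexOf('(');
        int close = caption.LastIndexOf(')');
        return (open >= 0 && close > open)
            ? caption.Substring(open + 1, close - open - 1)
            : caption;
    }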
Example 5
    void Start()
    {
        if (outputText == null)
        {
            UnityEngine.Debug.LogError("outputText property is null! Assign a UI Text element to it.");
        }
        else if (recoButton == null)
        {
            _message = "recoButton property is null! Assign a UI Button to it.";
            UnityEngine.Debug.LogError(_message);
        }
        else
        {
            // Continue with normal initialization, Text and Button objects are present.
#if PLATFORM_ANDROID
            // Request to use the microphone, cf.
            // https://docs.unity3d.com/Manual/android-RequestingPermissions.html
            message = "Waiting for mic permission";
            if (!Permission.HasUserAuthorizedPermission(Permission.Microphone))
            {
                Permission.RequestUserPermission(Permission.Microphone);
            }
#elif PLATFORM_IOS
            if (!Application.HasUserAuthorization(UserAuthorization.Microphone))
            {
                Application.RequestUserAuthorization(UserAuthorization.Microphone);
            }
#else
            _micPermissionGranted = true;
            _message = "Click button to recognize speech";
#endif
            _config = SpeechTranslationConfig.FromSubscription(SubscriptionKey, SubscriptionRegion);
            _config.SpeechRecognitionLanguage = "es-US";
            _config.AddTargetLanguage("en-US");
            _pushStream              = AudioInputStream.CreatePushStream();
            _audioInput              = AudioConfig.FromStreamInput(_pushStream);
            _recognizer              = new TranslationRecognizer(_config, _audioInput);
            _recognizer.Recognizing += RecognizingHandler;
            _recognizer.Recognized  += RecognizedHandler;
            _recognizer.Canceled    += CanceledHandler;

            foreach (var device in Microphone.devices)
            {
                Debug.Log("DeviceName: " + device);
            }
            _audioSource = GameObject.Find("AudioSource").GetComponent<AudioSource>();
        }
    }
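Note that the snippet creates _pushStream but never writes audio into it. One possible approach (an assumption, not part of the original) is to convert Unity's float samples to 16-bit PCM and push each chunk into the stream:

    // Hypothetical helper: convert float samples from Unity's Microphone clip (or
    // OnAudioFilterRead) to little-endian 16-bit PCM and feed the push stream.
    private void PushAudioChunk(float[] samples)
    {
        var buffer = new byte[samples.Length * 2];
        for (int i = 0; i < samples.Length; i++)
        {
            short pcm = (short)Mathf.Clamp(samples[i] * short.MaxValue, short.MinValue, short.MaxValue);
            buffer[i * 2]     = (byte)(pcm & 0xff);
            buffer[i * 2 + 1] = (byte)((pcm >> 8) & 0xff);
        }
        _pushStream.Write(buffer);
    }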
Example 6
    void CreateTranslationRecognizer()
    {
        if (translator == null)
        {
            SpeechTranslationConfig config = SpeechTranslationConfig.FromSubscription(lunarcomController.SpeechServiceAPIKey, lunarcomController.SpeechServiceRegion);
            config.SpeechRecognitionLanguage = fromLanguage;
            config.AddTargetLanguage(toLanguage);

            translator = new TranslationRecognizer(config);

            if (translator != null)
            {
                translator.Recognizing    += HandleTranslatorRecognizing;
                translator.Recognized     += HandleTranslatorRecognized;
                translator.Canceled       += HandleTranslatorCanceled;
                translator.SessionStarted += HandleTranslatorSessionStarted;
                translator.SessionStopped += HandleTranslatorSessionStopped;
            }
        }
    }
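CreateTranslationRecognizer only constructs the recognizer; how it is started and stopped is not shown. A sketch of plausible companion methods (assumed usage, not from the original source):

    public async void BeginTranslating()
    {
        CreateTranslationRecognizer();
        if (translator != null)
        {
            await translator.StartContinuousRecognitionAsync().ConfigureAwait(false);
        }
    }

    public async void StopTranslating()
    {
        if (translator != null)
        {
            await translator.StopContinuousRecognitionAsync().ConfigureAwait(false);
            translator.Dispose();
            translator = null;
        }
    }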
Example 7
        private SpeechTranslationConfig createSpeechTranslationConfig(String logId, Key key, string sourceLanguage, List<string> languages)
        {
            SpeechTranslationConfig speechConfig = SpeechTranslationConfig.FromSubscription(key.ApiKey, key.Region);

            speechConfig.RequestWordLevelTimestamps();
            if (!IsSupportedRecognition(sourceLanguage))
            {
                _logger.LogError($"{logId}: !!!! Unknown recognition language ({sourceLanguage})! Recognition may fail ...");
            }
            speechConfig.SpeechRecognitionLanguage = sourceLanguage;

            _logger.LogInformation($"{logId}: Requested output languages: { String.Join(",", languages) }, source = ({sourceLanguage})");
            String shortCodeSource = sourceLanguage.Split('-')[0].ToLower();

            foreach (var language in languages)
            {
                String shortCodeTarget = language.Split('-')[0].ToLower();
                if (shortCodeSource == shortCodeTarget)
                {
                    continue;
                }
                if (IsSupportedTranslation(language))
                {
                    _logger.LogInformation($"{logId}: Adding target {language}");
                    speechConfig.AddTargetLanguage(language);
                }
                else
                {
                    _logger.LogWarning($"{logId}: Skipping unsupported target {language}");
                }
            }



            speechConfig.OutputFormat = OutputFormat.Detailed;
            return speechConfig;
        }
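The IsSupportedRecognition and IsSupportedTranslation checks used above are not shown. Hypothetical sketches, assuming the supported-language lists are kept in two sets (the entries here are illustrative placeholders only):

        private static readonly HashSet<string> _recognitionLanguages =
            new HashSet<string>(StringComparer.OrdinalIgnoreCase) { "en-US", "de-DE", "fr-FR" };
        private static readonly HashSet<string> _translationLanguages =
            new HashSet<string>(StringComparer.OrdinalIgnoreCase) { "en", "de", "fr" };

        private bool IsSupportedRecognition(string language) =>
            _recognitionLanguages.Contains(language);

        // Translation targets are matched on the two-letter code ("de-DE" -> "de").
        private bool IsSupportedTranslation(string language) =>
            _translationLanguages.Contains(language.Split('-')[0]);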
Example 8
        public static async Task TranslationContinuousRecognitionAsync(SpeechTranslationConfig config)
        {
            byte[] audio        = null;
            string fromLanguage = "en-US";

            #region LanguageDetection

            /*SpeechConfig speechConfig = SpeechConfig.FromEndpoint(new System.Uri(ConfigurationManager.AppSettings.Get("SpeechEndpoint")), ConfigurationManager.AppSettings.Get("TTSKey"));
             * AudioConfig audioConfig = AudioConfig.FromDefaultMicrophoneInput();
             * string fromLanguage = string.Empty;
             * AutoDetectSourceLanguageConfig autoDetectSourceLanguageConfig = AutoDetectSourceLanguageConfig
             *                                          .FromLanguages(new string[] { "en-US", "ru-RU" });
             * using (var recognizer = new SpeechRecognizer(
             *  speechConfig,
             *  autoDetectSourceLanguageConfig,
             *  audioConfig))
             * {
             *  Console.WriteLine("Say something...");
             *  var speechRecognitionResult = await recognizer.RecognizeOnceAsync();
             *  var autoDetectSourceLanguageResult =
             *      AutoDetectSourceLanguageResult.FromResult(speechRecognitionResult);
             *  fromLanguage = autoDetectSourceLanguageResult.Language;
             *  Console.WriteLine("I recognized " + speechRecognitionResult.Text + " in " + fromLanguage);
             * }*/
            #endregion
            config.SpeechRecognitionLanguage = fromLanguage;
            config.AddTargetLanguage("de");

            const string GermanVoice = "de-DE-Hedda";
            config.VoiceName = GermanVoice;
            // Creates a translation recognizer using microphone as audio input.
            using (var recognizer = new TranslationRecognizer(config))
            {
                recognizer.Recognizing += (s, e) =>
                {
                    Console.WriteLine($"RECOGNIZING in '{fromLanguage}': Text={e.Result.Text}");
                    foreach (var element in e.Result.Translations)
                    {
                        Console.WriteLine($"    TRANSLATING into '{element.Key}': {element.Value}");
                    }
                };

                recognizer.Recognized += (s, e) =>
                {
                    if (e.Result.Reason == ResultReason.TranslatedSpeech)
                    {
                        Console.WriteLine($"\nFinal result: Reason: {e.Result.Reason.ToString()}, recognized text in {fromLanguage}: {e.Result.Text}.");
                        foreach (var element in e.Result.Translations)
                        {
                            Console.WriteLine($"    TRANSLATING into '{element.Key}': {element.Value}");
                        }
                    }
                };

                recognizer.Synthesizing += (s, e) =>
                {
                    audio = e.Result.GetAudio();
                    Console.WriteLine(audio.Length != 0
                        ? $"AudioSize: {audio.Length}"
                        : $"AudioSize: {audio.Length} (end of synthesis data)");
                    // Only play non-empty chunks; the final Synthesizing event delivers
                    // zero bytes to signal the end of the synthesis data.
                    if (audio.Length > 0)
                    {
                        using (MemoryStream ms = new MemoryStream(audio))
                        {
                            SoundPlayer player = new SoundPlayer();
                            player.Stream = ms;
                            player.PlaySync();
                        }
                    }
                };

                recognizer.Canceled += (s, e) =>
                {
                    Console.WriteLine($"\nRecognition canceled. Reason: {e.Reason}; ErrorDetails: {e.ErrorDetails}");
                };

                recognizer.SessionStarted += (s, e) =>
                {
                    Console.WriteLine("\nSession started event.");
                };

                recognizer.SessionStopped += (s, e) =>
                {
                    Console.WriteLine("\nSession stopped event.");
                };

                // Starts continuous recognition. Use StopContinuousRecognitionAsync() to stop recognition.
                Console.WriteLine("Say something...");
                await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);

                do
                {
                    Console.WriteLine("Press Enter to stop");
                } while (Console.ReadKey().Key != ConsoleKey.Enter);


                // Stops continuous recognition.
                await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);
            }
        }
 public void SetSpeechLanguage(string language, string translationLanguage, string voice)
 {
     speechConfiguration.SpeechRecognitionLanguage = language;
     speechConfiguration.AddTargetLanguage(translationLanguage);
     speechConfiguration.VoiceName = voice;
 }
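One caveat: AddTargetLanguage accumulates, so repeated calls to SetSpeechLanguage keep growing the target list on the shared speechConfiguration. A variant that clears earlier targets first (a sketch, assuming System.Linq is imported):

 public void ResetSpeechLanguage(string language, string translationLanguage, string voice)
 {
     // Remove previously added targets so only the requested one remains.
     foreach (string previous in speechConfiguration.TargetLanguages.ToList())
     {
         speechConfiguration.RemoveTargetLanguage(previous);
     }
     speechConfiguration.SpeechRecognitionLanguage = language;
     speechConfiguration.AddTargetLanguage(translationLanguage);
     speechConfiguration.VoiceName = voice;
 }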
        /// <summary>
        /// Using an audio stream, get the translation of that audio file
        /// </summary>
        /// <param name="audioData"></param>
        /// <param name="fromLanguage"></param>
        /// <param name="toLanguages"></param>
        /// <returns></returns>
        public async Task<TranslationRecognitionResult> TranslateAudioStream(byte[] audioData, string fromLanguage, IList<string> toLanguages)
        {
            if (!_availableServices.Contains(AzureServiceType.Speech))
            {
                return null;
            }

            _speechSemaphore.Wait();
            try
            {
                TranslationRecognitionResult result;

                StorageFolder localFolder = ApplicationData.Current.LocalFolder;

                //TODO Update to use PullAudioInputStream
                StorageFile storageFile = await localFolder.CreateFileAsync("AudioFromStream.wav", CreationCollisionOption.ReplaceExisting);

                using (var stream = await storageFile.OpenStreamForWriteAsync())
                {
                    // The using block disposes (and closes) the stream.
                    await stream.WriteAsync(audioData, 0, audioData.Length);
                }

                var audioConfig = AudioConfig.FromWavFileInput(storageFile.Path);
                _speechTranslationConfig.SpeechRecognitionLanguage = fromLanguage;

                foreach (string language in toLanguages)
                {
                    _speechTranslationConfig.AddTargetLanguage(language);
                }

                using (var translationRecognizer = new TranslationRecognizer(_speechTranslationConfig, audioConfig))
                {
                    result = await translationRecognizer.RecognizeOnceAsync();
                }

                if (result.Reason == ResultReason.Canceled)
                {
                    var cancellation = CancellationDetails.FromResult(result);
                _logger.LogWarning($"Call cancelled. {cancellation.Reason}");

                    if (cancellation.Reason == CancellationReason.Error)
                    {
                        _logger.Log($"Cancel error code = {cancellation.ErrorCode}");
                        _logger.Log($"Cancel details = {cancellation.ErrorDetails}");

                        if (cancellation.ErrorCode == CancellationErrorCode.NoError)
                        {
                            _logger.Log("You may be having an authorization issue, are your keys correct and up to date?");
                        }
                    }
                }
                else if (result.Reason == ResultReason.TranslatedSpeech)
                {
                    _logger.Log($"Azure Translation. '{result.Reason}': {result.Text}");
                }
                return result;
            }
            catch (Exception ex)
            {
                string message = "Failed processing image.";
                _logger.Log(message, ex);
                return null;
            }
            finally
            {
                _speechSemaphore.Release();
            }
        }
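A minimal usage sketch for TranslateAudioStream; the service instance name and file path are placeholders, not from the original source:

        // Hypothetical caller: read WAV bytes and request an English-to-German translation.
        byte[] audioData = await File.ReadAllBytesAsync("sample.wav");
        TranslationRecognitionResult result =
            await azureService.TranslateAudioStream(audioData, "en-US", new List<string> { "de" });
        if (result != null && result.Reason == ResultReason.TranslatedSpeech)
        {
            Console.WriteLine(result.Translations["de"]);
        }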