Example #1
        /// <summary>
        /// Define an operator that uses the Bing Speech Recognizer to translate from audio to text
        /// </summary>
        /// <param name="audio">Our audio stream</param>
        /// <param name="speechDetector">A stream that indicates whether the user is speaking</param>
        /// <returns>A new producer containing the recognized text and its duration</returns>
        public static IProducer<(string, TimeSpan)> SpeechToText(this IProducer<AudioBuffer> audio, IProducer<bool> speechDetector)
        {
            var speechRecognizer = new BingSpeechRecognizer(audio.Out.Pipeline, new BingSpeechRecognizerConfiguration()
            {
                SubscriptionKey = bingSubscriptionKey
            });

            audio.Join(speechDetector).PipeTo(speechRecognizer);
            return speechRecognizer.Where(r => r.IsFinal).Select(r => (r.Text, r.Duration.Value));
        }
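
The sketch below is not part of the original sample; it shows one way the SpeechToText operator above might be wired into a pipeline, reusing the AudioCapture and SystemVoiceActivityDetector components that appear in the examples further down. The method name RunSpeechToText is illustrative.

        public static void RunSpeechToText()
        {
            using (var pipeline = Pipeline.Create())
            {
                // Capture audio in the 16 kHz 1-channel PCM format expected by the recognizer
                var audio = new AudioCapture(pipeline, new AudioCaptureConfiguration()
                {
                    OutputFormat = WaveFormat.Create16kHz1Channel16BitPcm()
                });

                // Detect voice activity on the audio stream
                var vad = new SystemVoiceActivityDetector(pipeline);
                audio.PipeTo(vad);

                // The SpeechToText operator defined above joins audio with the voice activity
                // stream, runs the Bing recognizer, and emits (text, duration) tuples for
                // final recognition results.
                audio.SpeechToText(vad).Do(r => Console.WriteLine($"{r.Item1} ({r.Item2})"));

                pipeline.RunAsync();
                Console.WriteLine("Press any key to exit...");
                Console.ReadKey(true);
            }
        }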
Example #2
        /// <summary>
        /// Builds and runs a speech recognition pipeline using the Bing speech recognizer. Requires a valid Cognitive Services
        /// subscription key. See https://docs.microsoft.com/en-us/azure/cognitive-services/cognitive-services-apis-create-account.
        /// </summary>
        /// <remarks>
        /// If you are getting a <see cref="System.InvalidOperationException"/> with the message 'BingSpeechRecognizer returned
        /// OnConversationError with error code: LoginFailed. Original error text: Transport error', this is most likely due to
        /// an invalid subscription key. Please check your Azure portal at https://portal.azure.com and ensure that you have
        /// added a subscription to the Bing Speech API on your account.
        /// </remarks>
        public static void RunBingSpeech()
        {
            // Create the pipeline object.
            using (Pipeline pipeline = Pipeline.Create())
            {
                // Create the AudioSource component to capture audio from the default device in 16 kHz 1-channel
                // PCM format as required by both the voice activity detector and speech recognition components.
                IProducer<AudioBuffer> audioInput = new AudioSource(pipeline, new AudioConfiguration()
                {
                    DeviceName = "plughw:0,0",
                    Format = WaveFormat.Create16kHz1Channel16BitPcm()
                });

                // Perform voice activity detection using the voice activity detector component
                var vad = new SimpleVoiceActivityDetector(pipeline);
                audioInput.PipeTo(vad);

                // Create Bing speech recognizer component
                var recognizer = new BingSpeechRecognizer(pipeline, new BingSpeechRecognizerConfiguration()
                {
                    SubscriptionKey = Program.bingSubscriptionKey,
                    RecognitionMode = SpeechRecognitionMode.Interactive
                });

                // The input audio to the Bing speech recognizer needs to be annotated with a voice activity flag.
                // This can be constructed by using the Psi Join() operator to combine the audio and VAD streams.
                var annotatedAudio = audioInput.Join(vad);

                // Subscribe the recognizer to the annotated audio
                annotatedAudio.PipeTo(recognizer);

                // Partial and final speech recognition results are posted on the same stream. Here
                // we use Psi's Where() operator to keep only the final recognition results.
                var finalResults = recognizer.Out.Where(result => result.IsFinal);

                // Print the recognized text of the final recognition result to the console.
                finalResults.Do(result => Console.WriteLine(result.Text));

                // Register an event handler to catch pipeline errors
                pipeline.PipelineCompletionEvent += PipelineCompletionEvent;

                // Run the pipeline
                pipeline.RunAsync();

                // Bing speech transcribes speech to text
                Console.WriteLine("Say anything");

                Console.WriteLine("Press any key to exit...");
                Console.ReadKey(true);
            }
        }
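
The example above registers a PipelineCompletionEvent handler that is not shown. A minimal sketch of such a handler follows; the PipelineCompletionEventArgs type and its Errors collection are assumptions based on the Psi version these examples target and may need adjusting.

        private static void PipelineCompletionEvent(object sender, PipelineCompletionEventArgs e)
        {
            // Report how the pipeline finished and surface any errors that were collected
            Console.WriteLine($"Pipeline execution completed with {e.Errors.Count} errors");
            foreach (var error in e.Errors)
            {
                Console.WriteLine(error);
            }
        }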
Example #3
        /// <summary>
        /// Builds and runs a speech recognition pipeline using the Bing speech recognizer. Requires a valid Cognitive Services
        /// subscription key. See https://docs.microsoft.com/en-us/azure/cognitive-services/cognitive-services-apis-create-account.
        /// </summary>
        /// <remarks>
        /// If you are getting a <see cref="System.InvalidOperationException"/> with the message 'BingSpeechRecognizer returned
        /// OnConversationError with error code: LoginFailed. Original error text: Transport error', this is most likely due to
        /// an invalid subscription key. Please check your Azure portal at https://portal.azure.com and ensure that you have
        /// added a subscription to the Bing Speech API on your account.
        /// </remarks>
        /// <param name="outputLogPath">The path under which to write log data.</param>
        /// <param name="inputLogPath">The path from which to read audio input data.</param>
        public static void RunBingSpeech(string outputLogPath = null, string inputLogPath = null)
        {
            // Create the pipeline object.
            using (Pipeline pipeline = Pipeline.Create())
            {
                // Use either live audio from the microphone or audio from a previously saved log
                IProducer<AudioBuffer> audioInput = null;
                if (inputLogPath != null)
                {
                    // Open the MicrophoneAudio stream from the last saved log
                    var store = Store.Open(pipeline, Program.AppName, inputLogPath);
                    audioInput = store.OpenStream<AudioBuffer>($"{Program.AppName}.MicrophoneAudio");
                }
                else
                {
                    // Create the AudioCapture component to capture audio from the default device in 16 kHz 1-channel
                    // PCM format as required by both the voice activity detector and speech recognition components.
                    audioInput = new AudioCapture(pipeline, new AudioCaptureConfiguration()
                    {
                        OutputFormat = WaveFormat.Create16kHz1Channel16BitPcm()
                    });
                }

                // Perform voice activity detection using the voice activity detector component
                var vad = new SystemVoiceActivityDetector(pipeline);
                audioInput.PipeTo(vad);

                // Create Bing speech recognizer component
                var recognizer = new BingSpeechRecognizer(pipeline, new BingSpeechRecognizerConfiguration()
                {
                    SubscriptionKey = Program.bingSubscriptionKey,
                    RecognitionMode = SpeechRecognitionMode.Interactive
                });

                // The input audio to the Bing speech recognizer needs to be annotated with a voice activity flag.
                // This can be constructed by using the Psi Join() operator to combine the audio and VAD streams.
                var annotatedAudio = audioInput.Join(vad);

                // Subscribe the recognizer to the annotated audio
                annotatedAudio.PipeTo(recognizer);

                // Partial and final speech recognition results are posted on the same stream. Here
                // we use Psi's Where() operator to keep only the final recognition results.
                var finalResults = recognizer.Out.Where(result => result.IsFinal);

                // Print the recognized text of the final recognition result to the console.
                finalResults.Do(result => Console.WriteLine(result.Text));

                // Create a data store to log the data to if necessary. A data store is necessary
                // only if output logging is enabled.
                var dataStore = CreateDataStore(pipeline, outputLogPath);

                // For disk logging only
                if (dataStore != null)
                {
                    // Log the microphone audio and recognition results
                    audioInput.Write($"{Program.AppName}.MicrophoneAudio", dataStore);
                    finalResults.Write($"{Program.AppName}.FinalRecognitionResults", dataStore);
                    vad.Write($"{Program.AppName}.VoiceActivity", dataStore);
                }

                // Register an event handler to catch pipeline errors
                pipeline.PipelineCompletionEvent += PipelineCompletionEvent;

                // Run the pipeline
                pipeline.RunAsync();

                // Bing speech transcribes speech to text
                Console.WriteLine("Say anything");

                Console.WriteLine("Press any key to exit...");
                Console.ReadKey(true);
            }
        }
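
Example #3 calls a CreateDataStore helper that is not shown. Below is a minimal sketch of what such a helper might look like, assuming the Psi Store.Create API; the actual helper in the sample may differ.

        private static Exporter CreateDataStore(Pipeline pipeline, string outputLogPath)
        {
            // Only create a store when output logging is enabled; otherwise return null so
            // the caller skips the Write() calls.
            return outputLogPath != null
                ? Store.Create(pipeline, Program.AppName, outputLogPath)
                : null;
        }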