/// <summary>
/// Define an operator that uses the Bing Speech Recognizer to translate from audio to text.
/// </summary>
/// <param name="audio">Our audio stream.</param>
/// <param name="speechDetector">A stream that indicates whether the user is speaking.</param>
/// <returns>A new producer with the recognized text and the duration of the utterance.</returns>
public static IProducer<(string, TimeSpan)> SpeechToText(this IProducer<AudioBuffer> audio, IProducer<bool> speechDetector)
{
    var speechRecognizer = new BingSpeechRecognizer(
        audio.Out.Pipeline,
        new BingSpeechRecognizerConfiguration() { SubscriptionKey = bingSubscriptionKey });

    audio.Join(speechDetector).PipeTo(speechRecognizer);

    return speechRecognizer.Where(r => r.IsFinal).Select(r => (r.Text, r.Duration.Value));
}
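// A minimal usage sketch for the SpeechToText() operator above. This method is an illustrative
// assumption, not part of the original sample: it wires a microphone source and a voice activity
// detector into the operator and prints each final result with its duration. It assumes the same
// usings as the surrounding code (Microsoft.Psi, Microsoft.Psi.Audio, Microsoft.Psi.Speech).
public static void RunSpeechToTextExample()
{
    using (var pipeline = Pipeline.Create())
    {
        // Capture 16 kHz, 1-channel PCM audio as required by the recognizer.
        var microphoneAudio = new AudioCapture(
            pipeline,
            new AudioCaptureConfiguration() { OutputFormat = WaveFormat.Create16kHz1Channel16BitPcm() });

        // Detect voice activity on the same audio stream.
        var voiceActivity = new SystemVoiceActivityDetector(pipeline);
        microphoneAudio.PipeTo(voiceActivity);

        // Compose the operator and print each recognized utterance together with its duration.
        microphoneAudio.SpeechToText(voiceActivity)
            .Do(r => Console.WriteLine($"{r.Item1} ({r.Item2})"));

        // Run the pipeline until a key is pressed, mirroring the pattern used elsewhere in this sample.
        pipeline.RunAsync();
        Console.WriteLine("Press any key to exit...");
        Console.ReadKey(true);
    }
}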
/// <summary>
/// Builds and runs a speech recognition pipeline using the Bing speech recognizer. Requires a valid Cognitive Services
/// subscription key. See https://docs.microsoft.com/en-us/azure/cognitive-services/cognitive-services-apis-create-account.
/// </summary>
/// <remarks>
/// If you are getting a <see cref="System.InvalidOperationException"/> with the message 'BingSpeechRecognizer returned
/// OnConversationError with error code: LoginFailed. Original error text: Transport error', this is most likely due to
/// an invalid subscription key. Please check your Azure portal at https://portal.azure.com and ensure that you have
/// added a subscription to the Bing Speech API on your account.
/// </remarks>
public static void RunBingSpeech()
{
    // Create the pipeline object.
    using (Pipeline pipeline = Pipeline.Create())
    {
        // Create the AudioSource component to capture audio from the default device in 16 kHz 1-channel
        // PCM format as required by both the voice activity detector and speech recognition components.
        IProducer<AudioBuffer> audioInput = new AudioSource(
            pipeline,
            new AudioConfiguration()
            {
                DeviceName = "plughw:0,0",
                Format = WaveFormat.Create16kHz1Channel16BitPcm()
            });

        // Perform voice activity detection using the voice activity detector component.
        var vad = new SimpleVoiceActivityDetector(pipeline);
        audioInput.PipeTo(vad);

        // Create the Bing speech recognizer component.
        var recognizer = new BingSpeechRecognizer(
            pipeline,
            new BingSpeechRecognizerConfiguration()
            {
                SubscriptionKey = Program.bingSubscriptionKey,
                RecognitionMode = SpeechRecognitionMode.Interactive
            });

        // The input audio to the Bing speech recognizer needs to be annotated with a voice activity flag.
        // This can be constructed by using the Psi Join() operator to combine the audio and VAD streams.
        var annotatedAudio = audioInput.Join(vad);

        // Subscribe the recognizer to the annotated audio.
        annotatedAudio.PipeTo(recognizer);

        // Partial and final speech recognition results are posted on the same stream. Here
        // we use Psi's Where() operator to filter out only the final recognition results.
        var finalResults = recognizer.Out.Where(result => result.IsFinal);

        // Print the recognized text of each final recognition result to the console.
        finalResults.Do(result => Console.WriteLine(result.Text));

        // Register an event handler to catch pipeline errors.
        pipeline.PipelineCompletionEvent += PipelineCompletionEvent;

        // Run the pipeline.
        pipeline.RunAsync();

        // Bing speech transcribes speech to text.
        Console.WriteLine("Say anything");
        Console.WriteLine("Press any key to exit...");
        Console.ReadKey(true);
    }
}
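// A possible sketch of the PipelineCompletionEvent handler registered above. The exact members of
// PipelineCompletionEventArgs used here (an Errors collection of exceptions) are an assumption based
// on how the handler is used in this sample, not a definitive implementation.
private static void PipelineCompletionEvent(object sender, PipelineCompletionEventArgs e)
{
    // Report how many errors (if any) the pipeline completed with.
    Console.WriteLine($"Pipeline execution completed with {e.Errors.Count} errors");

    // Print each error so that problems such as an invalid subscription key are visible.
    foreach (var error in e.Errors)
    {
        Console.WriteLine(error);
    }
}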
/// <summary>
/// Builds and runs a speech recognition pipeline using the Bing speech recognizer. Requires a valid Cognitive Services
/// subscription key. See https://docs.microsoft.com/en-us/azure/cognitive-services/cognitive-services-apis-create-account.
/// </summary>
/// <remarks>
/// If you are getting a <see cref="System.InvalidOperationException"/> with the message 'BingSpeechRecognizer returned
/// OnConversationError with error code: LoginFailed. Original error text: Transport error', this is most likely due to
/// an invalid subscription key. Please check your Azure portal at https://portal.azure.com and ensure that you have
/// added a subscription to the Bing Speech API on your account.
/// </remarks>
/// <param name="outputLogPath">The path under which to write log data.</param>
/// <param name="inputLogPath">The path from which to read audio input data.</param>
public static void RunBingSpeech(string outputLogPath = null, string inputLogPath = null)
{
    // Create the pipeline object.
    using (Pipeline pipeline = Pipeline.Create())
    {
        // Use either live audio from the microphone or audio from a previously saved log.
        IProducer<AudioBuffer> audioInput = null;
        if (inputLogPath != null)
        {
            // Open the MicrophoneAudio stream from the last saved log.
            var store = Store.Open(pipeline, Program.AppName, inputLogPath);
            audioInput = store.OpenStream<AudioBuffer>($"{Program.AppName}.MicrophoneAudio");
        }
        else
        {
            // Create the AudioCapture component to capture audio from the default device in 16 kHz 1-channel
            // PCM format as required by both the voice activity detector and speech recognition components.
            audioInput = new AudioCapture(
                pipeline,
                new AudioCaptureConfiguration() { OutputFormat = WaveFormat.Create16kHz1Channel16BitPcm() });
        }

        // Perform voice activity detection using the voice activity detector component.
        var vad = new SystemVoiceActivityDetector(pipeline);
        audioInput.PipeTo(vad);

        // Create the Bing speech recognizer component.
        var recognizer = new BingSpeechRecognizer(
            pipeline,
            new BingSpeechRecognizerConfiguration()
            {
                SubscriptionKey = Program.bingSubscriptionKey,
                RecognitionMode = SpeechRecognitionMode.Interactive
            });

        // The input audio to the Bing speech recognizer needs to be annotated with a voice activity flag.
        // This can be constructed by using the Psi Join() operator to combine the audio and VAD streams.
        var annotatedAudio = audioInput.Join(vad);

        // Subscribe the recognizer to the annotated audio.
        annotatedAudio.PipeTo(recognizer);

        // Partial and final speech recognition results are posted on the same stream. Here
        // we use Psi's Where() operator to filter out only the final recognition results.
        var finalResults = recognizer.Out.Where(result => result.IsFinal);

        // Print the recognized text of each final recognition result to the console.
        finalResults.Do(result => Console.WriteLine(result.Text));

        // Create a data store to log the data to if necessary. A data store is necessary
        // only if output logging is enabled.
        var dataStore = CreateDataStore(pipeline, outputLogPath);

        // For disk logging only.
        if (dataStore != null)
        {
            // Log the microphone audio and recognition results.
            audioInput.Write($"{Program.AppName}.MicrophoneAudio", dataStore);
            finalResults.Write($"{Program.AppName}.FinalRecognitionResults", dataStore);
            vad.Write($"{Program.AppName}.VoiceActivity", dataStore);
        }

        // Register an event handler to catch pipeline errors.
        pipeline.PipelineCompletionEvent += PipelineCompletionEvent;

        // Run the pipeline.
        pipeline.RunAsync();

        // Bing speech transcribes speech to text.
        Console.WriteLine("Say anything");
        Console.WriteLine("Press any key to exit...");
        Console.ReadKey(true);
    }
}
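// A possible sketch of the CreateDataStore() helper referenced above: it returns a persisted
// store (an Exporter) when outputLogPath is set, and null otherwise so the caller can skip
// logging. The signature and return type are assumptions; only the null-when-no-logging
// contract is implied by the calling code.
private static Exporter CreateDataStore(Pipeline pipeline, string outputLogPath = null)
{
    // No store is needed when output logging is not enabled.
    if (string.IsNullOrEmpty(outputLogPath))
    {
        return null;
    }

    // Ensure the output directory exists, then create a persisted store named after the application.
    System.IO.Directory.CreateDirectory(outputLogPath);
    return Store.Create(pipeline, Program.AppName, outputLogPath);
}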