/// <summary>
/// Captures one utterance from the default microphone using Windows dictation
/// recognition, saves the captured audio to the temp WAV file, then transcribes it:
/// each preferred SpeechToText service is tried in order, falling back to the
/// default Windows STT service when none succeeds.
/// </summary>
/// <returns>The transcribed text, or null when nothing was recognized.</returns>
public static async System.Threading.Tasks.Task<string> MicrophoneToTextAsync()
{
    CallServices<ISpeechToTextService, ISpeechToTextServiceResponse> wSTT = new CallServices<ISpeechToTextService, ISpeechToTextServiceResponse>(null);
    System.Globalization.CultureInfo ci = new System.Globalization.CultureInfo(Options.options.locale.language);
    using (System.Speech.Recognition.SpeechRecognitionEngine RecognitionEngine = new System.Speech.Recognition.SpeechRecognitionEngine(ci))
    {
        RecognitionEngine.SetInputToDefaultAudioDevice();
        RecognitionEngine.LoadGrammar(new System.Speech.Recognition.DictationGrammar());
        System.Speech.Recognition.RecognitionResult WakeUpWordResult = RecognitionEngine.Recognize();
        if (WakeUpWordResult == null)
        {
            // Recognize() returns null when nothing was heard before its timeout.
            return null;
        }
        // Hoist the repeated temp-file path concatenation.
        string wavePath = Options.options.tempFolderPath + Options.options.audio.speechSynthesisFileName;
        using (System.IO.FileStream waveStream = new System.IO.FileStream(wavePath, System.IO.FileMode.Create))
        {
            WakeUpWordResult.Audio.WriteToWaveStream(waveStream);
            waveStream.Flush();
            waveStream.Close();
        }
        // NOTE(review): the file is written with the tempFolderPath prefix above but read
        // without it here — presumably ReadBytesFromFileAsync prepends it; confirm.
        byte[] bytes = await Helpers.ReadBytesFromFileAsync(Options.options.audio.speechSynthesisFileName);
        // FIX: removed an unused, undisposed MemoryStream that was allocated here.
        string text = WakeUpWordResult.Text;
        int sampleRate = await Audio.GetSampleRateAsync(wavePath);
        System.Collections.Generic.Dictionary<string, string> apiArgs = new System.Collections.Generic.Dictionary<string, string>() { { "sampleRate", sampleRate.ToString() } };
        // Try each preferred STT service in order; first HTTP 200 wins.
        foreach (ISpeechToTextService STT in new FindServices<ISpeechToTextService>(Options.commandservices["SpeechToText"].preferredServices).PreferredOrderingOfServices)
        {
            SpeechToTextServiceResponse r = await STT.SpeechToTextServiceAsync(bytes, apiArgs);
            if (r.StatusCode == 200)
            {
                text = r.ResponseResult;
                Console.WriteLine(r.Service.name + ":\"" + text + "\" Total Elapsed ms:" + r.TotalElapsedMilliseconds + " Request Elapsed ms:" + r.RequestElapsedMilliseconds);
                return text;
            }
            else
            {
                Console.WriteLine(r.Service.name + " not available.");
            }
        }
        // Fall back to the default (Windows) STT service.
        CallServiceResponse<ISpeechToTextServiceResponse> response = await wSTT.CallServiceAsync(bytes, apiArgs);
        text = response.ResponseResult;
        Console.WriteLine("Windows STT (default):\"" + text + "\" Total Elapsed ms:" + response.TotalElapsedMilliseconds + " Request Elapsed ms:" + response.RequestElapsedMilliseconds);
        return text;
    }
}
/// <summary>
/// Runs Windows desktop speech recognition (dictation grammar) over an in-memory
/// WAV stream and returns the recognized text. When the recognizer produces no
/// result, a diagnostic message string is returned instead of text.
/// </summary>
/// <param name="stream">WAV audio to transcribe.</param>
/// <returns>The recognized text, or a diagnostic message when recognition yielded nothing.</returns>
public static async System.Threading.Tasks.Task<string> SpeechToTextServiceAsync(System.IO.MemoryStream stream)
{
    System.Globalization.CultureInfo culture = new System.Globalization.CultureInfo(Options.options.locale.language);
    using (System.Speech.Recognition.SpeechRecognitionEngine engine = new System.Speech.Recognition.SpeechRecognitionEngine(culture))
    {
        engine.SetInputToWaveStream(stream);
        engine.LoadGrammar(new System.Speech.Recognition.DictationGrammar());
        System.Speech.Recognition.RecognitionResult recognized = engine.Recognize();
        return recognized == null
            ? "Speech.RecognitionEngine.Recognize returned null result"
            : recognized.Text;
    }
}
/// <summary>
/// Runs Windows desktop speech recognition (dictation grammar) over a WAV file in
/// the temp folder and returns the recognized text.
/// </summary>
/// <param name="fileName">File name relative to Options.options.tempFolderPath.</param>
/// <param name="apiArgs">Unused here; kept for signature parity with service implementations.</param>
/// <returns>Recognized text, or null when the file is missing or nothing was recognized.</returns>
public static async System.Threading.Tasks.Task<string> SpeechToTextServicesAsync(string fileName, System.Collections.Generic.Dictionary<string, string> apiArgs)
{
    string path = Options.options.tempFolderPath + fileName;
    if (!System.IO.File.Exists(path))
    {
        return null;
    }
    System.Globalization.CultureInfo ci = new System.Globalization.CultureInfo(Options.options.locale.language);
    using (System.Speech.Recognition.SpeechRecognitionEngine RecognitionEngine = new System.Speech.Recognition.SpeechRecognitionEngine(ci))
    {
        RecognitionEngine.SetInputToWaveFile(path);
        RecognitionEngine.LoadGrammar(new System.Speech.Recognition.DictationGrammar());
        System.Speech.Recognition.RecognitionResult result = RecognitionEngine.Recognize();
        // BUG FIX: Recognize() can return null (the sibling methods in this file all
        // guard for it); the original dereferenced result.Text unconditionally and
        // would throw NullReferenceException on silence/timeout.
        return result == null ? null : result.Text;
    }
}
/// <summary>
/// Runs one blocking speech-to-text recognition pass on the default microphone
/// using a dictation grammar, stores the result in speechResult, and forwards it
/// to speechInputCheck.
/// </summary>
public void speechToTextInit()
{
    // Creates an instance of the System.Speech recognition engine for speech to text.
    stt_sre = new System.Speech.Recognition.SpeechRecognitionEngine();
    // Uses dictation grammar to allow freedom of speech for entering any word detected.
    System.Speech.Recognition.Grammar dictationGrammar = new System.Speech.Recognition.DictationGrammar();
    // Loads the dictation grammar into the speech recognition engine.
    stt_sre.LoadGrammar(dictationGrammar);
    try
    {
        // Try/catch catches invalid operation exceptions that can occur when detecting speech.
        stt_sre.SetInputToDefaultAudioDevice();
        // Blocks until one recognition result (or timeout, in which case result is null).
        System.Speech.Recognition.RecognitionResult result = stt_sre.Recognize();
        if (result != null)
        {
            speechResult = result.Text;
            // BUG FIX: string.Replace returns a new string; the original discarded the
            // return value so commas were never actually removed. (The old comment also
            // said "spaces" — this strips commas.)
            speechResult = speechResult.Replace(",", "");
            Console.WriteLine("The result is: " + speechResult);
            try
            {
                // Used in passing results from speech to text to other classes.
                speechInputCheck(speechResult);
            }
            catch (NullReferenceException null_ref)
            {
                // Catches a null speechResult downstream, showing the exception source.
                Console.WriteLine("NullReferenceException thrown in speech: " + null_ref.Message + null_ref.Source);
            }
        }
    }
    catch (InvalidOperationException invalid)
    {
        Console.WriteLine("InvalidOperationException in speech: " + invalid.Message + invalid.Source);
    }
    finally
    {
        // Unloads the speech recognition engine's grammars once finished.
        stt_sre.UnloadAllGrammars();
    }
}
/// <summary>
/// Toggles voice-command listening. Starts continuous asynchronous recognition of
/// a small fixed command vocabulary on the default microphone; clicking again
/// while listening turns it off.
/// </summary>
private void bVoice_Click(object sender, EventArgs e)
{
    // System.Speech is Windows-only; bail out under Mono.
    if (MonoCompat.IsMono)
    {
        Logger.Log(LogType.Warning, "Voice commands are for windows operating systems only");
        return;
    }
    // If the button was already clicked, cancel listening.
    if (listening)
    {
        listening = false;
        bVoice.ForeColor = System.Drawing.Color.Black;
        return;
    }
    // NOTE(review): the engine is intentionally not disposed here because asynchronous
    // recognition continues after this handler returns — confirm its lifetime is managed.
    System.Speech.Recognition.SpeechRecognitionEngine engine = new System.Speech.Recognition.SpeechRecognitionEngine();
    bVoice.ForeColor = System.Drawing.Color.Aqua;
    System.Speech.Recognition.Choices commands = new System.Speech.Recognition.Choices();
    commands.Add(new string[] { "restart", "shutdown", "status report", "players", "help" });
    System.Speech.Recognition.Grammar gr = new System.Speech.Recognition.Grammar(new System.Speech.Recognition.GrammarBuilder(commands));
    try
    {
        listening = true;
        engine.RequestRecognizerUpdate();
        engine.LoadGrammar(gr);
        engine.SpeechRecognized += engine_SpeechRecognized;
        engine.SetInputToDefaultAudioDevice();
        engine.RecognizeAsync(System.Speech.Recognition.RecognizeMode.Multiple);
        // BUG FIX: the original additionally called engine.Recognize() here, which always
        // throws InvalidOperationException while RecognizeAsync is active; the exception
        // was silently swallowed by a bare catch. The dead synchronous call is removed.
    }
    catch (Exception ex)
    {
        // BUG FIX: log and reset UI state instead of silently swallowing the failure,
        // which previously left 'listening' true with no recognition running.
        Logger.Log(LogType.Warning, "Failed to start voice command recognition: " + ex.Message);
        listening = false;
        bVoice.ForeColor = System.Drawing.Color.Black;
        return;
    }
}
/// <summary>
/// Loops forever: listens on the default microphone for one of the wake-up words
/// followed by a request (captured via a wildcard), writes the captured audio to
/// the temp WAV file, and — when the wake-up word confidence is high enough —
/// transcribes it with the preferred SpeechToText services, falling back to the
/// default Windows STT service.
/// </summary>
/// <param name="WakeUpWords">Words that trigger recognition of the remaining speech.</param>
/// <returns>The recognized text split into words on spaces.</returns>
public static async System.Threading.Tasks.Task<string[]> WaitForWakeUpWordThenRecognizeRemainingSpeechAsync(string[] WakeUpWords)
{
    Console.WriteLine("Say the wakeup word (" + string.Join(" ", WakeUpWords) + ") followed by the request ...");
    CallServices<ISpeechToTextService, ISpeechToTextServiceResponse> wSTT = new CallServices<ISpeechToTextService, ISpeechToTextServiceResponse>(null);
    System.Diagnostics.Stopwatch stopWatch = new System.Diagnostics.Stopwatch();
    stopWatch.Start(); // continues until return
    System.Globalization.CultureInfo ci = new System.Globalization.CultureInfo(Options.options.locale.language);
    while (true)
    {
        using (System.Speech.Recognition.SpeechRecognitionEngine RecognitionEngine = new System.Speech.Recognition.SpeechRecognitionEngine(ci))
        {
            RecognitionEngine.SetInputToDefaultAudioDevice();
            // Build the grammar: one of the wake-up words, then a wildcard capturing
            // everything said after it under "wordsAfterWakeUpWordKey".
            System.Speech.Recognition.GrammarBuilder wakeUpWordBuilder = new System.Speech.Recognition.GrammarBuilder();
            wakeUpWordBuilder.Append(new System.Speech.Recognition.Choices(WakeUpWords));
            System.Speech.Recognition.GrammarBuilder wordsAfterWakeUpWordBuilder = new System.Speech.Recognition.GrammarBuilder();
            wordsAfterWakeUpWordBuilder.AppendWildcard();
            // FIX: removed an unused local SemanticResultKey that duplicated the one built inline below.
            wakeUpWordBuilder.Append(new System.Speech.Recognition.SemanticResultKey("wordsAfterWakeUpWordKey", wordsAfterWakeUpWordBuilder));
            // Initialize recognizer, wait for result, save result to file.
            System.Speech.Recognition.Grammar g = new System.Speech.Recognition.Grammar(wakeUpWordBuilder);
            RecognitionEngine.LoadGrammar(g);
            if (Options.options.wakeup.initialSilenceTimeout == -1)
            {
                RecognitionEngine.InitialSilenceTimeout = TimeSpan.FromTicks(Int32.MaxValue); // never timeout
            }
            else
            {
                RecognitionEngine.InitialSilenceTimeout = TimeSpan.FromSeconds(Options.options.wakeup.initialSilenceTimeout); // timesout after this much silence
            }
            RecognitionEngine.EndSilenceTimeout = TimeSpan.FromSeconds(Options.options.wakeup.endSilenceTimeout); // maximum silence allowed after hearing wakeup word
#if true // experimenting with Babble and other timeouts
            RecognitionEngine.BabbleTimeout = TimeSpan.FromSeconds(0);
#else
            RecognitionEngine.BabbleTimeout = TimeSpan.FromTicks(UInt32.MaxValue);
#endif
            System.Speech.Recognition.RecognitionResult WakeUpWordResult = RecognitionEngine.Recognize();
            // RecognitionResult is null when some unidentified timeout expires around 30 seconds.
            // Can't find a way to make timeouts infinite so just looping.
            if (WakeUpWordResult == null)
            {
                continue;
            }
            using (System.IO.FileStream waveStream = new System.IO.FileStream(Options.options.tempFolderPath + Options.options.audio.speechSynthesisFileName, System.IO.FileMode.Create))
            {
                WakeUpWordResult.Audio.WriteToWaveStream(waveStream);
                waveStream.Flush();
                waveStream.Close();
            }
            Console.WriteLine("Wake up word detected (" + WakeUpWordResult.Words[0].Text + "): confidence:" + WakeUpWordResult.Confidence + " Elapsed Ms:" + stopWatch.ElapsedMilliseconds);
            // Low-confidence detections fall through and loop again.
            if (WakeUpWordResult.Confidence >= Options.options.wakeup.confidence)
            {
                // NOTE(review): the file is written with the tempFolderPath prefix above but
                // read without it here — presumably ReadBytesFromFileAsync prepends it; confirm.
                byte[] bytes = await Helpers.ReadBytesFromFileAsync(Options.options.audio.speechSynthesisFileName);
                string text = WakeUpWordResult.Text;
                int sampleRate = await Audio.GetSampleRateAsync(Options.options.tempFolderPath + Options.options.audio.speechSynthesisFileName);
                System.Collections.Generic.Dictionary<string, string> apiArgs = new System.Collections.Generic.Dictionary<string, string>() { { "sampleRate", sampleRate.ToString() } };
#if false // for testing
                await windows.SpeechToTextAsync(bytes, apiArgs);
                Console.WriteLine("Windows STT (demo):\"" + windows.ResponseResult + "\" Total Elapsed ms:" + windows.TotalElapsedMilliseconds + " Request Elapsed ms:" + windows.RequestElapsedMilliseconds);
#endif
                // FIX: removed an unused, undisposed MemoryStream that was allocated here.
                // Try each preferred STT service in order; first HTTP 200 wins.
                foreach (ISpeechToTextService STT in new FindServices<ISpeechToTextService>(Options.commandservices["SpeechToText"].preferredServices).PreferredOrderingOfServices)
                {
                    SpeechToTextServiceResponse r = await STT.SpeechToTextServiceAsync(bytes, apiArgs);
                    if (r.StatusCode == 200)
                    {
                        text = r.ResponseResult;
                        Console.WriteLine(r.Service.name + ":\"" + text + "\" Total Elapsed ms:" + r.TotalElapsedMilliseconds + " Request Elapsed ms:" + r.RequestElapsedMilliseconds);
                        return text.Split(" ".ToCharArray(), StringSplitOptions.None);
                    }
                    else
                    {
                        Console.WriteLine(r.Service.name + " not available.");
                    }
                }
                // Fall back to the default (Windows) STT service.
                CallServiceResponse<ISpeechToTextServiceResponse> response = await wSTT.CallServiceAsync(bytes, apiArgs);
                text = response.ResponseResult;
                Console.WriteLine("Windows STT (default):\"" + text + "\" Total Elapsed ms:" + response.TotalElapsedMilliseconds + " Request Elapsed ms:" + response.RequestElapsedMilliseconds);
                return text.Split(" ".ToCharArray(), StringSplitOptions.None);
            }
        }
    }
}