/// <summary>
/// Entry point: captures audio from the first connected Kinect sensor and streams it
/// into a System.Speech dictation recognizer, echoing recognized text to the console
/// until the user presses ENTER.
/// </summary>
public static void Main(string[] args)
{
    // Obtain a KinectSensor if any are available
    KinectSensor sensor = (from sensorToCheck in KinectSensor.KinectSensors
                           where sensorToCheck.Status == KinectStatus.Connected
                           select sensorToCheck).FirstOrDefault();
    if (sensor == null)
    {
        Console.WriteLine(
            "No Kinect sensors are attached to this computer or none of the ones that are\n"
            + "attached are \"Connected\".\n"
            + "Attach the KinectSensor and restart this application.\n"
            + "If that doesn't work run SkeletonViewer-WPF to better understand the Status of\n"
            + "the Kinect sensors.\n\n"
            + "Press any key to continue.\n");

        // Give a chance for user to see console output before it is dismissed
        Console.ReadKey(true);
        return;
    }

    sensor.Start();

    // Obtain the KinectAudioSource to do audio capture
    KinectAudioSource source = sensor.AudioSource;
    source.EchoCancellationMode = EchoCancellationMode.None;  // No AEC for this sample
    source.AutomaticGainControlEnabled = false;               // Important to turn this off for speech recognition

    System.Speech.Recognition.RecognizerInfo ri =
        System.Speech.Recognition.SpeechRecognitionEngine.InstalledRecognizers().FirstOrDefault();

    // BUG FIX: InstalledRecognizers() can return an empty collection (no recognizer or
    // language pack installed); the original dereferenced ri.Id unconditionally and would
    // throw NullReferenceException here.
    if (ri == null)
    {
        Console.WriteLine("No speech recognizers are installed.\nPress any key to continue.\n");
        Console.ReadKey(true);
        sensor.Stop();
        return;
    }

    using (var recoEngine = new System.Speech.Recognition.SpeechRecognitionEngine(ri.Id))
    {
        // Create the question dictation grammar.
        System.Speech.Recognition.DictationGrammar customDictationGrammar = new System.Speech.Recognition.DictationGrammar();
        customDictationGrammar.Name = "Dictation";
        customDictationGrammar.Enabled = true;

        // Add the grammar to the recognition engine and echo recognized text.
        recoEngine.LoadGrammar(customDictationGrammar);
        recoEngine.SpeechRecognized += (s, sargs) => Console.Write(sargs.Result.Text);

        using (Stream s = source.Start())
        {
            // Kinect audio stream format: 16 kHz, 16-bit, mono PCM.
            recoEngine.SetInputToAudioStream(s,
                new System.Speech.AudioFormat.SpeechAudioFormatInfo(
                    System.Speech.AudioFormat.EncodingFormat.Pcm, 16000, 16, 1, 32000, 2, null));

            // BUG FIX: the original string literal contained a raw line break, which does
            // not compile in a non-verbatim C# string.
            Console.WriteLine("Dictating. Press ENTER to stop");
            recoEngine.RecognizeAsync(System.Speech.Recognition.RecognizeMode.Multiple);
            Console.ReadLine();

            Console.WriteLine("Stopping recognizer ...");
            recoEngine.RecognizeAsyncStop();
        }
    }

    sensor.Stop();
}
// https://github.com/SynHub/syn-speech-samples
// https://github.com/SynHub/syn-speech
/// <summary>
/// Demo: lists the recognizers registered with System.Speech, then runs continuous
/// dictation recognition on the default audio device. Recognized phrases are passed
/// to OnSpeechRecognized; the method blocks indefinitely on console input.
/// </summary>
public static void Test()
{
    System.Console.WriteLine("System.Speech needs Microsoft Speech SDK installed (COM-Object)");
    System.Console.WriteLine("https://www.microsoft.com/en-us/download/details.aspx?id=10121");
    System.Console.WriteLine("Depending on Framework-Version, recognizers installed with language pack are missing.");
    System.Console.WriteLine("Despite NetStandard, System.Speech is Windows-ONLY !");
    System.Console.WriteLine(System.Environment.NewLine);
    System.Console.WriteLine("Installed recognizers:");

    // Recognizers come from the COM registration under
    // HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\Recognizers
    foreach (System.Speech.Recognition.RecognizerInfo info in System.Speech.Recognition.SpeechRecognitionEngine.InstalledRecognizers())
    {
        System.Console.Write(" - ");
        System.Console.WriteLine(info.Culture.IetfLanguageTag);
    }

    // Create an in-process recognizer for the OS-installed UI culture.
    using (System.Speech.Recognition.SpeechRecognitionEngine recognizer =
               new System.Speech.Recognition.SpeechRecognitionEngine(System.Globalization.CultureInfo.InstalledUICulture))
    {
        // Free-form dictation grammar.
        recognizer.LoadGrammar(new System.Speech.Recognition.DictationGrammar());

        // Dispatch recognized phrases to the handler.
        recognizer.SpeechRecognized += OnSpeechRecognized;

        // Capture audio from the default input device.
        recognizer.SetInputToDefaultAudioDevice();

        // Start asynchronous, continuous speech recognition.
        recognizer.RecognizeAsync(System.Speech.Recognition.RecognizeMode.Multiple);

        // Keep the console window open forever.
        for (; ; )
        {
            System.Console.ReadLine();
        }
    }
}
/// <summary>
/// Listens on the default microphone, captures one utterance, writes its audio to a
/// temp wave file, then tries each preferred speech-to-text service in order, falling
/// back to the default service. Returns the recognized text, or null if the local
/// recognizer produced no result.
/// </summary>
public static async System.Threading.Tasks.Task<string> MicrophoneToTextAsync()
{
    CallServices<ISpeechToTextService, ISpeechToTextServiceResponse> wSTT =
        new CallServices<ISpeechToTextService, ISpeechToTextServiceResponse>(null);
    System.Globalization.CultureInfo ci = new System.Globalization.CultureInfo(Options.options.locale.language);
    using (System.Speech.Recognition.SpeechRecognitionEngine RecognitionEngine =
               new System.Speech.Recognition.SpeechRecognitionEngine(ci))
    {
        RecognitionEngine.SetInputToDefaultAudioDevice();
        RecognitionEngine.LoadGrammar(new System.Speech.Recognition.DictationGrammar());

        // Recognize() blocks until an utterance completes; it returns null on timeout/silence.
        System.Speech.Recognition.RecognitionResult WakeUpWordResult = RecognitionEngine.Recognize();
        if (WakeUpWordResult == null)
        {
            return null;
        }

        // Persist the captured audio so the external STT services can consume it.
        using (System.IO.FileStream waveStream = new System.IO.FileStream(
                   Options.options.tempFolderPath + Options.options.audio.speechSynthesisFileName,
                   System.IO.FileMode.Create))
        {
            WakeUpWordResult.Audio.WriteToWaveStream(waveStream);
            waveStream.Flush();
            waveStream.Close();
        }

        // NOTE(review): the write above prefixes tempFolderPath but this read does not —
        // confirm Helpers.ReadBytesFromFileAsync resolves relative to tempFolderPath.
        byte[] bytes = await Helpers.ReadBytesFromFileAsync(Options.options.audio.speechSynthesisFileName);
        // FIX: removed an unused (and undisposed) MemoryStream that was constructed from
        // 'bytes' but never referenced afterwards.
        string text = WakeUpWordResult.Text; // fallback: local recognizer's transcription
        int sampleRate = await Audio.GetSampleRateAsync(Options.options.tempFolderPath + Options.options.audio.speechSynthesisFileName);
        System.Collections.Generic.Dictionary<string, string> apiArgs =
            new System.Collections.Generic.Dictionary<string, string>() { { "sampleRate", sampleRate.ToString() } };

        // Try each preferred STT service in order; first HTTP 200 wins.
        foreach (ISpeechToTextService STT in new FindServices<ISpeechToTextService>(
                     Options.commandservices["SpeechToText"].preferredServices).PreferredOrderingOfServices)
        {
            SpeechToTextServiceResponse r = await STT.SpeechToTextServiceAsync(bytes, apiArgs);
            if (r.StatusCode == 200)
            {
                text = r.ResponseResult;
                Console.WriteLine(r.Service.name + ":\"" + text + "\" Total Elapsed ms:" + r.TotalElapsedMilliseconds + " Request Elapsed ms:" + r.RequestElapsedMilliseconds);
                return text;
            }
            else
            {
                Console.WriteLine(r.Service.name + " not available.");
            }
        }

        // All preferred services failed — fall back to the default service.
        CallServiceResponse<ISpeechToTextServiceResponse> response = await wSTT.CallServiceAsync(bytes, apiArgs);
        text = response.ResponseResult;
        Console.WriteLine("Windows STT (default):\"" + text + "\" Total Elapsed ms:" + response.TotalElapsedMilliseconds + " Request Elapsed ms:" + response.RequestElapsedMilliseconds);
        return text;
    }
}
/// <summary>
/// Loads a Grammar object into the recognizer.
/// </summary>
/// <param name="grammar">The grammar definition to load.</param>
/// <param name="name">The name to give the Grammar.</param>
/// <param name="callback">The code to fire when the Grammar is spoken.</param>
/// <returns>The phrases of the loaded grammar.</returns>
public string LoadGrammar(DesktopGrammar grammar, string name, dynamic callback)
{
    // Remember the grammar and its callback so recognition events can dispatch to it.
    LoadedGrammar entry = new LoadedGrammar();
    entry.Grammar = grammar;
    entry.Callback = callback;
    _loadedGrammarDictionary.Add(name, entry);

    // Register the underlying System.Speech grammar with the engine under the same name.
    System.Speech.Recognition.Grammar engineGrammar = new System.Speech.Recognition.Grammar(grammar.GrammarBuilder);
    engineGrammar.Name = name;
    _recognizer.LoadGrammar(engineGrammar);

    return grammar.GetPhrases();
}
/// <summary>
/// Runs the local System.Speech dictation recognizer over a wave stream and returns the
/// recognized text, or an explanatory message when nothing was recognized.
/// </summary>
/// <param name="stream">Wave-format audio to recognize.</param>
public static async System.Threading.Tasks.Task<string> SpeechToTextServiceAsync(System.IO.MemoryStream stream)
{
    System.Globalization.CultureInfo culture = new System.Globalization.CultureInfo(Options.options.locale.language);
    using (System.Speech.Recognition.SpeechRecognitionEngine engine =
               new System.Speech.Recognition.SpeechRecognitionEngine(culture))
    {
        engine.SetInputToWaveStream(stream);
        engine.LoadGrammar(new System.Speech.Recognition.DictationGrammar());

        // Recognize() returns null when the stream yields no recognizable speech.
        System.Speech.Recognition.RecognitionResult recognized = engine.Recognize();
        return recognized == null
            ? "Speech.RecognitionEngine.Recognize returned null result"
            : recognized.Text;
    }
}
/// <summary>
/// Recognizes speech from a wave file in the temp folder using the locally installed
/// System.Speech dictation recognizer for the configured locale.
/// </summary>
/// <param name="fileName">File name relative to Options.options.tempFolderPath.</param>
/// <param name="apiArgs">Unused; kept for signature consistency with the other service methods.</param>
/// <returns>The recognized text, or null when the file is missing or nothing was recognized.</returns>
public static async System.Threading.Tasks.Task<string> SpeechToTextServicesAsync(string fileName, System.Collections.Generic.Dictionary<string, string> apiArgs)
{
    if (System.IO.File.Exists(Options.options.tempFolderPath + fileName))
    {
        System.Globalization.CultureInfo ci = new System.Globalization.CultureInfo(Options.options.locale.language);
        using (System.Speech.Recognition.SpeechRecognitionEngine RecognitionEngine =
                   new System.Speech.Recognition.SpeechRecognitionEngine(ci))
        {
            RecognitionEngine.SetInputToWaveFile(Options.options.tempFolderPath + fileName);
            RecognitionEngine.LoadGrammar(new System.Speech.Recognition.DictationGrammar());
            System.Speech.Recognition.RecognitionResult result = RecognitionEngine.Recognize();

            // BUG FIX: Recognize() returns null on silence/timeout (the sibling
            // SpeechToTextServiceAsync handles this); dereferencing result.Text here
            // would throw NullReferenceException.
            if (result == null)
            {
                return null;
            }
            return result.Text;
        }
    }
    return null;
}
/// <summary>
/// Captures one utterance from the default microphone, converts it to text, strips
/// commas, and forwards the result to speechInputCheck.
/// </summary>
public void speechToTextInit()
{
    // Creates an instance of the System.Speech recognition engine for speech-to-text.
    stt_sre = new System.Speech.Recognition.SpeechRecognitionEngine();

    // Dictation grammar allows freedom of speech — any word can be detected.
    System.Speech.Recognition.Grammar dictationGrammar = new System.Speech.Recognition.DictationGrammar();
    stt_sre.LoadGrammar(dictationGrammar);

    try
    {
        // SetInputToDefaultAudioDevice throws InvalidOperationException when no
        // audio input device is available.
        stt_sre.SetInputToDefaultAudioDevice();

        // Blocks until an utterance is recognized; returns null on silence/timeout.
        System.Speech.Recognition.RecognitionResult result = stt_sre.Recognize();
        if (result != null)
        {
            speechResult = result.Text;

            // BUG FIX: String.Replace returns a new string; the original discarded the
            // return value, so commas were never actually removed.
            speechResult = speechResult.Replace(",", "");
            Console.WriteLine("The result is: " + speechResult);
            try
            {
                // Pass the recognized text on to the rest of the application.
                speechInputCheck(speechResult);
            }
            catch (NullReferenceException null_ref)
            {
                // Logs when speechInputCheck dereferences something null, with its source.
                Console.WriteLine("NullReferenceException thrown in speech: " + null_ref.Message + null_ref.Source);
            }
        }
    }
    catch (InvalidOperationException invalid)
    {
        Console.WriteLine("InvalidOperationException in speech: " + invalid.Message + invalid.Source);
    }
    finally
    {
        // Unloads grammars so the engine can be re-initialized cleanly next call.
        stt_sre.UnloadAllGrammars();
    }
}
/// <summary>
/// Converts text into a spelled pronunciation string by running the local dictation
/// recognizer's EmulateRecognize over it and concatenating each word's pronunciation.
/// </summary>
/// <param name="text">Text to convert. Periods are stripped first because EmulateRecognize returns null when a period is present.</param>
/// <param name="apiArgs">Unused; kept for signature consistency with the other service methods.</param>
/// <returns>Space-separated pronunciations (with a trailing space), or null when there are no words.</returns>
/// <exception cref="InvalidOperationException">When EmulateRecognize returns null (e.g. invalid language input).</exception>
public static async System.Threading.Tasks.Task<string> TextToSpelledPronunciationServiceAsync(string text, System.Collections.Generic.Dictionary<string, string> apiArgs)
{
    using (System.Speech.Recognition.SpeechRecognitionEngine RecognitionEngine =
               new System.Speech.Recognition.SpeechRecognitionEngine())
    {
        RecognitionEngine.LoadGrammar(new System.Speech.Recognition.DictationGrammar());
        text = text.Replace(".", ""); // EmulateRecognize returns null if a period is in the text
        System.Speech.Recognition.RecognitionResult result = RecognitionEngine.EmulateRecognize(text);
        if (result == null)
        {
            // FIX: throw a specific standard exception type instead of the base Exception;
            // backward compatible for callers catching Exception. Message unchanged.
            throw new InvalidOperationException("TextToSpelledPronunciationServiceAsync: RecognitionResult returned null probably due to invalid language input.");
        }

        // Concatenation preserves the original's trailing-space / null-when-empty behavior.
        string pronunciations = null;
        foreach (System.Speech.Recognition.RecognizedWordUnit w in result.Words)
        {
            pronunciations += w.Pronunciation + " ";
        }
        return pronunciations;
    }
}
private void PwdGrammar()
{
    // Builds a "My Password is <wildcard>" grammar and loads it into a throwaway
    // en-US recognizer; the wildcard portion is exposed under the semantic key
    // "Password" to Grammar_SpeechRecognized.
    using (System.Speech.Recognition.SpeechRecognitionEngine recognizer =
               new System.Speech.Recognition.SpeechRecognitionEngine(new System.Globalization.CultureInfo("en-US")))
    {
        // Fixed prefix phrase.
        System.Speech.Recognition.GrammarBuilder phraseBuilder = new System.Speech.Recognition.GrammarBuilder("My Password is");

        // Wildcard segment: matches arbitrary speech after the prefix.
        System.Speech.Recognition.GrammarBuilder anyWordsBuilder = new System.Speech.Recognition.GrammarBuilder();
        anyWordsBuilder.AppendWildcard();
        phraseBuilder += new System.Speech.Recognition.SemanticResultKey("Password", anyWordsBuilder);

        System.Speech.Recognition.Grammar pwdGrammar = new System.Speech.Recognition.Grammar(phraseBuilder)
        {
            Name = "Password input",
            Enabled = true
        };
        pwdGrammar.SpeechRecognized += Grammar_SpeechRecognized;
        recognizer.LoadGrammar(pwdGrammar);
    }
}
private void PwdGrammar()
{
    // Creates a method-scoped en-US recognizer and registers a grammar matching the
    // fixed phrase "My Password is" followed by any dictated words; the trailing words
    // are captured under the semantic key "Password".
    System.Globalization.CultureInfo enUs = new System.Globalization.CultureInfo("en-US");
    using (System.Speech.Recognition.SpeechRecognitionEngine engine = new System.Speech.Recognition.SpeechRecognitionEngine(enUs))
    {
        // Wildcard segment for the spoken password itself.
        System.Speech.Recognition.GrammarBuilder wildcard = new System.Speech.Recognition.GrammarBuilder();
        wildcard.AppendWildcard();

        // Fixed prefix + keyed wildcard.
        System.Speech.Recognition.GrammarBuilder builder = new System.Speech.Recognition.GrammarBuilder("My Password is");
        builder += new System.Speech.Recognition.SemanticResultKey("Password", wildcard);

        System.Speech.Recognition.Grammar grammar = new System.Speech.Recognition.Grammar(builder);
        grammar.Name = "Password input";
        grammar.Enabled = true;
        grammar.SpeechRecognized += new EventHandler<System.Speech.Recognition.SpeechRecognizedEventArgs>(Grammar_SpeechRecognized);
        engine.LoadGrammar(grammar);
    }
}
/// <summary>
/// Toggles voice-command listening. Builds a fixed-choice command grammar and starts
/// continuous asynchronous recognition on the default audio device. Windows only.
/// </summary>
private void bVoice_Click(object sender, EventArgs e)
{
    if (MonoCompat.IsMono)
    {
        Logger.Log(LogType.Warning, "Voice commands are for windows operating systems only");
        return;
    }

    // If button was already clicked, this click cancels listening.
    if (listening)
    {
        listening = false;
        bVoice.ForeColor = System.Drawing.Color.Black;
        return;
    }

    System.Speech.Recognition.SpeechRecognitionEngine engine = new System.Speech.Recognition.SpeechRecognitionEngine();
    bVoice.ForeColor = System.Drawing.Color.Aqua;
    System.Speech.Recognition.Choices commands = new System.Speech.Recognition.Choices();
    commands.Add(new string[] { "restart", "shutdown", "status report", "players", "help" });
    System.Speech.Recognition.Grammar gr = new System.Speech.Recognition.Grammar(new System.Speech.Recognition.GrammarBuilder(commands));
    try
    {
        listening = true;
        engine.RequestRecognizerUpdate();
        engine.LoadGrammar(gr);
        engine.SpeechRecognized += engine_SpeechRecognized;
        engine.SetInputToDefaultAudioDevice();
        engine.RecognizeAsync(System.Speech.Recognition.RecognizeMode.Multiple);
        // BUG FIX: removed a synchronous engine.Recognize() call that followed
        // RecognizeAsync — calling it while async recognition is in progress throws
        // InvalidOperationException (previously hidden by the empty catch).
    }
    catch (Exception ex)
    {
        // FIX: no longer a silent swallow — reset UI state so the button does not look
        // "listening" after a failure (e.g. no microphone), and log the reason.
        listening = false;
        bVoice.ForeColor = System.Drawing.Color.Black;
        Logger.Log(LogType.Warning, "Voice command initialization failed: " + ex.Message);
        return;
    }
}
/// <summary>
/// Loops listening on the default microphone until one of the wake-up words is heard with
/// sufficient confidence, then sends the captured audio (wake-up word plus trailing speech)
/// to the preferred speech-to-text services, falling back to the default service.
/// </summary>
/// <param name="WakeUpWords">Words that trigger recognition of the remaining speech.</param>
/// <returns>The recognized text split on spaces.</returns>
public static async System.Threading.Tasks.Task<string[]> WaitForWakeUpWordThenRecognizeRemainingSpeechAsync(string[] WakeUpWords)
{
    Console.WriteLine("Say the wakeup word (" + string.Join(" ", WakeUpWords) + ") followed by the request ...");
    CallServices<ISpeechToTextService, ISpeechToTextServiceResponse> wSTT =
        new CallServices<ISpeechToTextService, ISpeechToTextServiceResponse>(null);
    System.Diagnostics.Stopwatch stopWatch = new System.Diagnostics.Stopwatch();
    stopWatch.Start(); // continues until return
    System.Globalization.CultureInfo ci = new System.Globalization.CultureInfo(Options.options.locale.language);
    while (true)
    {
        using (System.Speech.Recognition.SpeechRecognitionEngine RecognitionEngine =
                   new System.Speech.Recognition.SpeechRecognitionEngine(ci))
        {
            RecognitionEngine.SetInputToDefaultAudioDevice();

            // Build wakeup word grammar.
            System.Speech.Recognition.GrammarBuilder wakeUpWordBuilder = new System.Speech.Recognition.GrammarBuilder();
            wakeUpWordBuilder.Append(new System.Speech.Recognition.Choices(WakeUpWords));

            // Build words-after-wakeup-word grammar: a wildcard captured under a semantic key.
            System.Speech.Recognition.GrammarBuilder wordsAfterWakeUpWordBuilder = new System.Speech.Recognition.GrammarBuilder();
            wordsAfterWakeUpWordBuilder.AppendWildcard();
            // FIX: build the SemanticResultKey once and append it; the original constructed
            // an identical key twice, leaving one instance unused.
            System.Speech.Recognition.SemanticResultKey wordsAfterWakeUpWordKey =
                new System.Speech.Recognition.SemanticResultKey("wordsAfterWakeUpWordKey", wordsAfterWakeUpWordBuilder);
            wakeUpWordBuilder.Append(wordsAfterWakeUpWordKey);

            // Initialize recognizer, wait for result, save result to file.
            System.Speech.Recognition.Grammar g = new System.Speech.Recognition.Grammar(wakeUpWordBuilder);
            RecognitionEngine.LoadGrammar(g);
            if (Options.options.wakeup.initialSilenceTimeout == -1)
            {
                RecognitionEngine.InitialSilenceTimeout = TimeSpan.FromTicks(Int32.MaxValue); // never timeout
            }
            else
            {
                RecognitionEngine.InitialSilenceTimeout = TimeSpan.FromSeconds(Options.options.wakeup.initialSilenceTimeout); // timesout after this much silence
            }
            RecognitionEngine.EndSilenceTimeout = TimeSpan.FromSeconds(Options.options.wakeup.endSilenceTimeout); // maximum silence allowed after hearing wakeup word
#if true // experimenting with Babble and other timeouts
            RecognitionEngine.BabbleTimeout = TimeSpan.FromSeconds(0);
#else
            RecognitionEngine.BabbleTimeout = TimeSpan.FromTicks(UInt32.MaxValue);
#endif
            // RecognitionResult is null when some unidentified timeout expires around 30 seconds.
            // Can't find a way to make timeouts infinite so just looping.
            System.Speech.Recognition.RecognitionResult WakeUpWordResult = RecognitionEngine.Recognize();
            if (WakeUpWordResult == null)
            {
                continue;
            }

            // Persist the captured audio so the external STT services can consume it.
            using (System.IO.FileStream waveStream = new System.IO.FileStream(
                       Options.options.tempFolderPath + Options.options.audio.speechSynthesisFileName,
                       System.IO.FileMode.Create))
            {
                WakeUpWordResult.Audio.WriteToWaveStream(waveStream);
                waveStream.Flush();
                waveStream.Close();
            }
            Console.WriteLine("Wake up word detected (" + WakeUpWordResult.Words[0].Text + "): confidence:" + WakeUpWordResult.Confidence + " Elapsed Ms:" + stopWatch.ElapsedMilliseconds);

            if (WakeUpWordResult.Confidence >= Options.options.wakeup.confidence)
            {
                // NOTE(review): the write above prefixes tempFolderPath but this read does
                // not — confirm Helpers.ReadBytesFromFileAsync resolves relative to tempFolderPath.
                byte[] bytes = await Helpers.ReadBytesFromFileAsync(Options.options.audio.speechSynthesisFileName);
                string text = WakeUpWordResult.Text; // fallback: local recognizer's transcription
                int sampleRate = await Audio.GetSampleRateAsync(Options.options.tempFolderPath + Options.options.audio.speechSynthesisFileName);
                System.Collections.Generic.Dictionary<string, string> apiArgs =
                    new System.Collections.Generic.Dictionary<string, string>() { { "sampleRate", sampleRate.ToString() } };
#if false // for testing
                await windows.SpeechToTextAsync(bytes, apiArgs);
                Console.WriteLine("Windows STT (demo):\"" + windows.ResponseResult + "\" Total Elapsed ms:" + windows.TotalElapsedMilliseconds + " Request Elapsed ms:" + windows.RequestElapsedMilliseconds);
#endif
                // FIX: removed an unused (and undisposed) MemoryStream created from 'bytes'.

                // Try each preferred STT service in order; first HTTP 200 wins.
                foreach (ISpeechToTextService STT in new FindServices<ISpeechToTextService>(
                             Options.commandservices["SpeechToText"].preferredServices).PreferredOrderingOfServices)
                {
                    SpeechToTextServiceResponse r = await STT.SpeechToTextServiceAsync(bytes, apiArgs);
                    if (r.StatusCode == 200)
                    {
                        text = r.ResponseResult;
                        Console.WriteLine(r.Service.name + ":\"" + text + "\" Total Elapsed ms:" + r.TotalElapsedMilliseconds + " Request Elapsed ms:" + r.RequestElapsedMilliseconds);
                        return text.Split(" ".ToCharArray(), StringSplitOptions.None);
                    }
                    else
                    {
                        Console.WriteLine(r.Service.name + " not available.");
                    }
                }

                // All preferred services failed — fall back to the default service.
                CallServiceResponse<ISpeechToTextServiceResponse> response = await wSTT.CallServiceAsync(bytes, apiArgs);
                text = response.ResponseResult;
                Console.WriteLine("Windows STT (default):\"" + text + "\" Total Elapsed ms:" + response.TotalElapsedMilliseconds + " Request Elapsed ms:" + response.RequestElapsedMilliseconds);
                return text.Split(" ".ToCharArray(), StringSplitOptions.None);
            }
        }
    }
}