Esempio n. 1
0
        //private static System.Speech.Recognition.SpeechRecognizedEventArgs WakeUpWordResults;

        public static async System.Threading.Tasks.Task <string> MicrophoneToTextAsync()
        {
            CallServices <ISpeechToTextService, ISpeechToTextServiceResponse> wSTT = new CallServices <ISpeechToTextService, ISpeechToTextServiceResponse>(null);

            System.Globalization.CultureInfo ci = new System.Globalization.CultureInfo(Options.options.locale.language);
            using (System.Speech.Recognition.SpeechRecognitionEngine RecognitionEngine = new System.Speech.Recognition.SpeechRecognitionEngine(ci))
            {
                RecognitionEngine.SetInputToDefaultAudioDevice();
                RecognitionEngine.LoadGrammar(new System.Speech.Recognition.DictationGrammar());
                System.Speech.Recognition.RecognitionResult WakeUpWordResult = RecognitionEngine.Recognize();
                if (WakeUpWordResult == null)
                {
                    return(null);
                }
                using (System.IO.FileStream waveStream = new System.IO.FileStream(Options.options.tempFolderPath + Options.options.audio.speechSynthesisFileName, System.IO.FileMode.Create))
                {
                    WakeUpWordResult.Audio.WriteToWaveStream(waveStream);
                    waveStream.Flush();
                    waveStream.Close();
                }
                byte[] bytes = await Helpers.ReadBytesFromFileAsync(Options.options.audio.speechSynthesisFileName);

                System.IO.MemoryStream stream = new System.IO.MemoryStream(bytes);
                string text       = WakeUpWordResult.Text;
                int    sampleRate = await Audio.GetSampleRateAsync(Options.options.tempFolderPath + Options.options.audio.speechSynthesisFileName);

                System.Collections.Generic.Dictionary <string, string> apiArgs = new System.Collections.Generic.Dictionary <string, string>()
                {
                    { "sampleRate", sampleRate.ToString() }
                };
                foreach (ISpeechToTextService STT in new FindServices <ISpeechToTextService>(Options.commandservices["SpeechToText"].preferredServices).PreferredOrderingOfServices)
                {
                    // ISpeechToTextService STT = (ISpeechToTextService)constructor.Invoke(Type.EmptyTypes);;
                    SpeechToTextServiceResponse r = await STT.SpeechToTextServiceAsync(bytes, apiArgs);

                    if (r.StatusCode == 200)
                    {
                        text = r.ResponseResult;
                        Console.WriteLine(r.Service.name + ":\"" + text + "\" Total Elapsed ms:" + r.TotalElapsedMilliseconds + " Request Elapsed ms:" + r.RequestElapsedMilliseconds);
                        return(text);
                    }
                    else
                    {
                        Console.WriteLine(r.Service.name + " not available.");
                    }
                }
                CallServiceResponse <ISpeechToTextServiceResponse> response = await wSTT.CallServiceAsync(bytes, apiArgs);

                text = response.ResponseResult;
                Console.WriteLine("Windows STT (default):\"" + text + "\" Total Elapsed ms:" + response.TotalElapsedMilliseconds + " Request Elapsed ms:" + response.RequestElapsedMilliseconds);
                return(text);
            }
        }
Esempio n. 2
0
        public static async System.Threading.Tasks.Task <string[]> WaitForWakeUpWordThenRecognizeRemainingSpeechAsync(string[] WakeUpWords)
        {
            Console.WriteLine("Say the wakeup word (" + string.Join(" ", WakeUpWords) + ") followed by the request ...");
            CallServices <ISpeechToTextService, ISpeechToTextServiceResponse> wSTT = new CallServices <ISpeechToTextService, ISpeechToTextServiceResponse>(null);

            System.Diagnostics.Stopwatch stopWatch = new System.Diagnostics.Stopwatch();
            stopWatch.Start(); // continues until return
            System.Globalization.CultureInfo ci = new System.Globalization.CultureInfo(Options.options.locale.language);
            while (true)
            {
                using (System.Speech.Recognition.SpeechRecognitionEngine RecognitionEngine = new System.Speech.Recognition.SpeechRecognitionEngine(ci))
                {
                    RecognitionEngine.SetInputToDefaultAudioDevice();

                    // build wakeup word grammar
                    System.Speech.Recognition.GrammarBuilder wakeUpWordBuilder = new System.Speech.Recognition.GrammarBuilder();
                    wakeUpWordBuilder.Append(new System.Speech.Recognition.Choices(WakeUpWords));

                    // build words-after-wakeup word grammar
                    System.Speech.Recognition.GrammarBuilder wordsAfterWakeUpWordBuilder = new System.Speech.Recognition.GrammarBuilder();
                    wordsAfterWakeUpWordBuilder.AppendWildcard();
                    System.Speech.Recognition.SemanticResultKey wordsAfterWakeUpWordKey = new System.Speech.Recognition.SemanticResultKey("wordsAfterWakeUpWordKey", wordsAfterWakeUpWordBuilder);
                    wakeUpWordBuilder.Append(new System.Speech.Recognition.SemanticResultKey("wordsAfterWakeUpWordKey", wordsAfterWakeUpWordBuilder));

                    // initialize recognizer, wait for result, save result to file
                    System.Speech.Recognition.Grammar g = new System.Speech.Recognition.Grammar(wakeUpWordBuilder);
                    RecognitionEngine.LoadGrammar(g);
                    if (Options.options.wakeup.initialSilenceTimeout == -1)
                    {
                        RecognitionEngine.InitialSilenceTimeout = TimeSpan.FromTicks(Int32.MaxValue); // never timeout
                    }
                    else
                    {
                        RecognitionEngine.InitialSilenceTimeout = TimeSpan.FromSeconds(Options.options.wakeup.initialSilenceTimeout); // timesout after this much silence
                    }
                    RecognitionEngine.EndSilenceTimeout = TimeSpan.FromSeconds(Options.options.wakeup.endSilenceTimeout);             // maximum silence allowed after hearing wakeup word
#if true                                                                                                                              // experimenting with Babble and other timeouts
                    RecognitionEngine.BabbleTimeout = TimeSpan.FromSeconds(0);
#else
                    RecognitionEngine.BabbleTimeout = TimeSpan.FromTicks(UInt32.MaxValue);
#endif
                    System.Speech.Recognition.RecognitionResult WakeUpWordResult = RecognitionEngine.Recognize();
                    // RecognitionResult is null when some unidentified timeout expires around 30 seconds. Can't find a way to make timeouts infinite so just looping.
                    if (WakeUpWordResult == null)
                    {
                        continue;
                    }
                    using (System.IO.FileStream waveStream = new System.IO.FileStream(Options.options.tempFolderPath + Options.options.audio.speechSynthesisFileName, System.IO.FileMode.Create))
                    {
                        WakeUpWordResult.Audio.WriteToWaveStream(waveStream);
                        waveStream.Flush();
                        waveStream.Close();
                    }

                    Console.WriteLine("Wake up word detected (" + WakeUpWordResult.Words[0].Text + "): confidence:" + WakeUpWordResult.Confidence + " Elapsed Ms:" + stopWatch.ElapsedMilliseconds);
                    if (WakeUpWordResult.Confidence >= Options.options.wakeup.confidence)
                    {
                        byte[] bytes = await Helpers.ReadBytesFromFileAsync(Options.options.audio.speechSynthesisFileName);

                        string text       = WakeUpWordResult.Text;
                        int    sampleRate = await Audio.GetSampleRateAsync(Options.options.tempFolderPath + Options.options.audio.speechSynthesisFileName);

                        System.Collections.Generic.Dictionary <string, string> apiArgs = new System.Collections.Generic.Dictionary <string, string>()
                        {
                            { "sampleRate", sampleRate.ToString() }
                        };
#if false // for testing
                        await windows.SpeechToTextAsync(bytes, apiArgs);

                        Console.WriteLine("Windows STT (demo):\"" + windows.ResponseResult + "\" Total Elapsed ms:" + windows.TotalElapsedMilliseconds + " Request Elapsed ms:" + windows.RequestElapsedMilliseconds);
#endif
                        System.IO.MemoryStream stream = new System.IO.MemoryStream(bytes);
                        foreach (ISpeechToTextService STT in new FindServices <ISpeechToTextService>(Options.commandservices["SpeechToText"].preferredServices).PreferredOrderingOfServices)
                        {
                            // ISpeechToTextService STT = (ISpeechToTextService)constructor.Invoke(Type.EmptyTypes);
                            SpeechToTextServiceResponse r = await STT.SpeechToTextServiceAsync(bytes, apiArgs);

                            if (r.StatusCode == 200)
                            {
                                text = r.ResponseResult;
                                Console.WriteLine(r.Service.name + ":\"" + text + "\" Total Elapsed ms:" + r.TotalElapsedMilliseconds + " Request Elapsed ms:" + r.RequestElapsedMilliseconds);
                                return(text.Split(" ".ToCharArray(), StringSplitOptions.None));
                            }
                            else
                            {
                                Console.WriteLine(r.Service.name + " not available.");
                            }
                        }
                        CallServiceResponse <ISpeechToTextServiceResponse> response = await wSTT.CallServiceAsync(bytes, apiArgs);

                        text = response.ResponseResult;
                        Console.WriteLine("Windows STT (default):\"" + text + "\" Total Elapsed ms:" + response.TotalElapsedMilliseconds + " Request Elapsed ms:" + response.RequestElapsedMilliseconds);
                        return(text.Split(" ".ToCharArray(), StringSplitOptions.None));
                    }
                }
            }
        }