/// <summary>
/// Configures the voice.
/// </summary>
private static void ConfigureVoice()
{
    SpWaveFormatEx fFormat = new SpWaveFormatEx();
    ttSVoice = new TTSVoice();
    //ttSVoice.SPVoice.Voice = ttSVoice.SPVoice.GetVoices("gender=female", "").Item(0);

    StartService("AudioSrv", 20 * 1000);

    /* set the audio out to the telephony device */
    ttSVoice.MMSysAudioOut.DeviceId = (int)w2cDrv.Device.Info.WaveOutDrvID;
    WriteLog("Sound DeviceId: " + w2cDrv.Device.Info.WaveOutDrvID.ToStr());

    // 8 kHz, 16-bit, mono PCM (standard telephony format)
    fFormat.FormatTag      = 1; // WAVE_FORMAT_PCM
    fFormat.Channels       = 1;
    fFormat.SamplesPerSec  = 8000;
    fFormat.AvgBytesPerSec = 16000; // SamplesPerSec * BlockAlign
    fFormat.BlockAlign     = 2;     // Channels * BitsPerSample / 8
    fFormat.BitsPerSample  = 16;
    ttSVoice.MMSysAudioOut.Format.SetWaveFormatEx(fFormat);

    /* prevent SAPI from changing the wave format when the device changes */
    ttSVoice.SPVoice.AllowAudioOutputFormatChangesOnNextSet = false;
    ttSVoice.SPVoice.AudioOutputStream = ttSVoice.MMSysAudioOut;
}
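The StartService() call above is a helper defined elsewhere in this codebase; it ensures the Windows Audio service ("AudioSrv") is running before the output device is configured, with a 20-second timeout. A minimal sketch, assuming it wraps System.ServiceProcess.ServiceController (the body here is an assumption, not the project's actual implementation):

using System;
using System.ServiceProcess; // reference System.ServiceProcess.dll

/// <summary>
/// Hypothetical sketch of the StartService() helper: starts a Windows
/// service and blocks until it reports Running or the timeout expires.
/// </summary>
private static void StartService(string serviceName, int timeoutMs)
{
    using (var sc = new ServiceController(serviceName))
    {
        if (sc.Status == ServiceControllerStatus.Stopped)
            sc.Start();

        // throws System.ServiceProcess.TimeoutException if the timeout expires
        sc.WaitForStatus(ServiceControllerStatus.Running,
                         TimeSpan.FromMilliseconds(timeoutMs));
    }
}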
/// <summary>
/// Generate() will be called only once if there is no typed-text; it
/// should use dictation. Generate() will be called a second time if
/// there is typed-text; the second pass should use both dictation and
/// context-free-grammar (i.e., Command and Control: a Rule that's based
/// on the typed-text).
/// </summary>
void Generate()
{
#if DEBUG
    logfile.Log();
    logfile.Log("Generate() _generato= " + _generato);
#endif
    _offset = 0;
    Confidence_def_count = 0;

    // was "2" but MS doc says not needed on its end,
    // and I don't see grammar id #2 defined on this end either.
    _recoGrammar = _recoContext.CreateGrammar();
//  _recoGrammar.DictationLoad(); // ("Pronunciation") <- causes orthemes to print as phonemes instead of words

    switch (_generato)
    {
        case Generator.Dictati:
            if (_recoGrammar.Rules.FindRule(RULE) != null)
            {
#if DEBUG
                logfile.Log(". set Rule INACTIVE");
#endif
                _recoGrammar.CmdSetRuleState(RULE, SpeechRuleState.SGDSInactive);
            }
#if DEBUG
            logfile.Log(". set Dictation ACTIVE");
#endif
            _recoGrammar.DictationSetState(SpeechRuleState.SGDSActive);
            break;

        case Generator.Dialogi:
#if DEBUG
            logfile.Log(". set Dictation INACTIVE");
#endif
            _recoGrammar.DictationSetState(SpeechRuleState.SGDSInactive);

            if (_recoGrammar.Rules.FindRule(RULE) == null)
            {
#if DEBUG
                logfile.Log(". . add \"" + RULE + "\" Rule");
#endif
                ISpeechGrammarRule rule = _recoGrammar.Rules.Add(RULE, SpeechRuleAttributes.SRATopLevel, 1);
                rule.InitialState.AddWordTransition(null,
                                                    _text,
                                                    " ",
                                                    SpeechGrammarWordType.SGLexical,
                                                    RULE,
                                                    1);
                _recoGrammar.Rules.Commit();
            }
#if DEBUG
            logfile.Log(". set Rule ACTIVE");
#endif
            _recoGrammar.CmdSetRuleState(RULE, SpeechRuleState.SGDSActive);

//          logfile.Log(". max alternates(pre)= " + _recoContext.CmdMaxAlternates);
//          _recoContext.CmdMaxAlternates = 3;
//          logfile.Log(". max alternates(pos)= " + _recoContext.CmdMaxAlternates);
            break;
    }

#if DEBUG
    logfile.Log(". create (SpFileStream)_fs");
#endif
    _fs = new SpFileStream();
#if DEBUG
    logfile.Log(". (SpFileStream)_fs CREATED");
#endif
//  _fs.Format.Type = SpeechAudioFormatType.SAFT44kHz16BitMono;

#if DEBUG
    logfile.Log(". Open Wavefile _fs");
#endif
    _fs.Open(Wavefile);
#if DEBUG
    logfile.Log(". _fs.Format.Type= " + _fs.Format.Type); // SpeechAudioFormatType.SAFT44kHz16BitMono

    SpWaveFormatEx data = _fs.Format.GetWaveFormatEx();
    logfile.Log(". . SamplesPerSec= "  + data.SamplesPerSec);
    logfile.Log(". . BitsPerSample= "  + data.BitsPerSample);
    logfile.Log(". . AvgBytesPerSec= " + data.AvgBytesPerSec);
    logfile.Log(". . Channels= "       + data.Channels);
    logfile.Log(". . BlockAlign= "     + data.BlockAlign);
    logfile.Log(". . FormatTag= "      + data.FormatTag);
    logfile.Log(". . ExtraData= "      + data.ExtraData);

    // filestream byte-data ->
//  int bytes, pos = 0;
//  object o = new byte[2];
//  while ((bytes = _fs.Read(out o, 2)) > 0)
//  {
//      var buffer = (byte[])o;
//      logfile.Log(pos + " : " + buffer[1] + " " + buffer[0]); // treat as little-endian shorts
//      pos += bytes;
//  }
//  _fs.Seek(0);

    logfile.Log(". assign _fs to _recognizer.AudioInputStream");
#endif
    _recognizer.AudioInputStream = _fs; // <- start Recognition <--

#if DEBUG
    logfile.Log("Generate() DONE");
    logfile.Log();
#endif
}
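Generate() only arms the grammar and starts recognition by assigning the input stream; results arrive asynchronously through the reco-context's Recognition event. A minimal sketch of such a handler, assuming _recoContext is a SpeechLib SpInProcRecoContext and that the handler name and body here are illustrative rather than this project's actual code:

// wired up once, e.g. after creating the context:
//   _recoContext.Recognition += OnRecognition;

void OnRecognition(int StreamNumber, object StreamPosition,
                   SpeechRecognitionType RecognitionType,
                   ISpeechRecoResult Result)
{
    // the full recognized text, with display attributes applied
    logfile.Log("recognized= " + Result.PhraseInfo.GetText(0, -1, true));

    // per-word detail: lexical form, engine confidence, byte-offset into the stream
    foreach (ISpeechPhraseElement word in Result.PhraseInfo.Elements)
    {
        logfile.Log(". " + word.LexicalForm
                  + " conf= "   + word.EngineConfidence
                  + " offset= " + word.AudioStreamOffset);
    }
}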