/// <summary>
/// Synthesises <paramref name="script"/> to an in-memory wave stream, runs it through the
/// requested audio effects and plays it, blocking until playback stops or a timeout equal
/// to the length of the processed audio elapses.
/// </summary>
/// <param name="script">Script to speak; converted with <c>SpeechFromScript</c>. Nothing happens when null.</param>
/// <param name="voice">Voice to select; when null or whitespace, <c>configuration.StandardVoice</c> is used.</param>
/// <param name="echoDelay">Left/right delay given to the echo effect; 0 disables echo.</param>
/// <param name="distortionLevel">Edge of the distortion effect; also used to tone down the reverb and echo mix.</param>
/// <param name="chorusLevel">Depth of the chorus effect; 0 disables chorus.</param>
/// <param name="reverbLevel">Scales the reverb mix; 0 disables reverb.</param>
/// <param name="compressLevel">Not used by this method.</param>
/// <param name="radio">When true, reverb and echo are skipped and a fixed distortion plus compressor are added instead.</param>
/// <remarks>
/// Exceptions raised while synthesising or playing are written to the speech log rather than rethrown.
/// </remarks>
public void Speak(string script, string voice, int echoDelay, int distortionLevel, int chorusLevel, int reverbLevel, int compressLevel, bool radio)
{
    if (script == null)
    {
        return;
    }

    try
    {
        using (SpeechSynthesizer synth = new SpeechSynthesizer())
        using (MemoryStream stream = new MemoryStream())
        {
            if (String.IsNullOrWhiteSpace(voice))
            {
                voice = configuration.StandardVoice;
            }
            if (voice != null)
            {
                try
                {
                    synth.SelectVoice(voice);
                }
                catch (Exception ex)
                {
                    // Best effort: carry on with whatever voice the synthesizer already has,
                    // but record the failure instead of silently discarding it
                    AppendToSpeechLog("Failed to select voice " + voice + ": " + ex);
                }
            }
            synth.Rate = configuration.Rate;

            synth.SetOutputToWaveStream(stream);
            string speech = SpeechFromScript(script);
            if (speech.Contains("<phoneme"))
            {
                // Phoneme markup has to be wrapped in a full SSML document before it can be spoken
                speech = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><speak version=\"1.0\" xmlns=\"http://www.w3.org/2001/10/synthesis\" xml:lang=\"" + locale + "\"><s>" + speech + "</s></speak>";
                synth.SpeakSsml(speech);
            }
            else
            {
                synth.Speak(speech);
            }
            stream.Seek(0, SeekOrigin.Begin);

            AppendToSpeechLog("Turned script " + script + " in to speech " + speech);

            IWaveSource source = new WaveFileReader(stream);
            try
            {
                // We need to extend the duration of the wave source if we have any effects going on
                if (chorusLevel != 0 || reverbLevel != 0 || echoDelay != 0)
                {
                    // Add a base of 500ms plus 10ms per effect level over 50
                    source = source.AppendSource(x => new ExtendedDurationWaveSource(x, 500 + Math.Max(0, (configuration.EffectsLevel - 50) * 10)));
                }

                // Add various effects...

                // We always have chorus
                if (chorusLevel != 0)
                {
                    source = source.AppendSource(x => new DmoChorusEffect(x)
                    {
                        Depth = chorusLevel,
                        WetDryMix = Math.Min(100, (int)(180 * ((decimal)configuration.EffectsLevel) / ((decimal)100))),
                        Delay = 16,
                        Frequency = 2,
                        Feedback = 25
                    });
                }

                // We only have reverb and echo if we're not transmitting or receiving
                if (!radio)
                {
                    if (reverbLevel != 0)
                    {
                        // We tone down the reverb level with the distortion level, as the combination is nasty
                        source = source.AppendSource(x => new DmoWavesReverbEffect(x)
                        {
                            ReverbTime = (int)(1 + 999 * ((decimal)configuration.EffectsLevel) / ((decimal)100)),
                            ReverbMix = Math.Max(-96, -96 + (96 * reverbLevel / 100) - distortionLevel)
                        });
                    }

                    if (echoDelay != 0)
                    {
                        // We tone down the echo level with the distortion level, as the combination is nasty
                        source = source.AppendSource(x => new DmoEchoEffect(x)
                        {
                            LeftDelay = echoDelay,
                            RightDelay = echoDelay,
                            WetDryMix = Math.Max(5, (int)(10 * ((decimal)configuration.EffectsLevel) / ((decimal)100)) - distortionLevel),
                            Feedback = Math.Max(0, 10 - distortionLevel / 2)
                        });
                    }
                }

                if (configuration.EffectsLevel > 0 && distortionLevel > 0)
                {
                    source = source.AppendSource(x => new DmoDistortionEffect(x)
                    {
                        Edge = distortionLevel,
                        Gain = -6 - (distortionLevel / 2),
                        PostEQBandwidth = 4000,
                        PostEQCenterFrequency = 4000
                    });
                }

                if (radio)
                {
                    source = source.AppendSource(x => new DmoDistortionEffect(x)
                    {
                        Edge = 7,
                        Gain = -4 - distortionLevel / 2,
                        PostEQBandwidth = 2000,
                        PostEQCenterFrequency = 6000
                    });
                    source = source.AppendSource(x => new DmoCompressorEffect(x)
                    {
                        Attack = 1,
                        Ratio = 3,
                        Threshold = -10
                    });
                }

                // NOTE(review): the wait handle is intentionally not disposed here; the Stopped
                // handler below may still signal it when Stop() is called after the timed wait.
                EventWaitHandle waitHandle = new EventWaitHandle(false, EventResetMode.AutoReset);
                var soundOut = new WasapiOut();
                try
                {
                    soundOut.Initialize(source);
                    soundOut.Stopped += (s, e) => waitHandle.Set();

                    // Register under the same lock that guards removal below
                    lock (activeSpeeches)
                    {
                        activeSpeeches.Add(soundOut);
                    }
                    soundOut.Play();

                    // Add a timeout, in case it doesn't come back
                    waitHandle.WaitOne(source.GetTime(source.Length));
                }
                finally
                {
                    // It's possible that this has been disposed of, so ensure that it's still there
                    // before we try to finish it. Doing this in a finally block also stops a failed
                    // playback from leaving a stale entry in activeSpeeches.
                    lock (activeSpeeches)
                    {
                        if (activeSpeeches.Contains(soundOut))
                        {
                            activeSpeeches.Remove(soundOut);
                            soundOut.Stop();
                            soundOut.Dispose();
                        }
                    }
                }
            }
            finally
            {
                // Release the source chain even when effect setup or playback failed
                source.Dispose();
            }
        }
    }
    catch (Exception ex)
    {
        AppendToSpeechLog("Caught exception " + ex);
    }
}

/// <summary>
/// Appends one line, prefixed with the current managed thread id, to speech.log in the EDDI
/// folder under the directory named by the AppData environment variable.
/// </summary>
private static void AppendToSpeechLog(string message)
{
    using (System.IO.StreamWriter file = new System.IO.StreamWriter(Environment.GetEnvironmentVariable("AppData") + @"\EDDI\speech.log", true))
    {
        file.WriteLine("" + System.Threading.Thread.CurrentThread.ManagedThreadId + ": " + message);
    }
}
/// <summary>
/// Synthesises <paramref name="speech"/> on a dedicated background thread, runs it through the
/// requested audio effects and plays it, optionally blocking the caller until that thread ends.
/// </summary>
/// <param name="speech">Text to speak. Treated as SSML when it contains "&lt;phoneme" or "&lt;break". Nothing happens when null.</param>
/// <param name="voice">Voice to select; when null or whitespace, <c>configuration.StandardVoice</c> is used.
/// Voices whose name contains "Microsoft Server Speech Text to Speech Voice" are never selected.</param>
/// <param name="echoDelay">Left/right delay given to the echo effect; 0 disables echo.</param>
/// <param name="distortionLevel">Edge of the distortion effect; also used to tone down the reverb and echo mix.</param>
/// <param name="chorusLevel">Depth of the chorus effect; 0 disables chorus.</param>
/// <param name="reverbLevel">Scales the reverb mix; 0 disables reverb.</param>
/// <param name="compressLevel">Not used by this method.</param>
/// <param name="wait">When true, the calling thread joins the speech thread before returning.</param>
/// <param name="priority">Passed to <c>StartSpeech</c>; when lower than <c>activeSpeechPriority</c>,
/// <c>StopCurrentSpeech</c> is called before this speech starts.</param>
/// <remarks>
/// Exceptions raised on the speech thread are logged through <c>Logging.Error</c> rather than rethrown.
/// </remarks>
public void Speak(string speech, string voice, int echoDelay, int distortionLevel, int chorusLevel, int reverbLevel, int compressLevel, bool wait = true, int priority = 3)
{
    if (speech == null)
    {
        return;
    }

    Thread speechThread = new Thread(() =>
    {
        string finalSpeech = null;
        try
        {
            using (SpeechSynthesizer synth = new SpeechSynthesizer())
            using (MemoryStream stream = new MemoryStream())
            {
                if (string.IsNullOrWhiteSpace(voice))
                {
                    voice = configuration.StandardVoice;
                }
                if (voice != null && !voice.Contains("Microsoft Server Speech Text to Speech Voice"))
                {
                    try
                    {
                        Logging.Debug("Selecting voice " + voice);
                        synth.SelectVoice(voice);
                        Logging.Debug("Selected voice " + synth.Voice.Name);
                    }
                    catch (Exception ex)
                    {
                        Logging.Error("Failed to select voice " + voice, ex);
                    }
                }
                Logging.Debug("Post-selection");

                // The conditional must be parenthesised: '+' binds tighter than '==', so without
                // the parentheses the prefix is concatenated first and the null test never matches
                Logging.Debug("Configuration is " + (configuration == null ? "<null>" : JsonConvert.SerializeObject(configuration)));
                synth.Rate = configuration.Rate;
                Logging.Debug("Rate is " + synth.Rate);

                synth.Volume = configuration.Volume;
                Logging.Debug("Volume is " + synth.Volume);

                synth.StateChanged += new EventHandler<StateChangedEventArgs>(synth_StateChanged);
                Logging.Debug("Tracking state changes");

                synth.SetOutputToWaveStream(stream);
                Logging.Debug("Output set to stream");

                if (speech.Contains("<phoneme") || speech.Contains("<break"))
                {
                    Logging.Debug("Speech is SSML");
                    if (configuration.DisableSsml)
                    {
                        Logging.Debug("Disabling SSML at user request");
                        // User has disabled SSML so remove it
                        finalSpeech = Regex.Replace(speech, "<.*?>", string.Empty);
                        synth.Speak(finalSpeech);
                    }
                    else
                    {
                        Logging.Debug("Obtaining best guess culture");
                        string culture = bestGuessCulture(synth);
                        Logging.Debug("Best guess culture is " + culture);
                        // Reuse the culture obtained above so the logged value is the one actually used
                        finalSpeech = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><speak version=\"1.0\" xmlns=\"http://www.w3.org/2001/10/synthesis\" xml:lang=\"" + culture + "\"><s>" + speech + "</s></speak>";
                        Logging.Debug("SSML speech: " + finalSpeech);
                        try
                        {
                            Logging.Debug("Speaking SSML");
                            synth.SpeakSsml(finalSpeech);
                            Logging.Debug("Finished speaking SSML");
                        }
                        catch (Exception ex)
                        {
                            Logging.Error("Best guess culture of " + culture + " for voice " + synth.Voice.Name + " was incorrect", ex);
                            Logging.Info("SSML does not work for the chosen voice; falling back to normal speech");
                            // Try again without Ssml
                            finalSpeech = Regex.Replace(speech, "<.*?>", string.Empty);
                            synth.Speak(finalSpeech);
                        }
                    }
                }
                else
                {
                    Logging.Debug("Speech does not contain SSML");
                    Logging.Debug("Speech: " + speech);
                    finalSpeech = speech;
                    Logging.Debug("Speaking normal speech");
                    synth.Speak(finalSpeech);
                    Logging.Debug("Finished speaking normal speech");
                }
                Logging.Debug("Seeking back to the beginning of the stream");
                stream.Seek(0, SeekOrigin.Begin);

                Logging.Debug("Setting up source from stream");
                IWaveSource source = new WaveFileReader(stream);
                try
                {
                    // We need to extend the duration of the wave source if we have any effects going on
                    if (chorusLevel != 0 || reverbLevel != 0 || echoDelay != 0)
                    {
                        // Add a base of 500ms plus 10ms per effect level over 50.
                        // Computed as a number first so the log shows the sum rather than "500" followed by the extra.
                        int extensionMs = 500 + Math.Max(0, (configuration.EffectsLevel - 50) * 10);
                        Logging.Debug("Extending duration by " + extensionMs + "ms");
                        source = source.AppendSource(x => new ExtendedDurationWaveSource(x, extensionMs));
                    }

                    // Add various effects...
                    Logging.Debug("Effects level is " + configuration.EffectsLevel + ", chorus level is " + chorusLevel + ", reverb level is " + reverbLevel + ", echo delay is " + echoDelay);

                    // We always have chorus
                    if (chorusLevel != 0)
                    {
                        Logging.Debug("Adding chorus");
                        source = source.AppendSource(x => new DmoChorusEffect(x)
                        {
                            Depth = chorusLevel,
                            WetDryMix = Math.Min(100, (int)(180 * ((decimal)configuration.EffectsLevel) / ((decimal)100))),
                            Delay = 16,
                            Frequency = (configuration.EffectsLevel / 10),
                            Feedback = 25
                        });
                    }

                    // We only have reverb and echo if we're not transmitting or receiving
                    //if (!radio)
                    //{
                    if (reverbLevel != 0)
                    {
                        Logging.Debug("Adding reverb");
                        // We tone down the reverb level with the distortion level, as the combination is nasty
                        source = source.AppendSource(x => new DmoWavesReverbEffect(x)
                        {
                            ReverbTime = (int)(1 + 999 * ((decimal)configuration.EffectsLevel) / ((decimal)100)),
                            ReverbMix = Math.Max(-96, -96 + (96 * reverbLevel / 100) - distortionLevel)
                        });
                    }

                    if (echoDelay != 0)
                    {
                        Logging.Debug("Adding echo");
                        // We tone down the echo level with the distortion level, as the combination is nasty
                        source = source.AppendSource(x => new DmoEchoEffect(x)
                        {
                            LeftDelay = echoDelay,
                            RightDelay = echoDelay,
                            WetDryMix = Math.Max(5, (int)(10 * ((decimal)configuration.EffectsLevel) / ((decimal)100)) - distortionLevel),
                            Feedback = Math.Max(0, 10 - distortionLevel / 2)
                        });
                    }
                    //}

                    if (configuration.EffectsLevel > 0 && distortionLevel > 0)
                    {
                        Logging.Debug("Adding distortion");
                        source = source.AppendSource(x => new DmoDistortionEffect(x)
                        {
                            Edge = distortionLevel,
                            Gain = -distortionLevel / 2,
                            PostEQBandwidth = 4000,
                            PostEQCenterFrequency = 4000
                        });
                    }

                    //if (radio)
                    //{
                    //    source = source.AppendSource(x => new DmoDistortionEffect(x) { Edge = 7, Gain = -distortionLevel / 2, PostEQBandwidth = 2000, PostEQCenterFrequency = 6000 });
                    //    source = source.AppendSource(x => new DmoCompressorEffect(x) { Attack = 1, Ratio = 3, Threshold = -10 });
                    //}

                    if (priority < activeSpeechPriority)
                    {
                        Logging.Debug("About to StopCurrentSpeech");
                        StopCurrentSpeech();
                        Logging.Debug("Finished StopCurrentSpeech");
                    }

                    Logging.Debug("Creating waitHandle");
                    EventWaitHandle waitHandle = new EventWaitHandle(false, EventResetMode.AutoReset);
                    Logging.Debug("Setting up soundOut");
                    var soundOut = GetSoundOut();
                    Logging.Debug("Setting up soundOut");
                    soundOut.Initialize(source);
                    Logging.Debug("Configuring waitHandle");
                    soundOut.Stopped += (s, e) => waitHandle.Set();

                    Logging.Debug("Starting speech");
                    StartSpeech(soundOut, priority);

                    Logging.Debug("Waiting for speech");
                    // Add a timeout, in case it doesn't come back with the signal
                    waitHandle.WaitOne(source.GetTime(source.Length));
                    Logging.Debug("Finished waiting for speech");

                    Logging.Debug("Stopping speech (just to be sure)");
                    StopCurrentSpeech();
                }
                finally
                {
                    // Release the source chain even when effect setup or playback failed
                    Logging.Debug("Disposing of speech source");
                    source.Dispose();
                }
            }
        }
        catch (Exception ex)
        {
            Logging.Error("Failed to speak \"" + finalSpeech + "\"", ex);
        }
    });

    Logging.Debug("Setting thread name");
    speechThread.Name = "Speech service speak";
    Logging.Debug("Setting thread background");
    speechThread.IsBackground = true;
    try
    {
        Logging.Debug("Starting speech thread");
        speechThread.Start();
        if (wait)
        {
            Logging.Debug("Waiting for speech thread");
            speechThread.Join();
            Logging.Debug("Finished waiting for speech thread");
        }
    }
    catch (ThreadAbortException tax)
    {
        // The caller was aborted while starting or joining; cancel the abort and log it
        Thread.ResetAbort();
        Logging.Error(speech, tax);
    }
    catch (Exception ex)
    {
        Logging.Error(speech, ex);
        speechThread.Abort();
    }
}