private static void SynthToCam(string text, CameraWindow cw)
{
    // Synthesizes `text` to PCM audio in memory and streams it to the camera
    // window's talk target (a speaker on/near the camera).
    // Format: PCM, 11025 Hz, 16-bit, mono.
    // NOTE(review): average bytes/sec is passed as 22100 but 11025 * 2 = 22050 — confirm intended.
    var synthFormat = new System.Speech.AudioFormat.SpeechAudioFormatInfo(System.Speech.AudioFormat.EncodingFormat.Pcm, 11025, 16, 1, 22100, 2, null);
    using (var synthesizer = new SpeechSynthesizer())
    {
        using (var waveStream = new MemoryStream())
        {
            // One second of leading silence (22050 bytes at 22050 bytes/sec)
            // to allow the camera to initialise properly.
            var silence = new byte[22050];
            // BUG FIX: use the array's Length property rather than LINQ Count()
            // (consistent with the sibling overload of this method).
            waveStream.Write(silence, 0, silence.Length);

            // Build the prompt: strong emphasis, slow rate, extra-loud volume,
            // adult male voice.
            var pbuilder = new PromptBuilder();
            var pStyle = new PromptStyle
            {
                Emphasis = PromptEmphasis.Strong,
                Rate = PromptRate.Slow,
                Volume = PromptVolume.ExtraLoud
            };
            pbuilder.StartStyle(pStyle);
            pbuilder.StartParagraph();
            pbuilder.StartVoice(VoiceGender.Male, VoiceAge.Adult, 2);
            pbuilder.StartSentence();
            pbuilder.AppendText(text);
            pbuilder.EndSentence();
            pbuilder.EndVoice();
            pbuilder.EndParagraph();
            pbuilder.EndStyle();

            // Render the synthesized speech into the memory stream.
            synthesizer.SetOutputToAudioStream(waveStream, synthFormat);
            synthesizer.Speak(pbuilder);
            synthesizer.SetOutputToNull();

            // Trailing silence to allow the camera to end properly.
            waveStream.Write(silence, 0, silence.Length);
            waveStream.Seek(0, SeekOrigin.Begin);

            var ds = new DirectStream(waveStream) { RecordingFormat = new WaveFormat(11025, 16, 1) };
            var talkTarget = TalkHelper.GetTalkTarget(cw.Camobject, ds);
            ds.Start();
            talkTarget.Start();
            // Poll until the stream has fully played out.
            while (ds.IsRunning)
            {
                Thread.Sleep(100);
            }
            ds.Stop();
            talkTarget.Stop();
            talkTarget = null;
            ds = null;
            // waveStream is disposed by the enclosing using; the original's
            // explicit Close() was redundant and has been removed.
        }
    }
}
private static void SynthToCam(string text, CameraWindow cw)
{
    // Synthesizes `text` to PCM audio in memory and streams it to the camera
    // window's talk target (a speaker on/near the camera).
    // Format: PCM, 11025 Hz, 16-bit, mono.
    // NOTE(review): average bytes/sec is passed as 22100 but 11025 * 2 = 22050 — confirm intended.
    var synthFormat = new System.Speech.AudioFormat.SpeechAudioFormatInfo(System.Speech.AudioFormat.EncodingFormat.Pcm, 11025, 16, 1, 22100, 2, null);
    using (var synthesizer = new SpeechSynthesizer())
    {
        using (var waveStream = new MemoryStream())
        {
            //write some silence to the stream to allow camera to initialise properly
            // (22050 bytes = one second of audio at 11025 Hz / 16-bit / mono)
            var silence = new byte[1 * 22050];
            waveStream.Write(silence, 0, silence.Length);

            // Build the prompt: strong emphasis, slow rate, extra-loud volume,
            // adult male voice.
            var pbuilder = new PromptBuilder();
            var pStyle = new PromptStyle
            {
                Emphasis = PromptEmphasis.Strong,
                Rate = PromptRate.Slow,
                Volume = PromptVolume.ExtraLoud
            };
            pbuilder.StartStyle(pStyle);
            pbuilder.StartParagraph();
            pbuilder.StartVoice(VoiceGender.Male, VoiceAge.Adult, 2);
            pbuilder.StartSentence();
            pbuilder.AppendText(text);
            pbuilder.EndSentence();
            pbuilder.EndVoice();
            pbuilder.EndParagraph();
            pbuilder.EndStyle();

            // Render the synthesized speech into the memory stream.
            synthesizer.SetOutputToAudioStream(waveStream, synthFormat);
            synthesizer.Speak(pbuilder);
            synthesizer.SetOutputToNull();

            //write some silence to the stream to allow camera to end properly
            waveStream.Write(silence, 0, silence.Length);
            waveStream.Seek(0, SeekOrigin.Begin);

            var ds = new DirectStream(waveStream) { RecordingFormat = new WaveFormat(11025, 16, 1) };
            var talkTarget = TalkHelper.GetTalkTarget(cw.Camobject, ds);
            ds.Start();
            talkTarget.Start();
            // Poll until the stream has fully played out.
            while (ds.IsRunning)
            {
                Thread.Sleep(100);
            }
            ds.Stop();
            talkTarget.Stop();
            talkTarget = null;
            ds = null;
        }
    }
}
private void makeWaveButton_Click(object sender, RoutedEventArgs e)
{
    // Renders the text box contents to a WAV file at PathToWav, flags success,
    // and closes the dialog.  Format: 8 kHz, 8-bit, mono PCM.
    System.Speech.AudioFormat.SpeechAudioFormatInfo formatInfo = new System.Speech.AudioFormat.SpeechAudioFormatInfo(8000, System.Speech.AudioFormat.AudioBitsPerSample.Eight, System.Speech.AudioFormat.AudioChannel.Mono);
    speaker.SetOutputToWaveFile(PathToWav, formatInfo);
    // Speak() is synchronous, so the file is complete when it returns.
    speaker.Speak(textBox1.Text);
    // Redirect output back to the default device, releasing the file handle.
    speaker.SetOutputToDefaultAudioDevice();
    result = true; // signal to the caller that a file was produced
    // NOTE(review): `speaker` appears to be a field; it is disposed here because
    // the window closes immediately after — confirm it is not reused elsewhere.
    speaker.Dispose();
    Close();
}
public static async System.Threading.Tasks.Task<Byte[]> TextToSpeechServiceAsync(string text, System.Collections.Generic.Dictionary<string, string> apiArgs)
{
    // Synthesizes `text` to a temporary WAV file with the local System.Speech
    // engine and returns the file's bytes.  `apiArgs["sampleRate"]` selects the
    // output sample rate; bit depth / channels / voice come from Options.
    Log.WriteLine("text:\"" + text + "\"");
    //System.Globalization.CultureInfo ci = new System.Globalization.CultureInfo(Options.options.locale.language);

    // BUG FIX: the original wrote the WAV to tempFolderPath + fileName but read
    // back only fileName — build the full path once and use it for both.
    // NOTE(review): if ReadBytesFromFileAsync already prefixes tempFolderPath
    // internally, revert the read-back argument.
    string speechFilePath = Options.options.tempFolderPath + Options.options.audio.speechSynthesisFileName;

    using (System.Speech.Synthesis.SpeechSynthesizer synth = new System.Speech.Synthesis.SpeechSynthesizer())
    {
        // Explicitly specify audio settings. All services are ok with 16000/16/1. It's ok to cast options to enums as their values are identical.
        int sampleRate = int.Parse(apiArgs["sampleRate"]);
        System.Speech.AudioFormat.SpeechAudioFormatInfo si = new System.Speech.AudioFormat.SpeechAudioFormatInfo(sampleRate, (System.Speech.AudioFormat.AudioBitsPerSample)WoundifyShared.Options.options.audio.bitDepth, (System.Speech.AudioFormat.AudioChannel)WoundifyShared.Options.options.audio.channels);
        // TODO: use memory based file instead
        synth.SetOutputToWaveFile(speechFilePath, si);
        synth.SelectVoiceByHints((System.Speech.Synthesis.VoiceGender)Options.commandservices["TextToSpeech"].voiceGender, (System.Speech.Synthesis.VoiceAge)Options.commandservices["TextToSpeech"].voiceAge);
        // Synchronous: the file is fully written when Speak returns and the
        // synthesizer is disposed (closing the wave file) on leaving the using.
        synth.Speak(text);
    }
    return await Helpers.ReadBytesFromFileAsync(speechFilePath);
}
private void Save_BTN_Click(object sender, RoutedEventArgs e)
{
    // Saves the entered text as a WAV file using the voice selected in the
    // dropdown.
    // BUG FIX: SetOutputPath() was called twice — once with its result discarded
    // and again as the SetOutputToWaveFile argument.  Call it once and reuse
    // the value (it may prompt the user or have other side effects).
    string outputPath = SetOutputPath();
    synth.SelectVoice(installedVoices[VoiceDropdown.SelectedIndex].VoiceInfo.Name);
    // Speech audio format: 16-bit stereo.
    // NOTE(review): 84300 Hz is not a standard sample rate (44100/48000 are
    // typical) — confirm this value is intended.
    System.Speech.AudioFormat.SpeechAudioFormatInfo audioFormatInfo = new System.Speech.AudioFormat.SpeechAudioFormatInfo(84300, System.Speech.AudioFormat.AudioBitsPerSample.Sixteen, System.Speech.AudioFormat.AudioChannel.Stereo);
    synth.SetOutputToWaveFile(outputPath, audioFormatInfo);
    synth.Speak(TextToSpeak.Text);
    // Detach from the file so it is flushed and closed.
    synth.SetOutputToNull();
}
public async Task Speak(string text, BufferedWaveProvider waveProvider, int rate)
{
    // Speaks `text` into the supplied wave provider using the local
    // System.Speech synthesizer.  The audio format is derived from the
    // provider's WaveFormat; the enum casts are value-compatible.
    var fmt = new System.Speech.AudioFormat.SpeechAudioFormatInfo(waveProvider.WaveFormat.SampleRate, (System.Speech.AudioFormat.AudioBitsPerSample)waveProvider.WaveFormat.BitsPerSample, (System.Speech.AudioFormat.AudioChannel)waveProvider.WaveFormat.Channels);
    // NAMING FIX: underscore prefix is reserved for fields; this is a local.
    var synthesizer = new SpeechSynthesizer()
    {
        Volume = 100, // 0...100
        Rate = rate, // -10...10
    };
    synthesizer.SelectVoice(Voice.VoiceInfo.Name);
    synthesizer.SetOutputToAudioStream(new OutputStream(waveProvider), fmt);
    // Fire-and-forget: SpeakAsync queues the prompt and returns immediately.
    // NOTE(review): the synthesizer is deliberately not disposed here —
    // disposing before SpeakAsync completes would cut the speech off.
    // Consider disposing from a SpeakCompleted handler.
    synthesizer.SpeakAsync(text);
    // IDIOM FIX: Task.CompletedTask replaces the equivalent Task.FromResult(0).
    await Task.CompletedTask;
}
public async Task Speak(string text, BufferedWaveProvider waveProvider, int rate)
{
    // Synthesizes `text` via the Azure Cognitive Services Speech SDK and pushes
    // the resulting PCM audio into `waveProvider`.
    // NOTE(review): `rate` is accepted but never applied to the synthesis —
    // confirm whether SSML prosody was intended to honor it.
    // CLEANUP: removed an unused SpeechAudioFormatInfo local, an unused
    // totalSize accumulator, and a large commented-out NAudio conversion block.

    // Creates an instance of a speech config with specified subscription key and service region.
    // Replace with your own subscription key and service region (e.g., "westus").
    var config = SpeechConfig.FromSubscription(Key, Region);
    config.SpeechSynthesisLanguage = Language;
    config.SpeechSynthesisVoiceName = Voice;

    // Pull stream whose format matches the provider so the synthesized bytes
    // can be copied straight into it without conversion.
    using (var stream = AudioOutputStream.CreatePullStream(AudioStreamFormat.GetWaveFormatPCM((uint)waveProvider.WaveFormat.SampleRate, (byte)waveProvider.WaveFormat.BitsPerSample, (byte)waveProvider.WaveFormat.Channels)))
    {
        // Creates a speech synthesizer using audio stream output.
        using (var streamConfig = AudioConfig.FromStreamOutput(stream))
        using (var synthesizer = new SpeechSynthesizer(config, streamConfig))
        {
            using (var result = await synthesizer.SpeakTextAsync(text))
            {
                // On success there is nothing to do here; the audio is pulled
                // from `stream` below.  Only cancellation needs reporting.
                if (result.Reason == ResultReason.Canceled)
                {
                    var cancellation = SpeechSynthesisCancellationDetails.FromResult(result);
                    OnLog?.Invoke($"CANCELED: Reason={cancellation.Reason}");
                    if (cancellation.Reason == CancellationReason.Error)
                    {
                        OnLog?.Invoke($"CANCELED: ErrorCode={cancellation.ErrorCode}");
                        OnLog?.Invoke($"CANCELED: ErrorDetails=[{cancellation.ErrorDetails}]");
                        OnLog?.Invoke($"CANCELED: Did you update the subscription info?");
                    }
                }
            }
        }

        // Reads (pulls) data from the stream until exhausted, forwarding each
        // chunk into the wave provider for playback.
        byte[] buffer = new byte[32000];
        uint filledSize = 0;
        while ((filledSize = stream.Read(buffer)) > 0)
        {
            waveProvider.AddSamples(buffer, 0, (int)filledSize);
        }
    }
}
static void Main(string[] args)
{
    // Command-line text-to-speech front end.  Text can come from the argument
    // list, an input file (--input), or stdin (piped or interactive); output
    // goes to the default audio device or a WAV file (--output).
    SpeechSynthesizer s = new SpeechSynthesizer();
    int Volume = s.Volume;
    int Rate = s.Rate;
    string line;
    bool mode_interactive = false;
    bool mode_tee = false;
    bool mode_scroll_inspect = false;
    bool mode_report_progress = false;
    int consumed = 0; // the number of args that are used up - and thusly not the payload
    string InputFilename = "";
    string OutputFilename = "";

    // sound properties
    System.Speech.AudioFormat.AudioChannel numchannels = System.Speech.AudioFormat.AudioChannel.Stereo;
    System.Speech.AudioFormat.AudioBitsPerSample numbits = System.Speech.AudioFormat.AudioBitsPerSample.Sixteen;
    // NOTE(review): khz * 1000 below yields 44000 Hz, not the standard 44100 — confirm intended.
    Int32 khz = 44;

    // So... unicode. Notepad for instance can save accented characters in ANSI mode but you really want utf-8
    bool unicode_warning_sent = false;

    // This annoys me that I had to do this.
    // Console.KeyAvailable throws when stdin is redirected (piped); treat the
    // throw as "input is piped".
    bool keyAvailableHack = false;
    try
    {
        keyAvailableHack = System.Console.KeyAvailable;
    }
    catch
    {
        keyAvailableHack = true;
    }

    // help for no args unless we are piping
    if (args.Length == 0 && keyAvailableHack == false)
    {
        Help();
        Environment.Exit(0);
    }

    for (int i = 0; i < args.Length; i++)
    {
        if (args[i] == "-v" || args[i] == "--volume" || args[i] == "/v" || args[i] == "/volume")
        {
            try
            {
                Volume = Convert.ToInt32(args[i + 1]);
                if (Volume < 0 || Volume > 100)
                {
                    Console.WriteLine("Volume must be between 0 and 100, inclusive.");
                    Environment.Exit(1);
                }
            }
            catch
            {
                Console.WriteLine("Invalid volume specified");
                Environment.Exit(1);
            }
            i++;
            consumed += 2;
        }
        if (args[i] == "-r" || args[i] == "--rate" || args[i] == "/r" || args[i] == "/rate")
        {
            try
            {
                Rate = Convert.ToInt32(args[i + 1]);
                if (Rate < -10 || Rate > 10)
                {
                    Console.WriteLine("Rate must be between -10 and 10, inclusive.");
                    Environment.Exit(1);
                }
            }
            catch
            {
                Console.WriteLine("Invalid rate specified");
                Environment.Exit(1);
            }
            i++;
            consumed += 2;
        }
        if (args[i] == "-m" || args[i] == "--male" || args[i] == "/m" || args[i] == "/male")
        {
            s.SelectVoiceByHints(VoiceGender.Male);
            consumed++;
        }
        if (args[i] == "-f" || args[i] == "--female" || args[i] == "/f" || args[i] == "/female")
        {
            s.SelectVoiceByHints(VoiceGender.Female);
            consumed++;
        }
        if (args[i] == "-n" || args[i] == "--name" || args[i] == "/n" || args[i] == "/name")
        {
            try
            {
                s.SelectVoice(args[i + 1]);
            }
            catch
            {
                Console.WriteLine("Invalid voice name specified. Did you use quotes? Try using --list to see available names");
                Environment.Exit(1);
            }
            i++;
            consumed += 2;
        }
        if (args[i] == "-l" || args[i] == "--list" || args[i] == "/l" || args[i] == "/list")
        {
            //choose by name
            foreach (InstalledVoice v in s.GetInstalledVoices())
            {
                Console.WriteLine("\"{0}\" - {1},{2},{3}", v.VoiceInfo.Name, v.VoiceInfo.Age, v.VoiceInfo.Gender, v.VoiceInfo.Culture);
                //Console.WriteLine(v.VoiceInfo.Description);
                //Console.WriteLine(v.VoiceInfo.Id);
            }
            Environment.Exit(0);
        }
        if (args[i] == "-?" || args[i] == "--?" || args[i] == "/?" || args[i] == "/h" || args[i] == "-h" || args[i] == "/help" || args[i] == "--help" || args[i] == "-help")
        {
            //help and version and quit
            Help();
            Environment.Exit(0);
        }
        if (args[i] == "-i" || args[i] == "--interactive" || args[i] == "/i" || args[i] == "/interactive")
        {
            mode_interactive = true;
            consumed++;
        }
        if (args[i] == "-t" || args[i] == "--tee" || args[i] == "/t" || args[i] == "/tee")
        {
            mode_tee = true;
            consumed++;
        }
        if (args[i] == "-s" || args[i] == "--scroll" || args[i] == "/s" || args[i] == "/scroll")
        {
            mode_scroll_inspect = true;
            consumed++;
        }
        if (args[i] == "-p" || args[i] == "--progress" || args[i] == "/p" || args[i] == "/progress")
        {
            mode_report_progress = true;
            consumed++;
        }
        if (args[i] == "-d" || args[i] == "--done" || args[i] == "/d" || args[i] == "/done")
        {
            // nothing after "done" gets parsed, in case there are actual codes in the speech
            consumed++;
            break;
        }
        if (args[i] == "-k" || args[i] == "--input" || args[i] == "/k" || args[i] == "/input")
        {
            InputFilename = args[i + 1];
            i++;
            consumed += 2;
        }
        if (args[i] == "-o" || args[i] == "--output" || args[i] == "/o" || args[i] == "/output")
        {
            OutputFilename = args[i + 1];
            i++;
            consumed += 2;
        }
        if (args[i] == "--mono" || args[i] == "/mono")
        {
            numchannels = System.Speech.AudioFormat.AudioChannel.Mono;
            consumed++;
        }
        if (args[i] == "--8bit" || args[i] == "/8bit")
        {
            numbits = System.Speech.AudioFormat.AudioBitsPerSample.Eight;
            consumed++;
        }
        if (args[i] == "--khz" || args[i] == "/khz")
        {
            // BUG FIX: a non-numeric or missing value previously crashed with an
            // unhandled exception; handle it like --volume/--rate do.
            try
            {
                khz = Convert.ToInt32(args[i + 1]);
            }
            catch
            {
                Console.WriteLine("Invalid khz specified");
                Environment.Exit(1);
            }
            i++;
            consumed += 2;
        }
    }

    if (mode_report_progress)
    {
        s.SpeakProgress += new EventHandler<SpeakProgressEventArgs>(report_progress);
    }
    s.Volume = Volume;
    s.Rate = Rate;

    System.Speech.AudioFormat.SpeechAudioFormatInfo fmt = new System.Speech.AudioFormat.SpeechAudioFormatInfo(khz * 1000, numbits, numchannels);
    if (OutputFilename != "")
    {
        try
        {
            s.SetOutputToWaveFile(OutputFilename, fmt);
        }
        catch (Exception e)
        {
            // NOTE(review): execution continues after this failure, so speech
            // falls through to the default audio device — confirm intended.
            Console.WriteLine("Error, the output file could not be written.");
            Console.WriteLine(e.Message);
        }
    }

    if (InputFilename != "")
    {
        try
        {
            // `true` = detect encoding from byte-order marks.
            using (StreamReader sr = new StreamReader(InputFilename, true))
            {
                while ((line = sr.ReadLine()) != null)
                {
                    // Including this warning because it is apparently very easy (I did it, anyway) to save what you *think* are properly
                    // accented non-english characters in a notepad file under ANSI - they show up properly in notepad etc but when you
                    // feed them to Speech.Synthesis they are replaced with the 65533 garbage character.
                    if (unicode_warning_sent == false)
                    {
                        for (int i = 0; i < line.Length; i++)
                        {
                            if (Convert.ToInt32(line[i]) == 65533)
                            {
                                Console.WriteLine("Warning: Unicode replacement character 65533 detected. Check encoding on input file.");
                                unicode_warning_sent = true;
                                break;
                            }
                        }
                    }
                    s.Speak(line);
                }
            }
        }
        catch (Exception e)
        {
            Console.WriteLine("The input file could not be read:");
            Console.WriteLine(e.Message);
        }
    }

    // "from stdin, noninteractive" mode
    if (keyAvailableHack)
    {
        while (true)
        {
            line = Console.ReadLine();
            if (line == null)
            {
                break;
            }
            else
            {
                if (mode_scroll_inspect)
                {
                    // check for scroll lock (VK_SCROLL = 0x91): speak only while it is on
                    if ((((ushort)GetKeyState(0x91)) & 0xffff) != 0)
                    {
                        Console.Error.WriteLine(line);
                        s.Speak(line);
                    }
                    Console.WriteLine(line);
                    continue;
                }
                if (mode_tee)
                {
                    Console.WriteLine(line);
                }
                s.Speak(line);
            }
        }
        Environment.Exit(0);
    }

    // from stdin, interactive
    if (mode_interactive)
    {
        Console.WriteLine("Interactive mode. Control-c to quit.");
        while (true)
        {
            line = Console.ReadLine();
            if (line != null)
            {
                s.Speak(line);
            }
            else
            {
                break;
            }
        }
        Environment.Exit(0);
    }

    // "from the args" mode: everything not consumed by options is the payload
    string tosay = String.Join(" ", args.Skip(consumed));
    if (tosay.Length > 0)
    {
        s.Speak(tosay);
    }
    Environment.Exit(0);
}
private static void SynthToCam(string text, CameraWindow cw)
{
    // Synthesizes `text` and streams the PCM audio to a talk target selected by
    // the camera's configured audio model (Foscam, NetworkKinect, iSpyServer,
    // Axis, or local playback).
    // Format: PCM, 11025 Hz, 16-bit, mono.
    // NOTE(review): average bytes/sec is passed as 22100 but 11025 * 2 = 22050 — confirm intended.
    var synthFormat = new System.Speech.AudioFormat.SpeechAudioFormatInfo(System.Speech.AudioFormat.EncodingFormat.Pcm, 11025, 16, 1, 22100, 2, null);
    using (var synthesizer = new SpeechSynthesizer())
    {
        using (var waveStream = new MemoryStream())
        {
            // One second of leading silence so the camera can initialise properly.
            var silence = new byte[22050];
            // BUG FIX: use the array's Length property rather than LINQ Count().
            waveStream.Write(silence, 0, silence.Length);

            // Build the prompt: strong emphasis, slow rate, extra-loud volume,
            // adult male voice.
            var pbuilder = new PromptBuilder();
            var pStyle = new PromptStyle
            {
                Emphasis = PromptEmphasis.Strong,
                Rate = PromptRate.Slow,
                Volume = PromptVolume.ExtraLoud
            };
            pbuilder.StartStyle(pStyle);
            pbuilder.StartParagraph();
            pbuilder.StartVoice(VoiceGender.Male, VoiceAge.Adult, 2);
            pbuilder.StartSentence();
            pbuilder.AppendText(text);
            pbuilder.EndSentence();
            pbuilder.EndVoice();
            pbuilder.EndParagraph();
            pbuilder.EndStyle();

            // Render the synthesized speech into the memory stream.
            synthesizer.SetOutputToAudioStream(waveStream, synthFormat);
            synthesizer.Speak(pbuilder);
            synthesizer.SetOutputToNull();

            // Trailing silence so the camera can end properly.
            waveStream.Write(silence, 0, silence.Length);
            waveStream.Seek(0, SeekOrigin.Begin);

            // Every switch branch assigns talkTarget (default = local playback),
            // so the original post-loop null check was dead and has been removed.
            ITalkTarget talkTarget;
            var ds = new DirectStream(waveStream) { RecordingFormat = new WaveFormat(11025, 16, 1) };
            switch (cw.Camobject.settings.audiomodel)
            {
                case "Foscam":
                    ds.Interval = 40;
                    ds.PacketSize = 882; // (40ms packet at 22050 bytes per second)
                    talkTarget = new TalkFoscam(cw.Camobject.settings.audioip, cw.Camobject.settings.audioport, cw.Camobject.settings.audiousername, cw.Camobject.settings.audiopassword, ds);
                    break;
                case "NetworkKinect":
                    ds.Interval = 40;
                    ds.PacketSize = 882;
                    talkTarget = new TalkNetworkKinect(cw.Camobject.settings.audioip, cw.Camobject.settings.audioport, ds);
                    break;
                case "iSpyServer":
                    ds.Interval = 40;
                    ds.PacketSize = 882;
                    talkTarget = new TalkiSpyServer(cw.Camobject.settings.audioip, cw.Camobject.settings.audioport, ds);
                    break;
                case "Axis":
                    talkTarget = new TalkAxis(cw.Camobject.settings.audioip, cw.Camobject.settings.audioport, cw.Camobject.settings.audiousername, cw.Camobject.settings.audiopassword, ds);
                    break;
                default:
                    //local playback
                    talkTarget = new TalkLocal(ds);
                    break;
            }

            ds.Start();
            talkTarget.Start();
            // Poll until the stream has fully played out.
            while (ds.IsRunning)
            {
                Thread.Sleep(100);
            }
            ds.Stop();
            talkTarget.Stop();
            talkTarget = null;
            ds = null;
            // waveStream is disposed by the enclosing using; the original's
            // explicit Close() was redundant and has been removed.
        }
    }
}