Example No. 1
        static bool GenerateFile(Options options)
        {
            AudioBitsPerSample bitRate = options.BitRate == 1 ? AudioBitsPerSample.Eight : AudioBitsPerSample.Sixteen;
            AudioChannel       channel = options.Channel == 1 ? AudioChannel.Stereo : AudioChannel.Mono;

            var format = new SpeechAudioFormatInfo(options.SampleRate, bitRate, channel);

            string text = "";

            if (String.IsNullOrEmpty(options.Text))
            {
                text = File.ReadAllText(options.InputFile, Encoding.UTF8);
            }
            else
            {
                text = options.Text;
            }

            string path = String.IsNullOrEmpty(options.Folder) ? options.Path : String.Format("{0}/{1}", options.Folder, options.Path);

            SpeechSynthesizer speaker = new SpeechSynthesizer();

            try
            {
                speaker.SelectVoice(options.VoiceName);
                speaker.SetOutputToWaveFile(path, format);
                speaker.Speak(text);
            }
            catch (Exception)
            {
                return(false);
            }

            return(true);
        }
        public void Start(RecognitionConfig config)
        {
            _config = config.Windows;
            _stream = new PipeStream(9600);

            if (_speechEngine == null)
            {
                _speechEngine = new SpeechRecognitionEngine();
                _speechEngine.LoadGrammar(new DictationGrammar());

                _speechEngine.SpeechHypothesized += OnSpeechHypothesized;
                _speechEngine.SpeechRecognized   += OnSpeechRecognized;
            }

            var format = new SpeechAudioFormatInfo(48000, AudioBitsPerSample.Sixteen, AudioChannel.Mono);

            _speechEngine.SetInputToAudioStream(_stream, format);

            if (!_recognizing)
            {
                _recognizing = true;
                _speaking    = false;
                _speechEngine.RecognizeAsync(RecognizeMode.Multiple);
            }
        }
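The format handed to SetInputToAudioStream has to describe the bytes exactly as they were captured; the engine does not inspect the stream. A minimal, self-contained sketch of the same idea, assuming a raw 48 kHz, 16-bit, mono PCM buffer instead of the PipeStream used above:

    using System.IO;
    using System.Speech.AudioFormat;
    using System.Speech.Recognition;

    // pcmBytes is assumed to hold raw 48 kHz, 16-bit, mono PCM samples with no WAV header.
    static string RecognizeOnce(byte[] pcmBytes)
    {
        using (var engine = new SpeechRecognitionEngine())
        using (var stream = new MemoryStream(pcmBytes))
        {
            engine.LoadGrammar(new DictationGrammar());

            var format = new SpeechAudioFormatInfo(48000, AudioBitsPerSample.Sixteen, AudioChannel.Mono);
            engine.SetInputToAudioStream(stream, format);

            RecognitionResult result = engine.Recognize();   // single synchronous recognition
            return result?.Text;
        }
    }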
Example No. 3
 private void SetOutputStream(Stream stream, SpeechAudioFormatInfo formatInfo, bool headerInfo, bool closeStreamOnExit)
 {
     SetOutputToNull();
     _outputStream      = stream;
     _closeStreamOnExit = closeStreamOnExit;
     VoiceSynthesizer.SetOutput(stream, formatInfo, headerInfo);
 }
Example No. 4
        public SAPI()
        {
            int frequency = 16000;

            speechRate            = 0;
            speechAudioFormatInfo = new SpeechAudioFormatInfo(frequency, AudioBitsPerSample.Sixteen, AudioChannel.Mono);
        }
Example No. 5
 public void SetOutputToWaveFile(string path, SpeechAudioFormatInfo formatInfo)
 {
     Helpers.ThrowIfEmptyOrNull(path, "path");
     Helpers.ThrowIfNull(formatInfo, "formatInfo");
     SetOutputToNull();
     SetOutputStream(new FileStream(path, FileMode.Create, FileAccess.Write), formatInfo, true, true);
 }
Example No. 6
 /// <summary>
 /// Converts SSML XML to WAV bytes. The returned data has no WAV header, so you have to add it separately.
 /// </summary>
 static byte[] ConvertSsmlXmlToWav(string voiceId, string xml, WaveFormat format)
 {
     using (var ms = new MemoryStream())
     {
         using (var synthesizer = new SpeechSynthesizer())
         {
             //var format = new SpeechAudioFormatInfo(
             if (format != null)
             {
                 //var bps = format.BitsPerSample == 8 ? AudioBitsPerSample.Eight : AudioBitsPerSample.Sixteen;
                 var blockAlignment        = format.BitsPerSample / 8 * format.Channels;
                 var averageBytesPerSecond = format.SampleRate * format.BitsPerSample / 8 * format.Channels;
                 var formatInfo            = new SpeechAudioFormatInfo(EncodingFormat.Pcm, format.SampleRate, format.BitsPerSample, format.Channels, averageBytesPerSecond, blockAlignment, new byte[0]);
                 // Returns WAV data only.
                 synthesizer.SetOutputToAudioStream(ms, formatInfo);
             }
             try
             {
                 var voice = synthesizer.GetInstalledVoices().Cast <InstalledVoice>().FirstOrDefault(x => x.VoiceInfo.Id == voiceId);
                 synthesizer.SelectVoice(voice.VoiceInfo.Name);
                 synthesizer.SpeakSsml(xml);
                 return(ms.ToArray());
             }
             catch (Exception ex)
             {
                 ex.Data.Add("Voice", voiceId);
                 OnEvent(Exception, ex);
             }
         }
     }
     return(null);
 }
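For PCM, the averageBytesPerSecond and blockAlignment arguments used above follow directly from the other three values: blockAlign = channels x bits / 8, and averageBytesPerSecond = sampleRate x blockAlign. A small hypothetical helper that packages that arithmetic:

    // Builds a PCM SpeechAudioFormatInfo from the three independent parameters.
    static SpeechAudioFormatInfo CreatePcmFormat(int sampleRate, int bitsPerSample, int channels)
    {
        int blockAlign            = bitsPerSample / 8 * channels;   // bytes per sample frame
        int averageBytesPerSecond = sampleRate * blockAlign;        // bytes per second of audio

        return new SpeechAudioFormatInfo(
            EncodingFormat.Pcm, sampleRate, bitsPerSample, channels,
            averageBytesPerSecond, blockAlign, new byte[0]);
    }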
Example No. 7
        public void SetOutputToAudioStream(Stream audioDestination, SpeechAudioFormatInfo formatInfo)
        {
            Helpers.ThrowIfNull(audioDestination, nameof(audioDestination));
            Helpers.ThrowIfNull(formatInfo, nameof(formatInfo));

            SetOutputStream(audioDestination, formatInfo, false, false);
        }
        public void Initialise(Speech words)
        {
            if (WindowKinect.Device == null)
            {
                return;
            }

            WordsToWatch = words;

            var commands = words.GetCommands(new Choices());

            commands.Add(new SemanticResultValue("sleep", SleepKey));
            commands.Add(new SemanticResultValue("wake", WakeUpKey));

            var speechAudioFormat  = new SpeechAudioFormatInfo(EncodingFormat.Pcm, 16000, 16, 1, 32000, 2, null);
            var grammerConstructor = new GrammarBuilder();

            grammerConstructor.Culture = RecogniserInfo.Culture;
            grammerConstructor.Append(commands);

            SpeechEngine = new SpeechRecognitionEngine(RecogniserInfo.Id);
            SpeechEngine.LoadGrammar(new Grammar(grammerConstructor));
            SpeechEngine.SpeechRecognized += SpeeckRecognized;
            SpeechEngine.UpdateRecognizerSetting("AdaptationOn", 0);
            SpeechEngine.SetInputToAudioStream(WindowKinect.Device.AudioSource.Start(), speechAudioFormat);
            SpeechEngine.RecognizeAsync(RecognizeMode.Multiple);

            Active = WindowKinect.Device.ElevationAngle > 0;
        }
        public QueuedSpeechSynthesizer(ref MixingSampleProvider mspStandard, ref MixingSampleProvider mspLoopback, int volume, int rate)
        {
            synthesizer = new SpeechSynthesizer();

            this.volume = volume;
            this.rate   = rate;

            queue = new ObservableCollection <QSSQueueItem>();

            this.mspStandard = mspStandard;
            this.mspLoopback = mspLoopback;

            int samplerate = int.Parse(File.ReadAllLines("audioformat.txt")[0].Replace("Sample rate: ", ""));
            int channels   = int.Parse(File.ReadAllLines("audioformat.txt")[1].Replace("Channels: ", ""));

            if (channels == 1)
            {
                synthesizerAudioFormat = new SpeechAudioFormatInfo(samplerate, AudioBitsPerSample.Sixteen, AudioChannel.Mono);
            }
            else
            {
                synthesizerAudioFormat = new SpeechAudioFormatInfo(samplerate, AudioBitsPerSample.Sixteen, AudioChannel.Stereo);
            }
            waveFormat = new WaveFormat(samplerate, channels);

            startSpeakLoop();
        }
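The constructor above assumes audioformat.txt holds exactly two lines of the form "Sample rate: 44100" and "Channels: 1", and it reads the file twice to get them. A sketch that reads the file once and validates the channel count (the file name and layout are the same assumptions the original makes):

    static (int sampleRate, int channels) ReadAudioFormat(string path = "audioformat.txt")
    {
        string[] lines = File.ReadAllLines(path);   // read the file a single time

        int sampleRate = int.Parse(lines[0].Replace("Sample rate: ", ""));
        int channels   = int.Parse(lines[1].Replace("Channels: ", ""));

        if (channels != 1 && channels != 2)
            throw new InvalidDataException("Channels must be 1 (mono) or 2 (stereo).");

        return (sampleRate, channels);
    }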
Example No. 10
        void InitializeSpeechEngine(SpeechRecognitionEngine sre)
        {
            // Log function entrance
            TraceLog.TraceFunction();

            try
            {
                // initialize and cache format info
                formatInfo = new SpeechAudioFormatInfo(defaultSampleRate, defaultBitsPerSample, defaultAudioChannels);

                // initialize and cache speech engine
                sre.UpdateRecognizerSetting("AssumeCFGFromTrustedSource", 1);

                string fileName    = @"TELLME-SMS-LM.cfgp";
                string appDataPath = HttpContext.Current.Server.MapPath("~/Content/grammars");
                string grammarPath = Path.Combine(appDataPath, fileName);
                TraceLog.TraceInfo("Grammar path: " + grammarPath);

                // make sure the grammar files are copied over from the approot directory to the appDataPath
                InitializeGrammar(grammarPath, appDataPath, fileName);

                // initialize and load the grammar
                Grammar grammar = new Grammar(grammarPath);
                grammar.Enabled = true;
                sre.LoadGrammar(grammar);
            }
            catch (Exception ex)
            {
                TraceLog.TraceError("Speech Engine initialization failed: " + ex.Message);
            }
        }
Example No. 11
        public void SpeakMessage(AudioVideoFlow flow, string message)
        {
            try
            {
                SpeechSynthesizer synth = new SpeechSynthesizer();
                SpeechAudioFormatInfo formatInfo = new SpeechAudioFormatInfo(16000, AudioBitsPerSample.Sixteen, Microsoft.Speech.AudioFormat.AudioChannel.Mono);
                SpeechSynthesisConnector connector = new SpeechSynthesisConnector();

                synth.SetOutputToAudioStream(connector.Stream, formatInfo);

                connector.AttachFlow(flow);
                connector.Start();

                synth.SpeakCompleted += new EventHandler<SpeakCompletedEventArgs>(
                    (sender, args) =>
                    {
                        connector.Stop();
                        synth.Dispose();
                    });

                synth.SpeakAsync(message);
            }
            catch (Exception ex)
            {
                Console.WriteLine("Failed to play the message. {0}", ex);
            }

        }
Example No. 12
        private void StartSpeech(AssignedVoice vb, string outputfile)
        {
            WinAvailableVoice wv = (WinAvailableVoice)vb.root;

            // Find the best audio format to use for this voice.
            System.Collections.ObjectModel.ReadOnlyCollection <SpeechAudioFormatInfo> formats =
                wv.winVoice.VoiceInfo.SupportedAudioFormats;

            format = formats.FirstOrDefault();

            if (format == null)
            {
                // The voice did not tell us its parameters, so we pick some.
                format = new SpeechAudioFormatInfo(
                    16000,      // Samples per second
                    AudioBitsPerSample.Sixteen,
                    AudioChannel.Mono);
            }

            // First set up to synthesize the message into a WAV file.
            mstream = new FileStream(outputfile, FileMode.Create, FileAccess.Write);

            syn.SetOutputToWaveStream(mstream);

            pb        = new PromptBuilder();
            mainStyle = new PromptStyle();
            //            mainStyle.Volume = promptVol;
            syn.SelectVoice(wv.winVoice.VoiceInfo.Name);
            pb.StartStyle(mainStyle);
        }
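Note that the format selected above never reaches the synthesizer: SetOutputToWaveStream only takes a stream, so the engine renders in its default format regardless. If the chosen format should actually be honored, one option is to let the synthesizer create the file itself (a sketch, replacing the FileStream/SetOutputToWaveStream pair):

    // Hand the chosen format to the synthesizer so the WAV file is rendered with it.
    syn.SetOutputToWaveFile(outputfile, format);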
Example No. 13
        /// <summary>
        /// Create an instance of AudioFileOut.
        /// </summary>
        internal AudioFileOut(Stream stream, SpeechAudioFormatInfo formatInfo, bool headerInfo, IAsyncDispatch asyncDispatch)
        {
            _asyncDispatch       = asyncDispatch;
            _stream              = stream;
            _startStreamPosition = _stream.Position;
            _hasHeader           = headerInfo;

            _wfxOut = new WAVEFORMATEX();
            // if we have a formatInfo object, format conversion may be necessary
            if (formatInfo != null)
            {
                // Build the Wave format from the formatInfo
                _wfxOut.wFormatTag     = (short)formatInfo.EncodingFormat;
                _wfxOut.wBitsPerSample = (short)formatInfo.BitsPerSample;
                _wfxOut.nSamplesPerSec = formatInfo.SamplesPerSecond;
                _wfxOut.nChannels      = (short)formatInfo.ChannelCount;
            }
            else
            {
                // Set the default values
                _wfxOut = WAVEFORMATEX.Default;
            }
            _wfxOut.nBlockAlign     = (short)(_wfxOut.nChannels * _wfxOut.wBitsPerSample / 8);
            _wfxOut.nAvgBytesPerSec = _wfxOut.wBitsPerSample * _wfxOut.nSamplesPerSec * _wfxOut.nChannels / 8;
        }
Example No. 14
        public MeWantSpeech()
        {
            // Convert text to an audio stream using .NET 3.x speech synthesis with G.711 µ-law
            // (µ-law compresses 14-bit linear PCM samples to 8 bits, for a 64 kb/s bit rate).
            _speechSynthesizer = new SpeechSynthesizer();

            // select (if it exists)
            _speechSynthesizer.SelectVoiceByHints(VoiceGender.Female, VoiceAge.Adult);

            // can also change voice with SelectVoice method
            _speechSynthesizer.Rate = 1;


            // encoding format enums are Pcm, ALaw, ULaw
            int samplesPerSecond = 8000;
            int bitsPerSample    = 8;
            // System.Speech.AudioFormat.AudioBitsPerSample.Eight
            int channelCount = 1;
            // System.Speech.AudioFormat.AudioChannel.Mono
            int averageBytesPerSecond = 8000; // 8000 samples/s * 1 byte/sample * 1 channel
            int blockAlign            = 1;    // bytes per sample frame for 8-bit mono

            byte[] formatSpecificData = null;

            _formatInfo = new SpeechAudioFormatInfo(EncodingFormat.ULaw, samplesPerSecond, bitsPerSample, channelCount, averageBytesPerSecond, blockAlign, formatSpecificData);
        }
Example No. 15
 private void CreateWAV(string fileName, string verbiage)
 {
     using (Stream ret = new MemoryStream())
         using (SpeechSynthesizer synth = new SpeechSynthesizer())
         {
             var mi = synth.GetType().GetMethod("SetOutputStream", BindingFlags.Instance | BindingFlags.NonPublic);
             //   var fmt = new SpeechAudioFormatInfo(8000, AudioBitsPerSample.Eight, AudioChannel.Mono);
             var fmt = new SpeechAudioFormatInfo(EncodingFormat.ULaw, 8000, 8, 1, 16000, 2, null);
             mi.Invoke(synth, new object[] { ret, fmt, true, true });
             synth.SelectVoiceByHints(VoiceGender.Female, VoiceAge.Adult);
             synth.Speak(verbiage);
             // Testing code:
             using (var fs = new FileStream(fileName, FileMode.Create, FileAccess.Write, FileShare.None))
             {
                 ret.Position = 0;
                 byte[] buffer = new byte[4096];
                 for (; ;)
                 {
                     int len = ret.Read(buffer, 0, buffer.Length);
                     if (len == 0)
                     {
                         break;
                     }
                     fs.Write(buffer, 0, len);
                 }
             }
         }
 }
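The reflection call above reaches into a non-public SetOutputStream member, which can break between framework versions. If the goal is simply a µ-law WAV file with a proper header, the public API should be able to do it directly; a sketch under that assumption:

    using (var synth = new SpeechSynthesizer())
    {
        // 8 kHz, 8-bit, mono µ-law: 8000 bytes/s on average, 1 byte per sample frame.
        var fmt = new SpeechAudioFormatInfo(EncodingFormat.ULaw, 8000, 8, 1, 8000, 1, null);

        synth.SelectVoiceByHints(VoiceGender.Female, VoiceAge.Adult);
        synth.SetOutputToWaveFile(fileName, fmt);   // writes the RIFF header for us
        synth.Speak(verbiage);
    }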
Example No. 16
        public string RecognizeSpeech(byte[] speechToParse, int sampleRate)
        {
            SpeechRecognitionEngine sre = new SpeechRecognitionEngine();

            if (_grammar == null)
            {
                InitializeGrammar();
            }
            sre.LoadGrammar(_grammar);

            MemoryStream ms         = new MemoryStream(speechToParse);
            var          formatInfo = new SpeechAudioFormatInfo(sampleRate, AudioBitsPerSample.Sixteen, AudioChannel.Mono);

            sre.SetInputToAudioStream(ms, formatInfo);
            var result = sre.Recognize();

            ms = null;

            if (result == null)
            {
                return("Unable to recognize speech");
            }
            else
            {
                return(result.Text);
            }
        }
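Both the engine and the MemoryStream above are IDisposable and are never released. A variation that disposes them deterministically (a sketch reusing the same _grammar field and format):

    public string RecognizeSpeechSafe(byte[] speechToParse, int sampleRate)
    {
        if (_grammar == null)
        {
            InitializeGrammar();
        }

        var formatInfo = new SpeechAudioFormatInfo(sampleRate, AudioBitsPerSample.Sixteen, AudioChannel.Mono);

        using (var sre = new SpeechRecognitionEngine())
        using (var ms = new MemoryStream(speechToParse))
        {
            sre.LoadGrammar(_grammar);
            sre.SetInputToAudioStream(ms, formatInfo);

            var result = sre.Recognize();
            return result == null ? "Unable to recognize speech" : result.Text;
        }
    }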
Example No. 17
        protected void InitSpeechEngine(bool def)
        {
            try {
                WSRConfig        cfg     = WSRConfig.GetInstance();
                WSRSpeechManager manager = WSRSpeechManager.GetInstance();

                // File
                manager.InitEngines();

                // Default
                if (def)
                {
                    manager.AddDefaultEngine("Default", cfg.language, cfg.confidence);
                }

                // RTP
                if (rtpClient == null)
                {
                    return;
                }
                var format = new SpeechAudioFormatInfo(16000, AudioBitsPerSample.Sixteen, AudioChannel.Stereo);
                manager.AddEngine("RTP", cfg.language, cfg.confidence, rtpClient.AudioStream, format);
            }
            catch (Exception ex) {
                WSRConfig.GetInstance().logError("ENGINE", "InitEngines: " + ex.Message);
            }
        }
Example No. 18
        void Flow_StateChanged(object sender, MediaFlowStateChangedEventArgs e)
        {
            Log("ControlAVCall Flow_StateChanged PreviousState=" + e.PreviousState + " State=" + e.State);

            AudioVideoFlow avFlow = (AudioVideoFlow)sender;

            if (avFlow.State == MediaFlowState.Active)
            {
                SpeechRecognitionConnector speechRecognitionConnector = new SpeechRecognitionConnector();
                speechRecognitionConnector.AttachFlow(avFlow);

                SpeechRecognitionStream stream = speechRecognitionConnector.Start();

                _speechRecognitionEngine = new SpeechRecognitionEngine();
                _speechRecognitionEngine.SpeechRecognized     += new EventHandler <SpeechRecognizedEventArgs>(_speechRecognitionEngine_SpeechRecognized);
                _speechRecognitionEngine.LoadGrammarCompleted += new EventHandler <LoadGrammarCompletedEventArgs>(_speechRecognitionEngine_LoadGrammarCompleted);

                Choices pathChoice = new Choices(new string[] { "previous", "next" });
                Grammar gr         = new Grammar(new GrammarBuilder(pathChoice));
                _speechRecognitionEngine.LoadGrammarAsync(gr);

                SpeechAudioFormatInfo speechAudioFormatInfo = new SpeechAudioFormatInfo(8000, AudioBitsPerSample.Sixteen, Microsoft.Speech.AudioFormat.AudioChannel.Mono);
                _speechRecognitionEngine.SetInputToAudioStream(stream, speechAudioFormatInfo);
                _speechRecognitionEngine.RecognizeAsync(RecognizeMode.Multiple);
            }
            else
            {
                if (avFlow.SpeechRecognitionConnector != null)
                {
                    avFlow.SpeechRecognitionConnector.DetachFlow();
                }
            }
        }
Example No. 19
 public static void SpeakInWave(string text, string wavefile)
 {
     try
     {
         if (!Initialized)
         {
             Initialize();
         }

         var fmt = new SpeechAudioFormatInfo(8000, AudioBitsPerSample.Eight, AudioChannel.Mono);
         SPS.SetOutputToWaveFile(wavefile, fmt);
         SPS.Speak(text);
         SPS.SetOutputToDefaultAudioDevice();
     }
     catch (Exception)
     {
         // Errors are swallowed; no output file is produced in that case.
     }
 }
        public void SetInputToAudioStream(Stream audioSource, SpeechAudioFormatInfo audioFormat)
        {
            Helpers.ThrowIfNull(audioSource, nameof(audioSource));
            Helpers.ThrowIfNull(audioFormat, nameof(audioFormat));

            RecoBase.SetInput(audioSource, audioFormat);
        }
Example No. 21
        /// <summary>
        /// Creates a new speech recognition engine.
        /// </summary>
        /// <returns>A new speech recognition engine object.</returns>
        private SpeechRecognitionEngine CreateSpeechRecognitionEngine()
        {
            // Create speech recognition engine
            var recognizer = SystemSpeech.CreateSpeechRecognitionEngine(this.Configuration.Language, this.Configuration.Grammars);

            // Attach event handlers for speech recognition events
            recognizer.AudioStateChanged  += this.OnAudioStateChanged;
            recognizer.RecognizeCompleted += this.OnRecognizeCompleted;

            // Create the format info from the configuration input format
            SpeechAudioFormatInfo formatInfo = new SpeechAudioFormatInfo(
                (EncodingFormat)this.Configuration.InputFormat.FormatTag,
                (int)this.Configuration.InputFormat.SamplesPerSec,
                this.Configuration.InputFormat.BitsPerSample,
                this.Configuration.InputFormat.Channels,
                (int)this.Configuration.InputFormat.AvgBytesPerSec,
                this.Configuration.InputFormat.BlockAlign,
                (this.Configuration.InputFormat is WaveFormatEx) ? ((WaveFormatEx)this.Configuration.InputFormat).ExtraInfo : null);

            // Specify the input stream and audio format
            recognizer.SetInputToAudioStream(this.inputAudioStream, formatInfo);

            // Set the speech recognition engine parameters
            recognizer.InitialSilenceTimeout      = TimeSpan.FromMilliseconds(this.Configuration.InitialSilenceTimeoutMs);
            recognizer.BabbleTimeout              = TimeSpan.FromMilliseconds(this.Configuration.BabbleTimeoutMs);
            recognizer.EndSilenceTimeout          = TimeSpan.FromMilliseconds(this.Configuration.EndSilenceTimeoutMs);
            recognizer.EndSilenceTimeoutAmbiguous = TimeSpan.FromMilliseconds(this.Configuration.EndSilenceTimeoutAmbiguousMs);

            return(recognizer);
        }
Example No. 22
        public void Run()
        {
            // Create AudioVideoFlow
            AudioVideoFlowHelper audioVideoFlowHelper = new AudioVideoFlowHelper();

            _audioVideoFlow = audioVideoFlowHelper.CreateAudioVideoFlow(
                null,
                audioVideoFlow_StateChanged);

            // Create a speech synthesis connector and attach it to an AudioVideoFlow
            SpeechSynthesisConnector speechSynthesisConnector = new SpeechSynthesisConnector();

            speechSynthesisConnector.AttachFlow(_audioVideoFlow);

            // Create a speech synthesis and set connector to it
            SpeechSynthesizer     speechSynthesis = new SpeechSynthesizer();
            SpeechAudioFormatInfo audioformat     = new SpeechAudioFormatInfo(16000, AudioBitsPerSample.Sixteen, Microsoft.Speech.AudioFormat.AudioChannel.Mono);

            speechSynthesis.SetOutputToAudioStream(speechSynthesisConnector, audioformat);

            //Load readme file as the source
            Console.WriteLine();
            Console.Write("Please enter the source file => ");
            string filename = Console.ReadLine();

            string msg = "";

            try
            {
                StreamReader objReader = new StreamReader(filename);
                msg = objReader.ReadToEnd();
            }
            catch (FileNotFoundException)
            {
                Console.WriteLine("\r\nFile doesn't exist.");
                ShutdownPlatform();
            }

            //Start connector
            speechSynthesisConnector.Start();
            Console.WriteLine("\r\nStreaming source file for 15 seconds.");

            //Start streaming from speech synthesis.
            speechSynthesis.SpeakAsync(new Prompt(msg));

            //Allow the connector to stream 15 seconds by waiting for 15 seconds
            Thread.Sleep(15000);

            //Stop the connector
            speechSynthesisConnector.Stop();
            Console.WriteLine("\r\nSpeech synthesis connector stopped.");

            //speech synthesis connector must be detached from the flow, otherwise if the connector is rooted, it will keep the flow in memory.
            speechSynthesisConnector.DetachFlow();

            // Shutdown the platform
            ShutdownPlatform();

            _waitForShutdownEventCompleted.WaitOne();
        }
Example No. 23
        /// <summary>
        /// Creates a new speech recognition engine
        /// </summary>
        /// <returns>A new speech recognition engine object.</returns>
        private SpeechRecognitionEngine CreateSpeechRecognitionEngine()
        {
            // Create the recognizer
            var recognizer = SystemSpeech.CreateSpeechRecognitionEngine(this.Configuration.Language, this.Configuration.Grammars);

            // Attach event handlers for speech recognition events
            recognizer.SpeechDetected             += this.OnSpeechDetected;
            recognizer.SpeechHypothesized         += this.OnSpeechHypothesized;
            recognizer.SpeechRecognized           += this.OnSpeechRecognized;
            recognizer.SpeechRecognitionRejected  += this.OnSpeechRecognitionRejected;
            recognizer.AudioSignalProblemOccurred += this.OnAudioSignalProblemOccurred;
            recognizer.AudioStateChanged          += this.OnAudioStateChanged;
            recognizer.RecognizeCompleted         += this.OnRecognizeCompleted;
            recognizer.RecognizerUpdateReached    += this.OnRecognizerUpdateReached;
            recognizer.AudioLevelUpdated          += this.OnAudioLevelUpdated;
            recognizer.EmulateRecognizeCompleted  += this.OnEmulateRecognizeCompleted;
            recognizer.LoadGrammarCompleted       += this.OnLoadGrammarCompleted;

            // Create the format info from the configuration input format
            SpeechAudioFormatInfo formatInfo = new SpeechAudioFormatInfo(
                (EncodingFormat)this.Configuration.InputFormat.FormatTag,
                (int)this.Configuration.InputFormat.SamplesPerSec,
                this.Configuration.InputFormat.BitsPerSample,
                this.Configuration.InputFormat.Channels,
                (int)this.Configuration.InputFormat.AvgBytesPerSec,
                this.Configuration.InputFormat.BlockAlign,
                (this.Configuration.InputFormat is WaveFormatEx) ? ((WaveFormatEx)this.Configuration.InputFormat).ExtraInfo : null);

            // Specify the input stream and audio format
            recognizer.SetInputToAudioStream(this.inputAudioStream, formatInfo);

            return(recognizer);
        }
Example No. 24
        public bool InitSTT(string recognizerID = null)
        {
            try
            {
                Console.Write("InitSTT");
                Initialized = false;
                var RecognizerInfoLit = SpeechRecognitionEngine.InstalledRecognizers();
                _recognition = new SpeechRecognitionEngine(new CultureInfo("en-US"));
                _recognition.LoadGrammar(new Grammar(new GrammarBuilder("exit")));
                _recognition.LoadGrammar(new DictationGrammar());
                loadAdditionalGrammer(_recognition);

                //_recognition.BabbleTimeout = new TimeSpan(0);
                // _recognition.InitialSilenceTimeout = new TimeSpan(0);

                _recognition.SpeechHypothesized        += recognition_SpeechHypothesized;
                _recognition.SpeechRecognized          += recognition_SpeechRecognized;
                _recognition.SpeechDetected            += recognition_SpeechDetected;
                _recognition.RecognizeCompleted        += recognition_RecognizeCompleted;
                _recognition.SpeechRecognitionRejected += (recognition_SpeechRecognizedRejected);
                _speechFormat = new SpeechAudioFormatInfo(_audioFormat.SampleRate, (AudioBitsPerSample)_audioFormat.BitRate, (AudioChannel)_audioFormat.Channels);
                //_recognition.UnloadAllGrammars();
                Initialized = true;
                return(true);
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex);
            }

            return(false);
        }
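The casts (AudioBitsPerSample)_audioFormat.BitRate and (AudioChannel)_audioFormat.Channels only work because the enum members carry their literal values (Eight = 8, Sixteen = 16, Mono = 1, Stereo = 2). A hypothetical helper that makes that assumption explicit and rejects anything else:

    static SpeechAudioFormatInfo ToFormatInfo(int sampleRate, int bitsPerSample, int channels)
    {
        // AudioBitsPerSample.Eight/Sixteen are 8/16 and AudioChannel.Mono/Stereo are 1/2,
        // so the casts below map raw integers directly onto the enum members.
        if (bitsPerSample != 8 && bitsPerSample != 16)
            throw new ArgumentOutOfRangeException(nameof(bitsPerSample));
        if (channels != 1 && channels != 2)
            throw new ArgumentOutOfRangeException(nameof(channels));

        return new SpeechAudioFormatInfo(sampleRate, (AudioBitsPerSample)bitsPerSample, (AudioChannel)channels);
    }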
Example No. 25
        public void Run()
        {
            // A helper class to take care of platform and endpoint setup and cleanup.
            _helper = new UCMASampleHelper();

            // Create a user endpoint using the network credential object.
            _userEndpoint = _helper.CreateEstablishedUserEndpoint("Broadcast User");

            // Register a delegate to be called when an incoming audio-video call arrives.
            _userEndpoint.RegisterForIncomingCall <AudioVideoCall>(AudioVideoCall_Received);

            // Wait for the incoming call to be accepted.
            Console.WriteLine("Waiting for incoming call...");
            _waitForCallToBeAccepted.WaitOne();

            // Create a speech recognition connector and attach an AudioVideoFlow to it.
            SpeechRecognitionConnector speechRecognitionConnector = new SpeechRecognitionConnector();

            speechRecognitionConnector.AttachFlow(_audioVideoFlow);

            // Start the speech recognition connector.
            SpeechRecognitionStream stream = speechRecognitionConnector.Start();

            // Create a speech recognition engine.
            SpeechRecognitionEngine speechRecognitionEngine = new SpeechRecognitionEngine();

            speechRecognitionEngine.SpeechRecognized += new EventHandler <SpeechRecognizedEventArgs>(SpeechRecognitionEngine_SpeechRecognized);

            //Add a grammar.
            string[] recoString = { "buy", "sell", "Fabrikam", "Contoso", "maximum", "minimum", "one", "ten", "twenty", "send" };
            Choices  choices    = new Choices(recoString);

            speechRecognitionEngine.LoadGrammar(new Grammar(new GrammarBuilder(choices)));

            //Attach to audio stream to the SR engine.
            SpeechAudioFormatInfo speechAudioFormatInfo = new SpeechAudioFormatInfo(8000, AudioBitsPerSample.Sixteen, Microsoft.Speech.AudioFormat.AudioChannel.Mono);

            speechRecognitionEngine.SetInputToAudioStream(stream, speechAudioFormatInfo);
            Console.WriteLine("\r\nGrammar loaded, say send to send IM.");

            //Prepare the SR engine to perform multiple asynchronous recognitions.
            speechRecognitionEngine.RecognizeAsync(RecognizeMode.Multiple);

            //Pause the main thread until recognition completes.
            _waitForConnectorToStop.WaitOne();
            speechRecognitionConnector.Stop();
            Console.WriteLine("connector stopped");

            // Detach the flow from the speech recognition connector, to prevent the flow from being kept in memory.
            speechRecognitionConnector.DetachFlow();

            // Terminate the call, the conversation, and then unregister the
            // endpoint from receiving an incoming call.
            _audioVideoCall.BeginTerminate(CallTerminateCB, _audioVideoCall);
            _waitForConversationToBeTerminated.WaitOne();

            // Shut down the platform.
            _helper.ShutdownPlatform();
        }
Example No. 26
 internal RecognizedAudio(byte[] rawAudioData, SpeechAudioFormatInfo audioFormat, DateTime startTime, TimeSpan audioPosition, TimeSpan audioDuration)
 {
     _audioFormat   = audioFormat;
     _startTime     = startTime;
     _audioPosition = audioPosition;
     _audioDuration = audioDuration;
     _rawAudioData  = rawAudioData;
 }
        public void UseAudioQueue()
        {
            Debug.WriteLine("SR is using queued stream");
            ebookStream = new EbookStream(ref conQueue);
            SpeechAudioFormatInfo info = new SpeechAudioFormatInfo(44100, AudioBitsPerSample.Sixteen, AudioChannel.Mono);

            recEngine.SetInputToAudioStream(ebookStream, info);
        }
Example No. 28
        public static string GetBase64Audio(string textInput)
        {
            var speechAudioFormatConfig = new SpeechAudioFormatInfo(samplesPerSecond: 8000, bitsPerSample: AudioBitsPerSample.Sixteen, channel: AudioChannel.Stereo);
            var waveFormat = new WaveFormat(speechAudioFormatConfig.SamplesPerSecond, speechAudioFormatConfig.BitsPerSample, speechAudioFormatConfig.ChannelCount);
            var prompt     = new PromptBuilder
            {
                Culture = CultureInfo.CreateSpecificCulture("en-US")
            };

            prompt.StartVoice(prompt.Culture);
            prompt.StartSentence();
            prompt.StartStyle(new PromptStyle()
            {
                Emphasis = PromptEmphasis.Reduced,
                Rate     = PromptRate.Slow
            });
            prompt.AppendText(textInput);
            prompt.EndStyle();
            prompt.EndSentence();
            prompt.EndVoice();

            var mp3Stream = new MemoryStream();

            byte[] audioOutputBytes;
            string audioOutputAsString = null;

            using (var synthWaveMemoryStream = new MemoryStream())
            {
                var resetEvent = new ManualResetEvent(false);
                ThreadPool.QueueUserWorkItem(arg =>
                {
                    try
                    {
                        var siteSpeechSynth = new SpeechSynthesizer();
                        siteSpeechSynth.SetOutputToAudioStream(synthWaveMemoryStream, speechAudioFormatConfig);
                        siteSpeechSynth.Speak(prompt);
                    }
                    finally
                    {
                        resetEvent.Set();
                    }
                });
                WaitHandle.WaitAll(new WaitHandle[] { resetEvent });
                var bitRate = (speechAudioFormatConfig.AverageBytesPerSecond * 8);

                synthWaveMemoryStream.Position = 0;

                using (var mp3FileWriter = new LameMP3FileWriter(outStream: mp3Stream, format: waveFormat, bitRate: bitRate))
                {
                    synthWaveMemoryStream.CopyTo(mp3FileWriter);
                }

                audioOutputBytes    = mp3Stream.ToArray();
                audioOutputAsString = $"data:audio/mp3;base64,{Convert.ToBase64String(audioOutputBytes)}";
            }

            return(audioOutputAsString);
        }
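The MP3 bit rate is derived from the synthesis format: with the 8 kHz, 16-bit, stereo configuration above, AverageBytesPerSecond is 32,000, so the value passed to the writer is 256,000. A hypothetical caller can drop the returned data URI straight into an HTML audio element:

    string dataUri = GetBase64Audio("Hello from the speech synthesizer.");
    string html    = $"<audio controls src=\"{dataUri}\"></audio>";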
Example No. 29
        private void SetOutputStream(Stream stream, SpeechAudioFormatInfo formatInfo, bool headerInfo, bool closeStreamOnExit)
        {
            SetOutputToNull();
            _outputStream      = stream;
            _closeStreamOnExit = closeStreamOnExit;

            // Need to serialize into a proper wav file before closing the stream
            VoiceSynthesizer.SetOutput(stream, formatInfo, headerInfo);
        }
Example No. 30
 public void printDebugFormatInfo(SpeechAudioFormatInfo speechAudioFormatInfoToPrint)
 {
     System.Diagnostics.Debug.WriteLine("Samples per second: " + speechAudioFormatInfoToPrint.SamplesPerSecond);
     System.Diagnostics.Debug.WriteLine("Average bytes per second: " + speechAudioFormatInfoToPrint.AverageBytesPerSecond);
     System.Diagnostics.Debug.WriteLine("Bits per sample: " + speechAudioFormatInfoToPrint.BitsPerSample);
     System.Diagnostics.Debug.WriteLine("Channel count: " + speechAudioFormatInfoToPrint.ChannelCount);
     System.Diagnostics.Debug.WriteLine("Encoding format: " + speechAudioFormatInfoToPrint.EncodingFormat);
     System.Diagnostics.Debug.WriteLine("Block Align: " + speechAudioFormatInfoToPrint.BlockAlign);
 }
Example No. 31
    public WSRSpeechEngine AddEngine(String prefix, String language, double confidence, Stream source, SpeechAudioFormatInfo format) {
      WSRSpeechEngine engine = new WSRSpeechEngine(prefix, language, confidence);
      engine.LoadGrammar();
      engine.Init();
      engine.GetEngine().SetInputToAudioStream(source, format);
      engine.Start();

      Engines.Add(prefix, engine);
      return engine;
    }
Example No. 32
        public WindowsSpeech()
        {
            speechRecognizer = new SpeechRecognitionEngine();
            Grammar grammar = new Grammar(new GrammarBuilder("one two"));

            speechRecognizer.LoadGrammar(grammar);
            formatInfo = new SpeechAudioFormatInfo(16000, AudioBitsPerSample.Sixteen, AudioChannel.Mono);

            speechRecognizer.SpeechRecognized += new EventHandler <SpeechRecognizedEventArgs>(SpeechRecognizedHandler);
        }
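The constructor prepares formatInfo but never attaches an input, so the recognizer stays idle until a stream is supplied. A hypothetical continuation that wires in a matching 16 kHz, 16-bit, mono PCM stream and starts continuous recognition:

    // Hypothetical continuation: attach a PCM stream that matches formatInfo and start listening.
    public void StartListening(Stream pcmStream)
    {
        speechRecognizer.SetInputToAudioStream(pcmStream, formatInfo);
        speechRecognizer.RecognizeAsync(RecognizeMode.Multiple);
    }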
Example No. 33
        public double computeLogEnergy(short[] audioArray, SpeechAudioFormatInfo speechAudioFormatInfo)
        {
            // Compute power by summing the squares of the signal.
            double energy = 0.0;
            for (int i = 0; i < audioArray.Length; ++i)
                energy += (double)audioArray[i] * (double)audioArray[i];

            energy /= audioArray.Length;
            energy = Math.Log(energy);

            // System.Diagnostics.Debug.WriteLine("energy[]: " + energy);
            return energy;
        }
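Math.Log returns negative infinity when the frame is pure silence (all zeros), and an empty array would divide by zero. A guarded variant of the same calculation (a sketch; the unused format parameter is dropped):

    public double ComputeLogEnergySafe(short[] audioArray)
    {
        if (audioArray == null || audioArray.Length == 0)
            return double.NegativeInfinity;

        double energy = 0.0;
        foreach (short sample in audioArray)
            energy += (double)sample * sample;
        energy /= audioArray.Length;

        const double floor = 1e-10;               // avoid Math.Log(0) == -Infinity on silence
        return Math.Log(Math.Max(energy, floor));
    }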
Example No. 34
        /// <summary>
        /// Sound generator class.
        /// </summary>
        /// <param name="aVoice">The voice selected from the system.</param>
        /// <param name="aRate">Playback rate.</param>
        /// <param name="aSamplePerSecond">Samples per second.</param>
        public VoiceSynthesizer(string aVoice, int aRate = 0, int aSamplePerSecond = 22050)
        {
            if (string.IsNullOrEmpty(aVoice)) {
                throw new ArgumentNullException("aVoice");
            }

            if (aRate < -10 || aRate > 10) {
                throw new ArgumentException("Rate must be in the interval [-10; 10]");
            }

            generator = new SpeechSynthesizer();
            generator.SelectVoice(aVoice);
            generator.Rate = aRate;

            audioFormat = new SpeechAudioFormatInfo(aSamplePerSecond, AudioBitsPerSample.Sixteen, AudioChannel.Stereo);
        }
    public void AddSpeechEngine(Stream stream, string format, String device, String language, double confidence) {

      language = (language == null) ? ConfigManager.GetInstance().Find("bot.language", "fr-FR") : language;

      var info = new SpeechAudioFormatInfo(16000, AudioBitsPerSample.Sixteen, AudioChannel.Stereo);
      if ("Kinect".Equals(format)) {
        info = new SpeechAudioFormatInfo(EncodingFormat.Pcm, 16000, 16, 1, 32000, 2, null);
      }

      SpeechEngine engine = new SpeechEngine(device, language, confidence);
      engine.Load(GrammarManager.GetInstance().Cache, false); 
      engine.Init();
      engine.Engine.SetInputToAudioStream(stream, info);
      engine.Start();

      Engines.Add(device, engine);
    }
Example No. 36
        public string RecognizeSpeech(byte[] speechToParse, int sampleRate)
        {
            SpeechRecognitionEngine sre = new SpeechRecognitionEngine();

            if (_grammar == null)
                InitializeGrammar();
            sre.LoadGrammar(_grammar);

            MemoryStream ms = new MemoryStream(speechToParse);
            var formatInfo = new SpeechAudioFormatInfo(sampleRate, AudioBitsPerSample.Sixteen, AudioChannel.Mono);
            sre.SetInputToAudioStream(ms, formatInfo);
            var result = sre.Recognize();
            ms = null;

            if (result == null)
                return "Unable to recognize speech";
            else
                return result.Text;
        }
Example No. 37
        static void Main(string[] args)
        {
            var text = args.Length > 0 ? args[0] : "enter some text as first argument to this command";

            // Initialize a new instance of the SpeechSynthesizer.
            SpeechSynthesizer synth = new SpeechSynthesizer();

            // Configure the audio output.
            if (args.Length > 1)
            {
                var audioformat = new SpeechAudioFormatInfo(16000,AudioBitsPerSample.Sixteen, AudioChannel.Mono);
                synth.SetOutputToWaveFile(args[1], audioformat);
            }
            else
            {
                synth.SetOutputToDefaultAudioDevice();
            }

            // Speak a string.
            synth.Speak(text);
        }
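A hypothetical invocation, assuming the project builds to SpeakText.exe:

    SpeakText.exe "Hello world"              speaks through the default audio device
    SpeakText.exe "Hello world" hello.wav    renders hello.wav as 16 kHz, 16-bit, mono PCM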
Example No. 38
        private void button1_Click(object sender, EventArgs e)
        {
            if (string.IsNullOrEmpty(textBox1.Text))
            {
                MessageBox.Show(this, "Write some text to start.", "Empty text", MessageBoxButtons.OK, MessageBoxIcon.Asterisk);
                return;
            }

            string[] texts = textBox1.Text.Split('\n');

            progressBar1.Value = 0;
            progressBar1.Maximum = texts.Length;

            for (int i = 0; i < texts.Length; i++)
            {
                var reader = new SpeechSynthesizer();
                //reader.SpeakCompleted += new EventHandler<SpeakCompletedEventArgs>(synth_SpeakCompleted);
                reader.Rate = trackRate.Value;
                reader.Volume = trackVolume.Value;
                reader.SelectVoice(((VoiceInfo)cmbVoice.SelectedItem).Name);

                var bits = radio8Bits.Checked ? AudioBitsPerSample.Eight : AudioBitsPerSample.Sixteen;
                var channel = radioChannelMono.Checked ? AudioChannel.Mono : AudioChannel.Stereo;

                var format = new SpeechAudioFormatInfo(int.Parse(cmbSamples.Text), bits, channel);
                string filePath = Directory.GetCurrentDirectory() + @"\Output\";
                if (!Directory.Exists(filePath))
                {
                    Directory.CreateDirectory(filePath);
                }
                reader.SetOutputToWaveFile(Directory.GetCurrentDirectory() + @"\Output\" + GetAudioFileName(texts[i]), format);
                reader.Speak(GetAudioText(texts[i]));
                progressBar1.Value++;
                reader.Dispose();
            }
            MessageBox.Show(this, "All done. Check .wav files on 'Output' folder.", "Finish", MessageBoxButtons.OK, MessageBoxIcon.Information);
        }
Example No. 39
        public void Generate(List<string> p_aryNames, List<string> p_aryLines, string p_strPath, int p_nRate, AudioBitsPerSample p_samples, AudioChannel p_channels)
        {
            SpeechAudioFormatInfo t_audioFormatInfo = new SpeechAudioFormatInfo(p_nRate, p_samples, p_channels);
            SpeechSynthesizer t_synth = new SpeechSynthesizer();

            progressBar1.Maximum = p_aryLines.Count;
            progressBar1.Step = 1;

            label1.Text = progressBar1.Step + "/" + p_aryNames.Count;

            for (int t_i = 0; t_i < p_aryNames.Count; ++t_i)
            {
                t_synth.SetOutputToWaveFile(p_strPath + "\\" + p_aryNames[t_i] + ".wav", t_audioFormatInfo);
                t_synth.Speak(p_aryLines[t_i]);

                label1.Text = (t_i + 1) + "/" + p_aryLines.Count;
                progressBar1.PerformStep();
                progressBar1.Refresh();
            }

            t_synth.Dispose();

            Close();
        }
Example No. 40
 public void SetInputToAudioStream( Stream audioSource, SpeechAudioFormatInfo audioFormat )
 {
     engine.SetInputToAudioStream( audioSource, audioFormat );
 }
Example No. 41
        protected void InitSpeechEngine(bool def)
        {
            try {
                WSRConfig        cfg     = WSRConfig.GetInstance();
                WSRSpeechManager manager = WSRSpeechManager.GetInstance();

                // File
                manager.InitEngines();

                // Default
                if (def)
                {
                    manager.AddDefaultEngine("Default", cfg.language, cfg.confidence);
                }

                // RTP
                if (rtpClient == null)
                {
                    return;
                }
                var format = new SpeechAudioFormatInfo(16000, AudioBitsPerSample.Sixteen, AudioChannel.Stereo);
                manager.AddEngine("RTP", cfg.language, cfg.confidence, rtpClient.AudioStream, format);
            }
            catch (Exception ex) {
                WSRConfig.GetInstance().logError("ENGINE", "InitEngines: " + ex.Message);
            }
        }
void StartAudioStream()
{
    sensor.Start();

    _kinectSource = sensor.AudioSource;
    _kinectSource.AutomaticGainControlEnabled = true;
    _kinectSource.EchoCancellationMode = EchoCancellationMode.None;
    _kinectSource.BeamAngleMode = BeamAngleMode.Adaptive;

    // Start the Kinect audio source once and reuse the resulting stream for recognition.
    _stream = _kinectSource.Start();
    SpeechAudioFormatInfo format = new SpeechAudioFormatInfo(EncodingFormat.Pcm, 16000, 16, 1, 32000, 2, null);

    _speechEngine.SetInputToAudioStream(_stream, format);
    _speechEngine.RecognizeAsync(RecognizeMode.Multiple);
}
Example No. 43
 void InitializeRecognitionEngine(string cultureName = "en-US")
 {
     RecognizerInfo ri = TryGetKinectRecognizer(cultureName);
     if(ri==null)
     {
         throw new Exception("No Recognizer");
     }
     recognitionEngine = new SpeechRecognitionEngine(ri.Id);
     recognitionEngine.SpeechRecognized += recognitionEngine_SpeechRecognized;
     SpeechAudioFormatInfo speechAudioFormatInfo = new SpeechAudioFormatInfo(EncodingFormat.Pcm, 16000, 16, 1, 32000, 2, null);
     recognitionEngine.SetInputToAudioStream(convertStream, speechAudioFormatInfo);
 }
Example No. 44
        // Analyzes the audio currently in the buffer and estimates the fundamental frequency.
        public int extractPitch(short[] audioArray, SpeechAudioFormatInfo speechAudioFormatInfo)
        {
            System.Diagnostics.Debug.WriteLine("SpeechEmotionRecognitionEngine::extractPitch()");
            if (audioArray == null)
            {
                System.Diagnostics.Debug.WriteLine("audioArray is null");
                return -1;
            }

            // For Debugging.
            // printDebugFormatInfo(speechAudioFormatInfo);

            // Only allow 8 or 16 bit audio.
            if (speechAudioFormatInfo.BitsPerSample != 8 && speechAudioFormatInfo.BitsPerSample != 16)
            {
                System.Diagnostics.Debug.WriteLine("Invalid BitsPerSample");
                return -1;
            }

            // To detect the pitch, we take a window of the signal with a length at least twice as long
            // as the longest period that we might detect. If the sampling rate is 44,100 Hz, this
            // corresponds to a length of 1200 samples. For efficient calculation, I use this ratio to approximate
            // the windowSize. This gives a ratio of 36.75, which I rounded down to 36.
            int windowSize = 2048; //  speechAudioFormatInfo.SamplesPerSecond / 36;
            int numWindows = audioArray.Length / (windowSize * speechAudioFormatInfo.ChannelCount);

            double[][] correlationFunctions = new double[numWindows][];
            int[] estimatedFundamentalFrequencies = new int[numWindows];
            for (int windowIndex = 0; windowIndex < numWindows; ++windowIndex)
            {
                // Store the current window in inputAudio so we can work with it.
                short[] inputAudio = new short[windowSize];
                for (int i = 0; i < windowSize; ++i)
                    inputAudio[i] = audioArray[i + windowIndex * windowSize];

                // Calculate the correlation function.
                correlationFunctions[windowIndex] = correlation(inputAudio, speechAudioFormatInfo);

                // Clip all results below 0 to 0.
                for (int i = 0; i < windowSize / 2; ++i)
                {
                    if (correlationFunctions[windowIndex][i] < 0)
                        correlationFunctions[windowIndex][i] = 0;
                }

                // Stretch correlation results by a factor of 2 and subtract from the original signal.
                for (int i = 0; i < windowSize / 2; ++i)
                {
                    int value;
                    if (i % 2 == 0)
                        value = inputAudio[i / 2];
                    else
                        value = (inputAudio[i / 2 + 1] - inputAudio[i / 2]) / 2;
                    correlationFunctions[windowIndex][i] -= value;
                }

                // Clip all results below 0 to 0.
                for (int i = 0; i < windowSize / 2; ++i)
                {
                    if (correlationFunctions[windowIndex][i] < 0)
                        correlationFunctions[windowIndex][i] = 0;
                }

                // Finally, estimate fundamental frequency.
                estimatedFundamentalFrequencies[windowIndex] = estimateF0(correlationFunctions[windowIndex], speechAudioFormatInfo);
            }

            // Calculate the average frequency over all the windows.
            // Can this overflow? Nah, probably not.
            long tmp = estimatedFundamentalFrequencies[0];
            for (int windowIndex = 1; windowIndex < numWindows; ++windowIndex)
                tmp += estimatedFundamentalFrequencies[windowIndex];
            int averageEstimatedFrequency = (int)(tmp / numWindows);

            // Could also return useful information like standard deviation, pitch acceleration, rising/falling, etc.

            return averageEstimatedFrequency;
        }
 public void printDebugFormatInfo(SpeechAudioFormatInfo speechAudioFormatInfoToPrint)
 {
     System.Diagnostics.Debug.WriteLine("Samples per second: " + speechAudioFormatInfoToPrint.SamplesPerSecond);
     System.Diagnostics.Debug.WriteLine("Average bytes per second: " + speechAudioFormatInfoToPrint.AverageBytesPerSecond);
     System.Diagnostics.Debug.WriteLine("Bits per sample: " + speechAudioFormatInfoToPrint.BitsPerSample);
     System.Diagnostics.Debug.WriteLine("Channel count: " + speechAudioFormatInfoToPrint.ChannelCount);
     System.Diagnostics.Debug.WriteLine("Encoding format: " + speechAudioFormatInfoToPrint.EncodingFormat);
     System.Diagnostics.Debug.WriteLine("Block Align: " + speechAudioFormatInfoToPrint.BlockAlign);
 }
Example No. 46
 /// <summary>
 /// Sets where synthesized speech is rendered to.  This sets the output to wave file.
 /// </summary>
 /// <param name="path">The path where to save the wave to.</param>
 /// <param name="formatInfo">The format info for rendering the file.</param>
 public virtual void SetOutputToWaveFile(string path, SpeechAudioFormatInfo formatInfo)
 {
     throw new NotImplementedException();
 }
        public void addCommentToLocation(string streamOfComment, string latitude , string longitude)
        {
            SpeechAudioFormatInfo audioType = new SpeechAudioFormatInfo(1000,AudioBitsPerSample.Sixteen,AudioChannel.Mono);
            SpeechSynthesizer speech = new SpeechSynthesizer("SmartAudioCityGuide", "Lz+vYpOFm6NTP83A9y0tPoX6ByJa06Q6yxHvoBsD0xo=");
            byte[] streamString;
            Locations location = new Locations();
            byte[] buffer = new byte[10];
            MemoryStream stream = new MemoryStream();
            using (SpeechRecognitionEngine speechRecongnizeEngine = new SpeechRecognitionEngine())
            {
                location.latitude = Convert.ToDouble(latitude);
                location.longitude = Convert.ToDouble(longitude);
                locationsServices.addLocations(location);

                streamString = serializer.Deserialize<byte[]>(streamOfComment);
                buffer = new byte[streamString.Count()];

                stream.Write(buffer, 0, buffer.Length);

                // Add a handler for the LoadGrammarCompleted event.
                speechRecongnizeEngine.LoadGrammarCompleted +=
                  new EventHandler<LoadGrammarCompletedEventArgs>(speechRecongnizeEngine_LoadGrammarCompleted);

                // Add a handler for the SpeechRecognized event.
                speechRecongnizeEngine.SpeechRecognized +=
                new EventHandler<SpeechRecognizedEventArgs>(speechRecongnizeEngine_SpeechRecognized);

                speechRecongnizeEngine.LoadGrammar(new DictationGrammar());
                speechRecongnizeEngine.SetInputToAudioStream(stream, audioType);
                speechRecongnizeEngine.RecognizeAsync(RecognizeMode.Multiple);
            }
            using (SpeechRecognizer recognizer = new SpeechRecognizer())
            {

                // Create SemanticResultValue objects that contain cities and airport codes.
                SemanticResultValue chicago = new SemanticResultValue("Chicago", "ORD");
                SemanticResultValue boston = new SemanticResultValue("Boston", "BOS");
                SemanticResultValue miami = new SemanticResultValue("Miami", "MIA");
                SemanticResultValue dallas = new SemanticResultValue("Dallas", "DFW");

                // Create a Choices object and add the SemanticResultValue objects, using
                // implicit conversion from SemanticResultValue to GrammarBuilder
                Choices cities = new Choices();
                cities.Add(new Choices(new GrammarBuilder[] { chicago, boston, miami, dallas }));

                // Build the phrase and add SemanticResultKeys.
                GrammarBuilder chooseCities = new GrammarBuilder();
                chooseCities.Append("I want to fly from");
                chooseCities.Append(new SemanticResultKey("origin", cities));
                chooseCities.Append("to");
                chooseCities.Append(new SemanticResultKey("destination", cities));

                // Build a Grammar object from the GrammarBuilder.
                Grammar bookFlight = new Grammar(chooseCities);
                bookFlight.Name = "Book Flight";

                // Add a handler for the LoadGrammarCompleted event.
                recognizer.LoadGrammarCompleted +=
                  new EventHandler<LoadGrammarCompletedEventArgs>(recognizer_LoadGrammarCompleted);

                // Add a handler for the SpeechRecognized event.
                recognizer.SpeechRecognized +=
                  new EventHandler<SpeechRecognizedEventArgs>(recognizer_SpeechRecognized);
                // Attach event handlers for recognition events.
                recognizer.SpeechRecognized +=
                  new EventHandler<SpeechRecognizedEventArgs>(
                    SpeechRecognizedHandler);
                recognizer.EmulateRecognizeCompleted +=
                  new EventHandler<EmulateRecognizeCompletedEventArgs>(
                    EmulateRecognizeCompletedHandler);
                // Load the grammar object to the recognizer.
                recognizer.LoadGrammarAsync(bookFlight);
            }
        }
Example No. 48
        public void computeSpectrum(short[] audioArray, float[] freqOut, SpeechAudioFormatInfo speechAudioFormatInfo)
        {
            System.Diagnostics.Debug.WriteLine("SpeechEmotionRecognitionEngine::computeSpectrum()");
            if (audioArray == null || freqOut == null)
            {
                System.Diagnostics.Debug.WriteLine("audioArray or freqOut is null");
                return;
            }

            // For Debugging.
            // printDebugFormatInfo(speechAudioFormatInfo);

            // Only allow 8 or 16 bit audio.
            if (speechAudioFormatInfo.BitsPerSample != 8 && speechAudioFormatInfo.BitsPerSample != 16)
            {
                System.Diagnostics.Debug.WriteLine("Invalid BitsPerSample");
                return;
            }

            int windowSize = audioArray.Length;
            int numWindows = audioArray.Length / (windowSize * speechAudioFormatInfo.ChannelCount);
            int height = windowSize / 2;
            int half = windowSize / 2;
            int maxSamples = half;

            float[] processed = new float[windowSize];
            for (int i = 0; i < windowSize; ++i)
                processed[i] = 0.0f;

            float[] fftOut = new float[windowSize];
            float[] inputAudio = new float[windowSize];
            int[] estimatedFundamentalFrequencies = new int[numWindows];

            for (int i = 0; i < windowSize; ++i)
                inputAudio[i] = (float)audioArray[i];

            windowFunction(WindowFunction.HANNING, windowSize, inputAudio);

            // Take FFT.
            fft(inputAudio, null, fftOut, null, 1, speechAudioFormatInfo);

            // Compute power.
            for (int i = 0; i < windowSize; ++i)
                inputAudio[i] = (float)(fftOut[i] * fftOut[i]);

            // Tolonen and Karjalainen recommend taking the cube root
            // of the power, instead of the square root

            for (int i = 0; i < windowSize; i++)
                inputAudio[i] = (float)(Math.Pow(inputAudio[i], 1.0f / 3.0f));

            // Take FFT.
            fft(inputAudio, null, fftOut, null, 1, speechAudioFormatInfo);

            for (int i = 0; i < half; i++)
                processed[i] += fftOut[i];

            // Peak Pruning as described by Tolonen and Karjalainen, 2000

            // Clip at zero, copy to temp array
            for (int i = 0; i < maxSamples; ++i)
            {
                if (processed[i] < 0.0)
                    processed[i] = (float)0.0;
                fftOut[i] = processed[i];
            }

            // Subtract a time-doubled signal (linearly interp.) from the original
            // (clipped) signal
            for (int i = 0; i < maxSamples; ++i)
            {
                if ((i % 2) == 0)
                    processed[i] -= fftOut[i / 2];
                else
                    processed[i] -= ((fftOut[i / 2] + fftOut[i / 2 + 1]) / 2);
            }

            // Clip at zero again
            for (int i = 0; i < maxSamples; ++i)
            {
                if (processed[i] < 0.0)
                    processed[i] = (float)0.0;
            }

            // Find new max
            float max = 0;
            for (int i = 1; i < maxSamples; i++)
                if (processed[i] > max)
                    max = processed[i];

            // Reverse and scale
            for (int i = 0; i < maxSamples; ++i)
                inputAudio[i] = processed[i] / (windowSize / 4);
            for (int i = 0; i < maxSamples; ++i)
                processed[maxSamples - 1 - i] = inputAudio[i];

            // Finally, put it into bins in freqOut[], normalized to a 0.0-1.0 scale.

            for (int i = 0; i < height; ++i)
            {
                float bin0 = (float)(i) * maxSamples / height;
                float bin1 = (float)(i + 1) * maxSamples / height;

                float binwidth = bin1 - bin0;

                float value = 0.0f;

                if ((int)bin1 == (int)bin0)
                    value = processed[(int)bin0];
                else
                {
                    value += processed[(int)bin0] * ((int)bin0 + 1 - bin0);
                    bin0 = 1 + (int)bin0;
                    while (bin0 < (int)bin1)
                    {
                        value += processed[(int)bin0];
                        bin0 += 1.0f;
                    }
                    value += processed[(int)bin1] * (bin1 - (int)bin1);

                    value /= binwidth;
                }

                // Clipping at a maximum of 1.0 did not prove necessary in testing,
                // so only the lower bound is enforced here.
                // if (value > 1.0)
                //     value = 1.0f;
                if (value < 0.0)
                    value = 0.0f;

                freqOut[i] = value;
            }
        }
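A minimal usage sketch for the routine above, assuming the enclosing class (named SpeechEmotionRecognitionEngine in the debug output) can be instantiated directly and that the frame length is a power of two, as the fft call it makes requires; the frame length and test tone below are illustrative only.

        // Hypothetical caller; only the class name is taken from the debug output above.
        var engine = new SpeechEmotionRecognitionEngine();
        var format = new SpeechAudioFormatInfo(16000, AudioBitsPerSample.Sixteen, AudioChannel.Mono);

        short[] frame = new short[1024];                // one power-of-two analysis window
        for (int i = 0; i < frame.Length; ++i)          // synthetic 200 Hz test tone
            frame[i] = (short)(0.5 * short.MaxValue * Math.Sin(2 * Math.PI * 200 * i / 16000.0));

        float[] spectrum = new float[frame.Length / 2]; // freqOut must hold windowSize / 2 bins
        engine.computeSpectrum(frame, spectrum, format);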
Exemplo n.º 49
0
        public double getMaximumFrequencyValue(float[] fftRealOutput, float[] fftComplexOutput, SpeechAudioFormatInfo speechAudioFormatInfo)
        {
            if (fftRealOutput == null ||
                fftComplexOutput == null)
                return -1;  // Error.

            int numSamples = fftRealOutput.Length;
            if (fftComplexOutput.Length != numSamples)
                return -1;   // Error.

            // Calculate fundamental frequency.
            int fundamentalFrequencySamples = 0;
            double maxValue = Math.Pow(fftRealOutput[fundamentalFrequencySamples], 2) + Math.Pow(fftComplexOutput[fundamentalFrequencySamples], 2);
            for (int i = 1; i < numSamples; ++i)
            {
                if (Math.Pow(fftRealOutput[i], 2) + Math.Pow(fftComplexOutput[i], 2) > maxValue)
                {
                    fundamentalFrequencySamples = i;
                    maxValue = Math.Pow(fftRealOutput[fundamentalFrequencySamples], 2) + Math.Pow(fftComplexOutput[fundamentalFrequencySamples], 2);
                }
            }
            // System.Diagnostics.Debug.WriteLine("maxFrequencyValue: " + maxValue);

            return maxValue;
        }
Exemplo n.º 50
0
        public double getFundamentalFrequency(float[] fftRealOutput, float[] fftComplexOutput, SpeechAudioFormatInfo speechAudioFormatInfo)
        {
            if (fftRealOutput == null ||
                fftComplexOutput == null ||
                speechAudioFormatInfo == null)
                return -1;  // Error.

            int numSamples = fftRealOutput.Length;
            if (fftComplexOutput.Length != numSamples)
                return -1;   // Error.

            // Calculate fundamental frequency.
            int fundamentalFrequencySamples = 0;
            double maxValue = Math.Pow(fftRealOutput[fundamentalFrequencySamples], 2) + Math.Pow(fftComplexOutput[fundamentalFrequencySamples], 2);
            for (int i = 1; i < numSamples; ++i)
            {
                if (Math.Pow(fftRealOutput[i], 2) + Math.Pow(fftComplexOutput[i], 2) > maxValue)
                {
                    fundamentalFrequencySamples = i;
                    maxValue = Math.Pow(fftRealOutput[fundamentalFrequencySamples], 2) + Math.Pow(fftComplexOutput[fundamentalFrequencySamples], 2);
                }
            }
            double fundamentalFrequency = fundamentalFrequencySamples * (speechAudioFormatInfo.SamplesPerSecond / ((double)numSamples / speechAudioFormatInfo.ChannelCount));
            // System.Diagnostics.Debug.WriteLine("fundamentalFrequency: " + fundamentalFrequency);

            return fundamentalFrequency;
        }
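A worked instance of the bin-to-frequency conversion used above, with illustrative values:

        // With a mono signal (ChannelCount == 1) at 16 000 samples/s and a 1024-point FFT,
        // each bin spans 16000 / 1024 = ~15.6 Hz, so a peak in bin 13 maps to about 203 Hz.
        double samplesPerSecond = 16000;
        double fftLength = 1024;                        // numSamples / ChannelCount
        int peakBin = 13;                               // index found by the search loop above
        double fundamentalHz = peakBin * (samplesPerSecond / fftLength);   // about 203.1 Hz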
Exemplo n.º 51
0
        public double[] correlation(short[] inputAudio, SpeechAudioFormatInfo speechAudioFormatInfo)
        {
            int size = inputAudio.Length / 2;
            // Initialize the correlation function to 0.
            double[] correlationFunction = new double[size];
            for (int i = 0; i < size; ++i)
                correlationFunction[i] = 0;

            for (int shift = 0; shift < size; shift += speechAudioFormatInfo.ChannelCount)
            {
                for (int audioIndex = 0; audioIndex < size; audioIndex += speechAudioFormatInfo.ChannelCount)
                {
                    // Accumulate the autocorrelation term. The products are formed in double
                    // precision, so overflow of the 16-bit sample values is not a concern.
                    correlationFunction[shift] += (double)inputAudio[audioIndex] * (double)inputAudio[audioIndex + shift];
                }
                correlationFunction[shift] /= size;
            }

            /* // For debugging.
            // Print the first window's correlation function, just to see what it looks like.
            String correlationFunctionString = "";
            for (int correlationIndex = 0; correlationIndex < size; ++correlationIndex)
                correlationFunctionString += correlationFunction[correlationIndex] + ".";
            System.Diagnostics.Debug.WriteLine("Correlation function: " + correlationFunctionString);
            */

            return correlationFunction;
        }
Exemplo n.º 52
0
        // Based on the Audacity source and Numerical Recipes.
        // Also, check out this useful website:
        // http://www.codeproject.com/KB/recipes/howtofft.aspx
        public void fft(float[] realIn, float[] imagIn,
                        float[] realOut, float[] imagOut, int sign,
                        SpeechAudioFormatInfo speechAudioFormatInfo)
        {
            int n, mmax, m, j, istep, i;
            double wtemp, wr, wpr, wpi, wi, theta, tempr, tempi;

            int numSamples = realIn.Length;
            int numBits = numBitsNeeded(numSamples);
            int numComplexSamples = numSamples * 2;

            if (!isPowerOfTwo(numSamples))
            {
                System.Diagnostics.Debug.WriteLine(numSamples + " is not a power of two");
                return;
            }

            if (imagOut == null)
                imagOut = new float[numSamples];

            if (sign > 0)
                sign = 1;
            else
                sign = -1;

            if (fastBitReversalTable == null)
                initFFT();

            // Do simultaneous data copy and bit-reversal ordering into interleaved intermediate output...
            float[] data = new float[numComplexSamples];
            for (i = 0; i < numSamples; i++)
            {
                j = reverseBits(i, numBits);
                data[2 * j] = (float)realIn[i];
                data[2 * j + 1] = (imagIn == null) ? 0.0f : imagIn[i];
            }

            // Do the FFT itself...
            // Danielson-Lanczos routine
            mmax = 2;
            n = numComplexSamples;
            while (n > mmax)
            {
                istep = mmax << 1;
                theta = sign * (2 * Math.PI / mmax);
                wtemp = Math.Sin(0.5 * theta);
                wpr = -2.0 * wtemp * wtemp;
                wpi = Math.Sin(theta);
                wr = 1.0;
                wi = 0.0;

                for (m = 1; m < mmax; m += 2)
                {
                    for (i = m; i <= n; i += istep)
                    {
                        j = i + mmax;
                        tempr = wr * data[j - 1] - wi * data[j];
                        tempi = wr * data[j] + wi * data[j - 1];
                        data[j - 1] = data[i - 1] - (float)tempr;
                        data[j] = data[i] - (float)tempi;
                        data[i - 1] += (float)tempr;
                        data[i] += (float)tempi;
                    }
                    wtemp = wr;
                    wr = wtemp * wpr - wi * wpi + wr;
                    wi = wi * wpr + wtemp * wpi + wi;
                }
                mmax = istep;
            }

            // De-interleave the real and imaginary data into the output arrays.
            for (i = 0; i < numSamples; ++i)
            {
                realOut[i] = data[2 * i];
                imagOut[i] = data[2 * i + 1];
            }

            // Happy =)
        }
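A quick sanity check of the transform above, reusing the hypothetical engine and format from the sketch under Exemplo n.º 48: for a real input tone at bin k, an N-point FFT should concentrate its energy in the conjugate pair of bins k and N - k.

        int n = 256, k = 8;
        float[] re = new float[n], im = new float[n];
        float[] reOut = new float[n], imOut = new float[n];
        for (int i = 0; i < n; ++i)
            re[i] = (float)Math.Cos(2 * Math.PI * k * i / n);   // pure tone at bin k
        engine.fft(re, im, reOut, imOut, 1, format);
        // reOut[j]^2 + imOut[j]^2 should peak at j == k and j == n - k.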
Exemplo n.º 53
0
        private void saveButton_Click(object sender, EventArgs e)
        {
            string fileName = textBox1.Text.Trim();

            if (fileName.Length != 0)
            {
                if (fileName.EndsWith(".wav"))
                {
                    fileName = fileName.Substring(0, fileName.Length - 4);
                }
                else
                {
                    textBox1.Text = fileName + ".wav";
                }

                if (fileName.Length > 8)
                {
                    string oldName = fileName;
                    fileName = fileName.Substring(0, 8);
                    string warning = "The filename: " + oldName + " is too long.\nThe maximum length for Taranis is 12 characters including the file ending.\nThe file will be renamed to " + fileName + ".wav";
                    MessageBox.Show(this, warning);
                }

                // Re-append the ".wav" extension that was stripped above.
                fileName += ".wav";

                if (folderName == null)
                {
                    folderName = Environment.CurrentDirectory;
                }

                string text = textBox2.Text;
                speaker.SelectVoice(voices[comboBox1.SelectedIndex]);
                var speechAudioFormatInfo = new SpeechAudioFormatInfo(EncodingFormat.ULaw, 32000, 8, 1, 16000, 2, null); // (encoding, samples/s, bits/sample, channels, average bytes/s, block align, format-specific data)
                speaker.SetOutputToWaveFile(folderName + "\\" + fileName, speechAudioFormatInfo);
                speaker.Speak(text);
            }
        }
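The renaming rule described in the warning above (at most 12 characters including the ".wav" ending, i.e. an 8-character base name) could be factored into a small helper; the method name below is hypothetical:

        static string ToTaranisFileName(string name)
        {
            if (name.EndsWith(".wav", StringComparison.OrdinalIgnoreCase))
                name = name.Substring(0, name.Length - 4);   // strip the extension
            if (name.Length > 8)
                name = name.Substring(0, 8);                 // 8-character base name
            return name + ".wav";                            // at most 12 characters in total
        }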
Exemplo n.º 54
0
 /// <summary>
 /// Sets where synthesized speech is rendered. This overload directs the output to an audio stream.
 /// </summary>
 /// <param name="audioDestination">The audio destination.</param>
 /// <param name="formatInfo">The format info for rendering the stream.</param>
 public virtual void SetOutputToAudioStream(Stream audioDestination, SpeechAudioFormatInfo formatInfo)
 {
     throw new NotImplementedException();
 }
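For reference, a minimal sketch of the System.Speech behaviour this stub mirrors: SpeechSynthesizer.SetOutputToAudioStream renders raw, headerless audio in the requested format into a caller-supplied stream.

        // Requires the System.Speech.AudioFormat and System.IO namespaces.
        using (var synth = new System.Speech.Synthesis.SpeechSynthesizer())
        using (var buffer = new MemoryStream())
        {
            var fmt = new SpeechAudioFormatInfo(16000, AudioBitsPerSample.Sixteen, AudioChannel.Mono);
            synth.SetOutputToAudioStream(buffer, fmt);
            synth.Speak("Hello");
            // 'buffer' now holds raw PCM with no RIFF/WAV header.
        }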
Exemplo n.º 55
0
        private void StartSpeech(AssignedVoice vb, string outputfile)
        {
            WinAvailableVoice wv = (WinAvailableVoice)vb.root;

            // Find the best audio format to use for this voice.
            System.Collections.ObjectModel.ReadOnlyCollection<SpeechAudioFormatInfo> formats =
                wv.winVoice.VoiceInfo.SupportedAudioFormats;

            format = formats.FirstOrDefault();

            if (format == null)
            {
                // The voice did not tell us its parameters, so we pick some.
                format = new SpeechAudioFormatInfo(
                    16000,      // Samples per second
                    AudioBitsPerSample.Sixteen,
                    AudioChannel.Mono);
            }

            // First set up to synthesize the message into a WAV file.
            mstream = new FileStream(outputfile, FileMode.Create, FileAccess.Write);

            syn.SetOutputToWaveStream(mstream);

            pb = new PromptBuilder();
            mainStyle = new PromptStyle();
            //            mainStyle.Volume = promptVol;
            syn.SelectVoice(wv.winVoice.VoiceInfo.Name);
            pb.StartStyle(mainStyle);
        }
Exemplo n.º 56
0
    // ==========================================
    //  HANDLE SPEECH RECOGNITION
    // ==========================================

    public override void InitSpeechEngine() {
      
      base.InitSpeechEngine(false);
      
      try {

        WSRConfig cfg = WSRConfig.GetInstance();
        WSRSpeechManager manager = WSRSpeechManager.GetInstance();
        SpeechAudioFormatInfo format = new SpeechAudioFormatInfo(EncodingFormat.Pcm, 16000, 16, 1, 32000, 2, null); // 16 kHz, 16-bit, mono PCM (encoding, samples/s, bits/sample, channels, average bytes/s, block align, format-specific data)

        for( int i = 0 ; i < Sensors.Count ; i++) {
          KinectAudioSource source = Sensors[i].Sensor.AudioSource;
          source.EchoCancellationMode = EchoCancellationMode.CancellationAndSuppression;
          source.NoiseSuppression = true;
          source.BeamAngleMode = BeamAngleMode.Adaptive; // set the beam to adapt to the surroundings
          source.AutomaticGainControlEnabled = false;
          if (WSRConfig.GetInstance().Echo >= 0){
            source.EchoCancellationSpeakerIndex = WSRConfig.GetInstance().Echo;
          }

          String prefix = "KINECT_" + i;
          cfg.logInfo(prefix, "AutomaticGainControlEnabled : "  + source.AutomaticGainControlEnabled);
          cfg.logInfo(prefix, "BeamAngle : "                    + source.BeamAngle);
          cfg.logInfo(prefix, "EchoCancellationMode : "         + source.EchoCancellationMode);
          cfg.logInfo(prefix, "EchoCancellationSpeakerIndex : " + source.EchoCancellationSpeakerIndex);
          cfg.logInfo(prefix, "NoiseSuppression : "             + source.NoiseSuppression);
          cfg.logInfo(prefix, "SoundSourceAngle : "             + source.SoundSourceAngle);
          cfg.logInfo(prefix, "SoundSourceAngleConfidence : "   + source.SoundSourceAngleConfidence);

          var stream = source.Start();
          // streamer = new SpeechStreamer(stream); // FIXME
          manager.AddEngine(prefix, cfg.language, cfg.confidence, stream, format);
        }
      }
      catch (Exception ex) {
        WSRConfig.GetInstance().logError("ENGINE", "Init Kinect Engines: " + ex.Message);
      }
    }
Exemplo n.º 57
0
        protected override void OnLoad(EventArgs e)
        {
            Visible = false;
            ShowInTaskbar = false;
            base.OnLoad(e);
 
            // Get all installed voices.
            var voices = speech.GetInstalledVoices();
            string voice = "";
 
            foreach (InstalledVoice v in voices)
            {
                if (v.Enabled)
                {
                    //voice = v.VoiceInfo.Name;
                    Console.WriteLine(v.VoiceInfo.Name);
                }
            }

            queuetimer = new System.Timers.Timer(250);
            queuetimer.Elapsed += (object sender, ElapsedEventArgs ev) => 
            {
                TTSRequest r;
                if (Queue.TryDequeue(out r))
                {
                    Console.WriteLine("dequeing off of concurrent queue...");
                    if (r.Interrupt)
                    {
                        // stop current TTS
                            if (IsSpeaking)
                            {
                                //speech.StopSpeaking();
                            }
                            if (IsSounding)
                            {
                                //sound.Stop();
                                if(sound.PlaybackState == PlaybackState.Playing) {
                                    sound.Stop(); 
                                }
                            }
                        // clear queue
                        SpeechQueue.Clear();
                    }
                    if(!r.Reset) {
                        SpeechQueue.Enqueue(r);
                    }
                    RequestCount++;
                }
                
                var eventdata = new Hashtable();
                eventdata.Add("ProcessedRequests", RequestCount);
                eventdata.Add("QueuedRequests", SpeechQueue.Count);
                eventdata.Add("IsSpeaking", IsSounding);
                InstrumentationEvent blam = new InstrumentationEvent();
                blam.EventName = "status";
                blam.Data = eventdata;
                NotifyGui(blam.EventMessage());  
            };

            // When this timer fires, it pulls the next request off of the speech queue and speaks it.
            // The relatively long interval also adds a small pause between TTS requests.
            speechtimer = new System.Timers.Timer(250);
            speechtimer.Elapsed += (object sender, ElapsedEventArgs ev) =>
            {
                if (IsSpeaking.Equals(false))
                {
                    if (SpeechQueue.Count > 0)
                    {
                        TTSRequest r = SpeechQueue.Dequeue();
                        Console.WriteLine("dequeuing off of speech queue");
                        IsSpeaking = true;
                        speechtimer.Enabled = false;

                        //speech.SpeakAsync(r.Text);

                        //using (speech = new SpeechSynthesizer()) {
                        speech = new SpeechSynthesizer();
                            speech.SpeakCompleted += speech_SpeakCompleted;
                            format = new SpeechAudioFormatInfo(EncodingFormat.ALaw, 8000, 8, 1, 1, 2, null); // (encoding, samples/s, bits/sample, channels, average bytes/s, block align, format-specific data)
                            //format = new SpeechAudioFormatInfo(11025, AudioBitsPerSample.Sixteen, AudioChannel.Mono);
                           // var si = speech.GetType().GetMethod("SetOutputStream", BindingFlags.Instance | BindingFlags.NonPublic);
                            stream = new MemoryStream();
                            //si.Invoke(speech, new object[] { stream, format, true, true });
                            //speech.SetOutputToWaveStream(stream);
                            speech.SetOutputToAudioStream(stream, format);
                            speech.SelectVoice(config.getVoice (r.Language, r.Voice));
                            int rate = (r.Speed * 2 - 10);
                            
                            Console.WriteLine(rate);
                            try
                            {
                                speech.Rate = rate;
                            }
                            catch (ArgumentOutOfRangeException)
                            {
                                speech.Rate = 0;
                            }
                            speech.SpeakAsync(r.Text);
                        //}

                        synthesis.WaitOne();
                        speech.SpeakCompleted -= speech_SpeakCompleted;
                        speech.SetOutputToNull();
                        speech.Dispose();
                        //IsSpeaking = false;
                        IsSounding = true;
                        stream.Position = 0;
                        //WaveFormat.CreateCustomFormat(WaveFormatEncoding.WmaVoice9, 11025, 1, 16000, 2, 16)
                        using(RawSourceWaveStream reader = new RawSourceWaveStream(stream, WaveFormat.CreateALawFormat(8000, 1))) {
                            WaveStream ws = WaveFormatConversionStream.CreatePcmStream(reader);

                            //var waveProvider = new MultiplexingWaveProvider(new IWaveProvider[] { ws }, 4);
                            //waveProvider.ConnectInputToOutput(0, 3);

                            sound = new WaveOutEvent();
                            // set output device *before* init
                            Console.WriteLine("Output Device: " + OutputDeviceId);
                            sound.DeviceNumber = OutputDeviceId;
                            sound.Init(ws);
                            //sound.Init(waveProvider);
                            sound.PlaybackStopped += output_PlaybackStopped;
                           // Console.WriteLine("playing here " + ws.Length);
                            sound.Play();
                        }
                        playback.WaitOne();
                        //IsSounding = false;
                        speechtimer.Enabled = true;
                    }
                }
            };

            queuetimer.Enabled = true;
            queuetimer.Start();
            speechtimer.Enabled = true;
            speechtimer.Start();

            InitHTTPServer();

        }
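A condensed sketch of the playback path wired up above: synthesize raw A-law audio into memory, re-wrap it for NAudio, convert it to PCM, and play it on the selected device. The device number, the literal text, and the format values below are assumptions.

        // Requires System.Speech.Synthesis, System.Speech.AudioFormat, NAudio.Wave, System.IO and System.Threading.
        var fmt = new SpeechAudioFormatInfo(EncodingFormat.ALaw, 8000, 8, 1, 8000, 1, null);
        var buffer = new MemoryStream();
        using (var synth = new SpeechSynthesizer())
        {
            synth.SetOutputToAudioStream(buffer, fmt);       // raw A-law, no RIFF header
            synth.Speak("queued text");
            synth.SetOutputToNull();                         // flush and release the stream
        }
        buffer.Position = 0;
        using (var alaw = new RawSourceWaveStream(buffer, WaveFormat.CreateALawFormat(8000, 1)))
        using (var pcm = WaveFormatConversionStream.CreatePcmStream(alaw))
        using (var device = new WaveOutEvent { DeviceNumber = 0 })
        {
            device.Init(pcm);
            device.Play();
            while (device.PlaybackState == PlaybackState.Playing)
                Thread.Sleep(100);                           // block until playback finishes
        }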
Exemplo n.º 58
0
        // Estimate the fundamental frequency from the correlation function:
        // find the first sign change (negative to positive) in the differentiated correlation
        // function and treat that lag as the fundamental period (a usage sketch follows this method).
        public int estimateF0(double[] corr, SpeechAudioFormatInfo speechAudioFormatInfo)
        {
            int fundamentalPeriodSamples = 0;
            int jitter = 0;
            bool wasNegative = false;

            for (int i = 0; i < corr.Length - 1; ++i)
            {
                if (wasNegative)
                {
                    if (corr[i + 1] - corr[i] >= 0)
                    {
                        if (jitter > 3)
                        {
                            i -= jitter;
                            fundamentalPeriodSamples = i;
                            break;
                        }
                        jitter++;
                    }
                }
                else if (corr[i + 1] - corr[i] <= 0)
                {
                    if (jitter > 3)
                    {
                        wasNegative = true;
                        i -= jitter;
                        jitter = 0;
                        continue;
                    }
                    jitter++;
                }
            }

            int estimatedF0 = 0;
            if (fundamentalPeriodSamples > 0)
                estimatedF0 = (int)(1.0 * speechAudioFormatInfo.SamplesPerSecond / fundamentalPeriodSamples);

            // For debugging.
            System.Diagnostics.Debug.WriteLine("Estimated Fundamental Frequency: " + estimatedF0);

            return estimatedF0;
        }
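Tying the correlation and estimateF0 routines together, a hedged usage sketch that reuses the hypothetical engine and format from the sketch under Exemplo n.º 48; the returned estimate is simply SamplesPerSecond divided by the detected lag in samples.

        short[] samples = new short[2048];                   // mono test signal
        for (int i = 0; i < samples.Length; ++i)
            samples[i] = (short)(0.5 * short.MaxValue * Math.Sin(2 * Math.PI * 200 * i / 16000.0));

        double[] corr = engine.correlation(samples, format); // autocorrelation over half the buffer
        int f0 = engine.estimateF0(corr, format);            // SamplesPerSecond / detected lag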
Exemplo n.º 59
0
        public void TextToSpeech(string text)
        {
            this.Log.Debug("Creating wav file of: " + text);
            SpeechAudioFormatInfo synthFormat = new SpeechAudioFormatInfo(44100, AudioBitsPerSample.Sixteen, AudioChannel.Stereo);
            SpeechSynthesizer speechEngine = new SpeechSynthesizer();

            this.Log.Debug("setting output: " + ttsSave);
            speechEngine.SetOutputToWaveFile(ttsSave, synthFormat);
            this.Log.Debug("speaking");
            speechEngine.Speak(text);
            speechEngine.Dispose();
        }
Exemplo n.º 60
0
        private void StartKinect( KinectSensor k )
        {
            kinect = k;

            // Enable the color and depth streams
            kinect.ColorStream.Enable( rgbFormat );
            kinect.DepthStream.Enable( depthFormat );

            // Initialize the buffers for the RGB camera
            pixelBuffer = new byte[kinect.ColorStream.FramePixelDataLength];
            bmpBuffer = new RenderTargetBitmap( kinect.ColorStream.FrameWidth,
                kinect.ColorStream.FrameHeight, 96, 96, PixelFormats.Default );

            rgbImage.Source = bmpBuffer;

            // Initialize the buffers for the depth camera
            depthBuffer = new short[kinect.DepthStream.FramePixelDataLength];
            depthColorPoint = new ColorImagePoint[kinect.DepthStream.FramePixelDataLength];
            depthMaskBuffer = new byte[kinect.ColorStream.FramePixelDataLength];

            // Enable the skeleton stream
            kinect.SkeletonStream.Enable();

            // Initialize the buffers for the skeleton stream
            skeletonBuffer = new Skeleton[kinect.SkeletonStream.FrameSkeletonArrayLength];
            playerGesture = new PlayerGesture[kinect.SkeletonStream.FrameSkeletonArrayLength];
            for ( int i = 0; i < playerGesture.Length; i++ ) {
                playerGesture[i] = new PlayerGesture();
            }

            // Register the event handler that receives the RGB, depth, and skeleton frames
            kinect.AllFramesReady +=
                    new EventHandler<AllFramesReadyEventArgs>( kinect_AllFramesReady );

            // Starting the streams from the Kinect sensor
            // is handled by the KinectSensorChooser.
            //kinect.Start();

            // Speech recognition setup
            kinect.AudioSource.SoundSourceAngleChanged += AudioSource_SoundSourceAngleChanged;
            var stream = kinect.AudioSource.Start();

            speechEngine = InitSpeechEngine();
            speechEngine.SpeechRecognized += speechEngine_SpeechRecognized;
            var format = new SpeechAudioFormatInfo( EncodingFormat.Pcm, 16000, 16, 1, 32000, 2, null ); // 16 kHz, 16-bit, mono PCM
            speechEngine.SetInputToAudioStream( stream, format );
            speechEngine.RecognizeAsync( RecognizeMode.Multiple );
        }