Example #1
        // This keeps the WebSocket connected when we are not sending any data.
        private IEnumerator KeepAlive()
        {
            while (_listenSocket != null)
            {
                yield return null;

                if ((DateTime.Now - _lastKeepAlive).TotalSeconds > WsKeepAliveInterval)
                {
                    //  Temporary clip to use for KeepAlive
                    //  TODO: Generate small sound clip to send to the service to keep alive.
                    //AudioClip _keepAliveClip = Resources.Load<AudioClip>("highHat");

#if ENABLE_DEBUGGING
                    Log.Debug("SpeechToText.KeepAlive()", "Sending keep alive.");
#endif
                    //_listenSocket.Send(new WSConnector.BinaryMessage(AudioClipUtil.GetL16(_keepAliveClip)));
                    //_keepAliveClip = null;

                    Debug.Log("Sending " + _prefixClips.Count + " prefix clips for keep alive");
                    foreach (AudioData prefixClip in _prefixClips)
                    {
                        _listenSocket.Send(new WSConnector.BinaryMessage(AudioClipUtil.GetL16(prefixClip.Clip)));
                    }
                    _prefixClips.Clear();

                    _lastKeepAlive = DateTime.Now;
                }
            }
            Log.Debug("SpeechToText.KeepAlive()", "KeepAlive exited.");
        }
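
KeepAlive() polls once per frame while _listenSocket is non-null and, whenever WsKeepAliveInterval seconds pass without traffic, flushes the buffered prefix clips so the WebSocket does not idle out. Below is a minimal sketch of starting and stopping the coroutine, assuming the enclosing class is (or delegates to) a MonoBehaviour; the _keepAliveRoutine, OpenListenSocket(), and CloseListenSocket() names are hypothetical and not part of the listing above:

        // Hypothetical wiring for the KeepAlive() coroutine: start it when the
        // socket opens and let the while condition end it when the socket closes.
        private Coroutine _keepAliveRoutine;

        private void OpenListenSocket()
        {
            // ... create and connect _listenSocket here ...
            _lastKeepAlive = DateTime.Now;
            _keepAliveRoutine = StartCoroutine(KeepAlive());
        }

        private void CloseListenSocket()
        {
            // Clearing the socket makes KeepAlive()'s while condition fail on the
            // next frame, so the coroutine logs its exit message and ends itself.
            _listenSocket = null;
            _keepAliveRoutine = null;
        }

Because the coroutine exits on its own when the socket goes away, there is no need to call StopCoroutine() explicitly.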
Example #2
        /// <summary>
        /// This function should be invoked with the AudioData input after the StartListening() method has been
        /// invoked. The user should continue to invoke this function until they are ready to call StopListening();
        /// typically, microphone input is sent to this function.
        /// </summary>
        /// <param name="clip">An AudioData object containing the AudioClip and the max level found in the clip.</param>
        public void OnListen(AudioData clip)
        {
            if (m_IsListening)
            {
                if (m_RecordingHZ < 0)
                {
                    m_RecordingHZ = clip.Clip.frequency;
                    SendStart();
                }

                if (!DetectSilence || clip.MaxLevel >= m_SilenceThreshold)
                {
                    if (m_ListenActive)
                    {
                        m_ListenSocket.Send(new WSConnector.BinaryMessage(AudioClipUtil.GetL16(clip.Clip)));
                        m_AudioSent = true;
                    }
                    else
                    {
                        // we have not received the "listening" state yet from the server, so just queue
                        // the audio clips until that happens.
                        m_ListenRecordings.Enqueue(clip);

                        // check the length of this queue and do something if it gets too full.
                        if (m_ListenRecordings.Count > MAX_QUEUED_RECORDINGS)
                        {
                            Log.Error("SpeechToText", "Recording queue is full.");

                            StopListening();
                            if (OnError != null)
                            {
                                OnError("Recording queue is full.");
                            }
                        }
                    }
                }
                else if (m_AudioSent)
                {
                    SendStop();
                    m_AudioSent = false;
                }

                // After sending start, we should get into the listening state within the amount of time specified
                // by LISTEN_TIMEOUT. If not, then stop listening and record the error.
                if (!m_ListenActive && (DateTime.Now - m_LastStartSent).TotalSeconds > LISTEN_TIMEOUT)
                {
                    Log.Error("SpeechToText", "Failed to enter listening state.");

                    StopListening();
                    if (OnError != null)
                    {
                        OnError("Failed to enter listening state.");
                    }
                }
            }
        }
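
OnListen() expects a steady stream of AudioData chunks, normally captured from the microphone. The sketch below shows one way to produce that stream with Unity's Microphone API; the AudioData(clip, maxLevel) constructor is inferred from its use in this listing, and the IsListening property is assumed to expose m_IsListening:

        // A hedged capture loop: records into a looping 10-second clip and forwards
        // roughly one-second chunks to OnListen(). AudioClip.GetData wraps around the
        // end of the buffer, so the modular position arithmetic below is safe.
        private IEnumerator CaptureMicrophone(SpeechToText service)
        {
            const int frequency = 16000;
            AudioClip recording = Microphone.Start(null, true, 10, frequency);
            int lastPos = 0;

            while (service.IsListening)
            {
                yield return new WaitForSeconds(1.0f);

                int pos = Microphone.GetPosition(null);
                int count = (pos - lastPos + recording.samples) % recording.samples;
                if (count == 0)
                    continue;

                // Copy the newly recorded samples into a standalone clip.
                float[] data = new float[count];
                recording.GetData(data, lastPos);
                lastPos = pos;

                AudioClip chunk = AudioClip.Create("chunk", count, 1, frequency, false);
                chunk.SetData(data, 0);

                // Track the peak level so OnListen() can do its silence detection.
                float maxLevel = 0.0f;
                for (int i = 0; i < count; ++i)
                    maxLevel = Mathf.Max(maxLevel, Mathf.Abs(data[i]));

                service.OnListen(new AudioData(chunk, maxLevel));
            }

            Microphone.End(null);
        }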
Example #3
        private void OnListenMessage(WSConnector.Message msg)
        {
            if (msg is WSConnector.TextMessage)
            {
                WSConnector.TextMessage tm = (WSConnector.TextMessage)msg;

                IDictionary json = Json.Deserialize(tm.Text) as IDictionary;
                if (json != null)
                {
                    if (json.Contains("results"))
                    {
                        SpeechResultList results = ParseRecognizeResponse(json);
                        if (results != null)
                        {
                            if (newSpeech)
                            {
                                newSpeech = false;
                                UnityEngine.Debug.Log("--new: " + new SpeechToTextData(results).Text);
                                Cloudspace.NotificationCenter.DefaultCenter().PostNotification(null, "OnListeningToUser",
                                                                                               new SpeechToTextData(results).Text);
                            }
                            if (finalResults == null)
                            {
                                finalResults = results;
                            }
                            else
                            {
                                SpeechResult[] aggregated = new SpeechResult[finalResults.Results.Length + results.Results.Length];
                                for (int i = 0; i < finalResults.Results.Length; i++)
                                {
                                    aggregated[i] = finalResults.Results[i];
                                }
                                for (int i = finalResults.Results.Length; i < finalResults.Results.Length + results.Results.Length; i++)
                                {
                                    aggregated[i] = results.Results[i - finalResults.Results.Length];
                                }
                                finalResults.Results = aggregated;
                            }
//							UnityEngine.Debug.Log ("--agg: "+new SpeechToTextData (finalResults).AllText);

                            // when we get results, start listening for the next block ..
                            // if continuous is true, then we don't need to do this..
                            if (!EnableContinousRecognition && results.HasFinalResult())
                            {
                                SendStart();
                            }

//                            if (m_ListenCallback == null) {
                            StopListening();
//							}
                        }
                        else
                        {
                            Log.Error("SpeechToText", "Failed to parse results: {0}", tm.Text);
                        }
                    }
                    else if (json.Contains("state"))
                    {
                        string state = (string)json["state"];
#if ENABLE_DEBUGGING
                        Log.Debug("SpeechToText", "Server state is {0}", state);
#endif
                        if (state == "listening")
                        {
                            if (m_IsListening)
                            {
                                if (!m_ListenActive)
                                {
                                    m_ListenActive = true;

                                    // send all pending audio clips ..
                                    while (m_ListenRecordings.Count > 0)
                                    {
                                        AudioData clip = m_ListenRecordings.Dequeue();
                                        m_ListenSocket.Send(new WSConnector.BinaryMessage(AudioClipUtil.GetL16(clip.Clip)));
                                        m_AudioSent = true;
                                    }
                                }
                            }
                        }
                    }
                    else if (json.Contains("error"))
                    {
                        string error = (string)json["error"];
                        Log.Error("SpeechToText", "Error: {0}", error);

                        StopListening();
                        if (OnError != null)
                        {
                            OnError(error);
                        }
                    }
                    else
                    {
                        Log.Warning("SpeechToText", "Unknown message: {0}", tm.Text);
                    }
                }
                else
                {
                    Log.Error("SpeechToText", "Failed to parse JSON from server: {0}", tm.Text);
                }
            }
            if (silenceCounter > m_PausesLimit)
            {
                silenceCounter = 0;

                if (finalResults != null)
                {
                    newSpeech = true;
                    m_ListenCallback(finalResults);
                    finalResults = null;
                }
            }
        }
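
The two aggregation loops in the middle of this handler concatenate the previous and new result arrays by hand. The same merge can be written more compactly with Array.Copy, which also avoids the index arithmetic in the second loop:

        // Equivalent to the two aggregation loops above: copy the old results,
        // then append the new ones directly after them.
        SpeechResult[] aggregated = new SpeechResult[finalResults.Results.Length + results.Results.Length];
        Array.Copy(finalResults.Results, 0, aggregated, 0, finalResults.Results.Length);
        Array.Copy(results.Results, 0, aggregated, finalResults.Results.Length, results.Results.Length);
        finalResults.Results = aggregated;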
Example #4
        private void OnListenMessage(WSConnector.Message msg)
        {
            if (msg is WSConnector.TextMessage)
            {
                WSConnector.TextMessage tm = (WSConnector.TextMessage)msg;

                IDictionary json = Json.Deserialize(tm.Text) as IDictionary;
                if (json != null)
                {
                    if (json.Contains("results"))
                    {
                        SpeechResultList results = ParseRecognizeResponse(json);
                        if (results != null)
                        {
                            // when we get results, start listening for the next block ..
                            // if continuous is true, then we don't need to do this..
                            if (!EnableContinousRecognition && results.HasFinalResult())
                            {
                                SendStart();
                            }

                            if (m_ListenCallback != null)
                            {
                                m_ListenCallback(results);
                            }
                            else
                            {
                                StopListening();            // automatically stop listening if our callback is destroyed.
                            }
                        }
                        else
                        {
                            Log.Error("SpeechToText", "Failed to parse results: {0}", tm.Text);
                        }
                    }
                    else if (json.Contains("state"))
                    {
                        string state = (string)json["state"];

#if ENABLE_DEBUGGING
                        Log.Debug("SpeechToText", "Server state is {0}", state);
#endif
                        if (state == "listening")
                        {
                            if (m_IsListening)
                            {
                                if (!m_ListenActive)
                                {
                                    m_ListenActive = true;

                                    // send all pending audio clips ..
                                    while (m_ListenRecordings.Count > 0)
                                    {
                                        AudioData clip = m_ListenRecordings.Dequeue();
                                        m_ListenSocket.Send(new WSConnector.BinaryMessage(AudioClipUtil.GetL16(clip.Clip)));
                                        m_AudioSent = true;
                                    }
                                }
                            }
                        }
                    }
                    else if (json.Contains("error"))
                    {
                        string error = (string)json["error"];
                        Log.Error("SpeechToText", "Error: {0}", error);

                        StopListening();
                        if (OnError != null)
                        {
                            OnError(error);
                        }
                    }
                    else
                    {
                        Log.Warning("SpeechToText", "Unknown message: {0}", tm.Text);
                    }
                }
                else
                {
                    Log.Error("SpeechToText", "Failed to parse JSON from server: {0}", tm.Text);
                }
            }
        }
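
This version hands every parsed SpeechResultList to m_ListenCallback and stops listening automatically once that callback is gone. Below is a caller-side sketch of supplying the callback; the StartListening(callback) entry point and the Final/Alternatives/Transcript field names are assumed from this SDK's result model rather than shown in the listing:

        // Hypothetical caller: subscribe a handler, then log each final transcript.
        private void BeginDictation(SpeechToText service)
        {
            service.StartListening(OnRecognize);
        }

        private void OnRecognize(SpeechResultList results)
        {
            foreach (SpeechResult result in results.Results)
            {
                if (!result.Final)
                    continue;    // skip interim hypotheses

                foreach (SpeechAlt alternative in result.Alternatives)
                    Debug.Log("Recognized: " + alternative.Transcript);
            }
        }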
Example #5
        private void OnListenMessage(WSConnector.Message msg)
        {
            if (msg is WSConnector.TextMessage)
            {
                WSConnector.TextMessage tm = (WSConnector.TextMessage)msg;

                IDictionary json = Json.Deserialize(tm.Text) as IDictionary;
                if (json != null)
                {
                    if (json.Contains("results"))
                    {
                        SpeechRecognitionEvent results = ParseRecognizeResponse(json);
                        if (results != null)
                        {
                            //// when we get results, start listening for the next block ..
                            //if (results.HasFinalResult())
                            //Log.Debug("SpeechToText.OnListenMessage()", "final json response: {0}", tm.Text);
                            //    SendStart();

                            if (_listenCallback != null)
                            {
                                _listenCallback(results);
                            }
                            else
                            {
                                StopListening();            // automatically stop listening if our callback is destroyed.
                            }
                        }
                        else
                        {
                            Log.Error("SpeechToText.OnListenMessage()", "Failed to parse results: {0}", tm.Text);
                        }
                    }
                    else if (json.Contains("state"))
                    {
                        string state = (string)json["state"];

#if ENABLE_DEBUGGING
                        Log.Debug("SpeechToText.OnListenMessage()", "Server state is {0}", state);
#endif
                        if (state == "listening")
                        {
                            if (_isListening)
                            {
                                if (!_listenActive)
                                {
                                    _listenActive = true;

                                    //Debug.Log("Listening, sending " + _listenRecordings.Count + " queued clips");

                                    bool hasAudio = _listenRecordings.Count > 0;

                                    // send all pending audio clips ..
                                    while (_listenRecordings.Count > 0)
                                    {
                                        AudioData clip = _listenRecordings.Dequeue();
                                        _listenSocket.Send(new WSConnector.BinaryMessage(AudioClipUtil.GetL16(clip.Clip)));
                                        _audioSent = true;
                                    }

                                    // We may have received a stop command while waiting for the listening state.
                                    if (_sendStopAfterListening && hasAudio)
                                    {
                                        SendStop();
                                    }
                                }
                            }
                        }
                    }
                    else if (json.Contains("speaker_labels"))
                    {
                        SpeakerRecognitionEvent speakerRecognitionEvent = ParseSpeakerRecognitionResponse(json);
                        if (speakerRecognitionEvent != null)
                        {
                            _speakerLabelCallback(speakerRecognitionEvent);
                        }
                    }
                    else if (json.Contains("error"))
                    {
                        string error = (string)json["error"];
                        Log.Error("SpeechToText.OnListenMessage()", "Error: {0}", error);

                        StopListening();
                        if (OnError != null)
                        {
                            OnError(error);
                        }
                    }
                    else
                    {
                        Log.Warning("SpeechToText.OnListenMessage()", "Unknown message: {0}", tm.Text);
                    }
                }
                else
                {
                    Log.Error("SpeechToText.OnListenMessage()", "Failed to parse JSON from server: {0}", tm.Text);
                }
            }
        }
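
Besides the usual results and state messages, this variant routes speaker_labels payloads to _speakerLabelCallback; note that, unlike _listenCallback, it is invoked here without a null check, so a callback must be registered before speaker labels are requested. A sketch of such a handler follows; the speaker_labels, from, to, and speaker field names mirror the service's JSON and are assumed rather than shown in this listing:

        // Hypothetical speaker-label handler logging who spoke during which interval.
        private void OnSpeakerLabels(SpeakerRecognitionEvent labelsEvent)
        {
            foreach (SpeakerLabelsResult label in labelsEvent.speaker_labels)
            {
                Debug.Log(string.Format("speaker {0}: {1:F2}s - {2:F2}s",
                                        label.speaker, label.from, label.to));
            }
        }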
Example #6
        /// <summary>
        /// This function should be invoked with the AudioData input after the StartListening() method has been
        /// invoked. The user should continue to invoke this function until they are ready to call StopListening();
        /// typically, microphone input is sent to this function.
        /// </summary>
        /// <param name="clip">An AudioData object containing the AudioClip and the max level found in the clip.</param>
        /// <returns>True if audio was sent or enqueued, false if audio was discarded.</returns>
        public bool OnListen(AudioData clip)
        {
            bool audioSentOrEnqueued = false;

            _timeOffset = 0;

            if (_isListening)
            {
                if (_recordingHZ < 0)
                {
                    _recordingHZ = clip.Clip.frequency;
                    SendStart();
                }

                // If silence persists for _silenceCutoff seconds, send stop and discard clips until audio resumes
                if (DetectSilence && clip.MaxLevel < _silenceThreshold)
                {
                    _silenceDuration += clip.Clip.length;
                }
                else
                {
                    _silenceDuration = 0.0f;
                }

                if (!DetectSilence || _silenceDuration < _silenceCutoff)
                {
                    if (_stopSent)
                    {
                        // Send some clips of ambient sound leading up to the audio, to improve first word recognition
                        if (_listenActive)
                        {
                            //Debug.Log("Sending " + _prefixClips.Count + " prefix clips");
                            foreach (AudioData prefixClip in _prefixClips)
                            {
                                _listenSocket.Send(new WSConnector.BinaryMessage(AudioClipUtil.GetL16(prefixClip.Clip)));
                                _timeOffset -= prefixClip.Clip.length;
                            }
                        }
                        else
                        {
                            //Debug.Log("Queuing " + _prefixClips.Count + " prefix clips");
                            foreach (AudioData prefixClip in _prefixClips)
                            {
                                _listenRecordings.Enqueue(prefixClip);   // queue the prefix clip itself, not the current clip
                                _timeOffset -= prefixClip.Clip.length;
                            }
                        }
                        _prefixClips.Clear();
                        _stopSent = false;
                    }

                    if (_listenActive)
                    {
                        CurrentStatus = LexiconSpeechStatus.Sending;
                        _listenSocket.Send(new WSConnector.BinaryMessage(AudioClipUtil.GetL16(clip.Clip)));
                        _audioSent          = true;
                        audioSentOrEnqueued = true;
                    }
                    else
                    {
                        CurrentStatus = LexiconSpeechStatus.WaitingForServer;
                        // we have not received the "listening" state yet from the server, so just queue
                        // the audio clips until that happens.
                        _listenRecordings.Enqueue(clip);
                        audioSentOrEnqueued = true;

                        // check the length of this queue and do something if it gets too full.
                        if (_listenRecordings.Count > MaxQueuedRecordings)
                        {
                            Log.Error("SpeechToText.OnListen()", "Recording queue is full.");

                            StopListening();
                            if (OnError != null)
                            {
                                OnError("Recording queue is full.");
                            }
                        }
                    }
                }
                else if (_audioSent)
                {
                    CurrentStatus = LexiconSpeechStatus.Silence;
                    //Debug.Log("Send stop");
                    SendStop();
                    _audioSent = false;
                    _stopSent  = true;
                    _prefixClips.Clear();
                }
                else
                {
                    // Buffer some of the ambient audio for when the user starts speaking again
                    _prefixClips.Add(clip);

                    if (_prefixClips.Count > _prefixClipCount)
                    {
                        _prefixClips.RemoveAt(0);
                    }
                }

                // After sending start, we should get into the listening state within the amount of time specified
                // by LISTEN_TIMEOUT. If not, then stop listening and record the error.
                if (!_listenActive && (DateTime.Now - _lastStartSent).TotalSeconds > ListenTimeout)
                {
                    Log.Error("SpeechToText.OnListen()", "Failed to enter listening state.");

                    StopListening();
                    if (OnError != null)
                    {
                        OnError("Failed to enter listening state.");
                    }
                }
            }

            return audioSentOrEnqueued;
        }
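
The _prefixClips list above acts as a small ring buffer: silent clips are appended and the oldest dropped once _prefixClipCount is exceeded, so a short window of leading ambience can be replayed when speech resumes, improving first-word recognition. The same idea, sketched standalone with a Queue<T>, which removes the oldest element in O(1) where List.RemoveAt(0) is O(n):

        // A bounded prefix buffer equivalent to _prefixClips; the capacity constant
        // is hypothetical and would be tuned like _prefixClipCount above.
        private readonly Queue<AudioData> _prefixBuffer = new Queue<AudioData>();
        private const int PrefixCapacity = 3;

        private void BufferPrefix(AudioData clip)
        {
            _prefixBuffer.Enqueue(clip);
            while (_prefixBuffer.Count > PrefixCapacity)
            {
                _prefixBuffer.Dequeue();    // discard the oldest ambient clip
            }
        }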
Example #7
        public override byte[] ToBinary()
        {
            return AudioClipUtil.GetL16(m_Audio.Clip);
        }
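
L16 is raw, signed, 16-bit little-endian PCM, which is why the binary frames in the examples above can be sent to the service without any container format. An illustrative sketch of what a GetL16-style conversion does (this is not the actual AudioClipUtil implementation):

        // Illustrative only: clamp each float sample to [-1, 1] and scale it to a
        // little-endian Int16, producing two bytes per sample.
        public static byte[] ToL16(AudioClip clip)
        {
            float[] samples = new float[clip.samples * clip.channels];
            clip.GetData(samples, 0);

            byte[] pcm = new byte[samples.Length * 2];
            for (int i = 0; i < samples.Length; ++i)
            {
                short value = (short)(Mathf.Clamp(samples[i], -1.0f, 1.0f) * short.MaxValue);
                pcm[i * 2] = (byte)(value & 0xff);             // low byte first
                pcm[i * 2 + 1] = (byte)((value >> 8) & 0xff);  // then high byte
            }

            return pcm;
        }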