// This keeps the WebSocket connected when we are not sending any data.
private IEnumerator KeepAlive()
{
    while (_listenSocket != null)
    {
        yield return null;

        if ((DateTime.Now - _lastKeepAlive).TotalSeconds > WsKeepAliveInterval)
        {
            // Temporary clip to use for KeepAlive
            // TODO: Generate small sound clip to send to the service to keep alive.
            //AudioClip _keepAliveClip = Resources.Load<AudioClip>("highHat");

#if ENABLE_DEBUGGING
            Log.Debug("SpeechToText.KeepAlive()", "Sending keep alive.");
#endif
            //_listenSocket.Send(new WSConnector.BinaryMessage(AudioClipUtil.GetL16(_keepAliveClip)));
            //_keepAliveClip = null;

            Debug.Log("Sending " + _prefixClips.Count + " prefix clips for keep alive");
            foreach (AudioData prefixClip in _prefixClips)
            {
                _listenSocket.Send(new WSConnector.BinaryMessage(AudioClipUtil.GetL16(prefixClip.Clip)));
            }
            _prefixClips.Clear();

            _lastKeepAlive = DateTime.Now;
        }
    }
    Log.Debug("SpeechToText.KeepAlive()", "KeepAlive exited.");
}
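// A minimal sketch of how the keep-alive loop might be driven, assuming the enclosing class is
// (or can reach) a MonoBehaviour so Unity's StartCoroutine/StopCoroutine are available. The
// _keepAliveRoutine field and the StartKeepAlive/StopKeepAlive method names are hypothetical:
// the idea is to start the loop when the listen socket opens and stop it when the socket closes,
// so the service is never pinged without a connection.
private Coroutine _keepAliveRoutine;    // hypothetical handle for the running keep-alive loop

private void StartKeepAlive()
{
    if (_keepAliveRoutine == null && _listenSocket != null)
    {
        _lastKeepAlive = DateTime.Now;
        _keepAliveRoutine = StartCoroutine(KeepAlive());
    }
}

private void StopKeepAlive()
{
    if (_keepAliveRoutine != null)
    {
        StopCoroutine(_keepAliveRoutine);
        _keepAliveRoutine = null;
    }
}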
/// <summary>
/// This function should be invoked with the AudioData input after the StartListening() method has been invoked.
/// The user should continue to invoke this function until they are ready to call StopListening(); typically,
/// microphone input is sent to this function.
/// </summary>
/// <param name="clip">An AudioData object containing the AudioClip and the max level found in the clip.</param>
public void OnListen(AudioData clip)
{
    if (m_IsListening)
    {
        if (m_RecordingHZ < 0)
        {
            m_RecordingHZ = clip.Clip.frequency;
            SendStart();
        }

        if (!DetectSilence || clip.MaxLevel >= m_SilenceThreshold)
        {
            if (m_ListenActive)
            {
                m_ListenSocket.Send(new WSConnector.BinaryMessage(AudioClipUtil.GetL16(clip.Clip)));
                m_AudioSent = true;
            }
            else
            {
                // We have not received the "listening" state yet from the server, so just queue
                // the audio clips until that happens.
                m_ListenRecordings.Enqueue(clip);

                // Check the length of this queue and do something if it gets too full.
                if (m_ListenRecordings.Count > MAX_QUEUED_RECORDINGS)
                {
                    Log.Error("SpeechToText", "Recording queue is full.");

                    StopListening();
                    if (OnError != null)
                    {
                        OnError("Recording queue is full.");
                    }
                }
            }
        }
        else if (m_AudioSent)
        {
            SendStop();
            m_AudioSent = false;
        }

        // After sending start, we should get into the listening state within the amount of time specified
        // by LISTEN_TIMEOUT. If not, then stop listening and record the error.
        if (!m_ListenActive && (DateTime.Now - m_LastStartSent).TotalSeconds > LISTEN_TIMEOUT)
        {
            Log.Error("SpeechToText", "Failed to enter listening state.");

            StopListening();
            if (OnError != null)
            {
                OnError("Failed to enter listening state.");
            }
        }
    }
}
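// A sketch of one way to produce the stream of AudioData chunks that OnListen() expects, using
// Unity's Microphone API with a looping one-second buffer. The MicrophoneFeeder class name, the
// _speechToText field, the sample rate, the half-second chunk size, and the AudioData(clip, maxLevel)
// constructor are assumptions for illustration, not part of the service itself.
using System.Collections;
using UnityEngine;

public class MicrophoneFeeder : MonoBehaviour
{
    public SpeechToText _speechToText;          // assumed to be assigned elsewhere
    private const int RecordingHZ = 22050;
    private const float ChunkSeconds = 0.5f;    // one-second buffer holds exactly two chunks

    private void Start()
    {
        StartCoroutine(RecordingHandler());
    }

    private IEnumerator RecordingHandler()
    {
        AudioClip recording = Microphone.Start(null, true, 1, RecordingHZ);
        int chunkSamples = (int)(RecordingHZ * ChunkSeconds);
        int readPos = 0;

        while (Microphone.IsRecording(null))
        {
            // Only read once a full chunk has been written ahead of our read position.
            int writePos = Microphone.GetPosition(null);
            int available = (writePos - readPos + recording.samples) % recording.samples;
            if (available >= chunkSamples)
            {
                float[] samples = new float[chunkSamples];
                recording.GetData(samples, readPos);
                readPos = (readPos + chunkSamples) % recording.samples;

                // Track the loudest sample so OnListen() can do its silence detection.
                float maxLevel = 0f;
                foreach (float s in samples)
                    maxLevel = Mathf.Max(maxLevel, Mathf.Abs(s));

                AudioClip chunk = AudioClip.Create("chunk", chunkSamples, 1, RecordingHZ, false);
                chunk.SetData(samples, 0);
                _speechToText.OnListen(new AudioData(chunk, maxLevel));    // assumed AudioData constructor
            }
            yield return null;
        }
    }
}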
private void OnListenMessage(WSConnector.Message msg)
{
    if (msg is WSConnector.TextMessage)
    {
        WSConnector.TextMessage tm = (WSConnector.TextMessage)msg;

        IDictionary json = Json.Deserialize(tm.Text) as IDictionary;
        if (json != null)
        {
            if (json.Contains("results"))
            {
                SpeechResultList results = ParseRecognizeResponse(json);
                if (results != null)
                {
                    if (newSpeech)
                    {
                        newSpeech = false;
                        UnityEngine.Debug.Log("--new: " + new SpeechToTextData(results).Text);
                        Cloudspace.NotificationCenter.DefaultCenter().PostNotification(null, "OnListeningToUser", new SpeechToTextData(results).Text);
                    }

                    if (finalResults == null)
                    {
                        finalResults = results;
                    }
                    else
                    {
                        // Append the new results onto the aggregated final results.
                        SpeechResult[] aggregated = new SpeechResult[finalResults.Results.Length + results.Results.Length];
                        for (int i = 0; i < finalResults.Results.Length; i++)
                        {
                            aggregated[i] = finalResults.Results[i];
                        }
                        for (int i = finalResults.Results.Length; i < finalResults.Results.Length + results.Results.Length; i++)
                        {
                            aggregated[i] = results.Results[i - finalResults.Results.Length];
                        }
                        finalResults.Results = aggregated;
                    }
                    // UnityEngine.Debug.Log("--agg: " + new SpeechToTextData(finalResults).AllText);

                    // When we get results, start listening for the next block.
                    // If continuous is true, then we don't need to do this.
                    if (!EnableContinousRecognition && results.HasFinalResult())
                    {
                        SendStart();
                    }

                    // if (m_ListenCallback == null) {
                    StopListening();
                    // }
                }
                else
                {
                    Log.Error("SpeechToText", "Failed to parse results: {0}", tm.Text);
                }
            }
            else if (json.Contains("state"))
            {
                string state = (string)json["state"];

#if ENABLE_DEBUGGING
                Log.Debug("SpeechToText", "Server state is {0}", state);
#endif
                if (state == "listening")
                {
                    if (m_IsListening)
                    {
                        if (!m_ListenActive)
                        {
                            m_ListenActive = true;

                            // Send all pending audio clips.
                            while (m_ListenRecordings.Count > 0)
                            {
                                AudioData clip = m_ListenRecordings.Dequeue();
                                m_ListenSocket.Send(new WSConnector.BinaryMessage(AudioClipUtil.GetL16(clip.Clip)));
                                m_AudioSent = true;
                            }
                        }
                    }
                }
            }
            else if (json.Contains("error"))
            {
                string error = (string)json["error"];
                Log.Error("SpeechToText", "Error: {0}", error);

                StopListening();
                if (OnError != null)
                {
                    OnError(error);
                }
            }
            else
            {
                Log.Warning("SpeechToText", "Unknown message: {0}", tm.Text);
            }
        }
        else
        {
            Log.Error("SpeechToText", "Failed to parse JSON from server: {0}", tm.Text);
        }
    }

    if (silenceCounter > m_PausesLimit)
    {
        silenceCounter = 0;
        if (finalResults != null)
        {
            newSpeech = true;
            m_ListenCallback(finalResults);
            finalResults = null;
        }
    }
}
private void OnListenMessage(WSConnector.Message msg)
{
    if (msg is WSConnector.TextMessage)
    {
        WSConnector.TextMessage tm = (WSConnector.TextMessage)msg;

        IDictionary json = Json.Deserialize(tm.Text) as IDictionary;
        if (json != null)
        {
            if (json.Contains("results"))
            {
                SpeechResultList results = ParseRecognizeResponse(json);
                if (results != null)
                {
                    // When we get results, start listening for the next block.
                    // If continuous is true, then we don't need to do this.
                    if (!EnableContinousRecognition && results.HasFinalResult())
                    {
                        SendStart();
                    }

                    if (m_ListenCallback != null)
                    {
                        m_ListenCallback(results);
                    }
                    else
                    {
                        StopListening();    // automatically stop listening if our callback is destroyed.
                    }
                }
                else
                {
                    Log.Error("SpeechToText", "Failed to parse results: {0}", tm.Text);
                }
            }
            else if (json.Contains("state"))
            {
                string state = (string)json["state"];

#if ENABLE_DEBUGGING
                Log.Debug("SpeechToText", "Server state is {0}", state);
#endif
                if (state == "listening")
                {
                    if (m_IsListening)
                    {
                        if (!m_ListenActive)
                        {
                            m_ListenActive = true;

                            // Send all pending audio clips.
                            while (m_ListenRecordings.Count > 0)
                            {
                                AudioData clip = m_ListenRecordings.Dequeue();
                                m_ListenSocket.Send(new WSConnector.BinaryMessage(AudioClipUtil.GetL16(clip.Clip)));
                                m_AudioSent = true;
                            }
                        }
                    }
                }
            }
            else if (json.Contains("error"))
            {
                string error = (string)json["error"];
                Log.Error("SpeechToText", "Error: {0}", error);

                StopListening();
                if (OnError != null)
                {
                    OnError(error);
                }
            }
            else
            {
                Log.Warning("SpeechToText", "Unknown message: {0}", tm.Text);
            }
        }
        else
        {
            Log.Error("SpeechToText", "Failed to parse JSON from server: {0}", tm.Text);
        }
    }
}
private void OnListenMessage(WSConnector.Message msg)
{
    if (msg is WSConnector.TextMessage)
    {
        WSConnector.TextMessage tm = (WSConnector.TextMessage)msg;

        IDictionary json = Json.Deserialize(tm.Text) as IDictionary;
        if (json != null)
        {
            if (json.Contains("results"))
            {
                SpeechRecognitionEvent results = ParseRecognizeResponse(json);
                if (results != null)
                {
                    //// when we get results, start listening for the next block ..
                    //if (results.HasFinalResult())
                    //    Log.Debug("SpeechToText.OnListenMessage()", "final json response: {0}", tm.Text);
                    //    SendStart();

                    if (_listenCallback != null)
                    {
                        _listenCallback(results);
                    }
                    else
                    {
                        StopListening();    // automatically stop listening if our callback is destroyed.
                    }
                }
                else
                {
                    Log.Error("SpeechToText.OnListenMessage()", "Failed to parse results: {0}", tm.Text);
                }
            }
            else if (json.Contains("state"))
            {
                string state = (string)json["state"];

#if ENABLE_DEBUGGING
                Log.Debug("SpeechToText.OnListenMessage()", "Server state is {0}", state);
#endif
                if (state == "listening")
                {
                    if (_isListening)
                    {
                        if (!_listenActive)
                        {
                            _listenActive = true;

                            //Debug.Log("Listening, sending " + _listenRecordings.Count + " queued clips");
                            bool hasAudio = _listenRecordings.Count > 0;

                            // Send all pending audio clips.
                            while (_listenRecordings.Count > 0)
                            {
                                AudioData clip = _listenRecordings.Dequeue();
                                _listenSocket.Send(new WSConnector.BinaryMessage(AudioClipUtil.GetL16(clip.Clip)));
                                _audioSent = true;
                            }

                            // We may have received a stop command while waiting for the listening state.
                            if (_sendStopAfterListening && hasAudio)
                            {
                                SendStop();
                            }
                        }
                    }
                }
            }
            else if (json.Contains("speaker_labels"))
            {
                SpeakerRecognitionEvent speakerRecognitionEvent = ParseSpeakerRecognitionResponse(json);
                // Guard against a missing speaker label callback, mirroring the _listenCallback check above.
                if (speakerRecognitionEvent != null && _speakerLabelCallback != null)
                {
                    _speakerLabelCallback(speakerRecognitionEvent);
                }
            }
            else if (json.Contains("error"))
            {
                string error = (string)json["error"];
                Log.Error("SpeechToText.OnListenMessage()", "Error: {0}", error);

                StopListening();
                if (OnError != null)
                {
                    OnError(error);
                }
            }
            else
            {
                Log.Warning("SpeechToText.OnListenMessage()", "Unknown message: {0}", tm.Text);
            }
        }
        else
        {
            Log.Error("SpeechToText.OnListenMessage()", "Failed to parse JSON from server: {0}", tm.Text);
        }
    }
}
/// <summary>
/// This function should be invoked with the AudioData input after the StartListening() method has been invoked.
/// The user should continue to invoke this function until they are ready to call StopListening(); typically,
/// microphone input is sent to this function.
/// </summary>
/// <param name="clip">An AudioData object containing the AudioClip and the max level found in the clip.</param>
/// <returns>True if audio was sent or enqueued, false if audio was discarded.</returns>
public bool OnListen(AudioData clip)
{
    bool audioSentOrEnqueued = false;
    _timeOffset = 0;

    if (_isListening)
    {
        if (_recordingHZ < 0)
        {
            _recordingHZ = clip.Clip.frequency;
            SendStart();
        }

        // If silence persists for _silenceCutoff seconds, send stop and discard clips until audio resumes.
        if (DetectSilence && clip.MaxLevel < _silenceThreshold)
        {
            _silenceDuration += clip.Clip.length;
        }
        else
        {
            _silenceDuration = 0.0f;
        }

        if (!DetectSilence || _silenceDuration < _silenceCutoff)
        {
            if (_stopSent)
            {
                // Send some clips of ambient sound leading up to the audio, to improve first word recognition.
                if (_listenActive)
                {
                    //Debug.Log("Sending " + _prefixClips.Count + " prefix clips");
                    foreach (AudioData prefixClip in _prefixClips)
                    {
                        _listenSocket.Send(new WSConnector.BinaryMessage(AudioClipUtil.GetL16(prefixClip.Clip)));
                        _timeOffset -= prefixClip.Clip.length;
                    }
                }
                else
                {
                    //Debug.Log("Queuing " + _prefixClips.Count + " prefix clips");
                    foreach (AudioData prefixClip in _prefixClips)
                    {
                        // Queue the buffered prefix clip itself; the current clip is enqueued below.
                        _listenRecordings.Enqueue(prefixClip);
                        _timeOffset -= prefixClip.Clip.length;
                    }
                }

                _prefixClips.Clear();
                _stopSent = false;
            }

            if (_listenActive)
            {
                CurrentStatus = LexiconSpeechStatus.Sending;
                _listenSocket.Send(new WSConnector.BinaryMessage(AudioClipUtil.GetL16(clip.Clip)));
                _audioSent = true;
                audioSentOrEnqueued = true;
            }
            else
            {
                CurrentStatus = LexiconSpeechStatus.WaitingForServer;

                // We have not received the "listening" state yet from the server, so just queue
                // the audio clips until that happens.
                _listenRecordings.Enqueue(clip);
                audioSentOrEnqueued = true;

                // Check the length of this queue and do something if it gets too full.
                if (_listenRecordings.Count > MaxQueuedRecordings)
                {
                    Log.Error("SpeechToText.OnListen()", "Recording queue is full.");

                    StopListening();
                    if (OnError != null)
                    {
                        OnError("Recording queue is full.");
                    }
                }
            }
        }
        else if (_audioSent)
        {
            CurrentStatus = LexiconSpeechStatus.Silence;
            //Debug.Log("Send stop");
            SendStop();
            _audioSent = false;
            _stopSent = true;
            _prefixClips.Clear();
        }
        else
        {
            // Buffer some of the ambient audio for when the user starts speaking again.
            _prefixClips.Add(clip);
            if (_prefixClips.Count > _prefixClipCount)
            {
                _prefixClips.RemoveAt(0);
            }
        }

        // After sending start, we should get into the listening state within the amount of time specified
        // by ListenTimeout. If not, then stop listening and record the error.
        if (!_listenActive && (DateTime.Now - _lastStartSent).TotalSeconds > ListenTimeout)
        {
            Log.Error("SpeechToText.OnListen()", "Failed to enter listening state.");

            StopListening();
            if (OnError != null)
            {
                OnError("Failed to enter listening state.");
            }
        }
    }

    return audioSentOrEnqueued;
}
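// A hypothetical caller-side use of the return value (a sketch; _speechToText and _micIndicator
// are illustrative names, not part of the service). When OnListen() returns false, either
// listening has not been started or the clip fell inside the silence cutoff and was at most
// buffered as ambient prefix audio, which is a convenient signal for a "live microphone" UI element.
private void OnMicrophoneChunk(AudioData chunk)
{
    bool accepted = _speechToText.OnListen(chunk);

    if (_micIndicator != null)
    {
        _micIndicator.SetActive(accepted);   // dim the indicator while audio is being discarded
    }
}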
public override byte[] ToBinary()
{
    return AudioClipUtil.GetL16(m_Audio.Clip);
}