/// <summary>
/// Converts a deserialized "recognize" response into a <see cref="SpeechResultList"/>.
/// </summary>
/// <param name="resp">
/// Deserialized JSON object. Its "results" entry is read as a list of result objects, each
/// carrying a "final" flag and an "alternatives" list.
/// </param>
/// <returns>
/// The parsed results, or null when <paramref name="resp"/> is null, when it has no "results"
/// list, or when an exception is thrown while reading a field (the exception is logged).
/// </returns>
private SpeechResultList ParseRecognizeResponse(IDictionary resp)
{
    if (resp == null)
    {
        return null;
    }

    try
    {
        IList iresults = resp["results"] as IList;
        if (iresults == null)
        {
            return null;
        }

        List<SpeechResult> results = new List<SpeechResult>();
        foreach (object r in iresults)
        {
            IDictionary iresult = r as IDictionary;
            // Test the current entry, not the enclosing list: the list is already known to be
            // non-null here, so checking it again let a non-dictionary entry fall through to a
            // NullReferenceException that discarded the whole response.
            if (iresult == null)
            {
                continue;
            }

            SpeechResult result = new SpeechResult();
            result.Final = (bool)iresult["final"];

            IList ialternatives = iresult["alternatives"] as IList;
            if (ialternatives == null)
            {
                continue;
            }

            List<SpeechAlt> alternatives = new List<SpeechAlt>();
            foreach (object a in ialternatives)
            {
                IDictionary ialternative = a as IDictionary;
                if (ialternative == null)
                {
                    continue;
                }

                SpeechAlt alternative = new SpeechAlt();
                alternative.Transcript = (string)ialternative["transcript"];

                if (ialternative.Contains("confidence"))
                {
                    alternative.Confidence = (double)ialternative["confidence"];
                }

                if (ialternative.Contains("timestamps"))
                {
                    IList itimestamps = ialternative["timestamps"] as IList;
                    // A "timestamps" value that is not a list is skipped instead of being
                    // dereferenced, so one malformed field no longer voids the whole response.
                    if (itimestamps != null)
                    {
                        TimeStamp[] timestamps = new TimeStamp[itimestamps.Count];
                        for (int i = 0; i < itimestamps.Count; ++i)
                        {
                            // Each entry is read as [word, start, end]; entries that are not
                            // lists leave their slot at its default value.
                            IList itimestamp = itimestamps[i] as IList;
                            if (itimestamp == null)
                            {
                                continue;
                            }

                            TimeStamp ts = new TimeStamp();
                            ts.Word = (string)itimestamp[0];
                            ts.Start = (double)itimestamp[1];
                            ts.End = (double)itimestamp[2];
                            timestamps[i] = ts;
                        }

                        alternative.Timestamps = timestamps;
                    }
                }

                if (ialternative.Contains("word_confidence"))
                {
                    IList iconfidence = ialternative["word_confidence"] as IList;
                    // Same guard as for "timestamps": ignore a value that is not a list.
                    if (iconfidence != null)
                    {
                        WordConfidence[] confidence = new WordConfidence[iconfidence.Count];
                        for (int i = 0; i < iconfidence.Count; ++i)
                        {
                            // Each entry is read as [word, confidence].
                            IList iwordconf = iconfidence[i] as IList;
                            if (iwordconf == null)
                            {
                                continue;
                            }

                            WordConfidence wc = new WordConfidence();
                            wc.Word = (string)iwordconf[0];
                            wc.Confidence = (double)iwordconf[1];
                            confidence[i] = wc;
                        }

                        alternative.WordConfidence = confidence;
                    }
                }

                alternatives.Add(alternative);
            }

            result.Alternatives = alternatives.ToArray();
            results.Add(result);
        }

        return new SpeechResultList(results.ToArray());
    }
    catch (Exception e)
    {
        // Any missing key or unexpected value type surfaces here (failed cast or null
        // dereference); the response is then treated as unparseable.
        Log.Error("SpeechToText", "ParseJsonResponse exception: {0}", e.ToString());
        return null;
    }
}
/// <summary>
/// Handles an incoming <c>WSConnector.Message</c>. Only <c>WSConnector.TextMessage</c> payloads are
/// processed: the text is deserialized as a JSON object and dispatched on the first key present
/// among "results", "state" and "error".
/// </summary>
/// <remarks>
/// "results": the object is parsed with ParseRecognizeResponse. When the newSpeech flag is set it is
/// cleared, the recognized text is logged and an "OnListeningToUser" notification carrying that text
/// is posted. The parsed batch is then stored in finalResults, or appended to the results already
/// held there. SendStart() appears to be called when EnableContinousRecognition is false and
/// results.HasFinalResult() is true. A batch that fails to parse is logged as an error.
/// "state": when the value is "listening" while m_IsListening is set and m_ListenActive is not,
/// m_ListenActive is set and every recording queued in m_ListenRecordings is dequeued and sent as a
/// binary message built from AudioClipUtil.GetL16; m_AudioSent is set for each one sent.
/// "error": the error text is logged, StopListening() is called and OnError is invoked when non-null.
/// Any other JSON object is logged as a warning; text that does not deserialize to an object is
/// logged as an error.
/// After the message handling above, and regardless of the message type, once silenceCounter
/// exceeds m_PausesLimit the counter is reset; if finalResults is non-null, newSpeech is set,
/// finalResults is passed to m_ListenCallback and then cleared.
/// NOTE(review): m_ListenCallback is invoked without a null check - confirm it can never be null here.
/// NOTE(review): the original line breaks of this method were lost, so the extent of each "//"
/// comment below is ambiguous. In particular it is unclear whether the StopListening() call that
/// follows the commented-out "if (m_ListenCallback == null) {" is live code or part of that
/// comment - confirm against the original file before relying on either reading.
/// </remarks>
/// <param name="msg">Incoming message; anything other than a text message skips straight to the silence check.</param>
private void OnListenMessage(WSConnector.Message msg) { if (msg is WSConnector.TextMessage) { WSConnector.TextMessage tm = (WSConnector.TextMessage)msg; IDictionary json = Json.Deserialize(tm.Text) as IDictionary; if (json != null) { if (json.Contains("results")) { SpeechResultList results = ParseRecognizeResponse(json); if (results != null) { if (newSpeech == true) { newSpeech = false; UnityEngine.Debug.Log("--new: " + new SpeechToTextData(results).Text); Cloudspace.NotificationCenter.DefaultCenter().PostNotification(null, "OnListeningToUser", new SpeechToTextData(results).Text); } if (finalResults == null) { finalResults = results; } else { SpeechResult[] aggregated = new SpeechResult[finalResults.Results.Length + results.Results.Length]; for (int i = 0; i < finalResults.Results.Length; i++) { aggregated [i] = finalResults.Results [i]; } for (int i = finalResults.Results.Length; i < finalResults.Results.Length + results.Results.Length; i++) { aggregated [i] = results.Results [i - finalResults.Results.Length]; } finalResults.Results = aggregated; } // UnityEngine.Debug.Log ("--agg: "+new SpeechToTextData (finalResults).AllText); // when we get results, start listening for the next block .. // if continuous is true, then we don't need to do this.. if (!EnableContinousRecognition && results.HasFinalResult()) { SendStart(); } // if (m_ListenCallback == null) { StopListening(); // } } else { Log.Error("SpeechToText", "Failed to parse results: {0}", tm.Text); } } else if (json.Contains("state")) { string state = (string)json["state"]; #if ENABLE_DEBUGGING Log.Debug("SpeechToText", "Server state is {0}", state); #endif if (state == "listening") { if (m_IsListening) { if (!m_ListenActive) { m_ListenActive = true; // send all pending audio clips .. 
while (m_ListenRecordings.Count > 0) { AudioData clip = m_ListenRecordings.Dequeue(); m_ListenSocket.Send(new WSConnector.BinaryMessage(AudioClipUtil.GetL16(clip.Clip))); m_AudioSent = true; } } } } } else if (json.Contains("error")) { string error = (string)json["error"]; Log.Error("SpeechToText", "Error: {0}", error); StopListening(); if (OnError != null) { OnError(error); } } else { Log.Warning("SpeechToText", "Unknown message: {0}", tm.Text); } } else { Log.Error("SpeechToText", "Failed to parse JSON from server: {0}", tm.Text); } } if (silenceCounter > m_PausesLimit) { silenceCounter = 0; if (finalResults != null) { newSpeech = true; m_ListenCallback(finalResults); finalResults = null; } } }