/// <summary>
/// Extract the speech-to-text result info from the next response JSON in the queue.
/// </summary>
void ProcessNextResponseJSON()
{
    // Create a JSON object from the next string in the queue and process the speech-to-text result.
    var responseJSON = new JSONObject(m_ResponseJSONsQueue.Dequeue(), int.MaxValue);
    SmartLogger.Log(DebugFlags.GoogleStreamingSpeechToText, responseJSON.ToString());

    // If the API reported an error, surface it and stop: an error response carries no usable results.
    string errorText = GoogleSpeechToTextResponseJSONParser.GetErrorFromResponseJSON(responseJSON);
    if (errorText != null)
    {
        if (m_OnError != null)
        {
            m_OnError(errorText);
        }
        // Fix: previously fell through and attempted to parse results out of an error response.
        return;
    }

    // Only the first entry of the "results" array is processed for a streaming response.
    JSONObject resultsJSON = responseJSON.GetField(Constants.GoogleResponseJSONResultsFieldKey);
    if (resultsJSON != null && resultsJSON.Count > 0)
    {
        JSONObject resultJSON = resultsJSON[0];
        SpeechToTextResult textResult = GoogleSpeechToTextResponseJSONParser.GetTextResultFromResultJSON(resultJSON);

        // "isFinal" distinguishes interim from final transcriptions; default to interim when absent.
        bool isFinal = false;
        resultJSON.GetField(out isFinal, Constants.GoogleResponseJSONResultIsFinalFieldKey, isFinal);
        textResult.IsFinal = isFinal;
        SmartLogger.Log(DebugFlags.GoogleStreamingSpeechToText, "processing result - isFinal = " + isFinal);

        if (m_OnTextResult != null)
        {
            m_OnTextResult(textResult);
        }
        m_LastResult = textResult;
    }
}
/// <summary>
/// Translates speech to text by making a request to the speech-to-text API.
/// </summary>
protected override IEnumerator TranslateRecordingToText()
{
    m_TempAudioComponent.ClearTempAudioFiles();

    // Persist the recorded audio as WAV, then derive a unique FLAC path for the conversion output.
    string wavAudioFilePath = SavWav.Save(m_TempAudioComponent.TempAudioRelativePath(), AudioRecordingManager.Instance.RecordedAudio);
    string flacAudioFilePath = IOUtilities.MakeFilePathUnique(Path.ChangeExtension(wavAudioFilePath, "flac"));

    // Convert WAV -> FLAC at 16 kHz via a background SoX job and wait for it to finish.
    SmartLogger.Log(DebugFlags.GoogleNonStreamingSpeechToText, "converting audio");
    var audioConversionJob = new SoXAudioConversionJob(wavAudioFilePath, flacAudioFilePath, 16000);
    audioConversionJob.Start();
    yield return StartCoroutine(audioConversionJob.WaitFor());
    if (audioConversionJob.ErrorMessage != null)
    {
        if (m_OnError != null)
        {
            m_OnError(audioConversionJob.ErrorMessage);
        }
        yield break;
    }

    // Build the POST request, passing the API key in the query string.
    var request = new Request("POST", Constants.GoogleNonStreamingSpeechToTextURL + "?" + Constants.GoogleAPIKeyParameterName + "=" + m_APIKey);
    request.headers.Add("Content-Type", "application/json");

    // Request body shape: { config: { encoding, sampleRate }, audio: { content: <base64 FLAC> } }.
    var requestConfig = new JSONObject();
    requestConfig.AddField(Constants.GoogleRequestJSONConfigEncodingFieldKey, "FLAC");
    requestConfig.AddField(Constants.GoogleRequestJSONConfigSampleRateFieldKey, "16000");

    var requestAudio = new JSONObject();
    requestAudio.AddField(Constants.GoogleRequestJSONAudioContentFieldKey, Convert.ToBase64String(File.ReadAllBytes(flacAudioFilePath)));

    var requestJSON = new JSONObject();
    requestJSON.AddField(Constants.GoogleRequestJSONConfigFieldKey, requestConfig);
    requestJSON.AddField(Constants.GoogleRequestJSONAudioFieldKey, requestAudio);
    request.Text = requestJSON.ToString();

    // Send and spin until the HTTP round trip completes.
    request.Send();
    SmartLogger.Log(DebugFlags.GoogleNonStreamingSpeechToText, "sent request");
    while (!request.isDone)
    {
        yield return null;
    }

    // Parse the response body once the request is done.
    var responseJSON = new JSONObject(request.response.Text, int.MaxValue);
    SmartLogger.Log(DebugFlags.GoogleNonStreamingSpeechToText, responseJSON.ToString());

    // Report any API error, but still fall through to deliver a (possibly default) result below.
    string errorText = GoogleSpeechToTextResponseJSONParser.GetErrorFromResponseJSON(responseJSON);
    if (errorText != null)
    {
        if (m_OnError != null)
        {
            m_OnError(errorText);
        }
    }

    // Use the first entry of "results" when present; otherwise hand back a default result.
    SpeechToTextResult textResult;
    JSONObject resultsJSON = responseJSON.GetField(Constants.GoogleResponseJSONResultsFieldKey);
    if (resultsJSON != null && resultsJSON.Count > 0)
    {
        textResult = GoogleSpeechToTextResponseJSONParser.GetTextResultFromResultJSON(resultsJSON[0]);
    }
    else
    {
        textResult = GoogleSpeechToTextResponseJSONParser.GetDefaultGoogleSpeechToTextResult();
    }
    if (m_OnTextResult != null)
    {
        m_OnTextResult(textResult);
    }

    m_TempAudioComponent.ClearTempAudioFiles();
}