/// <summary>
        /// Extract the speech-to-text result info from the next response JSON in the queue.
        /// </summary>
        void ProcessNextResponseJSON()
        {
            // Create a JSON object from the next string in the queue and process the speech-to-text result.
            var responseJSON = new JSONObject(m_ResponseJSONsQueue.Dequeue(), int.MaxValue);

            SmartLogger.Log(DebugFlags.GoogleStreamingSpeechToText, responseJSON.ToString());

            string errorText = GoogleSpeechToTextResponseJSONParser.GetErrorFromResponseJSON(responseJSON);

            if (errorText != null)
            {
                if (m_OnError != null)
                {
                    m_OnError(errorText);
                }
            }

            JSONObject resultsJSON = responseJSON.GetField(Constants.GoogleResponseJSONResultsFieldKey);

            if (resultsJSON != null && resultsJSON.Count > 0)
            {
                JSONObject         resultJSON = resultsJSON[0];
                SpeechToTextResult textResult = GoogleSpeechToTextResponseJSONParser.GetTextResultFromResultJSON(resultJSON);
                bool isFinal = false;
                resultJSON.GetField(out isFinal, Constants.GoogleResponseJSONResultIsFinalFieldKey, isFinal);
                textResult.IsFinal = isFinal;

                SmartLogger.Log(DebugFlags.GoogleStreamingSpeechToText, "processing result - isFinal = " + isFinal);
                if (m_OnTextResult != null)
                {
                    m_OnTextResult(textResult);
                }
                m_LastResult = textResult;
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Translates speech to text by making a request to the speech-to-text API.
        /// </summary>
        protected override IEnumerator TranslateRecordingToText()
        {
            m_TempAudioComponent.ClearTempAudioFiles();

            // Save recorded audio to a WAV file and convert it to FLAC format.
            string wavAudioFilePath  = SavWav.Save(m_TempAudioComponent.TempAudioRelativePath(), AudioRecordingManager.Instance.RecordedAudio);
            string flacAudioFilePath = IOUtilities.MakeFilePathUnique(Path.ChangeExtension(wavAudioFilePath, "flac"));

            SmartLogger.Log(DebugFlags.GoogleNonStreamingSpeechToText, "converting audio");
            var audioConversionJob = new SoXAudioConversionJob(wavAudioFilePath, flacAudioFilePath, 16000);

            audioConversionJob.Start();
            yield return(StartCoroutine(audioConversionJob.WaitFor()));

            if (audioConversionJob.ErrorMessage != null)
            {
                if (m_OnError != null)
                {
                    m_OnError(audioConversionJob.ErrorMessage);
                }
                yield break;
            }

            var request = new Request("POST", Constants.GoogleNonStreamingSpeechToTextURL +
                                      "?" + Constants.GoogleAPIKeyParameterName + "=" + m_APIKey);

            request.headers.Add("Content-Type", "application/json");

            // Construct JSON request body.
            JSONObject requestJSON   = new JSONObject();
            JSONObject requestConfig = new JSONObject();

            requestConfig.AddField(Constants.GoogleRequestJSONConfigEncodingFieldKey, "FLAC");
            requestConfig.AddField(Constants.GoogleRequestJSONConfigSampleRateFieldKey, "16000");
            JSONObject requestAudio = new JSONObject();

            requestAudio.AddField(Constants.GoogleRequestJSONAudioContentFieldKey, Convert.ToBase64String(File.ReadAllBytes(flacAudioFilePath)));
            requestJSON.AddField(Constants.GoogleRequestJSONConfigFieldKey, requestConfig);
            requestJSON.AddField(Constants.GoogleRequestJSONAudioFieldKey, requestAudio);

            request.Text = requestJSON.ToString();
            request.Send();
            SmartLogger.Log(DebugFlags.GoogleNonStreamingSpeechToText, "sent request");

            while (!request.isDone)
            {
                yield return(null);
            }

            // Grab the response JSON once the request is done and parse it.
            var responseJSON = new JSONObject(request.response.Text, int.MaxValue);

            SmartLogger.Log(DebugFlags.GoogleNonStreamingSpeechToText, responseJSON.ToString());

            string errorText = GoogleSpeechToTextResponseJSONParser.GetErrorFromResponseJSON(responseJSON);

            if (errorText != null)
            {
                if (m_OnError != null)
                {
                    m_OnError(errorText);
                }
            }

            SpeechToTextResult textResult;
            JSONObject         resultsJSON = responseJSON.GetField(Constants.GoogleResponseJSONResultsFieldKey);

            if (resultsJSON != null && resultsJSON.Count > 0)
            {
                JSONObject resultJSON = resultsJSON[0];
                textResult = GoogleSpeechToTextResponseJSONParser.GetTextResultFromResultJSON(resultJSON);
            }
            else
            {
                textResult = GoogleSpeechToTextResponseJSONParser.GetDefaultGoogleSpeechToTextResult();
            }
            if (m_OnTextResult != null)
            {
                m_OnTextResult(textResult);
            }

            m_TempAudioComponent.ClearTempAudioFiles();
        }