/// <summary>
        /// Converts the next queued audio chunk to a temporary RAW file and writes that
        /// file's path to the streaming speech-to-text process, which forwards the data to the server.
        /// </summary>
        IEnumerator SaveAndSendNextChunk()
        {
            // Persist the dequeued chunk as WAV, then derive a unique RAW path for the SoX conversion.
            string wavPath = SavWav.Save(m_TempAudioComponent.TempAudioRelativePath(), m_AudioChunksQueue.Dequeue());
            string rawPath = IOUtilities.MakeFilePathUnique(Path.ChangeExtension(wavPath, "raw"));

            // 16 kHz, 16-bit, mono, little-endian signed integer samples.
            var conversion = new SoXAudioConversionJob(wavPath, rawPath, 16000, 16, 1,
                                                       AudioEncoding.EncodingType.SignedInteger, Endianness.EndiannessType.Little);

            conversion.Start();
            yield return StartCoroutine(conversion.WaitFor());

            // On conversion failure, report the error (if a handler is registered) and stop.
            if (conversion.ErrorMessage != null)
            {
                if (m_OnError != null)
                {
                    m_OnError(conversion.ErrorMessage);
                }
                yield break;
            }

            // Hand the RAW file path to the child process over stdin.
            m_StreamingSpeechToTextProcess.StandardInput.WriteLine(rawPath);
        }
        // NOTE(review): removed non-code web-scrape artifact ("Esempio n. 2" pagination text)
        // that was pasted between methods and broke compilation.
        /// <summary>
        /// Translates the last recording to text by converting it to FLAC and sending it to the
        /// Google non-streaming speech-to-text REST API. Reports errors through <c>m_OnError</c>
        /// and always delivers a result (possibly the default one) through <c>m_OnTextResult</c>.
        /// </summary>
        protected override IEnumerator TranslateRecordingToText()
        {
            m_TempAudioComponent.ClearTempAudioFiles();

            // Save recorded audio to a WAV file and convert it to FLAC format.
            string wavAudioFilePath  = SavWav.Save(m_TempAudioComponent.TempAudioRelativePath(), AudioRecordingManager.Instance.RecordedAudio);
            string flacAudioFilePath = IOUtilities.MakeFilePathUnique(Path.ChangeExtension(wavAudioFilePath, "flac"));

            SmartLogger.Log(DebugFlags.GoogleNonStreamingSpeechToText, "converting audio");
            var audioConversionJob = new SoXAudioConversionJob(wavAudioFilePath, flacAudioFilePath, 16000);

            audioConversionJob.Start();
            yield return StartCoroutine(audioConversionJob.WaitFor());

            if (audioConversionJob.ErrorMessage != null)
            {
                if (m_OnError != null)
                {
                    m_OnError(audioConversionJob.ErrorMessage);
                }
                // Fix: clean up temp audio on the early-exit path too. Previously the stale
                // WAV/FLAC files lingered on disk until the next translation attempt.
                m_TempAudioComponent.ClearTempAudioFiles();
                yield break;
            }

            var request = new Request("POST", Constants.GoogleNonStreamingSpeechToTextURL +
                                      "?" + Constants.GoogleAPIKeyParameterName + "=" + m_APIKey);

            request.headers.Add("Content-Type", "application/json");

            // Construct the JSON request body: a "config" object describing the audio format
            // and an "audio" object carrying the base64-encoded FLAC bytes.
            JSONObject requestJSON   = new JSONObject();
            JSONObject requestConfig = new JSONObject();

            requestConfig.AddField(Constants.GoogleRequestJSONConfigEncodingFieldKey, "FLAC");
            // NOTE(review): the sample rate is sent as the string "16000"; confirm the target
            // Google API version accepts a string here rather than a JSON number.
            requestConfig.AddField(Constants.GoogleRequestJSONConfigSampleRateFieldKey, "16000");
            JSONObject requestAudio = new JSONObject();

            requestAudio.AddField(Constants.GoogleRequestJSONAudioContentFieldKey, Convert.ToBase64String(File.ReadAllBytes(flacAudioFilePath)));
            requestJSON.AddField(Constants.GoogleRequestJSONConfigFieldKey, requestConfig);
            requestJSON.AddField(Constants.GoogleRequestJSONAudioFieldKey, requestAudio);

            request.Text = requestJSON.ToString();
            request.Send();
            SmartLogger.Log(DebugFlags.GoogleNonStreamingSpeechToText, "sent request");

            // Poll until the HTTP request completes; yielding null waits one frame.
            while (!request.isDone)
            {
                yield return null;
            }

            // Grab the response JSON once the request is done and parse it.
            var responseJSON = new JSONObject(request.response.Text, int.MaxValue);

            SmartLogger.Log(DebugFlags.GoogleNonStreamingSpeechToText, responseJSON.ToString());

            string errorText = GoogleSpeechToTextResponseJSONParser.GetErrorFromResponseJSON(responseJSON);

            if (errorText != null)
            {
                if (m_OnError != null)
                {
                    m_OnError(errorText);
                }
                // Intentional fallthrough: even when the API reports an error, a (default)
                // text result is still delivered below so m_OnTextResult always fires.
            }

            // Use the first result from the response if present; otherwise fall back to the
            // default (empty) result so subscribers always receive something.
            SpeechToTextResult textResult;
            JSONObject         resultsJSON = responseJSON.GetField(Constants.GoogleResponseJSONResultsFieldKey);

            if (resultsJSON != null && resultsJSON.Count > 0)
            {
                JSONObject resultJSON = resultsJSON[0];
                textResult = GoogleSpeechToTextResponseJSONParser.GetTextResultFromResultJSON(resultJSON);
            }
            else
            {
                textResult = GoogleSpeechToTextResponseJSONParser.GetDefaultGoogleSpeechToTextResult();
            }
            if (m_OnTextResult != null)
            {
                m_OnTextResult(textResult);
            }

            m_TempAudioComponent.ClearTempAudioFiles();
        }