/// <summary>
 /// Function that is called when a speech-to-text result is received. If it is a final result and this widget
 /// is waiting for the last result of the session, then the widget will begin processing the end results
 /// of the session.
 /// </summary>
 /// <param name="result">The speech-to-text result</param>
 void OnTextResult(SpeechToTextResult result)
 {
     if (m_WillDisplayReceivedResults)
     {
         // For the purposes of comparing results, this just uses the first alternative
         m_LastResultWasFinal = result.IsFinal;
         if (result.IsFinal)
         {
             m_PreviousFinalResults += result.TextAlternatives[0].Text;
             m_ResultsTextUI.color   = m_FinalTextResultColor;
             m_ResultsTextUI.text    = m_PreviousFinalResults;
             SmartLogger.Log(DebugFlags.SpeechToTextWidgets, m_SpeechToTextService.GetType().ToString() + " final result");
             if (m_WaitingForLastFinalResultOfSession)
             {
                 m_WaitingForLastFinalResultOfSession = false;
                 ProcessEndResults();
             }
         }
         else
         {
             m_ResultsTextUI.color = m_InterimTextResultColor;
             m_ResultsTextUI.text  = m_PreviousFinalResults + result.TextAlternatives[0].Text;
         }
     }
 }
Example #2
        /// <summary>
        /// Waits until the last processed result is a final result.
        /// If this does not happen before the timeout, the last result is treated as a final result.
        /// </summary>
        /// <returns>IEnumerator for running this method as a coroutine</returns>
        IEnumerator FinishSession()
        {
            SmartLogger.Log(DebugFlags.WindowsSpeechToText, "finish session");

            // Wait a specified number of seconds for a final result.
            float timeElapsedAfterRecording = 0;

            while (!m_LastResult.IsFinal && timeElapsedAfterRecording < m_SessionTimeoutAfterDoneRecording)
            {
                yield return(null);

                timeElapsedAfterRecording += Time.deltaTime;
            }

            // If still determining a final result, just treat the last result processed as a final result.
            if (!m_LastResult.IsFinal)
            {
                SmartLogger.Log(DebugFlags.WindowsSpeechToText, "treat last interim result as final");
                m_LastResult.IsFinal = true;
                if (m_OnTextResult != null)
                {
                    m_OnTextResult(m_LastResult);
                }
            }
        }
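The wait-until-final-or-timeout pattern above recurs in the streaming example later in this listing, so it can be factored into a reusable helper. A minimal sketch (the CoroutineUtilities class and method name are illustrative, not part of the original code):

        using System;
        using System.Collections;
        using UnityEngine;

        public static class CoroutineUtilities
        {
            /// <summary>
            /// Waits until condition() returns true or timeoutSeconds elapse, whichever comes first.
            /// </summary>
            public static IEnumerator WaitForConditionOrTimeout(Func<bool> condition, float timeoutSeconds)
            {
                float elapsed = 0;
                while (!condition() && elapsed < timeoutSeconds)
                {
                    yield return null;
                    elapsed += Time.deltaTime;
                }
            }
        }

With this helper, the wait in FinishSession() reduces to a single line:

        yield return CoroutineUtilities.WaitForConditionOrTimeout(() => m_LastResult.IsFinal, m_SessionTimeoutAfterDoneRecording);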
        /// <summary>
        /// Extract the speech-to-text result info from the next response JSON in the queue.
        /// </summary>
        void ProcessNextResponseJSON()
        {
            // Create a JSON object from the next string in the queue and process the speech-to-text result.
            var responseJSON = new JSONObject(m_ResponseJSONsQueue.Dequeue(), int.MaxValue);

            SmartLogger.Log(DebugFlags.GoogleStreamingSpeechToText, responseJSON.ToString());

            string errorText = GoogleSpeechToTextResponseJSONParser.GetErrorFromResponseJSON(responseJSON);

            if (errorText != null)
            {
                if (m_OnError != null)
                {
                    m_OnError(errorText);
                }
            }

            JSONObject resultsJSON = responseJSON.GetField(Constants.GoogleResponseJSONResultsFieldKey);

            if (resultsJSON != null && resultsJSON.Count > 0)
            {
                JSONObject         resultJSON = resultsJSON[0];
                SpeechToTextResult textResult = GoogleSpeechToTextResponseJSONParser.GetTextResultFromResultJSON(resultJSON);
                bool isFinal = false;
                resultJSON.GetField(out isFinal, Constants.GoogleResponseJSONResultIsFinalFieldKey, isFinal);
                textResult.IsFinal = isFinal;

                SmartLogger.Log(DebugFlags.GoogleStreamingSpeechToText, "processing result - isFinal = " + isFinal);
                if (m_OnTextResult != null)
                {
                    m_OnTextResult(textResult);
                }
                m_LastResult = textResult;
            }
        }
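For reference, a sketch of the kind of streaming response JSON this method consumes. The field names assume the Constants keys map to "results" and "isFinal", as in Google's streaming speech API; the transcript and confidence values are illustrative:

        // Illustrative response, assuming Constants.GoogleResponseJSONResultsFieldKey == "results"
        // and Constants.GoogleResponseJSONResultIsFinalFieldKey == "isFinal".
        string sampleResponse =
            "{\"results\":[{\"alternatives\":[{\"transcript\":\"hello world\",\"confidence\":0.92}]," +
            "\"isFinal\":true}]}";

        var json = new JSONObject(sampleResponse, int.MaxValue);
        JSONObject results = json.GetField("results");
        bool isFinal = false;
        results[0].GetField(out isFinal, "isFinal", isFinal);   // isFinal is now true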
 /// <summary>
 /// Function that is called when the recording times out.
 /// </summary>
 void OnSpeechToTextRecordingTimeout()
 {
     SmartLogger.Log(DebugFlags.SpeechToTextWidgets, SpeechToTextServiceString() + " call timeout");
     if (m_OnRecordingTimeout != null)
     {
         m_OnRecordingTimeout();
     }
 }
Example #5
 /// <summary>
 /// Unregisters callbacks with each SpeechToTextServiceWidget.
 /// </summary>
 void UnregisterSpeechToTextServiceWidgetsCallbacks()
 {
     if (m_SpeechToTextServiceWidgets != null)
     {
         SmartLogger.Log(DebugFlags.SpeechToTextWidgets, "unregister service widgets callbacks");
         foreach (var serviceWidget in m_SpeechToTextServiceWidgets)
         {
             // Unregister, rather than re-register, each widget's callbacks. (The Unregister
             // counterparts are assumed to mirror the Register methods used elsewhere.)
             serviceWidget.UnregisterOnRecordingTimeout(OnRecordTimeout);
             serviceWidget.UnregisterOnReceivedLastResponse(OnSpeechToTextReceivedLastResponse);
         }
     }
 }
Example #6
 /// <summary>
 /// Clears the current results text and tells the speech-to-text service to start recording.
 /// </summary>
 public void StartRecording()
 {
     SmartLogger.Log(DebugFlags.SpeechToTextWidgets, "Start service widget recording");
     m_WillDisplayReceivedResults         = true;
     m_WaitingForLastFinalResultOfSession = false;
     m_LastResultWasFinal   = false;
     m_PreviousFinalResults = "";
     m_ResultsTextUI.text   = m_PreviousFinalResults;
     m_SpeechToTextService.StartRecording();
 }
 /// <summary>
 /// Function that is called when the MonoBehaviour will be destroyed.
 /// </summary>
 protected override void OnDestroy()
 {
     base.OnDestroy();
     m_TempAudioComponent.ClearTempAudioFiles();
     if (m_StreamingSpeechToTextProcessHasStarted && !m_StreamingSpeechToTextProcess.HasExited)
     {
         m_StreamingSpeechToTextProcess.Kill();
         SmartLogger.Log(DebugFlags.GoogleStreamingSpeechToText, "kill streaming speech-to-text process");
     }
 }
 /// <summary>
 /// Function that is called when the given SpeechToTextServiceWidget has received its last response. If there are
 /// no waiting SpeechToTextServiceWidgets left, then this function wraps up the current comparison session.
 /// </summary>
 /// <param name="serviceWidget">The speech-to-text service widget that received a last response</param>
 void OnSpeechToTextReceivedLastResponse(SpeechToTextServiceWidget serviceWidget)
 {
     SmartLogger.Log(DebugFlags.SpeechToTextWidgets, "Response from " + serviceWidget.SpeechToTextServiceString());
     m_WaitingSpeechToTextServiceWidgets.Remove(serviceWidget);
     if (m_WaitingSpeechToTextServiceWidgets.Count == 0)
     {
         SmartLogger.Log(DebugFlags.SpeechToTextWidgets, "Responses from everyone");
         FinishComparisonSession();
     }
 }
 /// <summary>
 /// Registers callbacks with each SpeechToTextServiceWidget.
 /// </summary>
 void RegisterSpeechToTextServiceWidgetsCallbacks()
 {
     if (m_SpeechToTextServiceWidgets != null)
     {
         SmartLogger.Log(DebugFlags.SpeechToTextWidgets, "register service widgets callbacks");
         foreach (var serviceWidget in m_SpeechToTextServiceWidgets)
         {
             SmartLogger.Log(DebugFlags.SpeechToTextWidgets, "register service widget callbacks");
             serviceWidget.RegisterOnRecordingTimeout(OnRecordTimeout);
             serviceWidget.RegisterOnReceivedLastResponse(OnSpeechToTextReceivedLastResponse);
         }
     }
 }
Example #10
        /// <summary>
        /// Starts recording audio for each speech-to-text service widget if not already recording.
        /// </summary>
        void StartRecording()
        {
            if (!m_IsRecording)
            {
                SmartLogger.Log(DebugFlags.SpeechToTextWidgets, "Start comparison recording");
                m_IsCurrentlyInSpeechToTextSession = true;
                m_IsRecording = true;

                m_WaitingSpeechToTextServiceWidgets.Clear();

                foreach (var serviceWidget in m_SpeechToTextServiceWidgets)
                {
                    SmartLogger.Log(DebugFlags.SpeechToTextWidgets, "tell service widget to start recording");
                    serviceWidget.StartRecording();
                    m_WaitingSpeechToTextServiceWidgets.Add(serviceWidget);
                }
            }
        }
 /// <summary>
 /// Does any final processing necessary for the results of the last started session and then
 /// stops the widget from displaying results until the start of the next session.
 /// </summary>
 void ProcessEndResults()
 {
     SmartLogger.Log(DebugFlags.SpeechToTextWidgets, m_SpeechToTextService.GetType().ToString() + " got last response");
     if (m_ComparisonPhrase != null)
     {
         DisplayAccuracyOfEndResults(m_ComparisonPhrase);
     }
     LogFileManager.Instance.WriteTextToFileIfShouldLog(SpeechToTextServiceString() + ": " + m_ResultsTextUI.text);
     if (m_OnReceivedLastResponse != null)
     {
         m_OnReceivedLastResponse(this);
     }
     m_WillDisplayReceivedResults = false;
 }
Example #12
        /// <summary>
        /// Translates speech to text by making a request to the speech-to-text API.
        /// </summary>
        protected override IEnumerator TranslateRecordingToText()
        {
            m_TempAudioComponent.ClearTempAudioFiles();

            // Save recorded audio to a WAV file.
            string recordedAudioFilePath = SavWav.Save(m_TempAudioComponent.TempAudioRelativePath(), AudioRecordingManager.Instance.RecordedAudio);

            // Construct a request with the WAV file and send it.
            var request = new Request("POST", Constants.WitAiSpeechToTextBaseURL + "?" +
                                      Constants.WitAiVersionParameterName + "=" + DateTime.Now.ToString(Constants.WitAiVersionDateFormat));

            request.headers.Add("Authorization", "Bearer " + m_APIAccessToken);
            request.headers.Add("Content-Type", "audio/wav");
            request.Bytes = File.ReadAllBytes(recordedAudioFilePath);
            SmartLogger.Log(DebugFlags.WitAINonStreamingSpeechToText, "Sending request");
            request.Send();

            float startTime = Time.time;

            while (!request.isDone)
            {
                yield return(null);
            }
            SmartLogger.Log(DebugFlags.WitAINonStreamingSpeechToText, "response time: " + (Time.time - startTime));

            // Finally, grab the response JSON once the request is done.
            var responseJSON = new JSONObject(request.response.Text, int.MaxValue);

            SmartLogger.Log(DebugFlags.WitAINonStreamingSpeechToText, "Received request result");
            SmartLogger.Log(DebugFlags.WitAINonStreamingSpeechToText, responseJSON.ToString());

            string errorText = WitAiSpeechToTextResponseJSONParser.GetErrorFromResponseJSON(responseJSON);

            if (errorText != null)
            {
                if (m_OnError != null)
                {
                    m_OnError(errorText);
                }
            }

            if (m_OnTextResult != null)
            {
                m_OnTextResult(WitAiSpeechToTextResponseJSONParser.GetTextResultFromResponseJSON(responseJSON));
            }

            m_TempAudioComponent.ClearTempAudioFiles();
        }
        /// <summary>
        /// Computes the accuracy (percentage) of the end text results in comparison to the given phrase, by using
        /// the Levenshtein Distance between the two strings, and displays this percentage in the results text UI.
        /// </summary>
        /// <param name="originalPhrase">The phrase to compare against</param>
        void DisplayAccuracyOfEndResults(string originalPhrase)
        {
            string speechToTextResult = StringUtilities.TrimSpecialFormatting(m_ResultsTextUI.text, new HashSet <char>(),
                                                                              m_LeadingCharsForSpecialWords, m_SurroundingCharsForSpecialText);

            originalPhrase = StringUtilities.TrimSpecialFormatting(originalPhrase, new HashSet <char>(),
                                                                   m_LeadingCharsForSpecialWords, m_SurroundingCharsForSpecialText);

            int levenDistance = StringUtilities.LevenshteinDistance(speechToTextResult, originalPhrase);

            SmartLogger.Log(DebugFlags.SpeechToTextWidgets, m_SpeechToTextService.GetType().ToString() + " compute accuracy of text: \"" + speechToTextResult + "\"");
            float accuracy = Mathf.Max(0, 100f - (100f * (float)levenDistance / (float)originalPhrase.Length));

            m_PreviousFinalResults = "[Accuracy: " + accuracy + "%] " + m_PreviousFinalResults;
            m_ResultsTextUI.text   = m_PreviousFinalResults;
        }
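A worked instance of the accuracy formula above, with the Levenshtein distance computed by hand; the phrase values are illustrative:

            string original   = "she sells sea shells";   // 20 characters
            string recognized = "she sells seashells";    // one deleted space, so the distance is 1

            int   levenDistance = 1;   // what StringUtilities.LevenshteinDistance would return here
            float accuracy      = Mathf.Max(0, 100f - (100f * (float)levenDistance / (float)original.Length));
            // accuracy == 95f, so the UI would prepend "[Accuracy: 95%] "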
 /// <summary>
 /// Starts recording audio for each speech-to-text service widget if not already recording.
 /// </summary>
 void StartRecording()
 {
     if (!m_IsRecording)
     {
         SmartLogger.Log(DebugFlags.SpeechToTextWidgets, "Start comparison recording");
         m_IsCurrentlyInSpeechToTextSession = true;
         m_IsRecording             = true;
         m_RecordButtonTextUI.text = m_RecordingText;
         m_RecordButtonImage.color = m_RecordingButtonColor;
         m_WaitingSpeechToTextServiceWidgets.Clear();
         foreach (var serviceWidget in m_SpeechToTextServiceWidgets)
         {
             SmartLogger.Log(DebugFlags.SpeechToTextWidgets, "tell service widget to start recording");
             serviceWidget.StartRecording();
             m_WaitingSpeechToTextServiceWidgets.Add(serviceWidget);
         }
     }
 }
 /// <summary>
 /// Callback function for when the streaming speech-to-text process receives output data.
 /// </summary>
 /// <param name="sender">Sender of this event</param>
 /// <param name="e">Arguments for data received event</param>
 void OnStreamingSpeechToTextProcessOutputDataReceived(object sender, DataReceivedEventArgs e)
 {
     if (e.Data != null)
     {
         string trimmedData = e.Data.Trim();
         SmartLogger.Log(DebugFlags.GoogleStreamingSpeechToText, "process output: " + trimmedData);
         if (trimmedData == k_ReadyToStreamDataOutputPrompt)
         {
             SmartLogger.Log(DebugFlags.GoogleStreamingSpeechToText, "set ready to stream data");
             m_ReadyToStreamData = true;
         }
         else if (trimmedData.StartsWith(k_ResponsePrefix))
         {
             trimmedData = trimmedData.Remove(0, k_ResponsePrefix.Length);
             m_ResponseJSONsQueue.Enqueue(trimmedData);
         }
     }
 }
Example #16
        /// <summary>
        /// Computes the accuracy (percentage) of the end text results against each of the given phrases, using
        /// the Levenshtein Distance between the strings, and prepends each percentage to the results text.
        /// </summary>
        /// <param name="originalPhrase">The phrases to compare against</param>
        void DisplayAccuracyOfEndResults(string[] originalPhrase)
        {
            print("The computer understood " + results);
            string speechToTextResult = StringUtilities.TrimSpecialFormatting(results, new HashSet <char>(),
                                                                              m_LeadingCharsForSpecialWords, m_SurroundingCharsForSpecialText);

            for (int i = 0; i < originalPhrase.Length; i++)
            {
                originalPhrase[i] = StringUtilities.TrimSpecialFormatting(originalPhrase[i], new HashSet <char>(),
                                                                          m_LeadingCharsForSpecialWords, m_SurroundingCharsForSpecialText);

                int levenDistance = StringUtilities.LevenshteinDistance(speechToTextResult, originalPhrase[i]);
                SmartLogger.Log(DebugFlags.SpeechToTextWidgets, m_SpeechToTextService.GetType().ToString() + " compute accuracy of text: \"" + speechToTextResult + "\"");
                float accuracy = Mathf.Max(0, 100f - (100f * (float)levenDistance / (float)originalPhrase[i].Length));
                m_PreviousFinalResults = "[Accuracy: " + accuracy + "%] " + m_PreviousFinalResults;

                speechAccuracy.Add(accuracy);
                print(accuracy);
            }

            results = m_PreviousFinalResults;
            OnResult.Invoke();
        }
Example #17
        /// <summary>
        /// Translates speech to text by making a request to the speech-to-text API.
        /// </summary>
        protected override IEnumerator TranslateRecordingToText()
        {
            m_TempAudioComponent.ClearTempAudioFiles();

            // Save recorded audio to a WAV file.
            string recordedAudioFilePath = SavWav.Save(m_TempAudioComponent.TempAudioRelativePath(), AudioRecordingManager.Instance.RecordedAudio);

            // Build the Wit.ai request URL and POST the WAV bytes with UnityWebRequest.
            string url = Constants.WitAiSpeechToTextBaseURL + "?" +
                         Constants.WitAiVersionParameterName + "=" + DateTime.Now.ToString(Constants.WitAiVersionDateFormat);
            UnityWebRequest www = new UnityWebRequest(url, UnityWebRequest.kHttpVerbPOST);

            byte[]           bytes         = File.ReadAllBytes(recordedAudioFilePath);
            UploadHandlerRaw uploadHandler = new UploadHandlerRaw(bytes);

            // The payload is a WAV file, so label it as such (not "application/json").
            uploadHandler.contentType = "audio/wav";
            www.uploadHandler         = uploadHandler;
            www.downloadHandler       = new DownloadHandlerBuffer();
            www.SetRequestHeader("Content-Type", "audio/wav");
            www.SetRequestHeader("Authorization", "Bearer " + m_APIAccessToken);

            SmartLogger.Log(DebugFlags.WitAINonStreamingSpeechToText, "sent request");
            float startTime = Time.time;

            // Yielding on Send() waits until the request completes, so no polling loop is needed.
            yield return(www.Send());

            if (www.isError)
            {
                SmartLogger.Log(DebugFlags.WitAINonStreamingSpeechToText, www.error);
            }
            else
            {
                SmartLogger.Log(DebugFlags.WitAINonStreamingSpeechToText, "upload complete");
            }
            SmartLogger.Log(DebugFlags.WitAINonStreamingSpeechToText, "response time: " + (Time.time - startTime));

            // Grab the response JSON once the request is done and parse it.
            var responseJSON = new JSONObject(www.downloadHandler.text, int.MaxValue);
            SmartLogger.Log(DebugFlags.WitAINonStreamingSpeechToText, "Received request result");
            SmartLogger.Log(DebugFlags.WitAINonStreamingSpeechToText, responseJSON.ToString());

            string errorText = WitAiSpeechToTextResponseJSONParser.GetErrorFromResponseJSON(responseJSON);

            if (errorText != null)
            {
                if (m_OnError != null)
                {
                    m_OnError(errorText);
                }
            }

            if (m_OnTextResult != null)
            {
                m_OnTextResult(WitAiSpeechToTextResponseJSONParser.GetTextResultFromResponseJSON(responseJSON));
            }

            m_TempAudioComponent.ClearTempAudioFiles();
        }
        /// <summary>
        /// Sends queued chunks of audio to the server and listens for responses.
        /// </summary>
        protected override IEnumerator StreamAudioAndListenForResponses()
        {
            m_TempAudioComponent.ClearTempAudioFiles();
            m_ResponseJSONsQueue.Clear();
            m_StreamingSpeechToTextProcessHasStarted = false;
            m_ReadyToStreamData = false;

            string jsonCredentialsPath = Path.Combine(
                Path.Combine(Application.streamingAssetsPath, k_StreamingSpeechToTextApplicationFolderName),
                m_JSONCredentialsFileName);

            if (!File.Exists(jsonCredentialsPath))
            {
                if (m_OnError != null)
                {
                    m_OnError("Missing JSON credentials file in StreamingAssets/GoogleStreamingSpeechToTextProgram");
                }
                yield break;
            }

            // Initialize streaming speech-to-text process with appropriate start info, including the path to the credentials file.
            m_StreamingSpeechToTextProcess = new Process();
            m_StreamingSpeechToTextProcess.StartInfo.FileName = Path.Combine(
                Path.Combine(Application.streamingAssetsPath, k_StreamingSpeechToTextApplicationFolderName),
                k_StreamingSpeechToTextApplicationFileName);
            m_StreamingSpeechToTextProcess.StartInfo.Arguments              = jsonCredentialsPath;
            m_StreamingSpeechToTextProcess.StartInfo.CreateNoWindow         = true;
            m_StreamingSpeechToTextProcess.StartInfo.UseShellExecute        = false;
            m_StreamingSpeechToTextProcess.StartInfo.RedirectStandardInput  = true;
            m_StreamingSpeechToTextProcess.StartInfo.RedirectStandardOutput = true;
            m_StreamingSpeechToTextProcess.OutputDataReceived += OnStreamingSpeechToTextProcessOutputDataReceived;

            SmartLogger.Log(DebugFlags.GoogleStreamingSpeechToText, "start streaming speech-to-text process");
            m_StreamingSpeechToTextProcess.Start();
            m_StreamingSpeechToTextProcess.BeginOutputReadLine();
            m_StreamingSpeechToTextProcessHasStarted = true;

            while (!m_ReadyToStreamData)
            {
                yield return(null);
            }

            // TODO: I don't know why, but I need to write garbage text first.
            // For some reason the first standard input begins with "0x3F3F3F".
            SmartLogger.Log(DebugFlags.GoogleStreamingSpeechToText, "ready to stream data");
            m_StreamingSpeechToTextProcess.StandardInput.WriteLine("clear input stream");

            // Tell the process to start streaming.
            m_StreamingSpeechToTextProcess.StandardInput.WriteLine(k_StartStreamingDataInputPrompt);

            StartCoroutine(ProcessResponseJSONs());

            // While still recording, send chunks as they arrive in the queue.
            while (m_IsRecording)
            {
                while (m_AudioChunksQueue.Count == 0)
                {
                    yield return(null);
                }
                yield return(SaveAndSendNextChunk());
            }
            SmartLogger.Log(DebugFlags.GoogleStreamingSpeechToText, "stopped recording");

            // Send any remaining chunks.
            while (m_AudioChunksQueue.Count > 0)
            {
                yield return(SaveAndSendNextChunk());
            }
            SmartLogger.Log(DebugFlags.GoogleStreamingSpeechToText, "sent all chunks");

            // Tell the process to stop streaming.
            m_StreamingSpeechToTextProcess.StandardInput.WriteLine(k_StopStreamingDataInputPrompt);

            // Wait a specified number of seconds for a final result.
            float timeElapsedAfterRecording = 0;

            while (!m_LastResult.IsFinal && timeElapsedAfterRecording < m_SessionTimeoutAfterDoneRecording)
            {
                yield return(null);

                timeElapsedAfterRecording += Time.deltaTime;
            }
            SmartLogger.Log(DebugFlags.GoogleStreamingSpeechToText, "session timeout");

            // If still determining a final result, just treat the last result processed as a final result.
            if (!m_LastResult.IsFinal)
            {
                SmartLogger.Log(DebugFlags.GoogleStreamingSpeechToText, "treat last result as final result");
                m_LastResult.IsFinal = true;
                if (m_OnTextResult != null)
                {
                    m_OnTextResult(m_LastResult);
                }
            }

            while (!m_StreamingSpeechToTextProcess.HasExited)
            {
                yield return(null);
            }
            SmartLogger.Log(DebugFlags.GoogleStreamingSpeechToText, "streaming speech-to-text process exited");

            m_TempAudioComponent.ClearTempAudioFiles();
        }
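To summarize the stdin/stdout handshake this coroutine implements (reconstructed from the calls above; the prompt string values themselves are not shown in this excerpt):

            // 1. Child prints k_ReadyToStreamDataOutputPrompt on stdout -> m_ReadyToStreamData = true.
            // 2. Unity writes one throwaway line to flush the "0x3F3F3F" garbage noted in the TODO.
            // 3. Unity writes k_StartStreamingDataInputPrompt and the child starts accepting audio.
            // 4. Unity sends audio chunks (SaveAndSendNextChunk) while recording, then drains the queue.
            // 5. Unity writes k_StopStreamingDataInputPrompt; the child finishes up and exits.
            // 6. Throughout, lines the child prints with the k_ResponsePrefix prefix are queued as
            //    response JSON and handled by ProcessResponseJSONs / ProcessNextResponseJSON.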
 /// <summary>
 /// Removes a function from the recording timeout delegate.
 /// </summary>
 /// <param name="action">Function to unregister</param>
 public void UnregisterOnRecordingTimeout(Action action)
 {
     SmartLogger.Log(DebugFlags.SpeechToTextWidgets, SpeechToTextServiceString() + " unregister timeout");
     m_OnRecordingTimeout -= action;
 }
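The matching registration method is referenced elsewhere in this listing but not shown; presumably it just adds to the same delegate. A minimal sketch:

 /// <summary>
 /// Adds a function to the recording timeout delegate.
 /// </summary>
 /// <param name="action">Function to register</param>
 public void RegisterOnRecordingTimeout(Action action)
 {
     SmartLogger.Log(DebugFlags.SpeechToTextWidgets, SpeechToTextServiceString() + " register timeout");
     m_OnRecordingTimeout += action;
 }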
Example #20
        /// <summary>
        /// Translates speech to text by making a request to the speech-to-text API.
        /// </summary>
        protected override IEnumerator TranslateRecordingToText()
        {
            m_TempAudioComponent.ClearTempAudioFiles();

            // Save recorded audio to a WAV file and convert it to FLAC format.
            string wavAudioFilePath  = SavWav.Save(m_TempAudioComponent.TempAudioRelativePath(), AudioRecordingManager.Instance.RecordedAudio);
            string flacAudioFilePath = IOUtilities.MakeFilePathUnique(Path.ChangeExtension(wavAudioFilePath, "flac"));

            SmartLogger.Log(DebugFlags.GoogleNonStreamingSpeechToText, "converting audio");
            var audioConversionJob = new SoXAudioConversionJob(wavAudioFilePath, flacAudioFilePath, 16000);

            audioConversionJob.Start();
            yield return(StartCoroutine(audioConversionJob.WaitFor()));

            if (audioConversionJob.ErrorMessage != null)
            {
                if (m_OnError != null)
                {
                    m_OnError(audioConversionJob.ErrorMessage);
                }
                yield break;
            }

            var request = new Request("POST", Constants.GoogleNonStreamingSpeechToTextURL +
                                      "?" + Constants.GoogleAPIKeyParameterName + "=" + m_APIKey);

            request.headers.Add("Content-Type", "application/json");

            // Construct JSON request body.
            JSONObject requestJSON   = new JSONObject();
            JSONObject requestConfig = new JSONObject();

            requestConfig.AddField(Constants.GoogleRequestJSONConfigEncodingFieldKey, "FLAC");
            requestConfig.AddField(Constants.GoogleRequestJSONConfigSampleRateFieldKey, "16000");
            JSONObject requestAudio = new JSONObject();

            requestAudio.AddField(Constants.GoogleRequestJSONAudioContentFieldKey, Convert.ToBase64String(File.ReadAllBytes(flacAudioFilePath)));
            requestJSON.AddField(Constants.GoogleRequestJSONConfigFieldKey, requestConfig);
            requestJSON.AddField(Constants.GoogleRequestJSONAudioFieldKey, requestAudio);

            request.Text = requestJSON.ToString();
            request.Send();
            SmartLogger.Log(DebugFlags.GoogleNonStreamingSpeechToText, "sent request");

            while (!request.isDone)
            {
                yield return(null);
            }

            // Grab the response JSON once the request is done and parse it.
            var responseJSON = new JSONObject(request.response.Text, int.MaxValue);

            SmartLogger.Log(DebugFlags.GoogleNonStreamingSpeechToText, responseJSON.ToString());

            string errorText = GoogleSpeechToTextResponseJSONParser.GetErrorFromResponseJSON(responseJSON);

            if (errorText != null)
            {
                if (m_OnError != null)
                {
                    m_OnError(errorText);
                }
            }

            SpeechToTextResult textResult;
            JSONObject         resultsJSON = responseJSON.GetField(Constants.GoogleResponseJSONResultsFieldKey);

            if (resultsJSON != null && resultsJSON.Count > 0)
            {
                JSONObject resultJSON = resultsJSON[0];
                textResult = GoogleSpeechToTextResponseJSONParser.GetTextResultFromResultJSON(resultJSON);
            }
            else
            {
                textResult = GoogleSpeechToTextResponseJSONParser.GetDefaultGoogleSpeechToTextResult();
            }
            if (m_OnTextResult != null)
            {
                m_OnTextResult(textResult);
            }

            m_TempAudioComponent.ClearTempAudioFiles();
        }
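For reference, the JSON body assembled above has the following shape, assuming the Constants keys map to "config", "encoding", "sampleRate", "audio", and "content" as in Google's non-streaming speech API (the base64 payload is elided):

            // {
            //     "config": { "encoding": "FLAC", "sampleRate": "16000" },
            //     "audio":  { "content": "<base64-encoded FLAC bytes>" }
            // }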