Esempio n. 1
0
    /// <summary>
    /// Encodes the given clip into a temporary WAV file, submits that file to
    /// <c>SpeechToTextService</c> using the "Generic" context, and logs the first
    /// recognition result. The temporary file is removed whether or not the request succeeds.
    /// </summary>
    /// <param name="audio">Clip whose sample data is written to the WAV file.</param>
    /// <param name="receiver">Not referenced by this method body. NOTE(review): presumably the intended target of a result callback - confirm.</param>
    /// <param name="callback">Not referenced by this method body. NOTE(review): presumably the callback method name - confirm.</param>
    public void RequestSpeech(AudioClip audio, GameObject receiver, string callback)
    {
        float[] clipData = new float[audio.samples * audio.channels];
        audio.GetData(clipData, 0);
        WaveGen.WaveFormatChunk format = new WaveGen().MakeFormat(audio);

        // Declared outside the try block so the finally block can clean the file up.
        string filename = null;

        try
        {
            filename = GetTempFileName() + ".wav";

            // File.Create truncates any existing file; the using block closes the
            // stream even when the WAV writer throws.
            using (FileStream stream = File.Create(filename))
            {
                new WaveGen().Write(clipData, format, stream);
            }

            Debug.Log("Request Start time: " + DateTime.Now.ToLongTimeString());

            if (requestFactory == null)
            {
                requestFactory = BuildRequestFactory(RequestFactory.ScopeTypes.Speech);
            }

            if (clientToken == null)
            {
                clientToken = GetAccessToken();
            }

            if (null != clientToken)
            {
                requestFactory.ClientCredential = clientToken;
            }

            ATT_MSSDK.Speechv3.SpeechResponse response = SpeechToTextService(filename, "Generic", "audio/wav");

            // SpeechToTextService logs its own failures and hands back null, so
            // report that explicitly instead of dereferencing a null response.
            if (response == null)
            {
                Debug.LogError("Speech request failed: no response was returned.");
                return;
            }

            string speechOutput = response.Recognition.NBest[0].ResultText;

            // No stored token was available before the request: keep the one the
            // factory now holds and persist it.
            if (clientToken == null)
            {
                clientToken = requestFactory.ClientCredential;
                SaveAccessToken();
            }

            Debug.Log("Response received time: " + DateTime.Now.ToLongTimeString());
            showProcess = false;
            Debug.Log("response: " + speechOutput);
        }
        catch (System.Exception e)
        {
            Debug.LogError(e);
        }
        finally
        {
            // Remove the temporary WAV file on every path, not just on success.
            if (filename != null)
            {
                try
                {
                    File.Delete(filename);
                }
                catch (System.Exception deleteError)
                {
                    // A cleanup failure must not escape the finally block.
                    Debug.LogError(deleteError);
                }
            }
        }
    }
Esempio n. 2
0
    /// <summary>
    /// Coroutine that records five seconds of microphone input into <c>audio.clip</c>,
    /// plays the clip back, writes it to "recordedSpeech.wav", sends that file to
    /// <c>SpeechToTextService</c>, and tints this object's material red, green or blue
    /// when the recognised text contains the matching colour name.
    /// </summary>
    /// <returns>Enumerator that yields a single five-second wait while recording.</returns>
    IEnumerator DoRecording()
    {
        Debug.Log("Recording");
        // Non-looping 5 second recording at 8000 Hz; the wait below matches that length.
        audio.clip = Microphone.Start(null, false, 5, 8000);
        yield return(new WaitForSeconds(5));

        Debug.Log("Playing");
        audio.Play();
        Microphone.End(null);

        float[] clipData = new float[audio.clip.samples * audio.clip.channels];
        audio.clip.GetData(clipData, 0);

        // The WAV format chunk is derived from the clip that was just recorded.
        WaveGen.WaveFormatChunk format = new WaveGen().MakeFormat(audio.clip);

        string filename = "recordedSpeech.wav";

        // The file name is reused on every run, so create/truncate it rather than
        // overwriting in place (which would keep stale bytes from a longer earlier
        // recording). The using block closes the stream even if the writer throws.
        using (FileStream stream = File.Create(filename))
        {
            new WaveGen().Write(clipData, format, stream);
        }

        ATT_MSSDK.Speechv3.SpeechResponse response = SpeechToTextService(filename, "Generic", "audio/wav");

        // SpeechToTextService returns null when the request fails; stop here
        // instead of letting a NullReferenceException abort the coroutine.
        if (response == null)
        {
            Debug.LogError("Speech request failed: no response was returned.");
            yield break;
        }

        string speechOutput = response.Recognition.NBest[0].ResultText;

        Debug.Log(speechOutput);

        // Culture-invariant lowering so keyword matching does not depend on the device locale.
        string text = speechOutput.ToLowerInvariant();

        // Independent checks: when several colour names occur, the last match wins.
        if (text.Contains("red"))
        {
            gameObject.renderer.material.color = Color.red;
        }

        if (text.Contains("green"))
        {
            gameObject.renderer.material.color = Color.green;
        }

        if (text.Contains("blue"))
        {
            gameObject.renderer.material.color = Color.blue;
        }
    }
Esempio n. 3
0
    /// <summary>
    /// Transcribes a sound file by forwarding it to the <c>SpeechToText</c> method of
    /// <c>requestFactory</c>. Failures are logged rather than propagated.
    /// </summary>
    /// <param name="FileName">Sound file to transcribe. A null or empty value is logged and produces a null result.</param>
    /// <param name="SpeechContext">
    /// Name of the speech context ("Generic", "BusinessSearch", "TV", "Gaming", "SocialMedia",
    /// "WebSearch", "SMS", "VoiceMail" or "QuestionAndAnswer"). Any other value leaves the
    /// Generic context selected with an empty content language.
    /// </param>
    /// <param name="AudioContentType">Content type string handed through to the request factory unchanged.</param>
    /// <returns>The response from the request factory, or null when no file was given or an exception was caught.</returns>
    private ATT_MSSDK.Speechv3.SpeechResponse SpeechToTextService(String FileName, String SpeechContext, String AudioContentType)
    {
        ATT_MSSDK.Speechv3.SpeechResponse result = null;

        try
        {
            // Checked inside the try block so the completion message in the
            // finally block is still logged for this early exit.
            if (string.IsNullOrEmpty(FileName))
            {
                Debug.Log("No sound file specified");
                return null;
            }

            // Defaults, used as-is for any context that does not override them.
            XSpeechContext context  = XSpeechContext.Generic;
            string         language = string.Empty;
            string         xArgs    = "ClientApp=SpeechApp";

            switch (SpeechContext)
            {
            case "Generic":
                // Context already defaults to Generic; only the language differs.
                language = "en-US";
                break;

            case "BusinessSearch":
                context = XSpeechContext.BusinessSearch;
                break;

            case "TV":
                context = XSpeechContext.TV;
                xArgs   = "Search=True,Lineup=91983";
                break;

            case "Gaming":
                context = XSpeechContext.Gaming;
                break;

            case "SocialMedia":
                context = XSpeechContext.SocialMedia;
                xArgs   = "ClientApp=SpeechApps";
                break;

            case "WebSearch":
                context = XSpeechContext.WebSearch;
                break;

            case "SMS":
                context = XSpeechContext.SMS;
                break;

            case "VoiceMail":
                context = XSpeechContext.VoiceMail;
                break;

            case "QuestionAndAnswer":
                context = XSpeechContext.QuestionAndAnswer;
                break;
            }

            // The sub-context argument is always passed as an empty string.
            result = this.requestFactory.SpeechToText(FileName, context, xArgs, language, string.Empty, AudioContentType);
        }
        catch (InvalidScopeException scopeError)
        {
            Debug.Log(scopeError.Message);
        }
        catch (ArgumentException argumentError)
        {
            Debug.Log(argumentError.Message);
        }
        catch (InvalidResponseException responseError)
        {
            // For this exception type the body is logged instead of the message.
            Debug.Log(responseError.Body);
        }
        catch (Exception unexpected)
        {
            Debug.Log(unexpected.Message);
        }
        finally
        {
            Debug.Log("SpeechToTextService completed.");
        }

        // Still null when any exception was caught above.
        return result;
    }