Example #1
    private IEnumerator RecordingHandler()
    {
        Log.Debug("ExampleStreaming.RecordingHandler()", "devices: {0}", Microphone.devices);
        _recording = Microphone.Start(_microphoneID, true, _recordingBufferSize, _recordingHZ);
        yield return(null);      // let _recordingRoutine get set..

        if (_recording == null)
        {
            StopRecording();
            yield break;
        }

        bool bFirstBlock = true;
        int  midPoint    = _recording.samples / 2;

        float[] samples = null;

        while (_recordingRoutine != 0 && _recording != null)
        {
            int writePos = Microphone.GetPosition(_microphoneID);
            if (writePos > _recording.samples || !Microphone.IsRecording(_microphoneID))
            {
                Log.Error("ExampleStreaming.RecordingHandler()", "Microphone disconnected.");

                StopRecording();
                yield break;
            }

            if ((bFirstBlock && writePos >= midPoint) ||
                (!bFirstBlock && writePos < midPoint))
            {
                // front block is recorded, make a RecordClip and pass it onto our callback.
                samples = new float[midPoint];
                _recording.GetData(samples, bFirstBlock ? 0 : midPoint);

                AudioData record = new AudioData();
                record.MaxLevel = Mathf.Max(Mathf.Abs(Mathf.Min(samples)), Mathf.Max(samples));
                record.Clip     = AudioClip.Create("Recording", midPoint, _recording.channels, _recordingHZ, false);
                record.Clip.SetData(samples, 0);

                _speechToText.OnListen(record);

                bFirstBlock = !bFirstBlock;
            }
            else
            {
                // calculate the number of samples remaining until we're ready for a block of audio,
                // and wait the amount of time it will take to record them.
                int   remaining     = bFirstBlock ? (midPoint - writePos) : (_recording.samples - writePos);
                float timeRemaining = (float)remaining / (float)_recordingHZ;

                yield return(new WaitForSeconds(timeRemaining));
            }
        }

        yield break;
    }
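The handler above streams audio by splitting the looping microphone buffer into two halves: while Unity writes into one half, the finished half is copied out and handed to the service. Below is a minimal sketch of the timing arithmetic, assuming the buffer settings used in Example #11 (_recordingBufferSize = 1 second, _recordingHZ = 22050); the class and variable names here are illustrative, not part of the handler itself:

    using System;

    class BlockTimingSketch
    {
        static void Main()
        {
            int recordingHZ   = 22050; // sample rate in Hz (assumed, from Example #11)
            int bufferSeconds = 1;     // length of the looping buffer in seconds (assumed)

            int totalSamples = recordingHZ * bufferSeconds; // 22050 samples in the ring buffer
            int midPoint     = totalSamples / 2;            // 11025 samples per half-block

            // Each half-block covers midPoint / recordingHZ seconds of audio, which is
            // the chunk latency of the streaming loop: roughly 0.5 s with these settings.
            float blockSeconds = (float)midPoint / recordingHZ;
            Console.WriteLine("block = {0} samples = {1:0.###} s", midPoint, blockSeconds);
        }
    }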
Example #2
        private void OnAudio(Data data)
        {
            if (!Active)
            {
                Active = true;
            }

            m_SpeechToText.OnListen((AudioData)data);
        }
Example #3
        private void OnAudio(Data data)
        {
            if (!Active)
            {
                Active = true;
            }

            m_STT.OnListen((AudioData)data);
        }
Example #4
    private IEnumerator RecordingHandler()
    {
        Debug.LogFormat("Start recording. Device: {0}", microphoneID);
        recording = Microphone.Start(microphoneID, true, recordingBufferSize, recordingHZ);
        yield return(null);

        if (recording == null)
        {
            StopRecording();
            yield break;
        }

        bool bFirstBlock = true;
        int  midPoint    = recording.samples / 2;

        float[] samples = null;

        while (recordingRoutine != 0 && recording != null)
        {
            int writePos = Microphone.GetPosition(microphoneID);
            if (writePos > recording.samples || !Microphone.IsRecording(microphoneID))
            {
                Debug.LogErrorFormat("Recording Error. Microphone disconnected.");

                StopRecording();
                yield break;
            }

            if ((bFirstBlock && writePos >= midPoint) ||
                (!bFirstBlock && writePos < midPoint))
            {
                samples = new float[midPoint];
                recording.GetData(samples, bFirstBlock ? 0 : midPoint);

                AudioData record = new AudioData();
                record.MaxLevel = Mathf.Max(Mathf.Abs(Mathf.Min(samples)), Mathf.Max(samples));
                record.Clip     = AudioClip.Create("Recording", midPoint, recording.channels, recordingHZ, false);
                record.Clip.SetData(samples, 0);

                sttService.OnListen(record);

                bFirstBlock = !bFirstBlock;
            }
            else
            {
                int   remaining     = bFirstBlock ? (midPoint - writePos) : (recording.samples - writePos);
                float timeRemaining = (float)remaining / (float)recordingHZ;

                yield return(new WaitForSeconds(timeRemaining));
            }
        }

        yield break;
    }
Example #5
    private IEnumerator RecordingHandler()
    {
        Log.Debug("RecordingHandler()", "Aygıtlar: {0}", Microphone.devices);
        _recording = Microphone.Start(_microphoneID, true, _recordingBufferSize, _recordingHZ);
        yield return(null);

        if (_recording == null)
        {
            StopRecording();
            yield break;
        }

        bool bFirstBlock = true;
        int  midPoint    = _recording.samples / 2;

        float[] samples = null;

        while (_recordingRoutine != 0 && _recording != null)
        {
            int writePos = Microphone.GetPosition(_microphoneID);
            if (writePos > _recording.samples || !Microphone.IsRecording(_microphoneID))
            {
                Log.Error("RecordingHandler()", "Microphone disconnected.");

                StopRecording();
                yield break;
            }

            if ((bFirstBlock && writePos >= midPoint) ||
                (!bFirstBlock && writePos < midPoint))
            {
                samples = new float[midPoint];
                _recording.GetData(samples, bFirstBlock ? 0 : midPoint);

                AudioData record = new AudioData();
                record.MaxLevel = Mathf.Max(Mathf.Abs(Mathf.Min(samples)), Mathf.Max(samples));
                record.Clip     = AudioClip.Create("Kaydediliyor", midPoint, _recording.channels, _recordingHZ, false);
                record.Clip.SetData(samples, 0);

                _speechToText.OnListen(record);

                bFirstBlock = !bFirstBlock;
            }
            else
            {
                int   remaining     = bFirstBlock ? (midPoint - writePos) : (_recording.samples - writePos);
                float timeRemaining = (float)remaining / (float)_recordingHZ;

                yield return(new WaitForSeconds(timeRemaining));
            }
        }

        yield break;
    }
Example #6
    private void PushAudioChunk()
    {
        int endPosition = Microphone.GetPosition(Microphone.devices[0]);

        if (endPosition == _audioChunkStartPosition)
        {
            //no data to send
            return;
        }

        AudioData recording = new AudioData();

        float[] speechAudioData;
        int     newClipLength;

        if (endPosition > _audioChunkStartPosition)
        {
            newClipLength   = endPosition - _audioChunkStartPosition + 1;
            speechAudioData = new float[newClipLength * _rollingAudioClip.channels];
            _rollingAudioClip.GetData(speechAudioData, _audioChunkStartPosition);
        }
        else
        {
            // We've wrapped around the rolling audio clip: take the audio from the start position to the end of the clip, then append the data from 0 to endPosition.
            int newClipLengthLeft  = _rollingAudioClip.samples - _audioChunkStartPosition + 1;
            int newClipLengthRight = endPosition + 1;

            float[] speechAudioDataLeft  = new float[newClipLengthLeft * _rollingAudioClip.channels];
            float[] speechAudioDataRight = new float[newClipLengthRight * _rollingAudioClip.channels];

            _rollingAudioClip.GetData(speechAudioDataLeft, _audioChunkStartPosition);
            _rollingAudioClip.GetData(speechAudioDataRight, 0);

            newClipLength   = speechAudioDataLeft.Length + speechAudioDataRight.Length;
            speechAudioData = new float[newClipLength];

            Array.Copy(speechAudioDataLeft, speechAudioData, newClipLengthLeft);
            Array.Copy(speechAudioDataRight, 0, speechAudioData, newClipLengthLeft, newClipLengthRight);
        }

        if (PlayBackAudio)
        {
            _playBackAudioData.AddRange(speechAudioData);
        }

        recording.Clip = AudioClip.Create("clip", newClipLength, _rollingAudioClip.channels, MIC_FREQUENCY, false);
        recording.Clip.SetData(speechAudioData, 0);

        _audioChunkStartPosition = endPosition;

        recording.MaxLevel = Mathf.Max(speechAudioData);

        _speechToText.OnListen(recording);
    }
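When the microphone's write position wraps past the end of the rolling clip, the chunk has to be assembled from two pieces: the tail of the buffer from the start position onward, plus the head of the buffer up to the new position. Here is a self-contained sketch of that stitch using plain arrays in place of the AudioClip (the names are illustrative, and the original's +1 length adjustments are omitted):

    using System;

    class RingCopySketch
    {
        // Copies the samples between start (inclusive) and end (exclusive),
        // wrapping around the end of the ring buffer when end < start.
        static float[] ReadChunk(float[] ring, int start, int end)
        {
            if (end >= start)
            {
                float[] chunk = new float[end - start];
                Array.Copy(ring, start, chunk, 0, chunk.Length);
                return chunk;
            }

            int tail = ring.Length - start;          // samples from start to the end of the buffer
            float[] wrapped = new float[tail + end]; // tail piece + head piece
            Array.Copy(ring, start, wrapped, 0, tail);
            Array.Copy(ring, 0, wrapped, tail, end);
            return wrapped;
        }

        static void Main()
        {
            float[] ring = { 0f, 1f, 2f, 3f, 4f, 5f, 6f, 7f };
            // Write position has wrapped: start = 6, end = 3 -> 6, 7, 0, 1, 2
            Console.WriteLine(string.Join(", ", ReadChunk(ring, 6, 3)));
        }
    }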
Example #7
    private IEnumerator RecordingHandler()
    // derived from Watson Unity SDK: https://github.com/watson-developer-cloud/unity-sdk/blob/336ebba141337047fe95ece06e5034fa9818666e/Examples/ServiceExamples/Scripts/ExampleStreaming.cs#L115
    {
        _recording = Microphone.Start(_microphoneID, true, _recordingBufferSize, _recordingHZ);
        yield return(null);

        if (_recording == null)
        {
            StopRecording();
            yield break;
        }

        bool bFirstBlock = true;
        int  midPoint    = _recording.samples / 2;

        float[] samples = null;

        while (_recordingRoutine != 0 && _recording != null)
        {
            int writePos = Microphone.GetPosition(_microphoneID);
            if (writePos > _recording.samples || !Microphone.IsRecording(_microphoneID))
            {
                Log.Error("MicrophoneWidget", "Microphone disconnected.");

                StopRecording();
                yield break;
            }

            if ((bFirstBlock && writePos >= midPoint) ||
                (!bFirstBlock && writePos < midPoint))
            {
                samples = new float[midPoint];
                _recording.GetData(samples, bFirstBlock ? 0 : midPoint);

                AudioData record = new AudioData();
                record.MaxLevel = Mathf.Max(samples);
                record.Clip     = AudioClip.Create("Recording", midPoint, _recording.channels, _recordingHZ, false);
                record.Clip.SetData(samples, 0);

                _speechToText.OnListen(record);

                bFirstBlock = !bFirstBlock;
            }
            else
            {
                int   remaining     = bFirstBlock ? (midPoint - writePos) : (_recording.samples - writePos);
                float timeRemaining = (float)remaining / (float)_recordingHZ;

                yield return(new WaitForSeconds(timeRemaining));
            }
        }

        yield break;
    }
Example #8
    private IEnumerator RecordingHandler()
    {
        Log.Debug("ExampleStreamingSplitSamples.RecordingHandler()", "devices: {0}", Microphone.devices);
        //  Start recording
        _recording = Microphone.Start(_microphoneID, true, _recordingBufferSize, _recordingHZ);
        yield return(null);

        if (_recording == null)
        {
            StopRecording();
            yield break;
        }

#if ENABLE_TIME_LOGGING
        //  Set a reference to now to check timing
        DateTime now = DateTime.Now;
#endif

        //  Current sample segment number
        int sampleSegmentNum = 0;

        //  Size of the sample segment in samples
        int sampleSegmentSize = _recording.samples / _sampleSegments;

        //  Init samples
        float[] samples = null;

        while (_recordingRoutine != 0 && _recording != null)
        {
            //  Get the mic position
            int microphonePosition = Microphone.GetPosition(_microphoneID);
            if (microphonePosition > _recording.samples || !Microphone.IsRecording(_microphoneID))
            {
                Log.Error("ExampleStreamingSplitSamples.RecordingHandler()", "Microphone disconnected.");

                StopRecording();
                yield break;
            }

            int sampleStart = sampleSegmentSize * sampleSegmentNum;
            int sampleEnd   = sampleSegmentSize * (sampleSegmentNum + 1);

#if ENABLE_DEBUGGING
            Log.Debug("ExampleStreamingSplitSamples.RecordinHandler", "microphonePosition: {0} | sampleStart: {1} | sampleEnd: {2} | sampleSegmentNum: {3}",
                      microphonePosition.ToString(),
                      sampleStart.ToString(),
                      sampleEnd.ToString(),
                      sampleSegmentNum.ToString());
#endif
            // If the write position is past the end of the sample segment, or before its start
            while (microphonePosition > sampleEnd || microphonePosition < sampleStart)
            {
                //  Init samples
                samples = new float[sampleSegmentSize];
                //  Write data from recording into samples starting from the sampleSegmentStart
                _recording.GetData(samples, sampleStart);

                //  Create AudioData and use the samples we just created
                AudioData record = new AudioData();
                record.MaxLevel = Mathf.Max(Mathf.Abs(Mathf.Min(samples)), Mathf.Max(samples));
                record.Clip     = AudioClip.Create("Recording", sampleSegmentSize, _recording.channels, _recordingHZ, false);
                record.Clip.SetData(samples, 0);

                //  Send the newly created AudioData to the service
                _service.OnListen(record);

                //  Iterate or reset sampleSegmentNum
                if (sampleSegmentNum < _sampleSegments - 1)
                {
                    sampleSegmentNum++;
#if ENABLE_DEBUGGING
                    Log.Debug("ExampleStreamingSplitSamples.RecordingHandler()", "Iterating sampleSegmentNum: {0}", sampleSegmentNum);
#endif
                }
                else
                {
                    sampleSegmentNum = 0;
#if ENABLE_DEBUGGING
                    Log.Debug("ExampleStreamingSplitSamples.RecordingHandler()", "Resetting sampleSegmentNum: {0}", sampleSegmentNum);
#endif
                }

#if ENABLE_TIME_LOGGING
                Log.Debug("ExampleStreamingSplitSamples.RecordingHandler", "Sending data - time since last transmission: {0} ms", Mathf.Floor((float)(DateTime.Now - now).TotalMilliseconds));
                now = DateTime.Now;
#endif
                sampleStart = sampleSegmentSize * sampleSegmentNum;
                sampleEnd   = sampleSegmentSize * (sampleSegmentNum + 1);
            }

            yield return(0);
        }

        yield break;
    }
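As a worked example of the segment arithmetic above (the values are illustrative, since _sampleSegments is defined outside this listing): with a 1-second buffer at 22050 Hz and _sampleSegments = 50, sampleSegmentSize = 22050 / 50 = 441 samples, so each AudioData sent to the service covers 441 / 22050 = 0.02 s of audio. Splitting the buffer this way trades the half-second latency of the two-block scheme for many small 20 ms transmissions.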
Example #9
    private IEnumerator RecordingHandler()
    {
        //temp.text += " Recording handler called ";
        m_Recording = Microphone.Start(m_MicrophoneID, true, m_RecordingBufferSize, m_RecordingHZ);

        yield return(null);             // let m_RecordingRoutine get set..

        if (m_Recording == null)
        {
            //temp.text += " m_Recording is null ";
            StopRecording();
            yield break;
        }

        bool bFirstBlock = true;
        int  midPoint    = m_Recording.samples / 2;

        float[] samples = null;

        while (m_RecordingRoutine != 0 && m_Recording != null)
        {
            int writePos = Microphone.GetPosition(m_MicrophoneID);
            //temp.text += " writePos is " + writePos.ToString ();
            if (writePos > m_Recording.samples || !Microphone.IsRecording(m_MicrophoneID))
            {
                Log.Error("MicrophoneWidget", "Microphone disconnected.");
                temp.text += " Problem with the mike ";
                StopRecording();
                yield break;
            }

            if ((bFirstBlock && writePos >= midPoint) ||
                (!bFirstBlock && writePos < midPoint))
            {
                // front block is recorded, make a RecordClip and pass it onto our callback.
                samples = new float[midPoint];
                m_Recording.GetData(samples, bFirstBlock ? 0 : midPoint);
                //temp.text += " Passing to callback ";
                AudioData record = new AudioData();
                record.MaxLevel = Mathf.Max(samples);
                record.Clip     = AudioClip.Create("Recording", midPoint, m_Recording.channels, m_RecordingHZ, false);
                record.Clip.SetData(samples, 0);

                m_SpeechToText.OnListen(record);

                bFirstBlock = !bFirstBlock;
            }
            else
            {
                // calculate the number of samples remaining until we're ready for a block of audio,
                // and wait the amount of time it will take to record them.
                int   remaining     = bFirstBlock ? (midPoint - writePos) : (m_Recording.samples - writePos);
                float timeRemaining = (float)remaining / (float)m_RecordingHZ;
                //temp.text += " Waiting for audio sample to finish ";
                yield return(new WaitForSeconds(timeRemaining));
            }
        }

        yield break;
    }
Example #10
    private IEnumerator RecordingHandler()
    {
        m_Recording = Microphone.Start(null, true, m_RecordingBufferSize, m_RecordingHZ);
        Debug.Log("Initialising microphone");
        yield return(null);      // let m_RecordingRoutine get set..


        //If the recording doesn't initialise properly
        if (m_Recording == null)
        {
            //Stop recording
            StopRecording();
            //Break out of function
            yield break;
        }

        bool bFirstBlock = true;
        int  midPoint    = m_Recording.samples / 2;

        float[] samples = null;

        //While our recording routine is still running and the recording isn't null
        while (m_RecordingRoutine != 0 && m_Recording != null)
        {
            //Get the position to write to
            int writePos = Microphone.GetPosition(null);
            //If we are going to overload the samples array or the mic isn't recording anymore
            if (writePos > m_Recording.samples || !Microphone.IsRecording(null))
            {
                Log.Error("MicrophoneWidget", "Microphone disconnected.");

                //Stop recording
                StopRecording();
                yield break;
            }

            // Recording is done in two halves so one half can be copied out while the other half is still being written
            if ((bFirstBlock && writePos >= midPoint) ||
                (!bFirstBlock && writePos < midPoint))
            {
                // front block is recorded, make a RecordClip and pass it onto our callback.
                samples = new float[midPoint];
                m_Recording.GetData(samples, bFirstBlock ? 0 : midPoint);

                AudioData record = new AudioData();
                record.MaxLevel = Mathf.Max(samples);
                record.Clip     = AudioClip.Create("Recording", midPoint, m_Recording.channels, m_RecordingHZ, false);
                record.Clip.SetData(samples, 0);

                m_SpeechToText.OnListen(record);

                bFirstBlock = !bFirstBlock;
            }
            else
            {
                // calculate the number of samples remaining until we're ready for a block of audio,
                // and wait the amount of time it will take to record them.
                int   remaining     = bFirstBlock ? (midPoint - writePos) : (m_Recording.samples - writePos);
                float timeRemaining = (float)remaining / (float)m_RecordingHZ;

                yield return(new WaitForSeconds(timeRemaining));
            }
        }

        yield break;
    }
Example #11

using UnityEngine;
using System.Collections;
using IBM.Watson.DeveloperCloud.Logging;
using IBM.Watson.DeveloperCloud.Services.SpeechToText.v1;
using IBM.Watson.DeveloperCloud.Utilities;
using IBM.Watson.DeveloperCloud.DataTypes;
using System.Collections.Generic;
using UnityEngine.UI;

using IBM.Watson.DeveloperCloud.Services.TextToSpeech.v1;
using IBM.Watson.DeveloperCloud.Connection;
using System;

public class ExampleStreaming : MonoBehaviour
{

    // STT - BURNER CREDS - DELETE AFTER RECORDING
    private string _username_STT = "";
    private string _password_STT = "";
    private string _url_STT = "https://stream.watsonplatform.net/speech-to-text/api";
    public Text ResultsField;

    private int _recordingRoutine = 0;
    private string _microphoneID = null;
    private AudioClip _recording = null;
    private int _recordingBufferSize = 1;
    private int _recordingHZ = 22050;

    private SpeechToText _speechToText;

    // TEXT TO SPEECH - BURNER CREDENTIALS FOR PUBLIC DEMO I WILL DELETE AFTER RECORDING
    private string _username_TTS = "";
    private string _password_TTS = "";
    private string _url_TTS = "https://stream.watsonplatform.net/text-to-speech/api";

    TextToSpeech _textToSpeech;

    //string _testString = "<speak version=\"1.0\"><say-as interpret-as=\"letters\">I'm sorry</say-as>. <prosody pitch=\"150Hz\">This is Text to Speech!</prosody><express-as type=\"GoodNews\">I'm sorry. This is Text to Speech!</express-as></speak>";

    /// TEST STRINGS OK

    // Pitch Shifting
    //string _testString = "<speak version=\"1.0\"><prosody pitch=\"150Hz\">This is Text to Speech!</prosody></speak>";
    //string _testString = "<speak version=\"1.0\"><prosody pitch=\"250Hz\">This is Text to Speech!</prosody></speak>";
    //string _testString = "<speak version=\"1.0\"><prosody pitch=\"350Hz\">This is Text to Speech!</prosody></speak>";
    //string _testString = "<speak version=\"1.0\"><prosody pitch=\"350Hz\">hi</prosody></speak>";

    // Good news and sorrow and uncertainty - ref https://console.bluemix.net/docs/services/text-to-speech/SSML-expressive.html#expressive
    // <express-as type="GoodNews">This is Text to Speech!</express-as>
    string _testString = "<speak version=\"1.0\"><express-as type=\"GoodNews\">Hello! Good News! Text to Speech is Working!</express-as></speak>";
    //string _testString = "<speak version=\"1.0\"><express-as type=\"Apology\">I am terribly sorry for the quality of service you have received.</express-as></speak>";
    //string _testString = "<speak version=\"1.0\"><express-as type=\"Uncertainty\">Can you please explain it again? I am not sure I understand.</express-as></speak>";

    //string _testString = "<speak version=\"1.0\"><prosody pitch=\\\"350Hz\\\"><express-as type=\"Uncertainty\">Can you please explain it again? I am confused and I'm not sure I understand.</express-as></prosody></speak>";


    string _createdCustomizationId;
    CustomVoiceUpdate _customVoiceUpdate;
    string _customizationName = "unity-example-customization";
    string _customizationLanguage = "en-US";
    string _customizationDescription = "A text to speech voice customization created within Unity.";
    string _testWord = "Watson";

    private bool _synthesizeTested = false;
    private bool _getVoicesTested = false;
    private bool _getVoiceTested = false;
    private bool _getPronuciationTested = false;
    private bool _getCustomizationsTested = false;
    private bool _createCustomizationTested = false;
    private bool _deleteCustomizationTested = false;
    private bool _getCustomizationTested = false;
    private bool _updateCustomizationTested = false;
    private bool _getCustomizationWordsTested = false;
    private bool _addCustomizationWordsTested = false;
    private bool _deleteCustomizationWordTested = false;
    private bool _getCustomizationWordTested = false;



    void Start()
    {
        LogSystem.InstallDefaultReactors();

        //  Create credential and instantiate service
        Credentials credentials_STT = new Credentials(_username_STT, _password_STT, _url_STT);
        Credentials credentials_TTS = new Credentials(_username_TTS, _password_TTS, _url_TTS);

        _speechToText = new SpeechToText(credentials_STT);
        _textToSpeech = new TextToSpeech(credentials_TTS);

        Active = true;
        StartRecording();

        Runnable.Run(Examples());

    }

    public bool Active
    {
        get { return _speechToText.IsListening; }
        set
        {
            if (value && !_speechToText.IsListening)
            {
                _speechToText.DetectSilence = true;
                _speechToText.EnableWordConfidence = true;
                _speechToText.EnableTimestamps = true;
                _speechToText.SilenceThreshold = 0.01f;
                _speechToText.MaxAlternatives = 0;
                _speechToText.EnableInterimResults = true;
                _speechToText.OnError = OnError;
                _speechToText.InactivityTimeout = -1;
                _speechToText.ProfanityFilter = false;
                _speechToText.SmartFormatting = true;
                _speechToText.SpeakerLabels = false;
                _speechToText.WordAlternativesThreshold = null;
                _speechToText.StartListening(OnRecognize, OnRecognizeSpeaker);
            }
            else if (!value && _speechToText.IsListening)
            {
                _speechToText.StopListening();
            }
        }
    }

 
    private void StartRecording()
    {
        if (_recordingRoutine == 0)
        {
            UnityObjectUtil.StartDestroyQueue();
            _recordingRoutine = Runnable.Run(RecordingHandler());
        }
    }

    private void StopRecording()
    {
        if (_recordingRoutine != 0)
        {
            Microphone.End(_microphoneID);
            Runnable.Stop(_recordingRoutine);
            _recordingRoutine = 0;
        }
    }

    private void OnError(string error)
    {
        Active = false;

        Log.Debug("ExampleStreaming.OnError()", "Error! {0}", error);
    }

    private IEnumerator RecordingHandler()
    {
        Log.Debug("ExampleStreaming.RecordingHandler()", "devices: {0}", Microphone.devices);
        _recording = Microphone.Start(_microphoneID, true, _recordingBufferSize, _recordingHZ);
        yield return null;      // let _recordingRoutine get set..

        if (_recording == null)
        {
            StopRecording();
            yield break;
        }

        bool bFirstBlock = true;
        int midPoint = _recording.samples / 2;
        float[] samples = null;

        while (_recordingRoutine != 0 && _recording != null)
        {
            int writePos = Microphone.GetPosition(_microphoneID);
            if (writePos > _recording.samples || !Microphone.IsRecording(_microphoneID))
            {
                Log.Error("ExampleStreaming.RecordingHandler()", "Microphone disconnected.");

                StopRecording();
                yield break;
            }

            if ((bFirstBlock && writePos >= midPoint)
              || (!bFirstBlock && writePos < midPoint))
            {
                // front block is recorded, make a RecordClip and pass it onto our callback.
                samples = new float[midPoint];
                _recording.GetData(samples, bFirstBlock ? 0 : midPoint);

                AudioData record = new AudioData();
                record.MaxLevel = Mathf.Max(Mathf.Abs(Mathf.Min(samples)), Mathf.Max(samples));
                record.Clip = AudioClip.Create("Recording", midPoint, _recording.channels, _recordingHZ, false);
                record.Clip.SetData(samples, 0);

                _speechToText.OnListen(record);

                bFirstBlock = !bFirstBlock;
            }
            else
            {
                // calculate the number of samples remaining until we're ready for a block of audio,
                // and wait the amount of time it will take to record them.
                int remaining = bFirstBlock ? (midPoint - writePos) : (_recording.samples - writePos);
                float timeRemaining = (float)remaining / (float)_recordingHZ;

                yield return new WaitForSeconds(timeRemaining);
            }

        }

        yield break;
    }

    private void OnRecognize(SpeechRecognitionEvent result, Dictionary<string, object> customData)
    {
        if (result != null && result.results.Length > 0)
        {
            foreach (var res in result.results)
            {
                foreach (var alt in res.alternatives)
                {
                    string text = string.Format("{0} ({1}, {2:0.00})\n", alt.transcript, res.final ? "Final" : "Interim", alt.confidence);
                    Log.Debug("ExampleStreaming.OnRecognize()", text);
                    ResultsField.text = text;

                    if (alt.transcript.Contains("inhabitants") && ResultsField.text.Contains("Final")) // needs to be final or ECHO happens
                    {
                        _testString = "<speak version=\"1.0\"><express-as type=\"GoodNews\">The original inhabitants of Gippsland, the Kurnai Aboriginal people, have lived in this region for 20,000 years. </express-as></speak>";
                        Runnable.Run(Examples());

                    }
                    if (alt.transcript.Contains("immigration") && ResultsField.text.Contains("Final")) // needs to be final or ECHO happens
                    {
                        _testString = "<speak version=\"1.0\"><prosody pitch=\\\"350Hz\\\"><express-as type=\"GoodNews\">Oh The next big immigration wave was the selectors who moved in from 1875 onwards to set up small dairy farms all over Gippsland, but mainly in the Strzelecki ranges of west and south Gippsland</express-as></prosody></speak>";
                        Runnable.Run(Examples());
                    }  // Don't echo the trigger phrase itself, or it will re-trigger in a loop.

                    if (alt.transcript.Contains("happy") && ResultsField.text.Contains("Final")) // needs to be final or ECHO happens
                    {
                        _testString = "<speak version=\"1.0\"><prosody pitch=\"250Hz\"><express-as type=\"GoodNews\">I am so glad to hear that!</express-as></prosody></speak>";
                        Runnable.Run(Examples());
                    }  // Don't echo the trigger phrase itself, or it will re-trigger in a loop.

                }

                if (res.keywords_result != null && res.keywords_result.keyword != null)
                {
                    foreach (var keyword in res.keywords_result.keyword)
                    {
                        Log.Debug("ExampleStreaming.OnRecognize()", "keyword: {0}, confidence: {1}, start time: {2}, end time: {3}", keyword.normalized_text, keyword.confidence, keyword.start_time, keyword.end_time);
                    }
                }

                if (res.word_alternatives != null)
                {
                    foreach (var wordAlternative in res.word_alternatives)
                    {
                        Log.Debug("ExampleStreaming.OnRecognize()", "Word alternatives found. Start time: {0} | EndTime: {1}", wordAlternative.start_time, wordAlternative.end_time);
                        foreach (var alternative in wordAlternative.alternatives)
                            Log.Debug("ExampleStreaming.OnRecognize()", "\t word: {0} | confidence: {1}", alternative.word, alternative.confidence);
                    }
                }
            }
        }
    }

    private void OnRecognizeSpeaker(SpeakerRecognitionEvent result, Dictionary<string, object> customData)
    {
        //throw new NotImplementedException();
        if (result != null)
        {
            foreach (SpeakerLabelsResult labelResult in result.speaker_labels)
            {
                Log.Debug("ExampleStreaming.OnRecognizeSpeaker()", string.Format("speaker result: {0} | confidence: {3} | from: {1} | to: {2}", labelResult.speaker, labelResult.from, labelResult.to, labelResult.confidence));
            }
        }
    }


    // TTS CODE
    private IEnumerator Examples()
    {
        //  Synthesize
        Log.Debug("ExampleTextToSpeech.Examples()", "Attempting synthesize.");
        _textToSpeech.Voice = VoiceType.en_US_Allison;
        _textToSpeech.ToSpeech(HandleToSpeechCallback, OnFail, _testString, true);
        while (!_synthesizeTested)
            yield return null;

        //  Get Voices
        Log.Debug("ExampleTextToSpeech.Examples()", "Attempting to get voices.");
        _textToSpeech.GetVoices(OnGetVoices, OnFail);
        while (!_getVoicesTested)
            yield return null;

        //  Get Voice
        Log.Debug("ExampleTextToSpeech.Examples()", "Attempting to get voice {0}.", VoiceType.en_US_Allison);
        _textToSpeech.GetVoice(OnGetVoice, OnFail, VoiceType.en_US_Allison);
        while (!_getVoiceTested)
            yield return null;

        //  Get Pronunciation
        Log.Debug("ExampleTextToSpeech.Examples()", "Attempting to get pronunciation of {0}", _testWord);
        _textToSpeech.GetPronunciation(OnGetPronunciation, OnFail, _testWord, VoiceType.en_US_Allison);
        while (!_getPronuciationTested)
            yield return null;

        //  Get Customizations
        //      Log.Debug("ExampleTextToSpeech.Examples()", "Attempting to get a list of customizations");
        //      _textToSpeech.GetCustomizations(OnGetCustomizations, OnFail);
        //      while (!_getCustomizationsTested)
        //          yield return null;

        //  Create Customization
        //      Log.Debug("ExampleTextToSpeech.Examples()", "Attempting to create a customization");
        //      _textToSpeech.CreateCustomization(OnCreateCustomization, OnFail, _customizationName, _customizationLanguage, _customizationDescription);
        //      while (!_createCustomizationTested)
        //          yield return null;

        //  Get Customization
        //      Log.Debug("ExampleTextToSpeech.Examples()", "Attempting to get a customization");
        //      if (!_textToSpeech.GetCustomization(OnGetCustomization, OnFail, _createdCustomizationId))
        //          Log.Debug("ExampleTextToSpeech.Examples()", "Failed to get custom voice model!");
        //      while (!_getCustomizationTested)
        //          yield return null;

        //  Update Customization
        //      Log.Debug("ExampleTextToSpeech.Examples()", "Attempting to update a customization");
        //      Word[] wordsToUpdateCustomization =
        //      {
        //          new Word()
        //          {
        //              word = "hello",
        //              translation = "hullo"
        //          },
        //          new Word()
        //          {
        //              word = "goodbye",
        //              translation = "gbye"
        //          },
        //          new Word()
        //          {
        //              word = "hi",
        //              translation = "ohioooo"
        //          }
        //      };

        //      _customVoiceUpdate = new CustomVoiceUpdate()
        //      {
        //          words = wordsToUpdateCustomization,
        //          description = "My updated description",
        //          name = "My updated name"
        //      };

        //  NOTE: the Create Customization step above is commented out, so
        //  _createdCustomizationId and _customVoiceUpdate will still be null here
        //  unless that step is restored.
        if (!_textToSpeech.UpdateCustomization(OnUpdateCustomization, OnFail, _createdCustomizationId, _customVoiceUpdate))
            Log.Debug("ExampleTextToSpeech.Examples()", "Failed to update customization!");
        while (!_updateCustomizationTested)
            yield return null;

        //  Get Customization Words
        //      Log.Debug("ExampleTextToSpeech.Examples()", "Attempting to get a customization's words");
        //      if (!_textToSpeech.GetCustomizationWords(OnGetCustomizationWords, OnFail, _createdCustomizationId))
        //          Log.Debug("ExampleTextToSpeech.GetCustomizationWords()", "Failed to get {0} words!", _createdCustomizationId);
        //      while (!_getCustomizationWordsTested)
        //          yield return null;

        //  Add Customization Words
        //      Log.Debug("ExampleTextToSpeech.Examples()", "Attempting to add words to a customization");
        //      Word[] wordArrayToAddToCustomization =
        //      {
        //          new Word()
        //          {
        //              word = "bananna",
        //              translation = "arange"
        //          },
        //          new Word()
        //          {
        //              word = "orange",
        //              translation = "gbye"
        //          },
        //          new Word()
        //          {
        //              word = "tomato",
        //              translation = "tomahto"
        //          }
        //      };

        //      Words wordsToAddToCustomization = new Words()
        //      {
        //          words = wordArrayToAddToCustomization
        //      };

        //      if (!_textToSpeech.AddCustomizationWords(OnAddCustomizationWords, OnFail, _createdCustomizationId, wordsToAddToCustomization))
        //          Log.Debug("ExampleTextToSpeech.AddCustomizationWords()", "Failed to add words to {0}!", _createdCustomizationId);
        //      while (!_addCustomizationWordsTested)
        //          yield return null;

        //  Get Customization Word
        //      Log.Debug("ExampleTextToSpeech.Examples()", "Attempting to get the translation of a custom voice model's word.");
        //      string customIdentifierWord = wordsToUpdateCustomization[0].word;
        //      if (!_textToSpeech.GetCustomizationWord(OnGetCustomizationWord, OnFail, _createdCustomizationId, customIdentifierWord))
        //          Log.Debug("ExampleTextToSpeech.GetCustomizationWord()", "Failed to get the translation of {0} from {1}!", customIdentifierWord, _createdCustomizationId);
        //      while (!_getCustomizationWordTested)
        //          yield return null;

        //  Delete Customization Word
        Log.Debug("ExampleTextToSpeech.Examples()", "Attempting to delete customization word from custom voice model.");
        string wordToDelete = "goodbye";
        if (!_textToSpeech.DeleteCustomizationWord(OnDeleteCustomizationWord, OnFail, _createdCustomizationId, wordToDelete))
            Log.Debug("ExampleTextToSpeech.DeleteCustomizationWord()", "Failed to delete {0} from {1}!", wordToDelete, _createdCustomizationId);
        while (!_deleteCustomizationWordTested)
            yield return null;

        //  Delete Customization
        Log.Debug("ExampleTextToSpeech.Examples()", "Attempting to delete a customization");
        if (!_textToSpeech.DeleteCustomization(OnDeleteCustomization, OnFail, _createdCustomizationId))
            Log.Debug("ExampleTextToSpeech.DeleteCustomization()", "Failed to delete custom voice model!");
        while (!_deleteCustomizationTested)
            yield return null;

        Log.Debug("ExampleTextToSpeech.Examples()", "Text to Speech examples complete.");
    }

    void HandleToSpeechCallback(AudioClip clip, Dictionary<string, object> customData = null)
    {
        PlayClip(clip);
    }

    private void PlayClip(AudioClip clip)
    {
        if (Application.isPlaying && clip != null)
        {
            GameObject audioObject = new GameObject("AudioObject");
            AudioSource source = audioObject.AddComponent<AudioSource>();
            source.spatialBlend = 0.0f;
            source.loop = false;
            source.clip = clip;
            source.Play();

            Destroy(audioObject, clip.length);

            _synthesizeTested = true;
        }
    }

    private void OnGetVoices(Voices voices, Dictionary<string, object> customData = null)
    {
        Log.Debug("ExampleTextToSpeech.OnGetVoices()", "Text to Speech - Get voices response: {0}", customData["json"].ToString());
        _getVoicesTested = true;
    }

    private void OnGetVoice(Voice voice, Dictionary<string, object> customData = null)
    {
        Log.Debug("ExampleTextToSpeech.OnGetVoice()", "Text to Speech - Get voice  response: {0}", customData["json"].ToString());
        _getVoiceTested = true;
    }

    private void OnGetPronunciation(Pronunciation pronunciation, Dictionary<string, object> customData = null)
    {
        Log.Debug("ExampleTextToSpeech.OnGetPronunciation()", "Text to Speech - Get pronunciation response: {0}", customData["json"].ToString());
        _getPronuciationTested = true;
    }

    //  private void OnGetCustomizations(Customizations customizations, Dictionary<string, object> customData = null)
    //  {
    //      Log.Debug("ExampleTextToSpeech.OnGetCustomizations()", "Text to Speech - Get customizations response: {0}", customData["json"].ToString());
    //      _getCustomizationsTested = true;
    //  }

    //  private void OnCreateCustomization(CustomizationID customizationID, Dictionary<string, object> customData = null)
    //  {
    //      Log.Debug("ExampleTextToSpeech.OnCreateCustomization()", "Text to Speech - Create customization response: {0}", customData["json"].ToString());
    //      _createdCustomizationId = customizationID.customization_id;
    //      _createCustomizationTested = true;
    //  }

    private void OnDeleteCustomization(bool success, Dictionary<string, object> customData = null)
    {
        Log.Debug("ExampleTextToSpeech.OnDeleteCustomization()", "Text to Speech - Delete customization response: {0}", customData["json"].ToString());
        _createdCustomizationId = null;
        _deleteCustomizationTested = true;
    }

    //  private void OnGetCustomization(Customization customization, Dictionary<string, object> customData = null)
    //  {
    //      Log.Debug("ExampleTextToSpeech.OnGetCustomization()", "Text to Speech - Get customization response: {0}", customData["json"].ToString());
    //      _getCustomizationTested = true;
    //  }

    private void OnUpdateCustomization(bool success, Dictionary<string, object> customData = null)
    {
        Log.Debug("ExampleTextToSpeech.OnUpdateCustomization()", "Text to Speech - Update customization response: {0}", customData["json"].ToString());
        _updateCustomizationTested = true;
    }

    //  private void OnGetCustomizationWords(Words words, Dictionary<string, object> customData = null)
    //  {
    //      Log.Debug("ExampleTextToSpeech.OnGetCustomizationWords()", "Text to Speech - Get customization words response: {0}", customData["json"].ToString());
    //      _getCustomizationWordsTested = true;
    //  }

    private void OnAddCustomizationWords(bool success, Dictionary<string, object> customData = null)
    {
        Log.Debug("ExampleTextToSpeech.OnAddCustomizationWords()", "Text to Speech - Add customization words response: {0}", customData["json"].ToString());
        _addCustomizationWordsTested = true;
    }

    private void OnDeleteCustomizationWord(bool success, Dictionary<string, object> customData = null)
    {
        Log.Debug("ExampleTextToSpeech.OnDeleteCustomizationWord()", "Text to Speech - Delete customization word response: {0}", customData["json"].ToString());
        _deleteCustomizationWordTested = true;
    }

    private void OnGetCustomizationWord(Translation translation, Dictionary<string, object> customData = null)
    {
        Log.Debug("ExampleTextToSpeech.OnGetCustomizationWord()", "Text to Speech - Get customization word response: {0}", customData["json"].ToString());
        _getCustomizationWordTested = true;
    }

    private void OnFail(RESTConnector.Error error, Dictionary<string, object> customData)
    {
        Log.Error("ExampleTextToSpeech.OnFail()", "Error received: {0}", error.ToString());
    }

}
Example #12
    /*
     *  RecordingHandler() records the real-time broadcast from the device's microphone.
     *  Recording relies on the microphone's sample rate.
     *  Warning: recordingHZ must be an even integer multiple of chunkSize.
     *  For example, recordingHZ = 16000 with chunkSize = 1600 satisfies this
     *  (16000 = 10 * 1600) and gives 0.1 s chunks; the values are illustrative,
     *  since chunkSize is defined outside this listing.
     *  Thanks to Michael Pickering - RMichaelPickering (GitHub) for explaining how to
     *  reduce the recording delay significantly.
     */
    private IEnumerator RecordingHandler()
    {
        // Tracks how much has already been written; starts at zero
        int chunkEnd = 0;
        // Current read position in the broadcast
        int readPosition = 0;
        // Downsampling factor; a value of 1 disables downsampling
        int downSampleFactor = 1;
        // RMS value of the sound level, calculated per chunk of samples
        float rmsValue;
        // dB value of the sound level, calculated per chunk of samples
        float dbValue;

        // Buffer for processing each chunk of audio samples
        float[] samplesChunk = null;

        Log.Debug("{0}", "devices: {1}", runningProcess, Microphone.devices);

        // Start recording
        // boolean value is for allowing looping records
        recording = Microphone.Start(microphoneID, true, recordingBufferSize, recordingHZ);

        Log.Debug(runningProcess, "Microphone ring buffer includes {0} channels with a total of {1} samples.", recording.channels.ToString(), recording.samples.ToString());

        // Microphone.Start returns null only on failure
        // Testing if the recording failed
        if (recording == null)
        {
            StopRecording();
            yield break;
        }

        // End of the first chunk is 'chunkSize * downSampleFactor - 1',
        // since the first sample is at position zero
        chunkEnd = chunkSize * downSampleFactor - 1;

        // Wait until at least one audio chunk is ready
        // (cast to float so integer division doesn't truncate the wait to zero)
        yield return(new WaitForSecondsRealtime((float)(chunkSize * downSampleFactor) / recordingHZ));


        while (recordingRoutine != 0 && recording != null)
        {
            // Get current writePosition of the microphone in the recording
            int writePosition = Microphone.GetPosition(microphoneID);
            // Testing if the microphone is still recording
            if (!Microphone.IsRecording(microphoneID))
            {
                Log.Error("MicrophoneWidget", "Microphone disconnected.");
                StopRecording();
                yield break;
            }

            // Make sure that at least chunkSize samples have been written
            while (writePosition > readPosition + chunkEnd || writePosition < readPosition)
            {
                // At least one chunk is recorded: copy it out, wrap it in an
                // AudioClip, and pass it on to our callback
                samplesChunk = new float[chunkSize * downSampleFactor];
                recording.GetData(samplesChunk, readPosition);


                AudioData record = new AudioData();
                // 20171018 RMPickering - The next statement seems to be setting the MaxLevel to the highest value from the samples, not taking into account the negative values.
                // record.MaxLevel = Mathf.Max(samples);


                // Calculate the max level of the highest value from the samples
                // Don't take into account the negative values (only absolute values)
                float sumSquaredSamples  = 0; // sum squared samples
                float sumAbsoluteSamples = 0; // sum absolute values

                // Implementation of an anti-aliasing low-pass filter
                // The cutoff must be below the Nyquist frequency (8000 Hz for 16 kHz audio)
                float CUTOFF = 6500.0f;
                float RC     = 1.0f / (CUTOFF * 2.0f * 3.14f);
                // Using initial sample rate
                float dt    = 1.0f / 16000.0f;
                float alpha = dt / (RC + dt);

                // Calculate RMS and DB values
                sumSquaredSamples  += samplesChunk[0] * samplesChunk[0];
                sumAbsoluteSamples += Mathf.Abs(samplesChunk[0]);

                // Application of the low pass filter
                int i = 0;
                for (i = 1; i < chunkSize * downSampleFactor; i++)
                {
                    // Low pass filter allows smoothing audio recorded above the cutoff frequency
                    samplesChunk[i]     = samplesChunk[i - 1] + alpha * (samplesChunk[i] - samplesChunk[i - 1]);
                    sumSquaredSamples  += samplesChunk[i] * samplesChunk[i]; // sum squared samples
                    sumAbsoluteSamples += Mathf.Abs(samplesChunk[i]);
                }

                // Calculate the square root of average = rmsValue
                rmsValue = Mathf.Sqrt(sumSquaredSamples / chunkSize);
                // Calculate the DB value
                dbValue = 20 * Mathf.Log10(rmsValue / refValue);
                // Set minimum dbValue to -160 dB
                if (dbValue < -160)
                {
                    dbValue = -160;
                }

                // Set MaxLevel
                record.MaxLevel = rmsValue;

                // Set the clip recorded
                record.Clip = AudioClip.Create("audioChunk", chunkSize, 1, recordingHZ, false);

                // Copy the audio samples from the array samplesChuck into the clip recorded
                record.Clip.SetData(samplesChunk, 0);

                // Send the recorded clip to IBM Watson Speech To Text
                speechToText.OnListen(record);

                // Remember which block has been copied
                readPosition += chunkSize * downSampleFactor;
                if (readPosition > recordingHZ * recording.channels - 1)
                {
                    // Reset readPosition and chunkEnd to begin a new pass over the buffer
                    readPosition = 0;
                    chunkEnd     = chunkSize * downSampleFactor - 1;
                }
                else
                {
                    chunkEnd += chunkSize * downSampleFactor;
                }
            }

            // Calculate the wait time for the next update and continue streaming from the microphone
            // (cast to float so integer division doesn't truncate the wait to zero)
            yield return(new WaitForSecondsRealtime((float)(chunkSize * downSampleFactor) / recordingHZ));
        }

        yield break;
    }
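To make the filter constants above concrete: RC = 1 / (6500 * 2 * 3.14) ≈ 2.45e-5 s and dt = 1 / 16000 = 6.25e-5 s, so alpha = dt / (RC + dt) ≈ 0.72. Each filtered sample is therefore about 72% of the new input plus 28% of the previous output, a single-pole low-pass filter whose response is about 3 dB down at the 6500 Hz cutoff. The level calculation then reports RMS rather than peak amplitude: rmsValue = sqrt(sumSquaredSamples / chunkSize) and dbValue = 20 * log10(rmsValue / refValue), clamped to a floor of -160 dB (refValue is defined outside this listing).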