Example #1
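Bakes an OVRLipSyncSequence from an AudioClip, optionally loading the offline model file. It validates the clip, processes it in fixed-size chunks, and drops the first frames to compensate for the engine's reported frame delay.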
    public static OVRLipSyncSequence CreateSequenceFromAudioClip(
        AudioClip clip, bool useOfflineModel = false)
    {
        OVRLipSyncSequence sequence = null;

        if (clip.channels > 2)
        {
            Debug.LogError(clip.name +
                           ": Cannot process phonemes from an audio clip with " +
                           "more than 2 channels");
            return null;
        }

        if (clip.loadType != AudioClipLoadType.DecompressOnLoad)
        {
            Debug.LogError(clip.name +
                           ": Cannot process phonemes from an audio clip unless " +
                           "its load type is set to DecompressOnLoad.");
            return null;
        }

        if (OVRLipSync.Initialize(clip.frequency, sSampleSize) != OVRLipSync.Result.Success)
        {
            Debug.LogError("Could not create Lip Sync engine.");
            return null;
        }

        if (clip.loadState != AudioDataLoadState.Loaded)
        {
            Debug.LogError("Clip is not loaded!");
            return null;
        }

        uint context = 0;

        OVRLipSync.Result result = useOfflineModel
            ? OVRLipSync.CreateContextWithModelFile(
            ref context,
            OVRLipSync.ContextProviders.Enhanced,
            Path.Combine(Application.dataPath, "Oculus/LipSync/Assets/OfflineModel/ovrlipsync_offline_model.pb"))
            : OVRLipSync.CreateContext(ref context, OVRLipSync.ContextProviders.Enhanced);

        if (result != OVRLipSync.Result.Success)
        {
            Debug.LogError("Could not create Phoneme context. (" + result + ")");
            OVRLipSync.Shutdown();
            return null;
        }

        List <OVRLipSync.Frame> frames = new List <OVRLipSync.Frame>();

        float[] samples = new float[sSampleSize * clip.channels];

        // Process one dummy frame so the engine reports its internal frame delay (in ms).
        OVRLipSync.Frame dummyFrame = new OVRLipSync.Frame();
        OVRLipSync.ProcessFrame(
            context,
            samples,
            dummyFrame,
            clip.channels == 2
            );
        float frameDelayInMs = dummyFrame.frameDelay;

        int frameOffset = (int)(frameDelayInMs * clip.frequency / 1000);

        int totalSamples = clip.samples;

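        // Feed the clip in sSampleSize chunks; results produced before frameOffset
        // are skipped below to compensate for the engine's processing latency.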
        for (int x = 0; x < totalSamples + frameOffset; x += sSampleSize)
        {
            int remainingSamples = totalSamples - x;
            if (remainingSamples >= sSampleSize)
            {
                clip.GetData(samples, x);
            }
            else if (remainingSamples > 0)
            {
                float[] samples_clip = new float[remainingSamples * clip.channels];
                clip.GetData(samples_clip, x);
                Array.Copy(samples_clip, samples, samples_clip.Length);
                Array.Clear(samples, samples_clip.Length, samples.Length - samples_clip.Length);
            }
            else
            {
                Array.Clear(samples, 0, samples.Length);
            }

            OVRLipSync.Frame frame = new OVRLipSync.Frame();
            if (clip.channels == 2)
            {
                // interleaved = stereo data, alternating floats
                OVRLipSync.ProcessFrame(context, samples, frame);
            }
            else
            {
                // mono
                OVRLipSync.ProcessFrame(context, samples, frame, false);
            }

            if (x < frameOffset)
            {
                continue;
            }

            frames.Add(frame);
        }

        Debug.Log(clip.name + " produced " + frames.Count +
                  " viseme frames, playback rate is " + (frames.Count / clip.length) +
                  " fps");
        OVRLipSync.DestroyContext(context);
        OVRLipSync.Shutdown();

        sequence         = ScriptableObject.CreateInstance <OVRLipSyncSequence>();
        sequence.entries = frames;
        sequence.length  = clip.length;

        return sequence;
    }
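For context, here is a rough sketch of how the baked sequence from Example #1 might be saved as an asset from an editor script. It is not part of the examples above: the menu path, class name, and asset-naming scheme are invented for illustration, and it assumes the method is a static member of OVRLipSyncSequence, as in the Oculus Lip Sync integration.

    // Hypothetical editor helper, not part of the original examples.
    using System.IO;
    using UnityEditor;
    using UnityEngine;

    public static class LipSyncSequenceBaker
    {
        [MenuItem("Tools/LipSync/Bake Sequence From Selected Clip")]
        private static void BakeSelectedClip()
        {
            AudioClip clip = Selection.activeObject as AudioClip;
            if (clip == null)
            {
                Debug.LogError("Select an AudioClip in the Project window first.");
                return;
            }

            // Returns null (and logs why) if the clip is not DecompressOnLoad,
            // has more than 2 channels, or the engine fails to initialize.
            OVRLipSyncSequence sequence =
                OVRLipSyncSequence.CreateSequenceFromAudioClip(clip);
            if (sequence == null)
            {
                return;
            }

            // Save the ScriptableObject next to the source clip.
            string clipPath  = AssetDatabase.GetAssetPath(clip);
            string assetPath = Path.ChangeExtension(clipPath, null) + "_lipsync.asset";
            AssetDatabase.CreateAsset(sequence, assetPath);
            AssetDatabase.SaveAssets();
            Debug.Log("Saved lip sync sequence to " + assetPath);
        }
    }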
Example #2
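Initializes lip sync either from a pre-recorded clip on the attached AudioSource ("canned" audio) or from live microphone capture, (re)initializing OVRLipSync, creating a viseme context, and configuring viseme smoothing before marking the component active.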
    public void InitializeLipSync()
    {
        active = false;
        if (catsData == null)
        {
            InitCatsData();
        }
        if (!isCanned && clip != null)
        {
            Microphone.End(lastMic);
        }

        partialAudio = null;
        partialPos   = 0;

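        // Prefer "canned" audio already assigned to the attached AudioSource over live microphone capture.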
        audioSource = GetComponent <AudioSource>();
        if (useCanned && audioSource != null && audioSource.clip != null)
        {
            isCanned     = true;
            clip         = audioSource.clip;
            channels     = clip.channels;
            partialAudio = new float[1024 * channels];
            freq         = audioSource.clip.frequency;
            if (!inited)
            {
                if (OVRLipSync.IsInitialized() == OVRLipSync.Result.Success)
                {
                    DestroyContext();
                    OVRLipSync.Shutdown();
                }
                OVRLipSync.Initialize(freq, 1024);
                CreateContext();
                OVRLipSync.SendSignal(context, OVRLipSync.Signals.VisemeSmoothing, smoothAmount, 0);
                inited = true;
            }
            active = true;
            return;
        }

        isCanned = false;

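        // Live microphone path: default the capture frequency to the output sample rate,
        // overriding it with the device's reported maximum when one is available.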
        int minFreq;
        int maxFreq = AudioSettings.outputSampleRate;

        freq = maxFreq;

        lastMic = mic;
        if (mic != null)
        {
            Microphone.GetDeviceCaps(lastMic, out minFreq, out maxFreq);
        }
        if (maxFreq > 0)
        {
            freq = maxFreq;
        }

        if (!inited)
        {
            if (OVRLipSync.IsInitialized() == OVRLipSync.Result.Success)
            {
                DestroyContext();
                OVRLipSync.Shutdown();
            }
            OVRLipSync.Initialize(freq, 1024);
            CreateContext();
            OVRLipSync.SendSignal(context, OVRLipSync.Signals.VisemeSmoothing, smoothAmount, 0);
            inited = true;
        }

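        // Start a looping one-second capture buffer on the selected microphone.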
        clip         = Microphone.Start(lastMic, true, 1, freq);
        channels     = clip.channels;
        partialAudio = new float[1024 * channels];
        lastPos      = 0;
        active       = true;
    }
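Example #3
An older variant of the baking method: it uses the Main context provider and the older ProcessFrame/ProcessFrameInterleaved calls, and nests its error handling instead of returning early.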
    public static OVRLipSyncSequence CreateSequenceFromAudioClip(AudioClip clip)
    {
        OVRLipSyncSequence sequence = null;

        if (clip.loadType != AudioClipLoadType.DecompressOnLoad || clip.channels > 2)
        {
            // todo: just fix the clip
            Debug.LogError("Cannot process phonemes from an audio clip unless its load type is set to DecompressOnLoad.");
        }
        else
        {
            if (OVRLipSync.Initialize(clip.frequency, sSampleSize) != OVRLipSync.Result.Success)
            {
                Debug.LogError("Could not create Lip Sync engine.");
            }
            else
            {
                uint context             = 0;
                OVRLipSync.Result result = OVRLipSync.CreateContext(ref context, OVRLipSync.ContextProviders.Main);
                if (result != OVRLipSync.Result.Success)
                {
                    Debug.LogError("Could not create Phoneme context. (" + result + ")");
                    OVRLipSync.Shutdown();
                }
                else
                {
                    List <OVRLipSync.Frame> frames = new List <OVRLipSync.Frame>();
                    float[] samples      = new float[sSampleSize * clip.channels];
                    int     totalSamples = clip.samples;
                    for (int x = 0; x < totalSamples; x += sSampleSize)
                    {
                        // GetData loops at the end of the read.  Prevent that when it happens.
                        if (x + samples.Length > totalSamples)
                        {
                            samples = new float[(totalSamples - x) * clip.channels];
                        }
                        clip.GetData(samples, x);
                        OVRLipSync.Frame frame = new OVRLipSync.Frame();
                        if (clip.channels == 2)
                        {
                            // interleaved = stereo data, alternating floats
                            OVRLipSync.ProcessFrameInterleaved(context, samples, 0, frame);
                        }
                        else
                        {
                            // mono
                            OVRLipSync.ProcessFrame(context, samples, 0, frame);
                        }

                        frames.Add(frame);
                    }

                    Debug.Log(clip.name + " produced " + frames.Count + " viseme frames, playback rate is " + (frames.Count / clip.length) + " fps");
                    OVRLipSync.DestroyContext(context);
                    OVRLipSync.Shutdown();

                    sequence         = ScriptableObject.CreateInstance <OVRLipSyncSequence>();
                    sequence.entries = frames;
                    sequence.length  = clip.length;
                }
            }
        }
        return sequence;
    }
Example #4
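A simpler variant of Example #1: the same validation and context setup, but the clip is fed straight through without frame-delay compensation, and the sample buffer is shrunk on the final read instead of zero-padded.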
    public static OVRLipSyncSequence CreateSequenceFromAudioClip(
        AudioClip clip, bool useOfflineModel = false)
    {
        OVRLipSyncSequence sequence = null;

        if (clip.channels > 2)
        {
            Debug.LogError(clip.name +
                           ": Cannot process phonemes from an audio clip with " +
                           "more than 2 channels");
            return null;
        }

        if (clip.loadType != AudioClipLoadType.DecompressOnLoad)
        {
            Debug.LogError(clip.name +
                           ": Cannot process phonemes from an audio clip unless " +
                           "its load type is set to DecompressOnLoad.");
            return null;
        }

        if (OVRLipSync.Initialize(clip.frequency, sSampleSize) != OVRLipSync.Result.Success)
        {
            Debug.LogError("Could not create Lip Sync engine.");
            return null;
        }

        if (clip.loadState != AudioDataLoadState.Loaded)
        {
            Debug.LogError("Clip is not loaded!");
            return null;
        }

        uint context = 0;

        OVRLipSync.Result result = useOfflineModel
            ? OVRLipSync.CreateContextWithModelFile(
            ref context,
            OVRLipSync.ContextProviders.Enhanced,
            Path.Combine(Application.dataPath, "Oculus/LipSync/Assets/OfflineModel/ovrlipsync_offline_model.pb"))
            : OVRLipSync.CreateContext(ref context, OVRLipSync.ContextProviders.Enhanced);

        if (result != OVRLipSync.Result.Success)
        {
            Debug.LogError("Could not create Phoneme context. (" + result + ")");
            OVRLipSync.Shutdown();
            return null;
        }

        List <OVRLipSync.Frame> frames = new List <OVRLipSync.Frame>();

        float[] samples      = new float[sSampleSize * clip.channels];
        int     totalSamples = clip.samples;

        for (int x = 0; x < totalSamples; x += sSampleSize)
        {
            // GetData loops at the end of the read.  Prevent that when it happens.
            if (x + samples.Length > totalSamples)
            {
                samples = new float[(totalSamples - x) * clip.channels];
            }
            clip.GetData(samples, x);
            OVRLipSync.Frame frame = new OVRLipSync.Frame();
            if (clip.channels == 2)
            {
                // interleaved = stereo data, alternating floats
                OVRLipSync.ProcessFrame(context, samples, frame);
            }
            else
            {
                // mono
                OVRLipSync.ProcessFrame(context, samples, frame, false);
            }

            frames.Add(frame);
        }

        Debug.Log(clip.name + " produced " + frames.Count +
                  " viseme frames, playback rate is " + (frames.Count / clip.length) +
                  " fps");
        OVRLipSync.DestroyContext(context);
        OVRLipSync.Shutdown();

        sequence         = ScriptableObject.CreateInstance <OVRLipSyncSequence>();
        sequence.entries = frames;
        sequence.length  = clip.length;

        return sequence;
    }