/// <summary>
/// Runs the lip sync engine over an audio clip and collects the resulting
/// viseme frames into a new <see cref="OVRLipSyncSequence"/>.
/// </summary>
/// <remarks>
/// The clip is processed in buffers of <c>sSampleSize</c> samples per channel.
/// One silent buffer is processed first to read the frame delay reported by
/// the engine; the clip is then followed by that much extra silence and the
/// frames produced during the initial delay window are discarded.
/// </remarks>
/// <param name="clip">
/// Clip to analyse. It must have at most 2 channels, use the
/// <c>DecompressOnLoad</c> load type, and already be loaded.
/// </param>
/// <param name="useOfflineModel">
/// When true, the context is created from the offline model file located under
/// <c>Application.dataPath</c>; otherwise the default Enhanced context is used.
/// </param>
/// <returns>
/// The generated sequence, or <c>null</c> if the clip is unsuitable or the
/// engine/context could not be created (an error is logged in each case).
/// </returns>
public static OVRLipSyncSequence CreateSequenceFromAudioClip(
    AudioClip clip, bool useOfflineModel = false)
{
    if (clip.channels > 2)
    {
        Debug.LogError(clip.name +
            ": Cannot process phonemes from an audio clip with " +
            "more than 2 channels");
        return null;
    }

    if (clip.loadType != AudioClipLoadType.DecompressOnLoad)
    {
        Debug.LogError(clip.name +
            ": Cannot process phonemes from an audio clip unless " +
            "its load type is set to DecompressOnLoad.");
        return null;
    }

    // Checked before the engine is initialized so this early exit cannot leave
    // an initialized engine behind without a matching Shutdown().
    if (clip.loadState != AudioDataLoadState.Loaded)
    {
        Debug.LogError("Clip is not loaded!");
        return null;
    }

    if (OVRLipSync.Initialize(clip.frequency, sSampleSize) != OVRLipSync.Result.Success)
    {
        Debug.LogError("Could not create Lip Sync engine.");
        return null;
    }

    uint context = 0;
    OVRLipSync.Result result = useOfflineModel
        ? OVRLipSync.CreateContextWithModelFile(
            ref context,
            OVRLipSync.ContextProviders.Enhanced,
            Path.Combine(Application.dataPath, "Oculus/LipSync/Assets/OfflineModel/ovrlipsync_offline_model.pb"))
        : OVRLipSync.CreateContext(ref context, OVRLipSync.ContextProviders.Enhanced);

    if (result != OVRLipSync.Result.Success)
    {
        Debug.LogError("Could not create Phoneme context. (" + result + ")");
        OVRLipSync.Shutdown();
        return null;
    }

    List<OVRLipSync.Frame> frames = new List<OVRLipSync.Frame>();
    try
    {
        bool isStereo = clip.channels == 2;
        float[] samples = new float[sSampleSize * clip.channels];

        // Process one all-zero buffer purely to read the frame delay the
        // engine reports; this frame is never added to the result.
        OVRLipSync.Frame dummyFrame = new OVRLipSync.Frame();
        OVRLipSync.ProcessFrame(context, samples, dummyFrame, isStereo);

        // frame delay in ms, converted to a sample count
        float frameDelayInMs = dummyFrame.frameDelay;
        int frameOffset = (int)(frameDelayInMs * clip.frequency / 1000);

        int totalSamples = clip.samples;
        for (int x = 0; x < totalSamples + frameOffset; x += sSampleSize)
        {
            int remainingSamples = totalSamples - x;
            if (remainingSamples >= sSampleSize)
            {
                // A full buffer is still available in the clip.
                clip.GetData(samples, x);
            }
            else if (remainingSamples > 0)
            {
                // Partial tail: read only what is left and zero-pad the rest
                // so the engine still receives a full-size buffer.
                float[] tail = new float[remainingSamples * clip.channels];
                clip.GetData(tail, x);
                Array.Copy(tail, samples, tail.Length);
                Array.Clear(samples, tail.Length, samples.Length - tail.Length);
            }
            else
            {
                // Past the end of the clip: feed silence to cover the delay.
                Array.Clear(samples, 0, samples.Length);
            }

            OVRLipSync.Frame frame = new OVRLipSync.Frame();
            if (isStereo)
            {
                // interleaved = stereo data, alternating floats
                OVRLipSync.ProcessFrame(context, samples, frame);
            }
            else
            {
                // mono
                OVRLipSync.ProcessFrame(context, samples, frame, false);
            }

            // Frames produced within the initial delay window are dropped.
            if (x < frameOffset)
            {
                continue;
            }

            frames.Add(frame);
        }

        Debug.Log(clip.name + " produced " + frames.Count +
            " viseme frames, playback rate is " + (frames.Count / clip.length) +
            " fps");
    }
    finally
    {
        // Always release the context and the engine, even if processing throws.
        OVRLipSync.DestroyContext(context);
        OVRLipSync.Shutdown();
    }

    OVRLipSyncSequence sequence = ScriptableObject.CreateInstance<OVRLipSyncSequence>();
    sequence.entries = frames;
    sequence.length = clip.length;

    return sequence;
}
/// <summary>
/// Runs the lip sync engine over an audio clip and collects one viseme frame
/// per processed buffer into a new <see cref="OVRLipSyncSequence"/>.
/// </summary>
/// <remarks>
/// The clip is processed in buffers of <c>sSampleSize</c> samples per channel.
/// The final buffer is shortened to the samples that remain, so the read
/// never runs past the end of the clip.
/// </remarks>
/// <param name="clip">
/// Clip to analyse. It must have at most 2 channels, use the
/// <c>DecompressOnLoad</c> load type, and already be loaded.
/// </param>
/// <returns>
/// The generated sequence, or <c>null</c> if the clip is unsuitable or the
/// engine/context could not be created (an error is logged in each case).
/// </returns>
public static OVRLipSyncSequence CreateSequenceFromAudioClip(AudioClip clip)
{
    if (clip.channels > 2)
    {
        Debug.LogError(clip.name +
            ": Cannot process phonemes from an audio clip with " +
            "more than 2 channels");
        return null;
    }

    if (clip.loadType != AudioClipLoadType.DecompressOnLoad)
    {
        Debug.LogError(clip.name +
            ": Cannot process phonemes from an audio clip unless " +
            "its load type is set to DecompressOnLoad.");
        return null;
    }

    // Checked before the engine is initialized so this early exit cannot leave
    // an initialized engine behind without a matching Shutdown().
    if (clip.loadState != AudioDataLoadState.Loaded)
    {
        Debug.LogError("Clip is not loaded!");
        return null;
    }

    if (OVRLipSync.Initialize(clip.frequency, sSampleSize) != OVRLipSync.Result.Success)
    {
        Debug.LogError("Could not create Lip Sync engine.");
        return null;
    }

    uint context = 0;
    OVRLipSync.Result result =
        OVRLipSync.CreateContext(ref context, OVRLipSync.ContextProviders.Enhanced);

    if (result != OVRLipSync.Result.Success)
    {
        Debug.LogError("Could not create Phoneme context. (" + result + ")");
        OVRLipSync.Shutdown();
        return null;
    }

    List<OVRLipSync.Frame> frames = new List<OVRLipSync.Frame>();
    try
    {
        float[] samples = new float[sSampleSize * clip.channels];
        int totalSamples = clip.samples;

        for (int x = 0; x < totalSamples; x += sSampleSize)
        {
            // GetData loops at the end of the read. Prevent that when it happens.
            // x and totalSamples count samples per channel, so the comparison
            // uses sSampleSize rather than the buffer length in floats (which
            // is doubled for stereo and would shrink the buffer a frame early).
            if (x + sSampleSize > totalSamples)
            {
                samples = new float[(totalSamples - x) * clip.channels];
            }

            clip.GetData(samples, x);

            OVRLipSync.Frame frame = new OVRLipSync.Frame();
            if (clip.channels == 2)
            {
                // interleaved = stereo data, alternating floats
                OVRLipSync.ProcessFrame(context, samples, frame);
            }
            else
            {
                // mono
                OVRLipSync.ProcessFrame(context, samples, frame, OVRLipSync.AudioDataType.F32_Mono);
            }

            frames.Add(frame);
        }

        Debug.Log(clip.name + " produced " + frames.Count +
            " viseme frames, playback rate is " + (frames.Count / clip.length) +
            " fps");
    }
    finally
    {
        // Always release the context and the engine, even if processing throws.
        OVRLipSync.DestroyContext(context);
        OVRLipSync.Shutdown();
    }

    OVRLipSyncSequence sequence = ScriptableObject.CreateInstance<OVRLipSyncSequence>();
    sequence.entries = frames;
    sequence.length = clip.length;

    return sequence;
}