/// <summary>
/// Common recognizer setup: selects the vowel/formant tables for the target
/// language, clamps the FFT window size to a power of two, and allocates the
/// per-instance work buffers.
/// </summary>
/// <param name="recognizingLanguage">Language whose vowel tables to load.</param>
/// <param name="windowSize">Requested FFT window size; rounded to the closest power of two.</param>
/// <param name="amplitudeThreshold">Minimum amplitude below which frames are ignored.</param>
protected void Init(ERecognizerLanguage recognizingLanguage, int windowSize, float amplitudeThreshold)
{
    if (recognizingLanguage == ERecognizerLanguage.Japanese)
    {
        currentVowels = vowelsByFormantJP;
        // NOTE(review): a field named ...CeilValues is assigned ...Floor... data — confirm naming intent.
        currentVowelFormantCeilValues = vowelFormantFloorJP;
    }
    else if (recognizingLanguage == ERecognizerLanguage.Chinese)
    {
        currentVowels = vowelsByFormantCN;
        currentVowelFormantCeilValues = vowelFormantFloorCN;
    }

    this.amplitudeThreshold = amplitudeThreshold;
    // FFT requires a power-of-two window; every buffer below is sized from the clamped value.
    this.windowSize = Mathf.ClosestPowerOfTwo(windowSize);
    this.smoothedAudioSpectrum = new float[this.windowSize];
    this.gaussianFilter = MathToolBox.GenerateGaussianFilter(FILTER_SIZE, FILTER_DEVIATION_SQUARE);
    this.peakValues = new float[FORMANT_COUNT];
    this.peakPositions = new int[FORMANT_COUNT];
    this.formantArray = new float[FORMANT_COUNT];
}
/// <summary>
/// Editor-window activation callback: lazily creates the LPC model on first
/// enable and resets the UI defaults.
/// </summary>
private void OnEnable()
{
    if (model == null)
    {
        // First activation: build the model and seed the analysis parameters.
        model = new LpcModel();
        window = 30;
        step = 15;
    }

    // These two resets are independent of model creation and run every enable.
    debugFolder = true;
    language = ERecognizerLanguage.Japanese;
}
/// <summary>
/// Builds a runtime (real-time) recognizer: clamps the FFT window to a power
/// of two and allocates all audio/spectrum/formant work buffers.
/// </summary>
/// <param name="recognizingLanguage">Language whose vowel tables will be used.</param>
/// <param name="windowSize">Requested FFT window size; rounded to the closest power of two.</param>
/// <param name="amplitudeThreshold">Minimum amplitude below which frames are ignored.</param>
public LipSyncRuntimeRecognizer(ERecognizerLanguage recognizingLanguage, int windowSize, float amplitudeThreshold)
{
    this.recognizingLanguage = recognizingLanguage;
    this.amplitudeThreshold = amplitudeThreshold;

    // Clamp first so every buffer below is sized from the power-of-two value.
    this.windowSize = Mathf.ClosestPowerOfTwo(windowSize);
    int bufferLength = this.windowSize;
    this.playingAudioData = new float[bufferLength];
    this.playingAudioSpectrum = new float[bufferLength];
    this.smoothedAudioSpectrum = new float[bufferLength];

    this.gaussianFilter = MathToolBox.GenerateGaussianFilter(FILTER_SIZE, FILTER_DEVIATION_SQUARE);
    this.peakValues = new float[FORMANT_COUNT];
    this.peakPositions = new int[FORMANT_COUNT];
    this.formantArray = new float[FORMANT_COUNT];
}
/// <summary>
/// Builds an offline (whole-clip) recognizer: clamps the FFT window to a
/// power of two, allocates work buffers, and precomputes the Gaussian filter
/// and Hamming window used during analysis.
/// </summary>
/// <param name="recognizingLanguage">Language whose vowel tables will be used.</param>
/// <param name="amplitudeThreshold">Minimum amplitude below which frames are ignored.</param>
/// <param name="windowSize">Requested FFT window size; rounded to the closest power of two.</param>
/// <param name="shiftStepSize">Hop size (in samples) between successive analysis windows.</param>
public LipSyncOfflineRecognizer(ERecognizerLanguage recognizingLanguage, float amplitudeThreshold, int windowSize, int shiftStepSize)
{
    this.recognizingLanguage = recognizingLanguage;
    this.windowSize = Mathf.ClosestPowerOfTwo(windowSize);
    this.shiftStepSize = shiftStepSize;
    this.amplitudeThreshold = amplitudeThreshold;
    this.gaussianFilter = MathToolBox.GenerateGaussianFilter(FILTER_SIZE, FILTER_DEVIATION_SQUARE);
    // FIX: size the Hamming window from the clamped this.windowSize rather than
    // the raw parameter; otherwise a non-power-of-two request produces a window
    // array whose length mismatches smoothedAudioSpectrum and the FFT frames.
    this.windowArray = MathToolBox.GenerateWindow(this.windowSize, MathToolBox.EWindowType.Hamming);
    this.smoothedAudioSpectrum = new float[this.windowSize];
    this.peakValues = new float[FORMANT_COUNT];
    this.peakPositions = new int[FORMANT_COUNT];
    this.formantArray = new float[FORMANT_COUNT];
}
/// <summary>
/// Builds an offline (whole-clip) recognizer on top of the shared base
/// initialisation, adding the hop size and the precomputed Hamming window.
/// </summary>
/// <param name="recognizingLanguage">Language whose vowel tables will be used.</param>
/// <param name="amplitudeThreshold">Minimum amplitude below which frames are ignored.</param>
/// <param name="windowSize">Requested FFT window size; base.Init rounds it to the closest power of two.</param>
/// <param name="shiftStepSize">Hop size (in samples) between successive analysis windows.</param>
public LipSyncOfflineRecognizer(ERecognizerLanguage recognizingLanguage, float amplitudeThreshold, int windowSize, int shiftStepSize)
{
    base.Init(recognizingLanguage, windowSize, amplitudeThreshold);
    this.shiftStepSize = shiftStepSize;
    // FIX: base.Init stores Mathf.ClosestPowerOfTwo(windowSize) into this.windowSize;
    // generate the Hamming window from that clamped value, not the raw parameter,
    // so the window length always matches the buffers Init allocated.
    this.windowArray = MathToolBox.GenerateWindow(this.windowSize, MathToolBox.EWindowType.Hamming);
}
/// <summary>
/// Draws the analysis editor window: audio-clip picker and info panel,
/// recognizer parameters (window/step/language), the "Analy" action, and a
/// foldout of debug tools exercising the LPC math helpers.
/// </summary>
private void OnGUI()
{
    GUILayout.BeginVertical();
    GUILayout.Space(10);
    audioClip = (AudioClip)EditorGUILayout.ObjectField("Audio Clip", audioClip, typeof(AudioClip), false);
    GUILayout.Space(4);
    EditorGUILayout.BeginVertical(EditorStyles.textField);
    {
        if (audioClip)
        {
            // Show only the file name: strip everything up to the last '/' of the asset path.
            var pat = AssetDatabase.GetAssetPath(audioClip);
            pat = pat.Substring(pat.LastIndexOf('/') + 1);
            EditorGUILayout.BeginHorizontal();
            EditorGUILayout.BeginVertical();
            EditorGUILayout.LabelField(pat);
            EditorGUILayout.LabelField(string.Format("时 长 : {0:f2}", audioClip.length));
            EditorGUILayout.LabelField("声 道 : " + audioClip.channels);
            int steps = CulSteps(out var w, out var s);
            EditorGUILayout.LabelField("帧 数 : " + steps);
            EditorGUILayout.LabelField(string.Format("窗 口 : {0}帧, {1:f2}秒", w, s));
            EditorGUILayout.LabelField("采样率 : " + audioClip.frequency);
            EditorGUILayout.EndVertical();
            EditorGUILayout.Space();
            // Waveform preview thumbnail for the selected clip.
            tex = AssetPreview.GetAssetPreview(audioClip);
            GUIContent content = new GUIContent(tex, "wave");
            EditorGUILayout.LabelField(content, GUILayout.MinHeight(120));
            EditorGUILayout.EndHorizontal();
            // Keep the model's sample rate in sync with the selected clip.
            model.fs = audioClip.frequency;
        }
    }
    EditorGUILayout.EndVertical();
    window = EditorGUILayout.IntField("window", window);
    step = EditorGUILayout.IntField("step", step);
    language = (ERecognizerLanguage)EditorGUILayout.EnumPopup("language", language);
    // Pick the vowel/formant tables for the chosen language.
    // NOTE(review): a field named ...CeilValues receives ...Floor... arrays — confirm naming intent.
    switch (language)
    {
        case ERecognizerLanguage.Japanese:
            selectedVowels = LipSyncRecognizer.vowelsByFormantJP;
            currentVowelFormantCeilValues = LipSyncRecognizer.vowelFormantFloorJP;
            break;
        case ERecognizerLanguage.Chinese:
            selectedVowels = LipSyncRecognizer.vowelsByFormantCN;
            currentVowelFormantCeilValues = LipSyncRecognizer.vowelFormantFloorCN;
            break;
    }
    GUILayout.Space(4);
    if (GUILayout.Button("Analy"))
    {
        // Normalize the loaded samples, run LPC analysis, then render the result text.
        Normalize();
        var rst = model.Analy(audioBuffer, window, step);
        VowelsInfo(rst);
    }
    GUILayout.Space(4);
    debugFolder = EditorGUILayout.Foldout(debugFolder, "debug tools");
    if (debugFolder)
    {
        GUILayout.BeginHorizontal();
        if (GUILayout.Button("root"))
        {
            // Real-root finder smoke test; presumably coefficients are in
            // ascending power order (x^2 - 4) — confirm against FindRoots.
            float[] poly = new float[] { -4, 0, 1 };
            var ret = model.FindRoots(poly);
            foreach (var it in ret)
            {
                Debug.Log(it);
            }
        }
        if (GUILayout.Button("c-root"))
        {
            // Complex-root finder smoke test; presumably x^2 + 4 — confirm ordering.
            double[] poly = new Double[] { 4, 0, 1 };
            var roots = model.FindCRoots(poly);
            for (int i = 0; i < roots.Length; i++)
            {
                Debug.Log("i: " + roots[i]);
            }
        }
        if (GUILayout.Button("correlate"))
        {
            // Correlates a fixed sequence with itself and logs the result to 3 decimals.
            var a = new float[] { 0.3f, 0.1f, 0.2f, 0.4f, 0.3f, 0.5f, -1.6f, -2.5f, 1.6f, 3.2f, 1.34f, -4.1f, -5.34f };
            var t = model.Correlate(a, a);
            string str = "";
            for (int i = 0; i < t.Length; i++)
            {
                str += t[i].ToString("f3") + " ";
            }
            Debug.Log(str);
        }
        if (GUILayout.Button("toeplitz"))
        {
            // Builds a Toeplitz matrix from 16 values, inverts it, and logs the
            // inverse row by row (n derived from the flattened result length).
            var c = new double[] { 4, -2.6, 1.7, 4.3, 11, 21, 1.3, -3, 4, 11, 9, -4, 7, 12, 0.3, -7.0 };
            ToeplitzMtrix toeplitzMtrix = new ToeplitzMtrix(c);
            Debug.Log(toeplitzMtrix);
            var t = toeplitzMtrix.Inverse();
            int n = (int)Math.Sqrt((double)t.Length);
            string msg = "size: " + n;
            for (int i = 0; i < n; i++)
            {
                msg += "\n";
                for (int j = 0; j < n; j++)
                {
                    msg += t[i, j].ToString("f3") + "\t";
                }
            }
            Debug.Log(msg);
        }
        GUILayout.EndHorizontal();
    }
    GUILayout.Space(8);
    GUILayout.Label(vowelsInfo);
    GUILayout.EndVertical();
}
/// <summary>
/// Builds a runtime (real-time) recognizer by delegating the shared setup to
/// Init, then allocating the playback spectrum buffer.
/// </summary>
/// <param name="recognizingLanguage">Language whose vowel tables will be used.</param>
/// <param name="windowSize">Requested FFT window size; Init rounds it to the closest power of two.</param>
/// <param name="amplitudeThreshold">Minimum amplitude below which frames are ignored.</param>
public LipSyncRuntimeRecognizer(ERecognizerLanguage recognizingLanguage, int windowSize, float amplitudeThreshold)
{
    Init(recognizingLanguage, windowSize, amplitudeThreshold);
    // Init stores the power-of-two-clamped size into this.windowSize, so the
    // buffer is sized from the field rather than the raw parameter.
    // NOTE(review): unlike the non-Init constructor variant, playingAudioData is
    // not allocated here — confirm it is allocated elsewhere before use.
    playingAudioSpectrum = new float[this.windowSize];
}