예제 #1
0
        /// <summary>
        /// Sets up the state shared by the recognizers: selects the vowel tables for the
        /// requested language, stores the analysis parameters and allocates the work buffers.
        /// </summary>
        /// <param name="recognizingLanguage">Language whose vowel / formant tables become the current ones.</param>
        /// <param name="windowSize">Requested window size; the value stored is <c>Mathf.ClosestPowerOfTwo(windowSize)</c>.</param>
        /// <param name="amplitudeThreshold">Stored unchanged in the <c>amplitudeThreshold</c> field.</param>
        protected void Init(ERecognizerLanguage recognizingLanguage, int windowSize, float amplitudeThreshold)
        {
            // Only Japanese and Chinese are handled; any other value leaves the current tables untouched.
            if (recognizingLanguage == ERecognizerLanguage.Japanese)
            {
                currentVowels                 = vowelsByFormantJP;
                currentVowelFormantCeilValues = vowelFormantFloorJP;
            }
            else if (recognizingLanguage == ERecognizerLanguage.Chinese)
            {
                currentVowels                 = vowelsByFormantCN;
                currentVowelFormantCeilValues = vowelFormantFloorCN;
            }

            // Analysis parameters. The field holds the rounded size, not the raw argument.
            this.windowSize         = Mathf.ClosestPowerOfTwo(windowSize);
            this.amplitudeThreshold = amplitudeThreshold;

            // Spectrum buffer follows the stored window size.
            smoothedAudioSpectrum = new float[this.windowSize];

            // One slot per formant.
            peakValues    = new float[FORMANT_COUNT];
            peakPositions = new int[FORMANT_COUNT];
            formantArray  = new float[FORMANT_COUNT];

            gaussianFilter = MathToolBox.GenerateGaussianFilter(FILTER_SIZE, FILTER_DEVIATION_SQUARE);
        }
예제 #2
0
 /// <summary>
 /// Resets the language and debug-foldout state, and creates the LPC model with its
 /// default window / step values the first time it is needed.
 /// </summary>
 private void OnEnable()
 {
     // Assigned unconditionally on every call.
     language    = ERecognizerLanguage.Japanese;
     debugFolder = true;

     // An existing model (and its window / step settings) is kept as it is.
     if (model != null)
     {
         return;
     }

     model  = new LpcModel();
     window = 30;
     step   = 15;
 }
        /// <summary>
        /// Creates a runtime recognizer and allocates all of its working buffers.
        /// </summary>
        /// <param name="recognizingLanguage">Language stored in <c>recognizingLanguage</c>.</param>
        /// <param name="windowSize">Requested window size; the value stored is <c>Mathf.ClosestPowerOfTwo(windowSize)</c>.</param>
        /// <param name="amplitudeThreshold">Stored unchanged in <c>amplitudeThreshold</c>.</param>
        public LipSyncRuntimeRecognizer(ERecognizerLanguage recognizingLanguage, int windowSize, float amplitudeThreshold)
        {
            // Plain configuration values.
            this.recognizingLanguage = recognizingLanguage;
            this.amplitudeThreshold  = amplitudeThreshold;
            this.windowSize          = Mathf.ClosestPowerOfTwo(windowSize);

            // Buffers whose length is the stored (rounded) window size.
            playingAudioData      = new float[this.windowSize];
            playingAudioSpectrum  = new float[this.windowSize];
            smoothedAudioSpectrum = new float[this.windowSize];

            gaussianFilter = MathToolBox.GenerateGaussianFilter(FILTER_SIZE, FILTER_DEVIATION_SQUARE);

            // One slot per formant.
            peakValues    = new float[FORMANT_COUNT];
            peakPositions = new int[FORMANT_COUNT];
            formantArray  = new float[FORMANT_COUNT];
        }
        /// <summary>
        /// Creates an offline recognizer and allocates its filter, window and working buffers.
        /// </summary>
        /// <param name="recognizingLanguage">Language stored in <c>recognizingLanguage</c>.</param>
        /// <param name="amplitudeThreshold">Stored unchanged in <c>amplitudeThreshold</c>.</param>
        /// <param name="windowSize">Requested window size; the value stored is <c>Mathf.ClosestPowerOfTwo(windowSize)</c>.</param>
        /// <param name="shiftStepSize">Stored unchanged in <c>shiftStepSize</c>.</param>
        public LipSyncOfflineRecognizer(ERecognizerLanguage recognizingLanguage, float amplitudeThreshold, int windowSize, int shiftStepSize)
        {
            this.recognizingLanguage = recognizingLanguage;
            this.windowSize          = Mathf.ClosestPowerOfTwo(windowSize);
            this.shiftStepSize       = shiftStepSize;

            this.amplitudeThreshold = amplitudeThreshold;
            this.gaussianFilter     = MathToolBox.GenerateGaussianFilter(FILTER_SIZE, FILTER_DEVIATION_SQUARE);

            // Build the Hamming window from the rounded field, not the raw argument.
            // Otherwise a non-power-of-two request yields a window whose length differs
            // from the windowSize-sized buffers allocated below.
            this.windowArray = MathToolBox.GenerateWindow(this.windowSize, MathToolBox.EWindowType.Hamming);

            this.smoothedAudioSpectrum = new float[this.windowSize];
            this.peakValues            = new float[FORMANT_COUNT];
            this.peakPositions         = new int[FORMANT_COUNT];
            this.formantArray          = new float[FORMANT_COUNT];
        }
예제 #5
0
 /// <summary>
 /// Creates an offline recognizer: runs the shared <c>Init</c> set-up, then stores the
 /// shift step and builds the Hamming window.
 /// </summary>
 /// <param name="recognizingLanguage">Forwarded to <c>Init</c>.</param>
 /// <param name="amplitudeThreshold">Forwarded to <c>Init</c>.</param>
 /// <param name="windowSize">Requested window size, forwarded to <c>Init</c>.</param>
 /// <param name="shiftStepSize">Stored unchanged in <c>shiftStepSize</c>.</param>
 public LipSyncOfflineRecognizer(ERecognizerLanguage recognizingLanguage, float amplitudeThreshold, int windowSize, int shiftStepSize)
 {
     base.Init(recognizingLanguage, windowSize, amplitudeThreshold);
     this.shiftStepSize = shiftStepSize;

     // Use the window size stored by Init (this.windowSize) rather than the raw argument,
     // so the window array has the same length as the buffers Init sized from that field.
     // NOTE(review): relies on Init assigning this.windowSize - confirm against the base class.
     this.windowArray = MathToolBox.GenerateWindow(this.windowSize, MathToolBox.EWindowType.Hamming);
 }
예제 #6
0
        /// <summary>
        /// Draws the whole window: the audio clip picker and clip summary, the window / step /
        /// language settings, the "Analy" button, the foldable debug tools and the vowel report label.
        /// </summary>
        private void OnGUI()
        {
            GUILayout.BeginVertical();
            GUILayout.Space(10);
            audioClip = (AudioClip)EditorGUILayout.ObjectField("Audio Clip", audioClip, typeof(AudioClip), false);
            GUILayout.Space(4);

            // Boxed area that shows the clip summary once a clip is assigned.
            EditorGUILayout.BeginVertical(EditorStyles.textField);
            if (audioClip)
            {
                DrawClipSummary();
            }
            EditorGUILayout.EndVertical();

            // Analysis settings.
            window   = EditorGUILayout.IntField("window", window);
            step     = EditorGUILayout.IntField("step", step);
            language = (ERecognizerLanguage)EditorGUILayout.EnumPopup("language", language);

            // Keep the vowel tables in step with the chosen language; other values change nothing.
            if (language == ERecognizerLanguage.Japanese)
            {
                selectedVowels                = LipSyncRecognizer.vowelsByFormantJP;
                currentVowelFormantCeilValues = LipSyncRecognizer.vowelFormantFloorJP;
            }
            else if (language == ERecognizerLanguage.Chinese)
            {
                selectedVowels                = LipSyncRecognizer.vowelsByFormantCN;
                currentVowelFormantCeilValues = LipSyncRecognizer.vowelFormantFloorCN;
            }
            GUILayout.Space(4);

            if (GUILayout.Button("Analy"))
            {
                Normalize();
                VowelsInfo(model.Analy(audioBuffer, window, step));
            }
            GUILayout.Space(4);

            debugFolder = EditorGUILayout.Foldout(debugFolder, "debug tools");
            if (debugFolder)
            {
                GUILayout.BeginHorizontal();
                if (GUILayout.Button("root"))
                {
                    LogRealRoots();
                }
                if (GUILayout.Button("c-root"))
                {
                    LogComplexRoots();
                }
                if (GUILayout.Button("correlate"))
                {
                    LogSelfCorrelation();
                }
                if (GUILayout.Button("toeplitz"))
                {
                    LogToeplitzInverse();
                }
                GUILayout.EndHorizontal();
            }

            GUILayout.Space(8);
            GUILayout.Label(vowelsInfo);
            GUILayout.EndVertical();

            // Clip file name, basic properties and preview texture, laid out side by side.
            void DrawClipSummary()
            {
                var assetPath = AssetDatabase.GetAssetPath(audioClip);
                var fileName  = assetPath.Substring(assetPath.LastIndexOf('/') + 1);

                EditorGUILayout.BeginHorizontal();

                EditorGUILayout.BeginVertical();
                EditorGUILayout.LabelField(fileName);
                EditorGUILayout.LabelField(string.Format("时 长  : {0:f2}", audioClip.length));
                EditorGUILayout.LabelField("声 道  : " + audioClip.channels);
                int frameCount = CulSteps(out var windowFrames, out var windowSeconds);
                EditorGUILayout.LabelField("帧 数  : " + frameCount);
                EditorGUILayout.LabelField(string.Format("窗 口  : {0}帧, {1:f2}秒", windowFrames, windowSeconds));
                EditorGUILayout.LabelField("采样率 : " + audioClip.frequency);
                EditorGUILayout.EndVertical();

                EditorGUILayout.Space();

                tex = AssetPreview.GetAssetPreview(audioClip);
                EditorGUILayout.LabelField(new GUIContent(tex, "wave"), GUILayout.MinHeight(120));

                EditorGUILayout.EndHorizontal();

                // The model's fs field follows the clip's sampling frequency.
                model.fs = audioClip.frequency;
            }

            // Logs each value returned by model.FindRoots for the fixed coefficients { -4, 0, 1 }.
            void LogRealRoots()
            {
                var coefficients = new float[] { -4, 0, 1 };
                foreach (var root in model.FindRoots(coefficients))
                {
                    Debug.Log(root);
                }
            }

            // Logs each value returned by model.FindCRoots for the fixed coefficients { 4, 0, 1 }.
            void LogComplexRoots()
            {
                var coefficients = new double[] { 4, 0, 1 };
                var complexRoots = model.FindCRoots(coefficients);
                for (int k = 0; k < complexRoots.Length; k++)
                {
                    Debug.Log("i: " + complexRoots[k]);
                }
            }

            // Logs model.Correlate of a fixed sample sequence with itself, one line, 3 decimals each.
            void LogSelfCorrelation()
            {
                var samples    = new float[] { 0.3f, 0.1f, 0.2f, 0.4f, 0.3f, 0.5f, -1.6f, -2.5f, 1.6f, 3.2f, 1.34f, -4.1f, -5.34f };
                var correlated = model.Correlate(samples, samples);
                var line       = new System.Text.StringBuilder();
                for (int k = 0; k < correlated.Length; k++)
                {
                    line.Append(correlated[k].ToString("f3")).Append(" ");
                }
                Debug.Log(line.ToString());
            }

            // Builds a ToeplitzMtrix from fixed values, logs it, then logs its inverse as a tab-separated grid.
            void LogToeplitzInverse()
            {
                var values = new double[]
                {
                    4, -2.6, 1.7, 4.3, 11, 21, 1.3, -3, 4, 11, 9, -4, 7, 12, 0.3, -7.0
                };
                ToeplitzMtrix matrix = new ToeplitzMtrix(values);
                Debug.Log(matrix);

                var inverse = matrix.Inverse();
                // Side length is derived from the total element count.
                int side   = (int)Math.Sqrt((double)inverse.Length);
                var report = new System.Text.StringBuilder();
                report.Append("size: ").Append(side);
                for (int row = 0; row < side; row++)
                {
                    report.Append("\n");
                    for (int col = 0; col < side; col++)
                    {
                        report.Append(inverse[row, col].ToString("f3")).Append("\t");
                    }
                }
                Debug.Log(report.ToString());
            }
        }
예제 #7
0
 /// <summary>
 /// Creates a runtime recognizer: runs the shared <c>Init</c> set-up, then allocates the
 /// playing-audio spectrum buffer.
 /// </summary>
 /// <param name="recognizingLanguage">Forwarded to <c>Init</c>.</param>
 /// <param name="windowSize">Requested window size, forwarded to <c>Init</c>.</param>
 /// <param name="amplitudeThreshold">Forwarded to <c>Init</c>.</param>
 public LipSyncRuntimeRecognizer(ERecognizerLanguage recognizingLanguage, int windowSize, float amplitudeThreshold)
 {
     this.Init(recognizingLanguage, windowSize, amplitudeThreshold);

     // Sized from the windowSize field (not the raw argument), so it must come after Init.
     this.playingAudioSpectrum = new float[this.windowSize];
 }