/// <summary>
/// Classifies the spectrum currently held in playingAudioSpectrum and writes the
/// matching vowel into <paramref name="result"/>. When the summed spectrum is below
/// amplitudeThreshold, or no threshold entry is exceeded, result is left untouched.
/// </summary>
/// <param name="result">Receives the recognized vowel; only assigned on a match.</param>
/// <param name="sampleRate">Divided by windowSize to get the frequency step per spectrum bin.</param>
private void Recognize(ref string result, int sampleRate)
        {
            // Sum every spectrum bin straight into the shared field.
            amplitudeSum = 0.0f;
            foreach (float binAmplitude in playingAudioSpectrum)
            {
                amplitudeSum += binAmplitude;
            }

            // Written as a negated ">=" so that a NaN sum is rejected as well.
            if (!(amplitudeSum >= amplitudeThreshold))
            {
                return;
            }

            // Smooth the spectrum, then locate its largest local peaks.
            MathToolBox.Convolute(playingAudioSpectrum, gaussianFilter, MathToolBox.EPaddleType.Repeat, smoothedAudioSpectrum);
            MathToolBox.FindLocalLargestPeaks(smoothedAudioSpectrum, peakValues, peakPositions);

            // NOTE(review): sampleRate is an int and windowSize is used as an array length,
            // so this looks like a truncating integer division - confirm that is intended.
            frequencyUnit = sampleRate / windowSize;

            // Convert peak bin indices into frequencies.
            for (int formantIndex = 0; formantIndex < formantArray.Length; ++formantIndex)
            {
                formantArray[formantIndex] = peakPositions[formantIndex] * frequencyUnit;
            }

            // Scan every threshold in order; the last one exceeded by the first formant wins.
            for (int vowelIndex = 0; vowelIndex < currentVowelFormantCeilValues.Length; ++vowelIndex)
            {
                if (formantArray[0] > currentVowelFormantCeilValues[vowelIndex])
                {
                    result = currentVowels[vowelIndex];
                }
            }
        }
Example #2
0
        /// <summary>
        /// Selects the vowel tables for the requested language, stores the analysis
        /// settings and allocates the working buffers used during recognition.
        /// </summary>
        /// <param name="recognizingLanguage">Japanese or Chinese; any other value leaves the vowel tables unchanged.</param>
        /// <param name="windowSize">Requested window size; stored after Mathf.ClosestPowerOfTwo is applied.</param>
        /// <param name="amplitudeThreshold">Stored as-is in the amplitudeThreshold field.</param>
        protected void Init(ERecognizerLanguage recognizingLanguage, int windowSize, float amplitudeThreshold)
        {
            // NOTE(review): the "Ceil" field is filled from arrays named "Floor" - confirm which
            // naming reflects the intended meaning.
            if (recognizingLanguage == ERecognizerLanguage.Japanese)
            {
                currentVowels = vowelsByFormantJP;
                currentVowelFormantCeilValues = vowelFormantFloorJP;
            }
            else if (recognizingLanguage == ERecognizerLanguage.Chinese)
            {
                currentVowels = vowelsByFormantCN;
                currentVowelFormantCeilValues = vowelFormantFloorCN;
            }

            // The stored window size is the power-of-two value, not the raw argument.
            this.windowSize = Mathf.ClosestPowerOfTwo(windowSize);
            this.amplitudeThreshold = amplitudeThreshold;

            // One smoothed value per window sample; one slot per tracked formant.
            smoothedAudioSpectrum = new float[this.windowSize];
            peakValues = new float[FORMANT_COUNT];
            peakPositions = new int[FORMANT_COUNT];
            formantArray = new float[FORMANT_COUNT];

            gaussianFilter = MathToolBox.GenerateGaussianFilter(FILTER_SIZE, FILTER_DEVIATION_SQUARE);
        }
Example #3
0
        /// <summary>
        /// Computes one pair of frequencies per input frame. Each frame is pre-emphasized,
        /// multiplied by a Hamming window, passed to Estimate, and the roots returned by
        /// FindCRoots with a non-negative imaginary part are converted to frequencies and
        /// sorted ascending. The second and third smallest are kept.
        /// </summary>
        /// <param name="splitting">Frames to analyse; each must hold at least <c>window</c> samples.</param>
        /// <returns>One two-element array per frame, in input order.</returns>
        /// <remarks>Indexing fails if a frame yields fewer than three frequencies.</remarks>
        private List <double[]> Formants(List <float[]> splitting)
        {
            float preEmphasisFactor = 0.67f;
            List <double[]> formantPairs = new List <double[]>();

            for (int frameIndex = 0; frameIndex < splitting.Count; ++frameIndex)
            {
                float[] frame = PreEmphasis(splitting[frameIndex], preEmphasisFactor);
                float[] taper = MathToolBox.GenerateWindow(window, MathToolBox.EWindowType.Hamming);
                for (int sample = 0; sample < window; ++sample)
                {
                    frame[sample] *= taper[sample];
                }
                Debug.Log(frameIndex);

                var lpcCoefficients = Estimate(frame, 2 + fs / 1000);

                // Keep roots with a non-negative imaginary part and scale their argument by fs / (2 * PI).
                var frequencies = FindCRoots(lpcCoefficients)
                                  .Where(root => root.imag >= 0.0)
                                  .Select(root => root.arg * (fs / (2 * Mathf.PI)))
                                  .ToList();
                frequencies.Sort();

                // The lowest sorted frequency is skipped; entries 1 and 2 are reported.
                formantPairs.Add(new double[] { frequencies[1], frequencies[2] });
            }

            return formantPairs;
        }
Example #4
0
        /// <summary>
        /// Convolves data with filter, centering the filter on each sample, and writes one
        /// value per data element into output. Samples outside data are resolved by
        /// GetValueFromArray according to paddleType.
        /// </summary>
        /// <param name="data">Source data array.</param>
        /// <param name="filter">Filter array; its elements are applied in reverse order.</param>
        /// <param name="paddleType">Paddle type, forwarded to GetValueFromArray for every lookup.</param>
        /// <param name="output">Array to store output. Must not be shorter than data.</param>
        public static void Convolute(float[] data, float[] filter, EPaddleType paddleType, float[] output)
        {
            int tapCount = filter.Length;
            int halfWidth = Mathf.FloorToInt(tapCount / 2);

            for (int sample = 0; sample < data.Length; ++sample)
            {
                // Index of the first data sample covered by the filter for this output sample.
                int windowStart = sample - halfWidth;

                output[sample] = 0.0f;
                for (int tap = 0; tap < tapCount; ++tap)
                {
                    // The filter is walked back-to-front while the data is walked front-to-back.
                    output[sample] += MathToolBox.GetValueFromArray(data, windowStart + tap, paddleType) * filter[tapCount - tap - 1];
                }
            }
        }
Example #5
0
        /// <summary>
        /// Finds the roots of the polynomial described by <paramref name="dpoly"/> using
        /// MathToolBox.poly_roots and returns them as Complex values.
        /// </summary>
        /// <param name="dpoly">Polynomial coefficients; they are reversed before being handed to poly_roots.</param>
        /// <returns>
        /// One Complex per root (coefficient count minus one); an empty array when
        /// <paramref name="dpoly"/> contains no coefficients.
        /// </returns>
        public Complex[] FindCRoots(IEnumerable <double> dpoly)
        {
            // Enumerate the sequence exactly once so a lazy or one-shot source cannot
            // report a different length than the array actually passed to poly_roots.
            double[] reversed = dpoly.Reverse().ToArray();
            int len = reversed.Length;

            // No coefficients means no roots; this also avoids negative array sizes below.
            if (len == 0)
            {
                return new Complex[0];
            }

            int rootCount = len - 1;

            // poly_roots fills two doubles per root.
            double[] ret = new double[rootCount * 2];
            MathToolBox.poly_roots(len, reversed, ret);

            Complex[] cpx = new Complex[rootCount];
            for (int i = 0; i < rootCount; i++)
            {
                // Consecutive pairs in ret form the two constructor arguments of each root
                // (presumably real and imaginary part - confirm against poly_roots).
                cpx[i] = new Complex(ret[2 * i], ret[2 * i + 1]);
            }
            return cpx;
        }
        /// <summary>
        /// Steps through <paramref name="audioClip"/> in increments of shiftStepSize samples
        /// and recognizes one vowel per step. Each step reads windowSize samples, applies
        /// windowArray, transforms the frame with MathToolBox.DiscreteCosineTransform and,
        /// when the summed spectrum reaches amplitudeThreshold, matches the first formant
        /// against the current vowel thresholds.
        /// </summary>
        /// <param name="audioClip">Clip to analyse.</param>
        /// <returns>
        /// One entry per step; entries stay null for steps below the amplitude threshold
        /// or without a matching vowel.
        /// </returns>
        public string[] RecognizeAllByAudioClip(AudioClip audioClip)
        {
            int stepCount = Mathf.CeilToInt((float)(audioClip.samples) / (float)(shiftStepSize));

            string[] result = new string[stepCount];
            float[]  frame  = new float[this.windowSize];

            for (int step = 0; step < stepCount; ++step)
            {
                // Read the frame for this step and apply the window function in place.
                audioClip.GetData(frame, step * shiftStepSize);
                for (int sample = 0; sample < windowSize; ++sample)
                {
                    frame[sample] *= windowArray[sample];
                }

                float[] spectrum = MathToolBox.DiscreteCosineTransform(frame);

                amplitudeSum = 0.0f;
                for (int bin = 0; bin < windowSize; ++bin)
                {
                    amplitudeSum += spectrum[bin];
                }

                // Written as a negated ">=" so that a NaN sum is skipped as well.
                if (!(amplitudeSum >= amplitudeThreshold))
                {
                    continue;
                }

                MathToolBox.Convolute(spectrum, gaussianFilter, MathToolBox.EPaddleType.Repeat, smoothedAudioSpectrum);
                MathToolBox.FindLocalLargestPeaks(smoothedAudioSpectrum, peakValues, peakPositions);

                // NOTE(review): windowSize is used as an array length and AudioClip.frequency is
                // presumably an int, so this looks like a truncating integer division - confirm.
                frequencyUnit = audioClip.frequency / 2 / windowSize;
                for (int formantIndex = 0; formantIndex < formantArray.Length; ++formantIndex)
                {
                    formantArray[formantIndex] = peakPositions[formantIndex] * frequencyUnit;
                }

                // Scan every threshold in order; the last one exceeded by the first formant wins.
                for (int vowelIndex = 0; vowelIndex < currentVowelFormantCeilValues.Length; ++vowelIndex)
                {
                    if (formantArray[0] > currentVowelFormantCeilValues[vowelIndex])
                    {
                        result[step] = currentVowels[vowelIndex];
                    }
                }
            }

            return result;
        }
Example #7
0
        /// <summary>
        /// Runs LPC formant analysis on every frame in <paramref name="splitting"/> and
        /// records the result through AppendInfo. The info field is reset to an empty
        /// string before the first frame is processed.
        /// </summary>
        /// <param name="splitting">Frames to analyse; each must hold at least <c>window</c> samples.</param>
        private void Formant(List <float[]> splitting)
        {
            float a   = 0.67f;
            var   lpc = new LpcModel();

            info = String.Empty;

            // The window depends only on its length and type, so build it once for all frames.
            var w = MathToolBox.GenerateWindow(window, MathToolBox.EWindowType.Hamming);

            for (int i = 0; i < splitting.Count; i++)
            {
                var FL = PreEmphasis(splitting[i], a);

                // Apply the window sample by sample. The index must be the sample index j,
                // not the frame index i; using i scaled a single element repeatedly and
                // left the rest of the frame unwindowed.
                for (int j = 0; j < window; j++)
                {
                    FL[j] = FL[j] * w[j];
                }

                var coefficients = lpc.EstimateLpcCoefficients(FL, 2 + fs / 1000);
                var formants     = lpc.FindFormants(coefficients, fs);
                AppendInfo(i, formants);
            }
        }
 /// <summary>
 /// Creates an offline recognizer: initializes the shared recognizer state through
 /// Init, stores the step size and builds the Hamming window applied to each frame.
 /// </summary>
 /// <param name="recognizingLanguage">Language whose vowel tables are used.</param>
 /// <param name="amplitudeThreshold">Minimum summed spectrum amplitude for a frame to be recognized.</param>
 /// <param name="windowSize">Requested window size; Init stores it after Mathf.ClosestPowerOfTwo is applied.</param>
 /// <param name="shiftStepSize">Number of samples between consecutive analysis frames.</param>
 public LipSyncOfflineRecognizer(ERecognizerLanguage recognizingLanguage, float amplitudeThreshold, int windowSize, int shiftStepSize)
 {
     base.Init(recognizingLanguage, windowSize, amplitudeThreshold);
     this.shiftStepSize = shiftStepSize;

     // Use the window size stored by Init rather than the raw argument: the frame buffers
     // and the per-sample windowing loop are sized by this.windowSize, so the window array
     // has to have that same length when the argument is not already a power of two.
     this.windowArray   = MathToolBox.GenerateWindow(this.windowSize, MathToolBox.EWindowType.Hamming);
 }