/// <summary>
/// Classifies the spectrum currently stored in <c>playingAudioSpectrum</c> as one of
/// <c>currentVowels</c>, based on the first entry of <c>formantArray</c>.
/// </summary>
/// <param name="result">
/// Overwritten with the matching vowel. Left untouched when the summed spectrum is below
/// <c>amplitudeThreshold</c> or when no ceiling value is exceeded.
/// </param>
/// <param name="sampleRate">
/// Divided by <c>windowSize</c> to obtain <c>frequencyUnit</c>, the factor that turns a
/// peak position into a formant value.
/// </param>
private void Recognize(ref string result, int sampleRate)
{
    // Sum every spectrum bin into the amplitudeSum field.
    amplitudeSum = 0.0f;
    foreach (var binValue in playingAudioSpectrum)
    {
        amplitudeSum += binValue;
    }

    // Deliberately a negated ">=" rather than "<": a NaN sum must also bail out here.
    if (!(amplitudeSum >= amplitudeThreshold))
    {
        return;
    }

    // Smooth the spectrum with the gaussian filter, then locate its peaks.
    MathToolBox.Convolute(
        playingAudioSpectrum,
        gaussianFilter,
        MathToolBox.EPaddleType.Repeat,
        smoothedAudioSpectrum);
    MathToolBox.FindLocalLargestPeaks(smoothedAudioSpectrum, peakValues, peakPositions);

    // NOTE(review): if sampleRate and windowSize are both integers this division truncates
    // before it is stored - confirm that is intended.
    frequencyUnit = sampleRate / windowSize;

    // Convert each peak position into a formant value.
    int formantCount = formantArray.Length;
    for (int formantIndex = 0; formantIndex < formantCount; ++formantIndex)
    {
        formantArray[formantIndex] = peakPositions[formantIndex] * frequencyUnit;
    }

    // Every exceeded ceiling overwrites the result, so the last matching entry wins.
    int vowelCount = currentVowelFormantCeilValues.Length;
    for (int vowelIndex = 0; vowelIndex < vowelCount; ++vowelIndex)
    {
        if (formantArray[0] > currentVowelFormantCeilValues[vowelIndex])
        {
            result = currentVowels[vowelIndex];
        }
    }
}
/// <summary>
/// Runs vowel recognition over an entire clip, one analysis window at a time, advancing
/// <c>shiftStepSize</c> samples between consecutive windows.
/// </summary>
/// <param name="audioClip">Clip whose sample data is analysed.</param>
/// <returns>
/// One entry per analysed window (<c>ceil(audioClip.samples / shiftStepSize)</c> entries).
/// An entry stays <c>null</c> when the window's summed spectrum is below
/// <c>amplitudeThreshold</c> or when no formant ceiling value is exceeded.
/// </returns>
public string[] RecognizeAllByAudioClip(AudioClip audioClip)
{
    int recognizeSampleCount = Mathf.CeilToInt((float)(audioClip.samples) / (float)(shiftStepSize));
    string[] result = new string[recognizeSampleCount];

    // Reused for every window; GetData refills it on each iteration.
    float[] currentAudioData = new float[this.windowSize];

    for (int i = 0; i < recognizeSampleCount; ++i)
    {
        // NOTE(review): the last windows start fewer than windowSize samples before the end
        // of the clip - check how AudioClip.GetData fills the buffer in that case.
        audioClip.GetData(currentAudioData, i * shiftStepSize);

        // Apply the window function in place before transforming.
        for (int j = 0; j < windowSize; ++j)
        {
            currentAudioData[j] *= windowArray[j];
        }

        // The transform returns its own array, so no spectrum buffer is pre-allocated.
        float[] currentAudioSpectrum = MathToolBox.DiscreteCosineTransform(currentAudioData);

        // Sum the first windowSize spectrum bins into the amplitudeSum field.
        amplitudeSum = 0.0f;
        for (int k = 0; k < windowSize; ++k)
        {
            amplitudeSum += currentAudioSpectrum[k];
        }

        // Negated ">=" (not "<") so a NaN sum is skipped exactly as before.
        if (!(amplitudeSum >= amplitudeThreshold))
        {
            continue;
        }

        // Smooth the spectrum with the gaussian filter, then locate its peaks.
        MathToolBox.Convolute(
            currentAudioSpectrum,
            gaussianFilter,
            MathToolBox.EPaddleType.Repeat,
            smoothedAudioSpectrum);
        MathToolBox.FindLocalLargestPeaks(smoothedAudioSpectrum, peakValues, peakPositions);

        // NOTE(review): if the operands are all integers this division truncates before it
        // is stored - confirm that is intended.
        frequencyUnit = audioClip.frequency / 2 / windowSize;

        // Convert each peak position into a formant value.
        for (int l = 0; l < formantArray.Length; ++l)
        {
            formantArray[l] = peakPositions[l] * frequencyUnit;
        }

        // Every exceeded ceiling overwrites the entry, so the last matching vowel wins.
        for (int m = 0; m < currentVowelFormantCeilValues.Length; ++m)
        {
            if (formantArray[0] > currentVowelFormantCeilValues[m])
            {
                result[i] = currentVowels[m];
            }
        }
    }

    return result;
}