/// <summary>
/// Dequeues the oldest recognized audio sample, extracts its emotion level,
/// and posts an EmotionRecognized event back on the captured synchronization
/// context. Returns true if a sample was processed, false if the queue was empty.
/// </summary>
public bool scheduler()
{
    lock (recognizedAudioQueue)
    {
        if (recognizedAudioQueue.Count > 0)
        {
            // Get the audio sample that has been on the queue the longest.
            RecognizedAudio recognizedAudio = recognizedAudioQueue[0];
            recognizedAudioQueue.RemoveAt(0);

            // Calculate the emotion.
            int emotion = extractEmotion(recognizedAudio);

            // Message back with the extracted emotion.
            if (EmotionRecognized != null)
            {
                syncContext.Post(delegate
                {
                    EmotionRecognized(this, new EmotionRecognizedEventArgs(emotion));
                }, null);
            }
            return true;
        }
    }
    return false;
}
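The snippet above assumes an EmotionRecognized event and an EmotionRecognizedEventArgs type defined elsewhere in the project. A minimal sketch of that plumbing, matching only the constructor call visible above (the real definitions may carry more data):

using System;
using System.Threading;

// Hypothetical event-args type matching new EmotionRecognizedEventArgs(emotion).
public class EmotionRecognizedEventArgs : EventArgs
{
    public int Emotion { get; }
    public EmotionRecognizedEventArgs(int emotion) { Emotion = emotion; }
}

// Declared on the engine class; syncContext would typically be captured on the
// UI thread, e.g. syncContext = SynchronizationContext.Current in the constructor.
public event EventHandler<EmotionRecognizedEventArgs> EmotionRecognized;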
internal static void DisplayBasicPhraseInfo(string text, RecognizedPhrase result, SpeechRecognizer recognizer)
{
    if (result != null && text != null)
    {
        if (recognizer != null)
        {
            text += String.Format(
                "  Recognizer currently at:   {0} mSec\n" +
                "  Audio Device currently at: {1} mSec\n",
                recognizer.RecognizerAudioPosition.TotalMilliseconds,
                recognizer.AudioPosition.TotalMilliseconds);
        }

        // Only a live RecognitionResult carries audio; an emulated
        // recognition has no audio attached.
        RecognitionResult recResult = result as RecognitionResult;
        if (recResult != null)
        {
            RecognizedAudio resultRecognizedAudio = recResult.Audio;
            if (resultRecognizedAudio == null)
            {
                text += String.Format("  Emulated input\n");
            }
            else
            {
                text += String.Format(
                    "  Candidate Phrase at:       {0} mSec\n" +
                    "  Phrase Length:             {1} mSec\n" +
                    "  Input State Time:          {2}\n" +
                    "  Input Format:              {3}\n",
                    resultRecognizedAudio.AudioPosition.TotalMilliseconds,
                    resultRecognizedAudio.Duration.TotalMilliseconds,
                    resultRecognizedAudio.StartTime.ToShortTimeString(),
                    resultRecognizedAudio.Format.EncodingFormat.ToString());
            }
        }

        text += String.Format("  Confidence Level:          {0}\n", result.Confidence);

        if (result.Grammar != null)
        {
            text += String.Format(
                "  Recognizing Grammar:       {0}\n" +
                "  Recognizing Rule:          {1}\n",
                result.Grammar.Name ?? "None",
                result.Grammar.RuleName ?? "None");
        }

        if (result.ReplacementWordUnits.Count != 0)
        {
            text += String.Format("  Replacement text:\n");
            foreach (ReplacementText rep in result.ReplacementWordUnits)
            {
                text += String.Format("      At index {0} for {1} words. Text: {2}\n",
                    rep.FirstWordIndex, rep.CountOfWords, rep.Text);
            }
        }

        // Print the assembled report. (In the original flattened listing this
        // call sat inside the replacement-text block, which would skip output
        // for phrases without replacements.)
        text += String.Format("\n");
        Console.WriteLine(text);
    }
}
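A typical call site, assuming a shared recognizer and a SpeechRecognized handler wired elsewhere (the names here are illustrative, not from the original project):

SpeechRecognizer sharedRecognizer = new SpeechRecognizer();
sharedRecognizer.SpeechRecognized += (sender, e) =>
{
    // Seed with an empty string; the method appends its report and prints it.
    DisplayBasicPhraseInfo(string.Empty, e.Result, sharedRecognizer);
};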
// Handle the SpeechRecognized event of the name grammar.
public static void NameSpeechRecognized(object sender, SpeechRecognizedEventArgs e)
{
    //Console.WriteLine("Grammar ({0}) recognized speech: {1}",
    //    e.Result.Grammar.Name, e.Result.Text);

    try
    {
        RecognizedAudio audio = e.Result.Audio;

        // Add code to verify and persist the audio.
        string path = @"C:\temp\passwordAudio.wav";
        using (Stream outputStream = new FileStream(path, FileMode.Create))
        {
            //RecognizedAudio passwordAudio = audio.GetRange(start, duration);
            RecognizedAudio passwordAudio = audio;
            passwordAudio.WriteToWaveStream(outputStream);
        }

        Thread testThread = new Thread(new ParameterizedThreadStart(TestAudio));
        testThread.Start(path);
    }
    catch (Exception ex)
    {
        Console.WriteLine("Exception thrown while processing audio:");
        Console.WriteLine(ex.ToString());
    }
}
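TestAudio is not shown in this snippet; a plausible minimal sketch that replays the saved file for verification, assuming the WAV written by WriteToWaveStream is a PCM format SoundPlayer can handle:

private static void TestAudio(object pathObj)
{
    string path = (string)pathObj;
    using (var player = new System.Media.SoundPlayer(path))
    {
        player.PlaySync(); // blocks this worker thread until playback finishes
    }
}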
void recognizer_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
{
    RecognizedAudio audio = e.Result.Audio;

    // GetRange offsets are relative to the start of the recognized audio, so
    // TimeSpan.Zero plus the full duration extracts the whole phrase. (The
    // original computed audio.AudioPosition - audio.AudioPosition, which is
    // just a roundabout zero.)
    TimeSpan start = TimeSpan.Zero;
    TimeSpan duration = audio.Duration;

    // Add code to verify and persist the audio.
    string path = @"E:\Deep Learning\Understanding Simple Speech Commands\nameAudio.wav";
    using (Stream outputStream = new FileStream(path, FileMode.Create))
    {
        RecognizedAudio nameAudio = audio.GetRange(start, duration);
        nameAudio.WriteToWaveStream(outputStream);
    }

    // Run the same WAV through PocketSphinx.
    python_runner.python_path = @"C:\Users\admin\AppData\Local\Programs\Python\Python36\python.exe";
    python_runner.script_path = @"E:\Deep Learning\Understanding Simple Speech Commands\SpeechRecognition\from_file.py";
    python_runner.arguments.Clear(); // assuming arguments persist between runs, avoid stacking the path
    python_runner.arguments.Add(path);
    python_runner.run();
    string sphinx_result = python_runner.results;

    // ...and through DeepSpeech. The original reads python_runner.errors here,
    // presumably because the client script writes its transcript to stderr.
    python_runner.python_path = @"C:\Users\admin\AppData\Local\Programs\Python\Python36\python.exe";
    python_runner.script_path = @"E:\Deep Learning\Understanding Simple Speech Commands\deepspeech\client_from_file.py";
    python_runner.arguments.Clear();
    python_runner.arguments.Add(path);
    python_runner.run();
    string deep_speech_result = python_runner.errors;

    textBox1.Text = e.Result.Text
        + "----------------" + sphinx_result
        + "----------------" + deep_speech_result;

    /*
    txt_out.AppendText(e.Result.Text + ", " + (int)(100 * e.Result.Confidence) + Environment.NewLine);

    //char beginer = (char)7;
    //char terminator = (char)10;
    //string message = beginer + getCommandForWord(e.Result.Text) + terminator;

    string message = getCommandForWord(e.Result.Text, e.Result.Confidence);
    var bytes = ASCIIEncoding.ASCII.GetBytes(message);
    foreach (var c in clients)
    {
        c.GetStream().Write(bytes, 0, bytes.Length);
    }
    */
}
/// <summary>
/// Called with new audio when a word is detected.
/// </summary>
/// <param name="inputAudio">The recognized audio to queue for analysis.</param>
public void hereIsAudio(RecognizedAudio inputAudio)
{
    // Add the audio to the queue to be analyzed.
    lock (recognizedAudioQueue)
    {
        recognizedAudioQueue.Add(inputAudio);
    }
    stateChanged();
}
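How the queue gets fed is not shown in this snippet; one plausible wiring, assuming a System.Speech SpeechRecognitionEngine and an instance of this class named emotionEngine (both names are illustrative):

var recognizer = new SpeechRecognitionEngine();
recognizer.LoadGrammar(new DictationGrammar());
recognizer.SetInputToDefaultAudioDevice();
recognizer.SpeechRecognized += (s, e) => emotionEngine.hereIsAudio(e.Result.Audio);
recognizer.RecognizeAsync(RecognizeMode.Multiple); // keep listening across phrases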
public short[] getArrayFromRecognizedAudio(RecognizedAudio inputAudio)
{
    SpeechAudioFormatInfo speechAudioFormatInfo = inputAudio.Format;

    // Put the raw audio into a byte array.
    // A 16-bit short is used for samples because 16 bits is the max sample size.
    byte[] byteArray;
    using (MemoryStream audioStream = new MemoryStream())
    {
        inputAudio.WriteToAudioStream(audioStream);
        byteArray = audioStream.ToArray();
    }

    /* // For debugging: print out the byte audio.
    String output = "audioByteArray: ";
    for (int i = 0; i < byteArray.Length; ++i)
        output += byteArray[i] + ".";
    System.Diagnostics.Debug.WriteLine(output);
    */

    // Convert byteArray[] to short[], keeping channels interleaved.
    // bytes / BlockAlign = frames; frames * ChannelCount = interleaved samples.
    long numSamplesInAudio = byteArray.Length / speechAudioFormatInfo.BlockAlign
        * speechAudioFormatInfo.ChannelCount;
    short[] audioArray = new short[numSamplesInAudio];

    // Step by the size of one sample (BlockAlign / ChannelCount bytes).
    for (int i = 0; i < byteArray.Length;
         i += speechAudioFormatInfo.BlockAlign / speechAudioFormatInfo.ChannelCount)
    {
        if (speechAudioFormatInfo.BitsPerSample == 16)
        {
            int audioIndex = i / 2;
            audioArray[audioIndex] = 0;
            // The bytes of each 16-bit sample are little-endian:
            // low byte first, then high byte.
            audioArray[audioIndex] |= (short)(byteArray[i + 1] << 8);
            audioArray[audioIndex] |= (short)byteArray[i];
        }
        else // speechAudioFormatInfo.BitsPerSample == 8
        {
            // Note: 8-bit PCM is unsigned and centered at 128; this direct
            // cast preserves the original behavior but leaves that DC offset.
            audioArray[i] = (short)byteArray[i];
        }
    }

    /* // For debugging: print out the short audio.
    output = "audioShortArray: ";
    for (int i = 0; i < numSamplesInAudio; ++i)
        output += audioArray[i] + ".";
    System.Diagnostics.Debug.WriteLine(output);
    */

    return audioArray;
}
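A small usage sketch: computing the peak amplitude of a recognized phrase inside a SpeechRecognized handler (audioUtilities stands for the instance exposing the method above):

short[] samples = audioUtilities.getArrayFromRecognizedAudio(e.Result.Audio);
int peak = 0;
foreach (short s in samples)
{
    // Math.Abs(short.MinValue) would overflow, so clamp that one value.
    int magnitude = (s == short.MinValue) ? short.MaxValue : Math.Abs(s);
    if (magnitude > peak) peak = magnitude;
}
System.Diagnostics.Debug.WriteLine("Peak amplitude: " + peak);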
private static void DumpRecordedAudio(RecognizedAudio audio)
{
    if (audio == null)
    {
        return;
    }

    // Find the first unused RetainedAudio_<n>.wav file name.
    int fileId = 0;
    string filename;
    while (File.Exists((filename = "RetainedAudio_" + fileId + ".wav")))
    {
        fileId++;
    }

    Console.WriteLine("\nWriting file: {0}", filename);
    using (var file = new FileStream(filename, System.IO.FileMode.CreateNew))
    {
        audio.WriteToWaveStream(file);
    }
}
public void SendVoice(RecognizedAudio audio)
{
    using (var client = GetClient())
    using (var audioStream = new MemoryStream())
    {
        if (client == null)
        {
            return;
        }

        audio.WriteToAudioStream(audioStream);

        // StreamContent reads from the stream's current position, which is at
        // the end after the write above; rewind or the PUT body is empty.
        audioStream.Position = 0;

        var response = client.PutAsync(_serverLocation, new StreamContent(audioStream)).Result;
        SendServerConnectionEvent(this, new ConnectionEventArgs(
            response.IsSuccessStatusCode ? ConnectionStatus.Success : ConnectionStatus.ServerBusy,
            response));
    }
}
public void handleSpeechRecognizedResult(float confidence, string textResult,
    string grammarName, string ruleName, KeyValuePair<string, SemanticValue>[] kvp,
    double audioDuration, RecognizedAudio audio)
{
    string fileP = null;
    string relPath = null;

    // Only write an audio file when the save path is not null.
    if (saveAudioPath != null)
    {
        // Zero-pad the file index to four digits, e.g. "0042.wav".
        string indexerStr = waveFileNameIndexer.ToString("D4");
        fileP = saveAudioPath + "\\" + indexerStr + ".wav";
        relPath = EBookUtil.convertAudioToRelativePath(fileP);
    }

    ActivityExecutor.add(new InternalSpeechRecognitionResultActivity(confidence,
        textResult, false, kvp, grammarName, ruleName, audioDuration, relPath));

    if (fileP != null)
    {
        // Write the audio to file.
        using (FileStream stream = new FileStream(fileP, FileMode.Create))
        {
            audio.WriteToWaveStream(stream);
        }
        unconfirmSaveAudioList.Add(fileP);
        Trace.WriteLine("write to file " + fileP);
        waveFileNameIndexer++;
    }

    // Log the semantic pairs as key=value (requires using System.Linq); the
    // original concatenated kvp.ToArray(), which only prints the array's type name.
    string timeStamp = EBookUtil.GetTimestamp();
    string semantics = string.Join(",", kvp.Select(p => p.Key + "=" + p.Value.Value));
    string text = "\n" + confidence + "\t" + textResult + "(complete)\t\t"
        + semantics + "\t" + grammarName + "\t" + timeStamp;
    Trace.WriteLine(text);
}
// Unused.
private void DumpRecordedAudio(RecognizedAudio audio)
{
    if (audio == null)
    {
        return;
    }

    int fileId = 0;
    string filename;
    while (File.Exists((filename = "RetainedAudio_" + fileId + ".wav")))
    {
        fileId++;
    }

    _remoteOperation.message("\nWriting file: " + filename);
    using (var file = new FileStream(filename, System.IO.FileMode.CreateNew))
    {
        audio.WriteToWaveStream(file);
    }
}
static void SaveRecordedAudio(RecognizedAudio audio)
{
    if (audio == null)
    {
        return;
    }

    // Find the first unused save_<n>.wav file name.
    string filename = "save_" + count + ".wav";
    while (File.Exists(filename))
    {
        count++;
        filename = "save_" + count + ".wav";
    }

    Console.WriteLine("Writing file: " + filename);
    using (var file = new FileStream(filename, FileMode.CreateNew))
    {
        audio.WriteToWaveStream(file);
    }
}
// TODO: Abstract out Audio as well.
public RecognitionSuccess()
{
    Semantics = new Dictionary<string, string>();
    Confidence = 1.0F;
    WordConfidence = new Dictionary<int, Tuple<string, float>>();
    Text = "";
    Audio = null;
    GrammarName = "";
    EngineName = "";
    Engine = null;
}
public override void manejar_comando_entrenamiento(SpeechRecognizedEventArgs e)
{
    if (e.Result.Text.ToUpperInvariant() == siguiente_comando)
    {
        RecognizedAudio audio = e.Result.Audio;
        TimeSpan duration = audio.Duration;
        int resultado;

        // Write the recognized audio to a temporary WAV file.
        string path = Path.GetTempFileName();
        using (Stream outputStream = new FileStream(path, FileMode.Create))
        {
            RecognizedAudio nameAudio = audio;
            nameAudio.WriteToWaveStream(outputStream);
        }

        // Feed the sample to the trainer; rows tagged "_3" are used for
        // validation only, the rest for both training and validation.
        resultado = AV.avf_agregar_muestra_WAV(entrenador, 0,
            (dataGridView1[2, fila].Value.ToString().Split('_')[1] == "3")
                ? AV.AVP_MUESTRA_VALIDACION
                : (AV.AVP_MUESTRA_ENTRENAMIENTO | AV.AVP_MUESTRA_VALIDACION),
            path);

#if DEBUG
        File.Copy(path, dataGridView1[2, fila].Value.ToString() + ".wav", true);
#endif
        File.Delete(path);

        switch (resultado)
        {
            case AV.AVS_SIN_MEMORIA:
                Environment.Exit(1);
                return;
            case AV.AVS_FORMATO_ARCHIVO_NO_VALIDO:
                errorlabel.Text = "The recording is corrupted. \nPlease retry the operation.";
                errorlabel.Visible = true;
                errorpanel.Visible = true;
                return;
            case AV.AVS_ARCHIVO_INACCESIBLE:
                errorlabel.Text = "The recorded voice could not be accessed. \nPlease verify that the disk is writable \nand retry the operation.";
                errorlabel.Visible = true;
                errorpanel.Visible = true;
                return;
            case AV.AVS_MUESTREO_DEMASIADO_BAJO:
            case AV.AVS_MUESTREO_NO_ES_MULTIPLO_DE_4_HZ:
                errorlabel.Text = "The recording cannot be used by the application. \nPlease use another microphone and \nrestart the training process.";
                errorlabel.Visible = true;
                errorpanel.Visible = true;
                return;
            case AV.AVS_DURACION_MENOR_A_MEDIO_SEGUNDO:
                errorlabel.Text = "The recording is too short. \nA recording of at least half a second is required. \nPlease record the command again, speaking slowly and clearly.";
                errorlabel.Visible = true;
                errorpanel.Visible = true;
                return;
            default:
                if (resultado >= 0)
                {
                    break;
                }
                errorlabel.Text = "An unexpected error occurred, please retry.";
                errorlabel.Visible = true;
                errorpanel.Visible = true;
                return;
        }

        errorlabel.Visible = false;
        errorpanel.Visible = false;
        dataGridView1[1, fila].Value = "Recognized";
        dataGridView1.ClearSelection();

        if (dataGridView1.RowCount == (fila + 1))
        {
            // All commands recorded: start training.
            lblTitle.Text = "Training";
            label1.Text = "The system is training to recognize your voice";
            label2.Visible = true;
            label2.Text = "The operation will take approximately 20 minutes";
            dataGridView1.Visible = false;
            pausaBtn.Visible = false;
            cafe.Visible = true;
            G.comando_form.Close();
            entrenar();
        }
        else
        {
            // Advance to the next command to record.
            dataGridView1.Rows[++fila].Selected = true;
            dataGridView1.FirstDisplayedScrollingRowIndex = fila;
            siguiente_comando = dataGridView1[0, fila].Value.ToString().ToUpperInvariant();
        }
    }
}
/// <summary>
/// Estimates the emotion present in recognizedAudio.
/// </summary>
/// <param name="recognizedAudio">The audio to analyze.</param>
/// <returns>An int representing the emotion excitement level in the signal.</returns>
public int extractEmotion(RecognizedAudio recognizedAudio)
{
    System.Diagnostics.Debug.WriteLine("SpeechEmotionRecognitionEngine::extractEmotion()");
    if (recognizedAudio == null)
    {
        System.Diagnostics.Debug.WriteLine("recognizedAudio is null");
        return -1;
    }

    ///////////////////////////////////////////////////////
    // Extract Features
    ///////////////////////////////////////////////////////
    int windowSize = 2048;
    short[] audioArray = audioUtilities.getArrayFromRecognizedAudio(recognizedAudio);
    int numWindows = audioArray.Length / (windowSize * recognizedAudio.Format.ChannelCount);

    // Calculate duration (in seconds).
    double duration = (double)audioArray.Length / recognizedAudio.Format.SamplesPerSecond;

    // Calculate the fundamental frequency for each window.
    float[][] freqOut = new float[numWindows][];
    for (int i = 0; i < numWindows; ++i)
    {
        freqOut[i] = new float[windowSize / 2];
    }
    double[] fundamentalFrequencies = new double[numWindows];
    for (int windowIndex = 0; windowIndex < numWindows; ++windowIndex)
    {
        short[] inputAudio = new short[windowSize];
        for (int i = 0; i < windowSize; ++i)
        {
            inputAudio[i] = audioArray[windowIndex * windowSize + i];
        }
        audioUtilities.computeSpectrum(inputAudio, freqOut[windowIndex], recognizedAudio.Format);

        // Take the lag of the strongest peak and convert it to a frequency.
        int argmax = 0;
        for (int i = 1; i < windowSize / 2; ++i)
        {
            if (freqOut[windowIndex][i] > freqOut[windowIndex][argmax])
            {
                argmax = i;
            }
        }
        double lag = (windowSize / 2 - 1) - argmax;
        fundamentalFrequencies[windowIndex] = recognizedAudio.Format.SamplesPerSecond / lag;
    }
    System.Diagnostics.Debug.WriteLine("Fundamental Frequency: " + fundamentalFrequencies[0]);

    // Calculate the frequency response for each window.
    float[][] fftRealOutput = new float[numWindows][];
    float[][] fftComplexOutput = new float[numWindows][];
    for (int i = 0; i < numWindows; ++i)
    {
        fftRealOutput[i] = new float[windowSize];
        fftComplexOutput[i] = new float[windowSize];
    }
    for (int windowIndex = 0; windowIndex < numWindows; ++windowIndex)
    {
        // Get a float array for the current window.
        float[] inputAudio = new float[windowSize];
        for (int i = 0; i < windowSize; ++i)
        {
            inputAudio[i] = (float)audioArray[windowIndex * windowSize + i];
        }

        // Calculate the FFT for the current window.
        audioUtilities.fft(inputAudio, null, fftRealOutput[windowIndex],
            fftComplexOutput[windowIndex], 1, recognizedAudio.Format);
        if (fftRealOutput[windowIndex] == null)
        {
            break;
        }
    }

    // Calculate the pitch mean. Only include fundamental frequencies within a
    // reasonable range for the human voice (40-700 Hz); the same filter is
    // applied to every statistic below.
    double pitchMean = 0;
    int n = 0;
    for (int i = 0; i < numWindows; ++i)
    {
        if (fundamentalFrequencies[i] > 40 && fundamentalFrequencies[i] < 700)
        {
            pitchMean += fundamentalFrequencies[i];
            n++;
        }
    }
    pitchMean /= n;

    // Calculate the pitch standard deviation.
    double pitchStdDev = 0;
    n = 0;
    for (int i = 0; i < numWindows; ++i)
    {
        if (fundamentalFrequencies[i] > 40 && fundamentalFrequencies[i] < 700)
        {
            pitchStdDev += Math.Pow(fundamentalFrequencies[i] - pitchMean, 2);
            n++;
        }
    }
    pitchStdDev /= n;
    pitchStdDev = Math.Sqrt(pitchStdDev);

    // Calculate the pitch velocity (first difference) for each window.
    double[] pitchVelocities = new double[numWindows - 1];
    for (int i = 1; i < fundamentalFrequencies.Length; ++i)
    {
        if (fundamentalFrequencies[i] > 40 && fundamentalFrequencies[i] < 700)
        {
            pitchVelocities[i - 1] = fundamentalFrequencies[i] - fundamentalFrequencies[i - 1];
        }
    }

    // Calculate the pitch acceleration (second difference) for each window.
    double[] pitchAccelerations = new double[numWindows - 1];
    for (int i = 1; i < pitchVelocities.Length; ++i)
    {
        if (fundamentalFrequencies[i] > 40 && fundamentalFrequencies[i] < 700)
        {
            pitchAccelerations[i - 1] = pitchVelocities[i] - pitchVelocities[i - 1];
        }
    }

    // Calculate the average pitch acceleration.
    double averagePitchAcceleration = 0;
    n = 0;
    for (int i = 0; i < pitchAccelerations.Length; ++i)
    {
        if (fundamentalFrequencies[i] > 40 && fundamentalFrequencies[i] < 700)
        {
            averagePitchAcceleration += pitchAccelerations[i];
            n++;
        }
    }
    averagePitchAcceleration /= n;

    // Calculate the log energy for each window.
    double[] logEnergies = new double[numWindows];
    n = 0;
    for (int windowIndex = 0; windowIndex < numWindows; ++windowIndex)
    {
        short[] inputAudio = new short[windowSize];
        for (int i = 0; i < windowSize; ++i)
        {
            inputAudio[i] = audioArray[windowIndex * windowSize + i];
        }
        if (fundamentalFrequencies[windowIndex] > 40 && fundamentalFrequencies[windowIndex] < 700)
        {
            logEnergies[windowIndex] = audioUtilities.computeLogEnergy(inputAudio, recognizedAudio.Format);
            n++;
        }
    }

    // Calculate the average log energy.
    double logEnergyMean = 0;
    for (int i = 0; i < numWindows; ++i)
    {
        logEnergyMean += logEnergies[i];
    }
    logEnergyMean /= n;

    // Calculate the "Emotion Level": the variance of the estimated pitch.
    double emotionLevel = pitchStdDev * pitchStdDev;
    return (int)emotionLevel;
}
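Note the design choice at the end: pitch mean, pitch acceleration, and log energy are all computed, but the returned "emotion level" is just the variance of the estimated fundamental frequency (pitchStdDev squared). Wider pitch swings are treated as higher excitement; the other features are computed but currently unused.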
// Loads the recognized audio into a memory stream and creates a SoundPlayer
// object for playing the audio. Passing null just disables the button.
private void SetRecognizedAudio(RecognizedAudio recognizedAudio)
{
    if (recognizedAudio == null)
    {
        _recognizedAudioStream = null;
        _recognizedAudioPlayer = null;
        _buttonRecognizedAudio.IsEnabled = false;
    }
    else
    {
        _recognizedAudioStream = new MemoryStream();
        recognizedAudio.WriteToWaveStream(_recognizedAudioStream);

        // Rewind so SoundPlayer reads the WAV from the beginning.
        _recognizedAudioStream.Position = 0;
        _recognizedAudioPlayer = new System.Media.SoundPlayer(_recognizedAudioStream);
        _buttonRecognizedAudio.IsEnabled = true;
    }
}
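The method pairs naturally with a recognition handler and a play button; a minimal sketch, with illustrative handler names:

private void Recognizer_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
{
    SetRecognizedAudio(e.Result.Audio); // Audio can be null for emulated input
}

private void ButtonRecognizedAudio_Click(object sender, RoutedEventArgs e)
{
    _recognizedAudioPlayer?.Play(); // asynchronous playback of the last phrase
}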
public RecognitionSuccess(AudioRecog r, RecognitionResult rres)
{
    EngineName = "SAPI";
    Engine = r;

    Semantics = new Dictionary<string, string>();
    if (rres.Semantics != null)
    {
        foreach (KeyValuePair<string, SemanticValue> s in rres.Semantics)
        {
            // ToString() is needed because the semantic value may be an int, etc.
            Semantics.Add(s.Key, s.Value.Value.ToString());
        }
    }

    Audio = rres.Audio;
    Text = rres.Text;
    GrammarName = rres.Grammar.Name;
    Confidence = rres.Confidence;

    // Record per-word confidence, keyed by word position.
    WordConfidence = new Dictionary<int, Tuple<string, float>>();
    int i = 0;
    foreach (System.Speech.Recognition.RecognizedWordUnit wd in rres.Words)
    {
        WordConfidence.Add(i, new Tuple<string, float>(wd.Text, wd.Confidence));
        i++;
    }
}