Example #1
        /// <summary>
        /// Takes the oldest queued audio sample, extracts its emotion level, and raises EmotionRecognized.
        /// </summary>
        public bool scheduler()
        {
            lock (recognizedAudioQueue)
            {
                if (recognizedAudioQueue.Count > 0)
                {
                    // Get the audio sample that has been on the queue for the longest.
                    RecognizedAudio recognizedAudio = recognizedAudioQueue[0];
                    recognizedAudioQueue.RemoveAt(0);

                    // Calculate the emotion.
                    int emotion = extractEmotion(recognizedAudio);

                    // Message back with extracted emotion.
                    if (EmotionRecognized != null)
                    {
                        syncContext.Post(delegate { EmotionRecognized(this, new EmotionRecognizedEventArgs(emotion)); }, null);
                    }

                    return(true);
                }
            }

            return(false);
        }
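
Example #1 relies on several members that are declared elsewhere in its class. A minimal sketch of what those declarations might look like (a hedged reconstruction based only on the call sites above; the event-args type and field types are assumptions) is:

        // Sketch of the members Example #1 assumes (requires System.Collections.Generic,
        // System.Threading and System.Speech.Recognition).
        private readonly List<RecognizedAudio> recognizedAudioQueue = new List<RecognizedAudio>();
        private readonly SynchronizationContext syncContext = SynchronizationContext.Current;

        // Raised once an emotion value has been extracted from a queued sample.
        public event EventHandler<EmotionRecognizedEventArgs> EmotionRecognized;

        public class EmotionRecognizedEventArgs : EventArgs
        {
            public EmotionRecognizedEventArgs(int emotion) { Emotion = emotion; }
            public int Emotion { get; private set; }
        }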
Example #2
        internal static void DisplayBasicPhraseInfo(string text, RecognizedPhrase result, SpeechRecognizer recognizer)
        {
            if (result != null && text != null)
            {
                // if (recognizer != null)
                // {
                //     label.Text += String.Format(
                //         " Recognizer currently at: {0} mSec\n" +
                //         " Audio Device currently at: {1} mSec\n",
                //         recognizer.RecognizerAudioPosition.TotalMilliseconds,
                //         recognizer.AudioPosition.TotalMilliseconds);
                // }

                if (result != null)
                {
                    RecognitionResult recResult = result as RecognitionResult;

                    if (recResult != null)
                    {
                        RecognizedAudio resultRecognizedAudio = recResult.Audio;

                        if (resultRecognizedAudio == null)
                        {
                            text += String.Format(" Emulated input\n");
                        }

                        else
                        {
                            text +=
                                String.Format(
                                    " Candidate Phrase at: {0} mSec\n" + " Phrase Length: {1} mSec\n" +
                                    " Input State Time: {2}\n" + " Input Format: {3}\n",
                                    resultRecognizedAudio.AudioPosition.TotalMilliseconds,
                                    resultRecognizedAudio.Duration.TotalMilliseconds,
                                    resultRecognizedAudio.StartTime.ToShortTimeString(),
                                    resultRecognizedAudio.Format.EncodingFormat.ToString());
                        }
                    }
                    text += String.Format(" Confidence Level: {0}\n", result.Confidence);

                    if (result.Grammar != null)
                    {
                        text += String.Format(" Recognizing Grammar: {0}\n" + " Recognizing Rule: {1}\n",
                                              ((result.Grammar.Name != null) ? (result.Grammar.Name) : "None"),
                                              ((result.Grammar.RuleName != null) ? (result.Grammar.RuleName) : "None"));
                    }

                    if (result.ReplacementWordUnits.Count != 0)
                    {
                        text += String.Format(" Replacement text:\n");

                        foreach (ReplacementText rep in result.ReplacementWordUnits)
                        {
                            text += String.Format(" At index {0} for {1} words. Text: {2}\n", rep.FirstWordIndex,
                                                  rep.CountOfWords, rep.Text);
                        }

                        text += String.Format("\n\n");
                        Console.WriteLine(text);
                    }
                }
            }
        }
        // Handle the SpeechRecognized event of the name grammar.
        public static void NameSpeechRecognized(
            object sender, SpeechRecognizedEventArgs e)
        {
            //Console.WriteLine("Grammar ({0}) recognized speech: {1}",
            //  e.Result.Grammar.Name, e.Result.Text);

            try
            {
                RecognizedAudio audio = e.Result.Audio;

                // Add code to verify and persist the audio.
                string path = @"C:\temp\passwordAudio.wav";
                using (Stream outputStream = new FileStream(path, FileMode.Create))
                {
                    //RecognizedAudio passwordAudio = audio.GetRange(start, duration);
                    RecognizedAudio passwordAudio = audio;
                    passwordAudio.WriteToWaveStream(outputStream);
                    outputStream.Close();
                }

                Thread testThread =
                    new Thread(new ParameterizedThreadStart(TestAudio));
                testThread.Start(path);
            }
            catch (Exception ex)
            {
                Console.WriteLine("Exception thrown while processing audio:");
                Console.WriteLine(ex.ToString());
            }
        }
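
The handler above hands the saved file path to a TestAudio method on a background thread. TestAudio itself is not part of the example; a hypothetical minimal version that simply replays the persisted WAV file could look like this:

        // Hypothetical TestAudio sketch: replay the WAV file written by NameSpeechRecognized.
        private static void TestAudio(object pathObject)
        {
            string path = (string)pathObject;
            using (var player = new System.Media.SoundPlayer(path))
            {
                player.PlaySync();   // Block this worker thread until playback finishes.
            }
        }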
Example #4
        void recognizer_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
        {
            RecognizedAudio audio = e.Result.Audio;

            // The range starts at the beginning of the recognized audio.
            TimeSpan start    = TimeSpan.Zero;
            TimeSpan duration = audio.Duration;

            // Add code to verify and persist the audio.
            string path = @"E:\Deep Learning\Understanding Simple Speech Commands\nameAudio.wav";

            using (Stream outputStream = new FileStream(path, FileMode.Create))
            {
                RecognizedAudio nameAudio = audio.GetRange(start, duration);
                nameAudio.WriteToWaveStream(outputStream);
                outputStream.Close();
            }


            // Run the Sphinx-based recognizer (from_file.py) on the saved WAV file.
            python_runner.python_path = @"C:\Users\admin\AppData\Local\Programs\Python\Python36\python.exe";
            python_runner.script_path = @"E:\Deep Learning\Understanding Simple Speech Commands\SpeechRecognition\from_file.py";

            python_runner.arguments.Add(path);

            python_runner.run();

            string sphinx_result = python_runner.results;


            // Run the DeepSpeech client (client_from_file.py) on the same file.
            // Note that the argument list is not cleared between runs and this result is read from the errors stream.
            python_runner.python_path = @"C:\Users\admin\AppData\Local\Programs\Python\Python36\python.exe";
            python_runner.script_path = @"E:\Deep Learning\Understanding Simple Speech Commands\deepspeech\client_from_file.py";

            python_runner.arguments.Add(path);

            python_runner.run();

            string deep_speech_result = python_runner.errors;

            textBox1.Text = e.Result.Text + "----------------" + sphinx_result + "----------------" + deep_speech_result;



            /*
             * txt_out.AppendText(e.Result.Text + ", " + (int)(100 * e.Result.Confidence) + Environment.NewLine);
             *
             * //char beginer = (char)7;
             * //char terminator = (char)10;
             * //string message =beginer+getCommandForWord(e.Result.Text)+terminator;
             *
             * string message = getCommandForWord(e.Result.Text, e.Result.Confidence);
             *
             * var bytes = ASCIIEncoding.ASCII.GetBytes(message);
             *
             * foreach (var c in clients)
             * {
             *  c.GetStream().Write(bytes, 0, bytes.Length);
             * }*/
        }
Example #5
 /// <summary>
 /// Called with new audio when a word is detected.
 /// </summary>
 /// <param name="inputAudio"></param>
 public void hereIsAudio(RecognizedAudio inputAudio)
 {
     // Add audio to array to be analyzed.
     lock (recognizedAudioQueue)
     {
         recognizedAudioQueue.Add(inputAudio);
     }
     stateChanged();
 }
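
hereIsAudio only enqueues the sample; stateChanged() (not shown) is whatever wakes up the code that drains the queue, for example scheduler() from Example #1. A hypothetical minimal version:

 // Hypothetical stateChanged sketch: drain the queue on a worker thread by
 // calling scheduler() (Example #1) until it reports the queue is empty.
 private void stateChanged()
 {
     System.Threading.ThreadPool.QueueUserWorkItem(delegate
     {
         while (scheduler())
         {
         }
     });
 }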
Example #6
        public short[] getArrayFromRecognizedAudio(RecognizedAudio inputAudio)
        {
            SpeechAudioFormatInfo speechAudioFormatInfo = inputAudio.Format;

            // Put the audio into an array.
            // Use a 16 bit short because 16 bits is the max sample size.
            MemoryStream audioStream = new MemoryStream();

            inputAudio.WriteToAudioStream(audioStream);
            byte[] byteArray = audioStream.ToArray();

            /* // For Debugging.
             * // Print out the byte audio.
             * String output = "audioByteArray: ";
             * for (int i = 0; i < byteArray.Length; ++i)
             *  output += byteArray[i] + ".";
             * System.Diagnostics.Debug.WriteLine(output);
             */

            // Convert byteArray[] to short[], keeping channels interleaved.
            long numSamplesInAudio = byteArray.Length / speechAudioFormatInfo.BlockAlign * speechAudioFormatInfo.ChannelCount;

            short[] audioArray = new short[numSamplesInAudio];
            for (int i = 0; i < byteArray.Length; i += speechAudioFormatInfo.BlockAlign / speechAudioFormatInfo.ChannelCount)
            {
                if (speechAudioFormatInfo.BitsPerSample == 16)
                {
                    int audioIndex = i / 2;
                    audioArray[audioIndex] = 0;

                    // The ordering of the bytes for each 16-bit sample is Little-Endian!!!
                    audioArray[audioIndex] |= (short)(byteArray[i + 1] << 8);
                    audioArray[audioIndex] |= (short)byteArray[i];
                }
                else // if (speechAudioFormatInfo.BitsPerSample == 8)
                {
                    audioArray[i] = (short)byteArray[i];
                }
            }

            /* // For Debugging.
             * // Print out the short audio.
             * output = "audioshortArray: ";
             * for (int i = 0; i < numSamplesInAudio; ++i)
             *  output += audioArray[i] + ".";
             * System.Diagnostics.Debug.WriteLine(output);
             */

            return(audioArray);
        }
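
Elsewhere in these examples the method above is called through an audioUtilities instance (see Example #15). A brief, hypothetical call site from a SpeechRecognized handler:

        // Hypothetical usage: convert the recognized audio to interleaved 16-bit samples.
        void recognizer_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
        {
            if (e.Result.Audio == null)
            {
                return;   // Emulated input carries no audio (see Example #2).
            }

            short[] samples = audioUtilities.getArrayFromRecognizedAudio(e.Result.Audio);
            System.Diagnostics.Debug.WriteLine("Interleaved sample count: " + samples.Length);
        }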
Example #7
        private static void DumpRecordedAudio( RecognizedAudio audio )
        {
            if ( audio == null )
                return;

            int fileId = 0;
            string filename;
            while ( File.Exists( (filename = "RetainedAudio_" + fileId + ".wav") ) )
                fileId++;

            Console.WriteLine( "\nWriting file: {0}", filename );
            using ( var file = new FileStream( filename, System.IO.FileMode.CreateNew ) )
                audio.WriteToWaveStream( file );
        }
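
A typical way to wire this helper up is from the recognizer's SpeechRecognized event; the null guard above already covers results (such as emulated input) that carry no audio. A hypothetical call site:

        // Hypothetical call site: persist the recognized audio after every recognition.
        recognizer.SpeechRecognized += (sender, e) => DumpRecordedAudio(e.Result.Audio);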
Example #8
        public void SendVoice(RecognizedAudio audio)
        {
            using (var client = GetClient())
                using (var audioStream = new MemoryStream())
                {
                    if (client == null)
                    {
                        return;
                    }

                    audio.WriteToAudioStream(audioStream);

                    // Rewind the stream so that StreamContent uploads the audio from the beginning.
                    audioStream.Position = 0;
                    var response = client.PutAsync(_serverLocation, new StreamContent(audioStream)).Result;
                    SendServerConnectionEvent(this, new ConnectionEventArgs(response.IsSuccessStatusCode ? ConnectionStatus.Success : ConnectionStatus.ServerBusy, response));
                }
        }
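
SendVoice references members that are not shown here: GetClient, _serverLocation and SendServerConnectionEvent. A minimal sketch of plausible declarations, assuming System.Net.Http is in use (the names simply mirror the call sites above; ConnectionEventArgs and ConnectionStatus are project-specific types left undefined):

        // Assumed members for SendVoice (sketch only, not the original source).
        private readonly string _serverLocation = "http://localhost:5000/audio";   // hypothetical endpoint

        private HttpClient GetClient()
        {
            // Returning null means "no connection available"; SendVoice checks for that.
            return new HttpClient();
        }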
        public void handleSpeechRecognizedResult(float confidence, string textResult,
                                                 string grammarName, string ruleName, KeyValuePair <string, SemanticValue>[] kvp,
                                                 double audioDuration, RecognizedAudio audio)
        {
            string fileP   = null;
            string relPath = null;

            //only write audio file when given path is not null
            if (saveAudioPath != null)
            {
                string indexerStr = waveFileNameIndexer + "";
                while (indexerStr.Length < 4)
                {
                    indexerStr = "0" + indexerStr;
                }

                fileP = saveAudioPath + "\\" + indexerStr + ".wav";

                relPath = EBookUtil.convertAudioToRelativePath(@fileP);
            }

            ActivityExecutor.add(new InternalSpeechRecognitionResultActivity(confidence, textResult, false,
                                                                             kvp, grammarName, ruleName, audioDuration, relPath));

            //only write audio file when given path is not null
            if (fileP != null)
            {
                //write audio to file
                FileStream stream = new FileStream(fileP, FileMode.Create);

                audio.WriteToWaveStream(stream);
                stream.Flush();
                stream.Close();
                unconfirmSaveAudioList.Add(fileP);
                Trace.WriteLine("write to file " + fileP);
                waveFileNameIndexer++;
            }
            String timeStamp = EBookUtil.GetTimestamp();

            string text = "\n" + confidence + "\t" + textResult + "(complete)\t\t" +
                          kvp.ToArray() + "\t" + grammarName + "\t" + timeStamp;

            Trace.WriteLine(text);
        }
Example #10
        //Useless
        private void DumpRecordedAudio(RecognizedAudio audio)
        {
            if (audio == null)
            {
                return;
            }

            int    fileId = 0;
            string filename;

            while (File.Exists((filename = "RetainedAudio_" + fileId + ".wav")))
            {
                fileId++;
            }

            _remoteOperation.message("\nWriting file: " + filename);
            using (var file = new FileStream(filename, System.IO.FileMode.CreateNew))
                audio.WriteToWaveStream(file);
        }
Example #11
        private static void DumpRecordedAudio(RecognizedAudio audio)
        {
            if (audio == null)
            {
                return;
            }

            int    fileId = 0;
            string filename;

            while (File.Exists((filename = "RetainedAudio_" + fileId + ".wav")))
            {
                fileId++;
            }

            Console.WriteLine("\nWriting file: {0}", filename);
            using (var file = new FileStream(filename, System.IO.FileMode.CreateNew))
                audio.WriteToWaveStream(file);
        }
Example #12
        static void SaveRecordedAudio(RecognizedAudio audio)
        {
            if (audio == null)
            {
                return;
            }

            string filename = "save_" + count + ".wav";

            while (File.Exists(filename))
            {
                count++;
                filename = "save_" + count + ".wav";
            }

            Console.WriteLine("寫入檔案: " + filename);
            using (var file = new FileStream(filename, FileMode.CreateNew))
            {
                audio.WriteToWaveStream(file);
            }
        }
Example #13
 //TODO: Abstract out Audio as well
 public RecognitionSuccess()
 {
     Semantics = new Dictionary<string, string>();
     Confidence = 1.0F;
     WordConfidence = new Dictionary<int, Tuple<string, float>>();
     Text = "";
     Audio = null;
     GrammarName = "";
     EngineName = "";
     Engine = null;
 }
        public override void manejar_comando_entrenamiento(SpeechRecognizedEventArgs e)
        {
            if (e.Result.Text.ToUpperInvariant() == siguiente_comando)
            {
                RecognizedAudio audio    = e.Result.Audio;
                TimeSpan        duration = audio.Duration;
                int             resultado;

                string path = Path.GetTempFileName();

                using (Stream outputStream = new FileStream(path, FileMode.Create))
                {
                    RecognizedAudio nameAudio = audio;
                    nameAudio.WriteToWaveStream(outputStream);
                    outputStream.Close();
                }

                resultado = AV.avf_agregar_muestra_WAV(entrenador, 0,
                                                       (dataGridView1[2, fila].Value.ToString().Split('_')[1] == "3") ? AV.AVP_MUESTRA_VALIDACION :
                                                       (AV.AVP_MUESTRA_ENTRENAMIENTO | AV.AVP_MUESTRA_VALIDACION), path);

                #if DEBUG
                File.Copy(path, dataGridView1[2, fila].Value.ToString() + ".wav", true);
                #endif
                File.Delete(path);

                switch (resultado)
                {
                case AV.AVS_SIN_MEMORIA:
                    Environment.Exit(1);
                    return;

                case AV.AVS_FORMATO_ARCHIVO_NO_VALIDO:
                    errorlabel.Text    = "La grabación está dañada. \nPor favor, reintente la operación.";
                    errorlabel.Visible = true;
                    errorpanel.Visible = true;
                    return;

                case AV.AVS_ARCHIVO_INACCESIBLE:
                    errorlabel.Text    = "No se pudo acceder a la voz grabada. \nPor favor, verifique que se pueda escribir en el disco \ny reintente la operación.";
                    errorlabel.Visible = true;
                    errorpanel.Visible = true;
                    return;

                case AV.AVS_MUESTREO_DEMASIADO_BAJO:
                case AV.AVS_MUESTREO_NO_ES_MULTIPLO_DE_4_HZ:
                    errorlabel.Text    = "La grabación no puede ser utilizada por la aplicación. \n Por favor, utilice otro micrófono y \nreinicie el proceso de entrenamiento.";
                    errorlabel.Visible = true;
                    errorpanel.Visible = true;
                    return;

                case AV.AVS_DURACION_MENOR_A_MEDIO_SEGUNDO:
                    errorlabel.Text    = "La grabación es demasiado corta. \nSe necesita una grabación de al menos medio segundo. \nPor favor, grabe el comando nuevamente, hablando lento y claro.";
                    errorlabel.Visible = true;
                    errorpanel.Visible = true;
                    return;

                default:
                    if (resultado >= 0)
                    {
                        break;
                    }
                    errorlabel.Text    = "Ocurrió un error inesperado, por favor reintente.";
                    errorlabel.Visible = true;
                    errorpanel.Visible = true;
                    return;
                }

                errorlabel.Visible = false;
                errorpanel.Visible = false;

                dataGridView1[1, fila].Value = "Reconocido";
                dataGridView1.ClearSelection();
                if (dataGridView1.RowCount == (fila + 1))
                {
                    lblTitle.Text         = "Entrenando";
                    label1.Text           = "El sistema se está entrenando para reconocer tu voz";
                    label2.Visible        = true;
                    label2.Text           = "La operación tardará aproximadamente 20 minutos";
                    dataGridView1.Visible = false;
                    pausaBtn.Visible      = false;
                    cafe.Visible          = true;
                    G.comando_form.Close();
                    entrenar();
                }
                else
                {
                    dataGridView1.Rows[++fila].Selected           = true;
                    dataGridView1.FirstDisplayedScrollingRowIndex = fila;
                    siguiente_comando = dataGridView1[0, fila].Value.ToString().ToUpperInvariant();
                }
            }
        }
Example #15
        /// <summary>
        /// Estimates the emotion present in recognizedAudio
        /// </summary>
        /// <param name="recognizedAudio"></param>
        /// <returns>An int representing the emotion excitement level in the signal</returns>
        public int extractEmotion(RecognizedAudio recognizedAudio)
        {
            System.Diagnostics.Debug.WriteLine("SpeechEmotionRecognitionEngine::extractEmotion()");
            if (recognizedAudio == null)
            {
                System.Diagnostics.Debug.WriteLine("inputAudio is null");
                return(-1);
            }

            ///////////////////////////////////////////////////////
            // Extract Features
            ///////////////////////////////////////////////////////
            int windowSize = 2048;

            short[] audioArray = audioUtilities.getArrayFromRecognizedAudio(recognizedAudio);
            int     numWindows = audioArray.Length / (windowSize * recognizedAudio.Format.ChannelCount);

            // Calculate duration (in seconds).
            double duration = (double)audioArray.Length / recognizedAudio.Format.SamplesPerSecond;

            // MessageTextBox.AppendText("Audio Duration: " + duration + " seconds\n");

            // Calculate fundamental frequency for each window.
            float[][] freqOut = new float[numWindows][];
            for (int i = 0; i < numWindows; ++i)
            {
                freqOut[i] = new float[windowSize / 2];
            }

            double[] fundamentalFrequencies = new double[numWindows];
            for (int windowIndex = 0; windowIndex < numWindows; ++windowIndex)
            {
                short[] inputAudio = new short[windowSize];
                for (int i = 0; i < windowSize; ++i)
                {
                    inputAudio[i] = audioArray[windowIndex * windowSize + i];
                }

                audioUtilities.computeSpectrum(inputAudio, freqOut[windowIndex], recognizedAudio.Format);
                // for (int i = 0; i < windowSize / 2 - 1; ++i)
                //     System.Diagnostics.Debug.WriteLine("freqOut[" + i + "]: " + freqOut[windowIndex][i]);

                int argmax = 0;
                for (int i = 1; i < windowSize / 2; ++i)
                {
                    if (freqOut[windowIndex][i] > freqOut[windowIndex][argmax])
                    {
                        argmax = i;
                    }
                }

                double lag = (windowSize / 2 - 1) - argmax;
                fundamentalFrequencies[windowIndex] = recognizedAudio.Format.SamplesPerSecond / lag;
            }
            System.Diagnostics.Debug.WriteLine("Fundamental Frequency: " + fundamentalFrequencies[0]); // * (recognizedAudio.Format.SamplesPerSecond / ((double)(windowSize / 2) / recognizedAudio.Format.ChannelCount)));
            // graphSpectrum(freqOut[0], recognizedAudio.Format);    // This isn't useful.

            // Calculate frequency response for each window.
            float[][] fftRealOutput    = new float[numWindows][];
            float[][] fftComplexOutput = new float[numWindows][];
            for (int i = 0; i < numWindows; ++i)
            {
                fftRealOutput[i]    = new float[windowSize];
                fftComplexOutput[i] = new float[windowSize];
            }

            for (int windowIndex = 0; windowIndex < numWindows; ++windowIndex)
            {
                // Get float array for current window.
                float[] inputAudio = new float[windowSize];
                for (int i = 0; i < windowSize; ++i)
                {
                    inputAudio[i] = (float)audioArray[windowIndex * windowSize + i];
                }

                // Calculate fft for current window.
                audioUtilities.fft(inputAudio, null, fftRealOutput[windowIndex], fftComplexOutput[windowIndex], 1, recognizedAudio.Format);
                if (fftRealOutput[windowIndex] == null)
                {
                    break;
                }
            }
            // graphFrequencyResponse(fftRealOutput[0], fftComplexOutput[0], recognizedAudio.Format);
            // spectrographForm.drawSpectrograph(fftRealOutput, fftComplexOutput,
            //                                   audioUtilities, recognizedAudio.Format);

            // Calculate the pitch mean.
            double pitchMean = 0;
            int    n         = 0;

            // MessageTextBox.AppendText("Estimated Fundamental Frequencies: \n");
            for (int i = 0; i < numWindows; ++i)
            {
                // MessageTextBox.AppendText("  " + fundamentalFrequencies[i] + " Hz\n");
                // Only include the fundamental frequencies that are within a reasonable range for human voice.
                if (fundamentalFrequencies[i] > 40 && fundamentalFrequencies[i] < 700)
                {
                    pitchMean += fundamentalFrequencies[i];
                    n++;
                }
            }
            pitchMean /= n;
            // MessageTextBox.AppendText("Pitch Mean: " + pitchMean + "\n");
            // TestPitchExtractionLabel.Text = "Estimated F0: " + pitchMean + " Hz";

            // Calculate pitch standard deviation.
            double pitchStdDev = 0;

            n = 0;
            for (int i = 0; i < numWindows; ++i)
            {
                // Only include the fundamental frequencies that are within a reasonable range for human voice.
                if (fundamentalFrequencies[i] > 40 && fundamentalFrequencies[i] < 700)
                {
                    pitchStdDev += Math.Pow(fundamentalFrequencies[i] - pitchMean, 2);
                    n++;
                }
            }
            pitchStdDev /= n;
            pitchStdDev  = Math.Pow(pitchStdDev, 0.5);
            // MessageTextBox.AppendText("Pitch Std Dev: " + pitchStdDev + " Hz\n");

            // Calculate pitch velocity for each window.
            double[] pitchVelocities = new double[numWindows - 1];
            n = 0;
            for (int i = 1; i < fundamentalFrequencies.Length; ++i)
            {
                // Only include the fundamental frequencies that are within a reasonable range for human voice.
                if (fundamentalFrequencies[i] > 40 && fundamentalFrequencies[i] < 700)
                {
                    pitchVelocities[i - 1] = fundamentalFrequencies[i] - fundamentalFrequencies[i - 1];
                    n++;
                }
            }

            // Calculate pitch acceleration for each window.
            double[] pitchAccelerations = new double[numWindows - 1];
            n = 0;
            for (int i = 1; i < pitchVelocities.Length; ++i)
            {
                // Only include the fundamental frequencies that are within a reasonable range for human voice.
                if (fundamentalFrequencies[i] > 40 && fundamentalFrequencies[i] < 700)
                {
                    pitchAccelerations[i - 1] = pitchVelocities[i] - pitchVelocities[i - 1];
                    n++;
                }
            }

            // Calculate average pitch acceleration.
            double averagePitchAcceleration = 0;

            n = 0;
            for (int i = 0; i < pitchAccelerations.Length; ++i)
            {
                // Only include the fundamental frequencies that are within a reasonable range for human voice.
                if (fundamentalFrequencies[i] > 40 && fundamentalFrequencies[i] < 700)
                {
                    averagePitchAcceleration += pitchAccelerations[i];
                    n++;
                }
            }

            averagePitchAcceleration /= n;
            // MessageTextBox.AppendText("Pitch Acceleration: " + averagePitchAcceleration + "\n");

            // Calculate log energy for each window.
            double[] logEnergies = new double[numWindows];
            n = 0;
            for (int windowIndex = 0; windowIndex < numWindows; ++windowIndex)
            {
                short[] inputAudio = new short[windowSize];
                for (int i = 0; i < windowSize; ++i)
                {
                    inputAudio[i] = audioArray[windowIndex * windowSize + i];
                }

                // Only include the fundamental frequencies that are within a reasonable range for human voice.
                if (fundamentalFrequencies[windowIndex] > 40 && fundamentalFrequencies[windowIndex] < 700)
                {
                    logEnergies[windowIndex] = audioUtilities.computeLogEnergy(inputAudio, recognizedAudio.Format);
                    n++;
                }
                // System.Diagnostics.Debug.WriteLine("energy[" + windowIndex + "]: " + logEnergies[windowIndex]);
            }

            // Calculate average log energy.
            double logEnergyMean = 0;

            for (int i = 0; i < numWindows; ++i)
            {
                logEnergyMean += logEnergies[i];
            }
            logEnergyMean /= n;
            // MessageTextBox.AppendText("Log Energy Mean: " + logEnergyMean + "\n");

            // Calculate "Emotion Level" and update GUI.
            double emotionLevel = pitchStdDev * pitchStdDev;

            /*
             * EmotionLevelLabel.Text = "Emotion Level: " + emotionLevel + "\n";
             * if (emotionLevel <= EmotionLevelProgressBar.Maximum && emotionLevel >= 0)
             *  EmotionLevelProgressBar.Value = (int)emotionLevel;
             * else
             *  EmotionLevelProgressBar.Value = EmotionLevelProgressBar.Maximum;
             */

            // stateChanged();
            return((int)emotionLevel);
        }
        static void SaveRecordedAudio(RecognizedAudio audio)
        {
            if (audio == null)
                return;

            string filename = "save_" + count + ".wav" ;
            while (File.Exists(filename))
            {
                count++;
                filename = "save_" + count + ".wav";
            }

            Console.WriteLine("�g�J�ɮ�: " +  filename);
            using (var file = new FileStream(filename, FileMode.CreateNew))
            {
                audio.WriteToWaveStream(file);
            }
        }
Example #17
        public short[] getArrayFromRecognizedAudio(RecognizedAudio inputAudio)
        {
            SpeechAudioFormatInfo speechAudioFormatInfo = inputAudio.Format;

            // Put the audio into an array.
            // Use a 16 bit short because 16 bits is the max sample size.
            MemoryStream audioStream = new MemoryStream();
            inputAudio.WriteToAudioStream(audioStream);
            byte[] byteArray = audioStream.ToArray();

            /* // For Debugging.
            // Print out the byte audio.
            String output = "audioByteArray: ";
            for (int i = 0; i < byteArray.Length; ++i)
                output += byteArray[i] + ".";
            System.Diagnostics.Debug.WriteLine(output);
            */

            // Convert byteArray[] to short[], keeping channels interleaved.
            long numSamplesInAudio = byteArray.Length / speechAudioFormatInfo.BlockAlign * speechAudioFormatInfo.ChannelCount;
            short[] audioArray = new short[numSamplesInAudio];
            for (int i = 0; i < byteArray.Length; i += speechAudioFormatInfo.BlockAlign / speechAudioFormatInfo.ChannelCount)
            {
                if (speechAudioFormatInfo.BitsPerSample == 16)
                {
                    int audioIndex = i / 2;
                    audioArray[audioIndex] = 0;

                    // The ordering of the bytes for each 16-bit sample is Little-Endian!!!
                    audioArray[audioIndex] |= (short)(byteArray[i + 1] << 8);
                    audioArray[audioIndex] |= (short)byteArray[i];
                }
                else // if (speechAudioFormatInfo.BitsPerSample == 8)
                    audioArray[i] = (short)byteArray[i];
            }

            /* // For Debugging.
            // Print out the short audio.
            output = "audioshortArray: ";
            for (int i = 0; i < numSamplesInAudio; ++i)
                output += audioArray[i] + ".";
            System.Diagnostics.Debug.WriteLine(output);
            */

            return audioArray;
        }
        /// <summary>
        /// Estimates the emotion present in recognizedAudio
        /// </summary>
        /// <param name="recognizedAudio"></param>
        /// <returns>An int representing the emotion excitement level in the signal</returns>
        public int extractEmotion(RecognizedAudio recognizedAudio)
        {
            System.Diagnostics.Debug.WriteLine("SpeechEmotionRecognitionEngine::extractEmotion()");
            if (recognizedAudio == null)
            {
                System.Diagnostics.Debug.WriteLine("inputAudio is null");
                return -1;
            }

            ///////////////////////////////////////////////////////
            // Extract Features
            ///////////////////////////////////////////////////////
            int windowSize = 2048;
            short[] audioArray = audioUtilities.getArrayFromRecognizedAudio(recognizedAudio);
            int numWindows = audioArray.Length / (windowSize * recognizedAudio.Format.ChannelCount);

            // Calculate duration (in seconds).
            double duration = (double)audioArray.Length / recognizedAudio.Format.SamplesPerSecond;
            // MessageTextBox.AppendText("Audio Duration: " + duration + " seconds\n");

            // Calculate fundamental frequency for each window.
            float[][] freqOut = new float[numWindows][];
            for (int i = 0; i < numWindows; ++i)
                freqOut[i] = new float[windowSize / 2];

            double[] fundamentalFrequencies = new double[numWindows];
            for (int windowIndex = 0; windowIndex < numWindows; ++windowIndex)
            {
                short[] inputAudio = new short[windowSize];
                for (int i = 0; i < windowSize; ++i)
                    inputAudio[i] = audioArray[windowIndex * windowSize + i];

                audioUtilities.computeSpectrum(inputAudio, freqOut[windowIndex], recognizedAudio.Format);
                // for (int i = 0; i < windowSize / 2 - 1; ++i)
                //     System.Diagnostics.Debug.WriteLine("freqOut[" + i + "]: " + freqOut[windowIndex][i]);

                int argmax = 0;
                for (int i = 1; i < windowSize / 2; ++i)
                {
                    if (freqOut[windowIndex][i] > freqOut[windowIndex][argmax])
                        argmax = i;
                }

                double lag = (windowSize / 2 - 1) - argmax;
                fundamentalFrequencies[windowIndex] = recognizedAudio.Format.SamplesPerSecond / lag;
            }
            System.Diagnostics.Debug.WriteLine("Fundamental Frequency: " + fundamentalFrequencies[0]); // * (recognizedAudio.Format.SamplesPerSecond / ((double)(windowSize / 2) / recognizedAudio.Format.ChannelCount)));
            // graphSpectrum(freqOut[0], recognizedAudio.Format);    // This isn't useful.

            // Calculate frequency response for each window.
            float[][] fftRealOutput = new float[numWindows][];
            float[][] fftComplexOutput = new float[numWindows][];
            for (int i = 0; i < numWindows; ++i)
            {
                fftRealOutput[i] = new float[windowSize];
                fftComplexOutput[i] = new float[windowSize];
            }

            for (int windowIndex = 0; windowIndex < numWindows; ++windowIndex)
            {
                // Get float array for current window.
                float[] inputAudio = new float[windowSize];
                for (int i = 0; i < windowSize; ++i)
                    inputAudio[i] = (float)audioArray[windowIndex * windowSize + i];

                // Calculate fft for current window.
                audioUtilities.fft(inputAudio, null, fftRealOutput[windowIndex], fftComplexOutput[windowIndex], 1, recognizedAudio.Format);
                if (fftRealOutput[windowIndex] == null)
                    break;
            }
            // graphFrequencyResponse(fftRealOutput[0], fftComplexOutput[0], recognizedAudio.Format);
            // spectrographForm.drawSpectrograph(fftRealOutput, fftComplexOutput,
            //                                   audioUtilities, recognizedAudio.Format);

            // Calculate the pitch mean.
            double pitchMean = 0;
            int n = 0;
            // MessageTextBox.AppendText("Estimated Fundamental Frequencies: \n");
            for (int i = 0; i < numWindows; ++i)
            {
                // MessageTextBox.AppendText("  " + fundamentalFrequencies[i] + " Hz\n");
                // Only include the fundamental frequencies that are within a reasonable range for human voice.
                if (fundamentalFrequencies[i] > 40 && fundamentalFrequencies[i] < 700)
                {
                    pitchMean += fundamentalFrequencies[i];
                    n++;
                }
            }
            pitchMean /= n;
            // MessageTextBox.AppendText("Pitch Mean: " + pitchMean + "\n");
            // TestPitchExtractionLabel.Text = "Estimated F0: " + pitchMean + " Hz";

            // Calculate pitch standard deviation.
            double pitchStdDev = 0;
            n = 0;
            for (int i = 0; i < numWindows; ++i)
            {
                // Only include the fundamental frequencies that are within a reasonable range for human voice.
                if (fundamentalFrequencies[i] > 40 && fundamentalFrequencies[i] < 700)
                {
                    pitchStdDev += Math.Pow(fundamentalFrequencies[i] - pitchMean, 2);
                    n++;
                }
            }
            pitchStdDev /= n;
            pitchStdDev = Math.Pow(pitchStdDev, 0.5);
            // MessageTextBox.AppendText("Pitch Std Dev: " + pitchStdDev + " Hz\n");

            // Calculate pitch velocity for each window.
            double[] pitchVelocities = new double[numWindows - 1];
            n = 0;
            for (int i = 1; i < fundamentalFrequencies.Length; ++i)
            {
                // Only include the fundamental frequencies that are within a reasonable range for human voice.
                if (fundamentalFrequencies[i] > 40 && fundamentalFrequencies[i] < 700)
                {
                    pitchVelocities[i - 1] = fundamentalFrequencies[i] - fundamentalFrequencies[i - 1];
                    n++;
                }
            }

            // Calculate pitch acceleration for each window.
            double[] pitchAccelerations = new double[numWindows - 1];
            n = 0;
            for (int i = 1; i < pitchVelocities.Length; ++i)
            {
                // Only include the fundamental frequencies that are within a reasonable range for human voice.
                if (fundamentalFrequencies[i] > 40 && fundamentalFrequencies[i] < 700)
                {
                    pitchAccelerations[i - 1] = pitchVelocities[i] - pitchVelocities[i - 1];
                    n++;
                }
            }

            // Calculate average pitch acceleration.
            double averagePitchAcceleration = 0;
            n = 0;
            for (int i = 0; i < pitchAccelerations.Length; ++i)
            {
                // Only include the fundamental frequencies that are within a reasonable range for human voice.
                if (fundamentalFrequencies[i] > 40 && fundamentalFrequencies[i] < 700)
                {
                    averagePitchAcceleration += pitchAccelerations[i];
                    n++;
                }
            }

            averagePitchAcceleration /= n;
            // MessageTextBox.AppendText("Pitch Acceleration: " + averagePitchAcceleration + "\n");

            // Calculate log energy for each window.
            double[] logEnergies = new double[numWindows];
            n = 0;
            for (int windowIndex = 0; windowIndex < numWindows; ++windowIndex)
            {
                short[] inputAudio = new short[windowSize];
                for (int i = 0; i < windowSize; ++i)
                    inputAudio[i] = audioArray[windowIndex * windowSize + i];

                // Only include the fundamental frequencies that are within a reasonable range for human voice.
                if (fundamentalFrequencies[windowIndex] > 40 && fundamentalFrequencies[windowIndex] < 700)
                {
                    logEnergies[windowIndex] = audioUtilities.computeLogEnergy(inputAudio, recognizedAudio.Format);
                    n++;
                }
                // System.Diagnostics.Debug.WriteLine("energy[" + windowIndex + "]: " + logEnergies[windowIndex]);
            }

            // Calculate average log energy.
            double logEnergyMean = 0;
            for (int i = 0; i < numWindows; ++i)
                logEnergyMean += logEnergies[i];
            logEnergyMean /= n;
            // MessageTextBox.AppendText("Log Energy Mean: " + logEnergyMean + "\n");

            // Calculate "Emotion Level" and update GUI.
            double emotionLevel = pitchStdDev * pitchStdDev;

            /*
            EmotionLevelLabel.Text = "Emotion Level: " + emotionLevel + "\n";
            if (emotionLevel <= EmotionLevelProgressBar.Maximum && emotionLevel >= 0)
                EmotionLevelProgressBar.Value = (int)emotionLevel;
            else
                EmotionLevelProgressBar.Value = EmotionLevelProgressBar.Maximum;
            */

            // stateChanged();
            return (int)emotionLevel;
        }
 /// <summary>
 /// Called with new audio when a word is detected.
 /// </summary>
 /// <param name="inputAudio"></param>
 public void hereIsAudio(RecognizedAudio inputAudio)
 {
     // Add audio to array to be analyzed.
     lock (recognizedAudioQueue)
     {
         recognizedAudioQueue.Add(inputAudio);
     }
     stateChanged();
 }
        // Loads the recognizedAudio into a memory stream and creates a Soundplayer object
        // for playing the audio. Passing null just disables the button
        private void SetRecognizedAudio(RecognizedAudio recognizedAudio)
        {
            if (recognizedAudio == null)
            {
                _recognizedAudioStream = null;
                _recognizedAudioPlayer = null;

                _buttonRecognizedAudio.IsEnabled = false;
            }
            else
            {
                _recognizedAudioStream = new MemoryStream();
                recognizedAudio.WriteToWaveStream(_recognizedAudioStream);
                _recognizedAudioStream.Position = 0;
                _recognizedAudioPlayer = new System.Media.SoundPlayer(_recognizedAudioStream);

                _buttonRecognizedAudio.IsEnabled = true;
            }
        }
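
The SoundPlayer prepared above is presumably started from the button that SetRecognizedAudio enables. A hypothetical click handler, assuming a WPF window (which the IsEnabled usage suggests):

        // Hypothetical click handler: replay the retained recognition audio.
        private void _buttonRecognizedAudio_Click(object sender, System.Windows.RoutedEventArgs e)
        {
            if (_recognizedAudioPlayer != null)
            {
                _recognizedAudioPlayer.Play();
            }
        }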
Example #21
 public RecognitionSuccess(AudioRecog r, RecognitionResult rres)
 {
     EngineName = "SAPI";
     Engine = r;
     Semantics = new Dictionary<string, string>();
     if (rres.Semantics != null)
     {
         foreach (KeyValuePair<String, SemanticValue> s in rres.Semantics)
         {
             Semantics.Add(s.Key, s.Value.Value.ToString()); //need the ToString() as this may be an int etc
         }
     }
     Audio = rres.Audio;
     Text = rres.Text;
     GrammarName = rres.Grammar.Name;
     Confidence = rres.Confidence;
     WordConfidence = new Dictionary<int, Tuple<string, float>>();
     int i = 0;
     foreach (System.Speech.Recognition.RecognizedWordUnit wd in rres.Words)
     {
         WordConfidence.Add(i, new Tuple<string, float>(wd.Text, wd.Confidence));
         i++;
     }
 }
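
A hypothetical call site for this constructor, assuming an AudioRecog instance named r is available wherever the recognizer's SpeechRecognized event is handled:

 // Hypothetical usage: capture the engine-specific result in the engine-agnostic RecognitionSuccess type.
 void OnSpeechRecognized(object sender, SpeechRecognizedEventArgs e)
 {
     RecognitionSuccess success = new RecognitionSuccess(r, e.Result);
     Console.WriteLine("{0} (confidence {1:F2})", success.Text, success.Confidence);
 }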