/// <summary>
/// Produces a new signal whose samples are the result of running
/// <c>MFCC.MFCC.RemoveSilence</c> over the samples of the given signal.
/// </summary>
/// <param name="signal">Signal whose silent segment(s) should be stripped.</param>
/// <returns>
/// A new <see cref="AudioSignal"/> holding the filtered samples. The sample rate and the
/// length in milliseconds are copied unchanged from <paramref name="signal"/>.
/// </returns>
public static AudioSignal RemoveSilence(AudioSignal signal)
{
    // The trailing argument (20) is forwarded verbatim to the MFCC silence remover.
    return new AudioSignal
    {
        sampleRate = signal.sampleRate,
        signalLengthInMilliSec = signal.signalLengthInMilliSec,
        data = MFCC.MFCC.RemoveSilence(signal.data, signal.sampleRate, signal.signalLengthInMilliSec, 20)
    };
}
/// <summary>
/// Opens the given audio file and returns an <see cref="AudioSignal"/> populated with:
/// 1. data[]: array of audio samples,
/// 2. the sample rate,
/// 3. the signal length in milliseconds.
/// </summary>
/// <param name="filePath">Path of the audio file to decode.</param>
/// <returns>AudioSignal containing the decoded samples, sample rate and length in ms.</returns>
public static AudioSignal OpenAudioFile(string filePath)
{
    WaveDecoder waveDecoder = new WaveDecoder(filePath);
    try
    {
        AudioSignal signal = new AudioSignal();
        signal.sampleRate = waveDecoder.SampleRate;
        signal.signalLengthInMilliSec = waveDecoder.Duration;

        // Decode every frame in one go and copy the samples into a plain double array.
        Signal decoded = waveDecoder.Decode(waveDecoder.Frames);
        signal.data = new double[waveDecoder.Frames];
        decoded.CopyTo(signal.data);

        return signal;
    }
    finally
    {
        // Release the underlying file stream so the file is not left open/locked
        // after decoding (callers re-open and overwrite these files).
        waveDecoder.Close();
    }
}
/// <summary>
/// Appends one voice template to the "savedSequences.txt" database.
/// </summary>
/// <remarks>
/// A record is 15 lines long:
/// 13 feature rows (row i holds coefficient i of every frame, each value followed by '|'),
/// one row with the first, last, minimum and maximum raw sample separated by spaces,
/// and one row "Username:" followed by the user name.
/// The whole record is assembled in memory and written with a single call so that a
/// failure while formatting cannot leave a partial record in the file.
/// </remarks>
/// <param name="toBeSavedSequence">Feature sequence to store; each frame must expose at least 13 features.</param>
/// <param name="username">Name stored on the last line of the record.</param>
/// <param name="signal">Raw signal the sequence was extracted from; its samples provide the four bound values.</param>
/// <exception cref="ArgumentException">The signal has no samples.</exception>
public static void SaveSequenceInDatabase(Sequence toBeSavedSequence, string username, AudioSignal signal)
{
    const int CoefficientCount = 13;

    // Validate before touching the file: nothing is written for an unusable signal.
    if (signal.data == null || signal.data.Length == 0)
    {
        throw new ArgumentException("The audio signal contains no samples.", "signal");
    }

    int size = signal.data.Length;
    double firstElement = signal.data[0];
    double lastElement = signal.data[size - 1];
    double maxElement = double.MinValue, minElement = double.MaxValue;
    for (int i = 0; i < size; i++)
    {
        maxElement = Math.Max(maxElement, signal.data[i]);
        minElement = Math.Min(minElement, signal.data[i]);
    }

    StringBuilder record = new StringBuilder();

    // One row per coefficient; each column is a frame.
    for (int i = 0; i < CoefficientCount; i++)
    {
        for (int j = 0; j < toBeSavedSequence.Frames.Length; j++)
        {
            record.Append(toBeSavedSequence.Frames[j].Features[i].ToString() + "|");
        }
        record.AppendLine();
    }

    // The four values are stored right before the username, in the order first, last, min, max.
    record.AppendLine(firstElement + " " + lastElement + " " + minElement + " " + maxElement);
    record.AppendLine("Username:" + username);

    // 'using' guarantees the file handle is released even if the write throws.
    using (FileStream savingStream = new FileStream("savedSequences.txt", FileMode.Append))
    using (StreamWriter saving = new StreamWriter(savingStream))
    {
        saving.Write(record.ToString());
    }
}
// Identify button: identifies the speaker of the currently loaded sequence, of an audio
// file picked through the file explorer, or of the sound that was just recorded.
private void btnIdentify_Click(object sender, EventArgs e)
{
    if (sequence != null && RecordRadio.Checked == false)
    {
        // A sequence is already loaded and we are not in record mode: identify it directly.
        IdentifyAndShowResult(sequence, signal);
    }
    else if (SavedRadio.Checked)
    {
        // The dialog wraps native resources, so dispose it once we are done with it.
        using (OpenFileDialog open = new OpenFileDialog())
        {
            if (open.ShowDialog() == DialogResult.OK)
            {
                isRecorded = false;
                path = open.FileName;

                // Open the selected audio file
                signal = AudioOperations.OpenAudioFile(path);
                sequence = AudioOperations.ExtractFeatures(ref signal);
                IdentifyAndShowResult(sequence, signal);
            }
        }
    }
    //Dev: Omar Moataz Abdel-Wahed Attia
    else if (isRecorded)
    {
        // Initializes a decoder to get the value of the recorded stream.
        InitializeDecoder();

        // Deliberately a local (as in the original code): the recorded signal is not
        // stored in the form's 'signal' field.
        AudioSignal recordedSignal = new AudioSignal();
        recordedSignal.data = new double[this.decoder.frames];
        recordedSignal.sampleRate = this.decoder.GetTempSignal().SampleRate;
        // Copy the decoded samples into the array handed to feature extraction.
        this.decoder.GetTempSignal().CopyTo(recordedSignal.data);

        sequence = AudioOperations.ExtractFeatures(ref recordedSignal);
        IdentifyAndShowResult(sequence, recordedSignal);
    }
    else
    {
        // The user tried to identify without recording any sound.
        MessageBox.Show("Please record your voice first!");
    }

    sequence = null;
    updateButtons();
}

// Looks up the closest stored user for the given sequence/signal, logs how long the
// lookup took to the console and shows the match in a message box.
private void IdentifyAndShowResult(Sequence candidate, AudioSignal candidateSignal)
{
    Stopwatch watch = Stopwatch.StartNew();
    ClosestMatch match = FileOperations.GetUserName(candidate, candidateSignal, WithPruningRadioBTN.Checked);
    watch.Stop();

    long elapsedMs = watch.ElapsedMilliseconds;
    Console.WriteLine("Elapsed Milliseconds = " + elapsedMs);
    MessageBox.Show("Username: " + match.Username + "\nWith Minimum Difference: " + match.MinimumDistance.ToString());
}
// File > Open: lets the user pick an audio file, loads it into 'signal', extracts its
// feature sequence into 'sequence' and refreshes the button states.
public void openToolStripMenuItem_Click(object sender, EventArgs e)
{
    // The dialog wraps native resources, so dispose it once we are done with it.
    using (OpenFileDialog open = new OpenFileDialog())
    {
        if (open.ShowDialog() != DialogResult.OK)
        {
            return; // Dialog was dismissed without choosing a file: nothing to load.
        }

        isRecorded = false;
        path = open.FileName;

        // Open the selected audio file
        signal = AudioOperations.OpenAudioFile(path);
        sequence = AudioOperations.ExtractFeatures(ref signal);
        updateButtons();
    }
}
// Save dialog confirmed: writes the encoded recording to the chosen file, then reloads
// that file into 'signal' and extracts its feature sequence into 'sequence'.
// Does nothing when there is no encoder (nothing has been recorded).
private void saveFileDialog1_FileOk(object sender, System.ComponentModel.CancelEventArgs e)
{
    if (this.encoder == null)
    {
        return;
    }

    // Close the output stream before the same path is re-opened below; otherwise the
    // file handle stays open while OpenAudioFile tries to read the file.
    using (Stream fileStream = saveFileDialog1.OpenFile())
    {
        this.encoder.Save(fileStream);
    }

    path = saveFileDialog1.FileName;
    signal = AudioOperations.OpenAudioFile(path);
    sequence = AudioOperations.ExtractFeatures(ref signal);
}
//======================================================
/*
    Dev: Omar Moataz Abdel-Wahed Attia
    Last Edit: 12/8/2015
    GetUserName walks "savedSequences.txt" record by record. A record is 15 lines:
      - lines 0-12 : one line per feature index; every '|'-terminated column is a frame,
      - line 13    : four space-separated values (first, last, min, max) used for lower bounding,
      - line 14    : the user name, stored after a 9-character prefix
                     (SaveSequenceInDatabase writes it as "Username:" + name).
*/
//======================================================
public static ClosestMatch GetUserName(Sequence sequence, AudioSignal signal, bool pruned)
{
    ClosestMatch best = new ClosestMatch();

    using (StreamReader reader = new StreamReader("savedSequences.txt"))
    {
        // Template currently being rebuilt from the file.
        Sequence stored = new Sequence();
        // Position of the current line inside its 15-line record.
        int lineInRecord = 0;
        // True until the first feature row of a record has allocated the frames.
        bool startingTemplate = true;
        // True when the record just compared became the new best match.
        bool improved = false;

        string line;
        while ((line = reader.ReadLine()) != null)
        {
            if (lineInRecord == 13)
            {
                // Bounds row: the template is complete, compare it with the query.
                string[] bounds = line.Split(' ');
                double storedFirst = double.Parse(bounds[0]);
                double storedLast = double.Parse(bounds[1]);
                double storedMin = double.Parse(bounds[2]);
                double storedMax = double.Parse(bounds[3]);

                // With pruning enabled, skip the DTW computation entirely when the lower
                // bound already exceeds the best distance found so far.
                bool rejected = pruned
                    && DynamicTimeWarpingOperations.LowerBound_Kim(signal, storedFirst, storedLast, storedMin, storedMax) > best.MinimumDistance;

                if (!rejected)
                {
                    double distance = pruned
                        ? DynamicTimeWarpingOperations.Pruned_DTW_Distance(stored, sequence)
                        : DynamicTimeWarpingOperations.DTW_Distance(stored, sequence);

                    if (distance < best.MinimumDistance)
                    {
                        best.MinimumDistance = distance;
                        improved = true;
                    }
                }

                // Start from a clean template for the next record.
                startingTemplate = true;
                stored = new Sequence();
            }
            else if (lineInRecord == 14)
            {
                // Name row: adopt the name only if this record became the best match.
                if (improved)
                {
                    best.Username = line.Substring(9);
                }
                improved = false;
            }
            else
            {
                // Feature row: the trailing '|' yields one empty trailing cell, hence Length - 1.
                string[] cells = line.Split('|');
                int frameCount = cells.Length - 1;

                if (startingTemplate)
                {
                    stored.Frames = new MFCCFrame[frameCount];
                    for (int k = 0; k < frameCount; k++)
                    {
                        stored.Frames[k] = new MFCCFrame();
                    }
                }

                for (int k = 0; k < frameCount; k++)
                {
                    stored.Frames[k].Features[lineInRecord] = double.Parse(cells[k]);
                }

                startingTemplate = false;
            }

            // Advance inside the record, wrapping back to 0 after the name row.
            lineInRecord = (lineInRecord == 14) ? 0 : lineInRecord + 1;
        }
    }

    return best;
}
/// <summary>
/// Reads a NIST-style wav file: a text header terminated by an "end_head" line, followed
/// by 16-bit samples starting at byte offset 1024.
/// </summary>
/// <param name="filename">Path of the file to read.</param>
/// <returns>
/// An <see cref="AudioSignal"/> with the decoded samples, the sample rate from the header
/// and the length in milliseconds derived from sample_count and sample_rate.
/// </returns>
/// <exception cref="InvalidDataException">The header has no "end_head" line.</exception>
/// <exception cref="NotSupportedException">The header declares a sample size other than 2 bytes.</exception>
private static AudioSignal openNISTWav(string filename)
{
    const int HeaderSize = 1024;     // header offset.
    const int BytesPerSample = 2;    // 2 bytes per sample (16 bit sound mono)

    int sample_rate = 0, sample_count = 0, sample_n_bytes = 0;

    // 'using' releases the file even if a header field fails to parse.
    using (StreamReader reader = new StreamReader(filename))
    {
        while (true)
        {
            string line = reader.ReadLine();
            if (line == null)
            {
                // Reached end of file without seeing the header terminator.
                throw new InvalidDataException("NIST header is missing the end_head marker: " + filename);
            }

            // Header fields look like "<name> <type> <value>", so the value is token 2.
            string[] fields = line.Split(' ');
            if (fields[0] == "sample_count")
            {
                sample_count = int.Parse(fields[2]);
            }
            else if (fields[0] == "sample_rate")
            {
                sample_rate = int.Parse(fields[2]);
            }
            else if (fields[0] == "sample_n_bytes")
            {
                sample_n_bytes = int.Parse(fields[2]);
            }
            else if (fields[0] == "end_head")
            {
                break;
            }
        }
    }

    // The decoding loop below always consumes two bytes per sample; refuse other widths
    // instead of silently producing garbage samples.
    if (sample_n_bytes != BytesPerSample)
    {
        throw new NotSupportedException("Only 2-byte samples are supported; header declares sample_n_bytes = " + sample_n_bytes);
    }

    byte[] wav = File.ReadAllBytes(filename);
    double[] data = new double[sample_count];

    // Never read past the declared sample count nor past the last complete byte pair.
    int availableSamples = Math.Max(0, (wav.Length - HeaderSize) / BytesPerSample);
    int samplesToRead = Math.Min(sample_count, availableSamples);

    int pos = HeaderSize;
    for (int i = 0; i < samplesToRead; i++)
    {
        data[i] = bytesToDouble(wav[pos], wav[pos + 1]);
        pos += BytesPerSample;
    }

    AudioSignal signal = new AudioSignal();
    signal.sampleRate = sample_rate;
    signal.data = data;
    signal.signalLengthInMilliSec = (double)1000.0 * sample_count / sample_rate;
    return signal;
}
/// <summary>
/// Computes the MFCC feature sequence of the given audio signal.
/// Each frame (feature) consists of 13 coefficients.
/// </summary>
/// <param name="signal">Audio signal whose samples and sample rate are fed to the extractor.</param>
/// <returns>Sequence of features (13 x NumOfFrames).</returns>
public static Sequence ExtractFeatures(ref AudioSignal signal)
{
    // Silence removal is currently disabled, so the samples are passed on as they are.
    var samples = signal.data;
    var rate = signal.sampleRate;

    Sequence features = MFCC.MFCC.ExtractFeatures(samples, rate);
    return features;
}
// Lower bounding function used for pruning.
// Compares the first, last, minimum and maximum sample of the given signal against the
// four supplied values and returns the largest of the four squared differences.
public static double LowerBound_Kim(AudioSignal signal, double firstElement, double lastElement, double minElement, double maxElement)
{
    var samples = signal.data;
    int count = samples.Length;

    double ownFirst = samples[0];
    double ownLast = samples[count - 1];

    // Single pass to find the extremes of the signal.
    double ownMax = double.MinValue;
    double ownMin = double.MaxValue;
    foreach (double sample in samples)
    {
        ownMax = Math.Max(ownMax, sample);
        ownMin = Math.Min(ownMin, sample);
    }

    double firstGap = Math.Abs(ownFirst - firstElement);
    double lastGap = Math.Abs(ownLast - lastElement);
    double minGap = Math.Abs(ownMin - minElement);
    double maxGap = Math.Abs(ownMax - maxElement);

    double worstEndpoint = Math.Max(firstGap * firstGap, lastGap * lastGap);
    double worstExtreme = Math.Max(minGap * minGap, maxGap * maxGap);

    // Maximum squared difference over the first, last, minimum and maximum elements.
    return Math.Max(worstEndpoint, worstExtreme);
}
// Creates the form for a given feature sequence and the signal it belongs to.
// Both values are stored before the designer-generated controls are initialized.
public AddUser(Sequence sequence_, AudioSignal signal_)
{
    signal = signal_;
    sequence = sequence_;

    InitializeComponent();
}