/// <summary>
/// Decodes an audio file in two steps: mplayer.exe converts it to a temporary
/// wav file, which is then handed to DecodeUsingSox.
/// </summary>
/// <param name="fileIn">Path of the file to decode; every "%20" is replaced by a space first.</param>
/// <param name="srate">Sample rate forwarded to DecodeUsingSox.</param>
/// <param name="secondsToAnalyze">Number of seconds forwarded to DecodeUsingSox.</param>
/// <returns>
/// The buffer returned by DecodeUsingSox, or null when mplayer exits with a
/// non-zero exit code or did not produce the wav file.
/// </returns>
public static float[] DecodeUsingMplayerAndSox(string fileIn, int srate, int secondsToAnalyze) {
    lock (_locker) {
        using (Process tosoxreadable = new Process()) {
            fileIn = Regex.Replace(fileIn, "%20", " ");

            DbgTimer t = new DbgTimer();
            t.Start();
            Dbg.WriteLine("Decoding: " + fileIn);

            // GetTempFileName creates an empty placeholder file on disk, so both
            // the placeholder and the derived wav file have to be removed again.
            String tempFile = System.IO.Path.GetTempFileName();
            String soxreadablewav = tempFile + ".wav";
            Dbg.WriteLine("Temporary wav file: " + soxreadablewav);

            float[] floatBuffer = null;
            try {
                tosoxreadable.StartInfo.FileName = "./NativeLibraries\\mplayer\\mplayer.exe";
                tosoxreadable.StartInfo.Arguments = " -quiet -vc null -vo null -ao pcm:fast:waveheader \"" + fileIn + "\" -ao pcm:file=\\\"" + soxreadablewav + "\\\"";
                tosoxreadable.StartInfo.UseShellExecute = false;
                tosoxreadable.StartInfo.RedirectStandardOutput = true;
                tosoxreadable.StartInfo.RedirectStandardError = true;

                // Both pipes must be drained while the child runs: waiting for
                // exit first deadlocks as soon as the child fills a pipe buffer.
                // stderr is collected asynchronously, stdout synchronously.
                System.Text.StringBuilder standardError = new System.Text.StringBuilder();
                tosoxreadable.ErrorDataReceived += (sender, e) => {
                    if (e.Data != null) {
                        standardError.AppendLine(e.Data);
                    }
                };

                tosoxreadable.Start();
                tosoxreadable.BeginErrorReadLine();
                string standardOutput = tosoxreadable.StandardOutput.ReadToEnd();
                tosoxreadable.WaitForExit();

                // Original author's note on exit codes:
                // 0 = successful, 1 = partially successful, 2 = failed
                if (tosoxreadable.ExitCode != 0) {
                    Console.Out.WriteLine(standardError.ToString());
                    return(null);
                }

#if DEBUG
                Console.Out.WriteLine(standardOutput);
#endif

                if (File.Exists(soxreadablewav)) {
                    floatBuffer = DecodeUsingSox(soxreadablewav, srate, secondsToAnalyze);
                }
            } finally {
                // Best-effort cleanup on every path (success, failure, exception).
                // Each file is attempted separately so one failure does not skip the other.
                foreach (String path in new String[] { tempFile, soxreadablewav }) {
                    try {
                        File.Delete(path);
                    } catch (IOException io) {
                        Console.WriteLine(io);
                    }
                }
            }

            Dbg.WriteLine("Decoding Execution Time: " + t.Stop().TotalMilliseconds + " ms");
            return(floatBuffer);
        }
    }
}
/// <summary>
/// Decodes an audio file with mplayer.exe into a temporary mono float wav
/// file (resampled to <paramref name="srate"/>) and reads the samples back
/// through RiffRead.
/// </summary>
/// <param name="fileIn">Path of the file to decode; every "%20" is replaced by a space first.</param>
/// <param name="srate">Target sample rate passed to mplayer's resample filter.</param>
/// <returns>
/// The first channel of the decoded wav file (riff.SoundData[0]), or null
/// when mplayer exits with a non-zero exit code.
/// </returns>
public static float[] DecodeUsingMplayer(string fileIn, int srate) {
    lock (_locker) {
        using (Process towav = new Process()) {
            fileIn = Regex.Replace(fileIn, "%20", " ");

            DbgTimer t = new DbgTimer();
            t.Start();
            Dbg.WriteLine("Decoding: " + fileIn);

            // GetTempFileName creates an empty placeholder file on disk.
            String tempFile = System.IO.Path.GetTempFileName();
            String wav = tempFile + ".wav";
            Dbg.WriteLine("Temporary wav file: " + wav);

            float[] floatBuffer;
            try {
                towav.StartInfo.FileName = "./NativeLibraries\\mplayer\\mplayer.exe";
                towav.StartInfo.Arguments = " -quiet -ao pcm:fast:waveheader \"" + fileIn + "\" -format floatle -af resample=" + srate + ":0:2,pan=1:0.5:0.5 -channels 1 -vo null -vc null -ao pcm:file=\\\"" + wav + "\\\"";
                towav.StartInfo.UseShellExecute = false;
                towav.StartInfo.RedirectStandardOutput = true;
                towav.StartInfo.RedirectStandardError = true;

                // Both pipes must be drained while the child runs: waiting for
                // exit first deadlocks as soon as the child fills a pipe buffer.
                // stderr is collected asynchronously, stdout synchronously.
                System.Text.StringBuilder standardError = new System.Text.StringBuilder();
                towav.ErrorDataReceived += (sender, e) => {
                    if (e.Data != null) {
                        standardError.AppendLine(e.Data);
                    }
                };

                towav.Start();
                towav.BeginErrorReadLine();
                string standardOutput = towav.StandardOutput.ReadToEnd();
                towav.WaitForExit();

                // Original author's note on exit codes:
                // 0 = successful, 1 = partially successful, 2 = failed
                if (towav.ExitCode != 0) {
                    Console.Out.WriteLine(standardError.ToString());
                    return(null);
                }

#if DEBUG
                Console.Out.WriteLine(standardOutput);
#endif

                RiffRead riff = new RiffRead(wav);
                riff.Process();
                floatBuffer = riff.SoundData[0];
            } finally {
                // The placeholder is removed on every path.
                // NOTE(review): the wav file itself is intentionally left on disk,
                // as in the original (its deletion was commented out) - confirm
                // whether that is still wanted, since it accumulates temp files.
                try {
                    File.Delete(tempFile);
                } catch (IOException io) {
                    Console.WriteLine(io);
                }
            }

            Dbg.WriteLine("Decoding Execution Time: " + t.Stop().TotalMilliseconds + " ms");
            return(floatBuffer);
        }
    }
}
/// <summary>
/// Applies the mel filter bank to <paramref name="m"/>, converts every
/// resulting value to decibels and multiplies the result by the DCT matrix.
/// </summary>
/// <remarks>
/// The pointer loop is a hand-optimized form of
/// <c>mel = filterWeights.Multiply(m)</c> followed by replacing each element
/// with 0 when it is below 1.0 and with <c>10 * log10(value)</c> otherwise.
/// </remarks>
/// <param name="m">Input matrix; its rows are combined by the filter weights, one output column per input column.</param>
/// <returns>The product <c>dct.Multiply(mel)</c>.</returns>
public Matrix Apply(ref Matrix m) {
    DbgTimer timer = new DbgTimer();
    timer.Start();

    Matrix mel = new Matrix(filterWeights.rows, m.columns);

    int frameCount = m.columns;
    int binCount = m.rows;
    int melStride = mel.columns;
    int weightStride = filterWeights.columns;
    int filterCount = filterWeights.rows;

    unsafe {
        fixed (float* src = m.d, weights = filterWeights.d, dst = mel.d) {
            for (int frame = 0; frame < frameCount; frame++) {
                for (int filter = 0; filter < filterCount; filter++) {
                    float* cell = dst + (filter * melStride + frame);
                    float* weightRow = weights + (filter * weightStride);

                    // Dot product of one filter row with one input column,
                    // accumulated directly in the output cell.
                    for (int bin = 0; bin < binCount; bin++) {
                        *cell += weightRow[bin] * src[bin * frameCount + frame];
                    }

                    // Values below 1.0 are clamped to 0, everything else becomes dB.
                    if (*cell < 1.0f) {
                        *cell = 0;
                    } else {
                        *cell = (float)(10.0 * Math.Log10(*cell));
                    }
                }
            }
        }
    }

    Matrix result = dct.Multiply(mel);

    Dbg.WriteLine("mfcc (MfccLessOptimized) Execution Time: " + timer.Stop().TotalMilliseconds + " ms");
    return(result);
}
/// <summary>
/// Builds a Scms model from the MFCC representation of a song without
/// inverting the covariance matrix.
/// </summary>
/// <param name="mfccs">MFCC matrix (Comirva.Audio.Util.Maths.Matrix).</param>
/// <param name="name">File name prefix used by the DEBUG-only matrix dumps.</param>
/// <returns>
/// A Scms holding the mean and the upper triangle of the covariance. The icov
/// entries are copied from a freshly constructed matrix that this method
/// never fills in.
/// </returns>
public static Scms GetScmsNoInverse(Comirva.Audio.Util.Maths.Matrix mfccs, string name) {
    DbgTimer timer = new DbgTimer();
    timer.Start();

    // Mean
    Comirva.Audio.Util.Maths.Matrix meanVector = mfccs.Mean(2);

#if DEBUG
    if (Analyzer.DEBUG_INFO_VERBOSE) {
        if (Analyzer.DEBUG_OUTPUT_TEXT) {
            meanVector.WriteText(name + "_mean.txt");
        }
        meanVector.DrawMatrixGraph(name + "_mean.png");
    }
#endif

    // Covariance
    Comirva.Audio.Util.Maths.Matrix covariance = mfccs.Cov(meanVector);

#if DEBUG
    if (Analyzer.DEBUG_INFO_VERBOSE) {
        if (Analyzer.DEBUG_OUTPUT_TEXT) {
            covariance.WriteText(name + "_covariance.txt");
        }
        covariance.DrawMatrixGraph(name + "_covariance.png");
    }
#endif

    // Placeholder standing in for the inverse covariance; it is only allocated here.
    Comirva.Audio.Util.Maths.Matrix inversePlaceholder =
        new Comirva.Audio.Util.Maths.Matrix(covariance.Rows, covariance.Columns);

    // Pack the mean and the upper triangles (row by row) into the model.
    int dimension = meanVector.Rows;
    Scms model = new Scms(dimension);
    int packedIndex = 0;
    for (int row = 0; row < dimension; row++) {
        model.mean[row] = (float)meanVector.MatrixData[row][0];
        for (int col = row; col < dimension; col++, packedIndex++) {
            model.cov[packedIndex] = (float)covariance.MatrixData[row][col];
            model.icov[packedIndex] = (float)inversePlaceholder.MatrixData[row][col];
        }
    }

    Dbg.WriteLine("GetScmsNoInverse - Execution Time: {0} ms", timer.Stop().TotalMilliseconds);
    return(model);
}
/// <summary>
/// Applies the mel filter bank to <paramref name="m"/>, converts every
/// resulting value to decibels and multiplies the result by the DCT matrix.
/// </summary>
/// <remarks>
/// The filter weights matrix is mostly zero, so for filter row k only the
/// columns from fwFT[k, 0] (inclusive) to fwFT[k, 1] (exclusive) are multiplied.
/// </remarks>
/// <param name="m">Input matrix; one output column is produced per input column.</param>
/// <returns>The product <c>dct.Multiply(mel)</c>.</returns>
/// <exception cref="MfccFailedException">
/// Thrown when the DCT step raises a MatrixDimensionMismatchException.
/// </exception>
public Matrix Apply(ref Matrix m) {
    DbgTimer timer = new DbgTimer();
    timer.Start();

    Matrix mel = new Matrix(filterWeights.rows, m.columns);

    int frameCount = m.columns;
    int melStride = mel.columns;
    int weightStride = filterWeights.columns;
    int filterCount = filterWeights.rows;

    unsafe {
        fixed (float* src = m.d, weights = filterWeights.d, dst = mel.d) {
            for (int frame = 0; frame < frameCount; frame++) {
                for (int filter = 0; filter < filterCount; filter++) {
                    float* cell = dst + (filter * melStride + frame);
                    float* weightRow = weights + (filter * weightStride);

                    // Only walk the non-zero span of this filter row.
                    int firstBin = fwFT[filter, 0];
                    int endBin = fwFT[filter, 1];
                    for (int bin = firstBin; bin < endBin; bin++) {
                        *cell += weightRow[bin] * src[bin * frameCount + frame];
                    }

                    // Values below 1.0 are clamped to 0, everything else becomes dB.
                    if (*cell < 1.0f) {
                        *cell = 0;
                    } else {
                        *cell = (float)(10.0 * Math.Log10(*cell));
                    }
                }
            }
        }
    }

    try {
        Matrix result = dct.Multiply(mel);

        long elapsed = 0;
        timer.Stop(ref elapsed);
        Dbg.WriteLine("Mirage - mfcc Execution Time: {0}ms", elapsed);

        return(result);
    } catch (MatrixDimensionMismatchException) {
        throw new MfccFailedException();
    }
}
/// <summary>
/// Runs the full analysis chain for one file: decode it, apply the MFCC
/// transform and build the Scms model from the result.
/// </summary>
/// <param name="file_path">Path handed to the audio decoder.</param>
/// <returns>The Scms model returned by Scms.GetScms.</returns>
public static Scms Analyze(string file_path) {
    DbgTimer timer = new DbgTimer();
    timer.Start();

    Matrix decoded = ad.Decode(file_path);
    Matrix coefficients = mfcc.Apply(ref decoded);
    Scms model = Scms.GetScms(coefficients);

    long elapsed = 0;
    timer.Stop(ref elapsed);
    Dbg.WriteLine("Mirage - Total Execution Time: {0}ms", elapsed);

    return(model);
}
/// <summary>
/// Builds a Scms model (mean, covariance, inverse covariance) from the MFCC
/// representation of a song.
/// </summary>
/// <param name="mfcc">MFCC matrix of the song.</param>
/// <returns>The populated Scms model.</returns>
/// <exception cref="ScmsImpossibleException">
/// Thrown when inverting the covariance raises a MatrixSingularException.
/// </exception>
public static Scms GetScms(Matrix mfcc) {
    DbgTimer timer = new DbgTimer();
    timer.Start();

    Vector meanVector = mfcc.Mean();
    Matrix covariance = mfcc.Covariance(meanVector);

    Matrix inverseCovariance;
    try {
        inverseCovariance = covariance.Inverse();
    } catch (MatrixSingularException) {
        throw new ScmsImpossibleException();
    }

    // Pack the mean and the upper triangles (row by row) into the model.
    int dimension = meanVector.rows;
    Scms model = new Scms(dimension);
    int packedIndex = 0;
    for (int row = 0; row < dimension; row++) {
        model.mean[row] = meanVector.d[row, 0];
        for (int col = row; col < dimension; col++, packedIndex++) {
            model.cov[packedIndex] = covariance.d[row, col];
            model.icov[packedIndex] = inverseCovariance.d[row, col];
        }
    }

    long elapsed = 0;
    timer.Stop(ref elapsed);
    Dbg.WriteLine("Mirage - scms created in: {0}ms", elapsed);

    return(model);
}
/// <summary>
/// Runs the short-time Fourier transform over the audio data.
/// </summary>
/// <param name="audiodata">Samples to transform.</param>
/// <returns>
/// A matrix with winsize / 2 + 1 rows and one column per hop, filled column
/// by column by the FFT helper.
/// </returns>
public Matrix Apply(float[] audiodata) {
    DbgTimer timer = new DbgTimer();
    timer.Start();

    // Number of windows that fit when advancing by hopsize samples each time.
    int hopCount = (audiodata.Length - winsize) / hopsize;

    // Matrix[Row, Column]: winsize / 2 + 1 rows, hopCount columns.
    Matrix result = new Matrix(winsize / 2 + 1, hopCount);

    int column = 0;
    int sampleOffset = 0;
    while (column < hopCount) {
        fft.ComputeMirageMatrixUsingFftw(ref result, column, audiodata, sampleOffset);
        column++;
        sampleOffset += hopsize;
    }

    Dbg.WriteLine("Stft (ComputeMirageMatrix) Execution Time: " + timer.Stop().TotalMilliseconds + " ms");
    return(result);
}
/// <summary>
/// Decodes a file through mirageaudio_decode and keeps a selection of the
/// decoded frames, chosen by their summed value ("total energy").
/// </summary>
/// <param name="file">Path passed to mirageaudio_decode.</param>
/// <returns>
/// A matrix with <c>size</c> rows and <c>frames / 2</c> columns. The columns
/// are the frames found at positions [frames / 2, 2 * (frames / 2)) after
/// sorting all frames by ascending sum.
/// </returns>
/// <exception cref="AudioDecoderErrorException">
/// The decoder reported -1, or returned no frames or no data.
/// </exception>
/// <exception cref="AudioDecoderCanceledException">The decoder reported -2.</exception>
public Matrix Decode(string file) {
    int frames = 0;
    int size = 0;
    int ret = 0;

    IntPtr data = mirageaudio_decode(ma, file, ref frames, ref size, ref ret);

    switch (ret) {
        case -1:
            throw new AudioDecoderErrorException();
        case -2:
            throw new AudioDecoderCanceledException();
    }
    if ((frames <= 0) || (size <= 0)) {
        // No data
        throw new AudioDecoderErrorException();
    }

    Dbg.WriteLine("Mirage - decoded frames={0},size={1}", frames, size);

    // Sum every frame (column) so the frames can be ordered by total energy.
    float[] energy = new float[frames];
    int[] order = new int[frames];

    unsafe {
        float* decoded = (float*)data;
        for (int frame = 0; frame < frames; frame++) {
            energy[frame] = 0;
            order[frame] = frame;
            for (int row = 0; row < size; row++) {
                energy[frame] += decoded[row * frames + frame];
            }
        }
    }

    // Sorts the sums ascending and reorders the frame positions alongside them.
    Array.Sort(energy, order);

    // Copy the selected frames into the result matrix.
    int kept = frames / 2;
    Matrix result = new Matrix(size, kept);

    unsafe {
        float* decoded = (float*)data;
        fixed (float* target = result.d) {
            for (int column = 0; column < kept; column++) {
                int sourceFrame = order[kept + column];
                for (int row = 0; row < size; row++) {
                    target[row * kept + column] = decoded[row * frames + sourceFrame];
                }
            }
        }
    }

    return(result);
}
/// <summary>
/// Decodes an audio file to a mono float buffer. BASS is tried first; when it
/// reports no duration or fails to read, the mplayer + sox fallback is used.
/// </summary>
/// <param name="fileIn">Path of the audio file.</param>
/// <param name="srate">Requested sample rate.</param>
/// <param name="secondsToAnalyze">
/// Length of the section to extract. Files longer than this are cut to a
/// section around their middle; shorter files are read completely.
/// </param>
/// <returns>
/// The decoded samples, or null when <paramref name="fileIn"/> is null or
/// empty, the file does not exist, or every decoder failed.
/// </returns>
public static float[] Decode(string fileIn, int srate, int secondsToAnalyze) {
    // A null or empty path can never be decoded; previously it slipped past the
    // existence check and crashed further down instead of returning null.
    if (string.IsNullOrEmpty(fileIn)) {
        Console.Out.WriteLine("No file found {0}!", fileIn);
        return(null);
    }

    FileInfo fi = new FileInfo(fileIn);
    if (!fi.Exists) {
        Console.Out.WriteLine("No file found {0}!", fileIn);
        return(null);
    }

    float[] floatBuffer = null;

    // Try to use Un4Seen Bass
    BassProxy bass = BassProxy.Instance;
    double duration = bass.GetDurationInSeconds(fileIn);
    if (duration > 0) {
        Dbg.WriteLine("Using BASS to decode the file ...");

        // 0 / 0 is what the original passed to read the whole file.
        // NOTE(review): the two arguments look like length and start offset in
        // milliseconds - confirm against BassProxy.ReadMonoFromFile.
        int lengthMs = 0;
        int startMs = 0;

        if (duration > secondsToAnalyze) {
            // Find the segment to extract around the middle of the file.
            double startSeconds = (duration / 2 - (secondsToAnalyze / 2));
            if (startSeconds < 0) {
                startSeconds = 0;
            }
            lengthMs = secondsToAnalyze * 1000;
            startMs = (int)(startSeconds * 1000);
        }

        floatBuffer = bass.ReadMonoFromFile(fileIn, srate, lengthMs, startMs);

        // If this fails, the duration read from the tags was wrong or there is
        // something wrong with the audio file.
        if (floatBuffer == null) {
            IOUtils.LogMessageToFile(Mir.WARNING_FILES_LOG, fileIn);
        }
    }

    // Bass failed reading or never even tried, so use another alternative
    if (floatBuffer == null) {
        Dbg.WriteLine("Using MPlayer and SOX to decode the file ...");
        fileIn = Regex.Replace(fileIn, "%20", " ");
        floatBuffer = DecodeUsingMplayerAndSox(fileIn, srate, secondsToAnalyze);
    }

    return(floatBuffer);
}
/// <summary>
/// Converts an audio file with sox.exe into a temporary raw file of mono
/// 32-bit float samples and reads (a section of) it into memory.
/// </summary>
/// <param name="fileIn">Path of the file to decode; every "%20" is replaced by a space first.</param>
/// <param name="srate">Sample rate passed to sox.</param>
/// <param name="secondsToAnalyze">
/// Length of the section to extract. When the raw file holds more than
/// secondsToAnalyze * srate samples, only that many samples around the middle
/// of the file are returned; otherwise the whole file is returned.
/// </param>
/// <returns>
/// The decoded samples, or null when sox exits with a non-zero exit code, the
/// raw file is missing, its length is not a multiple of the sample size, or
/// the requested section does not fit into a single array.
/// </returns>
public static float[] DecodeUsingSox(string fileIn, int srate, int secondsToAnalyze) {
    lock (_locker) {
        using (Process toraw = new Process()) {
            fileIn = Regex.Replace(fileIn, "%20", " ");

            DbgTimer t = new DbgTimer();
            t.Start();
            Dbg.WriteLine("Decoding: " + fileIn);

            // GetTempFileName creates an empty placeholder file on disk, so both
            // the placeholder and the derived raw file have to be removed again.
            String tempFile = System.IO.Path.GetTempFileName();
            String raw = tempFile + "_raw.wav";
            Dbg.WriteLine("Temporary raw file: " + raw);

            try {
                toraw.StartInfo.FileName = "./NativeLibraries\\sox\\sox.exe";
                toraw.StartInfo.Arguments = " \"" + fileIn + "\" -r " + srate + " -e float -b 32 -G -t raw \"" + raw + "\" channels 1";
                toraw.StartInfo.UseShellExecute = false;
                toraw.StartInfo.RedirectStandardOutput = true;
                toraw.StartInfo.RedirectStandardError = true;

                // Both pipes must be drained while the child runs: waiting for
                // exit first deadlocks as soon as the child fills a pipe buffer.
                // stderr is collected asynchronously, stdout synchronously.
                System.Text.StringBuilder standardError = new System.Text.StringBuilder();
                toraw.ErrorDataReceived += (sender, e) => {
                    if (e.Data != null) {
                        standardError.AppendLine(e.Data);
                    }
                };

                toraw.Start();
                toraw.BeginErrorReadLine();
                string standardOutput = toraw.StandardOutput.ReadToEnd();
                toraw.WaitForExit();

                // Original author's note on exit codes:
                // 0 = successful, 1 = partially successful, 2 = failed
                if (toraw.ExitCode != 0) {
                    Console.Out.WriteLine(standardError.ToString());
                    return(null);
                }

#if DEBUG
                Console.Out.WriteLine(standardOutput);
#endif

                FileInfo fi = new FileInfo(raw);
                using (FileStream fs = fi.OpenRead()) {
                    // 64-bit arithmetic throughout: raw files can exceed 2 GB.
                    long totalBytes = fi.Length;
                    if (totalBytes % sizeof(float) != 0) {
                        return(null);
                    }
                    long totalSamples = totalBytes / sizeof(float);

                    long samplesToRead = totalSamples;
                    long windowSamples = (long)secondsToAnalyze * srate;

                    // If the audio file is larger than the seconds to analyze,
                    // extract a section around its middle.
                    if (totalSamples > windowSamples) {
                        samplesToRead = windowSamples;

                        long startSample = totalSamples / 2 - (long)(secondsToAnalyze / 2) * srate;
                        // With an odd secondsToAnalyze the window is not symmetric
                        // and could run past the end of the file; pull it back so
                        // the returned section is real audio, not zero padding.
                        startSample = Math.Min(startSample, totalSamples - samplesToRead);

                        long seekto = startSample * sizeof(float);
                        Dbg.WriteLine("Extracting section: seekto = " + seekto);
                        fs.Seek(seekto, SeekOrigin.Begin);
                    }

                    long bytesToRead = samplesToRead * sizeof(float);
                    if (bytesToRead > int.MaxValue) {
                        // Cannot be held in a single byte array.
                        return(null);
                    }

                    // A single Read call may deliver fewer bytes than requested,
                    // so keep reading until the buffer is full or the file ends.
                    byte[] bytesBuffer = new byte[(int)bytesToRead];
                    int filled = 0;
                    while (filled < bytesBuffer.Length) {
                        int got = fs.Read(bytesBuffer, filled, bytesBuffer.Length - filled);
                        if (got <= 0) {
                            break;
                        }
                        filled += got;
                    }

                    // Reinterpret the raw bytes as floats in machine byte order,
                    // exactly like BitConverter.ToSingle does per element.
                    float[] floatBuffer = new float[(int)samplesToRead];
                    Buffer.BlockCopy(bytesBuffer, 0, floatBuffer, 0, bytesBuffer.Length);
                    return(floatBuffer);
                }
            } catch (System.IO.FileNotFoundException) {
                return(null);
            } finally {
                // Best-effort cleanup on every path (success, failure, exception).
                // Each file is attempted separately so one failure does not skip the other.
                foreach (String path in new String[] { tempFile, raw }) {
                    try {
                        File.Delete(path);
                    } catch (IOException io) {
                        Console.WriteLine(io);
                    }
                }
                Dbg.WriteLine("Decoding Execution Time: " + t.Stop().TotalMilliseconds + " ms");
            }
        }
    }
}
/// <summary>
/// Gauss-Jordan elimination with full pivoting, performed in place on
/// <paramref name="a"/> with the same row operations applied to <paramref name="b"/>.
/// </summary>
/// <remarks>
/// All indexing is 1-based: rows and columns 1..n of <paramref name="a"/> and
/// columns 1..m of <paramref name="b"/> are used, index 0 is never touched.
/// This follows the classic algorithm in which, on normal return, a has been
/// replaced by its inverse and b by the corresponding solution vectors.
/// </remarks>
/// <param name="a">Square coefficient matrix, modified in place.</param>
/// <param name="n">Number of rows/columns of <paramref name="a"/> that are processed.</param>
/// <param name="b">Right-hand side matrix, modified in place.</param>
/// <param name="m">Number of columns of <paramref name="b"/> that are processed.</param>
/// <exception cref="MatrixSingularException">
/// Thrown when a pivot column is selected more than once or a pivot element is zero.
/// </exception>
private void GaussJordan(ref decimal [,] a, int n, ref decimal [,] b, int m) {
    // Bookkeeping for the pivoting (sized n + 1 because of the 1-based indexing):
    // indxr[i] / indxc[i] = row / column of the i-th pivot, ipiv[k] = how often
    // column k has been used as a pivot column.
    int [] indxc = new int[n + 1];
    int [] indxr = new int[n + 1];
    int [] ipiv = new int[n + 1];
    int i, icol = 0, irow = 0, j, k, l, ll;
    decimal big, dum, pivinv, temp;
    for (j = 1; j <= n; j++) {
        ipiv[j] = 0;
    }
    // Main loop: one column is reduced per iteration.
    for (i = 1; i <= n; i++) {
        // Search the rows and columns not yet used as pivots for the element
        // with the largest absolute value.
        big = 0;
        for (j = 1; j <= n; j++) {
            if (ipiv[j] != 1) {
                for (k = 1; k <= n; k++) {
                    if (ipiv[k] == 0) {
                        if (Math.Abs(a[j, k]) >= big) {
                            big = Math.Abs(a[j, k]);
                            irow = j;
                            icol = k;
                        }
                    } else if (ipiv[k] > 1) {
                        Dbg.WriteLine("Mirage - Gauss/Jordan Singular Matrix (1)");
                        throw new MatrixSingularException();
                    }
                }
            }
        }
        ipiv[icol]++;
        // Swap rows so the pivot element ends up on the diagonal.
        if (irow != icol) {
            for (l = 1; l <= n; l++) {
                temp = a[irow, l];
                a[irow, l] = a[icol, l];
                a[icol, l] = temp;
            }
            for (l = 1; l <= m; l++) {
                temp = b[irow, l];
                b[irow, l] = b[icol, l];
                b[icol, l] = temp;
            }
        }
        // Remember the swap so the column order can be restored at the end.
        indxr[i] = irow;
        indxc[i] = icol;
        if (a[icol, icol] == 0) {
            Dbg.WriteLine("Mirage - Gauss/Jordan Singular Matrix (2)");
            throw new MatrixSingularException();
        }
        // Scale the pivot row by the reciprocal of the pivot element.
        pivinv = 1 / a[icol, icol];
        a[icol, icol] = 1;
        for (l = 1; l <= n; l++) {
            a[icol, l] *= pivinv;
        }
        for (l = 1; l <= m; l++) {
            b[icol, l] *= pivinv;
        }
        // Subtract the pivot row from every other row to clear the pivot column.
        for (ll = 1; ll <= n; ll++) {
            if (ll != icol) {
                dum = a[ll, icol];
                a[ll, icol] = 0;
                for (l = 1; l <= n; l++) {
                    a[ll, l] -= a[icol, l] * dum;
                }
                for (l = 1; l <= m; l++) {
                    b[ll, l] -= b[icol, l] * dum;
                }
            }
        }
    }
    // Undo the row swaps by swapping columns of a, in reverse order of the pivots.
    for (l = n; l >= 1; l--) {
        if (indxr[l] != indxc[l]) {
            for (k = 1; k <= n; k++) {
                temp = a[k, indxr[l]];
                a[k, indxr[l]] = a[k, indxc[l]];
                a[k, indxc[l]] = temp;
            }
        }
    }
}
/// <summary>
/// Builds a Scms model (mean, covariance, inverse covariance) from the MFCC
/// representation of a song.
/// </summary>
/// <param name="mfccs">MFCC matrix (Comirva.Audio.Util.Maths.Matrix).</param>
/// <param name="name">File name prefix used by the DEBUG-only matrix dumps.</param>
/// <returns>
/// The populated Scms model, or null when inverting the covariance matrix
/// throws any exception.
/// </returns>
public static Scms GetScms(Comirva.Audio.Util.Maths.Matrix mfccs, string name) {
    DbgTimer timer = new DbgTimer();
    timer.Start();

    // Mean
    Comirva.Audio.Util.Maths.Matrix meanVector = mfccs.Mean(2);

#if DEBUG
    if (Analyzer.DEBUG_INFO_VERBOSE) {
        if (Analyzer.DEBUG_OUTPUT_TEXT) {
            meanVector.WriteText(name + "_mean.txt");
        }
        meanVector.DrawMatrixGraph(name + "_mean.png");
    }
#endif

    // Covariance
    Comirva.Audio.Util.Maths.Matrix covariance = mfccs.Cov(meanVector);

#if DEBUG
    if (Analyzer.DEBUG_INFO_VERBOSE) {
        if (Analyzer.DEBUG_OUTPUT_TEXT) {
            covariance.WriteText(name + "_covariance.txt");
        }
        covariance.DrawMatrixGraph(name + "_covariance.png");
    }
#endif

    // Inverse covariance; any failure of the inversion aborts the model.
    Comirva.Audio.Util.Maths.Matrix inverseCovariance;
    try {
        inverseCovariance = covariance.InverseGausJordan();
    } catch (Exception) {
        Dbg.WriteLine("MatrixSingularException - Scms failed!");
        return(null);
    }

#if DEBUG
    if (Analyzer.DEBUG_INFO_VERBOSE) {
        if (Analyzer.DEBUG_OUTPUT_TEXT) {
            inverseCovariance.WriteAscii(name + "_inverse_covariance.ascii");
        }
        inverseCovariance.DrawMatrixGraph(name + "_inverse_covariance.png");
    }
#endif

    // Pack the mean and the upper triangles (row by row) into the model.
    int dimension = meanVector.Rows;
    Scms model = new Scms(dimension);
    int packedIndex = 0;
    for (int row = 0; row < dimension; row++) {
        model.mean[row] = (float)meanVector.MatrixData[row][0];
        for (int col = row; col < dimension; col++, packedIndex++) {
            model.cov[packedIndex] = (float)covariance.MatrixData[row][col];
            model.icov[packedIndex] = (float)inverseCovariance.MatrixData[row][col];
        }
    }

    Dbg.WriteLine("Compute Scms - Execution Time: {0} ms", timer.Stop().TotalMilliseconds);
    return(model);
}
/// <summary>
/// Builds a Scms model (mean, covariance, inverse covariance) from the MFCC
/// representation of a song.
/// </summary>
/// <param name="mfcc">MFCC matrix (Mirage.Matrix).</param>
/// <param name="name">File name prefix used by the DEBUG-only matrix dumps.</param>
/// <returns>
/// The populated Scms model, or null when the covariance matrix is singular.
/// </returns>
public static Scms GetScms(Matrix mfcc, string name) {
    DbgTimer timer = new DbgTimer();
    timer.Start();

    // Mean
    Vector meanVector = mfcc.Mean();

#if DEBUG
    if (Analyzer.DEBUG_INFO_VERBOSE) {
        if (Analyzer.DEBUG_OUTPUT_TEXT) {
            meanVector.WriteText(name + "_mean_orig.txt");
        }
        meanVector.DrawMatrixGraph(name + "_mean_orig.png");
    }
#endif

    // Covariance
    Matrix covariance = mfcc.Covariance(meanVector);

#if DEBUG
    if (Analyzer.DEBUG_INFO_VERBOSE) {
        if (Analyzer.DEBUG_OUTPUT_TEXT) {
            covariance.WriteText(name + "_covariance_orig.txt");
        }
        covariance.DrawMatrixGraph(name + "_covariance_orig.png");
    }
#endif

    // Inverse covariance; a singular matrix is reported and yields null
    // instead of an exception.
    Matrix inverseCovariance;
    try {
        inverseCovariance = covariance.Inverse();
    } catch (MatrixSingularException) {
        Dbg.WriteLine("MatrixSingularException - Scms failed!");
        return(null);
    }

#if DEBUG
    if (Analyzer.DEBUG_INFO_VERBOSE) {
        if (Analyzer.DEBUG_OUTPUT_TEXT) {
            inverseCovariance.WriteAscii(name + "_inverse_covariance_orig.txt");
        }
        inverseCovariance.DrawMatrixGraph(name + "_inverse_covariance_orig.png");
    }
#endif

    // Pack the mean and the upper triangles (row by row) into the model.
    int dimension = meanVector.rows;
    Scms model = new Scms(dimension);
    int packedIndex = 0;
    for (int row = 0; row < dimension; row++) {
        model.mean[row] = meanVector.d[row, 0];
        for (int col = row; col < dimension; col++, packedIndex++) {
            model.cov[packedIndex] = covariance.d[row, col];
            model.icov[packedIndex] = inverseCovariance.d[row, col];
        }
    }

    Dbg.WriteLine("(Mirage) - scms created in: {0} ms", timer.Stop().TotalMilliseconds);
    return(model);
}