public static float[] DecodeUsingMplayerAndSox(string fileIn, int srate, int secondsToAnalyze) { lock (_locker) { using (Process tosoxreadable = new Process()) { fileIn = Regex.Replace(fileIn, "%20", " "); DbgTimer t = new DbgTimer(); t.Start(); String curdir = System.Environment.CurrentDirectory; Dbg.WriteLine("Decoding: " + fileIn); String tempFile = System.IO.Path.GetTempFileName(); String soxreadablewav = tempFile + ".wav"; Dbg.WriteLine("Temporary wav file: " + soxreadablewav); tosoxreadable.StartInfo.FileName = "./NativeLibraries\\mplayer\\mplayer.exe"; tosoxreadable.StartInfo.Arguments = " -quiet -vc null -vo null -ao pcm:fast:waveheader \"" + fileIn + "\" -ao pcm:file=\\\"" + soxreadablewav + "\\\""; tosoxreadable.StartInfo.UseShellExecute = false; tosoxreadable.StartInfo.RedirectStandardOutput = true; tosoxreadable.StartInfo.RedirectStandardError = true; tosoxreadable.Start(); tosoxreadable.WaitForExit(); int exitCode = tosoxreadable.ExitCode; // 0 = successful // 1 = partially successful // 2 = failed if (exitCode != 0) { string standardError = tosoxreadable.StandardError.ReadToEnd(); Console.Out.WriteLine(standardError); return(null); } #if DEBUG string standardOutput = tosoxreadable.StandardOutput.ReadToEnd(); Console.Out.WriteLine(standardOutput); #endif float[] floatBuffer = null; if (File.Exists(soxreadablewav)) { floatBuffer = DecodeUsingSox(soxreadablewav, srate, secondsToAnalyze); try { File.Delete(tempFile); File.Delete(soxreadablewav); } catch (IOException io) { Console.WriteLine(io); } } Dbg.WriteLine("Decoding Execution Time: " + t.Stop().TotalMilliseconds + " ms"); return(floatBuffer); } } }
public static float[] DecodeUsingMplayer(string fileIn, int srate) { lock (_locker) { using (Process towav = new Process()) { fileIn = Regex.Replace(fileIn, "%20", " "); DbgTimer t = new DbgTimer(); t.Start(); String curdir = System.Environment.CurrentDirectory; Dbg.WriteLine("Decoding: " + fileIn); String tempFile = System.IO.Path.GetTempFileName(); String wav = tempFile + ".wav"; Dbg.WriteLine("Temporary wav file: " + wav); towav.StartInfo.FileName = "./NativeLibraries\\mplayer\\mplayer.exe"; towav.StartInfo.Arguments = " -quiet -ao pcm:fast:waveheader \"" + fileIn + "\" -format floatle -af resample=" + srate + ":0:2,pan=1:0.5:0.5 -channels 1 -vo null -vc null -ao pcm:file=\\\"" + wav + "\\\""; towav.StartInfo.UseShellExecute = false; towav.StartInfo.RedirectStandardOutput = true; towav.StartInfo.RedirectStandardError = true; towav.Start(); towav.WaitForExit(); int exitCode = towav.ExitCode; // 0 = successful // 1 = partially successful // 2 = failed if (exitCode != 0) { string standardError = towav.StandardError.ReadToEnd(); Console.Out.WriteLine(standardError); return(null); } #if DEBUG string standardOutput = towav.StandardOutput.ReadToEnd(); Console.Out.WriteLine(standardOutput); #endif RiffRead riff = new RiffRead(wav); riff.Process(); float[] floatBuffer = riff.SoundData[0]; try { File.Delete(tempFile); //File.Delete(wav); } catch (IOException io) { Console.WriteLine(io); } Dbg.WriteLine("Decoding Execution Time: " + t.Stop().TotalMilliseconds + " ms"); return(floatBuffer); } } }
public Matrix Apply(ref Matrix m) { DbgTimer t = new DbgTimer(); t.Start(); Matrix mel = new Matrix(filterWeights.rows, m.columns); /* * // Performance optimization of ... * mel = filterWeights.Multiply(m); * for (int i = 0; i < mel.rows; i++) { * for (int j = 0; j < mel.columns; j++) { * mel.d[i, j] = (mel.d[i, j] < 1.0f ? 0 : (float)(10.0 * Math.Log10(mel.d[i, j]))); * //mel.d[i, j] = (float)(10.0 * Math.Log10(mel.d[i, j])); * } * } */ int mc = m.columns; int mr = m.rows; int melcolumns = mel.columns; int fwc = filterWeights.columns; int fwr = filterWeights.rows; unsafe { fixed(float *md = m.d, fwd = filterWeights.d, meld = mel.d) { for (int i = 0; i < mc; i++) { for (int k = 0; k < fwr; k++) { int idx = k * melcolumns + i; int kfwc = k * fwc; for (int j = 0; j < mr; j++) { meld[idx] += fwd[kfwc + j] * md[j * mc + i]; } meld[idx] = (meld[idx] < 1.0f ? 0 : (float)(10.0 * Math.Log10(meld[idx]))); } } } } Matrix mfcc = dct.Multiply(mel); Dbg.WriteLine("mfcc (MfccLessOptimized) Execution Time: " + t.Stop().TotalMilliseconds + " ms"); return(mfcc); }
/// <summary> /// Computes a Scms model from the MFCC representation of a song without computing the inverse covariance (the icov entries are left as zeros). /// </summary> /// <param name="mfccs">Comirva.Audio.Util.Maths.Matrix mfccs</param> /// <param name="name">name used for the debug output files</param> /// <returns>the computed Scms model</returns> public static Scms GetScmsNoInverse(Comirva.Audio.Util.Maths.Matrix mfccs, string name) { DbgTimer t = new DbgTimer(); t.Start(); Comirva.Audio.Util.Maths.Matrix mean = mfccs.Mean(2); #if DEBUG if (Analyzer.DEBUG_INFO_VERBOSE) { if (Analyzer.DEBUG_OUTPUT_TEXT) { mean.WriteText(name + "_mean.txt"); } mean.DrawMatrixGraph(name + "_mean.png"); } #endif // Covariance Comirva.Audio.Util.Maths.Matrix covarMatrix = mfccs.Cov(mean); #if DEBUG if (Analyzer.DEBUG_INFO_VERBOSE) { if (Analyzer.DEBUG_OUTPUT_TEXT) { covarMatrix.WriteText(name + "_covariance.txt"); } covarMatrix.DrawMatrixGraph(name + "_covariance.png"); } #endif Comirva.Audio.Util.Maths.Matrix covarMatrixInv = new Comirva.Audio.Util.Maths.Matrix(covarMatrix.Rows, covarMatrix.Columns); // Store the Mean, Covariance and the (empty) Inverse Covariance in an optimal format. int dim = mean.Rows; Scms s = new Scms(dim); int l = 0; for (int i = 0; i < dim; i++) { s.mean[i] = (float)mean.MatrixData[i][0]; for (int j = i; j < dim; j++) { s.cov[l] = (float)covarMatrix.MatrixData[i][j]; s.icov[l] = (float)covarMatrixInv.MatrixData[i][j]; l++; } } Dbg.WriteLine("GetScmsNoInverse - Execution Time: {0} ms", t.Stop().TotalMilliseconds); return(s); }
public Matrix Apply(ref Matrix m) { DbgTimer t = new DbgTimer(); t.Start(); Matrix mel = new Matrix(filterWeights.rows, m.columns); int mc = m.columns; int melcolumns = mel.columns; int fwc = filterWeights.columns; int fwr = filterWeights.rows; unsafe { fixed(float *md = m.d, fwd = filterWeights.d, meld = mel.d) { for (int i = 0; i < mc; i++) { for (int k = 0; k < fwr; k++) { int idx = k * melcolumns + i; int kfwc = k * fwc; // The filter weights matrix is mostly 0. // So only multiply non-zero elements! for (int j = fwFT[k, 0]; j < fwFT[k, 1]; j++) { meld[idx] += fwd[kfwc + j] * md[j * mc + i]; } meld[idx] = (meld[idx] < 1.0f ? 0 : (float)(10.0 * Math.Log10(meld[idx]))); } } } } try { Matrix mfcc = dct.Multiply(mel); long stop = 0; t.Stop(ref stop); Dbg.WriteLine("Mirage - mfcc Execution Time: {0}ms", stop); return(mfcc); } catch (MatrixDimensionMismatchException) { throw new MfccFailedException(); } }
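// A minimal sketch (not present in the source) of how the fwFT non-zero ranges used in the loop
// above could be precomputed. It assumes a dense rows x cols float[,] view of the triangular mel
// filter bank (the source keeps the same data in filterWeights.d); each filter row is non-zero
// only over a contiguous column range, so only [first, last) needs multiplying.
static int[,] ComputeNonZeroRanges(float[,] weights) {
    int rows = weights.GetLength(0);
    int cols = weights.GetLength(1);
    var ranges = new int[rows, 2];
    for (int r = 0; r < rows; r++) {
        int first = 0;
        while (first < cols && weights[r, first] == 0f) first++;
        int last = cols;
        while (last > first && weights[r, last - 1] == 0f) last--;
        ranges[r, 0] = first; // first non-zero column (inclusive)
        ranges[r, 1] = last;  // one past the last non-zero column (exclusive)
    }
    return ranges;
}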
public static Scms Analyze (string file_path) { DbgTimer t = new DbgTimer (); t.Start (); Matrix stftdata = ad.Decode (file_path); Matrix mfccdata = mfcc.Apply (ref stftdata); Scms scms = Scms.GetScms (mfccdata); long stop = 0; t.Stop (ref stop); Dbg.WriteLine ("Mirage - Total Execution Time: {0}ms", stop); return scms; }
// EDIT ! public static Matrix AnalyzeMFCC (string file_path) { Hyena.Log.Debug("AnalyzeMFCC called for " + file_path); DbgTimer t = new DbgTimer (); t.Start (); Matrix stftdata = ad.Decode (file_path); Matrix mfccdata = mfcc.Apply (ref stftdata); long stop = 0; t.Stop (ref stop); Dbg.WriteLine ("Mirage - Total Execution Time: {0}ms", stop); return mfccdata; }
public static Scms Analyze(string file_path) { DbgTimer t = new DbgTimer(); t.Start(); Matrix stftdata = ad.Decode(file_path); Matrix mfccdata = mfcc.Apply(ref stftdata); Scms scms = Scms.GetScms(mfccdata); long stop = 0; t.Stop(ref stop); Dbg.WriteLine("Mirage - Total Execution Time: {0}ms", stop); return(scms); }
// Computes a Scms model from the MFCC representation of a song. public static Scms GetScms(Matrix mfcc) { DbgTimer t = new DbgTimer(); t.Start(); // Mean Vector m = mfcc.Mean(); // Covariance Matrix c = mfcc.Covariance(m); // Inverse Covariance Matrix ic; try { ic = c.Inverse(); } catch (MatrixSingularException) { throw new ScmsImpossibleException(); } // Store the Mean, Covariance, Inverse Covariance in an optimal format. int dim = m.rows; Scms s = new Scms(dim); int l = 0; for (int i = 0; i < dim; i++) { s.mean[i] = m.d[i, 0]; for (int j = i; j < dim; j++) { s.cov[l] = c.d[i, j]; s.icov[l] = ic.d[i, j]; l++; } } long stop = 0; t.Stop(ref stop); Dbg.WriteLine("Mirage - scms created in: {0}ms", stop); return(s); }
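// Sketch only (this helper is not in the source): the loops above pack the upper triangle of the
// dim x dim covariance (and inverse covariance) row by row into a flat array of length
// dim*(dim+1)/2. Reading element (i, j) back out, assuming i <= j, works like this:
static float GetPackedUpperTriangle(float[] packed, int dim, int i, int j) {
    // rows 0..i-1 contribute dim + (dim-1) + ... + (dim-i+1) = i*dim - i*(i-1)/2 entries
    int rowOffset = i * dim - (i * (i - 1)) / 2;
    return packed[rowOffset + (j - i)];
}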
/// <summary> /// Apply the STFT on the audiodata /// </summary> /// <param name="audiodata">Audiodata to apply the STFT on</param> /// <returns>A matrix with the result of the STFT</returns> public Matrix Apply(float[] audiodata) { DbgTimer t = new DbgTimer(); t.Start(); // calculate how many hops (bands) we have using the current overlap (hopsize) int hops = (audiodata.Length - winsize)/ hopsize; // Create a Matrix with "winsize" Rows and "hops" Columns // Matrix[Row, Column] Matrix stft = new Matrix(winsize/2 +1, hops); for (int i = 0; i < hops; i++) { fft.ComputeMirageMatrixUsingFftw(ref stft, i, audiodata, i*hopsize); } Dbg.WriteLine("Stft (ComputeMirageMatrix) Execution Time: " + t.Stop().TotalMilliseconds + " ms"); return stft; }
/// <summary> /// Apply the STFT on the audiodata /// </summary> /// <param name="audiodata">Audiodata to apply the STFT on</param> /// <returns>A matrix with the result of the STFT</returns> public Matrix Apply(float[] audiodata) { DbgTimer t = new DbgTimer(); t.Start(); // calculate how many hops (bands) we have using the current overlap (hopsize) int hops = (audiodata.Length - winsize) / hopsize; // Create a Matrix with "winsize" Rows and "hops" Columns // Matrix[Row, Column] Matrix stft = new Matrix(winsize / 2 + 1, hops); for (int i = 0; i < hops; i++) { fft.ComputeMirageMatrixUsingFftw(ref stft, i, audiodata, i * hopsize); } Dbg.WriteLine("Stft (ComputeMirageMatrix) Execution Time: " + t.Stop().TotalMilliseconds + " ms"); return(stft); }
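// A small worked example of the hop count used above (the concrete values are illustrative,
// although 2048/1024 matches the specgram call mentioned elsewhere in this code): with
// winsize = 2048, hopsize = 1024 and one second of 44100 Hz audio, the STFT matrix gets
// (44100 - 2048) / 1024 = 41 columns and 2048/2 + 1 = 1025 rows.
int winsize = 2048, hopsize = 1024;
float[] audiodata = new float[44100];
int hops = (audiodata.Length - winsize) / hopsize; // 41 frames
int bins = winsize / 2 + 1;                        // 1025 frequency bins
Console.WriteLine("{0} x {1}", bins, hops);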
public static AudioFeature AnalyzeMandelEllis(FileInfo filePath, bool doOutputDebugInfo=DEFAULT_DEBUG_INFO) { DbgTimer t = new DbgTimer(); t.Start (); // get work config from the audio file WorkUnitParameterObject param = GetWorkUnitParameterObjectFromAudioFile(filePath); // Calculate the audio feature AudioFeature audioFeature = mandelEllisExtractor.Calculate(MathUtils.FloatToDouble(param.AudioSamples)); if (audioFeature != null) { // Store duration audioFeature.Duration = (long) param.DurationInMs; // Store file name audioFeature.Name = filePath.Name; } Dbg.WriteLine ("MandelEllisExtractor - Total Execution Time: {0} ms", t.Stop().TotalMilliseconds); return audioFeature; }
/// <summary> /// Method to analyze and add using the soundfingerprinting methods /// </summary> /// <param name="filePath">full file path</param> /// <param name="repository">Soundfingerprinting Repository</param> /// <param name="doOutputDebugInfo">decide whether to output debug info like spectrogram and audiofile (default value can be set)</param> /// <param name="useHaarWavelet">decide whether to use haar wavelet compression or DCT compression</param> /// <returns>true if successful</returns> public static bool AnalyzeAndAddSoundfingerprinting(FileInfo filePath, Repository repository, bool doOutputDebugInfo=DEFAULT_DEBUG_INFO, bool useHaarWavelet = true) { DbgTimer t = new DbgTimer(); t.Start (); // get work config from the audio file WorkUnitParameterObject param = GetWorkUnitParameterObjectFromAudioFile(filePath); param.FingerprintingConfiguration = fingerprintingConfigCreation; string fileName = param.FileName; // build track Track track = new Track(); track.Title = param.FileName; track.TrackLengthMs = (int) param.DurationInMs; track.FilePath = param.PathToAudioFile; track.Tags = param.Tags; track.Id = -1; // this will be set by the insert method // Get fingerprint signatures using the Soundfingerprinting methods double[][] logSpectrogram; List<bool[]> fingerprints; List<double[][]> spectralImages; if (repository.InsertTrackInDatabaseUsingSamples(track, param.FingerprintingConfiguration.NumberOfHashTables, param.FingerprintingConfiguration.NumberOfKeys, param, out logSpectrogram, out fingerprints, out spectralImages)) { // store logSpectrogram as Matrix Comirva.Audio.Util.Maths.Matrix logSpectrogramMatrix = new Comirva.Audio.Util.Maths.Matrix(logSpectrogram); logSpectrogramMatrix = logSpectrogramMatrix.Transpose(); #region Debug for Soundfingerprinting Method if (doOutputDebugInfo) { // Image Service ImageService imageService = new ImageService(repository.FingerprintService.SpectrumService, repository.FingerprintService.WaveletService); imageService.GetLogSpectralImages(logSpectrogram, fingerprintingConfigCreation.Stride, fingerprintingConfigCreation.FingerprintLength, fingerprintingConfigCreation.Overlap, 2).Save(fileName + "_specgram_logimages.png"); logSpectrogramMatrix.DrawMatrixImageLogValues(fileName + "_specgram_logimage.png", true); if (DEBUG_OUTPUT_TEXT) { logSpectrogramMatrix.WriteCSV(fileName + "_specgram_log.csv", ";"); } } #endregion } else { // failed Console.Out.WriteLine("Failed! Could not compute the soundfingerprint {0}!", fileName); return false; } Dbg.WriteLine ("AnalyzeAndAddSoundfingerprinting - Total Execution Time: {0} ms", t.Stop().TotalMilliseconds); return true; }
/// <summary> /// Method to analyse and add all the different types of audio features /// </summary> /// <param name="filePath">full file path</param> /// <param name="db">Scms database (Mirage)</param> /// <param name="repository">Soundfingerprinting Repository</param> /// <param name="doOutputDebugInfo">decide whether to output debug info like spectrogram and audiofile (default value can be set)</param> /// <param name="useHaarWavelet">decide whether to use haar wavelet compression or DCT compression</param> /// <returns>true if successful</returns> public static bool AnalyzeAndAddComplete(FileInfo filePath, Db db, Repository repository, bool doOutputDebugInfo=DEFAULT_DEBUG_INFO, bool useHaarWavelet = true) { DbgTimer t = new DbgTimer(); t.Start (); // get work config from the audio file WorkUnitParameterObject param = GetWorkUnitParameterObjectFromAudioFile(filePath); if (param == null) return false; param.FingerprintingConfiguration = fingerprintingConfigCreation; string fileName = param.FileName; // build track Track track = new Track(); track.Title = param.FileName; track.TrackLengthMs = (int) param.DurationInMs; track.FilePath = param.PathToAudioFile; track.Tags = param.Tags; track.Id = -1; // this will be set by the insert method double[][] logSpectrogram; List<bool[]> fingerprints; if (repository.InsertTrackInDatabaseUsingSamples(track, param.FingerprintingConfiguration.NumberOfHashTables, param.FingerprintingConfiguration.NumberOfKeys, param, out logSpectrogram, out fingerprints)) { // store logSpectrogram as Matrix try { Comirva.Audio.Util.Maths.Matrix logSpectrogramMatrix = new Comirva.Audio.Util.Maths.Matrix(logSpectrogram); logSpectrogramMatrix = logSpectrogramMatrix.Transpose(); #region Output debugging information (Saving spectrograms and/or csv files) if (doOutputDebugInfo) { logSpectrogramMatrix.DrawMatrixImageLogValues(fileName + "_matrix_spectrogram.png", true); if (DEBUG_OUTPUT_TEXT) { logSpectrogramMatrix.WriteCSV(fileName + "_matrix_spectrogram.csv", ";"); } // Save debug images using fingerprinting methods SaveFingerprintingDebugImages(fileName, logSpectrogram, fingerprints, repository.FingerprintService, param.FingerprintingConfiguration); } #endregion // Insert Statistical Cluster Model Similarity Audio Feature as well if (!AnalyseAndAddScmsUsingLogSpectrogram(logSpectrogramMatrix, param, db, track.Id, doOutputDebugInfo, useHaarWavelet)) { Dbg.WriteLine("AnalyzeAndAddComplete - Failed inserting Statistical Cluster Model Similarity Audio Feature"); // Failed, but ignore! } } catch (Exception e) { Dbg.WriteLine("AnalyzeAndAddComplete - Failed creating Statistical Cluster Model Similarity Audio Feature"); Dbg.WriteLine(e.Message); // Failed, but ignore! } } else { // Failed return false; } Dbg.WriteLine("AnalyzeAndAddComplete - Total Execution Time: {0} ms", t.Stop().TotalMilliseconds); return true; }
/// <summary> /// Computes a Scms model from the MFCC representation of a song. /// </summary> /// <param name="mfcc">Comirva.Audio.Util.Maths.Matrix mfcc</param> /// <returns></returns> public static Scms GetScms(Comirva.Audio.Util.Maths.Matrix mfccs, string name) { DbgTimer t = new DbgTimer(); t.Start(); Comirva.Audio.Util.Maths.Matrix mean = mfccs.Mean(2); #if DEBUG if (Analyzer.DEBUG_INFO_VERBOSE) { if (Analyzer.DEBUG_OUTPUT_TEXT) { mean.WriteText(name + "_mean.txt"); } mean.DrawMatrixGraph(name + "_mean.png"); } #endif // Covariance Comirva.Audio.Util.Maths.Matrix covarMatrix = mfccs.Cov(mean); #if DEBUG if (Analyzer.DEBUG_INFO_VERBOSE) { if (Analyzer.DEBUG_OUTPUT_TEXT) { covarMatrix.WriteText(name + "_covariance.txt"); } covarMatrix.DrawMatrixGraph(name + "_covariance.png"); } #endif // Inverse Covariance Comirva.Audio.Util.Maths.Matrix covarMatrixInv; try { covarMatrixInv = covarMatrix.InverseGausJordan(); } catch (Exception) { Dbg.WriteLine("MatrixSingularException - Scms failed!"); return(null); } #if DEBUG if (Analyzer.DEBUG_INFO_VERBOSE) { if (Analyzer.DEBUG_OUTPUT_TEXT) { covarMatrixInv.WriteAscii(name + "_inverse_covariance.ascii"); } covarMatrixInv.DrawMatrixGraph(name + "_inverse_covariance.png"); } #endif // Store the Mean, Covariance, Inverse Covariance in an optimal format. int dim = mean.Rows; Scms s = new Scms(dim); int l = 0; for (int i = 0; i < dim; i++) { s.mean[i] = (float)mean.MatrixData[i][0]; for (int j = i; j < dim; j++) { s.cov[l] = (float)covarMatrix.MatrixData[i][j]; s.icov[l] = (float)covarMatrixInv.MatrixData[i][j]; l++; } } Dbg.WriteLine("Compute Scms - Execution Time: {0} ms", t.Stop().TotalMilliseconds); return(s); }
private static bool AnalyseAndAddScmsUsingFingerprints(List<bool[]> fingerprints, WorkUnitParameterObject param, Db db, int trackId, bool doOutputDebugInfo=DEFAULT_DEBUG_INFO) { DbgTimer t = new DbgTimer(); t.Start (); // Insert Statistical Cluster Model Similarity Audio Feature string fileName = param.FileName; int fingerprintWidth = param.FingerprintingConfiguration.FingerprintLength; int fingerprintHeight = param.FingerprintingConfiguration.LogBins; int fingerprintCount = 0; foreach (bool[] fingerprint in fingerprints) { fingerprintCount++; Comirva.Audio.Util.Maths.Matrix scmsMatrix = new Comirva.Audio.Util.Maths.Matrix(fingerprintWidth, fingerprintHeight); for (int i = 0; i < fingerprintWidth /*128*/; i++) { for (int j = 0; j < fingerprintHeight /*32*/; j++) { // Negative Numbers = 01 // Positive Numbers = 10 // Zeros = 00 bool v1 = fingerprint[(2 * fingerprintHeight * i) + (2 * j)]; bool v2 = fingerprint[(2 * fingerprintHeight * i) + (2 * j) + 1]; if (v1) { scmsMatrix.MatrixData[i][j] = 2.0; } else if (v2) { scmsMatrix.MatrixData[i][j] = 0.0; } else { scmsMatrix.MatrixData[i][j] = 1.0; } } } if (doOutputDebugInfo) { scmsMatrix.DrawMatrixImage(String.Format("{0}_fingerprint_{1}.png", fileName, fingerprintCount), fingerprintWidth, fingerprintHeight); } #region Store in a Statistical Cluster Model Similarity class. Scms audioFeature = Scms.GetScmsNoInverse(scmsMatrix, fileName); if (audioFeature != null) { // Store bitstring hash as well audioFeature.BitString = GetBitString(fingerprint); // Store duration audioFeature.Duration = (long) param.DurationInMs; // Store file name audioFeature.Name = param.PathToAudioFile; // Add to database int id = trackId; if (db.AddTrack(audioFeature) == -1) { Console.Out.WriteLine("Failed! Could not add audio feature to database ({0})!", fileName); return false; } } else { return false; } #endregion } Dbg.WriteLine ("AnalyseAndAddScmsUsingFingerprints - Execution Time: {0} ms", t.Stop().TotalMilliseconds); return true; }
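// Sketch (helper name and signature are illustrative, not from the source) of the bit-pair
// decoding performed inline above: each spectral value is stored as two bits, 10 = positive,
// 01 = negative, 00 = zero, which the loop maps to 2.0 / 0.0 / 1.0 respectively.
static double DecodeBitPair(bool[] fingerprint, int logBins, int column, int row) {
    bool positive = fingerprint[(2 * logBins * column) + (2 * row)];
    bool negative = fingerprint[(2 * logBins * column) + (2 * row) + 1];
    if (positive) return 2.0; // "10" = positive coefficient
    if (negative) return 0.0; // "01" = negative coefficient
    return 1.0;               // "00" = zero
}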
/// <summary> /// Query the database for perceptually similar tracks using the sound fingerprinting methods /// </summary> /// <param name="filePath">input file</param> /// <returns>a dictionary of similar tracks</returns> public static Dictionary<Track, double> SimilarTracksSoundfingerprinting(FileInfo filePath, Repository repository) { DbgTimer t = new DbgTimer(); t.Start (); // get work config from the audio file WorkUnitParameterObject param = GetWorkUnitParameterObjectFromAudioFile(filePath); param.FingerprintingConfiguration = fingerprintingConfigQuerying; Dictionary<Track, double> candidates = repository.FindSimilarFromAudioSamples(param.FingerprintingConfiguration.NumberOfHashTables, param.FingerprintingConfiguration.NumberOfKeys, 1, param); Dbg.WriteLine ("SimilarTracksSoundfingerprinting - Total Execution Time: {0} ms", t.Stop().TotalMilliseconds); return candidates; }
/// <summary> /// Computes a Scms model from the MFCC representation of a song. /// </summary> /// <param name="mfcc">Comirva.Audio.Util.Maths.Matrix mfcc</param> /// <returns></returns> public static Scms GetScms(Comirva.Audio.Util.Maths.Matrix mfccs, string name) { DbgTimer t = new DbgTimer(); t.Start(); Comirva.Audio.Util.Maths.Matrix mean = mfccs.Mean(2); #if DEBUG if (Analyzer.DEBUG_INFO_VERBOSE) { mean.WriteText(name + "_mean.txt"); mean.DrawMatrixGraph(name + "_mean.png"); } #endif // Covariance Comirva.Audio.Util.Maths.Matrix covarMatrix = mfccs.Cov(mean); #if DEBUG if (Analyzer.DEBUG_INFO_VERBOSE) { covarMatrix.WriteText(name + "_covariance.txt"); covarMatrix.DrawMatrixGraph(name + "_covariance.png"); } #endif // Inverse Covariance Comirva.Audio.Util.Maths.Matrix covarMatrixInv; try { covarMatrixInv = covarMatrix.InverseGausJordan(); } catch (Exception) { Dbg.WriteLine("MatrixSingularException - Scms failed!"); return null; } #if DEBUG if (Analyzer.DEBUG_INFO_VERBOSE) { covarMatrixInv.WriteAscii(name + "_inverse_covariance.ascii"); covarMatrixInv.DrawMatrixGraph(name + "_inverse_covariance.png"); } #endif // Store the Mean, Covariance, Inverse Covariance in an optimal format. int dim = mean.Rows; Scms s = new Scms(dim); int l = 0; for (int i = 0; i < dim; i++) { s.mean[i] = (float) mean.MatrixData[i][0]; for (int j = i; j < dim; j++) { s.cov[l] = (float) covarMatrix.MatrixData[i][j]; s.icov[l] = (float) covarMatrixInv.MatrixData[i][j]; l++; } } Dbg.WriteLine("(Comirva) - scms created in: {0} ms", t.Stop().TotalMilliseconds); return s; }
public double[][] CreateLogSpectrogram( float[] samples, IWindowFunction windowFunction, AudioServiceConfiguration configuration) { DbgTimer t = new DbgTimer(); t.Start (); if (configuration.NormalizeSignal) { NormalizeInPlace(samples); } int width = (samples.Length - configuration.WindowSize) / configuration.Overlap; /*width of the image*/ double[][] frames = new double[width][]; int[] logFrequenciesIndexes = GenerateLogFrequencies(configuration); double[] window = windowFunction.GetWindow(); for (int i = 0; i < width; i++) { double[] complexSignal = new double[2 * configuration.WindowSize]; /*even - Re, odd - Im, that's how Exocortex works*/ // take 371 ms each 11.6 ms (2048 samples each 64 samples, samplerate 5512) // or 256 ms each 16 ms (8192 samples each 512 samples, samplerate 32000) for (int j = 0; j < configuration.WindowSize; j++) { // Weight by the window function (e.g. Hann) complexSignal[2 * j] = window[j] * samples[(i * configuration.Overlap) + j]; // need to clear out as fft modifies buffer (phase) complexSignal[(2 * j) + 1] = 0; } lomonFFT.TableFFT(complexSignal, true); frames[i] = ExtractLogBins(complexSignal, logFrequenciesIndexes, configuration.LogBins); } Dbg.WriteLine ("Create Log Spectrogram - Execution Time: {0} ms", t.Stop().TotalMilliseconds); return frames; }
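// Sketch, using only the conventions visible above: the complex buffer is interleaved as
// [Re0, Im0, Re1, Im1, ...] (the Exocortex layout), so after lomonFFT.TableFFT the magnitude of
// bin k can be read back as below (ExtractLogBins in the source then reduces these bins to the
// configured log-spaced bands).
static double Magnitude(double[] complexSignal, int k) {
    double re = complexSignal[2 * k];
    double im = complexSignal[2 * k + 1];
    return Math.Sqrt(re * re + im * im);
}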
// TODO: Remember to use another stride when querying public static Dictionary<Track, double> SimilarTracksSoundfingerprinting(FileInfo filePath) { DbgTimer t = new DbgTimer(); t.Start (); FindSimilar.AudioProxies.BassProxy bass = FindSimilar.AudioProxies.BassProxy.Instance; float[] audiodata = AudioFileReader.Decode(filePath.FullName, SAMPLING_RATE, SECONDS_TO_ANALYZE); if (audiodata == null || audiodata.Length == 0) { Dbg.WriteLine("Error! - No Audio Found"); return null; } // Name of file being processed string name = StringUtils.RemoveNonAsciiCharacters(Path.GetFileNameWithoutExtension(filePath.Name)); // Calculate duration in ms double duration = (double) audiodata.Length / SAMPLING_RATE * 1000; // Explode samples to the range of 16 bit shorts (–32,768 to 32,767) // Matlab multiplies with 2^15 (32768) // e.g. if( max(abs(speech))<=1 ), speech = speech * 2^15; end; MathUtils.Multiply(ref audiodata, AUDIO_MULTIPLIER); // 65536 // zero pad if the audio file is too short to perform a mfcc if (audiodata.Length < (fingerprintingConfig.WdftSize + fingerprintingConfig.Overlap)) { int lenNew = fingerprintingConfig.WdftSize + fingerprintingConfig.Overlap; Array.Resize<float>(ref audiodata, lenNew); } // Get fingerprint signatures using the Soundfingerprinting methods // Get database DatabaseService databaseService = DatabaseService.Instance; IPermutations permutations = new LocalPermutations("Soundfingerprinting\\perms.csv", ","); Repository repository = new Repository(permutations, databaseService, fingerprintService); // work config WorkUnitParameterObject param = new WorkUnitParameterObject(); param.FingerprintingConfiguration = fingerprintingConfig; param.PathToAudioFile = filePath.FullName; param.AudioSamples = audiodata; param.MillisecondsToProcess = SECONDS_TO_ANALYZE * 1000; param.StartAtMilliseconds = 0; Dictionary<Track, double> candidates = repository.FindSimilarFromAudioSamples(25, 4, 2, param); /* // Use var keyword to enumerate dictionary foreach (var pair in candidates) { Console.WriteLine("{0} - {1:0.00}", pair.Key.Title, pair.Value); } */ Dbg.WriteLine ("Soundfingerprinting - Total Execution Time: {0} ms", t.Stop().TotalMilliseconds); return candidates; }
public static AudioFeature AnalyzeSoundfingerprinting(FileInfo filePath, bool doOutputDebugInfo=DEFAULT_DEBUG_INFO, bool useHaarWavelet = true) { DbgTimer t = new DbgTimer(); t.Start (); float[] audiodata = AudioFileReader.Decode(filePath.FullName, SAMPLING_RATE, SECONDS_TO_ANALYZE); if (audiodata == null || audiodata.Length == 0) { Dbg.WriteLine("Error! - No Audio Found"); return null; } // Read TAGs using BASS FindSimilar.AudioProxies.BassProxy bass = FindSimilar.AudioProxies.BassProxy.Instance; Un4seen.Bass.AddOn.Tags.TAG_INFO tag_info = bass.GetTagInfoFromFile(filePath.FullName); // Name of file being processed string name = StringUtils.RemoveNonAsciiCharacters(Path.GetFileNameWithoutExtension(filePath.Name)); #if DEBUG if (Analyzer.DEBUG_INFO_VERBOSE) { if (DEBUG_OUTPUT_TEXT) WriteAscii(audiodata, name + "_audiodata.ascii"); if (DEBUG_OUTPUT_TEXT) WriteF3Formatted(audiodata, name + "_audiodata.txt"); } #endif if (doOutputDebugInfo) { DrawGraph(MathUtils.FloatToDouble(audiodata), name + "_audiodata.png"); } // Calculate duration in ms double duration = (double) audiodata.Length / SAMPLING_RATE * 1000; // zero pad if the audio file is too short to perform a mfcc if (audiodata.Length < (fingerprintingConfig.WdftSize + fingerprintingConfig.Overlap)) { int lenNew = fingerprintingConfig.WdftSize + fingerprintingConfig.Overlap; Array.Resize<float>(ref audiodata, lenNew); } // Get fingerprint signatures using the Soundfingerprinting methods // Get database DatabaseService databaseService = DatabaseService.Instance; IPermutations permutations = new LocalPermutations("Soundfingerprinting\\perms.csv", ","); Repository repository = new Repository(permutations, databaseService, fingerprintService); // Image Service ImageService imageService = new ImageService( fingerprintService.SpectrumService, fingerprintService.WaveletService); // work config WorkUnitParameterObject param = new WorkUnitParameterObject(); param.FingerprintingConfiguration = fingerprintingConfig; param.AudioSamples = audiodata; param.PathToAudioFile = filePath.FullName; param.MillisecondsToProcess = SECONDS_TO_ANALYZE * 1000; param.StartAtMilliseconds = 0; // build track Track track = new Track(); track.Title = name; track.TrackLengthMs = (int) duration; track.FilePath = filePath.FullName; track.Id = -1; // this will be set by the insert method #region parse tag_info if (tag_info != null) { Dictionary<string, string> tags = new Dictionary<string, string>(); //if (tag_info.title != string.Empty) tags.Add("title", tag_info.title); if (tag_info.artist != string.Empty) tags.Add("artist", tag_info.artist); if (tag_info.album != string.Empty) tags.Add("album", tag_info.album); if (tag_info.albumartist != string.Empty) tags.Add("albumartist", tag_info.albumartist); if (tag_info.year != string.Empty) tags.Add("year", tag_info.year); if (tag_info.comment != string.Empty) tags.Add("comment", tag_info.comment); if (tag_info.genre != string.Empty) tags.Add("genre", tag_info.genre); if (tag_info.track != string.Empty) tags.Add("track", tag_info.track); if (tag_info.disc != string.Empty) tags.Add("disc", tag_info.disc); if (tag_info.copyright != string.Empty) tags.Add("copyright", tag_info.copyright); if (tag_info.encodedby != string.Empty) tags.Add("encodedby", tag_info.encodedby); if (tag_info.composer != string.Empty) tags.Add("composer", tag_info.composer); if (tag_info.publisher != string.Empty) tags.Add("publisher", tag_info.publisher); if (tag_info.lyricist != string.Empty) tags.Add("lyricist", tag_info.lyricist); if (tag_info.remixer 
!= string.Empty) tags.Add("remixer", tag_info.remixer); if (tag_info.producer != string.Empty) tags.Add("producer", tag_info.producer); if (tag_info.bpm != string.Empty) tags.Add("bpm", tag_info.bpm); //if (tag_info.filename != string.Empty) tags.Add("filename", tag_info.filename); tags.Add("channelinfo", tag_info.channelinfo.ToString()); //if (tag_info.duration > 0) tags.Add("duration", tag_info.duration.ToString()); if (tag_info.bitrate > 0) tags.Add("bitrate", tag_info.bitrate.ToString()); if (tag_info.replaygain_track_gain != -100f) tags.Add("replaygain_track_gain", tag_info.replaygain_track_gain.ToString()); if (tag_info.replaygain_track_peak != -1f) tags.Add("replaygain_track_peak", tag_info.replaygain_track_peak.ToString()); if (tag_info.conductor != string.Empty) tags.Add("conductor", tag_info.conductor); if (tag_info.grouping != string.Empty) tags.Add("grouping", tag_info.grouping); if (tag_info.mood != string.Empty) tags.Add("mood", tag_info.mood); if (tag_info.rating != string.Empty) tags.Add("rating", tag_info.rating); if (tag_info.isrc != string.Empty) tags.Add("isrc", tag_info.isrc); foreach(var nativeTag in tag_info.NativeTags) { string[] keyvalue = nativeTag.Split('='); tags.Add(keyvalue[0], keyvalue[1]); } track.Tags = tags; } #endregion AudioFeature audioFeature = null; double[][] logSpectrogram; if (repository.InsertTrackInDatabaseUsingSamples(track, 25, 4, param, out logSpectrogram)) { if (doOutputDebugInfo) { imageService.GetLogSpectralImages(logSpectrogram, fingerprintingConfig.Stride, fingerprintingConfig.FingerprintLength, fingerprintingConfig.Overlap, 2).Save(name + "_specgram_logimages.png"); Comirva.Audio.Util.Maths.Matrix logSpectrogramMatrix = new Comirva.Audio.Util.Maths.Matrix(logSpectrogram); logSpectrogramMatrix = logSpectrogramMatrix.Transpose(); logSpectrogramMatrix.DrawMatrixImageLogValues(name + "_specgram_logimage.png", true); if (DEBUG_OUTPUT_TEXT) { logSpectrogramMatrix.WriteCSV(name + "_specgram_log.csv", ";"); } } audioFeature = new DummyAudioFeature(); // Store duration audioFeature.Duration = (long) duration; // Store file name audioFeature.Name = filePath.FullName; } else { // failed } Dbg.WriteLine ("Soundfingerprinting - Total Execution Time: {0} ms", t.Stop().TotalMilliseconds); return audioFeature; }
public static Scms AnalyzeScms(FileInfo filePath, bool doOutputDebugInfo=DEFAULT_DEBUG_INFO, bool useHaarWavelet = true) { DbgTimer t = new DbgTimer(); t.Start (); FindSimilar.AudioProxies.BassProxy bass = FindSimilar.AudioProxies.BassProxy.Instance; float[] audiodata = AudioFileReader.Decode(filePath.FullName, SAMPLING_RATE, SECONDS_TO_ANALYZE); if (audiodata == null || audiodata.Length == 0) { Dbg.WriteLine("Error! - No Audio Found"); return null; } // Name of file being processed string name = StringUtils.RemoveNonAsciiCharacters(Path.GetFileNameWithoutExtension(filePath.Name)); #if DEBUG if (Analyzer.DEBUG_INFO_VERBOSE) { if (DEBUG_OUTPUT_TEXT) WriteAscii(audiodata, name + "_audiodata.ascii"); if (DEBUG_OUTPUT_TEXT) WriteF3Formatted(audiodata, name + "_audiodata.txt"); } #endif if (doOutputDebugInfo) { DrawGraph(MathUtils.FloatToDouble(audiodata), name + "_audiodata.png"); } // Calculate duration in ms double duration = (double) audiodata.Length / SAMPLING_RATE * 1000; // Explode samples to the range of 16 bit shorts (–32,768 to 32,767) // Matlab multiplies with 2^15 (32768) // e.g. if( max(abs(speech))<=1 ), speech = speech * 2^15; end; MathUtils.Multiply(ref audiodata, AUDIO_MULTIPLIER); // 65536 // zero pad if the audio file is too short to perform a mfcc if (audiodata.Length < WINDOW_SIZE * 8) { int lenNew = WINDOW_SIZE * 8; Array.Resize<float>(ref audiodata, lenNew); } // 2. Windowing // 3. FFT Comirva.Audio.Util.Maths.Matrix stftdata = stftMirage.Apply(audiodata); #if DEBUG if (Analyzer.DEBUG_INFO_VERBOSE) { if (DEBUG_OUTPUT_TEXT) { stftdata.WriteAscii(name + "_stftdata.ascii"); stftdata.WriteCSV(name + "_stftdata.csv", ";"); } } #endif if (doOutputDebugInfo) { // same as specgram(audio*32768, 2048, 44100, hanning(2048), 1024); stftdata.DrawMatrixImageLogValues(name + "_specgram.png", true); // spec gram with log values for the y axis (frequency) stftdata.DrawMatrixImageLogY(name + "_specgramlog.png", SAMPLING_RATE, 20, SAMPLING_RATE/2, 120, WINDOW_SIZE); } #if DEBUG if (Analyzer.DEBUG_INFO_VERBOSE & false) { #region Inverse STFT double[] audiodata_inverse_stft = stftMirage.InverseStft(stftdata); // divide //MathUtils.Divide(ref audiodata_inverse_stft, AUDIO_MULTIPLIER); MathUtils.Normalize(ref audiodata_inverse_stft); if (DEBUG_OUTPUT_TEXT) { WriteAscii(audiodata_inverse_stft, name + "_audiodata_inverse_stft.ascii"); WriteF3Formatted(audiodata_inverse_stft, name + "_audiodata_inverse_stft.txt"); } DrawGraph(audiodata_inverse_stft, name + "_audiodata_inverse_stft.png"); float[] audiodata_inverse_float = MathUtils.DoubleToFloat(audiodata_inverse_stft); bass.SaveFile(audiodata_inverse_float, name + "_inverse_stft.wav", Analyzer.SAMPLING_RATE); #endregion } #endif // 4. Mel Scale Filterbank // Mel-frequency is proportional to the logarithm of the linear frequency, // reflecting similar effects in the human's subjective aural perception) // 5. Take Logarithm // 6. 
DCT (Discrete Cosine Transform) #if DEBUG if (Analyzer.DEBUG_INFO_VERBOSE) { #region Mel Scale and Log Values Comirva.Audio.Util.Maths.Matrix mellog = mfccMirage.ApplyMelScaleAndLog(ref stftdata); if (DEBUG_OUTPUT_TEXT) { mellog.WriteCSV(name + "_mel_log.csv", ";"); } if (doOutputDebugInfo) { mellog.DrawMatrixImage(name + "_mel_log.png", 600, 400, true, true); } #endregion #region Inverse Mel Scale and Log Values if (false) { Comirva.Audio.Util.Maths.Matrix inverse_mellog = mfccMirage.InverseMelScaleAndLog(ref mellog); inverse_mellog.WriteCSV(name + "_mel_log_inverse.csv", ";"); inverse_mellog.DrawMatrixImageLogValues(name + "_mel_log_inverse.png", true); double[] audiodata_inverse_mellog = stftMirage.InverseStft(inverse_mellog); //MathUtils.Divide(ref audiodata_inverse_mellog, AUDIO_MULTIPLIER/100); MathUtils.Normalize(ref audiodata_inverse_mellog); if (DEBUG_OUTPUT_TEXT) { WriteAscii(audiodata_inverse_mellog, name + "_audiodata_inverse_mellog.ascii"); WriteF3Formatted(audiodata_inverse_mellog, name + "_audiodata_inverse_mellog.txt"); } DrawGraph(audiodata_inverse_mellog, name + "_audiodata_inverse_mellog.png"); float[] audiodata_inverse_mellog_float = MathUtils.DoubleToFloat(audiodata_inverse_mellog); bass.SaveFile(audiodata_inverse_mellog_float, name + "_inverse_mellog.wav", Analyzer.SAMPLING_RATE); } #endregion } #endif Comirva.Audio.Util.Maths.Matrix featureData = null; if (useHaarWavelet) { #region Wavelet Transform int lastHeight = 0; int lastWidth = 0; featureData = mfccMirage.ApplyMelScaleWaveletCompression(ref stftdata, out lastHeight, out lastWidth); #if DEBUG if (Analyzer.DEBUG_INFO_VERBOSE) { if (DEBUG_OUTPUT_TEXT) featureData.WriteAscii(name + "_waveletdata.ascii"); } #endif if (doOutputDebugInfo) { featureData.DrawMatrixImageLogValues(name + "_waveletdata.png", true); } #if DEBUG if (Analyzer.DEBUG_INFO_VERBOSE & false) { #region Inverse Wavelet // try to do an inverse wavelet transform Comirva.Audio.Util.Maths.Matrix stftdata_inverse_wavelet = mfccMirage.InverseMelScaleWaveletCompression(ref featureData, lastHeight, lastWidth); if (DEBUG_OUTPUT_TEXT) stftdata_inverse_wavelet.WriteCSV(name + "_specgramlog_inverse_wavelet.csv", ";"); stftdata_inverse_wavelet.DrawMatrixImageLogValues(name + "_specgramlog_inverse_wavelet.png", true); double[] audiodata_inverse_wavelet = stftMirage.InverseStft(stftdata_inverse_wavelet); MathUtils.Normalize(ref audiodata_inverse_wavelet); if (DEBUG_OUTPUT_TEXT) WriteF3Formatted(audiodata_inverse_wavelet, name + "_audiodata_inverse_wavelet.txt"); DrawGraph(audiodata_inverse_wavelet, name + "_audiodata_inverse_wavelet.png"); bass.SaveFile(MathUtils.DoubleToFloat(audiodata_inverse_wavelet), name + "_inverse_wavelet.wav", Analyzer.SAMPLING_RATE); #endregion } #endif #endregion } else { #region DCT Transform // It seems the Mirage way of applying the DCT is slightly faster than the // Comirva way due to less loops featureData = mfccMirage.ApplyMelScaleDCT(ref stftdata); //featureData = mfccMirage.ApplyComirvaWay(ref stftdata); #if DEBUG if (Analyzer.DEBUG_INFO_VERBOSE) { if (DEBUG_OUTPUT_TEXT) featureData.WriteAscii(name + "_mfccdata.ascii"); } #endif if (doOutputDebugInfo) { featureData.DrawMatrixImageLogValues(name + "_mfccdata.png", true); } #if DEBUG if (Analyzer.DEBUG_INFO_VERBOSE & false) { #region Inverse MFCC // try to do an inverse mfcc Comirva.Audio.Util.Maths.Matrix stftdata_inverse_mfcc = mfccMirage.InverseMelScaleDCT(ref featureData); if (DEBUG_OUTPUT_TEXT) stftdata_inverse_mfcc.WriteCSV(name + "_stftdata_inverse_mfcc.csv", ";"); 
stftdata_inverse_mfcc.DrawMatrixImageLogValues(name + "_specgramlog_inverse_mfcc.png", true); double[] audiodata_inverse_mfcc = stftMirage.InverseStft(stftdata_inverse_mfcc); MathUtils.Normalize(ref audiodata_inverse_mfcc); if (DEBUG_OUTPUT_TEXT) WriteF3Formatted(audiodata_inverse_mfcc, name + "_audiodata_inverse_mfcc.txt"); DrawGraph(audiodata_inverse_mfcc, name + "_audiodata_inverse_mfcc.png"); bass.SaveFile(MathUtils.DoubleToFloat(audiodata_inverse_mfcc), name + "_inverse_mfcc.wav", Analyzer.SAMPLING_RATE); #endregion } #endif #endregion } // Store in a Statistical Cluster Model Similarity class. // A Gaussian representation of a song Scms audioFeature = Scms.GetScms(featureData, name); if (audioFeature != null) { // Store image if debugging if (doOutputDebugInfo) { audioFeature.Image = featureData.DrawMatrixImageLogValues(name + "_featuredata.png", true, false, 0, 0, true); } // Store bitstring hash as well string hashString = GetBitString(featureData); audioFeature.BitString = hashString; // Store duration audioFeature.Duration = (long) duration; // Store file name audioFeature.Name = filePath.FullName; } Dbg.WriteLine ("Mirage - Total Execution Time: {0} ms", t.Stop().TotalMilliseconds); return audioFeature; }
public static AudioFeature AnalyzeMandelEllis(FileInfo filePath, bool doOutputDebugInfo=DEFAULT_DEBUG_INFO) { DbgTimer t = new DbgTimer(); t.Start (); float[] audiodata = AudioFileReader.Decode(filePath.FullName, SAMPLING_RATE, SECONDS_TO_ANALYZE); if (audiodata == null || audiodata.Length == 0) { Dbg.WriteLine("Error! - No Audio Found"); return null; } #if DEBUG DrawGraph(MathUtils.FloatToDouble(audiodata), "waveform.png"); #endif // Calculate duration in ms double duration = (double) audiodata.Length / SAMPLING_RATE * 1000; // Normalize //MathUtils.NormalizeInPlace(audiodata); // Matlab multiplies with 2^15 (32768) // Explode samples to the range of 16 bit shorts (–32,768 to 32,767) // if( max(abs(speech))<=1 ), speech = speech * 2^15; end; MathUtils.Multiply(ref audiodata, AUDIO_MULTIPLIER); // 65536 MandelEllisExtractor extractor = new MandelEllisExtractor(SAMPLING_RATE, WINDOW_SIZE, MFCC_COEFFICIENTS, MEL_COEFFICIENTS); AudioFeature audioFeature = extractor.Calculate(MathUtils.FloatToDouble(audiodata)); if (audioFeature != null) { // Store duration audioFeature.Duration = (long) duration; // Store file name audioFeature.Name = filePath.Name; } Dbg.WriteLine ("MandelEllisExtractor - Total Execution Time: {0} ms", t.Stop().TotalMilliseconds); return audioFeature; }
//private static Mfcc mfccOptimized = new Mfcc(WINDOW_SIZE, SAMPLING_RATE, MEL_COEFFICIENTS, MFCC_COEFFICIENTS); //private static MFCC mfccComirva = new MFCC(SAMPLING_RATE, WINDOW_SIZE, MFCC_COEFFICIENTS, true, 20.0, SAMPLING_RATE/2, MEL_COEFFICIENTS); #endif #region Methods public static bool AnalyzeAndAdd(FileInfo filePath, Db db, DatabaseService databaseService, bool doOutputDebugInfo=DEFAULT_DEBUG_INFO, bool useHaarWavelet = true) { DbgTimer t = new DbgTimer(); t.Start (); float[] audiodata = AudioFileReader.Decode(filePath.FullName, SAMPLING_RATE, SECONDS_TO_ANALYZE); if (audiodata == null || audiodata.Length == 0) { Dbg.WriteLine("Error! - No Audio Found"); return false; } // Read TAGs using BASS FindSimilar.AudioProxies.BassProxy bass = FindSimilar.AudioProxies.BassProxy.Instance; Un4seen.Bass.AddOn.Tags.TAG_INFO tag_info = bass.GetTagInfoFromFile(filePath.FullName); // Name of file being processed string name = StringUtils.RemoveNonAsciiCharacters(Path.GetFileNameWithoutExtension(filePath.Name)); #if DEBUG if (Analyzer.DEBUG_INFO_VERBOSE) { if (DEBUG_OUTPUT_TEXT) WriteAscii(audiodata, name + "_audiodata.ascii"); if (DEBUG_OUTPUT_TEXT) WriteF3Formatted(audiodata, name + "_audiodata.txt"); } #endif if (doOutputDebugInfo) { DrawGraph(MathUtils.FloatToDouble(audiodata), name + "_audiodata.png"); } // Calculate duration in ms double duration = (double) audiodata.Length / SAMPLING_RATE * 1000; // Explode samples to the range of 16 bit shorts (–32,768 to 32,767) // Matlab multiplies with 2^15 (32768) // e.g. if( max(abs(speech))<=1 ), speech = speech * 2^15; end; MathUtils.Multiply(ref audiodata, AUDIO_MULTIPLIER); // 65536 // zero pad if the audio file is too short to perform a mfcc if (audiodata.Length < (fingerprintingConfig.WdftSize + fingerprintingConfig.Overlap)) { int lenNew = fingerprintingConfig.WdftSize + fingerprintingConfig.Overlap; Array.Resize<float>(ref audiodata, lenNew); } // Get fingerprint signatures using the Soundfingerprinting methods IPermutations permutations = new LocalPermutations("Soundfingerprinting\\perms.csv", ","); Repository repository = new Repository(permutations, databaseService, fingerprintService); // Image Service ImageService imageService = new ImageService( fingerprintService.SpectrumService, fingerprintService.WaveletService); // work config WorkUnitParameterObject param = new WorkUnitParameterObject(); param.FingerprintingConfiguration = fingerprintingConfig; param.AudioSamples = audiodata; param.PathToAudioFile = filePath.FullName; param.MillisecondsToProcess = SECONDS_TO_ANALYZE * 1000; param.StartAtMilliseconds = 0; // build track Track track = new Track(); track.Title = name; track.TrackLengthMs = (int) duration; track.FilePath = filePath.FullName; track.Id = -1; // this will be set by the insert method #region parse tag_info if (tag_info != null) { Dictionary<string, string> tags = new Dictionary<string, string>(); //if (tag_info.title != string.Empty) tags.Add("title", tag_info.title); if (tag_info.artist != string.Empty) tags.Add("artist", tag_info.artist); if (tag_info.album != string.Empty) tags.Add("album", tag_info.album); if (tag_info.albumartist != string.Empty) tags.Add("albumartist", tag_info.albumartist); if (tag_info.year != string.Empty) tags.Add("year", tag_info.year); if (tag_info.comment != string.Empty) tags.Add("comment", tag_info.comment); if (tag_info.genre != string.Empty) tags.Add("genre", tag_info.genre); if (tag_info.track != string.Empty) tags.Add("track", tag_info.track); if (tag_info.disc != string.Empty) 
tags.Add("disc", tag_info.disc); if (tag_info.copyright != string.Empty) tags.Add("copyright", tag_info.copyright); if (tag_info.encodedby != string.Empty) tags.Add("encodedby", tag_info.encodedby); if (tag_info.composer != string.Empty) tags.Add("composer", tag_info.composer); if (tag_info.publisher != string.Empty) tags.Add("publisher", tag_info.publisher); if (tag_info.lyricist != string.Empty) tags.Add("lyricist", tag_info.lyricist); if (tag_info.remixer != string.Empty) tags.Add("remixer", tag_info.remixer); if (tag_info.producer != string.Empty) tags.Add("producer", tag_info.producer); if (tag_info.bpm != string.Empty) tags.Add("bpm", tag_info.bpm); //if (tag_info.filename != string.Empty) tags.Add("filename", tag_info.filename); tags.Add("channelinfo", tag_info.channelinfo.ToString()); //if (tag_info.duration > 0) tags.Add("duration", tag_info.duration.ToString()); if (tag_info.bitrate > 0) tags.Add("bitrate", tag_info.bitrate.ToString()); if (tag_info.replaygain_track_gain != -100f) tags.Add("replaygain_track_gain", tag_info.replaygain_track_gain.ToString()); if (tag_info.replaygain_track_peak != -1f) tags.Add("replaygain_track_peak", tag_info.replaygain_track_peak.ToString()); if (tag_info.conductor != string.Empty) tags.Add("conductor", tag_info.conductor); if (tag_info.grouping != string.Empty) tags.Add("grouping", tag_info.grouping); if (tag_info.mood != string.Empty) tags.Add("mood", tag_info.mood); if (tag_info.rating != string.Empty) tags.Add("rating", tag_info.rating); if (tag_info.isrc != string.Empty) tags.Add("isrc", tag_info.isrc); foreach(var nativeTag in tag_info.NativeTags) { string[] keyvalue = nativeTag.Split('='); tags.Add(keyvalue[0], keyvalue[1]); } track.Tags = tags; } #endregion double[][] logSpectrogram; if (repository.InsertTrackInDatabaseUsingSamples(track, 25, 4, param, out logSpectrogram)) { // store logSpectrogram as Matrix Comirva.Audio.Util.Maths.Matrix logSpectrogramMatrix = new Comirva.Audio.Util.Maths.Matrix(logSpectrogram); logSpectrogramMatrix = logSpectrogramMatrix.Transpose(); #region Debug for Soundfingerprinting Method if (doOutputDebugInfo) { imageService.GetLogSpectralImages(logSpectrogram, fingerprintingConfig.Stride, fingerprintingConfig.FingerprintLength, fingerprintingConfig.Overlap, 2).Save(name + "_specgram_logimages.png"); logSpectrogramMatrix.DrawMatrixImageLogValues(name + "_specgram_logimage.png", true); if (DEBUG_OUTPUT_TEXT) { logSpectrogramMatrix.WriteCSV(name + "_specgram_log.csv", ";"); } } #endregion #region Insert Statistical Cluster Model Similarity Audio Feature as well Comirva.Audio.Util.Maths.Matrix scmsMatrix = null; if (useHaarWavelet) { #region Wavelet Transform int lastHeight = 0; int lastWidth = 0; scmsMatrix = mfccMirage.ApplyWaveletCompression(ref logSpectrogramMatrix, out lastHeight, out lastWidth); #if DEBUG if (Analyzer.DEBUG_INFO_VERBOSE) { if (DEBUG_OUTPUT_TEXT) scmsMatrix.WriteAscii(name + "_waveletdata.ascii"); } #endif if (doOutputDebugInfo) { scmsMatrix.DrawMatrixImageLogValues(name + "_waveletdata.png", true); } #if DEBUG if (Analyzer.DEBUG_INFO_VERBOSE) { #region Inverse Wavelet // try to do an inverse wavelet transform Comirva.Audio.Util.Maths.Matrix stftdata_inverse_wavelet = mfccMirage.InverseWaveletCompression(ref scmsMatrix, lastHeight, lastWidth, logSpectrogramMatrix.Rows, logSpectrogramMatrix.Columns); if (DEBUG_OUTPUT_TEXT) stftdata_inverse_wavelet.WriteCSV(name + "_specgramlog_inverse_wavelet.csv", ";"); stftdata_inverse_wavelet.DrawMatrixImageLogValues(name + 
"_specgramlog_inverse_wavelet.png", true); #endregion } #endif #endregion } else { #region DCT Transform // It seems the Mirage way of applying the DCT is slightly faster than the // Comirva way due to less loops scmsMatrix = mfccMirage.ApplyDCT(ref logSpectrogramMatrix); #if DEBUG if (Analyzer.DEBUG_INFO_VERBOSE) { if (DEBUG_OUTPUT_TEXT) scmsMatrix.WriteAscii(name + "_mfccdata.ascii"); } #endif if (doOutputDebugInfo) { scmsMatrix.DrawMatrixImageLogValues(name + "_mfccdata.png", true); } #if DEBUG if (Analyzer.DEBUG_INFO_VERBOSE) { #region Inverse MFCC // try to do an inverse mfcc Comirva.Audio.Util.Maths.Matrix stftdata_inverse_mfcc = mfccMirage.InverseDCT(ref scmsMatrix); if (DEBUG_OUTPUT_TEXT) stftdata_inverse_mfcc.WriteCSV(name + "_stftdata_inverse_mfcc.csv", ";"); stftdata_inverse_mfcc.DrawMatrixImageLogValues(name + "_specgramlog_inverse_mfcc.png", true); #endregion } #endif #endregion } // Store in a Statistical Cluster Model Similarity class. // A Gaussian representation of a song Scms audioFeature = Scms.GetScms(scmsMatrix, name); if (audioFeature != null) { // Store image if debugging if (doOutputDebugInfo) { audioFeature.Image = scmsMatrix.DrawMatrixImageLogValues(name + "_featuredata.png", true, false, 0, 0, true); } // Store bitstring hash as well string hashString = GetBitString(scmsMatrix); audioFeature.BitString = hashString; // Store duration audioFeature.Duration = (long) duration; // Store file name audioFeature.Name = filePath.FullName; int id = track.Id; if (db.AddTrack(ref id, audioFeature) == -1) { Console.Out.WriteLine("Failed! Could not add audioFeature to database {0}!", name); } } #endregion } else { // failed return false; } Dbg.WriteLine ("AnalyzeAndAdd - Total Execution Time: {0} ms", t.Stop().TotalMilliseconds); return true; }
public static AudioFeature AnalyzeScms(FileInfo filePath, bool doOutputDebugInfo=DEFAULT_DEBUG_INFO, bool useHaarWavelet = true) { DbgTimer t = new DbgTimer(); t.Start (); // get work config from the audio file WorkUnitParameterObject param = GetWorkUnitParameterObjectFromAudioFile(filePath, doOutputDebugInfo); string fileName = param.FileName; // used to save wave files in the debug inverse methods FindSimilar.AudioProxies.BassProxy bass = FindSimilar.AudioProxies.BassProxy.Instance; // 2. Windowing // 3. FFT Comirva.Audio.Util.Maths.Matrix stftdata = stftMirage.Apply(param.AudioSamples); if (DEBUG_INFO_VERBOSE & DEBUG_OUTPUT_TEXT) { stftdata.WriteAscii(fileName + "_stftdata.ascii"); stftdata.WriteCSV(fileName + "_stftdata.csv", ";"); } if (doOutputDebugInfo) { // same as specgram(audio*32768, 2048, 44100, hanning(2048), 1024); //stftdata.DrawMatrixImageLogValues(fileName + "_specgram.png", true); // spec gram with log values for the y axis (frequency) stftdata.DrawMatrixImageLogY(fileName + "_specgramlog.png", SAMPLING_RATE, 20, SAMPLING_RATE/2, 120, WINDOW_SIZE); } if (DEBUG_DO_INVERSE_TESTS) { #region Inverse STFT double[] audiodata_inverse_stft = stftMirage.InverseStft(stftdata); // divide //MathUtils.Divide(ref audiodata_inverse_stft, AUDIO_MULTIPLIER); MathUtils.Normalize(ref audiodata_inverse_stft); if (DEBUG_OUTPUT_TEXT) { WriteAscii(audiodata_inverse_stft, fileName + "_audiodata_inverse_stft.ascii"); WriteF3Formatted(audiodata_inverse_stft, fileName + "_audiodata_inverse_stft.txt"); } DrawGraph(audiodata_inverse_stft, fileName + "_audiodata_inverse_stft.png"); float[] audiodata_inverse_float = MathUtils.DoubleToFloat(audiodata_inverse_stft); bass.SaveFile(audiodata_inverse_float, fileName + "_inverse_stft.wav", Analyzer.SAMPLING_RATE); #endregion } // 4. Mel Scale Filterbank // Mel-frequency is proportional to the logarithm of the linear frequency, // reflecting similar effects in the human's subjective aural perception) // 5. Take Logarithm // 6. 
DCT (Discrete Cosine Transform) if (DEBUG_INFO_VERBOSE) { #region Mel Scale and Log Values Comirva.Audio.Util.Maths.Matrix mellog = mfccMirage.ApplyMelScaleAndLog(ref stftdata); if (DEBUG_OUTPUT_TEXT) { mellog.WriteCSV(fileName + "_mel_log.csv", ";"); } if (doOutputDebugInfo) { mellog.DrawMatrixImage(fileName + "_mel_log.png", 600, 400, true, true); } #endregion #region Inverse Mel Scale and Log Values if (DEBUG_DO_INVERSE_TESTS) { Comirva.Audio.Util.Maths.Matrix inverse_mellog = mfccMirage.InverseMelScaleAndLog(ref mellog); inverse_mellog.WriteCSV(fileName + "_mel_log_inverse.csv", ";"); inverse_mellog.DrawMatrixImageLogValues(fileName + "_mel_log_inverse.png", true); double[] audiodata_inverse_mellog = stftMirage.InverseStft(inverse_mellog); //MathUtils.Divide(ref audiodata_inverse_mellog, AUDIO_MULTIPLIER/100); MathUtils.Normalize(ref audiodata_inverse_mellog); if (DEBUG_OUTPUT_TEXT) { WriteAscii(audiodata_inverse_mellog, fileName + "_audiodata_inverse_mellog.ascii"); WriteF3Formatted(audiodata_inverse_mellog, fileName + "_audiodata_inverse_mellog.txt"); } DrawGraph(audiodata_inverse_mellog, fileName + "_audiodata_inverse_mellog.png"); float[] audiodata_inverse_mellog_float = MathUtils.DoubleToFloat(audiodata_inverse_mellog); bass.SaveFile(audiodata_inverse_mellog_float, fileName + "_inverse_mellog.wav", Analyzer.SAMPLING_RATE); } #endregion } Comirva.Audio.Util.Maths.Matrix featureData = null; if (useHaarWavelet) { #region Wavelet Transform int lastHeight = 0; int lastWidth = 0; featureData = mfccMirage.ApplyMelScaleAndWaveletCompress(ref stftdata, out lastHeight, out lastWidth); if (DEBUG_INFO_VERBOSE & DEBUG_OUTPUT_TEXT) { featureData.WriteAscii(fileName + "_waveletdata.ascii"); } if (doOutputDebugInfo) { featureData.DrawMatrixImageLogValues(fileName + "_waveletdata.png", true); } if (DEBUG_DO_INVERSE_TESTS) { #region Inverse Wavelet // try to do an inverse wavelet transform Comirva.Audio.Util.Maths.Matrix stftdata_inverse_wavelet = mfccMirage.InverseMelScaleAndWaveletCompress(ref featureData, lastHeight, lastWidth); if (DEBUG_OUTPUT_TEXT) stftdata_inverse_wavelet.WriteCSV(fileName + "_specgramlog_inverse_wavelet.csv", ";"); stftdata_inverse_wavelet.DrawMatrixImageLogValues(fileName + "_specgramlog_inverse_wavelet.png", true); double[] audiodata_inverse_wavelet = stftMirage.InverseStft(stftdata_inverse_wavelet); MathUtils.Normalize(ref audiodata_inverse_wavelet); if (DEBUG_OUTPUT_TEXT) WriteF3Formatted(audiodata_inverse_wavelet, fileName + "_audiodata_inverse_wavelet.txt"); DrawGraph(audiodata_inverse_wavelet, fileName + "_audiodata_inverse_wavelet.png"); bass.SaveFile(MathUtils.DoubleToFloat(audiodata_inverse_wavelet), fileName + "_inverse_wavelet.wav", Analyzer.SAMPLING_RATE); #endregion } #endregion } else { #region DCT Transform // It seems the Mirage way of applying the DCT is slightly faster than the // Comirva way due to less loops featureData = mfccMirage.ApplyMelScaleDCT(ref stftdata); //featureData = mfccMirage.ApplyComirvaWay(ref stftdata); if (DEBUG_INFO_VERBOSE & DEBUG_OUTPUT_TEXT) { featureData.WriteAscii(fileName + "_mfccdata.ascii"); } if (doOutputDebugInfo) { featureData.DrawMatrixImageLogValues(fileName + "_mfccdata.png", true); } if (DEBUG_DO_INVERSE_TESTS) { #region Inverse MFCC // try to do an inverse mfcc Comirva.Audio.Util.Maths.Matrix stftdata_inverse_mfcc = mfccMirage.InverseMelScaleDCT(ref featureData); if (DEBUG_OUTPUT_TEXT) stftdata_inverse_mfcc.WriteCSV(fileName + "_stftdata_inverse_mfcc.csv", ";"); stftdata_inverse_mfcc.DrawMatrixImageLogValues(fileName 
+ "_specgramlog_inverse_mfcc.png", true); double[] audiodata_inverse_mfcc = stftMirage.InverseStft(stftdata_inverse_mfcc); MathUtils.Normalize(ref audiodata_inverse_mfcc); if (DEBUG_OUTPUT_TEXT) WriteF3Formatted(audiodata_inverse_mfcc, fileName + "_audiodata_inverse_mfcc.txt"); DrawGraph(audiodata_inverse_mfcc, fileName + "_audiodata_inverse_mfcc.png"); bass.SaveFile(MathUtils.DoubleToFloat(audiodata_inverse_mfcc), fileName + "_inverse_mfcc.wav", Analyzer.SAMPLING_RATE); #endregion } #endregion } // Store in a Statistical Cluster Model Similarity class. // A Gaussian representation of a song Scms audioFeature = Scms.GetScms(featureData, fileName); if (audioFeature != null) { // Store image if debugging if (doOutputDebugInfo) { audioFeature.Image = featureData.DrawMatrixImageLogValues(fileName + "_featuredata.png", true, false, 0, 0, true); } // Store bitstring hash as well string hashString = GetBitString(featureData); audioFeature.BitString = hashString; // Store duration audioFeature.Duration = (long) param.DurationInMs; // Store file name audioFeature.Name = filePath.FullName; } else { // failed creating the Scms class Console.Out.WriteLine("Failed! Could not compute the Scms {0}!", fileName); } Dbg.WriteLine ("AnalyzeScms - Total Execution Time: {0} ms", t.Stop().TotalMilliseconds); return audioFeature; }
/// <summary> /// Return information from the Audio File /// </summary> /// <param name="filePath">filepath object</param> /// <returns>a WorkUnitParameter object</returns> public static WorkUnitParameterObject GetWorkUnitParameterObjectFromAudioFile(FileInfo filePath, bool doOutputDebugInfo=DEFAULT_DEBUG_INFO) { DbgTimer t = new DbgTimer(); t.Start (); float[] audiodata = AudioFileReader.Decode(filePath.FullName, SAMPLING_RATE, SECONDS_TO_ANALYZE); if (audiodata == null || audiodata.Length == 0) { Dbg.WriteLine("Error! - No Audio Found"); return null; } // Name of file being processed string fileName = StringUtils.RemoveNonAsciiCharacters(Path.GetFileNameWithoutExtension(filePath.Name)); #if DEBUG if (DEBUG_INFO_VERBOSE) { if (DEBUG_OUTPUT_TEXT) WriteAscii(audiodata, fileName + "_audiodata.ascii"); if (DEBUG_OUTPUT_TEXT) WriteF3Formatted(audiodata, fileName + "_audiodata.txt"); } #endif if (doOutputDebugInfo) { DrawGraph(MathUtils.FloatToDouble(audiodata), fileName + "_audiodata.png"); } // Calculate duration in ms double duration = (double) audiodata.Length / SAMPLING_RATE * 1000; // Explode samples to the range of 16 bit shorts (–32,768 to 32,767) // Matlab multiplies with 2^15 (32768) // e.g. if( max(abs(speech))<=1 ), speech = speech * 2^15; end; MathUtils.Multiply(ref audiodata, AUDIO_MULTIPLIER); // zero pad if the audio file is too short to perform a mfcc if (audiodata.Length < (WINDOW_SIZE + OVERLAP)) { int lenNew = WINDOW_SIZE + OVERLAP; Array.Resize<float>(ref audiodata, lenNew); } // work config WorkUnitParameterObject param = new WorkUnitParameterObject(); param.AudioSamples = audiodata; param.PathToAudioFile = filePath.FullName; param.MillisecondsToProcess = SECONDS_TO_ANALYZE * 1000; param.StartAtMilliseconds = 0; param.FileName = fileName; param.DurationInMs = duration; param.Tags = GetTagInfoFromFile(filePath.FullName); Dbg.WriteLine ("Get Audio File Parameters - Execution Time: {0} ms", t.Stop().TotalMilliseconds); return param; }
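// Note on the zero padding above: Array.Resize copies the existing samples and fills the new
// tail with default(float), i.e. 0f, so a too-short file is effectively padded with silence up
// to WINDOW_SIZE + OVERLAP samples. A tiny illustration (values are made up):
float[] samples = { 0.1f, 0.2f };
Array.Resize(ref samples, 5); // samples is now { 0.1f, 0.2f, 0f, 0f, 0f }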
public Matrix Apply(ref Matrix m) { DbgTimer t = new DbgTimer(); t.Start(); Matrix mel = new Matrix(filterWeights.rows, m.columns); /* // Performance optimization of ... mel = filterWeights.Multiply(m); for (int i = 0; i < mel.rows; i++) { for (int j = 0; j < mel.columns; j++) { mel.d[i, j] = (mel.d[i, j] < 1.0f ? 0 : (float)(10.0 * Math.Log10(mel.d[i, j]))); //mel.d[i, j] = (float)(10.0 * Math.Log10(mel.d[i, j])); } } */ int mc = m.columns; int mr = m.rows; int melcolumns = mel.columns; int fwc = filterWeights.columns; int fwr = filterWeights.rows; unsafe { fixed (float* md = m.d, fwd = filterWeights.d, meld = mel.d) { for (int i = 0; i < mc; i++) { for (int k = 0; k < fwr; k++) { int idx = k*melcolumns + i; int kfwc = k*fwc; for (int j = 0; j < mr; j++) { meld[idx] += fwd[kfwc + j] * md[j*mc + i]; } meld[idx] = (meld[idx] < 1.0f ? 0 : (float)(10.0 * Math.Log10(meld[idx]))); } } } } Matrix mfcc = dct.Multiply(mel); Dbg.WriteLine("mfcc (MfccLessOptimized) Execution Time: " + t.Stop().TotalMilliseconds + " ms"); return mfcc; }
public static List<FindSimilar.QueryResult> SimilarTracksSoundfingerprintingList(FileInfo filePath, Repository repository) { DbgTimer t = new DbgTimer(); t.Start (); // get work config from the audio file WorkUnitParameterObject param = GetWorkUnitParameterObjectFromAudioFile(filePath); param.FingerprintingConfiguration = fingerprintingConfigQuerying; // TODO: i don't really know how the threshold tables work. // 1 returns more similar hits // 2 returns sometimes only the one we search for // even 0 seem to work (like 1) List<FindSimilar.QueryResult> candidates = repository.FindSimilarFromAudioSamplesList(param.FingerprintingConfiguration.NumberOfHashTables, param.FingerprintingConfiguration.NumberOfKeys, 0, param); Dbg.WriteLine ("SimilarTracksSoundfingerprintingList - Total Execution Time: {0} ms", t.Stop().TotalMilliseconds); return candidates; }
/// <summary> /// Computes a Scms model from the MFCC representation of a song. /// </summary> /// <param name="mfcc">Mirage.Matrix mfcc</param> /// <returns></returns> public static Scms GetScms(Matrix mfcc, string name) { DbgTimer t = new DbgTimer(); t.Start(); // Mean Vector m = mfcc.Mean(); #if DEBUG if (Analyzer.DEBUG_INFO_VERBOSE) { if (Analyzer.DEBUG_OUTPUT_TEXT) { m.WriteText(name + "_mean_orig.txt"); } m.DrawMatrixGraph(name + "_mean_orig.png"); } #endif // Covariance Matrix c = mfcc.Covariance(m); #if DEBUG if (Analyzer.DEBUG_INFO_VERBOSE) { if (Analyzer.DEBUG_OUTPUT_TEXT) { c.WriteText(name + "_covariance_orig.txt"); } c.DrawMatrixGraph(name + "_covariance_orig.png"); } #endif // Inverse Covariance Matrix ic; try { ic = c.Inverse(); } catch (MatrixSingularException) { //throw new ScmsImpossibleException(); Dbg.WriteLine("MatrixSingularException - Scms failed!"); return(null); } #if DEBUG if (Analyzer.DEBUG_INFO_VERBOSE) { if (Analyzer.DEBUG_OUTPUT_TEXT) { ic.WriteAscii(name + "_inverse_covariance_orig.txt"); } ic.DrawMatrixGraph(name + "_inverse_covariance_orig.png"); } #endif // Store the Mean, Covariance, Inverse Covariance in an optimal format. int dim = m.rows; Scms s = new Scms(dim); int l = 0; for (int i = 0; i < dim; i++) { s.mean[i] = m.d[i, 0]; for (int j = i; j < dim; j++) { s.cov[l] = c.d[i, j]; s.icov[l] = ic.d[i, j]; l++; } } Dbg.WriteLine("(Mirage) - scms created in: {0} ms", t.Stop().TotalMilliseconds); return(s); }
public Matrix Apply(ref Matrix m) { DbgTimer t = new DbgTimer (); t.Start (); Matrix mel = new Matrix (filterWeights.rows, m.columns); int mc = m.columns; int melcolumns = mel.columns; int fwc = filterWeights.columns; int fwr = filterWeights.rows; unsafe { fixed (float* md = m.d, fwd = filterWeights.d, meld = mel.d) { for (int i = 0; i < mc; i++) { for (int k = 0; k < fwr; k++) { int idx = k*melcolumns + i; int kfwc = k*fwc; // The filter weights matrix is mostly 0. // So only multiply non-zero elements! for (int j = fwFT[k,0]; j < fwFT[k,1]; j++) { meld[idx] += fwd[kfwc + j] * md[j*mc + i]; } meld[idx] = (meld[idx] < 1.0f ? 0 : (float)(10.0 * Math.Log10(meld[idx]))); } } } } try { Matrix mfcc = dct.Multiply (mel); long stop = 0; t.Stop (ref stop); Dbg.WriteLine ("Mirage - mfcc Execution Time: {0}ms", stop); return mfcc; } catch (MatrixDimensionMismatchException) { throw new MfccFailedException (); } }
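The inner loop above runs j only from fwFT[k,0] to fwFT[k,1], skipping the zero-valued filter weights. The fwFT table itself is not shown in this listing; a sketch of how such per-row first/last non-zero bounds could be precomputed from the filter weight matrix (the method name is an assumption, the rows/columns/d members mirror the surrounding code) might be:

// Sketch: find, for each mel filter row k, the first non-zero column (inclusive)
// and one past the last non-zero column (exclusive), so the MFCC loop above can
// skip the zero entries of the mostly-sparse filter weight matrix.
static int[,] ComputeFilterBounds(Matrix filterWeights) {
	int[,] bounds = new int[filterWeights.rows, 2];
	for (int k = 0; k < filterWeights.rows; k++) {
		int first = 0;
		while (first < filterWeights.columns && filterWeights.d[k, first] == 0.0f) first++;
		int last = filterWeights.columns;
		while (last > first && filterWeights.d[k, last - 1] == 0.0f) last--;
		bounds[k, 0] = first; // first non-zero filter weight
		bounds[k, 1] = last;  // one past the last non-zero filter weight
	}
	return bounds;
}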
private static void Test() { mirageaudio_initgst(); string song1_filename = "/home/lorentz/Music/Library/Pachelbel/Johann Pachelbel - Canon And Gigue In D Major For 3 Violins And Basso Continuo.mp3"; string song2_filename = "/home/lorentz/Music/Library/Karajan Adagios/CD 1/Pachelbel - Canon in d Major (Kanon And Gigue in d Major = d Dur) av Johann Pachelbel.mp3"; Scms song1 = null; Scms song2 = null; DbgTimer t1 = new DbgTimer(); t1.Start(); int runs = 10; for (int i = 0; i < runs; i++) { Analyzer.Analyze(song1_filename); } long l1 = 0; t1.Stop(ref l1); Dbg.WriteLine("Analysis: " + runs + " times - " + l1 + "ms; " + (double)l1/(double)runs + "ms per analysis"); song1 = Analyzer.Analyze(song1_filename); song2 = Analyzer.Analyze(song2_filename); ScmsConfiguration config = new ScmsConfiguration (Analyzer.MFCC_COEFFICIENTS); Console.WriteLine("Distance = " + Scms.Distance (song1, song2, config)); DbgTimer t2 = new DbgTimer(); t2.Start(); runs = 100000; for (int i = 0; i < runs; i++) { Scms.Distance (song1, song2, config); } long l2 = 0; t2.Stop(ref l2); Dbg.WriteLine("Distance Computation: " + runs + " times - " + l2 + "ms; " + (double)l2/(double)runs + "ms per comparison"); }
public static float[] DecodeUsingSox(string fileIn, int srate, int secondsToAnalyze) { lock (_locker) { using (Process toraw = new Process()) { fileIn = Regex.Replace(fileIn, "%20", " "); DbgTimer t = new DbgTimer(); t.Start(); String curdir = System.Environment.CurrentDirectory; Dbg.WriteLine("Decoding: " + fileIn); String tempFile = System.IO.Path.GetTempFileName(); String raw = tempFile + "_raw.wav"; Dbg.WriteLine("Temporary raw file: " + raw); toraw.StartInfo.FileName = "./NativeLibraries\\sox\\sox.exe"; toraw.StartInfo.Arguments = " \"" + fileIn + "\" -r " + srate + " -e float -b 32 -G -t raw \"" + raw + "\" channels 1"; toraw.StartInfo.UseShellExecute = false; toraw.StartInfo.RedirectStandardOutput = true; toraw.StartInfo.RedirectStandardError = true; toraw.Start(); toraw.WaitForExit(); int exitCode = toraw.ExitCode; // 0 = succesfull // 1 = partially succesful // 2 = failed if (exitCode != 0) { string standardError = toraw.StandardError.ReadToEnd(); Console.Out.WriteLine(standardError); return(null); } #if DEBUG string standardOutput = toraw.StandardOutput.ReadToEnd(); Console.Out.WriteLine(standardOutput); #endif float[] floatBuffer; FileStream fs = null; try { FileInfo fi = new FileInfo(raw); fs = fi.OpenRead(); int bytes = (int)fi.Length; int samples = bytes / sizeof(float); if ((samples * sizeof(float)) != bytes) { return(null); } // if the audio file is larger than seconds to analyze, // find a proper section to exctract if (bytes > secondsToAnalyze * srate * sizeof(float)) { int seekto = (bytes / 2) - ((secondsToAnalyze / 2) * sizeof(float) * srate); Dbg.WriteLine("Extracting section: seekto = " + seekto); bytes = (secondsToAnalyze) * srate * sizeof(float); fs.Seek((samples / 2 - (secondsToAnalyze / 2) * srate) * sizeof(float), SeekOrigin.Begin); } BinaryReader br = new BinaryReader(fs); byte[] bytesBuffer = new byte[bytes]; br.Read(bytesBuffer, 0, bytesBuffer.Length); int items = (int)bytes / sizeof(float); floatBuffer = new float[items]; for (int i = 0; i < items; i++) { floatBuffer[i] = BitConverter.ToSingle(bytesBuffer, i * sizeof(float)); // * 65536.0f; } } catch (System.IO.FileNotFoundException) { floatBuffer = null; } finally { if (fs != null) { fs.Close(); } try { File.Delete(tempFile); File.Delete(raw); } catch (IOException io) { Console.WriteLine(io); } Dbg.WriteLine("Decoding Execution Time: " + t.Stop().TotalMilliseconds + " ms"); } return(floatBuffer); } } }
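The seek logic above centres the analysis window on the middle of the decoded raw file and reads only secondsToAnalyze seconds of 32-bit float samples. The same arithmetic, pulled out as a small stand-alone sketch over sample counts (the helper name is an assumption):

// Sketch of the "analyze the middle of the track" offset math used in DecodeUsingSox above.
static void MiddleSection(int totalSamples, int srate, int secondsToAnalyze,
                          out int startSample, out int sampleCount) {
	int wanted = secondsToAnalyze * srate; // number of float samples to analyze
	if (totalSamples <= wanted) {
		// short file: keep everything
		startSample = 0;
		sampleCount = totalSamples;
		return;
	}
	// center the extracted section on the middle of the track
	startSample = totalSamples / 2 - (secondsToAnalyze / 2) * srate;
	sampleCount = wanted;
}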
private static bool AnalyseAndAddScmsUsingFingerprints(List<double[][]> spectralImages, List<bool[]> fingerprints, WorkUnitParameterObject param, Db db, int trackId, bool doOutputDebugInfo=DEFAULT_DEBUG_INFO) { DbgTimer t = new DbgTimer(); t.Start (); // Insert Statistical Cluster Model Similarity Audio Feature string fileName = param.FileName; // Merge the arrays in the List using Linq var result = spectralImages.SelectMany(i => i).ToArray(); Comirva.Audio.Util.Maths.Matrix scmsMatrix = new Comirva.Audio.Util.Maths.Matrix(result); if (doOutputDebugInfo) { scmsMatrix.DrawMatrixImage(String.Format("{0}_spectral.png", fileName)); } #region Store in a Statistical Cluster Model Similarity class. Scms audioFeature = Scms.GetScms(scmsMatrix, fileName); if (audioFeature != null) { // Store bitstring hash as well audioFeature.BitString = GetBitString(scmsMatrix); // Store duration audioFeature.Duration = (long) param.DurationInMs; // Store file name audioFeature.Name = param.PathToAudioFile; // Add to database int id = trackId; if (db.AddTrack(audioFeature) == -1) { Console.Out.WriteLine("Failed! Could not add audio feature to database ({0})!", fileName); return false; } } else { return false; } #endregion Dbg.WriteLine ("AnalyseAndAddScmsUsingFingerprints2 - Execution Time: {0} ms", t.Stop().TotalMilliseconds); return true; }
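The SelectMany call above simply concatenates the rows of every spectral image into one jagged array before the Matrix constructor consumes it; on toy data the flattening step looks like this:

// Flattening a List<double[][]> (one jagged array per spectral image) into a single
// double[][] by concatenating all rows, as done with SelectMany above.
List<double[][]> spectralImages = new List<double[][]> {
	new double[][] { new double[] { 1, 2 }, new double[] { 3, 4 } },
	new double[][] { new double[] { 5, 6 } }
};
double[][] merged = spectralImages.SelectMany(image => image).ToArray();
// merged now contains three rows: {1,2}, {3,4} and {5,6}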
public static float[] DecodeUsingMplayer(string fileIn, int srate) { lock (_locker) { using (Process towav = new Process()) { fileIn = Regex.Replace(fileIn, "%20", " "); DbgTimer t = new DbgTimer(); t.Start(); String curdir = System.Environment.CurrentDirectory; Dbg.WriteLine("Decoding: " + fileIn); String tempFile = System.IO.Path.GetTempFileName(); String wav = tempFile + ".wav"; Dbg.WriteLine("Temporary wav file: " + wav); towav.StartInfo.FileName = "./NativeLibraries\\mplayer\\mplayer.exe"; towav.StartInfo.Arguments = " -quiet -ao pcm:fast:waveheader \""+fileIn+"\" -format floatle -af resample="+srate+":0:2,pan=1:0.5:0.5 -channels 1 -vo null -vc null -ao pcm:file=\\\""+wav+"\\\""; towav.StartInfo.UseShellExecute = false; towav.StartInfo.RedirectStandardOutput = true; towav.StartInfo.RedirectStandardError = true; towav.Start(); towav.WaitForExit(); int exitCode = towav.ExitCode; // 0 = succesfull // 1 = partially succesful // 2 = failed if (exitCode != 0) { string standardError = towav.StandardError.ReadToEnd(); Console.Out.WriteLine(standardError); return null; } #if DEBUG string standardOutput = towav.StandardOutput.ReadToEnd(); Console.Out.WriteLine(standardOutput); #endif RiffRead riff = new RiffRead(wav); riff.Process(); float[] floatBuffer = riff.SoundData[0]; try { File.Delete(tempFile); //File.Delete(wav); } catch (IOException io) { Console.WriteLine(io); } Dbg.WriteLine("Decoding Execution Time: " + t.Stop().TotalMilliseconds + " ms"); return floatBuffer; } } }
/// <summary> /// Query the database for perceptually similar tracks using the sound fingerprinting methods /// </summary> /// <param name="filePath">input file</param> /// <param name="repository">the database (repository)</param> /// <param name="thresholdTables">Minimum number of hash tables that must be found for one signature to be considered a candidate (0 and 1 = return all candidates, 2+ = return only exact matches)</param> /// <param name="optimizeSignatureCount">Reduce the number of signatures in order to increase the search performance</param> /// <param name="doSearchEverything">disregard the local sensitivity hashes and search the whole database</param> /// <param name="splashScreen">The "please wait" splash screen (or null)</param> /// <returns>a list of query results objects (e.g. similar tracks)</returns> public static List<FindSimilar.QueryResult> SimilarTracksSoundfingerprintingList(FileInfo filePath, Repository repository, int thresholdTables, bool optimizeSignatureCount, bool doSearchEverything, SplashSceenWaitingForm splashScreen) { DbgTimer t = new DbgTimer(); t.Start (); if (splashScreen != null) splashScreen.SetProgress(0, "Reading audio file ..."); // get work config from the audio file WorkUnitParameterObject param = GetWorkUnitParameterObjectFromAudioFile(filePath); if (param == null) { if (splashScreen != null) splashScreen.SetProgress(0, "Failed reading audio file!"); return null; } param.FingerprintingConfiguration = fingerprintingConfigQuerying; if (splashScreen != null) splashScreen.SetProgress(1, "Successfully reading audio file!"); // This is how the threshold tables work: // For each signature created from a query file we retrieve a number of candidates // based on how many fingerprints that are associated to the same hash bucket. // if the number of fingerprints associated to the same hash bucket is relatively high // the likelyhood for this being an exact match is also very high. // Therefore a value of 0 or 1 basically means return every track that has an association // to the same hash bucket, while a number higher than that increases the accuracy for // only matching identical matches. // 0 and 1 returns many matches // 2 returns sometimes only the one we search for (exact match) List<FindSimilar.QueryResult> similarFiles = repository.FindSimilarFromAudioSamplesList(param.FingerprintingConfiguration.NumberOfHashTables, param.FingerprintingConfiguration.NumberOfKeys, thresholdTables, param, optimizeSignatureCount, doSearchEverything, splashScreen); Dbg.WriteLine ("SimilarTracksSoundfingerprintingList - Total Execution Time: {0} ms", t.Stop().TotalMilliseconds); return similarFiles; }
/// <summary> /// Computes a Scms model from the MFCC representation of a song. /// </summary> /// <param name="mfcc">Mirage.Matrix mfcc</param> /// <returns></returns> public static Scms GetScms(Matrix mfcc, string name) { DbgTimer t = new DbgTimer(); t.Start(); // Mean Vector m = mfcc.Mean(); #if DEBUG if (Analyzer.DEBUG_INFO_VERBOSE) { m.WriteText(name + "_mean_orig.txt"); m.DrawMatrixGraph(name + "_mean_orig.png"); } #endif // Covariance Matrix c = mfcc.Covariance(m); #if DEBUG if (Analyzer.DEBUG_INFO_VERBOSE) { c.WriteText(name + "_covariance_orig.txt"); c.DrawMatrixGraph(name + "_covariance_orig.png"); } #endif // Inverse Covariance Matrix ic; try { ic = c.Inverse(); } catch (MatrixSingularException) { //throw new ScmsImpossibleException(); Dbg.WriteLine("MatrixSingularException - Scms failed!"); return null; } #if DEBUG if (Analyzer.DEBUG_INFO_VERBOSE) { ic.WriteAscii(name + "_inverse_covariance_orig.txt"); ic.DrawMatrixGraph(name + "_inverse_covariance_orig.png"); } #endif // Store the Mean, Covariance, Inverse Covariance in an optimal format. int dim = m.rows; Scms s = new Scms(dim); int l = 0; for (int i = 0; i < dim; i++) { s.mean[i] = m.d[i, 0]; for (int j = i; j < dim; j++) { s.cov[l] = c.d[i, j]; s.icov[l] = ic.d[i, j]; l++; } } Dbg.WriteLine("(Mirage) - scms created in: {0} ms", t.Stop().TotalMilliseconds); return s; }
/// <summary> /// Find Similar Tracks to an audio file using its file path /// </summary> /// <param name="searchForPath">audio file path</param> /// <param name="db">database</param> /// <param name="analysisMethod">analysis method (SCMS or MandelEllis)</param> /// <param name="numToTake">max number of entries to return</param> /// <param name="percentage">percentage below and above the duration in ms when querying (used if between 0.1 - 0.9)</param> /// <param name="distanceType">distance method to use (KullbackLeiblerDivergence is default)</param> /// <returns>a dictinary list of key value pairs (filepath and distance)</returns> public static Dictionary<KeyValuePair<int, string>, double> SimilarTracks(string searchForPath, Db db, Analyzer.AnalysisMethod analysisMethod, int numToTake=25, double percentage=0.2, AudioFeature.DistanceType distanceType = AudioFeature.DistanceType.KullbackLeiblerDivergence) { DbgTimer t = new DbgTimer(); t.Start(); FileInfo fi = new FileInfo(searchForPath); AudioFeature seedAudioFeature = null; AudioFeature[] audioFeatures = null; switch (analysisMethod) { case Analyzer.AnalysisMethod.MandelEllis: seedAudioFeature = Analyzer.AnalyzeMandelEllis(fi); audioFeatures = new MandelEllis[100]; break; case Analyzer.AnalysisMethod.SCMS: seedAudioFeature = Analyzer.AnalyzeScms(fi); audioFeatures = new Scms[100]; break; } // Get all tracks from the DB except the seedSongs IDataReader r = db.GetTracks(null, seedAudioFeature.Duration, percentage); // store results in a dictionary var NameDictionary = new Dictionary<KeyValuePair<int, string>, double>(); int[] mapping = new int[100]; int read = 1; double dcur; while (read > 0) { read = db.GetNextTracks(ref r, ref audioFeatures, ref mapping, 100, analysisMethod); for (int i = 0; i < read; i++) { dcur = seedAudioFeature.GetDistance(audioFeatures[i], distanceType); // convert to positive values dcur = Math.Abs(dcur); NameDictionary.Add(new KeyValuePair<int,string>(mapping[i], audioFeatures[i].Name), dcur); } } // sort by non unique values var sortedDict = (from entry in NameDictionary orderby entry.Value ascending select entry) .Take(numToTake) .ToDictionary(pair => pair.Key, pair => pair.Value); Console.Out.WriteLine(String.Format("Found Similar to ({0}) in {1} ms", seedAudioFeature.Name, t.Stop().TotalMilliseconds)); return sortedDict; }
public static float[] DecodeUsingMplayerAndSox(string fileIn, int srate, int secondsToAnalyze) { lock (_locker) { using (Process tosoxreadable = new Process()) { fileIn = Regex.Replace(fileIn, "%20", " "); DbgTimer t = new DbgTimer(); t.Start(); String curdir = System.Environment.CurrentDirectory; Dbg.WriteLine("Decoding: " + fileIn); String tempFile = System.IO.Path.GetTempFileName(); String soxreadablewav = tempFile + ".wav"; Dbg.WriteLine("Temporary wav file: " + soxreadablewav); tosoxreadable.StartInfo.FileName = "./NativeLibraries\\mplayer\\mplayer.exe"; tosoxreadable.StartInfo.Arguments = " -quiet -vc null -vo null -ao pcm:fast:waveheader \""+fileIn+"\" -ao pcm:file=\\\""+soxreadablewav+"\\\""; tosoxreadable.StartInfo.UseShellExecute = false; tosoxreadable.StartInfo.RedirectStandardOutput = true; tosoxreadable.StartInfo.RedirectStandardError = true; tosoxreadable.Start(); tosoxreadable.WaitForExit(); int exitCode = tosoxreadable.ExitCode; // 0 = succesfull // 1 = partially succesful // 2 = failed if (exitCode != 0) { string standardError = tosoxreadable.StandardError.ReadToEnd(); Console.Out.WriteLine(standardError); return null; } #if DEBUG string standardOutput = tosoxreadable.StandardOutput.ReadToEnd(); Console.Out.WriteLine(standardOutput); #endif float[] floatBuffer = null; if (File.Exists(soxreadablewav)) { floatBuffer = DecodeUsingSox(soxreadablewav, srate, secondsToAnalyze); try { File.Delete(tempFile); File.Delete(soxreadablewav); } catch (IOException io) { Console.WriteLine(io); } } Dbg.WriteLine("Decoding Execution Time: " + t.Stop().TotalMilliseconds + " ms"); return floatBuffer; } } }
/// <summary> /// Find Similar Tracks to one or many audio files using their unique database id(s) /// </summary> /// <param name="id">an array of unique database ids for the audio files to search for similar matches</param> /// <param name="exclude">an array of unique database ids to ignore (normally the same as the id array)</param> /// <param name="db">database</param> /// <param name="analysisMethod">analysis method (SCMS or MandelEllis)</param> /// <param name="numToTake">max number of entries to return</param> /// <param name="percentage">percentage below and above the duration in ms when querying (used if between 0.1 - 0.9)</param> /// <param name="distanceType">distance method to use (KullbackLeiblerDivergence is default)</param> /// <returns>a dictinary list of key value pairs (filepath and distance)</returns> public static Dictionary<KeyValuePair<int, string>, double> SimilarTracks(int[] id, int[] exclude, Db db, Analyzer.AnalysisMethod analysisMethod, int numToTake=25, double percentage=0.2, AudioFeature.DistanceType distanceType = AudioFeature.DistanceType.KullbackLeiblerDivergence) { DbgTimer t = new DbgTimer(); t.Start(); AudioFeature[] seedAudioFeatures = null; AudioFeature[] audioFeatures = null; switch (analysisMethod) { case Analyzer.AnalysisMethod.MandelEllis: seedAudioFeatures = new MandelEllis[id.Length]; audioFeatures = new MandelEllis[100]; break; case Analyzer.AnalysisMethod.SCMS: seedAudioFeatures = new Scms[id.Length]; audioFeatures = new Scms[100]; break; } for (int i = 0; i < seedAudioFeatures.Length; i++) { seedAudioFeatures[i] = db.GetTrack(id[i], analysisMethod); } // Get all tracks from the DB except the seedSongs IDataReader r = db.GetTracks(exclude, seedAudioFeatures[0].Duration, percentage); // store results in a dictionary var NameDictionary = new Dictionary<KeyValuePair<int, string>, double>(); int[] mapping = new int[100]; int read = 1; double d; double dcur; float count; while (read > 0) { read = db.GetNextTracks(ref r, ref audioFeatures, ref mapping, 100, analysisMethod); for (int i = 0; i < read; i++) { d = 0; count = 0; for (int j = 0; j < seedAudioFeatures.Length; j++) { dcur = seedAudioFeatures[j].GetDistance(audioFeatures[i], distanceType); // convert to positive values dcur = Math.Abs(dcur); d += dcur; count++; } if (d > 0) { NameDictionary.Add(new KeyValuePair<int,string>(mapping[i], audioFeatures[i].Name), d/count); //NameDictionary.Add(new KeyValuePair<int,string>(mapping[i], String.Format("{0} ({1} ms)", audioFeatures[i].Name, audioFeatures[i].Duration)), d/count); } } } // sort by non unique values var sortedDict = (from entry in NameDictionary orderby entry.Value ascending select entry) .Take(numToTake) .ToDictionary(pair => pair.Key, pair => pair.Value); Console.Out.WriteLine(String.Format("Found Similar to ({0}) in {1} ms", String.Join(",", seedAudioFeatures.Select(p=>p.Name)), t.Stop().TotalMilliseconds)); return sortedDict; }
public static float[] DecodeUsingSox(string fileIn, int srate, int secondsToAnalyze) { lock (_locker) { using (Process toraw = new Process()) { fileIn = Regex.Replace(fileIn, "%20", " "); DbgTimer t = new DbgTimer(); t.Start(); String curdir = System.Environment.CurrentDirectory; Dbg.WriteLine("Decoding: " + fileIn); String tempFile = System.IO.Path.GetTempFileName(); String raw = tempFile + "_raw.wav"; Dbg.WriteLine("Temporary raw file: " + raw); toraw.StartInfo.FileName = "./NativeLibraries\\sox\\sox.exe"; toraw.StartInfo.Arguments = " \"" + fileIn + "\" -r "+srate+" -e float -b 32 -G -t raw \"" + raw + "\" channels 1"; toraw.StartInfo.UseShellExecute = false; toraw.StartInfo.RedirectStandardOutput = true; toraw.StartInfo.RedirectStandardError = true; toraw.Start(); toraw.WaitForExit(); int exitCode = toraw.ExitCode; // 0 = succesfull // 1 = partially succesful // 2 = failed if (exitCode != 0) { string standardError = toraw.StandardError.ReadToEnd(); Console.Out.WriteLine(standardError); return null; } #if DEBUG string standardOutput = toraw.StandardOutput.ReadToEnd(); Console.Out.WriteLine(standardOutput); #endif float[] floatBuffer; FileStream fs = null; try { FileInfo fi = new FileInfo(raw); fs = fi.OpenRead(); int bytes = (int)fi.Length; int samples = bytes/sizeof(float); if ((samples*sizeof(float)) != bytes) return null; // if the audio file is larger than seconds to analyze, // find a proper section to exctract if (bytes > secondsToAnalyze*srate*sizeof(float)) { int seekto = (bytes/2) - ((secondsToAnalyze/2)*sizeof(float)*srate); Dbg.WriteLine("Extracting section: seekto = " + seekto); bytes = (secondsToAnalyze)*srate*sizeof(float); fs.Seek((samples/2-(secondsToAnalyze/2)*srate)*sizeof(float), SeekOrigin.Begin); } BinaryReader br = new BinaryReader(fs); byte[] bytesBuffer = new byte[bytes]; br.Read(bytesBuffer, 0, bytesBuffer.Length); int items = (int)bytes/sizeof(float); floatBuffer = new float[items]; for (int i = 0; i < items; i++) { floatBuffer[i] = BitConverter.ToSingle(bytesBuffer, i * sizeof(float)); // * 65536.0f; } } catch (System.IO.FileNotFoundException) { floatBuffer = null; } finally { if (fs != null) fs.Close(); try { File.Delete(tempFile); File.Delete(raw); } catch (IOException io) { Console.WriteLine(io); } Dbg.WriteLine("Decoding Execution Time: " + t.Stop().TotalMilliseconds + " ms"); } return floatBuffer; } } }
public List<bool[]> CreateFingerprintsFromLogSpectrum( double[][] logarithmizedSpectrum, IStride stride, int fingerprintLength, int overlap, int topWavelets, out List<double[][]> spectralImages) { DbgTimer t = new DbgTimer(); t.Start (); // Cut the logaritmic spectrogram into smaller spectrograms with one stride between each spectralImages = SpectrumService.CutLogarithmizedSpectrum(logarithmizedSpectrum, stride, fingerprintLength, overlap); // Then apply the wavelet transform on them to lated reduce the resolution // do this in place WaveletService.ApplyWaveletTransformInPlace(spectralImages); // Then for each of the wavelet reduce the resolution by only keeping the top wavelets // and ignore the magnitude of the top wavelets. // Instead, we can simply keep the sign of it (+/-). // This information is enough to keep the extract perceptual characteristics of a song. List<bool[]> fingerprints = new List<bool[]>(); foreach (var spectralImage in spectralImages) { bool[] image = FingerprintDescriptor.ExtractTopWavelets(spectralImage, topWavelets); fingerprints.Add(image); } Dbg.WriteLine ("Created {1} Fingerprints from Log Spectrum - Execution Time: {0} ms", t.Stop().TotalMilliseconds, fingerprints.Count); return fingerprints; }