/// <summary>
/// Turns one spectrogram column back into a time-domain frame with the Lomont
/// table FFT and overlap-adds that frame into <paramref name="signal"/>.
/// </summary>
/// <param name="m">matrix whose column is fetched with GetColumn</param>
/// <param name="column">index of the column (frame) to process</param>
/// <param name="signal">buffer the frame is added into, starting at index hopsize * column</param>
/// <param name="winsize">the real part of every transformed sample is divided by the square root of this value</param>
/// <param name="hopsize">offset, in samples, between the start positions of consecutive frames in signal</param>
public void ComputeInverseComirvaMatrixUsingLomontTableFFT(Comirva.Audio.Util.Maths.Matrix m, int column, ref double[] signal, int winsize, int hopsize) {
    double[] columnValues = m.GetColumn(column);
    int half = columnValues.Length;

    // Build a buffer twice as long: the first half is the column itself, the
    // second half is its mirror image. Index 'half' is never written and stays 0.
    double[] mirrored = new double[2 * half];
    Array.Copy(columnValues, mirrored, half);
    for (int src = half - 1; src >= 1; src--) {
        mirrored[2 * half - src] = columnValues[src];
    }

    // Interleaved complex buffer, transformed in place (second argument false)
    double[] complexSignal = FFTUtilsLomont.DoubleToComplexDouble(mirrored);
    lomonFFT.TableFFT(complexSignal, false);

    double[] window = win.GetWindow();

    int sampleCount = complexSignal.Length / 2;
    double norm = Math.Sqrt(winsize);
    int frameStart = hopsize * column;

    for (int n = 0; n < sampleCount; n++) {
        // only the real part (even index) is used; the imaginary part is ignored
        double real = complexSignal[2 * n] / norm;

        // window the sample a second time (the analysis step windowed it as well)
        double windowed = real * window[n];

        // overlap-add; the factor 2 compensates for the volume lost by the
        // second windowing pass during reconstruction
        signal[frameStart + n] = signal[frameStart + n] + windowed * 2;
    }
}
/// <summary>
/// Windows the audio at <paramref name="pos"/>, runs the prepared FFTW plan and
/// writes the resulting magnitudes into column <paramref name="j"/> of <paramref name="m"/>.
/// </summary>
/// <param name="m">matrix receiving the magnitudes (rows 0 .. winsize/2 - 1 of column j are written)</param>
/// <param name="j">column index to fill</param>
/// <param name="audiodata">source samples handed to the window function</param>
/// <param name="pos">position in audiodata passed to the window function</param>
public void ComputeComirvaMatrixUsingFftw(ref Comirva.Audio.Util.Maths.Matrix m, int j, float[] audiodata, int pos) {
    // apply the window method (e.g HammingWindow, HannWindow etc)
    win.Apply(ref data, audiodata, pos);

    // managed buffer -> native buffer, execute the plan, native buffer -> managed result
    Marshal.Copy(data, 0, fftwData, fftsize);
    fftwf_execute(fftwPlan);
    Marshal.Copy(fftwData, fft, 0, fftsize);

    // fft now holds the result in FFTW "half complex" layout:
    //   r0, r1, r2, ..., rn/2, i(n+1)/2-1, ..., i2, i1
    // For a halfcomplex array hc[n] the k-th component has its real part in hc[k]
    // and its imaginary part in hc[n-k]. For k == 0 (and k == n/2 when n is even)
    // the imaginary part is zero and is not stored.

    // bin 0 has no imaginary part
    m.MatrixData[0][j] = Math.Sqrt(fft[0] * fft[0]);

    // magnitude = sqrt(re*re + im*im)
    //   20*log10(magnitude) gives dB from amplitude
    //   the power spectrum is the magnitude squared; 10*log10(power) gives dB
    int rowCount = winsize / 2;
    for (int row = 1; row < rowCount; row++) {
        var re = fft[row * 2];
        var im = fft[fftsize - row * 2];
        m.MatrixData[row][j] = Math.Sqrt((re * re + im * im));
    }

    // row winsize/2 is deliberately left untouched:
    //m.MatrixData[winsize/2][j] = Math.Sqrt(fft[winsize] * fft[winsize]);
}
/// <summary>
/// Plots the hard, soft, semisoft and strict thresholding functions over an
/// evenly spaced ramp from -5 to 5 and saves each curve as a PNG file.
/// </summary>
/// <remarks>
/// Mirrors the following matlab test:
/// <code>
/// T = 1; % threshold value
/// v = linspace(-5,5,1024);
/// clf;
/// hold('on');
/// plot(v, perform_thresholding(v,T,'hard'), 'b--');
/// plot(v, perform_thresholding(v,T,'soft'), 'r--');
/// plot(v, perform_thresholding(v,[T 2*T],'semisoft'), 'g');
/// plot(v, perform_thresholding(v,[T 4*T],'semisoft'), 'g:');
/// plot(v, perform_thresholding(v',400,'strict'), 'r:');
/// legend('hard', 'soft', 'semisoft, \mu=2', 'semisoft, \mu=4', 'strict, 400');
/// hold('off');
/// </code>
/// </remarks>
public static void RunTests() {
    // linspace in c#: totalCount evenly spaced points INCLUDING both end points,
    // exactly like matlab's linspace(start, end, totalCount).
    // The values are derived from the integer index rather than accumulated in a
    // floating-point loop variable, so the number of generated points can never
    // drift away from the array length.
    const double start = -5;
    const double end = 5;
    const int totalCount = 1024;

    double[][] v = new double[1][];
    v[0] = new double[totalCount];

    double step = (end - start) / (totalCount - 1);
    for (int i = 0; i < totalCount; i++) {
        v[0][i] = start + i * step;
    }
    // pin the last sample so rounding in i * step cannot move the end point
    v[0][totalCount - 1] = end;

    // perform thresholding and plot
    int T = 1;

    double[][] hard = perform_hard_thresholding(v, T);
    Comirva.Audio.Util.Maths.Matrix mHard = new Comirva.Audio.Util.Maths.Matrix(hard);
    mHard.DrawMatrixGraph("thresholding-hard.png", false);

    double[][] soft = perform_soft_thresholding(v, T);
    Comirva.Audio.Util.Maths.Matrix mSoft = new Comirva.Audio.Util.Maths.Matrix(soft);
    mSoft.DrawMatrixGraph("thresholding-soft.png", false);

    double[][] semisoft1 = perform_semisoft_thresholding(v, T, 2 * T);
    Comirva.Audio.Util.Maths.Matrix mSemiSoft1 = new Comirva.Audio.Util.Maths.Matrix(semisoft1);
    mSemiSoft1.DrawMatrixGraph("thresholding-semisoft1.png", false);

    double[][] semisoft2 = perform_semisoft_thresholding(v, T, 4 * T);
    Comirva.Audio.Util.Maths.Matrix mSemiSoft2 = new Comirva.Audio.Util.Maths.Matrix(semisoft2);
    mSemiSoft2.DrawMatrixGraph("thresholding-semisoft2.png", false);

    double[][] strict = perform_strict_thresholding(v, 400);
    Comirva.Audio.Util.Maths.Matrix mStrict = new Comirva.Audio.Util.Maths.Matrix(strict);
    mStrict.DrawMatrixGraph("thresholding-strict.png", false);
}
/// <summary>
/// Windows the audio at <paramref name="pos"/>, transforms it with the Lomont
/// table FFT and stores scaled magnitudes in one column of <paramref name="m"/>.
/// </summary>
/// <param name="m">matrix receiving the magnitudes</param>
/// <param name="column">column index to fill</param>
/// <param name="audiodata">source samples handed to the window function</param>
/// <param name="pos">position in audiodata passed to the window function</param>
public void ComputeComirvaMatrixUsingLomontTableFFT(ref Comirva.Audio.Util.Maths.Matrix m, int column, float[] audiodata, int pos) {
    // apply the window method (e.g HammingWindow, HannWindow etc)
    win.Apply(ref data, audiodata, pos);

    // interleaved (re, im) buffer, transformed in place (second argument true)
    double[] spectrum = FFTUtilsLomont.FloatToComplexDouble(data);
    lomonFFT.TableFFT(spectrum, true);

    // Walk every second complex bin (bin index advances by 2, so the array
    // offset advances by 4) while the target row advances by 1.
    int binLimit = spectrum.Length / 4;
    for (int bin = 0, row = 0; bin < binLimit; bin += 2, row++) {
        double real = spectrum[2 * bin];
        double imag = spectrum[2 * bin + 1];

        // magnitude, scaled by half the interleaved buffer length
        m.MatrixData[row][column] = Math.Sqrt((real * real + imag * imag) * spectrum.Length / 2);
    }
}
/// <summary>
/// Turns one spectrogram column back into a time-domain frame with the Lomont
/// real FFT and overlap-adds that frame into <paramref name="signal"/>.
/// </summary>
/// <param name="m">matrix whose column is fetched with GetColumn</param>
/// <param name="column">index of the column (frame) to process</param>
/// <param name="signal">buffer the frame is added into, starting at index hopsize * column</param>
/// <param name="winsize">sizes the transform buffer (2 * winsize) and provides the 1/sqrt(winsize) scaling</param>
/// <param name="hopsize">offset, in samples, between the start positions of consecutive frames in signal</param>
public void ComputeInverseComirvaMatrixUsingLomontRealFFT(Comirva.Audio.Util.Maths.Matrix m, int column, ref double[] signal, int winsize, int hopsize) {
    double[] columnValues = m.GetColumn(column);
    int half = columnValues.Length;

    // Build a buffer twice as long: the first half is the column itself, the
    // second half is its mirror image. Index 'half' is never written and stays 0.
    double[] mirrored = new double[2 * half];
    Array.Copy(columnValues, mirrored, half);
    for (int src = half - 1; src >= 1; src--) {
        mirrored[2 * half - src] = columnValues[src];
    }

    // The transform expects its input packed as
    //   r0, r(n/2), r1, i1, r2, i2 ...
    // Only the real slots (even indices) are filled below; every imaginary slot
    // other than index 1 keeps its initial value of 0.
    double[] ifft = new double[winsize * 2];
    ifft[0] = mirrored[0];
    ifft[1] = mirrored[winsize / 2];
    for (int k = 1; k < mirrored.Length; k++) {
        ifft[2 * k] = mirrored[k];
    }

    // transform in place (second argument false)
    lomonFFT.RealFFT(ifft, false);

    double[] window = win.GetWindow();

    int sampleCount = ifft.Length / 2;
    double norm = Math.Sqrt(winsize);
    int frameStart = hopsize * column;

    for (int n = 0; n < sampleCount; n++) {
        // take just the real part
        double real = ifft[2 * n] / norm;

        // window the sample a second time (the analysis step windowed it as well)
        double windowed = real * window[n];

        // overlap-add; the factor 5 compensates for the volume lost by the
        // second windowing pass during reconstruction
        signal[frameStart + n] = signal[frameStart + n] + windowed * 5;
    }
}
/// <summary>
/// Windows the audio at <paramref name="pos"/>, transforms the first half of the
/// windowed buffer with the Lomont real FFT and stores scaled magnitudes in one
/// column of <paramref name="m"/>.
/// </summary>
/// <param name="m">matrix receiving the magnitudes (rows 0 .. winsize/2 - 1 of the column are written)</param>
/// <param name="column">column index to fill</param>
/// <param name="audiodata">source samples handed to the window function</param>
/// <param name="pos">position in audiodata passed to the window function</param>
public void ComputeComirvaMatrixUsingLomontRealFFT(ref Comirva.Audio.Util.Maths.Matrix m, int column, float[] audiodata, int pos) {
    // apply the window method (e.g HammingWindow, HannWindow etc)
    win.Apply(ref data, audiodata, pos);

    // only the first half of the windowed buffer is transformed
    int halfLength = data.Length / 2;
    double[] packed = new double[halfLength];
    Array.Copy(data, packed, halfLength);

    // transform in place (second argument true); afterwards the buffer holds
    //   r0, r(n/2), r1, i1, r2, i2 ...
    lomonFFT.RealFFT(packed, true);

    // the two purely real entries
    m.MatrixData[0][column] = Math.Sqrt(packed[0] * packed[0] * winsize);
    // NOTE(review): for winsize >= 4 the loop below writes row winsize/2 - 1
    // again, so this value does not survive - confirm whether that is intended.
    m.MatrixData[winsize / 2 - 1][column] = Math.Sqrt(packed[1] * packed[1] * winsize);

    // magnitude = sqrt(re*re + im*im), here additionally scaled by winsize
    //   20*log10(magnitude) gives dB from amplitude
    //   the power spectrum is the magnitude squared; 10*log10(power) gives dB
    int rowCount = winsize / 2;
    for (int row = 1; row < rowCount; row++) {
        double re = packed[2 * row];
        double im = packed[2 * row + 1];
        m.MatrixData[row][column] = Math.Sqrt((re * re + im * im) * winsize);
    }
}
/// <summary>
/// Converts every fingerprint into a matrix, builds a Statistical Cluster Model
/// Similarity (Scms) feature from it and adds that feature to the database.
/// </summary>
/// <param name="fingerprints">fingerprints, two booleans per matrix cell</param>
/// <param name="param">supplies file name, path, duration and the fingerprint dimensions</param>
/// <param name="db">database the features are added to</param>
/// <param name="trackId">track id; NOTE(review): not forwarded to db.AddTrack here - confirm</param>
/// <param name="doOutputDebugInfo">when true, each fingerprint matrix is also drawn to a PNG file</param>
/// <returns>false as soon as one feature cannot be created or stored, otherwise true</returns>
private static bool AnalyseAndAddScmsUsingFingerprints(List<bool[]> fingerprints, WorkUnitParameterObject param, Db db, int trackId, bool doOutputDebugInfo=DEFAULT_DEBUG_INFO) {
    DbgTimer t = new DbgTimer();
    t.Start ();

    string fileName = param.FileName;
    int width = param.FingerprintingConfiguration.FingerprintLength;   // e.g. 128
    int height = param.FingerprintingConfiguration.LogBins;            // e.g. 32

    int fingerprintNumber = 0;   // 1-based, only used for the debug file names
    foreach (bool[] fingerprint in fingerprints) {
        fingerprintNumber++;

        Comirva.Audio.Util.Maths.Matrix scmsMatrix = new Comirva.Audio.Util.Maths.Matrix(width, height);
        for (int i = 0; i < width; i++) {
            int rowOffset = 2 * height * i;
            for (int j = 0; j < height; j++) {
                // Every cell is encoded by two consecutive bits:
                //   Negative Numbers = 01
                //   Positive Numbers = 10
                //   Zeros            = 00
                bool firstBit = fingerprint[rowOffset + (2 * j)];
                bool secondBit = fingerprint[rowOffset + (2 * j) + 1];

                // first bit set -> 2.0, otherwise second bit set -> 0.0, neither -> 1.0
                scmsMatrix.MatrixData[i][j] = firstBit ? 2.0 : (secondBit ? 0.0 : 1.0);
            }
        }

        if (doOutputDebugInfo) {
            scmsMatrix.DrawMatrixImage(String.Format("{0}_fingerprint_{1}.png", fileName, fingerprintNumber), width, height);
        }

        // Store in a Statistical Cluster Model Similarity class.
        Scms audioFeature = Scms.GetScmsNoInverse(scmsMatrix, fileName);
        if (audioFeature == null) {
            return false;
        }

        audioFeature.BitString = GetBitString(fingerprint);   // bitstring hash
        audioFeature.Duration = (long) param.DurationInMs;
        audioFeature.Name = param.PathToAudioFile;

        // Add to database
        if (db.AddTrack(audioFeature) == -1) {
            Console.Out.WriteLine("Failed! Could not add audio feature to database ({0})!", fileName);
            return false;
        }
    }

    Dbg.WriteLine ("AnalyseAndAddScmsUsingFingerprints - Execution Time: {0} ms", t.Stop().TotalMilliseconds);
    return true;
}
/// <summary>
/// Analyzes an audio file with the soundfingerprinting methods and inserts the
/// resulting track into the given repository.
/// </summary>
/// <param name="filePath">full file path</param>
/// <param name="repository">Soundfingerprinting Repository</param>
/// <param name="doOutputDebugInfo">decide whether to output debug info like spectrogram images (default value can be set)</param>
/// <param name="useHaarWavelet">decide whether to use haar wavelet compression or DCT compression (not read by this method)</param>
/// <returns>true if successful</returns>
public static bool AnalyzeAndAddSoundfingerprinting(FileInfo filePath, Repository repository, bool doOutputDebugInfo=DEFAULT_DEBUG_INFO, bool useHaarWavelet = true) {
    DbgTimer t = new DbgTimer();
    t.Start ();

    // get work config from the audio file
    WorkUnitParameterObject param = GetWorkUnitParameterObjectFromAudioFile(filePath);
    param.FingerprintingConfiguration = fingerprintingConfigCreation;

    string fileName = param.FileName;

    // build track (the Id is set by the insert method)
    Track track = new Track {
        Title = param.FileName,
        TrackLengthMs = (int) param.DurationInMs,
        FilePath = param.PathToAudioFile,
        Tags = param.Tags,
        Id = -1
    };

    // Get fingerprint signatures using the Soundfingerprinting methods
    double[][] logSpectrogram;
    List<bool[]> fingerprints;
    List<double[][]> spectralImages;
    bool inserted = repository.InsertTrackInDatabaseUsingSamples(
        track,
        param.FingerprintingConfiguration.NumberOfHashTables,
        param.FingerprintingConfiguration.NumberOfKeys,
        param,
        out logSpectrogram,
        out fingerprints,
        out spectralImages);

    if (!inserted) {
        Console.Out.WriteLine("Failed! Could not compute the soundfingerprint {0}!", fileName);
        return false;
    }

    // store logSpectrogram as (transposed) Matrix
    Comirva.Audio.Util.Maths.Matrix logSpectrogramMatrix = new Comirva.Audio.Util.Maths.Matrix(logSpectrogram).Transpose();

    // Debug output for the Soundfingerprinting method
    if (doOutputDebugInfo) {
        ImageService imageService = new ImageService(
            repository.FingerprintService.SpectrumService,
            repository.FingerprintService.WaveletService);

        imageService.GetLogSpectralImages(
            logSpectrogram,
            fingerprintingConfigCreation.Stride,
            fingerprintingConfigCreation.FingerprintLength,
            fingerprintingConfigCreation.Overlap,
            2).Save(fileName + "_specgram_logimages.png");

        logSpectrogramMatrix.DrawMatrixImageLogValues(fileName + "_specgram_logimage.png", true);

        if (DEBUG_OUTPUT_TEXT) {
            logSpectrogramMatrix.WriteCSV(fileName + "_specgram_log.csv", ";");
        }
    }

    Dbg.WriteLine ("AnalyzeAndAddSoundfingerprinting - Total Execution Time: {0} ms", t.Stop().TotalMilliseconds);
    return true;
}
/// <summary>
/// Concatenates all spectral images into a single matrix, builds a Statistical
/// Cluster Model Similarity (Scms) feature from it and adds it to the database.
/// </summary>
/// <param name="spectralImages">spectral images whose rows are concatenated in list order</param>
/// <param name="fingerprints">not read by this overload</param>
/// <param name="param">supplies file name, path and duration</param>
/// <param name="db">database the feature is added to</param>
/// <param name="trackId">track id; NOTE(review): not forwarded to db.AddTrack here - confirm</param>
/// <param name="doOutputDebugInfo">when true, the merged matrix is also drawn to a PNG file</param>
/// <returns>true when the feature was created and stored, otherwise false</returns>
private static bool AnalyseAndAddScmsUsingFingerprints(List<double[][]> spectralImages, List<bool[]> fingerprints, WorkUnitParameterObject param, Db db, int trackId, bool doOutputDebugInfo=DEFAULT_DEBUG_INFO) {
    DbgTimer t = new DbgTimer();
    t.Start ();

    string fileName = param.FileName;

    // Merge the arrays in the List using Linq: one long list of rows
    double[][] mergedRows = spectralImages.SelectMany(image => image).ToArray();
    Comirva.Audio.Util.Maths.Matrix scmsMatrix = new Comirva.Audio.Util.Maths.Matrix(mergedRows);

    if (doOutputDebugInfo) {
        scmsMatrix.DrawMatrixImage(String.Format("{0}_spectral.png", fileName));
    }

    // Store in a Statistical Cluster Model Similarity class.
    Scms audioFeature = Scms.GetScms(scmsMatrix, fileName);
    if (audioFeature == null) {
        return false;
    }

    audioFeature.BitString = GetBitString(scmsMatrix);   // bitstring hash
    audioFeature.Duration = (long) param.DurationInMs;
    audioFeature.Name = param.PathToAudioFile;

    // Add to database
    if (db.AddTrack(audioFeature) == -1) {
        Console.Out.WriteLine("Failed! Could not add audio feature to database ({0})!", fileName);
        return false;
    }

    Dbg.WriteLine ("AnalyseAndAddScmsUsingFingerprints2 - Execution Time: {0} ms", t.Stop().TotalMilliseconds);
    return true;
}
/// <summary>
/// Decodes an audio file, reads its tags and inserts it as a track into a
/// soundfingerprinting repository backed by DatabaseService.Instance.
/// </summary>
/// <param name="filePath">audio file to analyze</param>
/// <param name="doOutputDebugInfo">when true, waveform and log-spectrogram images (and optionally a CSV) are written</param>
/// <param name="useHaarWavelet">not read by this method</param>
/// <returns>
/// a DummyAudioFeature carrying duration and file name, or null when no audio
/// could be decoded or the repository insert failed
/// </returns>
public static AudioFeature AnalyzeSoundfingerprinting(FileInfo filePath, bool doOutputDebugInfo=DEFAULT_DEBUG_INFO, bool useHaarWavelet = true) {
    DbgTimer t = new DbgTimer();
    t.Start ();

    float[] audiodata = AudioFileReader.Decode(filePath.FullName, SAMPLING_RATE, SECONDS_TO_ANALYZE);
    if (audiodata == null || audiodata.Length == 0) {
        Dbg.WriteLine("Error! - No Audio Found");
        return null;
    }

    // Read TAGs using BASS
    FindSimilar.AudioProxies.BassProxy bass = FindSimilar.AudioProxies.BassProxy.Instance;
    Un4seen.Bass.AddOn.Tags.TAG_INFO tag_info = bass.GetTagInfoFromFile(filePath.FullName);

    // Name of file being processed
    string name = StringUtils.RemoveNonAsciiCharacters(Path.GetFileNameWithoutExtension(filePath.Name));

    #if DEBUG
    if (Analyzer.DEBUG_INFO_VERBOSE) {
        if (DEBUG_OUTPUT_TEXT) WriteAscii(audiodata, name + "_audiodata.ascii");
        if (DEBUG_OUTPUT_TEXT) WriteF3Formatted(audiodata, name + "_audiodata.txt");
    }
    #endif

    if (doOutputDebugInfo) {
        DrawGraph(MathUtils.FloatToDouble(audiodata), name + "_audiodata.png");
    }

    // Calculate duration in ms
    double duration = (double) audiodata.Length / SAMPLING_RATE * 1000;

    // zero pad if the audio file is too short to be analyzed
    if (audiodata.Length < (fingerprintingConfig.WdftSize + fingerprintingConfig.Overlap)) {
        int lenNew = fingerprintingConfig.WdftSize + fingerprintingConfig.Overlap;
        Array.Resize<float>(ref audiodata, lenNew);
    }

    // Get fingerprint signatures using the Soundfingerprinting methods

    // Get database
    DatabaseService databaseService = DatabaseService.Instance;

    IPermutations permutations = new LocalPermutations("Soundfingerprinting\\perms.csv", ",");
    Repository repository = new Repository(permutations, databaseService, fingerprintService);

    // Image Service
    ImageService imageService = new ImageService(
        fingerprintService.SpectrumService,
        fingerprintService.WaveletService);

    // work config
    WorkUnitParameterObject param = new WorkUnitParameterObject();
    param.FingerprintingConfiguration = fingerprintingConfig;
    param.AudioSamples = audiodata;
    param.PathToAudioFile = filePath.FullName;
    param.MillisecondsToProcess = SECONDS_TO_ANALYZE * 1000;
    param.StartAtMilliseconds = 0;

    // build track
    Track track = new Track();
    track.Title = name;
    track.TrackLengthMs = (int) duration;
    track.FilePath = filePath.FullName;
    track.Id = -1; // this will be set by the insert method

    // parse tag_info
    if (tag_info != null) {
        Dictionary<string, string> tags = new Dictionary<string, string>();

        // text fields are stored unless they are the empty string
        Action<string, string> addIfSet = (key, value) => {
            if (value != string.Empty) tags.Add(key, value);
        };

        // title, filename and duration are intentionally not stored as tags
        addIfSet("artist", tag_info.artist);
        addIfSet("album", tag_info.album);
        addIfSet("albumartist", tag_info.albumartist);
        addIfSet("year", tag_info.year);
        addIfSet("comment", tag_info.comment);
        addIfSet("genre", tag_info.genre);
        addIfSet("track", tag_info.track);
        addIfSet("disc", tag_info.disc);
        addIfSet("copyright", tag_info.copyright);
        addIfSet("encodedby", tag_info.encodedby);
        addIfSet("composer", tag_info.composer);
        addIfSet("publisher", tag_info.publisher);
        addIfSet("lyricist", tag_info.lyricist);
        addIfSet("remixer", tag_info.remixer);
        addIfSet("producer", tag_info.producer);
        addIfSet("bpm", tag_info.bpm);
        tags.Add("channelinfo", tag_info.channelinfo.ToString());
        if (tag_info.bitrate > 0) tags.Add("bitrate", tag_info.bitrate.ToString());
        if (tag_info.replaygain_track_gain != -100f) tags.Add("replaygain_track_gain", tag_info.replaygain_track_gain.ToString());
        if (tag_info.replaygain_track_peak != -1f) tags.Add("replaygain_track_peak", tag_info.replaygain_track_peak.ToString());
        addIfSet("conductor", tag_info.conductor);
        addIfSet("grouping", tag_info.grouping);
        addIfSet("mood", tag_info.mood);
        addIfSet("rating", tag_info.rating);
        addIfSet("isrc", tag_info.isrc);

        // Native tags are "key=value" strings. Parse them defensively:
        //  - split on the FIRST '=' only, so values that contain '=' stay intact
        //  - skip entries without '=' instead of indexing past the array
        //  - keep the first value for a key that is already present, because
        //    Dictionary.Add throws on duplicate keys
        var nativeTags = tag_info.NativeTags;
        if (nativeTags != null) {
            foreach (var nativeTag in nativeTags) {
                if (string.IsNullOrEmpty(nativeTag)) continue;

                string[] keyvalue = nativeTag.Split(new char[] { '=' }, 2);
                if (keyvalue.Length < 2 || tags.ContainsKey(keyvalue[0])) continue;

                tags.Add(keyvalue[0], keyvalue[1]);
            }
        }
        track.Tags = tags;
    }

    AudioFeature audioFeature = null;
    double[][] logSpectrogram;
    if (repository.InsertTrackInDatabaseUsingSamples(track, 25, 4, param, out logSpectrogram)) {

        if (doOutputDebugInfo) {
            imageService.GetLogSpectralImages(logSpectrogram, fingerprintingConfig.Stride, fingerprintingConfig.FingerprintLength, fingerprintingConfig.Overlap, 2).Save(name + "_specgram_logimages.png");

            Comirva.Audio.Util.Maths.Matrix logSpectrogramMatrix = new Comirva.Audio.Util.Maths.Matrix(logSpectrogram);
            logSpectrogramMatrix = logSpectrogramMatrix.Transpose();
            logSpectrogramMatrix.DrawMatrixImageLogValues(name + "_specgram_logimage.png", true);

            if (DEBUG_OUTPUT_TEXT) {
                logSpectrogramMatrix.WriteCSV(name + "_specgram_log.csv", ";");
            }
        }

        audioFeature = new DummyAudioFeature();

        // Store duration
        audioFeature.Duration = (long) duration;

        // Store file name
        audioFeature.Name = filePath.FullName;
    } else {
        // failed: audioFeature stays null
    }

    Dbg.WriteLine ("Soundfingerprinting - Total Execution Time: {0} ms", t.Stop().TotalMilliseconds);
    return audioFeature;
}
//private static Mfcc mfccOptimized = new Mfcc(WINDOW_SIZE, SAMPLING_RATE, MEL_COEFFICIENTS, MFCC_COEFFICIENTS);
//private static MFCC mfccComirva = new MFCC(SAMPLING_RATE, WINDOW_SIZE, MFCC_COEFFICIENTS, true, 20.0, SAMPLING_RATE/2, MEL_COEFFICIENTS);
#endif

#region Methods
/// <summary>
/// Decodes an audio file, inserts it as a track into the soundfingerprinting
/// repository and additionally stores a Statistical Cluster Model Similarity
/// (Scms) feature for it in <paramref name="db"/>.
/// </summary>
/// <param name="filePath">audio file to analyze</param>
/// <param name="db">database receiving the Scms audio feature</param>
/// <param name="databaseService">database service handed to the soundfingerprinting repository</param>
/// <param name="doOutputDebugInfo">when true, waveform, spectrogram and feature images are written</param>
/// <param name="useHaarWavelet">true: compress the log spectrogram with the wavelet transform, false: with the DCT</param>
/// <returns>
/// false when no audio could be decoded or the repository insert failed, otherwise true.
/// NOTE(review): true is also returned when the Scms feature is null or
/// db.AddTrack reports -1 - confirm that this is intended.
/// </returns>
public static bool AnalyzeAndAdd(FileInfo filePath, Db db, DatabaseService databaseService, bool doOutputDebugInfo=DEFAULT_DEBUG_INFO, bool useHaarWavelet = true) {
    DbgTimer t = new DbgTimer();
    t.Start ();

    float[] audiodata = AudioFileReader.Decode(filePath.FullName, SAMPLING_RATE, SECONDS_TO_ANALYZE);
    if (audiodata == null || audiodata.Length == 0) {
        Dbg.WriteLine("Error! - No Audio Found");
        return false;
    }

    // Read TAGs using BASS
    FindSimilar.AudioProxies.BassProxy bass = FindSimilar.AudioProxies.BassProxy.Instance;
    Un4seen.Bass.AddOn.Tags.TAG_INFO tag_info = bass.GetTagInfoFromFile(filePath.FullName);

    // Name of file being processed
    string name = StringUtils.RemoveNonAsciiCharacters(Path.GetFileNameWithoutExtension(filePath.Name));

    #if DEBUG
    if (Analyzer.DEBUG_INFO_VERBOSE) {
        if (DEBUG_OUTPUT_TEXT) WriteAscii(audiodata, name + "_audiodata.ascii");
        if (DEBUG_OUTPUT_TEXT) WriteF3Formatted(audiodata, name + "_audiodata.txt");
    }
    #endif

    if (doOutputDebugInfo) {
        DrawGraph(MathUtils.FloatToDouble(audiodata), name + "_audiodata.png");
    }

    // Calculate duration in ms
    double duration = (double) audiodata.Length / SAMPLING_RATE * 1000;

    // Explode samples to the range of 16 bit shorts (-32,768 to 32,767)
    // Matlab multiplies with 2^15 (32768)
    // e.g. if( max(abs(speech))<=1 ), speech = speech * 2^15; end;
    MathUtils.Multiply(ref audiodata, AUDIO_MULTIPLIER); // 65536

    // zero pad if the audio file is too short to perform a mfcc
    if (audiodata.Length < (fingerprintingConfig.WdftSize + fingerprintingConfig.Overlap)) {
        int lenNew = fingerprintingConfig.WdftSize + fingerprintingConfig.Overlap;
        Array.Resize<float>(ref audiodata, lenNew);
    }

    // Get fingerprint signatures using the Soundfingerprinting methods
    IPermutations permutations = new LocalPermutations("Soundfingerprinting\\perms.csv", ",");
    Repository repository = new Repository(permutations, databaseService, fingerprintService);

    // Image Service
    ImageService imageService = new ImageService(
        fingerprintService.SpectrumService,
        fingerprintService.WaveletService);

    // work config
    WorkUnitParameterObject param = new WorkUnitParameterObject();
    param.FingerprintingConfiguration = fingerprintingConfig;
    param.AudioSamples = audiodata;
    param.PathToAudioFile = filePath.FullName;
    param.MillisecondsToProcess = SECONDS_TO_ANALYZE * 1000;
    param.StartAtMilliseconds = 0;

    // build track
    Track track = new Track();
    track.Title = name;
    track.TrackLengthMs = (int) duration;
    track.FilePath = filePath.FullName;
    track.Id = -1; // this will be set by the insert method

    // parse tag_info
    if (tag_info != null) {
        Dictionary<string, string> tags = new Dictionary<string, string>();

        // text fields are stored unless they are the empty string
        Action<string, string> addIfSet = (key, value) => {
            if (value != string.Empty) tags.Add(key, value);
        };

        // title, filename and duration are intentionally not stored as tags
        addIfSet("artist", tag_info.artist);
        addIfSet("album", tag_info.album);
        addIfSet("albumartist", tag_info.albumartist);
        addIfSet("year", tag_info.year);
        addIfSet("comment", tag_info.comment);
        addIfSet("genre", tag_info.genre);
        addIfSet("track", tag_info.track);
        addIfSet("disc", tag_info.disc);
        addIfSet("copyright", tag_info.copyright);
        addIfSet("encodedby", tag_info.encodedby);
        addIfSet("composer", tag_info.composer);
        addIfSet("publisher", tag_info.publisher);
        addIfSet("lyricist", tag_info.lyricist);
        addIfSet("remixer", tag_info.remixer);
        addIfSet("producer", tag_info.producer);
        addIfSet("bpm", tag_info.bpm);
        tags.Add("channelinfo", tag_info.channelinfo.ToString());
        if (tag_info.bitrate > 0) tags.Add("bitrate", tag_info.bitrate.ToString());
        if (tag_info.replaygain_track_gain != -100f) tags.Add("replaygain_track_gain", tag_info.replaygain_track_gain.ToString());
        if (tag_info.replaygain_track_peak != -1f) tags.Add("replaygain_track_peak", tag_info.replaygain_track_peak.ToString());
        addIfSet("conductor", tag_info.conductor);
        addIfSet("grouping", tag_info.grouping);
        addIfSet("mood", tag_info.mood);
        addIfSet("rating", tag_info.rating);
        addIfSet("isrc", tag_info.isrc);

        // Native tags are "key=value" strings. Parse them defensively:
        //  - split on the FIRST '=' only, so values that contain '=' stay intact
        //  - skip entries without '=' instead of indexing past the array
        //  - keep the first value for a key that is already present, because
        //    Dictionary.Add throws on duplicate keys
        var nativeTags = tag_info.NativeTags;
        if (nativeTags != null) {
            foreach (var nativeTag in nativeTags) {
                if (string.IsNullOrEmpty(nativeTag)) continue;

                string[] keyvalue = nativeTag.Split(new char[] { '=' }, 2);
                if (keyvalue.Length < 2 || tags.ContainsKey(keyvalue[0])) continue;

                tags.Add(keyvalue[0], keyvalue[1]);
            }
        }
        track.Tags = tags;
    }

    double[][] logSpectrogram;
    if (!repository.InsertTrackInDatabaseUsingSamples(track, 25, 4, param, out logSpectrogram)) {
        // failed
        return false;
    }

    // store logSpectrogram as Matrix
    Comirva.Audio.Util.Maths.Matrix logSpectrogramMatrix = new Comirva.Audio.Util.Maths.Matrix(logSpectrogram);
    logSpectrogramMatrix = logSpectrogramMatrix.Transpose();

    // Debug for Soundfingerprinting Method
    if (doOutputDebugInfo) {
        imageService.GetLogSpectralImages(logSpectrogram, fingerprintingConfig.Stride, fingerprintingConfig.FingerprintLength, fingerprintingConfig.Overlap, 2).Save(name + "_specgram_logimages.png");

        logSpectrogramMatrix.DrawMatrixImageLogValues(name + "_specgram_logimage.png", true);

        if (DEBUG_OUTPUT_TEXT) {
            logSpectrogramMatrix.WriteCSV(name + "_specgram_log.csv", ";");
        }
    }

    // Insert Statistical Cluster Model Similarity Audio Feature as well
    Comirva.Audio.Util.Maths.Matrix scmsMatrix = null;
    if (useHaarWavelet) {
        // Wavelet Transform
        int lastHeight = 0;
        int lastWidth = 0;
        scmsMatrix = mfccMirage.ApplyWaveletCompression(ref logSpectrogramMatrix, out lastHeight, out lastWidth);

        #if DEBUG
        if (Analyzer.DEBUG_INFO_VERBOSE) {
            if (DEBUG_OUTPUT_TEXT) scmsMatrix.WriteAscii(name + "_waveletdata.ascii");
        }
        #endif

        if (doOutputDebugInfo) {
            scmsMatrix.DrawMatrixImageLogValues(name + "_waveletdata.png", true);
        }

        #if DEBUG
        if (Analyzer.DEBUG_INFO_VERBOSE) {
            // try to do an inverse wavelet transform
            Comirva.Audio.Util.Maths.Matrix stftdata_inverse_wavelet = mfccMirage.InverseWaveletCompression(ref scmsMatrix, lastHeight, lastWidth, logSpectrogramMatrix.Rows, logSpectrogramMatrix.Columns);

            if (DEBUG_OUTPUT_TEXT) stftdata_inverse_wavelet.WriteCSV(name + "_specgramlog_inverse_wavelet.csv", ";");
            stftdata_inverse_wavelet.DrawMatrixImageLogValues(name + "_specgramlog_inverse_wavelet.png", true);
        }
        #endif
    } else {
        // DCT Transform
        // It seems the Mirage way of applying the DCT is slightly faster than the
        // Comirva way due to less loops
        scmsMatrix = mfccMirage.ApplyDCT(ref logSpectrogramMatrix);

        #if DEBUG
        if (Analyzer.DEBUG_INFO_VERBOSE) {
            if (DEBUG_OUTPUT_TEXT) scmsMatrix.WriteAscii(name + "_mfccdata.ascii");
        }
        #endif

        if (doOutputDebugInfo) {
            scmsMatrix.DrawMatrixImageLogValues(name + "_mfccdata.png", true);
        }

        #if DEBUG
        if (Analyzer.DEBUG_INFO_VERBOSE) {
            // try to do an inverse mfcc
            Comirva.Audio.Util.Maths.Matrix stftdata_inverse_mfcc = mfccMirage.InverseDCT(ref scmsMatrix);

            if (DEBUG_OUTPUT_TEXT) stftdata_inverse_mfcc.WriteCSV(name + "_stftdata_inverse_mfcc.csv", ";");
            stftdata_inverse_mfcc.DrawMatrixImageLogValues(name + "_specgramlog_inverse_mfcc.png", true);
        }
        #endif
    }

    // Store in a Statistical Cluster Model Similarity class.
    // A Gaussian representation of a song
    Scms audioFeature = Scms.GetScms(scmsMatrix, name);

    if (audioFeature != null) {

        // Store image if debugging
        if (doOutputDebugInfo) {
            audioFeature.Image = scmsMatrix.DrawMatrixImageLogValues(name + "_featuredata.png", true, false, 0, 0, true);
        }

        // Store bitstring hash as well
        string hashString = GetBitString(scmsMatrix);
        audioFeature.BitString = hashString;

        // Store duration
        audioFeature.Duration = (long) duration;

        // Store file name
        audioFeature.Name = filePath.FullName;

        // the id assigned to the track by the repository insert is passed by reference
        int id = track.Id;
        if (db.AddTrack(ref id, audioFeature) == -1) {
            Console.Out.WriteLine("Failed! Could not add audioFeature to database {0}!", name);
        }
    }

    Dbg.WriteLine ("AnalyzeAndAdd - Total Execution Time: {0} ms", t.Stop().TotalMilliseconds);
    return true;
}
/// <summary>
/// Runs the soundfingerprinting pipeline on one audio file and saves the
/// spectrogram, log-spectrogram and fingerprint images for visual inspection.
/// </summary>
/// <param name="filename">path of the audio file to process</param>
/// <param name="name">label embedded in the names of the generated files</param>
private static void TestSoundfingerprintingAlgorithm(string filename, string name) {

    // work config
    WorkUnitParameterObject param = new WorkUnitParameterObject {
        PathToAudioFile = filename,
        StartAtMilliseconds = 0,
        MillisecondsToProcess = 0,
        FingerprintingConfiguration = fingerprintingConfig
    };

    // Soundfingerprinting Service
    FingerprintService fingerprintService = GetSoundfingerprintingService();

    // Image Service
    ImageService imageService = new ImageService(
        fingerprintService.SpectrumService,
        fingerprintService.WaveletService);

    // Configuration: copy the audio related settings of the fingerprinting config
    AudioServiceConfiguration audioServiceConfiguration = new AudioServiceConfiguration();
    audioServiceConfiguration.LogBins = fingerprintingConfig.LogBins;
    audioServiceConfiguration.LogBase = fingerprintingConfig.LogBase;
    audioServiceConfiguration.MaxFrequency = fingerprintingConfig.MaxFrequency;
    audioServiceConfiguration.MinFrequency = fingerprintingConfig.MinFrequency;
    audioServiceConfiguration.Overlap = fingerprintingConfig.Overlap;
    audioServiceConfiguration.SampleRate = fingerprintingConfig.SampleRate;
    audioServiceConfiguration.WdftSize = fingerprintingConfig.WdftSize;
    audioServiceConfiguration.NormalizeSignal = fingerprintingConfig.NormalizeSignal;
    audioServiceConfiguration.UseDynamicLogBase = fingerprintingConfig.UseDynamicLogBase;

    // 1) plain spectrogram
    double[][] spectrogram = fingerprintService.AudioService.CreateSpectrogram(
        filename,
        new Mirage.HannWindow(fingerprintingConfig.WdftSize),
        fingerprintingConfig.SampleRate,
        fingerprintingConfig.Overlap,
        fingerprintingConfig.WdftSize);
    imageService.GetSpectrogramImage(spectrogram, 600, 400).Save("imageservice_" + name + "_specgram.png");

    /*
    Comirva.Audio.Util.Maths.Matrix stftdata = new Comirva.Audio.Util.Maths.Matrix(spectrogram).Transpose();
    #if DEBUG
    if (Analyzer.DEBUG_INFO_VERBOSE) {
        if (DEBUG_OUTPUT_TEXT) {
            stftdata.WriteAscii(name + "_stftdata2.ascii");
            stftdata.WriteCSV(name + "_stftdata2.csv", ";");
        }

        // same as specgram(audio*32768, 2048, 44100, hanning(2048), 1024);
        stftdata.DrawMatrixImageLogValues(name + "_specgram2.png", true);

        // spec gram with log values for the y axis (frequency)
        stftdata.DrawMatrixImageLogY(name + "_specgramlog2.png", SAMPLING_RATE, 20, SAMPLING_RATE/2, 120, WINDOW_SIZE);
    }
    #endif
    */

    // 2) logarithmically spaced spectrogram
    double[][] logSpectrogram = fingerprintService.AudioService.CreateLogSpectrogram(
        filename,
        new Mirage.HannWindow(fingerprintingConfig.WdftSize),
        audioServiceConfiguration);
    imageService.GetLogSpectralImages(
        logSpectrogram,
        fingerprintingConfig.Stride,
        fingerprintingConfig.FingerprintLength,
        fingerprintingConfig.Overlap,
        2).Save("imageservice_" + name + "_specgram_logimages.png");

    Comirva.Audio.Util.Maths.Matrix stftdataLog = new Comirva.Audio.Util.Maths.Matrix(logSpectrogram).Transpose();
    #if DEBUG
    if (Analyzer.DEBUG_INFO_VERBOSE) {
        if (DEBUG_OUTPUT_TEXT) {
            stftdataLog.WriteAscii(name + "_stftdataLog.ascii");
            stftdataLog.WriteCSV(name + "_stftdataLog.csv", ";");
        }

        // same as specgram(audio*32768, 2048, 44100, hanning(2048), 1024);
        stftdataLog.DrawMatrixImageLogValues(name + "_stftdataLog.png", true);
    }
    #endif

    // 3) fingerprints (the log spectrogram returned through the out parameter is not used)
    double[][] LogSpectrogram;
    List<bool[]> fingerprints = fingerprintService.CreateFingerprintsFromAudioFile(param, out LogSpectrogram);

    int width = fingerprintingConfig.FingerprintLength;
    int height = fingerprintingConfig.LogBins;
    imageService.GetImageForFingerprints(fingerprints, width, height, 2).Save("imageservice_" + name + "_fingerprints.png");

    /*
    IPermutations permutations = new LocalPermutations("Soundfingerprinting\\perms.csv", ",");
    Soundfingerprinting.DuplicatesDetector.DataAccess.Repository repository = new Soundfingerprinting.DuplicatesDetector.DataAccess.Repository(permutations);

    // Define track
    Soundfingerprinting.DuplicatesDetector.Model.Track track = new Soundfingerprinting.DuplicatesDetector.Model.Track {
        Title = name,
        Path = filename
    };

    // Get the HashSignatures
    List<Soundfingerprinting.DuplicatesDetector.Model.HashSignature> signatures = repository.GetSignatures(fingerprints, track, 25, 4);
    return signatures;
    */
}
/// <summary>
/// Manual console test for the Comirva matrix class: prints mean, covariance
/// and inverse covariance of a small sample matrix (reference values from
/// Octave are listed below), then times three matrix multiplication variants
/// on two random 500 x 500 matrices. Blocks on a console read before returning.
/// </summary>
private static void TestComirvaMatrix() {
    // http://www.itl.nist.gov/div898/handbook/pmc/section5/pmc541.htm
    // Tested in:
    // octave-3.2.4.exe or
    // octave3.6.2_gcc4.6.2
    // > format short g
    // > X = [4, 2, 0.6; 4.2, 2.1, .59; 3.9, 2, .58; 4.3, 2.1, 0.6; 4.1, 2.2, 0.63]
    // > mean (X)
    // ans =
    //   4.1 2.08 0.6
    // > mean (X')
    // ans =
    //   2.2 2.2967 2.16 2.3333 2.31
    // > cov (X)
    // ans =
    //   0.025 0.0075 0.00075
    //   0.0075 0.007 0.00125
    //   0.00075 0.00125 0.00035
    // > cov (X')
    // ans =
    //   2.92 3.098 2.846 3.18 2.966
    //   3.098 3.287 3.0199 3.3737 3.1479
    //   2.846 3.0199 2.7748 3.099 2.8933
    //   3.18 3.3737 3.099 3.4633 3.229
    //   2.966 3.1479 2.8933 3.229 3.0193
    // > inverse ( cov (X) )
    // ans =
    //   70.297 -133.66 326.73
    //   -133.66 648.51 -2029.7
    //   326.73 -2029.7 9405.9
    // > inverse (cov (X'))
    // warning: inverse: matrix singular to machine precision, rcond = 2.41562e-018
    // ans =
    //   -1.1505e+015 6.7533e+014 1.9306e+015 -4.7521e+014 -9.1573e+014
    //   -7.9177e+015 -9.2709e+015 1.0708e+016 7.809e+015 -1.1689e+015
    //   3.8489e+015 1.4136e+015 -3.5083e+015 -2.405e+015 6.7916e+014
    //   4.7087e+015 5.3658e+015 -7.4667e+015 -4.0211e+015 1.2355e+015
    //   6.6107e+014 1.9093e+015 -1.7135e+015 -1.0698e+015 1.4605e+014

    double[][] x = new double[][] {
        new double[] {4.00000, 2.00000, 0.60000},
        new double[] {4.20000, 2.10000, 0.59000},
        new double[] {3.90000, 2.00000, 0.58000},
        new double[] {4.30000, 2.10000, 0.60000},
        new double[] {4.10000, 2.20000, 0.63000}
    };

    Comirva.Audio.Util.Maths.Matrix X = new Comirva.Audio.Util.Maths.Matrix(5, 3);
    X.MatrixData = x;

    X.Print();
    X.Mean(1).Print();
    X.Transpose().Mean(1).Print(); // or X.Mean(2).Transpose().Print();
    X.Cov().Print();
    X.Transpose().Cov().Print(); // or X.Cov(X.Mean(2)).Print();
    X.Cov().Inverse().Print();
    //X.Transpose().Cov().Inverse().Print();
    //X.Transpose().Cov().InverseGausJordan().Print();

    Comirva.Audio.Util.Maths.Matrix A = Comirva.Audio.Util.Maths.Matrix.Random(500,500);
    Comirva.Audio.Util.Maths.Matrix B = Comirva.Audio.Util.Maths.Matrix.Random(500,500);

    // Timing uses Stopwatch rather than DateTime.Now: the wall clock has coarse
    // resolution and can jump (clock adjustments), which distorts short benchmarks.
    // The type is fully qualified so that no extra using directive is required.
    System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();
    Comirva.Audio.Util.Maths.Matrix C0 = A * B;
    timer.Stop();
    Console.WriteLine("Standards Multiply: " + timer.Elapsed.TotalSeconds + " seconds");

    timer = System.Diagnostics.Stopwatch.StartNew();
    Comirva.Audio.Util.Maths.Matrix C1 = Comirva.Audio.Util.Maths.Matrix.MatrixProductParallel(A, B);
    timer.Stop();
    Console.WriteLine("MatrixProductParallel: " + timer.Elapsed.TotalSeconds + " seconds");

    timer = System.Diagnostics.Stopwatch.StartNew();
    Comirva.Audio.Util.Maths.Matrix C2 = Comirva.Audio.Util.Maths.Matrix.MatrixProductFast(A, B);
    timer.Stop();
    Console.WriteLine("MatrixProductFast: " + timer.Elapsed.TotalSeconds + " seconds");

    // NOTE(review): this relies on Matrix defining operator== as a value comparison;
    // if it does not, this is a reference comparison and never prints - confirm.
    if (C0 == C1 && C0 == C2) {
        Console.WriteLine("C0, C1 and C2 are Equal");
    }

    // Keep the console open until the user presses enter
    Console.In.ReadLine();
    return;
}
/// <summary>
/// Analyses one audio file and stores all supported audio features for it:
/// the track and its fingerprints are inserted through the repository, then
/// the Statistical Cluster Model Similarity (Scms) feature is computed from
/// the resulting log spectrogram and added to the Scms database.
/// </summary>
/// <remarks>
/// A failure while creating or inserting the Scms feature is only logged;
/// the method still returns true in that case.
/// </remarks>
/// <param name="filePath">full file path</param>
/// <param name="db">Scms database (Mirage)</param>
/// <param name="repository">Soundfingerprinting Repository</param>
/// <param name="doOutputDebugInfo">whether to write debug output such as spectrogram images and csv files (a default value can be set)</param>
/// <param name="useHaarWavelet">whether to use haar wavelet compression or DCT compression</param>
/// <returns>
/// false if no work unit could be created for the file or the track could not
/// be inserted; true otherwise
/// </returns>
public static bool AnalyzeAndAddComplete(FileInfo filePath, Db db, Repository repository, bool doOutputDebugInfo=DEFAULT_DEBUG_INFO, bool useHaarWavelet = true) {
    DbgTimer timer = new DbgTimer();
    timer.Start();

    // Derive the work configuration from the audio file; without it nothing can be done
    WorkUnitParameterObject workUnit = GetWorkUnitParameterObjectFromAudioFile(filePath);
    if (workUnit == null) {
        return false;
    }

    workUnit.FingerprintingConfiguration = fingerprintingConfigCreation;
    string fileName = workUnit.FileName;

    // Describe the track that is about to be inserted
    Track track = new Track {
        Title = workUnit.FileName,
        TrackLengthMs = (int) workUnit.DurationInMs,
        FilePath = workUnit.PathToAudioFile,
        Tags = workUnit.Tags,
        Id = -1 // this will be set by the insert method
    };

    // Insert track + fingerprints; bail out early when that fails
    double[][] logSpectrogram;
    List<bool[]> fingerprints;
    bool trackInserted = repository.InsertTrackInDatabaseUsingSamples(
        track,
        workUnit.FingerprintingConfiguration.NumberOfHashTables,
        workUnit.FingerprintingConfiguration.NumberOfKeys,
        workUnit,
        out logSpectrogram,
        out fingerprints);
    if (!trackInserted) {
        return false;
    }

    try {
        // The Scms step works on the transposed log spectrogram
        Comirva.Audio.Util.Maths.Matrix spectrogramMatrix =
            new Comirva.Audio.Util.Maths.Matrix(logSpectrogram).Transpose();

        // Optional debugging output (spectrogram image and/or csv file)
        if (doOutputDebugInfo) {
            spectrogramMatrix.DrawMatrixImageLogValues(fileName + "_matrix_spectrogram.png", true);

            if (DEBUG_OUTPUT_TEXT) {
                spectrogramMatrix.WriteCSV(fileName + "_matrix_spectrogram.csv", ";");
            }

            // Debug images produced by the fingerprinting methods
            SaveFingerprintingDebugImages(
                fileName,
                logSpectrogram,
                fingerprints,
                repository.FingerprintService,
                workUnit.FingerprintingConfiguration);
        }

        // Add the Statistical Cluster Model Similarity feature as well;
        // a failure here is reported but deliberately not treated as fatal
        bool scmsAdded = AnalyseAndAddScmsUsingLogSpectrogram(
            spectrogramMatrix,
            workUnit,
            db,
            track.Id,
            doOutputDebugInfo,
            useHaarWavelet);
        if (!scmsAdded) {
            Dbg.WriteLine("AnalyzeAndAddComplete - Failed inserting Statistical Cluster Model Similarity Audio Feature");
        }
    } catch (Exception e) {
        // Failed, but ignore: the track itself has already been inserted
        Dbg.WriteLine("AnalyzeAndAddComplete - Failed creating Statistical Cluster Model Similarity Audio Feature");
        Dbg.WriteLine(e.Message);
    }

    Dbg.WriteLine("AnalyzeAndAddComplete - Total Execution Time: {0} ms", timer.Stop().TotalMilliseconds);
    return true;
}
/// <summary>
/// Applies the hard, soft, semisoft (two settings) and strict thresholding
/// functions to 1024 evenly spaced samples on [-5, 5] and draws each result
/// to a "thresholding-*.png" graph, mirroring the MATLAB script quoted below.
/// </summary>
public static void RunTests() {
    // Run the following matlab test:
    // T = 1; % threshold value
    // v = linspace(-5,5,1024);
    // clf;
    // hold('on');
    // plot(v, perform_thresholding(v,T,'hard'), 'b--');
    // plot(v, perform_thresholding(v,T,'soft'), 'r--');
    // plot(v, perform_thresholding(v,[T 2*T],'semisoft'), 'g');
    // plot(v, perform_thresholding(v,[T 4*T],'semisoft'), 'g:');
    // plot(v, perform_thresholding(v',400,'strict'), 'r:');
    // legend('hard', 'soft', 'semisoft, \mu=2', 'semisoft, \mu=4', 'strict, 400');
    // hold('off');

    // linspace in c#
    // The samples are generated from an integer index instead of accumulating a
    // floating point loop counter: the number of samples is then exact by
    // construction, and dividing by (count - 1) makes both end points part of
    // the sequence, as linspace(-5,5,1024) does.
    const double start = -5;
    const double end = 5;
    const int totalCount = 1024;

    double[][] v = new double[1][];
    v[0] = new double[totalCount];
    double step = (end - start) / (totalCount - 1);
    for (int i = 0; i < totalCount; i++) {
        v[0][i] = start + i * step;
    }
    // pin the last sample so rounding cannot leave it slightly off the end point
    v[0][totalCount - 1] = end;

    // perform thresholding and plot
    int T = 1; // threshold value

    double[][] hard = perform_hard_thresholding(v, T);
    Comirva.Audio.Util.Maths.Matrix mHard = new Comirva.Audio.Util.Maths.Matrix(hard);
    mHard.DrawMatrixGraph("thresholding-hard.png", false);

    double[][] soft = perform_soft_thresholding(v, T);
    Comirva.Audio.Util.Maths.Matrix mSoft = new Comirva.Audio.Util.Maths.Matrix(soft);
    mSoft.DrawMatrixGraph("thresholding-soft.png", false);

    // semisoft with the second threshold at 2*T (mu = 2 in the matlab legend)
    double[][] semisoft1 = perform_semisoft_thresholding(v, T, 2*T);
    Comirva.Audio.Util.Maths.Matrix mSemiSoft1 = new Comirva.Audio.Util.Maths.Matrix(semisoft1);
    mSemiSoft1.DrawMatrixGraph("thresholding-semisoft1.png", false);

    // semisoft with the second threshold at 4*T (mu = 4 in the matlab legend)
    double[][] semisoft2 = perform_semisoft_thresholding(v, T, 4*T);
    Comirva.Audio.Util.Maths.Matrix mSemiSoft2 = new Comirva.Audio.Util.Maths.Matrix(semisoft2);
    mSemiSoft2.DrawMatrixGraph("thresholding-semisoft2.png", false);

    double[][] strict = perform_strict_thresholding(v, 400);
    Comirva.Audio.Util.Maths.Matrix mStrict = new Comirva.Audio.Util.Maths.Matrix(strict);
    mStrict.DrawMatrixGraph("thresholding-strict.png", false);
}