/// <summary>
/// Query the repository for tracks similar to the given audio file using the
/// sound fingerprinting methods and return the matches as a list.
/// </summary>
/// <param name="filePath">input file</param>
/// <param name="repository">Soundfingerprinting Repository</param>
/// <returns>a list of query results; empty if no work unit could be created for the file</returns>
public static List<FindSimilar.QueryResult> SimilarTracksSoundfingerprintingList(FileInfo filePath, Repository repository)
{
    DbgTimer t = new DbgTimer();
    t.Start();

    // get work config from the audio file
    WorkUnitParameterObject param = GetWorkUnitParameterObjectFromAudioFile(filePath);
    if (param == null)
    {
        // NOTE(review): other callers of GetWorkUnitParameterObjectFromAudioFile
        // null-check its result, so a null is treated here as "no audio" instead
        // of being dereferenced.
        Dbg.WriteLine("Error! - No Audio Found");
        return new List<FindSimilar.QueryResult>();
    }
    param.FingerprintingConfiguration = fingerprintingConfigQuerying;

    // TODO: the effect of the threshold argument (third parameter) is not fully understood:
    //   1 returns more similar hits
    //   2 sometimes returns only the track that was searched for
    //   0 also seems to work (like 1)
    List<FindSimilar.QueryResult> candidates = repository.FindSimilarFromAudioSamplesList(
        param.FingerprintingConfiguration.NumberOfHashTables,
        param.FingerprintingConfiguration.NumberOfKeys,
        0,
        param);

    Dbg.WriteLine("SimilarTracksSoundfingerprintingList - Total Execution Time: {0} ms", t.Stop().TotalMilliseconds);
    return candidates;
}
/// <summary>
/// Creates the client form: initializes the designer components, fills the
/// version label and distance type selector, opens the databases, builds the
/// soundfingerprinting repository and loads all tracks.
/// </summary>
public FindSimilarClientForm()
{
    // Required for Windows Forms designer support.
    InitializeComponent();

    version.Text = Mirage.Mir.VERSION;
    DistanceTypeCombo.DataSource = Enum.GetValues(typeof(AudioFeature.DistanceType));

    db = new Db();

    // Instantiate the soundfingerprinting repository
    FingerprintService soundfingerprintingService = Analyzer.GetSoundfingerprintingService();
    databaseService = DatabaseService.Instance;

    // Alternative permutation file: "Soundfingerprinting\\perms.csv"
    IPermutations localPermutations = new LocalPermutations("perms-new.csv", ",");
    this.repository = new Repository(localPermutations, databaseService, soundfingerprintingService);

    // The file-length checkbox and the distance type selector are only shown
    // while the SCMS radio button is checked.
    bool scmsSelected = rbScms.Checked;
    IgnoreFileLengthCheckBox.Visible = scmsSelected;
    DistanceTypeCombo.Visible = scmsSelected;

    ReadAllTracks();
}
/// <summary>
/// Method to analyze and add using the soundfingerprinting methods
/// </summary>
/// <param name="filePath">full file path</param>
/// <param name="repository">Soundfingerprinting Repository</param>
/// <param name="doOutputDebugInfo">decide whether to output debug info like spectrogram and audiofile (default value can be set)</param>
/// <param name="useHaarWavelet">decide whether to use haar wavelet compression or DCT compression (not read by this method; kept for signature compatibility)</param>
/// <returns>true if successful</returns>
public static bool AnalyzeAndAddSoundfingerprinting(FileInfo filePath, Repository repository, bool doOutputDebugInfo=DEFAULT_DEBUG_INFO, bool useHaarWavelet = true)
{
    DbgTimer t = new DbgTimer();
    t.Start();

    // get work config from the audio file
    WorkUnitParameterObject param = GetWorkUnitParameterObjectFromAudioFile(filePath);
    if (param == null)
    {
        // NOTE(review): other callers of GetWorkUnitParameterObjectFromAudioFile
        // null-check its result; report the failure instead of dereferencing null.
        Console.Out.WriteLine("Failed! Could not compute the soundfingerprint {0}!", filePath.Name);
        return false;
    }
    param.FingerprintingConfiguration = fingerprintingConfigCreation;

    string fileName = param.FileName;

    // build track
    Track track = new Track();
    track.Title = param.FileName;
    track.TrackLengthMs = (int) param.DurationInMs;
    track.FilePath = param.PathToAudioFile;
    track.Tags = param.Tags;
    track.Id = -1; // this will be set by the insert method

    // Get fingerprint signatures using the Soundfingerprinting methods
    double[][] logSpectrogram;
    List<bool[]> fingerprints;
    List<double[][]> spectralImages;
    bool inserted = repository.InsertTrackInDatabaseUsingSamples(
        track,
        param.FingerprintingConfiguration.NumberOfHashTables,
        param.FingerprintingConfiguration.NumberOfKeys,
        param,
        out logSpectrogram,
        out fingerprints,
        out spectralImages);

    if (!inserted)
    {
        // failed
        Console.Out.WriteLine("Failed! Could not compute the soundfingerprint {0}!", fileName);
        return false;
    }

    // Debug output for the soundfingerprinting method. The transposed matrix is
    // only needed for these debug files, so it is only built when they are requested.
    if (doOutputDebugInfo)
    {
        Comirva.Audio.Util.Maths.Matrix logSpectrogramMatrix = new Comirva.Audio.Util.Maths.Matrix(logSpectrogram);
        logSpectrogramMatrix = logSpectrogramMatrix.Transpose();

        // Image Service
        ImageService imageService = new ImageService(
            repository.FingerprintService.SpectrumService,
            repository.FingerprintService.WaveletService);

        imageService.GetLogSpectralImages(
            logSpectrogram,
            fingerprintingConfigCreation.Stride,
            fingerprintingConfigCreation.FingerprintLength,
            fingerprintingConfigCreation.Overlap,
            2).Save(fileName + "_specgram_logimages.png");

        logSpectrogramMatrix.DrawMatrixImageLogValues(fileName + "_specgram_logimage.png", true);

        if (DEBUG_OUTPUT_TEXT)
        {
            logSpectrogramMatrix.WriteCSV(fileName + "_specgram_log.csv", ";");
        }
    }

    Dbg.WriteLine("AnalyzeAndAddSoundfingerprinting - Total Execution Time: {0} ms", t.Stop().TotalMilliseconds);
    return true;
}
/// <summary>
/// Query the database for perceptually similar tracks using the sound fingerprinting methods
/// </summary>
/// <param name="filePath">input file</param>
/// <param name="repository">Soundfingerprinting Repository</param>
/// <returns>a dictionary of similar tracks; empty if no work unit could be created for the file</returns>
public static Dictionary<Track, double> SimilarTracksSoundfingerprinting(FileInfo filePath, Repository repository)
{
    DbgTimer t = new DbgTimer();
    t.Start();

    // get work config from the audio file
    WorkUnitParameterObject param = GetWorkUnitParameterObjectFromAudioFile(filePath);
    if (param == null)
    {
        // NOTE(review): other callers of GetWorkUnitParameterObjectFromAudioFile
        // null-check its result, so a null is treated here as "no audio" instead
        // of being dereferenced.
        Dbg.WriteLine("Error! - No Audio Found");
        return new Dictionary<Track, double>();
    }
    param.FingerprintingConfiguration = fingerprintingConfigQuerying;

    Dictionary<Track, double> candidates = repository.FindSimilarFromAudioSamples(
        param.FingerprintingConfiguration.NumberOfHashTables,
        param.FingerprintingConfiguration.NumberOfKeys,
        1,
        param);

    Dbg.WriteLine("SimilarTracksSoundfingerprinting - Total Execution Time: {0} ms", t.Stop().TotalMilliseconds);
    return candidates;
}
/// <summary>
/// Scan a directory recursively and add all the audio files found to the database
/// </summary>
/// <param name="path">Path to directory</param>
/// <param name="db">MandelEllis or Scms Database Instance</param>
/// <param name="repository">Soundfingerprinting Repository</param>
/// <param name="skipDurationAboveSeconds">Skip files with duration longer than this number of seconds (0 or less disables this)</param>
/// <param name="silent">true if silent mode (reduced console output)</param>
public static void ScanDirectory(string path, Db db, Repository repository, double skipDurationAboveSeconds, bool silent=false)
{
    Stopwatch stopWatch = Stopwatch.StartNew();

    FAILED_FILES_LOG.Delete();
    WARNING_FILES_LOG.Delete();

    // scan directory for audio files
    try
    {
        // Materialize the directory listing once. EnumerateFiles is lazy, so every
        // Count() on the raw query would walk the whole directory tree again
        // (previously this happened once per successfully added file).
        // The extension is lower-cased with the invariant culture so the match does
        // not depend on the current culture's casing rules.
        List<string> filesAll = Directory.EnumerateFiles(path, "*", SearchOption.AllDirectories)
            .Where(f => extensions.Contains(Path.GetExtension(f).ToLowerInvariant()))
            .ToList();
        int filesAllCount = filesAll.Count;
        Console.Out.WriteLine("Found {0} files in scan directory.", filesAllCount);

        // Get all already processed files stored in the database and store in memory
        // It seems to work well with huge volumes of files (200k)
        IList<string> filesAlreadyProcessed = repository.DatabaseService.ReadTrackFilenames();
        Console.Out.WriteLine("Database contains {0} already processed files.", filesAlreadyProcessed.Count);

        // find the files that has not already been added to the database
        List<string> filesRemaining = filesAll.Except(filesAlreadyProcessed).ToList();
        Console.Out.WriteLine("Found {0} files remaining in scan directory to be processed.", filesRemaining.Count);

        // Number of files added during this run / total number of files in the database.
        int filesCounter = 0;
        int filesAllCounter = filesAlreadyProcessed.Count;

        // Work done for every remaining file; shared by the parallel and the sequential loop.
        Action<string> processFile = file =>
        {
            FileInfo fileInfo = new FileInfo(file);

            // Try to use Un4Seen Bass to check duration
            BassProxy bass = BassProxy.Instance;
            double duration = bass.GetDurationInSeconds(fileInfo.FullName);

            // check if we should skip files longer than x seconds
            bool shouldProcess =
                (skipDurationAboveSeconds > 0 && duration > 0 && duration < skipDurationAboveSeconds)
                || skipDurationAboveSeconds <= 0
                || duration < 0;

            if (!shouldProcess)
            {
                if (!silent)
                {
                    Console.Out.WriteLine("Skipping {0} since duration exceeds limit ({1:0.00} > {2:0.00} sec.)", fileInfo.Name, duration, skipDurationAboveSeconds);
                }
                return;
            }

            if (!Analyzer.AnalyzeAndAddComplete(fileInfo, db, repository))
            {
                Console.Out.WriteLine("Failed! Could not generate audio fingerprint for {0}!", fileInfo.Name);
                IOUtils.LogMessageToFile(FAILED_FILES_LOG, fileInfo.FullName);
                return;
            }

            // Use the values returned by Interlocked.Increment for the progress message.
            // Reading the counters separately from incrementing them lets several
            // threads print the same number.
            int addedSoFar = Interlocked.Increment(ref filesCounter);
            int totalSoFar = Interlocked.Increment(ref filesAllCounter);
            Console.Out.WriteLine("[{1}/{2} - {3}/{4}] Succesfully added {0} to database. (Thread: {5})",
                                  fileInfo.Name,
                                  addedSoFar,
                                  filesRemaining.Count,
                                  totalSoFar,
                                  filesAllCount,
                                  Thread.CurrentThread.ManagedThreadId);
        };

#if !DEBUG
        Console.Out.WriteLine("Running in multi-threaded mode!");
        Parallel.ForEach(filesRemaining, processFile);
#else
        Console.Out.WriteLine("Running in single-threaded mode!");
        foreach (string file in filesRemaining)
        {
            processFile(file);
        }
#endif

        Console.WriteLine("Added {0} out of a total remaining set of {1} files. (Of {2} files found).", filesCounter, filesRemaining.Count, filesAllCount);
    }
    catch (UnauthorizedAccessException UAEx)
    {
        Console.WriteLine(UAEx.Message);
    }
    catch (PathTooLongException PathEx)
    {
        Console.WriteLine(PathEx.Message);
    }
    catch (System.NullReferenceException NullEx)
    {
        Console.WriteLine(NullEx.Message);
    }

    Console.WriteLine("Time used: {0}", stopWatch.Elapsed);
}
/// <summary>
/// Console entry point. Parses the command line, then optionally scans a
/// directory into the databases and/or queries for tracks similar to a file
/// or a track id.
/// </summary>
/// <param name="args">command line arguments</param>
public static void Main(string[] args)
{
    Analyzer.AnalysisMethod analysisMethod = Analyzer.AnalysisMethod.SCMS;
    //Analyzer.AnalysisMethod analysisMethod = Analyzer.AnalysisMethod.MandelEllis;

    const StringComparison ignoreCase = StringComparison.InvariantCultureIgnoreCase;
    const string newPermutationsPath = "Soundfingerprinting\\perms-new.csv";

    string scanPath = "";
    double skipDurationAboveSeconds = -1; // less than zero disables this
    string queryPath = "";
    int queryId = -1;
    int numToTake = 20;
    double percentage = 0.4; // percentage below and above when querying
    bool resetdb = false;
    bool silent = false;
    AudioFeature.DistanceType distanceType = AudioFeature.DistanceType.KullbackLeiblerDivergence;

    // Command line parsing
    Arguments commandLine = new Arguments(args);

    if (commandLine["match"] != null)
    {
        queryPath = commandLine["match"];
    }

    if (commandLine["matchid"] != null)
    {
        // Report a malformed id instead of crashing with a FormatException.
        if (!int.TryParse(commandLine["matchid"], NumberStyles.Integer, CultureInfo.InvariantCulture, out queryId))
        {
            Console.WriteLine("Invalid value for matchid: {0}", commandLine["matchid"]);
            PrintUsage();
            return;
        }
    }

    if (commandLine["scandir"] != null)
    {
        scanPath = commandLine["scandir"];
    }

    if (commandLine["skipduration"] != null)
    {
        // Parse into a temporary: a failed TryParse writes 0 to its out argument
        // and must not overwrite the default.
        double parsedSkipDuration;
        if (double.TryParse(commandLine["skipduration"], NumberStyles.Number, CultureInfo.InvariantCulture, out parsedSkipDuration))
        {
            skipDurationAboveSeconds = parsedSkipDuration;
        }
        else
        {
            Console.WriteLine("Ignoring invalid value for skipduration: {0}", commandLine["skipduration"]);
        }
    }

    if (commandLine["num"] != null)
    {
        // Report a malformed count instead of crashing with a FormatException.
        if (!int.TryParse(commandLine["num"], NumberStyles.Integer, CultureInfo.InvariantCulture, out numToTake))
        {
            Console.WriteLine("Invalid value for num: {0}", commandLine["num"]);
            PrintUsage();
            return;
        }
    }

    if (commandLine["percentage"] != null)
    {
        // Same as skipduration: keep the default (0.4) when the value cannot be parsed.
        double parsedPercentage;
        if (double.TryParse(commandLine["percentage"], NumberStyles.Number, CultureInfo.InvariantCulture, out parsedPercentage))
        {
            percentage = parsedPercentage;
        }
        else
        {
            Console.WriteLine("Ignoring invalid value for percentage: {0}", commandLine["percentage"]);
        }
    }

    if (commandLine["type"] != null)
    {
        string type = commandLine["type"];
        if (type.Equals("kl", ignoreCase))
        {
            distanceType = AudioFeature.DistanceType.KullbackLeiblerDivergence;
        }
        else if (type.Equals("ucrdtw", ignoreCase))
        {
            // "ucrdtw" does not start with "dtw", so it has to be matched outside
            // the "dtw" prefix branch below to be reachable.
            distanceType = AudioFeature.DistanceType.UCR_Dtw;
        }
        else if (type.StartsWith("dtw", ignoreCase))
        {
            if (type.Equals("dtwe2", ignoreCase))
            {
                distanceType = AudioFeature.DistanceType.Dtw_SquaredEuclidean;
            }
            else if (type.Equals("dtwman", ignoreCase))
            {
                distanceType = AudioFeature.DistanceType.Dtw_Manhattan;
            }
            else if (type.Equals("dtwmax", ignoreCase))
            {
                distanceType = AudioFeature.DistanceType.Dtw_Maximum;
            }
            else
            {
                // "dtw", "dtwe" and any other "dtw*" value
                distanceType = AudioFeature.DistanceType.Dtw_Euclidean;
            }
        }
    }

    // Stand-alone distance switches; they are evaluated after "type" and
    // therefore override it.
    if (commandLine["dtw"] != null || commandLine["dtwe"] != null)
    {
        distanceType = AudioFeature.DistanceType.Dtw_Euclidean;
    }
    if (commandLine["dtwe2"] != null)
    {
        distanceType = AudioFeature.DistanceType.Dtw_SquaredEuclidean;
    }
    if (commandLine["dtwman"] != null)
    {
        distanceType = AudioFeature.DistanceType.Dtw_Manhattan;
    }
    if (commandLine["dtwmax"] != null)
    {
        distanceType = AudioFeature.DistanceType.Dtw_Maximum;
    }
    if (commandLine["kl"] != null)
    {
        distanceType = AudioFeature.DistanceType.KullbackLeiblerDivergence;
    }
    if (commandLine["ucrdtw"] != null)
    {
        distanceType = AudioFeature.DistanceType.UCR_Dtw;
    }

    if (commandLine["resetdb"] != null)
    {
        resetdb = true;
    }
    if (commandLine["silent"] != null)
    {
        silent = true;
    }

    if (commandLine["permutations"] != null)
    {
        Console.WriteLine("Generating hash permutations for used by the Soundfingerprinting methods.");
        Console.WriteLine("Saving to file: {0}", newPermutationsPath);
        Console.WriteLine();
        PermutationGeneratorService permutationGeneratorService = new PermutationGeneratorService();
        Analyzer.GenerateAndSavePermutations(permutationGeneratorService, newPermutationsPath);
        return;
    }

    if (commandLine["?"] != null)
    {
        PrintUsage();
        return;
    }
    if (commandLine["help"] != null)
    {
        PrintUsage();
        return;
    }
    if (commandLine["gui"] != null)
    {
        StartGUI();
        return;
    }

    // Nothing to do without a query or a scan directory
    if (queryPath == "" && queryId == -1 && scanPath == "")
    {
        PrintUsage();
        return;
    }

    // Get database
    Db mandelEllisScmsDatabase = new Db(); // For MandelEllis and SCMS

    // Instantiate soundfingerprinting Repository
    FingerprintService fingerprintService = Analyzer.GetSoundfingerprintingService();
    DatabaseService databaseService = DatabaseService.Instance; // For AudioFingerprinting

    IPermutations permutations = new LocalPermutations("Soundfingerprinting\\perms.csv", ",");
    //IPermutations permutations = new LocalPermutations("Soundfingerprinting\\perms-new.csv", ",");

    Repository repository = new Repository(permutations, databaseService, fingerprintService);

    if (scanPath != "")
    {
        if (IOUtils.IsDirectory(scanPath))
        {
            if (resetdb)
            {
                // For MandelEllis and Scms
                mandelEllisScmsDatabase.RemoveTable();
                mandelEllisScmsDatabase.AddTable();

                // For AudioFingerprinting
                databaseService.RemoveFingerprintTable();
                databaseService.AddFingerprintTable();
                databaseService.RemoveHashBinTable();
                databaseService.AddHashBinTable();
                databaseService.RemoveTrackTable();
                databaseService.AddTrackTable();
            }

            Console.WriteLine("FindSimilar. Version {0}.", VERSION);
            ScanDirectory(scanPath, mandelEllisScmsDatabase, repository, skipDurationAboveSeconds, silent);
        }
        else
        {
            Console.Out.WriteLine("No directory found {0}!", scanPath);
        }
    }

    if (queryPath != "")
    {
        FileInfo fi = new FileInfo(queryPath);
        if (fi.Exists)
        {
            FindSimilar(queryPath, mandelEllisScmsDatabase, analysisMethod, numToTake, percentage, distanceType);
        }
        else
        {
            Console.Out.WriteLine("No file found {0}!", queryPath);
        }
    }

    if (queryId != -1)
    {
        FindSimilar(new int[] { queryId }, mandelEllisScmsDatabase, analysisMethod, numToTake, percentage, distanceType);
    }

    System.Console.ReadLine();
}
/// <summary>
/// Decode an audio file, read its tags and insert the track into the
/// soundfingerprinting database.
/// </summary>
/// <param name="filePath">full file path</param>
/// <param name="doOutputDebugInfo">decide whether to output debug info like spectrogram and audiofile (default value can be set)</param>
/// <param name="useHaarWavelet">not read by this method; kept for signature compatibility</param>
/// <returns>
/// a DummyAudioFeature carrying duration and file name if the track was inserted,
/// otherwise null (no audio found or insert failed)
/// </returns>
public static AudioFeature AnalyzeSoundfingerprinting(FileInfo filePath, bool doOutputDebugInfo=DEFAULT_DEBUG_INFO, bool useHaarWavelet = true)
{
    DbgTimer t = new DbgTimer();
    t.Start();

    float[] audiodata = AudioFileReader.Decode(filePath.FullName, SAMPLING_RATE, SECONDS_TO_ANALYZE);
    if (audiodata == null || audiodata.Length == 0)
    {
        Dbg.WriteLine("Error! - No Audio Found");
        return null;
    }

    // Read TAGs using BASS
    FindSimilar.AudioProxies.BassProxy bass = FindSimilar.AudioProxies.BassProxy.Instance;
    Un4seen.Bass.AddOn.Tags.TAG_INFO tag_info = bass.GetTagInfoFromFile(filePath.FullName);

    // Name of file being processed
    string name = StringUtils.RemoveNonAsciiCharacters(Path.GetFileNameWithoutExtension(filePath.Name));

    #if DEBUG
    if (Analyzer.DEBUG_INFO_VERBOSE)
    {
        if (DEBUG_OUTPUT_TEXT) WriteAscii(audiodata, name + "_audiodata.ascii");
        if (DEBUG_OUTPUT_TEXT) WriteF3Formatted(audiodata, name + "_audiodata.txt");
    }
    #endif

    if (doOutputDebugInfo)
    {
        DrawGraph(MathUtils.FloatToDouble(audiodata), name + "_audiodata.png");
    }

    // Calculate duration in ms (before any zero padding below)
    double duration = (double) audiodata.Length / SAMPLING_RATE * 1000;

    // zero pad if the audio file is too short to perform a mfcc
    if (audiodata.Length < (fingerprintingConfig.WdftSize + fingerprintingConfig.Overlap))
    {
        int lenNew = fingerprintingConfig.WdftSize + fingerprintingConfig.Overlap;
        Array.Resize<float>(ref audiodata, lenNew);
    }

    // Get fingerprint signatures using the Soundfingerprinting methods

    // Get database
    DatabaseService databaseService = DatabaseService.Instance;

    IPermutations permutations = new LocalPermutations("Soundfingerprinting\\perms.csv", ",");
    Repository repository = new Repository(permutations, databaseService, fingerprintService);

    // Image Service
    ImageService imageService = new ImageService(
        fingerprintService.SpectrumService,
        fingerprintService.WaveletService);

    // work config
    WorkUnitParameterObject param = new WorkUnitParameterObject();
    param.FingerprintingConfiguration = fingerprintingConfig;
    param.AudioSamples = audiodata;
    param.PathToAudioFile = filePath.FullName;
    param.MillisecondsToProcess = SECONDS_TO_ANALYZE * 1000;
    param.StartAtMilliseconds = 0;

    // build track
    Track track = new Track();
    track.Title = name;
    track.TrackLengthMs = (int) duration;
    track.FilePath = filePath.FullName;
    track.Id = -1; // this will be set by the insert method

    // parse tag_info
    if (tag_info != null)
    {
        Dictionary<string, string> tags = new Dictionary<string, string>();

        // Adds a text tag unless it is null or empty.
        Action<string, string> addIfPresent = (key, value) =>
        {
            if (!string.IsNullOrEmpty(value))
            {
                tags.Add(key, value);
            }
        };

        //addIfPresent("title", tag_info.title);
        addIfPresent("artist", tag_info.artist);
        addIfPresent("album", tag_info.album);
        addIfPresent("albumartist", tag_info.albumartist);
        addIfPresent("year", tag_info.year);
        addIfPresent("comment", tag_info.comment);
        addIfPresent("genre", tag_info.genre);
        addIfPresent("track", tag_info.track);
        addIfPresent("disc", tag_info.disc);
        addIfPresent("copyright", tag_info.copyright);
        addIfPresent("encodedby", tag_info.encodedby);
        addIfPresent("composer", tag_info.composer);
        addIfPresent("publisher", tag_info.publisher);
        addIfPresent("lyricist", tag_info.lyricist);
        addIfPresent("remixer", tag_info.remixer);
        addIfPresent("producer", tag_info.producer);
        addIfPresent("bpm", tag_info.bpm);
        //addIfPresent("filename", tag_info.filename);
        tags.Add("channelinfo", tag_info.channelinfo.ToString());
        //if (tag_info.duration > 0) tags.Add("duration", tag_info.duration.ToString());
        if (tag_info.bitrate > 0) tags.Add("bitrate", tag_info.bitrate.ToString());
        // -100 and -1 are skipped for the replaygain values (presumably "not set" markers - confirm)
        if (tag_info.replaygain_track_gain != -100f) tags.Add("replaygain_track_gain", tag_info.replaygain_track_gain.ToString());
        if (tag_info.replaygain_track_peak != -1f) tags.Add("replaygain_track_peak", tag_info.replaygain_track_peak.ToString());
        addIfPresent("conductor", tag_info.conductor);
        addIfPresent("grouping", tag_info.grouping);
        addIfPresent("mood", tag_info.mood);
        addIfPresent("rating", tag_info.rating);
        addIfPresent("isrc", tag_info.isrc);

        if (tag_info.NativeTags != null)
        {
            foreach (var nativeTag in tag_info.NativeTags)
            {
                if (string.IsNullOrEmpty(nativeTag)) continue;

                // Split on the first '=' only so values that contain '=' stay intact.
                string[] keyvalue = nativeTag.Split(new char[] { '=' }, 2);

                // Entries without '=' have no value and are skipped
                // (indexing keyvalue[1] would throw for them).
                if (keyvalue.Length != 2) continue;

                // Keep the first value for a key; Dictionary.Add throws on duplicates.
                if (!tags.ContainsKey(keyvalue[0]))
                {
                    tags.Add(keyvalue[0], keyvalue[1]);
                }
            }
        }
        track.Tags = tags;
    }

    AudioFeature audioFeature = null;
    double[][] logSpectrogram;
    if (repository.InsertTrackInDatabaseUsingSamples(track, 25, 4, param, out logSpectrogram))
    {
        if (doOutputDebugInfo)
        {
            imageService.GetLogSpectralImages(
                logSpectrogram,
                fingerprintingConfig.Stride,
                fingerprintingConfig.FingerprintLength,
                fingerprintingConfig.Overlap,
                2).Save(name + "_specgram_logimages.png");

            Comirva.Audio.Util.Maths.Matrix logSpectrogramMatrix = new Comirva.Audio.Util.Maths.Matrix(logSpectrogram);
            logSpectrogramMatrix = logSpectrogramMatrix.Transpose();
            logSpectrogramMatrix.DrawMatrixImageLogValues(name + "_specgram_logimage.png", true);

            if (DEBUG_OUTPUT_TEXT)
            {
                logSpectrogramMatrix.WriteCSV(name + "_specgram_log.csv", ";");
            }
        }

        audioFeature = new DummyAudioFeature();

        // Store duration
        audioFeature.Duration = (long) duration;

        // Store file name
        audioFeature.Name = filePath.FullName;
    }
    // else: insert failed, audioFeature stays null

    Dbg.WriteLine("Soundfingerprinting - Total Execution Time: {0} ms", t.Stop().TotalMilliseconds);
    return audioFeature;
}
// TODO: Remember to use another stride when querying
/// <summary>
/// Decode an audio file and query the soundfingerprinting database for similar tracks.
/// </summary>
/// <param name="filePath">input file</param>
/// <returns>a dictionary of similar tracks, or null if no audio could be decoded</returns>
public static Dictionary<Track, double> SimilarTracksSoundfingerprinting(FileInfo filePath)
{
    DbgTimer t = new DbgTimer();
    t.Start();

    // NOTE(review): the proxy instance is not used below; the access is kept in
    // case obtaining the singleton initializes the audio library - confirm.
    FindSimilar.AudioProxies.BassProxy bass = FindSimilar.AudioProxies.BassProxy.Instance;

    float[] audiodata = AudioFileReader.Decode(filePath.FullName, SAMPLING_RATE, SECONDS_TO_ANALYZE);
    if (audiodata == null || audiodata.Length == 0)
    {
        Dbg.WriteLine("Error! - No Audio Found");
        return null;
    }

    // Explode samples to the range of 16 bit shorts (-32,768 to 32,767)
    // Matlab multiplies with 2^15 (32768)
    // e.g. if( max(abs(speech))<=1 ), speech = speech * 2^15; end;
    MathUtils.Multiply(ref audiodata, AUDIO_MULTIPLIER); // 65536

    // zero pad if the audio file is too short to perform a mfcc
    if (audiodata.Length < (fingerprintingConfig.WdftSize + fingerprintingConfig.Overlap))
    {
        int lenNew = fingerprintingConfig.WdftSize + fingerprintingConfig.Overlap;
        Array.Resize<float>(ref audiodata, lenNew);
    }

    // Get fingerprint signatures using the Soundfingerprinting methods

    // Get database
    DatabaseService databaseService = DatabaseService.Instance;

    IPermutations permutations = new LocalPermutations("Soundfingerprinting\\perms.csv", ",");
    Repository repository = new Repository(permutations, databaseService, fingerprintService);

    // work config
    WorkUnitParameterObject param = new WorkUnitParameterObject();
    param.FingerprintingConfiguration = fingerprintingConfig;
    param.PathToAudioFile = filePath.FullName;
    param.AudioSamples = audiodata;
    param.MillisecondsToProcess = SECONDS_TO_ANALYZE * 1000;
    param.StartAtMilliseconds = 0;

    Dictionary<Track, double> candidates = repository.FindSimilarFromAudioSamples(25, 4, 2, param);

    // Log the timing before returning; it used to sit after the return
    // statement and was never executed.
    Dbg.WriteLine("Soundfingerprinting - Total Execution Time: {0} ms", t.Stop().TotalMilliseconds);
    return candidates;
}
//private static Mfcc mfccOptimized = new Mfcc(WINDOW_SIZE, SAMPLING_RATE, MEL_COEFFICIENTS, MFCC_COEFFICIENTS);
//private static MFCC mfccComirva = new MFCC(SAMPLING_RATE, WINDOW_SIZE, MFCC_COEFFICIENTS, true, 20.0, SAMPLING_RATE/2, MEL_COEFFICIENTS);
#endif

#region Methods
/// <summary>
/// Decode an audio file, insert it into the soundfingerprinting database and,
/// when that succeeds, also compute a Statistical Cluster Model Similarity
/// (Scms) feature from the log spectrogram and add it to the given Db.
/// </summary>
/// <param name="filePath">full file path</param>
/// <param name="db">database the Scms audio feature is added to</param>
/// <param name="databaseService">database service used to build the soundfingerprinting Repository</param>
/// <param name="doOutputDebugInfo">decide whether to output debug info like spectrogram and audiofile (default value can be set)</param>
/// <param name="useHaarWavelet">decide whether to use haar wavelet compression or DCT compression</param>
/// <returns>
/// false if no audio was found or the fingerprint insert failed; true otherwise
/// (a failure to create or store the Scms feature is only reported on the console)
/// </returns>
public static bool AnalyzeAndAdd(FileInfo filePath, Db db, DatabaseService databaseService, bool doOutputDebugInfo=DEFAULT_DEBUG_INFO, bool useHaarWavelet = true)
{
    DbgTimer t = new DbgTimer();
    t.Start();

    float[] audiodata = AudioFileReader.Decode(filePath.FullName, SAMPLING_RATE, SECONDS_TO_ANALYZE);
    if (audiodata == null || audiodata.Length == 0)
    {
        Dbg.WriteLine("Error! - No Audio Found");
        return false;
    }

    // Read TAGs using BASS
    FindSimilar.AudioProxies.BassProxy bass = FindSimilar.AudioProxies.BassProxy.Instance;
    Un4seen.Bass.AddOn.Tags.TAG_INFO tag_info = bass.GetTagInfoFromFile(filePath.FullName);

    // Name of file being processed
    string name = StringUtils.RemoveNonAsciiCharacters(Path.GetFileNameWithoutExtension(filePath.Name));

    #if DEBUG
    if (Analyzer.DEBUG_INFO_VERBOSE)
    {
        if (DEBUG_OUTPUT_TEXT) WriteAscii(audiodata, name + "_audiodata.ascii");
        if (DEBUG_OUTPUT_TEXT) WriteF3Formatted(audiodata, name + "_audiodata.txt");
    }
    #endif

    if (doOutputDebugInfo)
    {
        DrawGraph(MathUtils.FloatToDouble(audiodata), name + "_audiodata.png");
    }

    // Calculate duration in ms (before any zero padding below)
    double duration = (double) audiodata.Length / SAMPLING_RATE * 1000;

    // Explode samples to the range of 16 bit shorts (-32,768 to 32,767)
    // Matlab multiplies with 2^15 (32768)
    // e.g. if( max(abs(speech))<=1 ), speech = speech * 2^15; end;
    MathUtils.Multiply(ref audiodata, AUDIO_MULTIPLIER); // 65536

    // zero pad if the audio file is too short to perform a mfcc
    if (audiodata.Length < (fingerprintingConfig.WdftSize + fingerprintingConfig.Overlap))
    {
        int lenNew = fingerprintingConfig.WdftSize + fingerprintingConfig.Overlap;
        Array.Resize<float>(ref audiodata, lenNew);
    }

    // Get fingerprint signatures using the Soundfingerprinting methods
    IPermutations permutations = new LocalPermutations("Soundfingerprinting\\perms.csv", ",");
    Repository repository = new Repository(permutations, databaseService, fingerprintService);

    // Image Service
    ImageService imageService = new ImageService(
        fingerprintService.SpectrumService,
        fingerprintService.WaveletService);

    // work config
    WorkUnitParameterObject param = new WorkUnitParameterObject();
    param.FingerprintingConfiguration = fingerprintingConfig;
    param.AudioSamples = audiodata;
    param.PathToAudioFile = filePath.FullName;
    param.MillisecondsToProcess = SECONDS_TO_ANALYZE * 1000;
    param.StartAtMilliseconds = 0;

    // build track
    Track track = new Track();
    track.Title = name;
    track.TrackLengthMs = (int) duration;
    track.FilePath = filePath.FullName;
    track.Id = -1; // this will be set by the insert method

    // parse tag_info
    if (tag_info != null)
    {
        Dictionary<string, string> tags = new Dictionary<string, string>();

        // Adds a text tag unless it is null or empty.
        Action<string, string> addIfPresent = (key, value) =>
        {
            if (!string.IsNullOrEmpty(value))
            {
                tags.Add(key, value);
            }
        };

        //addIfPresent("title", tag_info.title);
        addIfPresent("artist", tag_info.artist);
        addIfPresent("album", tag_info.album);
        addIfPresent("albumartist", tag_info.albumartist);
        addIfPresent("year", tag_info.year);
        addIfPresent("comment", tag_info.comment);
        addIfPresent("genre", tag_info.genre);
        addIfPresent("track", tag_info.track);
        addIfPresent("disc", tag_info.disc);
        addIfPresent("copyright", tag_info.copyright);
        addIfPresent("encodedby", tag_info.encodedby);
        addIfPresent("composer", tag_info.composer);
        addIfPresent("publisher", tag_info.publisher);
        addIfPresent("lyricist", tag_info.lyricist);
        addIfPresent("remixer", tag_info.remixer);
        addIfPresent("producer", tag_info.producer);
        addIfPresent("bpm", tag_info.bpm);
        //addIfPresent("filename", tag_info.filename);
        tags.Add("channelinfo", tag_info.channelinfo.ToString());
        //if (tag_info.duration > 0) tags.Add("duration", tag_info.duration.ToString());
        if (tag_info.bitrate > 0) tags.Add("bitrate", tag_info.bitrate.ToString());
        // -100 and -1 are skipped for the replaygain values (presumably "not set" markers - confirm)
        if (tag_info.replaygain_track_gain != -100f) tags.Add("replaygain_track_gain", tag_info.replaygain_track_gain.ToString());
        if (tag_info.replaygain_track_peak != -1f) tags.Add("replaygain_track_peak", tag_info.replaygain_track_peak.ToString());
        addIfPresent("conductor", tag_info.conductor);
        addIfPresent("grouping", tag_info.grouping);
        addIfPresent("mood", tag_info.mood);
        addIfPresent("rating", tag_info.rating);
        addIfPresent("isrc", tag_info.isrc);

        if (tag_info.NativeTags != null)
        {
            foreach (var nativeTag in tag_info.NativeTags)
            {
                if (string.IsNullOrEmpty(nativeTag)) continue;

                // Split on the first '=' only so values that contain '=' stay intact.
                string[] keyvalue = nativeTag.Split(new char[] { '=' }, 2);

                // Entries without '=' have no value and are skipped
                // (indexing keyvalue[1] would throw for them).
                if (keyvalue.Length != 2) continue;

                // Keep the first value for a key; Dictionary.Add throws on duplicates.
                if (!tags.ContainsKey(keyvalue[0]))
                {
                    tags.Add(keyvalue[0], keyvalue[1]);
                }
            }
        }
        track.Tags = tags;
    }

    double[][] logSpectrogram;
    if (!repository.InsertTrackInDatabaseUsingSamples(track, 25, 4, param, out logSpectrogram))
    {
        // failed
        return false;
    }

    // store logSpectrogram as Matrix
    Comirva.Audio.Util.Maths.Matrix logSpectrogramMatrix = new Comirva.Audio.Util.Maths.Matrix(logSpectrogram);
    logSpectrogramMatrix = logSpectrogramMatrix.Transpose();

    // Debug output for the soundfingerprinting method
    if (doOutputDebugInfo)
    {
        imageService.GetLogSpectralImages(
            logSpectrogram,
            fingerprintingConfig.Stride,
            fingerprintingConfig.FingerprintLength,
            fingerprintingConfig.Overlap,
            2).Save(name + "_specgram_logimages.png");

        logSpectrogramMatrix.DrawMatrixImageLogValues(name + "_specgram_logimage.png", true);

        if (DEBUG_OUTPUT_TEXT)
        {
            logSpectrogramMatrix.WriteCSV(name + "_specgram_log.csv", ";");
        }
    }

    // Insert Statistical Cluster Model Similarity Audio Feature as well
    Comirva.Audio.Util.Maths.Matrix scmsMatrix = null;
    if (useHaarWavelet)
    {
        // Wavelet Transform
        int lastHeight = 0;
        int lastWidth = 0;
        scmsMatrix = mfccMirage.ApplyWaveletCompression(ref logSpectrogramMatrix, out lastHeight, out lastWidth);

        #if DEBUG
        if (Analyzer.DEBUG_INFO_VERBOSE)
        {
            if (DEBUG_OUTPUT_TEXT) scmsMatrix.WriteAscii(name + "_waveletdata.ascii");
        }
        #endif

        if (doOutputDebugInfo)
        {
            scmsMatrix.DrawMatrixImageLogValues(name + "_waveletdata.png", true);
        }

        #if DEBUG
        if (Analyzer.DEBUG_INFO_VERBOSE)
        {
            // try to do an inverse wavelet transform
            Comirva.Audio.Util.Maths.Matrix stftdata_inverse_wavelet = mfccMirage.InverseWaveletCompression(ref scmsMatrix, lastHeight, lastWidth, logSpectrogramMatrix.Rows, logSpectrogramMatrix.Columns);
            if (DEBUG_OUTPUT_TEXT) stftdata_inverse_wavelet.WriteCSV(name + "_specgramlog_inverse_wavelet.csv", ";");
            stftdata_inverse_wavelet.DrawMatrixImageLogValues(name + "_specgramlog_inverse_wavelet.png", true);
        }
        #endif
    }
    else
    {
        // DCT Transform
        // It seems the Mirage way of applying the DCT is slightly faster than the
        // Comirva way due to less loops
        scmsMatrix = mfccMirage.ApplyDCT(ref logSpectrogramMatrix);

        #if DEBUG
        if (Analyzer.DEBUG_INFO_VERBOSE)
        {
            if (DEBUG_OUTPUT_TEXT) scmsMatrix.WriteAscii(name + "_mfccdata.ascii");
        }
        #endif

        if (doOutputDebugInfo)
        {
            scmsMatrix.DrawMatrixImageLogValues(name + "_mfccdata.png", true);
        }

        #if DEBUG
        if (Analyzer.DEBUG_INFO_VERBOSE)
        {
            // try to do an inverse mfcc
            Comirva.Audio.Util.Maths.Matrix stftdata_inverse_mfcc = mfccMirage.InverseDCT(ref scmsMatrix);
            if (DEBUG_OUTPUT_TEXT) stftdata_inverse_mfcc.WriteCSV(name + "_stftdata_inverse_mfcc.csv", ";");
            stftdata_inverse_mfcc.DrawMatrixImageLogValues(name + "_specgramlog_inverse_mfcc.png", true);
        }
        #endif
    }

    // Store in a Statistical Cluster Model Similarity class.
    // A Gaussian representation of a song
    Scms audioFeature = Scms.GetScms(scmsMatrix, name);

    if (audioFeature != null)
    {
        // Store image if debugging
        if (doOutputDebugInfo)
        {
            audioFeature.Image = scmsMatrix.DrawMatrixImageLogValues(name + "_featuredata.png", true, false, 0, 0, true);
        }

        // Store bitstring hash as well
        string hashString = GetBitString(scmsMatrix);
        audioFeature.BitString = hashString;

        // Store duration
        audioFeature.Duration = (long) duration;

        // Store file name
        audioFeature.Name = filePath.FullName;

        // Reuse the track id from the fingerprint insert for the Scms entry
        int id = track.Id;
        if (db.AddTrack(ref id, audioFeature) == -1)
        {
            Console.Out.WriteLine("Failed! Could not add audioFeature to database {0}!", name);
        }
    }

    Dbg.WriteLine("AnalyzeAndAdd - Total Execution Time: {0} ms", t.Stop().TotalMilliseconds);
    return true;
}
/// <summary>
/// Creates the comparison form: fingerprints two hard-coded audio files, shows the
/// spectrogram and fingerprint images of each one, and writes the Hamming distance and
/// "JAQ" similarity between the first fingerprint of each file into the similarity label.
/// </summary>
/// <remarks>
/// Either file may fail to load (its work unit is then null) or may yield no fingerprints.
/// In those cases the corresponding images and the similarity label are left untouched
/// instead of throwing from the constructor.
/// </remarks>
public CompareAudioForm()
{
    //
    // The InitializeComponent() call is required for Windows Forms designer support.
    //
    InitializeComponent();

    // Instantiate the Soundfingerprinting repository
    FingerprintService fingerprintService = Analyzer.GetSoundfingerprintingService();
    this.databaseService = DatabaseService.Instance;

    IPermutations permutations = new LocalPermutations("Soundfingerprinting\\perms.csv", ",");
    //IPermutations permutations = new LocalPermutations("Soundfingerprinting\\perms-new.csv", ",");

    IFingerprintingConfiguration fingerprintingConfigCreation = new FullFrequencyFingerprintingConfiguration();
    repository = new Repository(permutations, databaseService, fingerprintService);
    ImageService imageService = new ImageService(fingerprintService.SpectrumService, fingerprintService.WaveletService);

    FileInfo filePathAudio1 = new FileInfo(@"C:\Users\perivar.nerseth\Music\Test Samples Database\VDUB1 Snare 004.wav");
    FileInfo filePathAudio2 = new FileInfo(@"C:\Users\perivar.nerseth\Music\Test Samples Search\VDUB1 Snare 004 - Start.wav");

    int fingerprintsPerRow = 2;

    double[][] logSpectrogram1 = null;
    double[][] logSpectrogram2 = null;
    List<bool[]> fingerprints1 = null;
    List<bool[]> fingerprints2 = null;

    WorkUnitParameterObject file1Param = Analyzer.GetWorkUnitParameterObjectFromAudioFile(filePathAudio1);
    if (file1Param != null)
    {
        file1Param.FingerprintingConfiguration = fingerprintingConfigCreation;

        // Get fingerprints
        fingerprints1 = fingerprintService.CreateFingerprintsFromAudioSamples(file1Param.AudioSamples, file1Param, out logSpectrogram1);

        // The image width is taken from the first spectrogram row, so there must be one.
        if (logSpectrogram1 != null && logSpectrogram1.Length > 0)
        {
            pictureBox1.Image = imageService.GetSpectrogramImage(logSpectrogram1, logSpectrogram1.Length, logSpectrogram1[0].Length);
        }
        if (fingerprints1 != null)
        {
            pictureBoxWithInterpolationMode1.Image = imageService.GetImageForFingerprints(fingerprints1, file1Param.FingerprintingConfiguration.FingerprintLength, file1Param.FingerprintingConfiguration.LogBins, fingerprintsPerRow);
        }
    }

    WorkUnitParameterObject file2Param = Analyzer.GetWorkUnitParameterObjectFromAudioFile(filePathAudio2);
    if (file2Param != null)
    {
        file2Param.FingerprintingConfiguration = fingerprintingConfigCreation;

        // Get fingerprints
        fingerprints2 = fingerprintService.CreateFingerprintsFromAudioSamples(file2Param.AudioSamples, file2Param, out logSpectrogram2);

        // The image width is taken from the first spectrogram row, so there must be one.
        if (logSpectrogram2 != null && logSpectrogram2.Length > 0)
        {
            pictureBox2.Image = imageService.GetSpectrogramImage(logSpectrogram2, logSpectrogram2.Length, logSpectrogram2[0].Length);
        }
        if (fingerprints2 != null)
        {
            pictureBoxWithInterpolationMode2.Image = imageService.GetImageForFingerprints(fingerprints2, file2Param.FingerprintingConfiguration.FingerprintLength, file2Param.FingerprintingConfiguration.LogBins, fingerprintsPerRow);
        }
    }

    // Only the first signature of each file is compared. Either list is still null when
    // its file could not be read, and may be empty, so check before indexing.
    bool hasSignature1 = fingerprints1 != null && fingerprints1.Count > 0;
    bool hasSignature2 = fingerprints2 != null && fingerprints2.Count > 0;
    if (hasSignature1 && hasSignature2)
    {
        bool[] signature1 = fingerprints1[0];
        bool[] signature2 = fingerprints2[0];

        if (signature1 != null && signature2 != null)
        {
            int hammingDistance = MinHash.CalculateHammingDistance(signature1, signature2);
            double jaqSimilarity = MinHash.CalculateJaqSimilarity(signature1, signature2);
            lblSimilarity.Text = String.Format("Hamming: {0} JAQ: {1}", hammingDistance, jaqSimilarity);
        }
    }
}
/// <summary>
/// Analyse an audio file and store all the different types of audio features for it:
/// the fingerprinting data goes into <paramref name="repository"/> and the Statistical
/// Cluster Model Similarity (SCMS) feature goes into <paramref name="db"/>.
/// </summary>
/// <param name="filePath">full file path</param>
/// <param name="db">Scms database (Mirage)</param>
/// <param name="repository">Soundfingerprinting Repository</param>
/// <param name="doOutputDebugInfo">whether to write debug output (spectrogram images and, if enabled, csv files)</param>
/// <param name="useHaarWavelet">true to use haar wavelet compression, false to use DCT compression</param>
/// <returns>
/// false if the audio file could not be read or the track could not be inserted into the
/// repository; otherwise true. A failure while creating or storing the SCMS feature is
/// only logged and still yields true.
/// </returns>
public static bool AnalyzeAndAddComplete(FileInfo filePath, Db db, Repository repository, bool doOutputDebugInfo=DEFAULT_DEBUG_INFO, bool useHaarWavelet = true)
{
    DbgTimer t = new DbgTimer();
    t.Start();

    // Read the audio file into a work unit; nothing can be done without it.
    WorkUnitParameterObject param = GetWorkUnitParameterObjectFromAudioFile(filePath);
    if (param == null)
    {
        return false;
    }

    param.FingerprintingConfiguration = fingerprintingConfigCreation;
    string fileName = param.FileName;

    // Describe the track; the real id is assigned by the insert method below.
    Track track = new Track
    {
        Title = param.FileName,
        TrackLengthMs = (int) param.DurationInMs,
        FilePath = param.PathToAudioFile,
        Tags = param.Tags,
        Id = -1
    };

    double[][] logSpectrogram;
    List<bool[]> fingerprints;
    bool trackInserted = repository.InsertTrackInDatabaseUsingSamples(track, param.FingerprintingConfiguration.NumberOfHashTables, param.FingerprintingConfiguration.NumberOfKeys, param, out logSpectrogram, out fingerprints);
    if (!trackInserted)
    {
        // Failed
        return false;
    }

    try
    {
        // Keep the log spectrogram as a transposed matrix
        Comirva.Audio.Util.Maths.Matrix logSpectrogramMatrix = new Comirva.Audio.Util.Maths.Matrix(logSpectrogram).Transpose();

        #region Output debugging information (Saving spectrograms and/or csv files)
        if (doOutputDebugInfo)
        {
            logSpectrogramMatrix.DrawMatrixImageLogValues(fileName + "_matrix_spectrogram.png", true);

            if (DEBUG_OUTPUT_TEXT)
            {
                logSpectrogramMatrix.WriteCSV(fileName + "_matrix_spectrogram.csv", ";");
            }

            // Debug images produced by the fingerprinting methods
            SaveFingerprintingDebugImages(fileName, logSpectrogram, fingerprints, repository.FingerprintService, param.FingerprintingConfiguration);
        }
        #endregion

        // Add the Statistical Cluster Model Similarity audio feature too
        bool scmsStored = AnalyseAndAddScmsUsingLogSpectrogram(logSpectrogramMatrix, param, db, track.Id, doOutputDebugInfo, useHaarWavelet);
        if (!scmsStored)
        {
            // Not fatal: the fingerprints are already stored, so only log it.
            Dbg.WriteLine("AnalyzeAndAddComplete - Failed inserting Statistical Cluster Model Similarity Audio Feature");
        }
    }
    catch (Exception ex)
    {
        // Not fatal either: log the problem and carry on.
        Dbg.WriteLine("AnalyzeAndAddComplete - Failed creating Statistical Cluster Model Similarity Audio Feature");
        Dbg.WriteLine(ex.Message);
    }

    Dbg.WriteLine("AnalyzeAndAddComplete - Total Execution Time: {0} ms", t.Stop().TotalMilliseconds);
    return true;
}
/// <summary>
/// Search the repository for tracks that are perceptually similar to the given audio file,
/// using the sound fingerprinting methods.
/// </summary>
/// <param name="filePath">input file</param>
/// <param name="repository">the database (repository) to query</param>
/// <param name="thresholdTables">Minimum number of hash tables that must be found for one signature to be considered a candidate (0 and 1 = return all candidates, 2+ = return only exact matches)</param>
/// <param name="optimizeSignatureCount">Reduce the number of signatures in order to increase the search performance</param>
/// <param name="doSearchEverything">disregard the local sensitivity hashes and search the whole database</param>
/// <param name="splashScreen">The "please wait" splash screen that receives progress messages, or null for none</param>
/// <returns>a list of query result objects (e.g. similar tracks), or null if the audio file could not be read</returns>
public static List<FindSimilar.QueryResult> SimilarTracksSoundfingerprintingList(FileInfo filePath, Repository repository, int thresholdTables, bool optimizeSignatureCount, bool doSearchEverything, SplashSceenWaitingForm splashScreen)
{
    DbgTimer timer = new DbgTimer();
    timer.Start();

    // Progress is only reported when the caller supplied a splash screen.
    bool reportProgress = splashScreen != null;

    if (reportProgress)
    {
        splashScreen.SetProgress(0, "Reading audio file ...");
    }

    // Build the work unit from the audio file
    WorkUnitParameterObject workUnit = GetWorkUnitParameterObjectFromAudioFile(filePath);
    if (workUnit == null)
    {
        if (reportProgress)
        {
            splashScreen.SetProgress(0, "Failed reading audio file!");
        }
        return null;
    }

    workUnit.FingerprintingConfiguration = fingerprintingConfigQuerying;

    if (reportProgress)
    {
        splashScreen.SetProgress(1, "Successfully reading audio file!");
    }

    // How the threshold tables work:
    // For every signature created from the query file a number of candidates is retrieved,
    // based on how many fingerprints are associated with the same hash bucket.
    // When many fingerprints share a hash bucket, the likelihood of an exact match is
    // also very high.
    // A value of 0 or 1 therefore basically means "return every track that has an
    // association to the same hash bucket", while a higher number increases the accuracy
    // towards matching identical tracks only.
    //   0 and 1 return many matches
    //   2 sometimes returns only the one we search for (exact match)
    List<FindSimilar.QueryResult> matches = repository.FindSimilarFromAudioSamplesList(
        workUnit.FingerprintingConfiguration.NumberOfHashTables,
        workUnit.FingerprintingConfiguration.NumberOfKeys,
        thresholdTables,
        workUnit,
        optimizeSignatureCount,
        doSearchEverything,
        splashScreen);

    Dbg.WriteLine ("SimilarTracksSoundfingerprintingList - Total Execution Time: {0} ms", timer.Stop().TotalMilliseconds);
    return matches;
}
/// <summary>
/// Creates the find-similar client form: fills the combo boxes, opens the SCMS database
/// and the Soundfingerprinting repository, shows only the options that belong to the
/// currently selected search method, and finally reads all tracks.
/// </summary>
public FindSimilarClientForm()
{
    //
    // The InitializeComponent() call is required for Windows Forms designer support.
    //
    InitializeComponent();

    //
    // Constructor code after the InitializeComponent() call.
    //
    this.version.Text = Mirage.Mir.VERSION;
    this.DistanceTypeCombo.DataSource = Enum.GetValues(typeof(AudioFeature.DistanceType));
    this.ThresholdTablesCombo.DataSource = Enum.GetValues(typeof(ThresholdTables));

    // Instantiate the SCMS or Mandel Ellis repository
    this.db = new Db();

    // Instantiate the Soundfingerprinting repository
    // (alternative permutation file: "Soundfingerprinting\\perms-new.csv")
    FingerprintService fingerprintService = Analyzer.GetSoundfingerprintingService();
    this.databaseService = DatabaseService.Instance;
    IPermutations permutations = new LocalPermutations("Soundfingerprinting\\perms.csv", ",");
    repository = new Repository(permutations, databaseService, fingerprintService);

    // The SCMS options and the soundfingerprinting options are mutually exclusive:
    // show one group and hide the other, depending on the selected radio button.
    bool scmsSelected = rbScms.Checked;
    IgnoreFileLengthCheckBox.Visible = scmsSelected;
    DistanceTypeCombo.Visible = scmsSelected;
    LessAccurateCheckBox.Visible = !scmsSelected;
    ThresholdTablesCombo.Visible = !scmsSelected;
    SearchAllFilesCheckbox.Visible = !scmsSelected;

    ReadAllTracks();
}