/// <summary>
/// Look up the tracks most similar to the audio file at the given path
/// and print one "key, value" line per match to the console
/// </summary>
/// <param name="path">audio file path</param>
/// <param name="db">database</param>
/// <param name="analysisMethod">analysis method passed through to SimilarTracks</param>
/// <param name="numToTake">max number of entries to print</param>
/// <param name="percentage">duration window passed through to SimilarTracks</param>
/// <param name="distanceType">distance method to use (KullbackLeiblerDivergence is default)</param>
private static void FindSimilar(string path, Db db, Analyzer.AnalysisMethod analysisMethod, int numToTake=25, double percentage=0.2, AudioFeature.DistanceType distanceType = AudioFeature.DistanceType.KullbackLeiblerDivergence) {
    // each match pairs a track key with its distance to the seed file
    foreach (var match in SimilarTracks(path, db, analysisMethod, numToTake, percentage, distanceType)) {
        Console.WriteLine("{0}, {1}", match.Key, match.Value);
    }
}
/// <summary>
/// Get a track from the database using its id
/// </summary>
/// <param name="trackid">id</param>
/// <param name="analysisMethod">analysis method (SCMS or MandelEllis)</param>
/// <returns>an AudioFeature object, or null when no row matches the id</returns>
public AudioFeature GetTrack(int trackid, Analyzer.AnalysisMethod analysisMethod) {
    IDbCommand dbcmd;
    lock (dbcon) {
        dbcmd = dbcon.CreateCommand();
    }

    byte[] buf;
    string name;
    long duration;
    string bitstring;
    bool[] signature;

    // Dispose the command and the reader on every exit path,
    // including the early "not found" return below.
    using (dbcmd) {
        // trackid is an int, so the concatenation cannot inject SQL
        dbcmd.CommandText = "SELECT audioFeature, name, duration, bitstring, signature FROM mirage " +
            "WHERE trackid = " + trackid;

        using (IDataReader reader = dbcmd.ExecuteReader()) {
            if (!reader.Read()) {
                return null;
            }

            // columns are read in select order
            buf = (byte[]) reader.GetValue(0);
            name = reader.GetString(1);
            duration = reader.GetInt64(2);
            bitstring = reader.GetString(3);
            signature = ByteToBool((byte[]) reader.GetValue(4));
        }
    }

    AudioFeature audioFeature = null;
    switch (analysisMethod) {
        case Analyzer.AnalysisMethod.MandelEllis:
            audioFeature = MandelEllis.FromBytes(buf);
            break;
        case Analyzer.AnalysisMethod.SCMS:
            audioFeature = Scms.FromBytes(buf);
            break;
    }

    // NOTE(review): any other analysis method leaves audioFeature null,
    // so the assignments below throw a NullReferenceException.
    audioFeature.Name = name;
    audioFeature.Duration = duration;
    audioFeature.BitString = bitstring;
    audioFeature.Signatures.Add(signature);

    return audioFeature;
}
/// <summary>
/// Read up to <paramref name="len"/> rows from the passed data reader and
/// store the decoded audio features and their track ids in the given arrays
/// </summary>
/// <param name="tracksIterator">data reader positioned before the next row; closed and set to null when no row could be read</param>
/// <param name="tracks">AudioFeature array that receives the decoded tracks</param>
/// <param name="mapping">array that receives the track id for each decoded track</param>
/// <param name="len">maximum number of tracks to read</param>
/// <param name="analysisMethod">analysis method (SCMS or MandelEllis)</param>
/// <returns>number of tracks actually read</returns>
public int GetNextTracks(ref IDataReader tracksIterator, ref AudioFeature[] tracks, ref int[] mapping, int len, Analyzer.AnalysisMethod analysisMethod) {
    int fetched;

    // the reader is only advanced while there is still room in the batch
    for (fetched = 0; (fetched < len) && tracksIterator.Read(); fetched++) {
        AudioFeature current = null;

        // column 0 holds the serialized feature
        if (analysisMethod == Analyzer.AnalysisMethod.MandelEllis) {
            current = MandelEllis.FromBytes((byte[]) tracksIterator.GetValue(0));
        } else if (analysisMethod == Analyzer.AnalysisMethod.SCMS) {
            current = Scms.FromBytes((byte[]) tracksIterator.GetValue(0));
        }

        // remaining columns: track id, name, duration, bitstring, signature
        mapping[fetched] = tracksIterator.GetInt32(1);
        current.Name = tracksIterator.GetString(2);
        current.Duration = tracksIterator.GetInt64(3);
        current.BitString = tracksIterator.GetString(4);
        current.Signatures.Add(ByteToBool((byte[]) tracksIterator.GetValue(5)));

        tracks[fetched] = current;
    }

    // an empty batch means the reader is exhausted (or len was not positive)
    if (fetched == 0) {
        tracksIterator.Close();
        tracksIterator = null;
    }

    return fetched;
}
/// <summary>
/// Compare two audio files and print the distance between them
/// </summary>
/// <remarks>
/// Prints a failure message instead of the distance when a feature could
/// not be produced for one of the files.
/// </remarks>
/// <param name="path1">audio 1 file path</param>
/// <param name="path2">audio 2 file path</param>
/// <param name="analysisMethod">analysis method (SCMS or MandelEllis)</param>
public static void Compare(string path1, string path2, Analyzer.AnalysisMethod analysisMethod) {
    AudioFeature m1 = null;
    AudioFeature m2 = null;

    FileInfo filePath1 = new FileInfo(path1);
    FileInfo filePath2 = new FileInfo(path2);

    switch (analysisMethod) {
        case Analyzer.AnalysisMethod.MandelEllis:
            m1 = Analyzer.AnalyzeMandelEllis(filePath1);
            m2 = Analyzer.AnalyzeMandelEllis(filePath2);
            break;
        case Analyzer.AnalysisMethod.SCMS:
            m1 = Analyzer.AnalyzeScms(filePath1);
            m2 = Analyzer.AnalyzeScms(filePath2);
            break;
    }

    // A feature is null when the analysis method is not handled above.
    // NOTE(review): presumably also when the analyzer fails on a file - confirm.
    if (m1 == null) {
        System.Console.Out.WriteLine("Failed! Could not generate audio fingerprint for {0}!", filePath1.Name);
        return;
    }
    if (m2 == null) {
        System.Console.Out.WriteLine("Failed! Could not generate audio fingerprint for {0}!", filePath2.Name);
        return;
    }

    System.Console.Out.WriteLine("Similarity between m1 and m2 is: " + m1.GetDistance(m2));
}
/// <summary>
/// Compare two audio files using their audio ids and print the distance between them
/// </summary>
/// <remarks>
/// Prints a message instead of the distance when one of the ids cannot be
/// loaded from the database.
/// </remarks>
/// <param name="trackId1">audio 1 id</param>
/// <param name="trackId2">audio 2 id</param>
/// <param name="db">database</param>
/// <param name="analysisMethod">analysis method (SCMS or MandelEllis)</param>
public static void Compare(int trackId1, int trackId2, Db db, Analyzer.AnalysisMethod analysisMethod) {
    AudioFeature m1 = db.GetTrack(trackId1, analysisMethod);
    AudioFeature m2 = db.GetTrack(trackId2, analysisMethod);

    // GetTrack yields null when no row exists for the id
    if (m1 == null) {
        System.Console.Out.WriteLine("Could not find track with id {0} in the database.", trackId1);
        return;
    }
    if (m2 == null) {
        System.Console.Out.WriteLine("Could not find track with id {0} in the database.", trackId2);
        return;
    }

    System.Console.Out.WriteLine("Similarity between m1 and m2 is: " + m1.GetDistance(m2));
}
/// <summary>
/// Find Similar Tracks to one or many audio files using their unique database id(s)
/// </summary>
/// <remarks>
/// Each candidate is scored by the mean absolute distance to all seed tracks.
/// Candidates whose summed distance is not greater than zero are left out.
/// The duration filter is based on the first seed track.
/// </remarks>
/// <param name="id">an array of unique database ids for the audio files to search for similar matches</param>
/// <param name="exclude">an array of unique database ids to ignore (normally the same as the id array)</param>
/// <param name="db">database</param>
/// <param name="analysisMethod">analysis method (SCMS or MandelEllis)</param>
/// <param name="numToTake">max number of entries to return</param>
/// <param name="percentage">percentage below and above the duration in ms when querying (used if between 0.1 - 0.9)</param>
/// <param name="distanceType">distance method to use (KullbackLeiblerDivergence is default)</param>
/// <returns>
/// a list of query results ordered by ascending distance; empty when no ids are given,
/// when the analysis method is not SCMS or MandelEllis, or when a seed track cannot be loaded
/// </returns>
public static List<FindSimilar.QueryResult> SimilarTracksList(int[] id, int[] exclude, Db db, Analyzer.AnalysisMethod analysisMethod, int numToTake=25, double percentage=0.2, AudioFeature.DistanceType distanceType = AudioFeature.DistanceType.KullbackLeiblerDivergence) {
    // number of candidate tracks pulled from the reader per batch
    const int batchSize = 100;

    // store results in a query results list
    List<FindSimilar.QueryResult> queryResultList = new List<FindSimilar.QueryResult>();

    // without at least one seed id there is nothing to compare against
    if (id == null || id.Length == 0) {
        return queryResultList;
    }

    AudioFeature[] seedAudioFeatures = null;
    AudioFeature[] audioFeatures = null;
    switch (analysisMethod) {
        case Analyzer.AnalysisMethod.MandelEllis:
            seedAudioFeatures = new MandelEllis[id.Length];
            audioFeatures = new MandelEllis[batchSize];
            break;
        case Analyzer.AnalysisMethod.SCMS:
            seedAudioFeatures = new Scms[id.Length];
            audioFeatures = new Scms[batchSize];
            break;
    }

    // any other analysis method cannot be compared here
    if (seedAudioFeatures == null) {
        return queryResultList;
    }

    for (int i = 0; i < seedAudioFeatures.Length; i++) {
        seedAudioFeatures[i] = db.GetTrack(id[i], analysisMethod);
        if (seedAudioFeatures[i] == null) {
            // the seed could not be loaded, so no meaningful average can be computed
            return queryResultList;
        }
    }

    // Get all tracks from the DB except the seedSongs
    IDataReader r = db.GetTracks(exclude, seedAudioFeatures[0].Duration, percentage);
    int[] mapping = new int[batchSize];

    try {
        int read = 1;
        while (read > 0) {
            read = db.GetNextTracks(ref r, ref audioFeatures, ref mapping, batchSize, analysisMethod);

            for (int i = 0; i < read; i++) {
                double d = 0;
                for (int j = 0; j < seedAudioFeatures.Length; j++) {
                    double dcur = seedAudioFeatures[j].GetDistance(audioFeatures[i], distanceType);

                    // convert to positive values
                    dcur = Math.Abs(dcur);
                    d += dcur;
                }

                if (d > 0) {
                    QueryResult queryResult = new QueryResult();
                    queryResult.Id = mapping[i];
                    queryResult.Path = audioFeatures[i].Name;
                    queryResult.Duration = audioFeatures[i].Duration;
                    // mean distance over all seeds
                    queryResult.Similarity = d / seedAudioFeatures.Length;
                    queryResultList.Add(queryResult);
                }
            }
        }
    } finally {
        // GetNextTracks nulls the reader once it is exhausted; it is only still
        // set here when the loop was interrupted by an exception
        if (r != null) {
            r.Close();
        }
    }

    return queryResultList
        .OrderBy(row => row.Similarity)
        .Take(numToTake)
        .ToList();
}
/// <summary>
/// Find Similar Tracks to one or many audio files using their unique database id(s)
/// </summary>
/// <remarks>
/// Each candidate is scored by the mean absolute distance to all seed tracks.
/// Candidates whose summed distance is not greater than zero are left out.
/// The duration filter is based on the first seed track.
/// The elapsed time is written to the console when a search was actually performed.
/// </remarks>
/// <param name="id">an array of unique database ids for the audio files to search for similar matches</param>
/// <param name="exclude">an array of unique database ids to ignore (normally the same as the id array)</param>
/// <param name="db">database</param>
/// <param name="analysisMethod">analysis method (SCMS or MandelEllis)</param>
/// <param name="numToTake">max number of entries to return</param>
/// <param name="percentage">percentage below and above the duration in ms when querying (used if between 0.1 - 0.9)</param>
/// <param name="distanceType">distance method to use (KullbackLeiblerDivergence is default)</param>
/// <returns>
/// a dictionary of key value pairs ((track id, filepath) and distance); empty when no ids are given,
/// when the analysis method is not SCMS or MandelEllis, or when a seed track cannot be loaded
/// </returns>
public static Dictionary<KeyValuePair<int, string>, double> SimilarTracks(int[] id, int[] exclude, Db db, Analyzer.AnalysisMethod analysisMethod, int numToTake=25, double percentage=0.2, AudioFeature.DistanceType distanceType = AudioFeature.DistanceType.KullbackLeiblerDivergence) {
    // number of candidate tracks pulled from the reader per batch
    const int batchSize = 100;

    DbgTimer t = new DbgTimer();
    t.Start();

    // store results in a dictionary
    var distancesByTrack = new Dictionary<KeyValuePair<int, string>, double>();

    // without at least one seed id there is nothing to compare against
    if (id == null || id.Length == 0) {
        return distancesByTrack;
    }

    AudioFeature[] seedAudioFeatures = null;
    AudioFeature[] audioFeatures = null;
    switch (analysisMethod) {
        case Analyzer.AnalysisMethod.MandelEllis:
            seedAudioFeatures = new MandelEllis[id.Length];
            audioFeatures = new MandelEllis[batchSize];
            break;
        case Analyzer.AnalysisMethod.SCMS:
            seedAudioFeatures = new Scms[id.Length];
            audioFeatures = new Scms[batchSize];
            break;
    }

    // any other analysis method cannot be compared here
    if (seedAudioFeatures == null) {
        return distancesByTrack;
    }

    for (int i = 0; i < seedAudioFeatures.Length; i++) {
        seedAudioFeatures[i] = db.GetTrack(id[i], analysisMethod);
        if (seedAudioFeatures[i] == null) {
            // the seed could not be loaded, so no meaningful average can be computed
            return distancesByTrack;
        }
    }

    // Get all tracks from the DB except the seedSongs
    IDataReader r = db.GetTracks(exclude, seedAudioFeatures[0].Duration, percentage);
    int[] mapping = new int[batchSize];

    try {
        int read = 1;
        while (read > 0) {
            read = db.GetNextTracks(ref r, ref audioFeatures, ref mapping, batchSize, analysisMethod);

            for (int i = 0; i < read; i++) {
                double d = 0;
                for (int j = 0; j < seedAudioFeatures.Length; j++) {
                    double dcur = seedAudioFeatures[j].GetDistance(audioFeatures[i], distanceType);

                    // convert to positive values
                    dcur = Math.Abs(dcur);
                    d += dcur;
                }

                if (d > 0) {
                    // mean distance over all seeds, keyed by (track id, name)
                    distancesByTrack.Add(new KeyValuePair<int,string>(mapping[i], audioFeatures[i].Name), d / seedAudioFeatures.Length);
                }
            }
        }
    } finally {
        // GetNextTracks nulls the reader once it is exhausted; it is only still
        // set here when the loop was interrupted by an exception
        if (r != null) {
            r.Close();
        }
    }

    // sort by non unique values
    var sortedDict = distancesByTrack
        .OrderBy(entry => entry.Value)
        .Take(numToTake)
        .ToDictionary(pair => pair.Key, pair => pair.Value);

    Console.Out.WriteLine(String.Format("Found Similar to ({0}) in {1} ms", String.Join(",", seedAudioFeatures.Select(p=>p.Name)), t.Stop().TotalMilliseconds));
    return sortedDict;
}
/// <summary>
/// Find Similar Tracks to an audio file using its file path
/// </summary>
/// <remarks>
/// The file is analyzed first, then every candidate track returned by the
/// database is scored by its absolute distance to the analyzed file.
/// </remarks>
/// <param name="searchForPath">audio file path</param>
/// <param name="db">database</param>
/// <param name="analysisMethod">analysis method (SCMS or MandelEllis)</param>
/// <param name="numToTake">max number of entries to return</param>
/// <param name="percentage">percentage below and above the duration in ms when querying (used if between 0.1 - 0.9)</param>
/// <param name="distanceType">distance method to use (KullbackLeiblerDivergence is default)</param>
/// <returns>
/// a list of query results ordered by ascending distance; empty when no feature
/// could be produced for the file (unsupported analysis method or failed analysis)
/// </returns>
public static List<FindSimilar.QueryResult> SimilarTracksList(string searchForPath, Db db, Analyzer.AnalysisMethod analysisMethod, int numToTake=25, double percentage=0.2, AudioFeature.DistanceType distanceType = AudioFeature.DistanceType.KullbackLeiblerDivergence) {
    // number of candidate tracks pulled from the reader per batch
    const int batchSize = 100;

    FileInfo fi = new FileInfo(searchForPath);
    AudioFeature seedAudioFeature = null;
    AudioFeature[] audioFeatures = null;
    switch (analysisMethod) {
        case Analyzer.AnalysisMethod.MandelEllis:
            seedAudioFeature = Analyzer.AnalyzeMandelEllis(fi);
            audioFeatures = new MandelEllis[batchSize];
            break;
        case Analyzer.AnalysisMethod.SCMS:
            seedAudioFeature = Analyzer.AnalyzeScms(fi);
            audioFeatures = new Scms[batchSize];
            break;
    }

    // store results in a query results list
    List<FindSimilar.QueryResult> queryResultList = new List<FindSimilar.QueryResult>();

    // no seed feature means nothing to compare against
    // NOTE(review): assumes the analyzers signal failure by returning null - confirm
    if (seedAudioFeature == null) {
        return queryResultList;
    }

    // Get all tracks from the DB except the seedSongs
    IDataReader r = db.GetTracks(null, seedAudioFeature.Duration, percentage);
    int[] mapping = new int[batchSize];

    try {
        int read = 1;
        while (read > 0) {
            read = db.GetNextTracks(ref r, ref audioFeatures, ref mapping, batchSize, analysisMethod);

            for (int i = 0; i < read; i++) {
                double dcur = seedAudioFeature.GetDistance(audioFeatures[i], distanceType);

                // convert to positive values
                dcur = Math.Abs(dcur);

                QueryResult queryResult = new QueryResult();
                queryResult.Id = mapping[i];
                queryResult.Path = audioFeatures[i].Name;
                queryResult.Duration = audioFeatures[i].Duration;
                queryResult.Similarity = dcur;
                queryResultList.Add(queryResult);
            }
        }
    } finally {
        // GetNextTracks nulls the reader once it is exhausted; it is only still
        // set here when the loop was interrupted by an exception
        if (r != null) {
            r.Close();
        }
    }

    return queryResultList
        .OrderBy(row => row.Similarity)
        .Take(numToTake)
        .ToList();
}
/// <summary>
/// Find Similar Tracks to an audio file using its file path
/// </summary>
/// <remarks>
/// The file is analyzed first, then every candidate track returned by the
/// database is scored by its absolute distance to the analyzed file.
/// The elapsed time is written to the console when a search was actually performed.
/// </remarks>
/// <param name="searchForPath">audio file path</param>
/// <param name="db">database</param>
/// <param name="analysisMethod">analysis method (SCMS or MandelEllis)</param>
/// <param name="numToTake">max number of entries to return</param>
/// <param name="percentage">percentage below and above the duration in ms when querying (used if between 0.1 - 0.9)</param>
/// <param name="distanceType">distance method to use (KullbackLeiblerDivergence is default)</param>
/// <returns>
/// a dictionary of key value pairs ((track id, filepath) and distance); empty when no feature
/// could be produced for the file (unsupported analysis method or failed analysis)
/// </returns>
public static Dictionary<KeyValuePair<int, string>, double> SimilarTracks(string searchForPath, Db db, Analyzer.AnalysisMethod analysisMethod, int numToTake=25, double percentage=0.2, AudioFeature.DistanceType distanceType = AudioFeature.DistanceType.KullbackLeiblerDivergence) {
    // number of candidate tracks pulled from the reader per batch
    const int batchSize = 100;

    DbgTimer t = new DbgTimer();
    t.Start();

    FileInfo fi = new FileInfo(searchForPath);
    AudioFeature seedAudioFeature = null;
    AudioFeature[] audioFeatures = null;
    switch (analysisMethod) {
        case Analyzer.AnalysisMethod.MandelEllis:
            seedAudioFeature = Analyzer.AnalyzeMandelEllis(fi);
            audioFeatures = new MandelEllis[batchSize];
            break;
        case Analyzer.AnalysisMethod.SCMS:
            seedAudioFeature = Analyzer.AnalyzeScms(fi);
            audioFeatures = new Scms[batchSize];
            break;
    }

    // store results in a dictionary
    var distancesByTrack = new Dictionary<KeyValuePair<int, string>, double>();

    // no seed feature means nothing to compare against
    // NOTE(review): assumes the analyzers signal failure by returning null - confirm
    if (seedAudioFeature == null) {
        return distancesByTrack;
    }

    // Get all tracks from the DB except the seedSongs
    IDataReader r = db.GetTracks(null, seedAudioFeature.Duration, percentage);
    int[] mapping = new int[batchSize];

    try {
        int read = 1;
        while (read > 0) {
            read = db.GetNextTracks(ref r, ref audioFeatures, ref mapping, batchSize, analysisMethod);

            for (int i = 0; i < read; i++) {
                double dcur = seedAudioFeature.GetDistance(audioFeatures[i], distanceType);

                // convert to positive values
                dcur = Math.Abs(dcur);

                // keyed by (track id, name)
                distancesByTrack.Add(new KeyValuePair<int,string>(mapping[i], audioFeatures[i].Name), dcur);
            }
        }
    } finally {
        // GetNextTracks nulls the reader once it is exhausted; it is only still
        // set here when the loop was interrupted by an exception
        if (r != null) {
            r.Close();
        }
    }

    // sort by non unique values
    var sortedDict = distancesByTrack
        .OrderBy(entry => entry.Value)
        .Take(numToTake)
        .ToDictionary(pair => pair.Key, pair => pair.Value);

    Console.Out.WriteLine(String.Format("Found Similar to ({0}) in {1} ms", seedAudioFeature.Name, t.Stop().TotalMilliseconds));
    return sortedDict;
}
/// <summary>
/// Scan a directory recursively and add all the audio files found to the database
/// </summary>
/// <remarks>
/// Files already listed in the database are skipped. Release builds process the
/// remaining files with Parallel.ForEach, debug builds with a plain loop.
/// UnauthorizedAccessException and PathTooLongException are caught and their
/// message is printed to the console.
/// </remarks>
/// <param name="path">path to directory</param>
/// <param name="db">database</param>
/// <param name="databaseService">service queried for the already processed filenames when the analysis method is AudioFingerprinting</param>
/// <param name="analysisMethod">analysis method (SCMS, MandelEllis or AudioFingerprinting)</param>
/// <param name="skipDurationAboveSeconds">skip files with duration longer than this number of seconds (0 or less disables this)</param>
public static void ScanDirectoryOLD(string path, Db db, DatabaseService databaseService, Analyzer.AnalysisMethod analysisMethod, double skipDurationAboveSeconds) {
    Stopwatch stopWatch = Stopwatch.StartNew();

    FAILED_FILES_LOG.Delete();
    WARNING_FILES_LOG.Delete();

    // scan directory for audio files
    try {
        // By some reason the IOUtils.GetFiles returns a higher count than what seems correct?!
        // The query is materialized once: a deferred enumerable would walk the whole
        // directory tree again on every Count() call, including once per processed file.
        // The extension is lower-cased culture-independently so that matching does not
        // depend on the current culture.
        List<string> filesAll =
            Directory.EnumerateFiles(path, "*", SearchOption.AllDirectories)
            .Where(f => extensions.Contains(Path.GetExtension(f).ToLowerInvariant()))
            .ToList();
        Console.Out.WriteLine("Found {0} files in scan directory.", filesAll.Count);

        // get all already processed files stored in the database
        // store in memory
        // It seems to work well with huge volumes of file (200k)
        IList<string> filesAlreadyProcessed = null;
        if (analysisMethod != Analyzer.AnalysisMethod.AudioFingerprinting) {
            // if we are not using the audio fingerprinting database
            filesAlreadyProcessed = db.ReadTrackFilenames();
        } else {
            // Get database
            filesAlreadyProcessed = databaseService.ReadTrackFilenames();
        }
        Console.Out.WriteLine("Database contains {0} already processed files.", filesAlreadyProcessed.Count);

        // find the files that has not already been added to the database
        List<string> filesRemaining = filesAll.Except(filesAlreadyProcessed).ToList();
        Console.Out.WriteLine("Found {0} files remaining in scan directory to be processed.", filesRemaining.Count);

        // NOTE(review): both counters are updated without synchronization, so in the
        // multi-threaded build the printed progress numbers may be inaccurate.
        int filesCounter = 1;
        int filesAllCounter = filesAlreadyProcessed.Count + 1;

#if !DEBUG
        Console.Out.WriteLine("Running in multi-threaded mode!");
        Parallel.ForEach(filesRemaining, file =>
        {
#else
        Console.Out.WriteLine("Running in single-threaded mode!");
        foreach (string file in filesRemaining)
        {
#endif
            FileInfo fileInfo = new FileInfo(file);

            // Try to use Un4Seen Bass to check duration
            BassProxy bass = BassProxy.Instance;
            double duration = bass.GetDurationInSeconds(fileInfo.FullName);

            // check if we should skip files longer than x seconds
            // (files are processed when the limit is disabled, when the duration is
            // negative, or when the duration is positive and below the limit)
            if ( (skipDurationAboveSeconds > 0 && duration > 0 && duration < skipDurationAboveSeconds)
                || skipDurationAboveSeconds <= 0
                || duration < 0) {

                AudioFeature feature = null;
                switch (analysisMethod) {
                    case Analyzer.AnalysisMethod.MandelEllis:
                        feature = Analyzer.AnalyzeMandelEllis(fileInfo);
                        break;
                    case Analyzer.AnalysisMethod.SCMS:
                        feature = Analyzer.AnalyzeScms(fileInfo);
                        break;
                    case Analyzer.AnalysisMethod.AudioFingerprinting:
                        feature = Analyzer.AnalyzeSoundfingerprinting(fileInfo);
                        break;
                }

                if (feature != null) {
                    if (analysisMethod != Analyzer.AnalysisMethod.AudioFingerprinting) {
                        // if we are not using the audio fingerprinting database
                        db.AddTrack(ref filesAllCounter, feature);
                    } else {
                        // if we are using the audio fingerprinting database, we have already added the track
                    }

                    Console.Out.WriteLine("[{1}/{2} - {3}/{4}] Succesfully added {0} to database ({5} ms) (Thread: {6})", fileInfo.Name, filesCounter, filesRemaining.Count, filesAllCounter, filesAll.Count, feature.Duration, Thread.CurrentThread.ManagedThreadId);

                    filesCounter++;
                    filesAllCounter++;
                    feature = null;
                } else {
                    Console.Out.WriteLine("Failed! Could not generate audio fingerprint for {0}!", fileInfo.Name);
                    IOUtils.LogMessageToFile(FAILED_FILES_LOG, fileInfo.FullName);
                }
            } else {
                Console.Out.WriteLine("Skipping {0} since duration exceeds limit ({1:0.00} > {2:0.00} sec.)", fileInfo.Name, duration, skipDurationAboveSeconds);
            }

            fileInfo = null;
        }
#if !DEBUG
        );
#endif
        // filesCounter starts at 1, so subtract the initial offset
        int filesActuallyProcessed = filesCounter -1;
        Console.WriteLine("Added {0} out of a total remaining set of {1} files. (Of {2} files found).", filesActuallyProcessed, filesRemaining.Count, filesAll.Count);
    } catch (UnauthorizedAccessException UAEx) {
        Console.WriteLine(UAEx.Message);
    } catch (PathTooLongException PathEx) {
        Console.WriteLine(PathEx.Message);
    }

    Console.WriteLine("Time used: {0}", stopWatch.Elapsed);
}