/// <summary>
		/// Query one specific song using the MinHash algorithm.
		/// Every non-null signature is looked up in the database, the candidates are
		/// reduced using <paramref name="thresholdTables"/>, and the remaining fingerprints
		/// are passed to ArrangeCandidatesAccordingToFingerprints which fills the returned statistics.
		/// </summary>
		/// <param name="signatures">Signatures from a song (null entries are skipped)</param>
		/// <param name="dbService">DatabaseService used to query the underlying database</param>
		/// <param name="minHash">MinHash instance used to compute the min hash signature and group it into LSH buckets</param>
		/// <param name="lshHashTables">Number of hash tables from the database</param>
		/// <param name="lshGroupsPerKey">Number of groups per hash table</param>
		/// <param name="thresholdTables">Minimum number of hash tables that must be found for one signature to be considered a candidate (0 and 1 = return all candidates, 2+ = return only exact matches)</param>
		/// <param name="queryTime">Set by the method to the elapsed query time in milliseconds</param>
		/// <param name="doSearchEverything">disregard the local sensitivity hashes and search the whole database</param>
		/// <param name="splashScreen">The "please wait" splash screen (or null)</param>
		/// <returns>Dictionary with Tracks ID's and the Query Statistics</returns>
		public static Dictionary<Int32, QueryStats> QueryOneSongMinHash(
			IEnumerable<bool[]> signatures,
			DatabaseService dbService,
			MinHash minHash,
			int lshHashTables,
			int lshGroupsPerKey,
			int thresholdTables,
			ref long queryTime,
			bool doSearchEverything = false,
			SplashSceenWaitingForm splashScreen = null)
		{
			Stopwatch stopWatch = new Stopwatch();
			stopWatch.Start();
			
			// Materialize the sequence once: a lazily evaluated IEnumerable would otherwise
			// be enumerated twice (once for the count and once for the loop below).
			IList<bool[]> signatureList = (signatures as IList<bool[]>) ?? signatures.ToList();
			
			int signatureCounter = 0;
			int signatureTotalCount = signatureList.Count;
			Dictionary<int, QueryStats> stats = new Dictionary<int, QueryStats>();
			foreach (bool[] signature in signatureList) {
				
				#region Please Wait Splash Screen Cancel Event
				// check if the user clicked cancel
				// (the splash screen is optional, so it must be null checked before use)
				if (splashScreen != null && splashScreen.CancellationPending) {
					break;
				}
				#endregion

				if (signature == null) {
					continue;
				}

				IDictionary<int, IList<HashBinMinHash>> candidates = null;
				if (doSearchEverything) {
					// NOTE(review): this reads all fingerprints again for every signature;
					// it could probably be read once before the loop - confirm that the
					// result is not modified by the candidate selection below.
					candidates = dbService.ReadAllFingerprints();
				} else {
					// Compute Min Hash on the current signature
					int[] bin = minHash.ComputeMinHashSignature(signature);
					
					// Find all hashbuckets to care about
					Dictionary<int, long> hashes = minHash.GroupMinHashToLSHBuckets(bin, lshHashTables, lshGroupsPerKey);
					long[] hashbuckets = hashes.Values.ToArray();
					
					// Find all candidates by querying the database for those hashbuckets
					candidates = dbService.ReadFingerprintsByHashBucketLsh(hashbuckets);
				}
				
				// Reduce the potential candidates list if the number of hash tables found for each signature are less than the threshold
				Dictionary<int, IList<HashBinMinHash>> potentialCandidates = SelectPotentialMatchesOutOfEntireDataset(candidates, thresholdTables);
				
				// get the final candidate list by only using the potential candidate list
				if (potentialCandidates.Count > 0) {
					IList<Fingerprint> fingerprints = dbService.ReadFingerprintById(potentialCandidates.Keys);
					// map each fingerprint to the number of hash bins found for it
					Dictionary<Fingerprint, int> finalCandidates = fingerprints.ToDictionary(finger => finger, finger => potentialCandidates[finger.Id].Count);
					ArrangeCandidatesAccordingToFingerprints(signature,
					                                         finalCandidates,
					                                         lshHashTables,
					                                         lshGroupsPerKey,
					                                         stats);
				}
				
				#region Please Wait Splash Screen Update
				if (splashScreen != null) {
					// calculate a percentage between 5 and 90
					int percentage = (int) ((float) (signatureCounter) / (float) signatureTotalCount * 85) + 5;
					splashScreen.SetProgress(percentage, String.Format("Searching for similar fingerprints.\n(Signature {0} of {1})", signatureCounter+1, signatureTotalCount));
				}
				signatureCounter++;
				#endregion Please Wait Splash Screen Update
			}

			stopWatch.Stop();
			queryTime = stopWatch.ElapsedMilliseconds; /*Set the query Time parameter*/
			return stats;
		}
		// Example #2
		/// <summary>
		/// Search the repository for tracks that are perceptually similar to the given audio file,
		/// using the sound fingerprinting methods.
		/// </summary>
		/// <param name="filePath">input file</param>
		/// <param name="repository">the database (repository) that performs the actual search</param>
		/// <param name="thresholdTables">Minimum number of hash tables that must be found for one signature to be considered a candidate (0 and 1 = return all candidates, 2+ = return only exact matches)</param>
		/// <param name="optimizeSignatureCount">Reduce the number of signatures in order to increase the search performance</param>
		/// <param name="doSearchEverything">disregard the local sensitivity hashes and search the whole database</param>
		/// <param name="splashScreen">The "please wait" splash screen (or null)</param>
		/// <returns>a list of query results objects (e.g. similar tracks), or null if the audio file could not be read</returns>
		public static List<FindSimilar.QueryResult> SimilarTracksSoundfingerprintingList(FileInfo filePath,
		                                                                                 Repository repository,
		                                                                                 int thresholdTables,
		                                                                                 bool optimizeSignatureCount,
		                                                                                 bool doSearchEverything,
		                                                                                 SplashSceenWaitingForm splashScreen) {
			DbgTimer timer = new DbgTimer();
			timer.Start();

			if (splashScreen != null) {
				splashScreen.SetProgress(0, "Reading audio file ...");
			}
			
			// Build the work unit (audio samples and configuration) from the audio file
			WorkUnitParameterObject workUnit = GetWorkUnitParameterObjectFromAudioFile(filePath);
			if (workUnit == null) {
				// nothing to search with, report the failure and give up
				if (splashScreen != null) {
					splashScreen.SetProgress(0, "Failed reading audio file!");
				}
				return null;
			}
			
			// Always query using the dedicated querying configuration
			workUnit.FingerprintingConfiguration = fingerprintingConfigQuerying;
			
			if (splashScreen != null) {
				splashScreen.SetProgress(1, "Successfully reading audio file!");
			}

			// How the threshold tables work:
			// Each signature created from the query file retrieves a number of candidates,
			// based on how many fingerprints are associated to the same hash bucket.
			// The more fingerprints that share a hash bucket, the more likely it is an exact match.
			// A threshold of 0 or 1 therefore returns every track associated to the same hash bucket
			// (many matches), while a higher value narrows the result towards identical matches
			// (2 sometimes returns only the one we search for).
			int numberOfHashTables = workUnit.FingerprintingConfiguration.NumberOfHashTables;
			int numberOfKeys = workUnit.FingerprintingConfiguration.NumberOfKeys;
			List<FindSimilar.QueryResult> queryResults = repository.FindSimilarFromAudioSamplesList(
				numberOfHashTables,
				numberOfKeys,
				thresholdTables,
				workUnit,
				optimizeSignatureCount,
				doSearchEverything,
				splashScreen);

			Dbg.WriteLine("SimilarTracksSoundfingerprintingList - Total Execution Time: {0} ms", timer.Stop().TotalMilliseconds);
			return queryResults;
		}
		/// <summary>
		/// Find similar tracks using the audio samples of the passed work unit as input.
		/// Fingerprints are created from the samples, the database is queried using Min Hash,
		/// and the matching tracks are returned ordered by descending similarity.
		/// </summary>
		/// <param name="lshHashTables">Number of hash tables from the database</param>
		/// <param name="lshGroupsPerKey">Number of groups per hash table</param>
		/// <param name="thresholdTables">Minimum number of hash tables that must be found for one signature to be considered a candidate (0 and 1 = return all candidates, 2+ = return only exact matches)</param>
		/// <param name="param">Audio File Work Unit Parameter Object</param>
		/// <param name="optimizeSignatureCount">Reduce the number of signatures in order to increase the search performance</param>
		/// <param name="doSearchEverything">disregard the local sensitivity hashes and search the whole database</param>
		/// <param name="splashScreen">The "please wait" splash screen (or null)</param>
		/// <returns>a list of perceptually similar tracks</returns>
		public List<FindSimilar.QueryResult> FindSimilarFromAudioSamplesList(
			int lshHashTables,
			int lshGroupsPerKey,
			int thresholdTables,
			WorkUnitParameterObject param,
			bool optimizeSignatureCount,
			bool doSearchEverything,
			SplashSceenWaitingForm splashScreen) {
			
			DbgTimer timer = new DbgTimer();
			timer.Start();

			if (splashScreen != null) {
				splashScreen.SetProgress(2, "Creating fingerprints from audio samples ...");
			}
			
			// Create the fingerprints (the log spectrogram is only needed for the debug images)
			double[][] logSpectrogram;
			List<bool[]> fingerprints = fingerprintService.CreateFingerprintsFromAudioSamples(param.AudioSamples, param, out logSpectrogram);

			#if DEBUG
			// Uncomment to save debug images using the fingerprinting methods
			//Analyzer.SaveFingerprintingDebugImages(param.FileName, logSpectrogram, fingerprints, fingerprintService, param.FingerprintingConfiguration);
			#endif
			
			if (splashScreen != null) {
				splashScreen.SetProgress(3, String.Format("Successfully created {0} fingerprints.", fingerprints.Count));
			}
			
			// Too many signatures make the search very time consuming,
			// so when asked to optimize only the first MAX_SIGNATURE_COUNT are kept
			bool hasTooManyFingerprints = fingerprints.Count > MAX_SIGNATURE_COUNT;
			if (optimizeSignatureCount && hasTooManyFingerprints) {
				if (splashScreen != null) {
					splashScreen.SetProgress(4, String.Format("Only using the first {0} fingerprints out of {1}.", MAX_SIGNATURE_COUNT, fingerprints.Count));
				}
				fingerprints.RemoveRange(MAX_SIGNATURE_COUNT, fingerprints.Count - MAX_SIGNATURE_COUNT);
				Dbg.WriteLine("Only using the first {0} fingerprints.", MAX_SIGNATURE_COUNT);
			}
			
			// Query the database using Min Hash (the elapsed time is reported back but not used here)
			long elapsedMiliseconds = 0;
			Dictionary<int, QueryStats> allCandidates = QueryFingerprintManager.QueryOneSongMinHash(
				fingerprints,
				dbService,
				minHash,
				lshHashTables,
				lshGroupsPerKey,
				thresholdTables,
				ref elapsedMiliseconds,
				doSearchEverything,
				splashScreen);

			if (splashScreen != null) {
				splashScreen.SetProgress(91, String.Format("Found {0} candidates.", allCandidates.Count));
			}
			
			// Read the tracks that belong to the candidate ids
			IEnumerable<int> candidateIds = allCandidates.Select(entry => entry.Key);
			IList<Track> tracks = dbService.ReadTrackById(candidateIds);

			if (splashScreen != null) {
				splashScreen.SetProgress(95, String.Format("Reading {0} tracks.", tracks.Count));
			}
			
			// Order the candidates by Hamming similarity (in parallel) and keep the best 200.
			// The computed ordering value is also stored on each candidate's statistics.
			// TODO: What does the 0.4 number do here?
			// there doesn't seem to be any change using another number?!
			// http://msdn.microsoft.com/en-us/library/dd460719(v=vs.110).aspx
			// http://stackoverflow.com/questions/2767709/c-sharp-joins-where-with-linq-and-lambda
			ParallelQuery<KeyValuePair<int, QueryStats>> rankedCandidates = allCandidates.AsParallel()
				.OrderBy(candidate => candidate.Value.OrderingValue =
				         candidate.Value.HammingDistance / candidate.Value.NumberOfTotalTableVotes
				         + 0.4 * candidate.Value.MinHammingDistance)
				.Take(200);
			
			// Join the ranked candidates with their tracks on the ID properties
			// and sort the results so that the most similar track comes first.
			List<FindSimilar.QueryResult> queryResults = rankedCandidates.AsParallel()
				.Join(tracks.AsParallel(),
				      candidate => candidate.Key,
				      track => track.Id,
				      (candidate, track) => new FindSimilar.QueryResult {
				      	Id = track.Id,
				      	Path = track.FilePath,
				      	Duration = track.TrackLengthMs,
				      	Similarity = candidate.Value.Similarity
				      })
				.OrderByDescending(result => result.Similarity)
				.ToList();
			
			if (splashScreen != null) {
				splashScreen.SetProgress(100, "Ready!");
			}
			
			Dbg.WriteLine("FindSimilarFromAudioSamplesList - Total Execution Time: {0} ms", timer.Stop().TotalMilliseconds);
			return queryResults;
		}