/// <summary>
/// Builds min-hash LSH bucket entries for a set of fingerprints and hands them to the database service in one call.
/// </summary>
/// <param name="listOfFingerprintsToHash">Fingerprints to hash; the Signature of each one is hashed and its Id is recorded on every entry created for it</param>
/// <param name="track">Track whose Id is recorded on every created entry</param>
/// <param name="hashTables">Number of hash tables forwarded to the LSH grouping (e.g. 25)</param>
/// <param name="hashKeys">Number of hash keys forwarded to the LSH grouping (e.g. 4)</param>
/// <returns>Whatever <c>dbService.InsertHashBin</c> returns for the collected entries</returns>
private bool HashFingerprintsUsingMinHash(IEnumerable<Fingerprint> listOfFingerprintsToHash, Track track, int hashTables, int hashKeys)
{
    List<HashBinMinHash> hashBinsToStore = new List<HashBinMinHash>();

    foreach (Fingerprint current in listOfFingerprintsToHash)
    {
        // Step 1: min-hash signature of the fingerprint.
        int[] minHashSignature = minHash.ComputeMinHashSignature(current.Signature);

        // Step 2: group that signature into LSH buckets (the key/value pairs are stored below).
        Dictionary<int, long> lshBuckets = minHash.GroupMinHashToLSHBuckets(minHashSignature, hashTables, hashKeys);

        foreach (KeyValuePair<int, long> bucket in lshBuckets)
        {
            // NOTE(review): -1 is passed as the first constructor argument - presumably a
            // "not yet assigned" id; confirm against the HashBinMinHash constructor.
            hashBinsToStore.Add(new HashBinMinHash(-1, bucket.Value, bucket.Key, track.Id, current.Id));
        }
    }

    // All entries are inserted together, after every fingerprint has been processed.
    bool inserted = dbService.InsertHashBin(hashBinsToStore);
    return inserted;
}
// ReSharper disable ReturnTypeCanBeEnumerable.Local
/// <summary>
/// Converts each fingerprint into a hash signature (min-hash followed by LSH bucketing)
/// and pairs every resulting signature with the given track.
/// </summary>
/// <param name="fingerprints">Fingerprints to hash, one bool array per fingerprint</param>
/// <param name="track">Track associated with every produced signature</param>
/// <param name="hashTables">Number of hash tables forwarded to the LSH grouping</param>
/// <param name="hashKeys">Number of hash keys forwarded to the LSH grouping</param>
/// <returns>One <c>HashSignature</c> per input fingerprint, in input order</returns>
private List<HashSignature> GetSignatures(IEnumerable<bool[]> fingerprints, Track track, int hashTables, int hashKeys)
// ReSharper restore ReturnTypeCanBeEnumerable.Local
{
    List<HashSignature> result = new List<HashSignature>();

    foreach (bool[] bits in fingerprints)
    {
        // Min-hash signature of the fingerprint.
        int[] minHashes = _hasher.ComputeMinHashSignature(bits);

        // LSH buckets derived from the min-hash signature.
        Dictionary<int, long> lshBuckets = _hasher.GroupMinHashToLSHBuckets(minHashes, hashTables, hashKeys);

        // Flatten the buckets into an array indexed by bucket key.
        // NOTE(review): this relies on the keys being exactly 0..Count-1, and the long
        // bucket value is narrowed to int - confirm both against GroupMinHashToLSHBuckets.
        int[] bucketValues = new int[lshBuckets.Count];
        foreach (KeyValuePair<int, long> entry in lshBuckets)
        {
            int slot = entry.Key;
            bucketValues[slot] = (int)entry.Value;
        }

        // Tie the flattened signature to its track.
        result.Add(new HashSignature(track, bucketValues));
    }

    return result;
}
/// <summary>
/// Query one specific song using MinHash algorithm.
/// </summary>
/// <param name="signatures">Signatures from a song; null entries are skipped. The sequence is materialized once, so a deferred query is only evaluated a single time</param>
/// <param name="dbService">DatabaseService used to query the underlying database</param>
/// <param name="minHash">MinHash instance used to compute the min-hash signature and the LSH buckets of every query signature</param>
/// <param name="lshHashTables">Number of hash tables from the database</param>
/// <param name="lshGroupsPerKey">Number of groups per hash table</param>
/// <param name="thresholdTables">Minimum number of hash tables that must be found for one signature to be considered a candidate (0 = return all candidates, 2+ = return only exact matches)</param>
/// <param name="queryTime">Set by the method to the elapsed query time in milliseconds</param>
/// <param name="doSearchEverything">Disregard the locality sensitive hashes and search the whole database</param>
/// <param name="splashScreen">The "please wait" splash screen used for progress reporting and cancellation, or null when there is none</param>
/// <returns>
/// Dictionary of query statistics keyed by track id, as filled in by
/// <c>ArrangeCandidatesAccordingToFingerprints</c>. If the user cancels through the splash screen,
/// the statistics gathered so far are returned.
/// </returns>
public static Dictionary<Int32, QueryStats> QueryOneSongMinHash(
    IEnumerable<bool[]> signatures,
    DatabaseService dbService,
    MinHash minHash,
    int lshHashTables,
    int lshGroupsPerKey,
    int thresholdTables,
    ref long queryTime,
    bool doSearchEverything = false,
    SplashSceenWaitingForm splashScreen = null)
{
    Stopwatch stopWatch = new Stopwatch();
    stopWatch.Start();

    // Materialize the input once: the total is needed for progress reporting and the
    // sequence is iterated afterwards. Enumerating a deferred sequence twice would
    // repeat its work and could make the total disagree with the loop.
    ICollection<bool[]> signatureList = signatures as ICollection<bool[]> ?? signatures.ToList();

    int signatureCounter = 0;
    int signatureTotalCount = signatureList.Count;

    Dictionary<int, QueryStats> stats = new Dictionary<int, QueryStats>();

    foreach (bool[] signature in signatureList)
    {
        // Stop early if the user clicked cancel. The splash screen is optional
        // (defaults to null), so it must be null-checked before it is dereferenced.
        if (splashScreen != null && splashScreen.CancellationPending)
        {
            break;
        }

        if (signature == null)
        {
            continue;
        }

        IDictionary<int, IList<HashBinMinHash>> candidates = null;
        if (doSearchEverything)
        {
            candidates = dbService.ReadAllFingerprints();
        }
        else
        {
            // Compute Min Hash on the current signature
            int[] bin = minHash.ComputeMinHashSignature(signature);

            // Find all hashbuckets to care about
            Dictionary<int, long> hashes = minHash.GroupMinHashToLSHBuckets(bin, lshHashTables, lshGroupsPerKey);
            long[] hashbuckets = hashes.Values.ToArray();

            // Find all candidates by querying the database for those hashbuckets
            candidates = dbService.ReadFingerprintsByHashBucketLsh(hashbuckets);
        }

        // Reduce the potential candidates list if the number of hash tables found
        // for each signature is less than the threshold
        Dictionary<int, IList<HashBinMinHash>> potentialCandidates =
            SelectPotentialMatchesOutOfEntireDataset(candidates, thresholdTables);

        // Get the final candidate list by only using the potential candidate list
        if (potentialCandidates.Count > 0)
        {
            IList<Fingerprint> fingerprints = dbService.ReadFingerprintById(potentialCandidates.Keys);

            // Map every fingerprint to the number of hash bins that matched it
            Dictionary<Fingerprint, int> finalCandidates =
                fingerprints.ToDictionary(finger => finger, finger => potentialCandidates[finger.Id].Count);

            ArrangeCandidatesAccordingToFingerprints(signature, finalCandidates, lshHashTables, lshGroupsPerKey, stats);
        }

        // Progress: scale the number of processed signatures into the 5..90 percent range.
        // The counter only advances for non-null signatures (null ones hit 'continue' above).
        int percentage = (int)((float)(signatureCounter) / (float)signatureTotalCount * 85) + 5;
        if (splashScreen != null)
        {
            splashScreen.SetProgress(percentage, String.Format("Searching for similar fingerprints.\n(Signature {0} of {1})", signatureCounter + 1, signatureTotalCount));
        }
        signatureCounter++;
    }

    stopWatch.Stop();
    queryTime = stopWatch.ElapsedMilliseconds; // Set the query time parameter
    return stats;
}