Пример #1
0
        /// <summary>
        /// Computes the Min-Hash signature of every fingerprint, groups it into LSH buckets
        /// and inserts the resulting hash bins into the database in one batch.
        /// </summary>
        /// <param name="listOfFingerprintsToHash">List of fingerprints already inserted in the database</param>
        /// <param name="track">Track of the corresponding fingerprints</param>
        /// <param name="hashTables">Number of hash tables (e.g. 25)</param>
        /// <param name="hashKeys">Number of hash keys (e.g. 4)</param>
        /// <returns>The value returned by <c>dbService.InsertHashBin</c> for the collected hash bins</returns>
        private bool HashFingerprintsUsingMinHash(IEnumerable<Fingerprint> listOfFingerprintsToHash, Track track, int hashTables, int hashKeys)
        {
            List<HashBinMinHash> pendingHashBins = new List<HashBinMinHash>();

            foreach (Fingerprint current in listOfFingerprintsToHash)
            {
                // Min-Hash signature of the fingerprint
                int[] minHashSignature = minHash.ComputeMinHashSignature(current.Signature);

                // Signature grouped into LSH buckets
                Dictionary<int, long> lshBuckets = minHash.GroupMinHashToLSHBuckets(minHashSignature, hashTables, hashKeys);

                foreach (KeyValuePair<int, long> bucket in lshBuckets)
                {
                    // One hash bin per bucket entry, tied to the track and the fingerprint.
                    // NOTE(review): the leading -1 is presumably a placeholder id for a row
                    // that has not been stored yet - confirm against HashBinMinHash.
                    pendingHashBins.Add(new HashBinMinHash(-1, bucket.Value, bucket.Key, track.Id, current.Id));
                }
            }

            return dbService.InsertHashBin(pendingHashBins);
        }
        // ReSharper disable ReturnTypeCanBeEnumerable.Local
        /// <summary>
        /// Builds one <c>HashSignature</c> per fingerprint: the fingerprint is min-hashed,
        /// the min-hash signature is grouped into LSH buckets, and the bucket values are
        /// stored in an int array that is then associated with the given track.
        /// </summary>
        /// <param name="fingerprints">Fingerprints to convert into hash signatures</param>
        /// <param name="track">Track each resulting hash signature is associated with</param>
        /// <param name="hashTables">Number of hash tables passed to the LSH grouping</param>
        /// <param name="hashKeys">Number of hash keys passed to the LSH grouping</param>
        /// <returns>List with one hash signature per input fingerprint, in input order</returns>
        private List<HashSignature> GetSignatures(IEnumerable<bool[]> fingerprints, Track track, int hashTables, int hashKeys)
        // ReSharper restore ReturnTypeCanBeEnumerable.Local
        {
            List<HashSignature> result = new List<HashSignature>();

            foreach (bool[] current in fingerprints)
            {
                // Min-hash signature computed from the fingerprint
                int[] minHashSignature = _hasher.ComputeMinHashSignature(current);

                // Min-hash signature grouped into LSH buckets
                Dictionary<int, long> lshBuckets = _hasher.GroupMinHashToLSHBuckets(minHashSignature, hashTables, hashKeys);

                // Each bucket value is narrowed to int and stored at the index given by its key.
                // NOTE(review): this assumes the keys are exactly 0..Count-1 - confirm
                // against GroupMinHashToLSHBuckets.
                int[] packed = new int[lshBuckets.Count];
                foreach (KeyValuePair<int, long> entry in lshBuckets)
                {
                    packed[entry.Key] = (int)entry.Value;
                }

                // Associate the track with the packed hash signature
                result.Add(new HashSignature(track, packed));
            }

            return result;
        }
        /// <summary>
        /// Query one specific song using MinHash algorithm.
        /// </summary>
        /// <param name="signatures">Fingerprint signatures of the song being queried; <c>null</c> entries are skipped</param>
        /// <param name="dbService">DatabaseService used to query the underlying database</param>
        /// <param name="minHash">MinHash instance used to compute the min-hash signature of each query signature and to group it into LSH buckets</param>
        /// <param name="lshHashTables">Number of hash tables from the database</param>
        /// <param name="lshGroupsPerKey">Number of groups per hash table</param>
        /// <param name="thresholdTables">Minimum number of hash tables that must be found for one signature to be considered a candidate (0 = return all candidates, 2+ = return only exact matches)</param>
        /// <param name="queryTime">Set by the method to the elapsed query time in milliseconds</param>
        /// <param name="doSearchEverything">disregard the local sensitivity hashes and search the whole database</param>
        /// <param name="splashScreen">The "please wait" splash screen (or null); when null, no progress is reported and the query cannot be cancelled</param>
        /// <returns>Dictionary with Tracks ID's and the Query Statistics</returns>
        public static Dictionary<Int32, QueryStats> QueryOneSongMinHash(
            IEnumerable<bool[]> signatures,
            DatabaseService dbService,
            MinHash minHash,
            int lshHashTables,
            int lshGroupsPerKey,
            int thresholdTables,
            ref long queryTime,
            bool doSearchEverything = false,
            SplashSceenWaitingForm splashScreen = null)
        {
            Stopwatch stopWatch = Stopwatch.StartNew();

            // The signatures are both counted and iterated, so materialize a lazy
            // sequence once instead of enumerating it twice.
            ICollection<bool[]> signatureList = signatures as ICollection<bool[]> ?? signatures.ToList();

            int signatureCounter = 0;
            int signatureTotalCount = signatureList.Count;
            Dictionary<int, QueryStats> stats = new Dictionary<int, QueryStats>();

            foreach (bool[] signature in signatureList)
            {
                #region Please Wait Splash Screen Cancel Event
                // check if the user clicked cancel (the splash screen is optional and may be null)
                if (splashScreen != null && splashScreen.CancellationPending)
                {
                    break;
                }
                #endregion

                // Null signatures are skipped; they do not advance the progress counter.
                if (signature == null)
                {
                    continue;
                }

                IDictionary<int, IList<HashBinMinHash>> candidates;
                if (doSearchEverything)
                {
                    candidates = dbService.ReadAllFingerprints();
                }
                else
                {
                    // Compute Min Hash on the current signature
                    int[] bin = minHash.ComputeMinHashSignature(signature);

                    // Find all hashbuckets to care about
                    Dictionary<int, long> hashes = minHash.GroupMinHashToLSHBuckets(bin, lshHashTables, lshGroupsPerKey);
                    long[] hashbuckets = hashes.Values.ToArray();

                    // Find all candidates by querying the database for those hashbuckets
                    candidates = dbService.ReadFingerprintsByHashBucketLsh(hashbuckets);
                }

                // Reduce the potential candidates list if the number of hash tables found for each signature are less than the threshold
                Dictionary<int, IList<HashBinMinHash>> potentialCandidates = SelectPotentialMatchesOutOfEntireDataset(candidates, thresholdTables);

                // get the final candidate list by only using the potential candidate list
                if (potentialCandidates.Count > 0)
                {
                    IList<Fingerprint> fingerprints = dbService.ReadFingerprintById(potentialCandidates.Keys);

                    // Map each fingerprint to the number of hash bins found for its id
                    Dictionary<Fingerprint, int> finalCandidates = fingerprints.ToDictionary(finger => finger, finger => potentialCandidates[finger.Id].Count);
                    ArrangeCandidatesAccordingToFingerprints(signature,
                                                             finalCandidates,
                                                             lshHashTables,
                                                             lshGroupsPerKey,
                                                             stats);
                }

                #region Please Wait Splash Screen Update
                if (splashScreen != null)
                {
                    // calculate a percentage between 5 and 90
                    int percentage = (int)((float)signatureCounter / (float)signatureTotalCount * 85) + 5;
                    splashScreen.SetProgress(percentage, String.Format("Searching for similar fingerprints.\n(Signature {0} of {1})", signatureCounter + 1, signatureTotalCount));
                }
                signatureCounter++;
                #endregion
            }

            stopWatch.Stop();
            queryTime = stopWatch.ElapsedMilliseconds; /*Set the query Time parameter*/
            return stats;
        }