/// <summary>
 ///   Looks up one song in the database with the MinHash algorithm and gathers per-track query statistics.
 ///   ConnectionString is set by the caller.
 /// </summary>
 /// <param name = "signatures">Fingerprint signatures of the song; null entries are skipped</param>
 /// <param name = "dalManager">DAL Manager used to query the underlying database</param>
 /// <param name = "permStorage">Permutation storage handed to the MinHash instance</param>
 /// <param name = "seconds">Fingerprints to consider as query points [1.4 sec * N]; not read by this method body</param>
 /// <param name = "lHashTables">Number of hash tables from the database</param>
 /// <param name = "lGroupsPerKey">Number of groups per hash table</param>
 /// <param name = "thresholdTables">Threshold forwarded to SelectPotentialMatchesOutOfEntireDataset (NOTE(review): declared as int although previously described as a percentage - confirm the unit)</param>
 /// <param name = "queryTime">Set by the method to the elapsed query time in milliseconds</param>
 /// <returns>Dictionary with Tracks ID's and the Query Statistics</returns>
 public static Dictionary<Int32, QueryStats> QueryOneSongMinHash(
     IEnumerable<bool[]> signatures,
     DaoGateway dalManager,
     IPermutations permStorage,
     int seconds,
     int lHashTables,
     int lGroupsPerKey,
     int thresholdTables,
     ref long queryTime)
 {
     Stopwatch timer = Stopwatch.StartNew();
     Dictionary<Int32, QueryStats> trackStats = new Dictionary<Int32, QueryStats>();
     MinHash hasher = new MinHash(permStorage);

     foreach (bool[] signature in signatures)
     {
         if (signature == null)
         {
             continue;
         }

         // Min Hash signature of the fingerprint, grouped into the LSH bucket values to look up.
         int[] minHashSignature = hasher.ComputeMinHashSignature(signature);
         Dictionary<int, long> bucketsByTable = hasher.GroupMinHashToLSHBuckets(minHashSignature, lHashTables, lGroupsPerKey);
         long[] bucketValues = bucketsByTable.Values.ToArray();

         // Query the database for everything sharing a bucket, then keep only what passes the threshold.
         Dictionary<int, List<HashBinMinHash>> rawMatches = dalManager.ReadFingerprintsByHashBucketLSH(bucketValues);
         Dictionary<int, List<HashBinMinHash>> shortlisted = SelectPotentialMatchesOutOfEntireDataset(rawMatches, thresholdTables);
         if (shortlisted.Count == 0)
         {
             continue;
         }

         // Pair every matched fingerprint with the number of hash bins recorded for it.
         List<Fingerprint> matchedFingerprints = dalManager.ReadFingerprintById(shortlisted.Keys);
         Dictionary<Fingerprint, int> hitCounts = new Dictionary<Fingerprint, int>();
         foreach (Fingerprint matched in matchedFingerprints)
         {
             int binCount = shortlisted[matched.Id].Count;
             hitCounts.Add(matched, binCount);
         }

         ArrangeCandidatesAccordingToFingerprints(signature, hitCounts, lHashTables, lGroupsPerKey, trackStats);
     }

     timer.Stop();
     queryTime = timer.ElapsedMilliseconds; /*Report the query duration to the caller*/
     return trackStats;
 }
Exemplo n.º 2
0
        /// <summary>
        /// Looks up one song in the database with the MinHash algorithm and gathers per-track query statistics.
        /// </summary>
        /// <param name="signatures">Signatures from a song; null entries are skipped</param>
        /// <param name="dbService">DatabaseService used to query the underlying database</param>
        /// <param name="minHash">MinHash instance used to compute signatures and group them into LSH buckets</param>
        /// <param name="lshHashTables">Number of hash tables from the database</param>
        /// <param name="lshGroupsPerKey">Number of groups per hash table</param>
        /// <param name="thresholdTables">Threshold forwarded to SelectPotentialMatchesOutOfEntireDataset (NOTE(review): declared as int although previously described as a percentage - confirm the unit)</param>
        /// <param name="queryTime">Set by the method to the elapsed query time in milliseconds</param>
        /// <returns>Dictionary with Tracks ID's and the Query Statistics</returns>
        public static Dictionary<Int32, QueryStats> QueryOneSongMinHash(
            IEnumerable<bool[]> signatures,
            DatabaseService dbService,
            MinHash minHash,
            int lshHashTables,
            int lshGroupsPerKey,
            int thresholdTables,
            ref long queryTime)
        {
            Stopwatch timer = Stopwatch.StartNew();
            Dictionary<int, QueryStats> trackStats = new Dictionary<int, QueryStats>();

            foreach (bool[] current in signatures)
            {
                if (current != null)
                {
                    // Min Hash signature of the fingerprint, grouped into the LSH bucket values to look up.
                    int[] minHashSignature = minHash.ComputeMinHashSignature(current);
                    Dictionary<int, long> bucketsByTable = minHash.GroupMinHashToLSHBuckets(minHashSignature, lshHashTables, lshGroupsPerKey);
                    long[] bucketValues = bucketsByTable.Values.ToArray();

                    // Query the database for everything sharing a bucket, then keep only what passes the threshold.
                    IDictionary<int, IList<HashBinMinHash>> rawMatches = dbService.ReadFingerprintsByHashBucketLsh(bucketValues);
                    Dictionary<int, IList<HashBinMinHash>> shortlisted = SelectPotentialMatchesOutOfEntireDataset(rawMatches, thresholdTables);

                    if (shortlisted.Count > 0)
                    {
                        // Pair every matched fingerprint with the number of hash bins recorded for it.
                        IList<Fingerprint> matchedFingerprints = dbService.ReadFingerprintById(shortlisted.Keys);
                        Dictionary<Fingerprint, int> hitCounts = matchedFingerprints.ToDictionary(
                            matched => matched,
                            matched => shortlisted[matched.Id].Count);
                        ArrangeCandidatesAccordingToFingerprints(current, hitCounts, lshHashTables, lshGroupsPerKey, trackStats);
                    }
                }
            }

            timer.Stop();
            queryTime = timer.ElapsedMilliseconds; /*Report the query duration to the caller*/
            return trackStats;
        }
Exemplo n.º 3
0
        /// <summary>
        ///   Get hash similarity of one song: compares the Min Hash signatures and the LSH buckets
        ///   produced from two work units and writes the totals into <paramref name="results"/>.
        /// </summary>
        /// <param name = "service">Fingerprint service used to obtain the fingerprints of both work units</param>
        /// <param name = "hashTables">Number of hash tables in the LSH transformation</param>
        /// <param name = "hashKeys">Number of hash keys per table in the LSH transformation</param>
        /// <param name = "unitOfWork">Work unit whose fingerprints form the "database" side of the comparison</param>
        /// <param name = "sameUnitOfWork">Work unit whose fingerprints form the "query" side of the comparison</param>
        /// <param name = "results">Results object to be filled with the appropriate data</param>
        /// <remarks>
        ///   When either side yields no fingerprints the identical-value sum, its average and the identical
        ///   bucket total are reported as 0 instead of failing on the first element or producing NaN.
        /// </remarks>
        private void GetHashSimilarity(IFingerprintService service, int hashTables, int hashKeys, IWorkUnit unitOfWork, IWorkUnit sameUnitOfWork, DumpResults results)
        {
            // NOTE(review): .Result suggests these calls return tasks that are waited on synchronously - confirm
            // this method never runs on a thread that owns a synchronization context.
            List<bool[]> listDb = unitOfWork.GetFingerprintsUsingService(service).Result;
            List<bool[]> listQuery = sameUnitOfWork.GetFingerprintsUsingService(service).Result;

            int similarMinHashValues = 0;
            double averageIdenticalMinHash = 0;
            int hashesCount = 0;

            if (listDb != null && listQuery != null && listDb.Count > 0 && listQuery.Count > 0)
            {
                IPermutations perms = new DbPermutations(ConfigurationManager.ConnectionStrings["FingerprintConnectionString"].ConnectionString);
                MinHash minHash = new MinHash(perms);
                List<int[]> minHashDb = listDb.Select(minHash.ComputeMinHashSignature).ToList();
                List<int[]> minHashQuery = listQuery.Select(minHash.ComputeMinHashSignature).ToList();

                /*Calculate Min Hash signature similarity by comparing every database signature with every query signature*/
                int countDb = minHashDb.Count;
                int countQuery = minHashQuery.Count;
                int minHashSignatureLen = minHashDb[0].Length;
                for (int i = 0; i < countDb; i++)
                {
                    int[] dbSignature = minHashDb[i];
                    for (int j = 0; j < countQuery; j++)
                    {
                        int[] querySignature = minHashQuery[j];
                        for (int k = 0; k < minHashSignatureLen; k++)
                        {
                            if (dbSignature[k] == querySignature[k])
                            {
                                similarMinHashValues++;
                            }
                        }
                    }
                }

                // Multiply in double precision: the int product of the three counts can exceed Int32.MaxValue
                // and would silently wrap around in an unchecked context.
                double comparisons = (double) countDb * countQuery * minHashSignatureLen;
                if (comparisons > 0)
                {
                    averageIdenticalMinHash = similarMinHashValues / comparisons;
                }

                /*Group min hash signatures into LSH Buckets*/
                List<Dictionary<int, long>> lshBucketsDb =
                    minHashDb.Select(item => minHash.GroupMinHashToLSHBuckets(item, hashTables, hashKeys)).ToList();

                List<Dictionary<int, long>> lshBucketsQuery =
                    minHashQuery.Select(item => minHash.GroupMinHashToLSHBuckets(item, hashTables, hashKeys)).ToList();

                /*Count the distinct bucket values shared by every database/query pair*/
                foreach (Dictionary<int, long> a in lshBucketsDb)
                {
                    Dictionary<int, long>.ValueCollection aValues = a.Values;
                    foreach (Dictionary<int, long> b in lshBucketsQuery)
                    {
                        Dictionary<int, long>.ValueCollection bValues = b.Values;
                        hashesCount += aValues.Intersect(bValues).Count();
                    }
                }
            }

            results.Results.SumIdenticalMinHash = similarMinHashValues;
            results.Results.AverageIdenticalMinHash = averageIdenticalMinHash;

            // The bucket similarity figures are not computed by this method; -1 is written as a placeholder.
            results.Results.SumJaqLSHBucketSimilarity = -1;
            results.Results.AverageJaqLSHBucketSimilarity = -1;
            results.Results.TotalIdenticalLSHBuckets = hashesCount;
        }
Exemplo n.º 4
0
        /// <summary>
        ///   Compute Hash Bins using Min Hash algorithm: reads all tracks, hashes the fingerprints of each
        ///   track into LSH buckets, inserts the resulting hash bins and advances the progress bar per track.
        /// </summary>
        /// <remarks>
        ///   Returns immediately when there are no tracks. Stops (after showing an error box) when reading
        ///   the fingerprints of a track throws.
        /// </remarks>
        private void ComputeHashBinsUsingMinHash()
        {
            List<Track> tracks = _dalManager.ReadTracks(); /*Read all tracks from the database*/
            if (tracks == null || tracks.Count == 0)
            {
                // Nothing to hash. This also avoids configuring the progress bar with Maximum = 0 and Value = 1
                // (presumably _pbMinHash is a WinForms ProgressBar, which rejects a Value above Maximum - confirm).
                return;
            }

            _pbMinHash.Invoke(new Action(() => /*Progress bar Settings*/
                                         {
                                             _pbMinHash.Minimum = 1;
                                             _pbMinHash.Maximum = tracks.Count;
                                             _pbMinHash.Value = 1;
                                             _pbMinHash.Step = 1;
                                         }));

            MinHash minHash = new MinHash(_permutations);
            for (int index = 0; index < tracks.Count; index++)
            {
                Track track = tracks[index];
                List<Fingerprint> fingerprints;
                try
                {
                    fingerprints = _dalManager.ReadFingerprintsByTrackId(track.Id, 0); /*Read corresponding fingerprints of a specific track*/
                    if (fingerprints == null)
                    {
                        continue;
                    }
                }
                catch (Exception ex)
                {
                    // Report the failure to the user and abandon the remaining tracks.
                    MessageBox.Show(ex.Message, Resources.Error, MessageBoxButtons.OK, MessageBoxIcon.Error);
                    return;
                }

                List<HashBinMinHash> listToInsert = new List<HashBinMinHash>(); /*Generate Min Hash signatures*/
                foreach (Fingerprint fingerprint in fingerprints)
                {
                    int[] hashBins = minHash.ComputeMinHashSignature(fingerprint.Signature); /*For each of the fingerprints*/
                    Dictionary<int, long> hashTable = minHash.GroupMinHashToLSHBuckets(hashBins, _numberofgroupsminhash, _numberofhashesperkeyminhash);
                    foreach (KeyValuePair<int, long> item in hashTable)
                    {
                        // One hash bin per (table key, bucket value) pair, tied to the track and fingerprint.
                        HashBinMinHash hash = new HashBinMinHash(-1, item.Value, item.Key, track.Id, fingerprint.Id);
                        listToInsert.Add(hash);
                    }
                }
                _dalManager.InsertHashBin(listToInsert); /*Actual insert*/
                _pbMinHash.Invoke(new Action(() => _pbMinHash.PerformStep()));
            }
        }
Exemplo n.º 5
0
 /// <summary>
 ///   Hashes the given fingerprints with the Min-Hash algorithm and inserts the resulting hash bins in one call.
 /// </summary>
 /// <param name = "listOfFingerprintsToHash">List of fingerprints already inserted in the database</param>
 /// <param name = "track">Track of the corresponding fingerprints</param>
 /// <param name = "hashTables">Number of hash tables</param>
 /// <param name = "hashKeys">Number of hash keys</param>
 private void HashFingerprintsUsingMinHash(IEnumerable<Fingerprint> listOfFingerprintsToHash, Track track, int hashTables, int hashKeys)
 {
     List<HashBinMinHash> pendingBins = new List<HashBinMinHash>();
     MinHash hasher = new MinHash(_permStorage);

     foreach (Fingerprint current in listOfFingerprintsToHash)
     {
         // Min Hash signature of the fingerprint, grouped into LSH buckets.
         int[] minHashSignature = hasher.ComputeMinHashSignature(current.Signature);
         Dictionary<int, long> buckets = hasher.GroupMinHashToLSHBuckets(minHashSignature, hashTables, hashKeys);

         // One hash bin per (table key, bucket value) pair, tied to the track and fingerprint.
         foreach (KeyValuePair<int, long> bucket in buckets)
         {
             pendingBins.Add(new HashBinMinHash(-1, bucket.Value, bucket.Key, track.Id, current.Id));
         }
     }

     // Single insert for everything collected above.
     _dalManager.InsertHashBin(pendingBins);
 }
		/// <summary>
		/// Query one specific song using MinHash algorithm.
		/// </summary>
		/// <param name="signatures">Signatures from a song; null entries are skipped</param>
		/// <param name="dbService">DatabaseService used to query the underlying database</param>
		/// <param name="minHash">MinHash instance used to compute signatures and group them into LSH buckets</param>
		/// <param name="lshHashTables">Number of hash tables from the database</param>
		/// <param name="lshGroupsPerKey">Number of groups per hash table</param>
		/// <param name="thresholdTables">Minimum number of hash tables that must be found for one signature to be considered a candidate (0 = return all candidates, 2+ = return only exact matches)</param>
		/// <param name="queryTime">Set by the method, representing the query length in milliseconds</param>
		/// <param name="doSearchEverything">disregard the local sensitivity hashes and search the whole database</param>
		/// <param name="splashScreen">The "please wait" splash screen (or null). When null, no cancellation check and no progress reporting take place</param>
		/// <returns>Dictionary with Tracks ID's and the Query Statistics</returns>
		public static Dictionary<Int32, QueryStats> QueryOneSongMinHash(
			IEnumerable<bool[]> signatures,
			DatabaseService dbService,
			MinHash minHash,
			int lshHashTables,
			int lshGroupsPerKey,
			int thresholdTables,
			ref long queryTime,
			bool doSearchEverything = false,
			SplashSceenWaitingForm splashScreen = null)
		{
			Stopwatch stopWatch = new Stopwatch();
			stopWatch.Start();
			
			// Materialize once so that a lazily evaluated sequence is not enumerated twice
			// (once for the total count and once for the loop).
			IList<bool[]> signatureList = signatures as IList<bool[]> ?? signatures.ToList();
			
			int signatureCounter = 0;
			int signatureTotalCount = signatureList.Count;
			Dictionary<int, QueryStats> stats = new Dictionary<int, QueryStats>();
			foreach (bool[] signature in signatureList) {
				
				#region Please Wait Splash Screen Cancel Event
				// check if the user clicked cancel (the splash screen is optional, so guard against null)
				if (splashScreen != null && splashScreen.CancellationPending) {
					break;
				}
				#endregion

				if (signature == null) {
					continue;
				}

				IDictionary<int, IList<HashBinMinHash>> candidates = null;
				if (doSearchEverything) {
					candidates = dbService.ReadAllFingerprints();
				} else {
					// Compute Min Hash on randomly selected fingerprint
					int[] bin = minHash.ComputeMinHashSignature(signature);
					
					// Find all hashbuckets to care about
					Dictionary<int, long> hashes = minHash.GroupMinHashToLSHBuckets(bin, lshHashTables, lshGroupsPerKey);
					long[] hashbuckets = hashes.Values.ToArray();
					
					// Find all candidates by querying the database for those hashbuckets
					candidates = dbService.ReadFingerprintsByHashBucketLsh(hashbuckets);
				}
				
				// Reduce the potential candidates list if the number of hash tables found for each signature are less than the threshold
				Dictionary<int, IList<HashBinMinHash>> potentialCandidates = SelectPotentialMatchesOutOfEntireDataset(candidates, thresholdTables);
				
				// get the final candidate list by only using the potential candidate list
				if (potentialCandidates.Count > 0) {
					IList<Fingerprint> fingerprints = dbService.ReadFingerprintById(potentialCandidates.Keys);
					Dictionary<Fingerprint, int> finalCandidates = fingerprints.ToDictionary(finger => finger, finger => potentialCandidates[finger.Id].Count);
					ArrangeCandidatesAccordingToFingerprints(signature,
					                                         finalCandidates,
					                                         lshHashTables,
					                                         lshGroupsPerKey,
					                                         stats);
				}
				
				#region Please Wait Splash Screen Update
				if (splashScreen != null) {
					// calculate a percentage between 5 and 90
					int percentage = (int) ((float) (signatureCounter) / (float) signatureTotalCount * 85) + 5;
					splashScreen.SetProgress(percentage, String.Format("Searching for similar fingerprints.\n(Signature {0} of {1})", signatureCounter+1, signatureTotalCount));
				}
				signatureCounter++;
				#endregion
			}

			stopWatch.Stop();
			queryTime = stopWatch.ElapsedMilliseconds; /*Set the query Time parameter*/
			return stats;
		}