/// <summary>
/// Adds the sub-fingerprints carried by a generator event to the store of the event's audio track.
/// </summary>
/// <remarks>
/// The input sequence is processed in batches of consecutive sub-fingerprints that share the same
/// frame index. For every sub-fingerprint, the hash is appended to the track's sequential hash list
/// and a track/index lookup entry is registered in the collision map. For every batch, the track's
/// index is updated so that it tells where the hashes of that frame index are located in the hash list.
/// An event without sub-fingerprints is ignored.
/// </remarks>
/// <param name="e">Event arguments providing the audio track and its generated sub-fingerprints.</param>
public void Add(SubFingerprintsGeneratedEventArgs e)
{
    var subFingerprints = e.SubFingerprints;
    if (subFingerprints.Count == 0)
    {
        return;
    }

    // NOTE(review): locking on `this` exposes the lock to code outside this class; a private
    // lock object would be safer but requires a new field outside this method.
    lock (this)
    {
        var track = e.AudioTrack;

        // Make sure there's a store for the track and get it (one lookup in the common case)
        TrackStore trackStore;
        if (!store.TryGetValue(track, out trackStore))
        {
            trackStore = new TrackStore();
            store.Add(track, trackStore);
        }

        // Iterate through the sequence of input hashes and add them to the store
        // (in batches of the same frame index)
        int position = 0;
        while (position < subFingerprints.Count)
        {
            int batchStart = trackStore.hashes.Count;
            int frameIndex = subFingerprints[position].Index;
            int batchLength = 0;

            // Consume all sequential input hashes with the same frame index (i.e. one batch)
            while (position + batchLength < subFingerprints.Count)
            {
                SubFingerprint hash = subFingerprints[position + batchLength];
                if (hash.Index != frameIndex)
                {
                    break;
                }

                // Insert hash into the sequential store
                trackStore.hashes.Add(hash.Hash);
                // Insert a track/index lookup entry for the fingerprint hash
                collisionMap.Add(hash.Hash, new SubFingerprintLookupEntry(track, hash.Index));
                batchLength++;
            }

            // The batch always contains at least the hash that defined frameIndex, so an index
            // entry telling where the hashes of this frame index can be found is always written.
            TrackStore.IndexEntry entry;
            if (trackStore.index.TryGetValue(frameIndex, out entry))
            {
                // There is already an entry for the frame index, so only its length grows.
                // NOTE(review): the new hashes were appended at the end of the hash list; the
                // entry's range only stays contiguous if this frame's earlier hashes were the
                // last ones appended - presumably guaranteed by in-order generation; confirm.
                entry.length += batchLength;
            }
            else
            {
                // First batch for this frame index: it starts where the hash list ended before
                entry = new TrackStore.IndexEntry(batchStart, batchLength);
            }
            trackStore.index[frameIndex] = entry;

            position += batchLength;
        }
    }
}
/// <summary>
/// Looks up all stored occurrences of the given hash and, for every pair of occurrences that
/// belong to different tracks, compares the hashes of the following frames to detect a match.
/// </summary>
/// <remarks>
/// NOTE(review): unlike <c>Add</c>, this method reads the stores without taking a lock; confirm
/// that it is never executed concurrently with <c>Add</c>.
/// </remarks>
/// <param name="hash">The sub-fingerprint hash whose colliding entries are examined.</param>
/// <returns>
/// A list with one <see cref="Match"/> for each pair of entries whose matching rate exceeded the
/// accept threshold; empty if no pair matched.
/// </returns>
public List<Match> FindMatches(SubFingerprintHash hash)
{
    List<Match> matches = new List<Match>();
    List<SubFingerprintLookupEntry> entries = collisionMap.GetValues(hash);

    // Stand-in for frame indices that have no entry in a track's index (parameterless
    // construction, same as used for every missing frame)
    TrackStore.IndexEntry indexEntryNone = new TrackStore.IndexEntry();

    for (int x = 0; x < entries.Count; x++)
    {
        SubFingerprintLookupEntry entry1 = entries[x];

        // Start at the following entry: pairing an entry with itself can never pass the
        // different-track check below
        for (int y = x + 1; y < entries.Count; y++)
        {
            SubFingerprintLookupEntry entry2 = entries[y];

            if (entry1.AudioTrack != entry2.AudioTrack) // don't compare tracks with themselves
            {
                var store1 = store[entry1.AudioTrack];
                var store2 = store[entry2.AudioTrack];
                List<SubFingerprintHash> hashes1 = store1.hashes;
                List<SubFingerprintHash> hashes2 = store2.hashes;
                int index1 = entry1.Index;
                int index2 = entry2.Index;
                TrackStore.IndexEntry indexEntry1, indexEntry2;

                int numTried = 0; // count of hashes tried to match
                int numMatched = 0; // count of hashes matched
                int frameCount = 0; // count over how many actual frames hashes were matched (an index in the store is the equivalent of a frame in the generator)
                bool matchFound = false;

                // Iterate through sequential frames
                while (true)
                {
                    // Single lookup per track and frame; frames without an entry are treated as empty
                    if (!store1.index.TryGetValue(index1, out indexEntry1))
                    {
                        indexEntry1 = indexEntryNone;
                    }
                    if (!store2.index.TryGetValue(index2, out indexEntry2))
                    {
                        indexEntry2 = indexEntryNone;
                    }

                    // Hash collision
                    // The union of the two ranges is the total number of distinct hashes
                    // The intersection of the two ranges is the total number of similar hashes

                    // NOTE The following block calculates the number of matches (the intersection)
                    //      of the two hash lists with the Zipper intersection algorithm and relies
                    //      on the hash list sorting in the fingerprint generator.
                    //      Other approaches tried which are slower:
                    //      - n*m element by element comparison (even though the amount of elements is reasonably small)
                    //      - concatenating the two ranges (LINQ), sorting them, and linearly iterating over, counting the duplicates (sort is slow)
                    //      - using a hash set for collision detection (hash set insertion and lookup are costly)
                    int i = indexEntry1.index;
                    int i_e = indexEntry1.index + indexEntry1.length;
                    int j = indexEntry2.index;
                    int j_e = indexEntry2.index + indexEntry2.length;
                    int intersectionCount = 0;

                    // Count intersecting hashes of a frame with the Zipper algorithm
                    while (i < i_e && j < j_e)
                    {
                        if (hashes1[i] < hashes2[j])
                        {
                            i++;
                        }
                        else if (hashes2[j] < hashes1[i])
                        {
                            j++;
                        }
                        else
                        {
                            intersectionCount++;
                            i++;
                            j++;
                        }
                    }

                    numMatched += intersectionCount;
                    // Size of the union of both frames' hash ranges
                    numTried += indexEntry1.length + indexEntry2.length - intersectionCount;

                    // Determine the next indices to check for collisions: for each track, the
                    // distance to the next frame index that has an entry; it stays 0 when the
                    // current range already ends at the end of the track's hash list
                    int nextIndex1Increment = 0;
                    if (hashes1.Count > i_e)
                    {
                        do
                        {
                            nextIndex1Increment++;
                        } while (!store1.index.ContainsKey(index1 + nextIndex1Increment));
                    }

                    int nextIndex2Increment = 0;
                    if (hashes2.Count > j_e)
                    {
                        do
                        {
                            nextIndex2Increment++;
                        } while (!store2.index.ContainsKey(index2 + nextIndex2Increment));
                    }

                    // Advance both tracks by the same amount to keep them aligned
                    int nextIndexIncrement = Math.Min(nextIndex1Increment, nextIndex2Increment);
                    index1 += nextIndexIncrement;
                    index2 += nextIndexIncrement;
                    frameCount += nextIndexIncrement;

                    // Match detection
                    // This approach trades the hash matching rate with time, i.e. the rate required
                    // for a match drops with time, by using a threshold that decays exponentially with time.
                    // The idea is that a high matching rate after a short time is equivalent to a low matching
                    // rate after a long time. The difficulty is to parameterize it in such a way that a
                    // match is detected as fast as possible, while detecting a no-match isn't delayed too far
                    // as it takes a lot of processing time.
                    // NOTE The current parameters are just eyeballed, there's a lot of influence on processing speed and matching rate here
                    double rate = 1d / numTried * numMatched;

                    // The frame limit is checked first so the threshold tables are only indexed below it
                    if (frameCount >= matchingMaxFrames || rate < thresholdReject[frameCount])
                    {
                        break; // exit condition
                    }

                    if (frameCount > matchingMinFrames && rate > thresholdAccept[frameCount])
                    {
                        matchFound = true;
                        break;
                    }

                    if (nextIndexIncrement == 0)
                    {
                        // We reached the end of a hash list
                        break;
                    }
                }

                if (matchFound)
                {
                    matches.Add(new Match
                    {
                        Similarity = 1f / numTried * numMatched,
                        Track1 = entry1.AudioTrack,
                        Track1Time = SubFingerprintIndexToTimeSpan(entry1.Index),
                        Track2 = entry2.AudioTrack,
                        Track2Time = SubFingerprintIndexToTimeSpan(entry2.Index),
                        Source = matchSourceName
                    });
                }
            }
        }
    }

    return matches;
}