/// <summary>
/// Queries with a configuration that carries a track group id and checks that the track
/// returned by the group-aware model service call is reported as the best match.
/// </summary>
public void OnlyTracksWithGroupIdAreConsideredAsPotentialCandidatesTest()
{
    const int thresholdVotes = 5;
    const int trackId = 20;
    const int subFingerprintId = 10;

    var hashBuckets = new long[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
    var query = new HashData(new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 11 }, hashBuckets);
    var trackReference = new ModelReference<int>(trackId);
    var candidate = new SubFingerprintData(
        new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 },
        new ModelReference<int>(subFingerprintId),
        trackReference);

    // Only the group-aware read is mocked, so a successful result implies it was the call used.
    modelService
        .Setup(service => service.ReadSubFingerprintDataByHashBucketsThresholdWithGroupId(hashBuckets, thresholdVotes, "group-id"))
        .Returns(new List<SubFingerprintData> { candidate });
    modelService
        .Setup(service => service.ReadTrackByReference(trackReference))
        .Returns(new TrackData { ISRC = "isrc", TrackReference = trackReference });

    var result = queryFingerprintService.Query(
        new List<HashData> { query },
        new CustomQueryConfiguration { TrackGroupId = "group-id" });

    Assert.IsTrue(result.IsSuccessful);
    Assert.AreEqual("isrc", result.BestMatch.Track.ISRC);
    Assert.AreEqual(trackReference, result.BestMatch.Track.TrackReference);
}
/// <summary>
/// Returns two sub-fingerprints of the same track from the mocked model service and checks
/// that the query yields a single result entry with a Hamming similarity sum of 200.
/// </summary>
public void HammingSimilarityIsSummedUpAccrossAllSubFingerprintsTest()
{
    const int trackId = 20;
    const int firstSubFingerprintId = 10;
    const int secondSubFingerprintId = 11;

    var query = new HashedFingerprint(GenericHashBuckets(), 0, 0, Enumerable.Empty<string>());
    var trackReference = new ModelReference<int>(trackId);
    var firstCandidate = new SubFingerprintData(
        GenericHashBuckets(), 1, 0, new ModelReference<int>(firstSubFingerprintId), trackReference);
    var secondCandidate = new SubFingerprintData(
        GenericHashBuckets(), 2, 0.928f, new ModelReference<int>(secondSubFingerprintId), trackReference);
    var configuration = new DefaultQueryConfiguration();

    modelService
        .Setup(service => service.SupportsBatchedSubFingerprintQuery)
        .Returns(false);
    modelService
        .Setup(service => service.ReadSubFingerprints(It.IsAny<int[]>(), configuration))
        .Returns(new List<SubFingerprintData> { firstCandidate, secondCandidate });
    modelService
        .Setup(service => service.ReadTracksByReferences(new[] { trackReference }))
        .Returns(new List<TrackData> { new TrackData { ISRC = "isrc", TrackReference = trackReference } });

    var result = queryFingerprintService.Query(
        new List<HashedFingerprint> { query },
        configuration,
        modelService.Object);

    Assert.IsTrue(result.ContainsMatches);
    Assert.AreEqual("isrc", result.BestMatch.Track.ISRC);
    Assert.AreEqual(trackReference, result.BestMatch.Track.TrackReference);
    Assert.AreEqual(200, result.BestMatch.HammingSimilaritySum);
    Assert.AreEqual(1, result.ResultEntries.Count());
}
/// <summary>
/// Hashes 100,000 randomly generated fingerprints into a 25-table RAM storage and checks,
/// for every table, that the share of colliding hashes does not exceed 1%.
/// </summary>
public void DistributionOfHashesHasToBeUniform()
{
    const int totalHashes = 100000;

    var lsh = LocalitySensitiveHashingAlgorithm.Instance;
    var random = new Random();
    var storage = new RAMStorage(25);
    var fingerprintLength = 8192f / 5512;
    var hashingConfig = new DefaultHashingConfig
    {
        NumberOfLSHTables = 25,
        NumberOfMinHashesPerTable = 4,
        HashBuckets = 0
    };
    var trackReference = new ModelReference<int>(1);

    for (int index = 0; index < totalHashes; index++)
    {
        var schema = TestUtilities.GenerateRandomFingerprint(random, 200, 128, 32);
        var fingerprint = new Fingerprint(schema, index * fingerprintLength, (uint)index, Array.Empty<byte>());
        var hashed = lsh.Hash(fingerprint, hashingConfig);
        storage.AddSubFingerprint(
            new SubFingerprintData(
                hashed.HashBins,
                hashed.SequenceNumber,
                hashed.StartsAt,
                new ModelReference<uint>((uint)index),
                trackReference));
    }

    // The collision ratio is computed as (inserted - reported count per table) / inserted.
    foreach (var countInTable in storage.HashCountsPerTable)
    {
        var collisions = (double)(totalHashes - countInTable) / totalHashes;
        Assert.IsTrue(collisions <= 0.01d, $"Less than 1% of collisions across 100K hashes: {collisions}");
    }
}
/// <summary>
/// Returns two sub-fingerprints of the same track from the mocked model service query and
/// checks that the result contains a single entry for that track with a score of 200.
/// </summary>
public void HammingSimilarityIsSummedUpAcrossAllSubFingerprintsTest()
{
    const int trackId = 20;
    const int firstId = 10;
    const int secondId = 11;

    var query = new HashedFingerprint(GenericHashBuckets(), 0, 0, Array.Empty<byte>());
    var trackReference = new ModelReference<int>(trackId);
    var firstCandidate = new SubFingerprintData(
        GenericHashBuckets(), 1, 0, new ModelReference<int>(firstId), trackReference);
    var secondCandidate = new SubFingerprintData(
        GenericHashBuckets(), 2, 0.928f, new ModelReference<int>(secondId), trackReference);
    var configuration = new DefaultQueryConfiguration();

    modelService
        .Setup(service => service.Query(It.IsAny<Hashes>(), It.IsAny<QueryConfiguration>()))
        .Returns(new[] { firstCandidate, secondCandidate });
    modelService
        .Setup(service => service.ReadTracksByReferences(new[] { trackReference }))
        .Returns(new List<TrackData> { new TrackData("id", string.Empty, string.Empty, 0d, trackReference) });

    var queryHashes = new Hashes(
        new List<HashedFingerprint> { query },
        1.48f,
        DateTime.Now,
        Enumerable.Empty<string>());

    var result = queryFingerprintService.Query(queryHashes, configuration, modelService.Object);

    Assert.IsTrue(result.ContainsMatches);
    Assert.AreEqual("id", result.BestMatch.Track.Id);
    Assert.AreEqual(trackReference, result.BestMatch.Track.TrackReference);
    Assert.AreEqual(200, result.BestMatch.Score);
    Assert.AreEqual(1, result.ResultEntries.Count());
}
/// <summary>
/// Returns two sub-fingerprints of the same track from the mocked model service and checks
/// that the single result entry carries a Hamming similarity sum equal to twice the length
/// of the generic signature.
/// </summary>
public void HammingSimilarityIsSummedUpAccrossAllSubFingerprintsTest()
{
    const int trackId = 20;
    const int firstSubFingerprintId = 10;
    const int secondSubFingerprintId = 11;

    var query = new HashedFingerprint(GenericSignature(), GenericHashBuckets(), 0, 0, Enumerable.Empty<string>());
    var trackReference = new ModelReference<int>(trackId);
    var firstCandidate = new SubFingerprintData(
        GenericHashBuckets(), 1, 0, new ModelReference<int>(firstSubFingerprintId), trackReference);
    var secondCandidate = new SubFingerprintData(
        GenericHashBuckets(), 2, 0.928, new ModelReference<int>(secondSubFingerprintId), trackReference);
    var configuration = new DefaultQueryConfiguration();

    modelService
        .Setup(service => service.SupportsBatchedSubFingerprintQuery)
        .Returns(false);
    modelService
        .Setup(service => service.ReadSubFingerprints(It.IsAny<long[]>(), configuration))
        .Returns(new List<SubFingerprintData> { firstCandidate, secondCandidate });
    modelService
        .Setup(service => service.ReadTrackByReference(trackReference))
        .Returns(new TrackData { ISRC = "isrc", TrackReference = trackReference });

    var result = queryFingerprintService.Query(
        new List<HashedFingerprint> { query },
        configuration,
        modelService.Object);

    Assert.IsTrue(result.ContainsMatches);
    Assert.AreEqual("isrc", result.BestMatch.Track.ISRC);
    Assert.AreEqual(trackReference, result.BestMatch.Track.TrackReference);
    Assert.AreEqual(GenericSignature().Length * 2, result.BestMatch.HammingSimilaritySum);
    Assert.AreEqual(1, result.ResultEntries.Count());
}
/// <summary>
/// Stores 100 randomly generated fingerprints, then hashes 10 more random fingerprints and
/// checks that none of their hash bins finds a stored sub-fingerprint in any of the 25 tables.
/// </summary>
public void FingerprintsCantMatchUniformlyAtRandom()
{
    var lsh = LocalitySensitiveHashingAlgorithm.Instance;
    var random = new Random();
    var storage = new RAMStorage(25);
    var fingerprintLength = 8192f / 5512;
    var hashingConfig = new DefaultHashingConfig
    {
        NumberOfLSHTables = 25,
        NumberOfMinHashesPerTable = 4,
        HashBuckets = 0
    };
    var trackReference = new ModelReference<int>(1);

    // Populate the storage.
    for (int stored = 0; stored < 100; stored++)
    {
        var schema = TestUtilities.GenerateRandomFingerprint(random, 200, 128, 32);
        var fingerprint = new Fingerprint(schema, stored * fingerprintLength, (uint)stored, Array.Empty<byte>());
        var hashed = lsh.Hash(fingerprint, hashingConfig);
        storage.AddSubFingerprint(
            new SubFingerprintData(
                hashed.HashBins,
                hashed.SequenceNumber,
                hashed.StartsAt,
                new ModelReference<uint>((uint)stored),
                trackReference));
    }

    // Probe every table with fresh random fingerprints.
    for (int probe = 0; probe < 10; probe++)
    {
        var schema = TestUtilities.GenerateRandomFingerprint(random, 200, 128, 32);
        var fingerprint = new Fingerprint(schema, probe * fingerprintLength, (uint)probe, Array.Empty<byte>());
        var hashed = lsh.Hash(fingerprint, hashingConfig);
        for (int table = 0; table < 25; table++)
        {
            var found = storage.GetSubFingerprintsByHashTableAndHash(table, hashed.HashBins[table]);
            Assert.IsFalse(found.Any());
        }
    }
}
/// <summary>
/// Adds, from parallel workers, between 5 and 9 database matches for each of 100 query
/// fingerprints (all belonging to one track) and checks that every match is retrievable
/// for the track and that every query sequence number is represented.
/// </summary>
public void SameQueryHashGeneratesMultipleTrackMatches()
{
    var groupedQueryResults = new GroupedQueryResults(10d, DateTime.Now);
    var random = new Random(1);
    int runs = 100;
    var trackRef = new ModelReference<uint>(1);

    // System.Random instances are not thread-safe, so the per-query match counts are drawn
    // on this thread before the parallel section starts instead of inside the workers.
    int[] counts = new int[runs];
    for (int i = 0; i < runs; ++i)
    {
        counts[i] = random.Next(5, 10);
    }

    int k = 0;
    Parallel.For(0, runs, i =>
    {
        var queryPoint = new HashedFingerprint(new int[25], (uint)i, i * 1.48f, Array.Empty<byte>());
        for (int j = 0; j < counts[i]; ++j)
        {
            // Take the counter value atomically once and derive the sequence number from it;
            // reading k directly would race with increments performed by other workers.
            int id = Interlocked.Increment(ref k);
            int sequence = id - 1;
            var dbPoint = new SubFingerprintData(
                new int[25],
                (uint)sequence,
                sequence * 0.01f,
                new ModelReference<uint>((uint)id),
                trackRef);
            groupedQueryResults.Add(queryPoint, dbPoint, i);
        }
    });

    var allMatches = groupedQueryResults.GetMatchesForTrack(trackRef).ToList();

    Assert.AreEqual(counts.Sum(), allMatches.Count);
    Assert.AreEqual(runs, allMatches.Select(m => m.QuerySequenceNumber).Distinct().Count());
}
/// <summary>
/// Serializes a sub-fingerprint (including clusters) with a fixed 32-bit length prefix,
/// deserializes it from the produced bytes and checks that the result equals the original.
/// </summary>
public void ShouldSerialize()
{
    var original = new SubFingerprintData(
        new[] { 1, 2, 3 },
        1,
        1f,
        new ModelReference<int>(1),
        new ModelReference<int>(2))
    {
        Clusters = new List<string>() { "1", "2" }
    };

    byte[] payload;
    using (var output = new MemoryStream())
    {
        Serializer.SerializeWithLengthPrefix(output, original, PrefixStyle.Fixed32);
        payload = output.ToArray();
    }

    using (var input = new MemoryStream(payload))
    {
        var roundTripped = Serializer.DeserializeWithLengthPrefix<SubFingerprintData>(input, PrefixStyle.Fixed32);
        Assert.AreEqual(original, roundTripped);
    }
}
/// <summary>
/// Inserts 520 x 33 sub-fingerprints that all share the same hash bins from parallel workers
/// and checks that each of the first 25 hash tables returns every inserted entry for its bin.
/// </summary>
public void ShouldInsertEntriesInThreadSafeManner()
{
    const int tracksCount = 520;
    const int subFingerprintsPerTrack = 33;

    var storage = new RAMStorage(50);
    var converter = new HashConverter();
    var rawBytes = Enumerable.Range(0, 100).Select(b => (byte)b).ToArray();
    var hashBins = converter.ToInts(rawBytes, 25);
    var fingerprintLength = 8192f / 5512;

    Parallel.For(0, tracksCount, trackIndex =>
    {
        var trackReference = new ModelReference<uint>((uint)trackIndex);
        for (int sequence = 0; sequence < subFingerprintsPerTrack; sequence++)
        {
            storage.AddSubFingerprint(
                new SubFingerprintData(
                    hashBins,
                    (uint)sequence,
                    sequence * fingerprintLength,
                    new ModelReference<uint>((uint)sequence),
                    trackReference,
                    Array.Empty<byte>()));
        }
    });

    for (int table = 0; table < 25; table++)
    {
        var stored = storage.GetSubFingerprintsByHashTableAndHash(table, hashBins[table]);
        Assert.AreEqual(tracksCount * subFingerprintsPerTrack, stored.Count);
    }
}
/// <summary>
/// Returns two sub-fingerprints of the same track from the mocked model service and checks
/// that the best match reports a similarity of 9 + 8, with one analyzed candidate and one
/// result entry.
/// </summary>
public void HammingSimilarityIsSummedUpAccrossAllSubFingerprintsTest()
{
    const int thresholdVotes = 5;
    const int trackId = 20;
    const int firstSubFingerprintId = 10;
    const int secondSubFingerprintId = 11;

    var hashBuckets = new long[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
    var query = new HashData(new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 11 }, hashBuckets);
    var trackReference = new ModelReference<int>(trackId);
    var firstCandidate = new SubFingerprintData(
        new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 },
        new ModelReference<int>(firstSubFingerprintId),
        trackReference);
    var secondCandidate = new SubFingerprintData(
        new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 10, 12 },
        new ModelReference<int>(secondSubFingerprintId),
        trackReference);

    modelService
        .Setup(service => service.ReadSubFingerprintDataByHashBucketsWithThreshold(hashBuckets, thresholdVotes))
        .Returns(new List<SubFingerprintData> { firstCandidate, secondCandidate });
    modelService
        .Setup(service => service.ReadTrackByReference(trackReference))
        .Returns(new TrackData { ISRC = "isrc", TrackReference = trackReference });

    var result = queryFingerprintService.Query(
        new List<HashData> { query },
        new DefaultQueryConfiguration());

    Assert.IsTrue(result.IsSuccessful);
    Assert.AreEqual("isrc", result.BestMatch.Track.ISRC);
    Assert.AreEqual(trackReference, result.BestMatch.Track.TrackReference);
    Assert.AreEqual(9 + 8, result.BestMatch.Similarity);
    Assert.AreEqual(1, result.AnalyzedCandidatesCount);
    Assert.AreEqual(1, result.ResultEntries.Count);
}
/// <summary>
/// Creates a DTO by copying the hashes, sequence number, sequence position and the numeric
/// ids of the sub-fingerprint and track references from the given sub-fingerprint.
/// </summary>
/// <param name="subFingerprintData">Sub-fingerprint whose values are copied into the DTO.</param>
/// <remarks>
/// Reference ids are converted with <c>System.Convert.ToUInt64</c> instead of a direct cast.
/// NOTE(review): this assumes the reference exposes its id as a boxed value; a direct cast
/// then succeeds only for a boxed ulong and throws InvalidCastException for ids held as
/// int or uint. Confirm against the reference type's declaration.
/// </remarks>
internal SubFingerprintDataDTO(SubFingerprintData subFingerprintData)
{
    Hashes = subFingerprintData.Hashes;
    SequenceNumber = subFingerprintData.SequenceNumber;
    SequenceAt = subFingerprintData.SequenceAt;
    SubFingerprintReference = System.Convert.ToUInt64(subFingerprintData.SubFingerprintReference.Id);
    TrackReference = System.Convert.ToUInt64(subFingerprintData.TrackReference.Id);
}
/// <summary>
/// Adds a matched pair to this accumulator: increases the Hamming similarity sum, lets
/// <c>ResetBestMatchIfAppropriate</c> inspect the new pair and stores it in the match list.
/// </summary>
/// <param name="hashedFingerprint">Query fingerprint of the pair.</param>
/// <param name="match">Sub-fingerprint the query fingerprint was matched with.</param>
/// <param name="hammingSimilarity">Similarity of the pair, added to the running sum.</param>
/// <returns>This accumulator, so calls can be chained.</returns>
public ResultEntryAccumulator Add(HashedFingerprint hashedFingerprint, SubFingerprintData match, int hammingSimilarity)
{
    HammingSimilaritySum += hammingSimilarity;

    var pair = new MatchedPair(hashedFingerprint, match, hammingSimilarity);
    ResetBestMatchIfAppropriate(pair);
    matches.Add(pair);

    return this;
}
/// <summary>
/// Creates a DTO by copying the hashes, sequence number and sequence position of the given
/// sub-fingerprint, and by reading both of its references as unsigned 64-bit values.
/// </summary>
/// <param name="subFingerprintData">Sub-fingerprint whose values are copied into the DTO.</param>
internal SubFingerprintDataDTO(SubFingerprintData subFingerprintData)
{
    // Plain value copies.
    Hashes = subFingerprintData.Hashes;
    SequenceNumber = subFingerprintData.SequenceNumber;
    SequenceAt = subFingerprintData.SequenceAt;

    // References are flattened to their ulong representation.
    SubFingerprintReference = subFingerprintData.SubFingerprintReference.Get<ulong>();
    TrackReference = subFingerprintData.TrackReference.Get<ulong>();
}
/// <summary>
/// Checks that two sub-fingerprints built from identical constructor arguments compare equal.
/// </summary>
public void ShouldIdentifyAsEqual()
{
    var left = new SubFingerprintData(
        new int[0], 0, 0, new ModelReference<int>(1), new ModelReference<int>(0));
    var right = new SubFingerprintData(
        new int[0], 0, 0, new ModelReference<int>(1), new ModelReference<int>(0));

    Assert.AreEqual(left, right);
}
/// <summary>
/// Returns candidates for three different tracks while limiting the query to two returned
/// tracks, and checks that three candidates were analyzed but only the first and third
/// tracks appear, in that order, in the result entries.
/// </summary>
public void MaximumNumberOfReturnedTracksIsLessThanAnalyzedCandidatesResultsTest()
{
    const int thresholdVotes = 5;
    const int firstTrackId = 20;
    const int secondTrackId = 21;
    const int thirdTrackId = 22;
    const int firstSubFingerprintId = 10;
    const int secondSubFingerprintId = 11;

    var hashBuckets = new long[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
    var query = new HashData(new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 11 }, hashBuckets);
    var firstTrack = new ModelReference<int>(firstTrackId);
    var thirdTrack = new ModelReference<int>(thirdTrackId);

    var firstCandidate = new SubFingerprintData(
        new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 },
        new ModelReference<int>(firstSubFingerprintId),
        firstTrack);
    var secondCandidate = new SubFingerprintData(
        new byte[] { 11, 2, 13, 4, 15, 6, 7, 8, 10, 12 },
        new ModelReference<int>(secondSubFingerprintId),
        new ModelReference<int>(secondTrackId));
    // The third candidate reuses the second sub-fingerprint id, exactly as the original test did.
    var thirdCandidate = new SubFingerprintData(
        new byte[] { 1, 2, 3, 4, 5, 15, 7, 8, 10, 12 },
        new ModelReference<int>(secondSubFingerprintId),
        new ModelReference<int>(thirdTrackId));

    modelService
        .Setup(service => service.ReadSubFingerprintDataByHashBucketsWithThreshold(hashBuckets, thresholdVotes))
        .Returns(new List<SubFingerprintData> { firstCandidate, secondCandidate, thirdCandidate });
    modelService
        .Setup(service => service.ReadTrackByReference(firstTrack))
        .Returns(new TrackData { ISRC = "isrc", TrackReference = firstTrack });
    modelService
        .Setup(service => service.ReadTrackByReference(thirdTrack))
        .Returns(new TrackData { ISRC = "isrc_2", TrackReference = thirdTrack });

    var result = queryFingerprintService.Query(
        modelService.Object,
        new List<HashData> { query },
        new CustomQueryConfiguration { MaximumNumberOfTracksToReturnAsResult = 2, ThresholdVotes = thresholdVotes });

    Assert.IsTrue(result.IsSuccessful);
    Assert.AreEqual("isrc", result.BestMatch.Track.ISRC);
    Assert.AreEqual(firstTrack, result.BestMatch.Track.TrackReference);
    Assert.AreEqual(9, result.BestMatch.Similarity);
    Assert.AreEqual(3, result.AnalyzedCandidatesCount);
    Assert.AreEqual(2, result.ResultEntries.Count);
    Assert.AreEqual(firstTrack, result.ResultEntries[0].Track.TrackReference);
    Assert.AreEqual(thirdTrack, result.ResultEntries[1].Track.TrackReference);
}
/// <summary>
/// Asserts that two sub-fingerprints have equal sub-fingerprint references, equal track
/// references and identical signatures (same length and same value at every position).
/// </summary>
/// <param name="expected">Sub-fingerprint holding the expected values.</param>
/// <param name="actual">Sub-fingerprint under verification.</param>
private void AsserSubFingerprintsAreEqual(SubFingerprintData expected, SubFingerprintData actual)
{
    Assert.AreEqual(expected.SubFingerprintReference, actual.SubFingerprintReference);
    Assert.AreEqual(expected.TrackReference, actual.TrackReference);

    // Compare lengths first: without this a longer actual signature would pass unnoticed and
    // a shorter one would fail with an index error instead of an assertion failure.
    Assert.AreEqual(expected.Signature.Length, actual.Signature.Length);
    for (int i = 0; i < expected.Signature.Length; i++)
    {
        Assert.AreEqual(expected.Signature[i], actual.Signature[i]);
    }
}
/// <summary>
/// Records one more matched pair: the similarity is added to the running Hamming sum, the
/// pair is offered to <c>ResetBestMatchIfAppropriate</c> and then appended to the matches.
/// </summary>
/// <param name="hashedFingerprint">Query fingerprint of the pair.</param>
/// <param name="match">Sub-fingerprint matched by the query fingerprint.</param>
/// <param name="hammingSimilarity">Similarity of the pair.</param>
/// <returns>The same accumulator instance.</returns>
public ResultEntryAccumulator Add(HashedFingerprint hashedFingerprint, SubFingerprintData match, int hammingSimilarity)
{
    HammingSimilaritySum += hammingSimilarity;

    var newPair = new MatchedPair(hashedFingerprint, match, hammingSimilarity);
    ResetBestMatchIfAppropriate(newPair);
    matches.Add(newPair);

    return this;
}
/// <summary>
/// Returns candidates for three different tracks while limiting the query to two returned
/// tracks, and checks that three candidates were analyzed but only the first and third
/// tracks appear, in that order, in the result entries.
/// </summary>
public void MaximumNumberOfReturnedTracksIsLessThanAnalyzedCandidatesResultsTest()
{
    const int thresholdVotes = 5;
    const int firstTrackId = 20;
    const int secondTrackId = 21;
    const int thirdTrackId = 22;
    const int firstSubFingerprintId = 10;
    const int secondSubFingerprintId = 11;

    var hashBuckets = new long[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
    var query = new HashedFingerprint(new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 11 }, hashBuckets, 1, 0);
    var firstTrack = new ModelReference<int>(firstTrackId);
    var thirdTrack = new ModelReference<int>(thirdTrackId);

    var firstCandidate = new SubFingerprintData(
        new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 },
        1,
        0,
        new ModelReference<int>(firstSubFingerprintId),
        firstTrack);
    var secondCandidate = new SubFingerprintData(
        new byte[] { 11, 2, 13, 4, 15, 6, 7, 8, 10, 12 },
        2,
        0.928,
        new ModelReference<int>(secondSubFingerprintId),
        new ModelReference<int>(secondTrackId));
    // The third candidate reuses the second sub-fingerprint id, exactly as the original test did.
    var thirdCandidate = new SubFingerprintData(
        new byte[] { 1, 2, 3, 4, 5, 15, 7, 8, 10, 12 },
        3,
        0.928 * 2,
        new ModelReference<int>(secondSubFingerprintId),
        new ModelReference<int>(thirdTrackId));

    modelService
        .Setup(service => service.ReadSubFingerprintDataByHashBucketsWithThreshold(hashBuckets, thresholdVotes))
        .Returns(new List<SubFingerprintData> { firstCandidate, secondCandidate, thirdCandidate });
    modelService
        .Setup(service => service.ReadTrackByReference(firstTrack))
        .Returns(new TrackData { ISRC = "isrc", TrackReference = firstTrack });
    modelService
        .Setup(service => service.ReadTrackByReference(thirdTrack))
        .Returns(new TrackData { ISRC = "isrc_2", TrackReference = thirdTrack });

    var result = queryFingerprintService.Query(
        modelService.Object,
        new List<HashedFingerprint> { query },
        new CustomQueryConfiguration { MaximumNumberOfTracksToReturnAsResult = 2, ThresholdVotes = thresholdVotes });

    Assert.IsTrue(result.IsSuccessful);
    Assert.AreEqual("isrc", result.BestMatch.Track.ISRC);
    Assert.AreEqual(firstTrack, result.BestMatch.Track.TrackReference);
    Assert.AreEqual(9, result.BestMatch.Similarity);
    Assert.AreEqual(3, result.AnalyzedCandidatesCount);
    Assert.AreEqual(2, result.ResultEntries.Count);
    Assert.AreEqual(firstTrack, result.ResultEntries[0].Track.TrackReference);
    Assert.AreEqual(thirdTrack, result.ResultEntries[1].Track.TrackReference);
}
/// <summary>
/// Inserts a track and one sub-fingerprint for it, reads the sub-fingerprint back by its
/// reference and checks that it matches what was inserted.
/// </summary>
public void ReadTest()
{
    var insertedTrack = new TrackData("isrc", "artist", "title", "album", 1986, 200);
    var trackReference = TrackDao.InsertTrack(insertedTrack);
    var subFingerprintReference = SubFingerprintDao.InsertSubFingerprint(GenericSignature, trackReference);

    var stored = SubFingerprintDao.ReadSubFingerprint(subFingerprintReference);

    var expected = new SubFingerprintData(GenericSignature, subFingerprintReference, trackReference);
    AsserSubFingerprintsAreEqual(expected, stored);
}
/// <summary>
/// Asserts that two sub-fingerprints have equal references, identical signatures (same
/// length and same value at every position), equal sequence numbers and sequence positions
/// that differ by less than <c>Epsilon</c>.
/// </summary>
/// <param name="expected">Sub-fingerprint holding the expected values.</param>
/// <param name="actual">Sub-fingerprint under verification.</param>
private void AsserSubFingerprintsAreEqual(SubFingerprintData expected, SubFingerprintData actual)
{
    Assert.AreEqual(expected.SubFingerprintReference, actual.SubFingerprintReference);
    Assert.AreEqual(expected.TrackReference, actual.TrackReference);

    // Compare lengths first: without this a longer actual signature would pass unnoticed and
    // a shorter one would fail with an index error instead of an assertion failure.
    Assert.AreEqual(expected.Signature.Length, actual.Signature.Length);
    for (int i = 0; i < expected.Signature.Length; i++)
    {
        Assert.AreEqual(expected.Signature[i], actual.Signature[i]);
    }

    Assert.AreEqual(expected.SequenceNumber, actual.SequenceNumber);
    // Sequence positions are fractional, so they are compared with a tolerance.
    Assert.IsTrue(System.Math.Abs(expected.SequenceAt - actual.SequenceAt) < Epsilon);
}
/// <summary>
/// Builds a sub-fingerprint from a DTO: the DTO's hash dictionary is converted to hash bins
/// and its sub-fingerprint and track ids are wrapped in Solr model references.
/// </summary>
/// <param name="dto">DTO to convert.</param>
/// <returns>The sub-fingerprint built from the DTO values.</returns>
private SubFingerprintData GetSubFingerprintData(SubFingerprintDTO dto)
{
    long[] hashBins = this.dictionaryToHashConverter.FromSolrDictionaryToHashes(dto.Hashes);

    return new SubFingerprintData(
        hashBins,
        dto.SequenceNumber,
        dto.SequenceAt,
        new SolrModelReference(dto.SubFingerprintId),
        new SolrModelReference(dto.TrackId));
}
/// <summary>
/// Returns candidates for three different tracks while the configuration allows two tracks
/// to be returned, and checks that exactly the first and second tracks are reported, with
/// the first one as best match carrying a Hamming similarity sum of 50.
/// </summary>
public void MaximumNumberOfReturnedTracksIsLessThanAnalyzedCandidatesResultsTest()
{
    const int thresholdVotes = 5;
    const int firstTrackId = 20;
    const int secondTrackId = 21;
    const int thirdTrackId = 22;
    const int firstSubFingerprintId = 10;
    const int secondSubFingerprintId = 11;

    var query = new HashedFingerprint(GenericSignature(), GenericHashBuckets(), 1, 0, Enumerable.Empty<string>());
    var firstTrack = new ModelReference<int>(firstTrackId);
    var secondTrack = new ModelReference<int>(secondTrackId);

    var firstCandidate = new SubFingerprintData(
        GenericHashBuckets(), 1, 0, new ModelReference<int>(firstSubFingerprintId), firstTrack);
    var secondCandidate = new SubFingerprintData(
        GenericHashBuckets(), 2, 0.928, new ModelReference<int>(secondSubFingerprintId), secondTrack);
    // The third candidate reuses the second sub-fingerprint id, exactly as the original test did.
    var thirdCandidate = new SubFingerprintData(
        GenericHashBuckets(), 3, 0.928 * 2, new ModelReference<int>(secondSubFingerprintId), new ModelReference<int>(thirdTrackId));

    var configuration = new DefaultQueryConfiguration { MaxTracksToReturn = 2, ThresholdVotes = thresholdVotes };

    modelService
        .Setup(service => service.SupportsBatchedSubFingerprintQuery)
        .Returns(false);
    modelService
        .Setup(service => service.ReadSubFingerprints(It.IsAny<long[]>(), configuration))
        .Returns(new List<SubFingerprintData> { firstCandidate, secondCandidate, thirdCandidate });
    modelService
        .Setup(service => service.ReadTrackByReference(firstTrack))
        .Returns(new TrackData { ISRC = "isrc", TrackReference = firstTrack });
    modelService
        .Setup(service => service.ReadTrackByReference(secondTrack))
        .Returns(new TrackData { ISRC = "isrc_1", TrackReference = secondTrack });

    var result = queryFingerprintService.Query(
        new List<HashedFingerprint> { query },
        configuration,
        modelService.Object);

    Assert.IsTrue(result.ContainsMatches);
    Assert.AreEqual("isrc", result.BestMatch.Track.ISRC);
    Assert.AreEqual(firstTrack, result.BestMatch.Track.TrackReference);
    Assert.AreEqual(50, result.BestMatch.HammingSimilaritySum);
    Assert.AreEqual(2, result.ResultEntries.Count());

    var entries = result.ResultEntries.ToList();
    Assert.AreEqual(firstTrack, entries[0].Track.TrackReference);
    Assert.AreEqual(secondTrack, entries[1].Track.TrackReference);
}
/// <summary>
/// For every candidate, converts its hashes to a byte signature of the same length as the
/// expected sub-fingerprint, computes the Hamming similarity between the two and records
/// the pair in the accumulator entry keyed by the candidate's track reference.
/// </summary>
/// <param name="candidates">Candidate sub-fingerprints to score.</param>
/// <param name="expected">Query fingerprint the candidates are compared against.</param>
/// <param name="accumulator">Per-track accumulators; entries are created or updated in place.</param>
public void AccumulateHammingSimilarity(IEnumerable<SubFingerprintData> candidates, HashedFingerprint expected, ConcurrentDictionary<IModelReference, ResultEntryAccumulator> accumulator)
{
    foreach (var candidate in candidates)
    {
        byte[] candidateSignature = hashConverter.ToBytes(candidate.Hashes, expected.SubFingerprint.Length);
        int similarity = CalculateHammingSimilarity(expected.SubFingerprint, candidateSignature);

        // Copy kept so the lambdas capture a per-iteration value.
        SubFingerprintData matched = candidate;
        accumulator.AddOrUpdate(
            candidate.TrackReference,
            trackKey => new ResultEntryAccumulator(expected, matched, similarity),
            (trackKey, existing) => existing.Add(expected, matched, similarity));
    }
}
/// <summary>
/// Returns candidates for three different tracks while the configuration allows two tracks
/// to be returned, and checks that exactly the first and second tracks are reported, with
/// the first one as best match carrying a score of 100.
/// </summary>
public void MaximumNumberOfReturnedTracksIsLessThanAnalyzedCandidatesResultsTest()
{
    const int thresholdVotes = 5;
    const int firstId = 20;
    const int secondId = 21;
    const int thirdId = 22;
    const int firstSubId = 10;
    const int secondSubId = 11;

    var query = new HashedFingerprint(GenericHashBuckets(), 1, 0, Array.Empty<byte>());
    var firstTrack = new ModelReference<int>(firstId);
    var secondTrack = new ModelReference<int>(secondId);

    var firstCandidate = new SubFingerprintData(
        GenericHashBuckets(), 1, 0, new ModelReference<int>(firstSubId), firstTrack);
    var secondCandidate = new SubFingerprintData(
        GenericHashBuckets(), 2, 0.928f, new ModelReference<int>(secondSubId), secondTrack);
    // The third candidate reuses the second sub-fingerprint id, exactly as the original test did.
    var thirdCandidate = new SubFingerprintData(
        GenericHashBuckets(), 3, 0.928f * 2, new ModelReference<int>(secondSubId), new ModelReference<int>(thirdId));

    var configuration = new DefaultQueryConfiguration { MaxTracksToReturn = 2, ThresholdVotes = thresholdVotes };

    modelService
        .Setup(service => service.Query(It.IsAny<Hashes>(), configuration))
        .Returns(new[] { firstCandidate, secondCandidate, thirdCandidate });
    modelService
        .Setup(service => service.ReadTracksByReferences(new[] { firstTrack, secondTrack }))
        .Returns(new List<TrackData>
        {
            new TrackData("id", string.Empty, string.Empty, 0d, firstTrack),
            new TrackData("id_1", string.Empty, string.Empty, 0d, secondTrack)
        });

    var queryHashes = new Hashes(
        new List<HashedFingerprint> { query },
        1.48f,
        DateTime.Now,
        Enumerable.Empty<string>());

    var result = queryFingerprintService.Query(queryHashes, configuration, modelService.Object);

    Assert.IsTrue(result.ContainsMatches);
    Assert.AreEqual("id", result.BestMatch.Track.Id);
    Assert.AreEqual(firstTrack, result.BestMatch.Track.TrackReference);
    Assert.AreEqual(100, result.BestMatch.Score);
    Assert.AreEqual(2, result.ResultEntries.Count());

    var entries = result.ResultEntries.ToList();
    Assert.AreEqual(firstTrack, entries[0].Track.TrackReference);
    Assert.AreEqual(secondTrack, entries[1].Track.TrackReference);
}
/// <summary>
/// Creates <paramref name="totalCount"/> sub-fingerprints with empty 25-element hash arrays,
/// assigning them round-robin to <c>totalCount / countPerTrack</c> track ids.
/// </summary>
/// <param name="totalCount">Number of sub-fingerprints to create.</param>
/// <param name="countPerTrack">Divisor used to derive the number of distinct track ids.</param>
/// <returns>The generated sub-fingerprints in creation order.</returns>
private IEnumerable<SubFingerprintData> GetSubFingerprints(int totalCount, int countPerTrack)
{
    int tracksCount = totalCount / countPerTrack;
    var generated = new List<SubFingerprintData>();

    for (int index = 0; index < totalCount; index++)
    {
        // Consecutive entries go to consecutive tracks, wrapping around at tracksCount.
        var trackReference = new ModelReference<int>(index % tracksCount);
        generated.Add(
            new SubFingerprintData(
                new int[25],
                (uint)index,
                index * 1.48f,
                new ModelReference<ulong>((ulong)index),
                trackReference));
    }

    return generated;
}
/// <summary>
/// Registers a match between a query fingerprint and a stored sub-fingerprint: adds the
/// similarity to the per-track sum and records the match under the query sequence number.
/// </summary>
/// <param name="hashedFingerprint">Query fingerprint that produced the match.</param>
/// <param name="subFingerprintData">Stored sub-fingerprint that was matched.</param>
/// <param name="hammingSimilarity">Similarity between the two.</param>
public void Add(HashedFingerprint hashedFingerprint, SubFingerprintData subFingerprintData, int hammingSimilarity)
{
    var trackReference = subFingerprintData.TrackReference;
    similaritySumPerTrack.AddOrUpdate(
        trackReference,
        hammingSimilarity,
        (key, currentSum) => currentSum + hammingSimilarity);

    var match = new MatchedWith(hashedFingerprint.StartsAt, subFingerprintData.SequenceAt, hammingSimilarity);

    // NOTE(review): the lookup-then-add on matches is not synchronized in this method;
    // confirm whether callers are expected to serialize access.
    if (matches.TryGetValue(hashedFingerprint.SequenceNumber, out var existing))
    {
        existing.AddOrUpdateNewMatch(trackReference, match);
    }
    else
    {
        matches.Add(hashedFingerprint.SequenceNumber, new Candidates(trackReference, match));
    }
}
/// <summary>
/// Creates <paramref name="candidatesCount"/> sub-fingerprints for a track. All but the last
/// have their first hash overwritten with 0; the last one keeps the generic hash buckets
/// untouched and is positioned at <c>0.256 * candidatesCount</c>.
/// </summary>
/// <param name="trackReference">Track the sub-fingerprints belong to.</param>
/// <param name="candidatesCount">Total number of sub-fingerprints to create.</param>
/// <returns>The generated sub-fingerprints in creation order.</returns>
private IEnumerable<SubFingerprintData> GetSubFingerprintsForTrack(ModelReference<int> trackReference, int candidatesCount)
{
    const double fingerprintLength = 0.256;

    var generated = new List<SubFingerprintData>();
    int lastIndex = candidatesCount - 1;

    for (int index = 0; index < lastIndex; index++)
    {
        var altered = new SubFingerprintData(
            GenericHashBuckets(),
            index,
            fingerprintLength * index,
            new ModelReference<int>(index),
            trackReference);
        altered.Hashes[0] = 0;
        generated.Add(altered);
    }

    generated.Add(
        new SubFingerprintData(
            GenericHashBuckets(),
            lastIndex,
            fingerprintLength * candidatesCount,
            new ModelReference<int>(candidatesCount),
            trackReference));

    return generated;
}
/// <summary>
/// Stores a hashed fingerprint for a track: allocates the next sub-fingerprint reference
/// from an atomically incremented counter, saves the resulting sub-fingerprint (including
/// clusters) under that id and inserts its hash bins.
/// </summary>
/// <param name="hashedFingerprint">Hashed fingerprint to store.</param>
/// <param name="trackReference">Track the fingerprint belongs to.</param>
public void AddSubfingerprint(HashedFingerprint hashedFingerprint, IModelReference trackReference)
{
    var nextId = (ulong)Interlocked.Increment(ref subFingerprintReferenceCounter);
    var subFingerprintReference = new ModelReference<ulong>(nextId);

    var stored = new SubFingerprintData(
        hashedFingerprint.HashBins,
        hashedFingerprint.SequenceNumber,
        hashedFingerprint.StartsAt,
        subFingerprintReference,
        trackReference)
    {
        Clusters = hashedFingerprint.Clusters
    };

    SubFingerprints[(ulong)stored.SubFingerprintReference.Id] = stored;
    InsertHashes(hashedFingerprint.HashBins, subFingerprintReference.Id);
}
/// <summary>
/// Inserts a track and one sub-fingerprint for it, reads the sub-fingerprint back by id and
/// checks that it matches what was inserted.
/// </summary>
public void ReadTest()
{
    var insertedTrack = new TrackData("isrc", "artist", "title", "album", 1986, 200);
    int trackId = TrackDao.Insert(insertedTrack);
    long subFingerprintId = SubFingerprintDao.Insert(GenericSignature, trackId);

    SubFingerprintData stored = SubFingerprintDao.ReadById(subFingerprintId);

    var expected = new SubFingerprintData(
        GenericSignature,
        new ModelReference<long>(subFingerprintId),
        new ModelReference<int>(trackId));
    AsserSubFingerprintsAreEqual(expected, stored);
}
/// <summary>
/// Registers, under the instance lock, a match between a query fingerprint and a result
/// sub-fingerprint: adds the score to the per-track sum and records the match under the
/// query sequence number.
/// </summary>
/// <param name="queryFingerprint">Query fingerprint that produced the match.</param>
/// <param name="resultSubFingerprint">Stored sub-fingerprint that was matched.</param>
/// <param name="score">Score of the match.</param>
public void Add(HashedFingerprint queryFingerprint, SubFingerprintData resultSubFingerprint, double score)
{
    lock (lockObject)
    {
        var trackReference = resultSubFingerprint.TrackReference;
        scoreSumPerTrack.AddOrUpdate(trackReference, score, (key, currentSum) => currentSum + score);

        var match = new MatchedWith(
            queryFingerprint.SequenceNumber,
            queryFingerprint.StartsAt,
            resultSubFingerprint.SequenceNumber,
            resultSubFingerprint.SequenceAt,
            score);

        if (sequenceToCandidates.TryGetValue(queryFingerprint.SequenceNumber, out Candidates existing))
        {
            existing.AddNewMatchForTrack(trackReference, match);
        }
        else
        {
            sequenceToCandidates.Add(queryFingerprint.SequenceNumber, new Candidates(trackReference, match));
        }
    }
}
/// <summary>
/// Stores a signature for a track under the instance lock: increments the id counter, saves
/// the sub-fingerprint under the new id and registers a hash entry for it in the track's
/// hash dictionary, creating that dictionary on first use.
/// </summary>
/// <param name="signature">Signature bytes to store.</param>
/// <param name="trackId">Id of the track the signature belongs to.</param>
/// <returns>The id assigned to the stored sub-fingerprint.</returns>
public long Insert(byte[] signature, int trackId)
{
    lock (lockObject)
    {
        counter++;

        storage.SubFingerprints[counter] = new SubFingerprintData(
            signature,
            new ModelReference<long>(counter),
            new ModelReference<int>(trackId));

        bool trackKnown = storage.TracksHashes.ContainsKey(trackId);
        if (!trackKnown)
        {
            storage.TracksHashes[trackId] = new ConcurrentDictionary<long, HashData>();
        }

        storage.TracksHashes[trackId][counter] = new HashData { SubFingerprint = signature };
        return counter;
    }
}
/// <summary>
/// Converts DTOs into sub-fingerprints: each DTO's hash dictionary is turned into hash bins,
/// the bins into a 100-byte signature, and the ids into Solr model references.
/// </summary>
/// <param name="results">DTOs to convert.</param>
/// <returns>The converted sub-fingerprints, in the order of the input.</returns>
private IEnumerable<SubFingerprintData> ConvertResults(IEnumerable<SubFingerprintDTO> results)
{
    var converted = new List<SubFingerprintData>();

    foreach (var dto in results)
    {
        long[] hashBins = this.dictionaryToHashConverter.FromSolrDictionary(dto.Hashes);
        byte[] signature = this.hashConverter.ToBytes(hashBins, 100); // TODO refactor, extracting this constant

        converted.Add(
            new SubFingerprintData(
                signature,
                dto.SequenceNumber,
                dto.SequenceAt,
                new SolrModelReference(dto.SubFingerprintId),
                new SolrModelReference(dto.TrackId)));
    }

    return converted;
}
/// <summary>
/// For every candidate, computes the Hamming similarity between the expected hash bins and
/// the candidate's hashes and records the pair in the accumulator entry keyed by the
/// candidate's track reference.
/// </summary>
/// <param name="candidates">Candidate sub-fingerprints to score.</param>
/// <param name="expected">Query fingerprint the candidates are compared against.</param>
/// <param name="accumulator">Per-track accumulators; entries are created or updated in place.</param>
/// <param name="keysPerHash">Value forwarded to the similarity calculation.</param>
public void AccumulateHammingSimilarity(
    IEnumerable<SubFingerprintData> candidates,
    HashedFingerprint expected,
    ConcurrentDictionary<IModelReference, ResultEntryAccumulator> accumulator,
    int keysPerHash)
{
    foreach (var candidate in candidates)
    {
        int similarity = CalculateHammingSimilarity(expected.HashBins, candidate.Hashes, keysPerHash);

        // Copy kept so the lambdas capture a per-iteration value.
        SubFingerprintData matched = candidate;
        accumulator.AddOrUpdate(
            candidate.TrackReference,
            trackKey => new ResultEntryAccumulator(expected, matched, similarity),
            (trackKey, existing) => existing.Add(expected, matched, similarity));
    }
}
/// <summary>
/// Adds 1000 matches for one track from parallel workers, each with a query position equal
/// to its index, and checks that the matches come back ordered 0..999 by query position.
/// </summary>
public void MatchesShouldBeOrderedByQueryAt()
{
    const int runs = 1000;

    var grouped = new GroupedQueryResults(Enumerable.Empty<HashedFingerprint>());
    var trackReference = new ModelReference<int>(1);

    Parallel.For(0, runs, index =>
    {
        var query = new HashedFingerprint(new int[0], (uint)index, index, new string[0]);
        // Result positions run in the opposite direction of the query positions.
        var candidate = new SubFingerprintData(
            new int[0],
            (uint)index,
            runs - index,
            new ModelReference<uint>((uint)index),
            trackReference);
        grouped.Add(query, candidate, index);
    });

    var queryPositions = grouped
        .GetMatchesForTrackOrderedByQueryAt(trackReference)
        .Select(with => (int)with.QueryAt)
        .ToList();

    CollectionAssert.AreEqual(Enumerable.Range(0, runs), queryPositions);
}
/// <summary>
/// Adds 1000 matches spread round-robin over five tracks from parallel workers, then checks
/// the per-track similarity sums, the ordering of the top tracks, the best match of the last
/// track and the number of matches recorded per track.
/// </summary>
public void ShouldAccumulateResults()
{
    const int runs = 1000;

    var grouped = new GroupedQueryResults(Enumerable.Empty<HashedFingerprint>());
    var references = new[] { 1, 2, 3, 4, 5 }.Select(id => new ModelReference<int>(id)).ToArray();

    Parallel.For(0, runs, index =>
    {
        var query = new HashedFingerprint(new int[0], (uint)index, index * 0.05f, new string[0]);
        var candidate = new SubFingerprintData(
            new int[0],
            (uint)index,
            index * 0.07f,
            new ModelReference<uint>((uint)index),
            references[index % references.Length]);
        grouped.Add(query, candidate, index);
    });

    Assert.IsTrue(grouped.ContainsMatches);

    int perTrack = runs / references.Length;
    for (int track = 0; track < references.Length; track++)
    {
        // Sum of the similarities index = track, track + 5, ... added for this track.
        int expectedSum = (perTrack - 1) * runs / 2 + perTrack * track;
        Assert.AreEqual(expectedSum, grouped.GetHammingSimilaritySumForTrack(references[track]));
    }

    var topTracks = grouped.GetTopTracksByHammingSimilarity(references.Length * 2).ToList();
    for (int rank = 0; rank < references.Length; rank++)
    {
        Assert.AreEqual(references[references.Length - rank - 1], topTracks[rank]);
    }

    var best = grouped.GetBestMatchForTrack(references.Last());
    Assert.AreEqual((runs - 1) * 0.05f, best.QueryAt, 0.000001);
    Assert.AreEqual((runs - 1) * 0.07f, best.ResultAt, 0.000001);

    for (int track = 0; track < references.Length; track++)
    {
        var matches = grouped.GetMatchesForTrackOrderedByQueryAt(references[track]).ToList();
        Assert.AreEqual(runs / references.Length, matches.Count);
    }
}
// Creates an accumulator seeded with its first match: the pair becomes the
// initial BestMatch and is then recorded through Add.
public ResultEntryAccumulator(
    HashedFingerprint hashedFingerprint,
    SubFingerprintData match,
    int hammingSimilarity)
{
    // BestMatch is assigned before Add runs, as in the original ordering.
    // NOTE(review): Add may read BestMatch - confirm before reordering these statements.
    this.BestMatch = new MatchedPair(hashedFingerprint, match, hammingSimilarity);
    this.Add(hashedFingerprint, match, hammingSimilarity);
}
// Feeds 20 candidates of one track to AccumulateHammingSimilarity with their
// positions running from 20 down to 1, and expects the accumulated matches to
// expose SequenceAt values running from 1 up to 20.
public void ShouldSortMatchesProperly()
{
    const int CandidatesCount = 20;
    const double OneFingerprintLength = 1.0d;
    var trackReference = new ModelReference<int>(0);

    // The third constructor argument decreases as i grows, so input order is descending.
    var candidates = new List<SubFingerprintData>();
    for (int i = 0; i < CandidatesCount; ++i)
    {
        candidates.Add(new SubFingerprintData(
            GenericHashBuckets(),
            i,
            OneFingerprintLength * (CandidatesCount - i),
            new ModelReference<int>(i),
            trackReference));
    }

    var accumulator = new ConcurrentDictionary<IModelReference, ResultEntryAccumulator>();
    var queryFingerprint = new HashedFingerprint(
        GenericSignature(),
        GenericHashBuckets(),
        1,
        0d,
        Enumerable.Empty<string>());

    similarityUtility.AccumulateHammingSimilarity(candidates, queryFingerprint, accumulator);

    var expectedPositions = Enumerable.Range(1, CandidatesCount);
    var actualPositions = accumulator[trackReference].Matches
        .Select(pair => pair.SubFingerprint.SequenceAt)
        .ToList();

    CollectionAssert.AreEqual(expectedPositions, actualPositions);
}
// Builds candidatesCount sub-fingerprints for the given track.
// All but the last have the first hash overwritten with 0; the last one keeps the
// hashes returned by GenericHashBuckets() untouched.
private IEnumerable<SubFingerprintData> GetSubFingerprintsForTrack(ModelReference<int> trackReference, int candidatesCount)
{
    const double FingerprintLength = 0.256;
    int lastSequenceNumber = candidatesCount - 1;
    var result = new List<SubFingerprintData>();

    int index = 0;
    while (index < lastSequenceNumber)
    {
        var altered = new SubFingerprintData(
            GenericHashBuckets(),
            index,
            FingerprintLength * index,
            new ModelReference<int>(index),
            trackReference);
        altered.Hashes[0] = 0;
        result.Add(altered);
        index++;
    }

    // The closing entry is positioned and identified with candidatesCount rather than
    // candidatesCount - 1, which leaves a gap after the preceding entry.
    // NOTE(review): presumably intentional - confirm against the tests that call this helper.
    var closing = new SubFingerprintData(
        GenericHashBuckets(),
        lastSequenceNumber,
        FingerprintLength * candidatesCount,
        new ModelReference<int>(candidatesCount),
        trackReference);
    result.Add(closing);

    return result;
}
// Queries with a configuration carrying TrackGroupId = "group-id" and checks that
// the track returned by the group-aware model service lookup is the best match.
public void OnlyTracksWithGroupIdAreConsideredAsPotentialCandidatesTest()
{
    // Arrange
    const int DefaultThreshold = 5;
    const int FirstTrackId = 20;
    const int FirstSubFingerprintId = 10;

    long[] buckets = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
    var queryHash = new HashedFingerprint(new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 11 }, buckets, 0, 0);

    var firstTrackReference = new ModelReference<int>(FirstTrackId);
    var storedSubFingerprint = new SubFingerprintData(
        new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 },
        1,
        0.928,
        new ModelReference<int>(FirstSubFingerprintId),
        firstTrackReference);

    // Only the group-aware read is stubbed on the mock.
    modelService
        .Setup(service => service.ReadSubFingerprintDataByHashBucketsThresholdWithGroupId(buckets, DefaultThreshold, "group-id"))
        .Returns(new List<SubFingerprintData> { storedSubFingerprint });
    modelService
        .Setup(service => service.ReadTrackByReference(firstTrackReference))
        .Returns(new TrackData { ISRC = "isrc", TrackReference = firstTrackReference });

    var queryHashes = new List<HashedFingerprint> { queryHash };
    var configuration = new CustomQueryConfiguration { TrackGroupId = "group-id" };

    // Act
    var queryResult = queryFingerprintService.Query(modelService.Object, queryHashes, configuration);

    // Assert
    Assert.IsTrue(queryResult.IsSuccessful);
    Assert.AreEqual("isrc", queryResult.BestMatch.Track.ISRC);
    Assert.AreEqual(firstTrackReference, queryResult.BestMatch.Track.TrackReference);
}
// Returns two sub-fingerprints of the same track from the model service and checks
// that they collapse into a single result entry whose similarity is 9 + 8.
public void HammingSimilarityIsSummedUpAccrossAllSubFingerprintsTest()
{
    // Arrange
    const int DefaultThreshold = 5;
    const int FirstTrackId = 20;
    const int FirstSubFingerprintId = 10;
    const int SecondSubFingerprintId = 11;

    long[] buckets = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
    var queryHash = new HashedFingerprint(new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 11 }, buckets, 0, 0);
    var firstTrackReference = new ModelReference<int>(FirstTrackId);

    // Agrees with the query signature in the first 9 of 10 positions.
    var firstResult = new SubFingerprintData(
        new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 },
        1,
        0,
        new ModelReference<int>(FirstSubFingerprintId),
        firstTrackReference);

    // Agrees with the query signature in the first 8 of 10 positions.
    var secondResult = new SubFingerprintData(
        new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 10, 12 },
        2,
        0.928,
        new ModelReference<int>(SecondSubFingerprintId),
        firstTrackReference);

    modelService
        .Setup(service => service.ReadSubFingerprintDataByHashBucketsWithThreshold(buckets, DefaultThreshold))
        .Returns(new List<SubFingerprintData> { firstResult, secondResult });
    modelService
        .Setup(service => service.ReadTrackByReference(firstTrackReference))
        .Returns(new TrackData { ISRC = "isrc", TrackReference = firstTrackReference });

    var queryHashes = new List<HashedFingerprint> { queryHash };

    // Act
    var queryResult = queryFingerprintService.Query(modelService.Object, queryHashes, new DefaultQueryConfiguration());

    // Assert
    Assert.IsTrue(queryResult.IsSuccessful);
    Assert.AreEqual("isrc", queryResult.BestMatch.Track.ISRC);
    Assert.AreEqual(firstTrackReference, queryResult.BestMatch.Track.TrackReference);

    // NOTE(review): 9 + 8 presumably counts matching signature positions per sub-fingerprint - confirm.
    Assert.AreEqual(9 + 8, queryResult.BestMatch.Similarity);
    Assert.AreEqual(1, queryResult.AnalyzedCandidatesCount);
    Assert.AreEqual(1, queryResult.ResultEntries.Count);
}