/// <summary>
/// Stubs only the group-aware sub-fingerprint lookup on the model service and checks that a
/// query configured with that track group id reports the stubbed track as the best match.
/// </summary>
public void OnlyTracksWithGroupIdAreConsideredAsPotentialCandidatesTest()
        {
            const string GroupId = "group-id";
            const int threshold = 5;
            const int trackId = 20;
            const int subFingerprintId = 10;

            var hashBuckets = new long[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
            var query = new HashData(new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 11 }, hashBuckets);
            var trackReference = new ModelReference<int>(trackId);
            var storedSubFingerprint = new SubFingerprintData(
                new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 },
                new ModelReference<int>(subFingerprintId),
                trackReference);

            // The mocked storage answers only the lookup that carries the group id.
            modelService
                .Setup(service => service.ReadSubFingerprintDataByHashBucketsThresholdWithGroupId(hashBuckets, threshold, GroupId))
                .Returns(new List<SubFingerprintData> { storedSubFingerprint });
            modelService
                .Setup(service => service.ReadTrackByReference(trackReference))
                .Returns(new TrackData { ISRC = "isrc", TrackReference = trackReference });

            var configuration = new CustomQueryConfiguration { TrackGroupId = GroupId };
            var result = queryFingerprintService.Query(new List<HashData> { query }, configuration);

            Assert.IsTrue(result.IsSuccessful);
            Assert.AreEqual("isrc", result.BestMatch.Track.ISRC);
            Assert.AreEqual(trackReference, result.BestMatch.Track.TrackReference);
        }
        /// <summary>
        /// Returns two stored sub-fingerprints of the same track from the mocked storage and
        /// expects one result entry whose Hamming similarity sum is 200.
        /// </summary>
        public void HammingSimilarityIsSummedUpAccrossAllSubFingerprintsTest()
        {
            const int trackId = 20;
            const int firstSubId = 10;
            const int secondSubId = 11;

            var query = new HashedFingerprint(GenericHashBuckets(), 0, 0, Enumerable.Empty<string>());
            var trackReference = new ModelReference<int>(trackId);
            var firstStored = new SubFingerprintData(GenericHashBuckets(), 1, 0, new ModelReference<int>(firstSubId), trackReference);
            var secondStored = new SubFingerprintData(GenericHashBuckets(), 2, 0.928f, new ModelReference<int>(secondSubId), trackReference);
            var configuration = new DefaultQueryConfiguration();

            // Non-batched path: every bucket lookup yields both stored sub-fingerprints.
            modelService
                .Setup(service => service.SupportsBatchedSubFingerprintQuery)
                .Returns(false);
            modelService
                .Setup(service => service.ReadSubFingerprints(It.IsAny<int[]>(), configuration))
                .Returns(new List<SubFingerprintData> { firstStored, secondStored });
            modelService
                .Setup(service => service.ReadTracksByReferences(new[] { trackReference }))
                .Returns(new List<TrackData> { new TrackData { ISRC = "isrc", TrackReference = trackReference } });

            var result = queryFingerprintService.Query(
                new List<HashedFingerprint> { query },
                configuration,
                modelService.Object);

            Assert.IsTrue(result.ContainsMatches);
            Assert.AreEqual("isrc", result.BestMatch.Track.ISRC);
            Assert.AreEqual(trackReference, result.BestMatch.Track.TrackReference);
            Assert.AreEqual(200, result.BestMatch.HammingSimilaritySum);
            Assert.AreEqual(1, result.ResultEntries.Count());
        }
Beispiel #3
0
        /// <summary>
        /// Hashes 100,000 randomly generated fingerprints into a 25-table RAM storage and asserts
        /// that, per table, at most 1% of the inserted hashes collided.
        /// </summary>
        public void DistributionOfHashesHasToBeUniform()
        {
            var hasher = LocalitySensitiveHashingAlgorithm.Instance;

            // Unseeded generator: every run hashes a different set of fingerprints.
            var rng = new Random();

            var ramStorage = new RAMStorage(25);

            float step = 8192f / 5512;
            var hashingConfig = new DefaultHashingConfig
            {
                NumberOfLSHTables = 25,
                NumberOfMinHashesPerTable = 4,
                HashBuckets = 0
            };

            var trackReference = new ModelReference<int>(1);
            int total = 100000;

            for (int index = 0; index < total; ++index)
            {
                var schema = TestUtilities.GenerateRandomFingerprint(rng, 200, 128, 32);
                var fingerprint = new Fingerprint(schema, index * step, (uint)index, Array.Empty<byte>());
                var hashed = hasher.Hash(fingerprint, hashingConfig);
                ramStorage.AddSubFingerprint(
                    new SubFingerprintData(
                        hashed.HashBins,
                        hashed.SequenceNumber,
                        hashed.StartsAt,
                        new ModelReference<uint>((uint)index),
                        trackReference));
            }

            // One count per table; the shortfall against the number of inserts is the collision count.
            foreach (var distinctHashes in ramStorage.HashCountsPerTable)
            {
                double collisions = (double)(total - distinctHashes) / total;
                Assert.IsTrue(collisions <= 0.01d, $"Less than 1% of collisions across 100K hashes: {collisions}");
            }
        }
        /// <summary>
        /// The mocked model service answers any query with two sub-fingerprints of one track;
        /// the test expects a single result entry for that track with a score of 200.
        /// </summary>
        public void HammingSimilarityIsSummedUpAcrossAllSubFingerprintsTest()
        {
            const int trackId = 20;
            const int firstSubId = 10;
            const int secondSubId = 11;

            var query = new HashedFingerprint(GenericHashBuckets(), 0, 0, Array.Empty<byte>());
            var trackReference = new ModelReference<int>(trackId);
            var firstStored = new SubFingerprintData(GenericHashBuckets(), 1, 0, new ModelReference<int>(firstSubId), trackReference);
            var secondStored = new SubFingerprintData(GenericHashBuckets(), 2, 0.928f, new ModelReference<int>(secondSubId), trackReference);
            var configuration = new DefaultQueryConfiguration();

            modelService
                .Setup(service => service.Query(It.IsAny<Hashes>(), It.IsAny<QueryConfiguration>()))
                .Returns(new[] { firstStored, secondStored });
            modelService
                .Setup(service => service.ReadTracksByReferences(new[] { trackReference }))
                .Returns(new List<TrackData> { new TrackData("id", string.Empty, string.Empty, 0d, trackReference) });

            var queryHashes = new Hashes(
                new List<HashedFingerprint> { query },
                1.48f,
                DateTime.Now,
                Enumerable.Empty<string>());
            var result = queryFingerprintService.Query(queryHashes, configuration, modelService.Object);

            Assert.IsTrue(result.ContainsMatches);
            Assert.AreEqual("id", result.BestMatch.Track.Id);
            Assert.AreEqual(trackReference, result.BestMatch.Track.TrackReference);
            Assert.AreEqual(200, result.BestMatch.Score);
            Assert.AreEqual(1, result.ResultEntries.Count());
        }
        /// <summary>
        /// Two stored sub-fingerprints of one track are returned for the query; the expected
        /// Hamming similarity sum is twice the length of the generic signature.
        /// </summary>
        public void HammingSimilarityIsSummedUpAccrossAllSubFingerprintsTest()
        {
            const int trackId = 20;
            const int firstSubId = 10;
            const int secondSubId = 11;

            var query = new HashedFingerprint(GenericSignature(), GenericHashBuckets(), 0, 0, Enumerable.Empty<string>());
            var trackReference = new ModelReference<int>(trackId);
            var firstStored = new SubFingerprintData(GenericHashBuckets(), 1, 0, new ModelReference<int>(firstSubId), trackReference);
            var secondStored = new SubFingerprintData(GenericHashBuckets(), 2, 0.928, new ModelReference<int>(secondSubId), trackReference);
            var configuration = new DefaultQueryConfiguration();

            modelService
                .Setup(service => service.SupportsBatchedSubFingerprintQuery)
                .Returns(false);
            modelService
                .Setup(service => service.ReadSubFingerprints(It.IsAny<long[]>(), configuration))
                .Returns(new List<SubFingerprintData> { firstStored, secondStored });
            modelService
                .Setup(service => service.ReadTrackByReference(trackReference))
                .Returns(new TrackData { ISRC = "isrc", TrackReference = trackReference });

            var result = queryFingerprintService.Query(
                new List<HashedFingerprint> { query },
                configuration,
                modelService.Object);

            Assert.IsTrue(result.ContainsMatches);
            Assert.AreEqual("isrc", result.BestMatch.Track.ISRC);
            Assert.AreEqual(trackReference, result.BestMatch.Track.TrackReference);
            Assert.AreEqual(GenericSignature().Length * 2, result.BestMatch.HammingSimilaritySum);
            Assert.AreEqual(1, result.ResultEntries.Count());
        }
Beispiel #6
0
        /// <summary>
        /// Stores 100 random fingerprints, then hashes 10 more random fingerprints and asserts that
        /// none of their hash bins finds an entry in any of the 25 tables.
        /// </summary>
        public void FingerprintsCantMatchUniformlyAtRandom()
        {
            var hasher = LocalitySensitiveHashingAlgorithm.Instance;

            // Unseeded generator: stored and probing fingerprints differ on every run.
            var rng = new Random();

            var ramStorage = new RAMStorage(25);

            float step = 8192f / 5512;
            var hashingConfig = new DefaultHashingConfig
            {
                NumberOfLSHTables = 25,
                NumberOfMinHashesPerTable = 4,
                HashBuckets = 0
            };

            var trackReference = new ModelReference<int>(1);

            // Populate the storage.
            for (int stored = 0; stored < 100; ++stored)
            {
                var schema = TestUtilities.GenerateRandomFingerprint(rng, 200, 128, 32);
                var fingerprint = new Fingerprint(schema, stored * step, (uint)stored, Array.Empty<byte>());
                var hashed = hasher.Hash(fingerprint, hashingConfig);
                ramStorage.AddSubFingerprint(
                    new SubFingerprintData(
                        hashed.HashBins,
                        hashed.SequenceNumber,
                        hashed.StartsAt,
                        new ModelReference<uint>((uint)stored),
                        trackReference));
            }

            // Probe with fresh random fingerprints; no table may return a hit.
            for (int probe = 0; probe < 10; ++probe)
            {
                var schema = TestUtilities.GenerateRandomFingerprint(rng, 200, 128, 32);
                var fingerprint = new Fingerprint(schema, probe * step, (uint)probe, Array.Empty<byte>());
                var hashed = hasher.Hash(fingerprint, hashingConfig);
                for (int table = 0; table < 25; ++table)
                {
                    var hits = ramStorage.GetSubFingerprintsByHashTableAndHash(table, hashed.HashBins[table]);
                    Assert.IsFalse(hits.Any());
                }
            }
        }
Beispiel #7
0
        /// <summary>
        /// Adds, from 100 parallel iterations, between 5 and 9 database matches per query point
        /// (all for the same track) and checks that every match is retrievable for that track
        /// and that each iteration contributed a distinct query sequence number.
        /// </summary>
        public void SameQueryHashGeneratesMultipleTrackMatches()
        {
            var groupedQueryResults = new GroupedQueryResults(10d, DateTime.Now);

            var random = new Random(1);
            int runs   = 100;

            // System.Random is not thread-safe: draw every per-run count up front on this thread
            // so the seeded sequence stays deterministic and the generator state cannot be corrupted.
            int[] counts = new int[runs];
            for (int i = 0; i < runs; ++i)
            {
                counts[i] = random.Next(5, 10);
            }

            var trackRef = new ModelReference<uint>(1);
            int k        = 0;

            Parallel.For(0, runs, i =>
            {
                var queryPoint = new HashedFingerprint(new int[25], (uint)i, i * 1.48f, Array.Empty<byte>());
                for (int j = 0; j < counts[i]; ++j)
                {
                    // Take the counter exactly once; reading k separately would race with other workers.
                    int id       = Interlocked.Increment(ref k);
                    int sequence = id - 1;
                    var dbPoint  = new SubFingerprintData(new int[25], (uint)sequence, sequence * 0.01f, new ModelReference<uint>((uint)id), trackRef);
                    groupedQueryResults.Add(queryPoint, dbPoint, i);
                }
            });

            var allMatches = groupedQueryResults.GetMatchesForTrack(trackRef).ToList();

            Assert.AreEqual(counts.Sum(), allMatches.Count);
            Assert.AreEqual(runs, allMatches.Select(m => m.QuerySequenceNumber).Distinct().Count());
        }
Beispiel #8
0
        /// <summary>
        /// Round-trips a <c>SubFingerprintData</c> instance (including its clusters) through the
        /// length-prefixed serializer and asserts the deserialized copy equals the original.
        /// </summary>
        public void ShouldSerialize()
        {
            var original = new SubFingerprintData(new[] { 1, 2, 3 }, 1, 1f, new ModelReference<int>(1), new ModelReference<int>(2))
            {
                Clusters = new List<string>() { "1", "2" }
            };

            // Serialize into an in-memory buffer and keep the raw bytes.
            byte[] payload;
            using (var output = new MemoryStream())
            {
                Serializer.SerializeWithLengthPrefix(output, original, PrefixStyle.Fixed32);
                payload = output.ToArray();
            }

            // Read the bytes back through an independent stream.
            using (var input = new MemoryStream(payload))
            {
                var roundTripped = Serializer.DeserializeWithLengthPrefix<SubFingerprintData>(input, PrefixStyle.Fixed32);

                Assert.AreEqual(original, roundTripped);
            }
        }
        /// <summary>
        /// Inserts 520 x 33 sub-fingerprints sharing the same hashes from parallel workers and
        /// asserts that each of the first 25 tables returns all of them for its hash.
        /// </summary>
        public void ShouldInsertEntriesInThreadSafeManner()
        {
            var ramStorage = new RAMStorage(50);
            var converter  = new HashConverter();

            byte[] rawHashes = Enumerable.Range(0, 100).Select(value => (byte)value).ToArray();
            var sharedHashes = converter.ToInts(rawHashes, 25);

            int trackTotal = 520;
            int perTrack   = 33;
            float step     = 8192f / 5512;

            Parallel.For(0, trackTotal, trackIndex =>
            {
                var trackReference = new ModelReference<uint>((uint)trackIndex);
                for (int sequence = 0; sequence < perTrack; ++sequence)
                {
                    storageAdd:
                    ramStorage.AddSubFingerprint(
                        new SubFingerprintData(
                            sharedHashes,
                            (uint)sequence,
                            sequence * step,
                            new ModelReference<uint>((uint)sequence),
                            trackReference,
                            Array.Empty<byte>()));
                }
            });

            // Every insert used identical hashes, so each table bucket must hold all of them.
            for (int table = 0; table < 25; ++table)
            {
                var bucket = ramStorage.GetSubFingerprintsByHashTableAndHash(table, sharedHashes[table]);
                Assert.AreEqual(trackTotal * perTrack, bucket.Count);
            }
        }
        /// <summary>
        /// Two stored signatures of one track are returned for the query buckets; the test expects
        /// a single analyzed candidate whose similarity is the sum 9 + 8.
        /// </summary>
        public void HammingSimilarityIsSummedUpAccrossAllSubFingerprintsTest()
        {
            const int threshold = 5;
            const int trackId = 20;
            const int firstSubId = 10;
            const int secondSubId = 11;

            var hashBuckets = new long[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
            var query = new HashData(new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 11 }, hashBuckets);
            var trackReference = new ModelReference<int>(trackId);
            var firstStored = new SubFingerprintData(
                new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 },
                new ModelReference<int>(firstSubId),
                trackReference);
            var secondStored = new SubFingerprintData(
                new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 10, 12 },
                new ModelReference<int>(secondSubId),
                trackReference);

            modelService
                .Setup(service => service.ReadSubFingerprintDataByHashBucketsWithThreshold(hashBuckets, threshold))
                .Returns(new List<SubFingerprintData> { firstStored, secondStored });
            modelService
                .Setup(service => service.ReadTrackByReference(trackReference))
                .Returns(new TrackData { ISRC = "isrc", TrackReference = trackReference });

            var result = queryFingerprintService.Query(
                new List<HashData> { query },
                new DefaultQueryConfiguration());

            Assert.IsTrue(result.IsSuccessful);
            Assert.AreEqual("isrc", result.BestMatch.Track.ISRC);
            Assert.AreEqual(trackReference, result.BestMatch.Track.TrackReference);
            Assert.AreEqual(9 + 8, result.BestMatch.Similarity);
            Assert.AreEqual(1, result.AnalyzedCandidatesCount);
            Assert.AreEqual(1, result.ResultEntries.Count);
        }
Beispiel #11
0
 /// <summary>
 /// Copies the hashes, sequence information and both reference ids of
 /// <paramref name="subFingerprintData"/> into this DTO.
 /// </summary>
 /// <param name="subFingerprintData">Source sub-fingerprint to copy from.</param>
 internal SubFingerprintDataDTO(SubFingerprintData subFingerprintData)
 {
     // Reference ids are cast to ulong for storage in the DTO.
     var subFingerprintId = (ulong)subFingerprintData.SubFingerprintReference.Id;
     var trackId = (ulong)subFingerprintData.TrackReference.Id;

     Hashes = subFingerprintData.Hashes;
     SequenceNumber = subFingerprintData.SequenceNumber;
     SequenceAt = subFingerprintData.SequenceAt;
     SubFingerprintReference = subFingerprintId;
     TrackReference = trackId;
 }
 /// <summary>
 /// Records one more matched pair: adds its similarity to the running sum, offers it as a
 /// best-match candidate and appends it to the list of matches.
 /// </summary>
 /// <param name="hashedFingerprint">Query fingerprint of the pair.</param>
 /// <param name="match">Stored sub-fingerprint of the pair.</param>
 /// <param name="hammingSimilarity">Similarity between the two.</param>
 /// <returns>This accumulator, so calls can be chained.</returns>
 public ResultEntryAccumulator Add(HashedFingerprint hashedFingerprint, SubFingerprintData match, int hammingSimilarity)
 {
     HammingSimilaritySum = HammingSimilaritySum + hammingSimilarity;

     var pair = new MatchedPair(hashedFingerprint, match, hammingSimilarity);
     ResetBestMatchIfAppropriate(pair);
     matches.Add(pair);

     return this;
 }
Beispiel #13
0
 /// <summary>
 /// Copies the hashes, sequence information and both reference ids (read as <c>ulong</c>)
 /// of <paramref name="subFingerprintData"/> into this DTO.
 /// </summary>
 /// <param name="subFingerprintData">Source sub-fingerprint to copy from.</param>
 internal SubFingerprintDataDTO(SubFingerprintData subFingerprintData)
 {
     ulong subFingerprintId = subFingerprintData.SubFingerprintReference.Get<ulong>();
     ulong trackId = subFingerprintData.TrackReference.Get<ulong>();

     Hashes = subFingerprintData.Hashes;
     SequenceNumber = subFingerprintData.SequenceNumber;
     SequenceAt = subFingerprintData.SequenceAt;
     SubFingerprintReference = subFingerprintId;
     TrackReference = trackId;
 }
Beispiel #14
0
        /// <summary>
        /// Builds two sub-fingerprints from identical values, each with its own array and
        /// reference instances, and asserts that they compare as equal.
        /// </summary>
        public void ShouldIdentifyAsEqual()
        {
            // Each invocation allocates fresh arguments so the two objects share no instances.
            Func<SubFingerprintData> create = () =>
                new SubFingerprintData(new int[0], 0, 0, new ModelReference<int>(1), new ModelReference<int>(0));

            var left = create();
            var right = create();

            Assert.AreEqual(left, right);
        }
        /// <summary>
        /// Three candidates from three different tracks are returned by the mocked storage while
        /// the configuration allows at most two tracks in the result. The test expects all three
        /// to be analyzed and the first and third tracks to be returned, in that order.
        /// </summary>
        public void MaximumNumberOfReturnedTracksIsLessThanAnalyzedCandidatesResultsTest()
        {
            long[]             buckets                = new long[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
            var                queryHash              = new HashData(new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 11 }, buckets);
            const int          DefaultThreshold       = 5;
            const int          FirstTrackId           = 20;
            const int          SecondTrackId          = 21;
            const int          ThirdTrackId           = 22;
            const int          FirstSubFingerprintId  = 10;
            const int          SecondSubFingerprintId = 11;
            // The third candidate previously reused SecondSubFingerprintId; give it its own id so
            // the three stored sub-fingerprints are genuinely distinct records.
            const int          ThirdSubFingerprintId  = 12;
            var                firstTrackReference    = new ModelReference <int>(FirstTrackId);
            var                thirdTrackReference    = new ModelReference <int>(ThirdTrackId);
            SubFingerprintData firstResult            = new SubFingerprintData(
                new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 },
                new ModelReference <int>(FirstSubFingerprintId),
                firstTrackReference);
            SubFingerprintData secondResult = new SubFingerprintData(
                new byte[] { 11, 2, 13, 4, 15, 6, 7, 8, 10, 12 },
                new ModelReference <int>(SecondSubFingerprintId),
                new ModelReference <int>(SecondTrackId));
            // Reuse thirdTrackReference so the stored candidate and the track lookup stub below
            // refer to the very same reference instance.
            SubFingerprintData thirdResult = new SubFingerprintData(
                new byte[] { 1, 2, 3, 4, 5, 15, 7, 8, 10, 12 },
                new ModelReference <int>(ThirdSubFingerprintId),
                thirdTrackReference);

            modelService.Setup(
                service => service.ReadSubFingerprintDataByHashBucketsWithThreshold(buckets, DefaultThreshold)).Returns(
                new List <SubFingerprintData> {
                firstResult, secondResult, thirdResult
            });
            modelService.Setup(service => service.ReadTrackByReference(firstTrackReference)).Returns(
                new TrackData {
                ISRC = "isrc", TrackReference = firstTrackReference
            });
            modelService.Setup(service => service.ReadTrackByReference(thirdTrackReference)).Returns(
                new TrackData {
                ISRC = "isrc_2", TrackReference = thirdTrackReference
            });

            var queryResult = queryFingerprintService.Query(
                modelService.Object,
                new List <HashData> {
                queryHash
            },
                new CustomQueryConfiguration
            {
                MaximumNumberOfTracksToReturnAsResult = 2, ThresholdVotes = DefaultThreshold
            });

            Assert.IsTrue(queryResult.IsSuccessful);
            Assert.AreEqual("isrc", queryResult.BestMatch.Track.ISRC);
            Assert.AreEqual(firstTrackReference, queryResult.BestMatch.Track.TrackReference);
            Assert.AreEqual(9, queryResult.BestMatch.Similarity);
            Assert.AreEqual(3, queryResult.AnalyzedCandidatesCount);
            Assert.AreEqual(2, queryResult.ResultEntries.Count);
            Assert.AreEqual(firstTrackReference, queryResult.ResultEntries[0].Track.TrackReference);
            Assert.AreEqual(thirdTrackReference, queryResult.ResultEntries[1].Track.TrackReference);
        }
Beispiel #16
0
 /// <summary>
 /// Asserts that two sub-fingerprints carry the same references and the same signature.
 /// </summary>
 /// <param name="expected">Sub-fingerprint holding the expected values.</param>
 /// <param name="actual">Sub-fingerprint under test.</param>
 private void AsserSubFingerprintsAreEqual(SubFingerprintData expected, SubFingerprintData actual)
 {
     Assert.AreEqual(expected.SubFingerprintReference, actual.SubFingerprintReference);
     Assert.AreEqual(expected.TrackReference, actual.TrackReference);

     // Compare lengths first: without this a longer actual signature would pass unnoticed
     // and a shorter one would surface as an index error instead of an assertion failure.
     Assert.AreEqual(expected.Signature.Length, actual.Signature.Length);
     for (int i = 0; i < expected.Signature.Length; i++)
     {
         Assert.AreEqual(expected.Signature[i], actual.Signature[i]);
     }
 }
Beispiel #17
0
        /// <summary>
        /// Adds the similarity to the running sum, wraps the two fingerprints in a matched pair,
        /// lets that pair compete for best match and stores it among the matches.
        /// </summary>
        /// <param name="hashedFingerprint">Query fingerprint of the pair.</param>
        /// <param name="match">Stored sub-fingerprint of the pair.</param>
        /// <param name="hammingSimilarity">Similarity between the two.</param>
        /// <returns>This accumulator, so calls can be chained.</returns>
        public ResultEntryAccumulator Add(HashedFingerprint hashedFingerprint, SubFingerprintData match, int hammingSimilarity)
        {
            HammingSimilaritySum = HammingSimilaritySum + hammingSimilarity;

            var candidatePair = new MatchedPair(hashedFingerprint, match, hammingSimilarity);
            ResetBestMatchIfAppropriate(candidatePair);
            matches.Add(candidatePair);

            return this;
        }
        /// <summary>
        /// Three candidates from three different tracks are returned by the mocked storage while
        /// the configuration allows at most two tracks in the result. The test expects all three
        /// to be analyzed and the first and third tracks to be returned, in that order.
        /// </summary>
        public void MaximumNumberOfReturnedTracksIsLessThanAnalyzedCandidatesResultsTest()
        {
            long[] buckets = new long[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
            var queryHash = new HashedFingerprint(new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 11 }, buckets, 1, 0);
            const int DefaultThreshold = 5;
            const int FirstTrackId = 20;
            const int SecondTrackId = 21;
            const int ThirdTrackId = 22;
            const int FirstSubFingerprintId = 10;
            const int SecondSubFingerprintId = 11;
            // The third candidate previously reused SecondSubFingerprintId; give it its own id so
            // the three stored sub-fingerprints are genuinely distinct records.
            const int ThirdSubFingerprintId = 12;
            var firstTrackReference = new ModelReference<int>(FirstTrackId);
            var thirdTrackReference = new ModelReference<int>(ThirdTrackId);
            var firstResult = new SubFingerprintData(
                new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 },
                1,
                0,
                new ModelReference<int>(FirstSubFingerprintId),
                firstTrackReference);
            SubFingerprintData secondResult = new SubFingerprintData(
                new byte[] { 11, 2, 13, 4, 15, 6, 7, 8, 10, 12 },
                2,
                0.928,
                new ModelReference<int>(SecondSubFingerprintId),
                new ModelReference<int>(SecondTrackId));
            // Reuse thirdTrackReference so the stored candidate and the track lookup stub below
            // refer to the very same reference instance.
            SubFingerprintData thirdResult = new SubFingerprintData(
                new byte[] { 1, 2, 3, 4, 5, 15, 7, 8, 10, 12 },
                3,
                0.928 * 2,
                new ModelReference<int>(ThirdSubFingerprintId),
                thirdTrackReference);

            modelService.Setup(
                service => service.ReadSubFingerprintDataByHashBucketsWithThreshold(buckets, DefaultThreshold)).Returns(
                    new List<SubFingerprintData> { firstResult, secondResult, thirdResult });
            modelService.Setup(service => service.ReadTrackByReference(firstTrackReference)).Returns(
                new TrackData { ISRC = "isrc", TrackReference = firstTrackReference });
            modelService.Setup(service => service.ReadTrackByReference(thirdTrackReference)).Returns(
                new TrackData { ISRC = "isrc_2", TrackReference = thirdTrackReference });

            var queryResult = queryFingerprintService.Query(
                modelService.Object,
                new List<HashedFingerprint> { queryHash },
                new CustomQueryConfiguration
                    {
                        MaximumNumberOfTracksToReturnAsResult = 2, ThresholdVotes = DefaultThreshold
                    });

            Assert.IsTrue(queryResult.IsSuccessful);
            Assert.AreEqual("isrc", queryResult.BestMatch.Track.ISRC);
            Assert.AreEqual(firstTrackReference, queryResult.BestMatch.Track.TrackReference);
            Assert.AreEqual(9, queryResult.BestMatch.Similarity);
            Assert.AreEqual(3, queryResult.AnalyzedCandidatesCount);
            Assert.AreEqual(2, queryResult.ResultEntries.Count);
            Assert.AreEqual(firstTrackReference, queryResult.ResultEntries[0].Track.TrackReference);
            Assert.AreEqual(thirdTrackReference, queryResult.ResultEntries[1].Track.TrackReference);
        }
Beispiel #19
0
        /// <summary>
        /// Inserts a track and one sub-fingerprint for it, reads the sub-fingerprint back by its
        /// reference and compares it with the inserted values.
        /// </summary>
        public void ReadTest()
        {
            var insertedTrack = TrackDao.InsertTrack(new TrackData("isrc", "artist", "title", "album", 1986, 200));
            var insertedSubFingerprint = SubFingerprintDao.InsertSubFingerprint(GenericSignature, insertedTrack);

            SubFingerprintData stored = SubFingerprintDao.ReadSubFingerprint(insertedSubFingerprint);

            var expected = new SubFingerprintData(GenericSignature, insertedSubFingerprint, insertedTrack);
            AsserSubFingerprintsAreEqual(expected, stored);
        }
        /// <summary>
        /// Asserts that two sub-fingerprints carry the same references, signature and sequence
        /// number, and that their sequence positions differ by less than <c>Epsilon</c>.
        /// </summary>
        /// <param name="expected">Sub-fingerprint holding the expected values.</param>
        /// <param name="actual">Sub-fingerprint under test.</param>
        private void AsserSubFingerprintsAreEqual(SubFingerprintData expected, SubFingerprintData actual)
        {
            Assert.AreEqual(expected.SubFingerprintReference, actual.SubFingerprintReference);
            Assert.AreEqual(expected.TrackReference, actual.TrackReference);

            // Compare lengths first: without this a longer actual signature would pass unnoticed
            // and a shorter one would surface as an index error instead of an assertion failure.
            Assert.AreEqual(expected.Signature.Length, actual.Signature.Length);
            for (int i = 0; i < expected.Signature.Length; i++)
            {
                Assert.AreEqual(expected.Signature[i], actual.Signature[i]);
            }

            Assert.AreEqual(expected.SequenceNumber, actual.SequenceNumber);
            // Sequence positions are floating-point values, so compare with a tolerance.
            Assert.IsTrue(System.Math.Abs(expected.SequenceAt - actual.SequenceAt) < Epsilon);
        }
Beispiel #21
0
        /// <summary>
        /// Builds a <c>SubFingerprintData</c> from a Solr DTO: the hash dictionary is converted to
        /// hash bins and both ids are wrapped in Solr model references.
        /// </summary>
        /// <param name="dto">DTO to convert.</param>
        /// <returns>The converted sub-fingerprint.</returns>
        private SubFingerprintData GetSubFingerprintData(SubFingerprintDTO dto)
        {
            long[] hashBins = this.dictionaryToHashConverter.FromSolrDictionaryToHashes(dto.Hashes);

            return new SubFingerprintData(
                hashBins,
                dto.SequenceNumber,
                dto.SequenceAt,
                new SolrModelReference(dto.SubFingerprintId),
                new SolrModelReference(dto.TrackId));
        }
Beispiel #22
0
        /// <summary>
        /// Asserts that two sub-fingerprints carry the same references, signature and sequence
        /// number, and that their sequence positions differ by less than <c>Epsilon</c>.
        /// </summary>
        /// <param name="expected">Sub-fingerprint holding the expected values.</param>
        /// <param name="actual">Sub-fingerprint under test.</param>
        private void AsserSubFingerprintsAreEqual(SubFingerprintData expected, SubFingerprintData actual)
        {
            Assert.AreEqual(expected.SubFingerprintReference, actual.SubFingerprintReference);
            Assert.AreEqual(expected.TrackReference, actual.TrackReference);

            // Compare lengths first: without this a longer actual signature would pass unnoticed
            // and a shorter one would surface as an index error instead of an assertion failure.
            Assert.AreEqual(expected.Signature.Length, actual.Signature.Length);
            for (int i = 0; i < expected.Signature.Length; i++)
            {
                Assert.AreEqual(expected.Signature[i], actual.Signature[i]);
            }

            Assert.AreEqual(expected.SequenceNumber, actual.SequenceNumber);
            // Sequence positions are floating-point values, so compare with a tolerance.
            Assert.IsTrue(System.Math.Abs(expected.SequenceAt - actual.SequenceAt) < Epsilon);
        }
        /// <summary>
        /// Three candidates from three different tracks are returned by the mocked storage while
        /// the configuration caps the result at two tracks. The test expects exactly two result
        /// entries, for the first and second tracks, in that order.
        /// </summary>
        public void MaximumNumberOfReturnedTracksIsLessThanAnalyzedCandidatesResultsTest()
        {
            var queryHash = new HashedFingerprint(GenericSignature(), GenericHashBuckets(), 1, 0, Enumerable.Empty<string>());
            const int DefaultThreshold = 5;
            const int FirstTrackId = 20;
            const int SecondTrackId = 21;
            const int ThirdTrackId = 22;
            const int FirstSubFingerprintId = 10;
            const int SecondSubFingerprintId = 11;
            // The third candidate previously reused SecondSubFingerprintId; give it its own id so
            // the three stored sub-fingerprints are genuinely distinct records.
            const int ThirdSubFingerprintId = 12;
            var firstTrackReference = new ModelReference<int>(FirstTrackId);
            var secondTrackReference = new ModelReference<int>(SecondTrackId);
            var firstResult = new SubFingerprintData(
                GenericHashBuckets(),
                1,
                0,
                new ModelReference<int>(FirstSubFingerprintId),
                firstTrackReference);
            var secondResult = new SubFingerprintData(
                GenericHashBuckets(),
                2,
                0.928,
                new ModelReference<int>(SecondSubFingerprintId),
                secondTrackReference);
            var thirdResult = new SubFingerprintData(
                GenericHashBuckets(),
                3,
                0.928 * 2,
                new ModelReference<int>(ThirdSubFingerprintId),
                new ModelReference<int>(ThirdTrackId));

            var customQueryConfiguration = new DefaultQueryConfiguration { MaxTracksToReturn = 2, ThresholdVotes = DefaultThreshold };

            modelService.Setup(service => service.SupportsBatchedSubFingerprintQuery).Returns(false);
            modelService.Setup(
                service => service.ReadSubFingerprints(It.IsAny<long[]>(), customQueryConfiguration)).Returns(
                    new List<SubFingerprintData> { firstResult, secondResult, thirdResult });
            // Only the first two tracks are stubbed; the third track has no lookup set up.
            modelService.Setup(service => service.ReadTrackByReference(firstTrackReference)).Returns(
                new TrackData { ISRC = "isrc", TrackReference = firstTrackReference });
            modelService.Setup(service => service.ReadTrackByReference(secondTrackReference)).Returns(
               new TrackData { ISRC = "isrc_1", TrackReference = secondTrackReference });

            var queryResult = queryFingerprintService.Query(new List<HashedFingerprint> { queryHash }, customQueryConfiguration, modelService.Object);

            Assert.IsTrue(queryResult.ContainsMatches);
            Assert.AreEqual("isrc", queryResult.BestMatch.Track.ISRC);
            Assert.AreEqual(firstTrackReference, queryResult.BestMatch.Track.TrackReference);
            Assert.AreEqual(50, queryResult.BestMatch.HammingSimilaritySum);
            Assert.AreEqual(2, queryResult.ResultEntries.Count());
            var results = queryResult.ResultEntries.ToList();
            Assert.AreEqual(firstTrackReference, results[0].Track.TrackReference);
            Assert.AreEqual(secondTrackReference,  results[1].Track.TrackReference);
        }
 /// <summary>
 /// For every candidate, converts its hashes to a byte signature of the query's length,
 /// computes the Hamming similarity against the query and folds the result into the
 /// per-track accumulator (creating the entry on first sight of a track).
 /// </summary>
 /// <param name="candidates">Stored sub-fingerprints to score.</param>
 /// <param name="expected">Query fingerprint the candidates are compared with.</param>
 /// <param name="accumulator">Per-track accumulators, keyed by track reference.</param>
 public void AccumulateHammingSimilarity(IEnumerable <SubFingerprintData> candidates, HashedFingerprint expected, ConcurrentDictionary <IModelReference, ResultEntryAccumulator> accumulator)
 {
     foreach (SubFingerprintData candidate in candidates)
     {
         byte[] candidateSignature = hashConverter.ToBytes(candidate.Hashes, expected.SubFingerprint.Length);
         int similarity = CalculateHammingSimilarity(expected.SubFingerprint, candidateSignature);

         // NOTE(review): the update delegate mutates the existing accumulator in place, and
         // ConcurrentDictionary runs these delegates outside its internal lock; whether
         // concurrent callers are safe depends on ResultEntryAccumulator.Add — confirm.
         accumulator.AddOrUpdate(
             candidate.TrackReference,
             key => new ResultEntryAccumulator(expected, candidate, similarity),
             (key, existing) => existing.Add(expected, candidate, similarity));
     }
 }
        /// <summary>
        /// Checks that the query result holds only <c>MaxTracksToReturn</c> entries although the
        /// mocked model service returns candidates that belong to three different tracks.
        /// </summary>
        public void MaximumNumberOfReturnedTracksIsLessThanAnalyzedCandidatesResultsTest()
        {
            const int votesThreshold    = 5;
            const int trackIdA          = 20;
            const int trackIdB          = 21;
            const int trackIdC          = 22;
            const int subFingerprintIdA = 10;
            const int subFingerprintIdB = 11;

            // Arrange: one query fingerprint and one candidate per track.
            var fingerprint = new HashedFingerprint(GenericHashBuckets(), 1, 0, Array.Empty<byte>());
            var trackA      = new ModelReference<int>(trackIdA);
            var trackB      = new ModelReference<int>(trackIdB);
            var candidateA  = new SubFingerprintData(GenericHashBuckets(), 1, 0, new ModelReference<int>(subFingerprintIdA), trackA);
            var candidateB  = new SubFingerprintData(GenericHashBuckets(), 2, 0.928f, new ModelReference<int>(subFingerprintIdB), trackB);

            // NOTE(review): the third candidate reuses the second sub-fingerprint id; possibly a copy-paste slip - confirm.
            var candidateC = new SubFingerprintData(GenericHashBuckets(), 3, 0.928f * 2, new ModelReference<int>(subFingerprintIdB), new ModelReference<int>(trackIdC));

            var configuration = new DefaultQueryConfiguration
            {
                MaxTracksToReturn = 2,
                ThresholdVotes    = votesThreshold
            };

            modelService
                .Setup(service => service.Query(It.IsAny<Hashes>(), configuration))
                .Returns(new[] { candidateA, candidateB, candidateC });

            // Only the two tracks that are expected to be returned are looked up.
            modelService
                .Setup(service => service.ReadTracksByReferences(new[] { trackA, trackB }))
                .Returns(new List<TrackData>
                {
                    new TrackData("id", string.Empty, string.Empty, 0d, trackA),
                    new TrackData("id_1", string.Empty, string.Empty, 0d, trackB)
                });

            var query = new Hashes(new List<HashedFingerprint> { fingerprint }, 1.48f, DateTime.Now, Enumerable.Empty<string>());

            // Act
            var queryResult = queryFingerprintService.Query(query, configuration, modelService.Object);

            // Assert: best match is the first track, and exactly two entries are returned in order.
            Assert.IsTrue(queryResult.ContainsMatches);
            Assert.AreEqual("id", queryResult.BestMatch.Track.Id);
            Assert.AreEqual(trackA, queryResult.BestMatch.Track.TrackReference);
            Assert.AreEqual(100, queryResult.BestMatch.Score);
            Assert.AreEqual(2, queryResult.ResultEntries.Count());

            var entries = queryResult.ResultEntries.ToList();
            Assert.AreEqual(trackA, entries[0].Track.TrackReference);
            Assert.AreEqual(trackB, entries[1].Track.TrackReference);
        }
        /// <summary>
        /// Builds <paramref name="totalCount"/> sub-fingerprints and distributes them round-robin
        /// over <c>totalCount / countPerTrack</c> track ids (at least one track).
        /// </summary>
        /// <param name="totalCount">Number of sub-fingerprints to create.</param>
        /// <param name="countPerTrack">Desired number of sub-fingerprints per track; a value of 0 still throws <c>DivideByZeroException</c>.</param>
        /// <returns>The generated sub-fingerprints, in creation order.</returns>
        private IEnumerable <SubFingerprintData> GetSubFingerprints(int totalCount, int countPerTrack)
        {
            int tracksCount = totalCount / countPerTrack;

            // Integer division yields 0 when countPerTrack exceeds totalCount; fall back to a
            // single track so the modulo below does not divide by zero.
            if (tracksCount == 0)
            {
                tracksCount = 1;
            }

            var subFingerprints = new List <SubFingerprintData>();

            for (int i = 0; i < totalCount; ++i)
            {
                // Consecutive sub-fingerprints cycle through the track ids.
                int trackId        = i % tracksCount;
                var subFingerprint = new SubFingerprintData(new int[25], (uint)i, i * 1.48f, new ModelReference <ulong>((ulong)i), new ModelReference <int>(trackId));
                subFingerprints.Add(subFingerprint);
            }

            return(subFingerprints);
        }
Beispiel #27
0
        /// <summary>
        /// Records one match between a query fingerprint and a stored sub-fingerprint: the
        /// similarity is added to the track's running sum and the match is filed under the
        /// query fingerprint's sequence number.
        /// </summary>
        /// <param name="hashedFingerprint">Query fingerprint that produced the match.</param>
        /// <param name="subFingerprintData">Stored sub-fingerprint that was matched.</param>
        /// <param name="hammingSimilarity">Similarity of the pair.</param>
        public void Add(HashedFingerprint hashedFingerprint, SubFingerprintData subFingerprintData, int hammingSimilarity)
        {
            var track = subFingerprintData.TrackReference;
            similaritySumPerTrack.AddOrUpdate(track, hammingSimilarity, (_, runningSum) => runningSum + hammingSimilarity);

            var pairing = new MatchedWith(hashedFingerprint.StartsAt, subFingerprintData.SequenceAt, hammingSimilarity);

            // NOTE(review): this lookup-then-insert on `matches` is not guarded by a lock here - confirm callers are single-threaded.
            if (matches.TryGetValue(hashedFingerprint.SequenceNumber, out var existing))
            {
                existing.AddOrUpdateNewMatch(track, pairing);
            }
            else
            {
                matches.Add(hashedFingerprint.SequenceNumber, new Candidates(track, pairing));
            }
        }
Beispiel #28
0
        /// <summary>
        /// Creates <paramref name="candidatesCount"/> sub-fingerprints for one track. Every entry
        /// except the last has its first hash overwritten with 0; the last keeps the generic hashes.
        /// </summary>
        /// <param name="trackReference">Track all generated sub-fingerprints belong to.</param>
        /// <param name="candidatesCount">Number of sub-fingerprints to generate.</param>
        /// <returns>The generated sub-fingerprints, in creation order.</returns>
        private IEnumerable <SubFingerprintData> GetSubFingerprintsForTrack(ModelReference <int> trackReference, int candidatesCount)
        {
            const double OneFingerprintLength = 0.256;
            int          lastIndex            = candidatesCount - 1;
            var          generated            = new List <SubFingerprintData>();

            int index = 0;
            while (index < lastIndex)
            {
                var altered = new SubFingerprintData(GenericHashBuckets(), index, OneFingerprintLength * index, new ModelReference <int>(index), trackReference);
                altered.Hashes[0] = 0;
                generated.Add(altered);
                ++index;
            }

            // NOTE(review): the last entry uses candidatesCount (not candidatesCount - 1) for its
            // time offset and reference id - confirm this gap is intended.
            generated.Add(new SubFingerprintData(GenericHashBuckets(), lastIndex, OneFingerprintLength * candidatesCount, new ModelReference <int>(candidatesCount), trackReference));
            return(generated);
        }
Beispiel #29
0
        /// <summary>
        /// Stores a hashed fingerprint as a new sub-fingerprint of the given track and indexes its hash bins.
        /// </summary>
        /// <param name="hashedFingerprint">Fingerprint supplying hash bins, sequence number, start time and clusters.</param>
        /// <param name="trackReference">Track the sub-fingerprint belongs to.</param>
        public void AddSubfingerprint(HashedFingerprint hashedFingerprint, IModelReference trackReference)
        {
            // Atomically take the next counter value as the new sub-fingerprint id.
            ulong nextId    = (ulong)Interlocked.Increment(ref subFingerprintReferenceCounter);
            var   reference = new ModelReference <ulong>(nextId);

            var stored = new SubFingerprintData(hashedFingerprint.HashBins, hashedFingerprint.SequenceNumber, hashedFingerprint.StartsAt, reference, trackReference)
            {
                Clusters = hashedFingerprint.Clusters
            };

            SubFingerprints[(ulong)stored.SubFingerprintReference.Id] = stored;
            InsertHashes(hashedFingerprint.HashBins, reference.Id);
        }
Beispiel #30
0
        /// <summary>
        /// Inserts a track and one sub-fingerprint, reads the sub-fingerprint back by id and
        /// checks it equals what was inserted.
        /// </summary>
        public void ReadTest()
        {
            // Arrange
            int  trackId          = TrackDao.Insert(new TrackData("isrc", "artist", "title", "album", 1986, 200));
            long subFingerprintId = SubFingerprintDao.Insert(GenericSignature, trackId);

            // Act
            SubFingerprintData actual = SubFingerprintDao.ReadById(subFingerprintId);

            // Assert
            var expected = new SubFingerprintData(
                GenericSignature,
                new ModelReference <long>(subFingerprintId),
                new ModelReference <int>(trackId));
            AsserSubFingerprintsAreEqual(expected, actual);
        }
Beispiel #31
0
 /// <summary>
 /// Records one scored match under the lock: the score is added to the track's running sum
 /// and the match is filed under the query fingerprint's sequence number.
 /// </summary>
 /// <param name="queryFingerprint">Query fingerprint that produced the match.</param>
 /// <param name="resultSubFingerprint">Stored sub-fingerprint that was matched.</param>
 /// <param name="score">Score of the pair.</param>
 public void Add(HashedFingerprint queryFingerprint, SubFingerprintData resultSubFingerprint, double score)
 {
     lock (lockObject)
     {
         var track = resultSubFingerprint.TrackReference;
         scoreSumPerTrack.AddOrUpdate(track, score, (_, runningSum) => runningSum + score);

         var pairing = new MatchedWith(
             queryFingerprint.SequenceNumber,
             queryFingerprint.StartsAt,
             resultSubFingerprint.SequenceNumber,
             resultSubFingerprint.SequenceAt,
             score);

         // Extend the candidates of this query sequence number, or start a new set for it.
         if (sequenceToCandidates.TryGetValue(queryFingerprint.SequenceNumber, out Candidates existing))
         {
             existing.AddNewMatchForTrack(track, pairing);
         }
         else
         {
             sequenceToCandidates.Add(queryFingerprint.SequenceNumber, new Candidates(track, pairing));
         }
     }
 }
        /// <summary>
        /// Stores a signature as a new sub-fingerprint of the given track and registers its hash
        /// data under that track.
        /// </summary>
        /// <param name="signature">Sub-fingerprint signature to store.</param>
        /// <param name="trackId">Id of the track the signature belongs to.</param>
        /// <returns>The counter value assigned as the new sub-fingerprint id.</returns>
        public long Insert(byte[] signature, int trackId)
        {
            lock (lockObject)
            {
                counter++;
                var subFingerprint = new SubFingerprintData(signature, new ModelReference <long>(counter), new ModelReference <int>(trackId));
                storage.SubFingerprints[counter] = subFingerprint;

                // Single lookup instead of ContainsKey followed by repeated indexer reads.
                if (!storage.TracksHashes.TryGetValue(trackId, out var trackHashes))
                {
                    trackHashes = new ConcurrentDictionary <long, HashData>();
                    storage.TracksHashes[trackId] = trackHashes;
                }

                trackHashes[counter] = new HashData {
                    SubFingerprint = signature
                };
                return(counter);
            }
        }
        /// <summary>
        /// Converts Solr result DTOs into <c>SubFingerprintData</c> instances.
        /// </summary>
        /// <param name="results">DTOs to convert.</param>
        /// <returns>One converted sub-fingerprint per DTO, in input order.</returns>
        private IEnumerable<SubFingerprintData> ConvertResults(IEnumerable<SubFingerprintDTO> results)
        {
            // TODO refactor, extracting this constant
            const int SignatureLength = 100;

            var converted = new List<SubFingerprintData>();
            foreach (var dto in results)
            {
                // Solr dictionary -> hash bins -> byte signature.
                long[] hashBins = this.dictionaryToHashConverter.FromSolrDictionary(dto.Hashes);
                byte[] signature = this.hashConverter.ToBytes(hashBins, SignatureLength);

                converted.Add(
                    new SubFingerprintData(
                        signature,
                        dto.SequenceNumber,
                        dto.SequenceAt,
                        new SolrModelReference(dto.SubFingerprintId),
                        new SolrModelReference(dto.TrackId)));
            }

            return converted;
        }
Beispiel #34
0
        /// <summary>
        /// Scores each candidate's hashes against the query fingerprint's hash bins and folds the
        /// score into the accumulator entry of the candidate's track.
        /// </summary>
        /// <param name="candidates">Candidate sub-fingerprints to score.</param>
        /// <param name="expected">Query fingerprint; its <c>HashBins</c> are the comparison baseline.</param>
        /// <param name="accumulator">Per-track accumulators keyed by track reference; updated in place.</param>
        /// <param name="keysPerHash">Passed through to <c>CalculateHammingSimilarity</c>.</param>
        public void AccumulateHammingSimilarity(
            IEnumerable <SubFingerprintData> candidates,
            HashedFingerprint expected,
            ConcurrentDictionary <IModelReference, ResultEntryAccumulator> accumulator,
            int keysPerHash)
        {
            foreach (var candidate in candidates)
            {
                int similarity = CalculateHammingSimilarity(expected.HashBins, candidate.Hashes, keysPerHash);

                // First match for a track creates its accumulator; later matches are added to it.
                accumulator.AddOrUpdate(
                    candidate.TrackReference,
                    _ => new ResultEntryAccumulator(expected, candidate, similarity),
                    (_, existing) => existing.Add(expected, candidate, similarity));
            }
        }
Beispiel #35
0
        /// <summary>
        /// Adds matches for a single track from parallel iterations and checks that
        /// <c>GetMatchesForTrackOrderedByQueryAt</c> returns them with ascending <c>QueryAt</c>.
        /// </summary>
        public void MatchesShouldBeOrderedByQueryAt()
        {
            int iterations = 1000;

            var track   = new ModelReference <int>(1);
            var grouped = new GroupedQueryResults(Enumerable.Empty <HashedFingerprint>());

            // Query time grows with i while the result time shrinks, so the two orderings differ.
            Parallel.For(0, iterations, i =>
            {
                var query  = new HashedFingerprint(new int[0], (uint)i, i, new string[0]);
                var stored = new SubFingerprintData(new int[0], (uint)i, iterations - i, new ModelReference <uint>((uint)i), track);
                grouped.Add(query, stored, i);
            });

            var queryTimes = grouped.GetMatchesForTrackOrderedByQueryAt(track)
                                    .Select(match => (int)match.QueryAt)
                                    .ToList();

            CollectionAssert.AreEqual(Enumerable.Range(0, iterations), queryTimes);
        }
Beispiel #36
0
        /// <summary>
        /// Adds matches for five tracks from parallel iterations and checks the per-track
        /// similarity sums, the top-track ordering, the best match and the per-track match counts.
        /// </summary>
        public void ShouldAccumulateResults()
        {
            int runs = 1000;

            var tracks  = new[] { 1, 2, 3, 4, 5 }.Select(id => new ModelReference <int>(id)).ToArray();
            var grouped = new GroupedQueryResults(Enumerable.Empty <HashedFingerprint>());

            // Iteration i contributes score i to track (i % tracks.Length).
            Parallel.For(0, runs, i =>
            {
                var query  = new HashedFingerprint(new int[0], (uint)i, i * 0.05f, new string[0]);
                var stored = new SubFingerprintData(new int[0], (uint)i, i * 0.07f, new ModelReference <uint>((uint)i), tracks[i % tracks.Length]);
                grouped.Add(query, stored, i);
            });

            Assert.IsTrue(grouped.ContainsMatches);

            // Track t receives the scores t, t + 5, t + 10, ...; this is their closed-form sum.
            int matchesPerTrack = runs / tracks.Length;
            for (int t = 0; t < tracks.Length; ++t)
            {
                int expectedSum = (matchesPerTrack - 1) * runs / 2 + matchesPerTrack * t;
                Assert.AreEqual(expectedSum, grouped.GetHammingSimilaritySumForTrack(tracks[t]));
            }

            // Top tracks are expected in reverse order of the tracks array.
            var topTracks = grouped.GetTopTracksByHammingSimilarity(tracks.Length * 2).ToList();
            for (int t = 0; t < tracks.Length; ++t)
            {
                Assert.AreEqual(tracks[tracks.Length - t - 1], topTracks[t]);
            }

            // The best match of the last track is expected to be the one from the final iteration.
            var best = grouped.GetBestMatchForTrack(tracks.Last());
            Assert.AreEqual((runs - 1) * 0.05f, best.QueryAt, 0.000001);
            Assert.AreEqual((runs - 1) * 0.07f, best.ResultAt, 0.000001);

            for (int t = 0; t < tracks.Length; ++t)
            {
                var matchesForTrack = grouped.GetMatchesForTrackOrderedByQueryAt(tracks[t]).ToList();
                Assert.AreEqual(runs / tracks.Length, matchesForTrack.Count);
            }
        }
 /// <summary>
 /// Creates an accumulator seeded with its first matched pair.
 /// </summary>
 /// <param name="hashedFingerprint">Query fingerprint of the first pair.</param>
 /// <param name="match">Matched sub-fingerprint of the first pair.</param>
 /// <param name="hammingSimilarity">Similarity of the first pair.</param>
 public ResultEntryAccumulator(HashedFingerprint hashedFingerprint, SubFingerprintData match, int hammingSimilarity)
 {
     // The first pair is the initial best match; it is set before Add runs.
     BestMatch = new MatchedPair(hashedFingerprint, match, hammingSimilarity);
     // The same pair is then recorded through the regular Add path.
     Add(hashedFingerprint, match, hammingSimilarity);
 }
        /// <summary>
        /// Feeds candidates with descending <c>SequenceAt</c> into the similarity utility and
        /// checks that the accumulated matches come back with ascending <c>SequenceAt</c>.
        /// </summary>
        public void ShouldSortMatchesProperly()
        {
            const double OneFingerprintLength = 1.0d;
            const int CandidatesCount = 20;
            var trackReference = new ModelReference<int>(0);

            // Candidate i gets SequenceAt = CandidatesCount - i, i.e. 20, 19, ..., 1.
            List<SubFingerprintData> subFingerprints = new List<SubFingerprintData>();
            for (int i = 0; i < CandidatesCount; ++i)
            {
                var sub = new SubFingerprintData(GenericHashBuckets(), i, OneFingerprintLength * (CandidatesCount - i), new ModelReference<int>(i), trackReference);
                subFingerprints.Add(sub);
            }

            var accumulator = new ConcurrentDictionary<IModelReference, ResultEntryAccumulator>();

            similarityUtility.AccumulateHammingSimilarity(subFingerprints, new HashedFingerprint(GenericSignature(), GenericHashBuckets(), 1, 0d, Enumerable.Empty<string>()), accumulator);

            // Derive the expected range from CandidatesCount so it cannot drift from the setup above.
            var expected = Enumerable.Range(1, CandidatesCount);
            var actual = accumulator[trackReference].Matches.Select(m => m.SubFingerprint.SequenceAt).ToList();

            CollectionAssert.AreEqual(expected, actual);
        }
        /// <summary>
        /// Creates <paramref name="candidatesCount"/> sub-fingerprints for one track. Every entry
        /// except the last has its first hash overwritten with 0; the last keeps the generic hashes.
        /// </summary>
        /// <param name="trackReference">Track all generated sub-fingerprints belong to.</param>
        /// <param name="candidatesCount">Number of sub-fingerprints to generate.</param>
        /// <returns>The generated sub-fingerprints, in creation order.</returns>
        private IEnumerable<SubFingerprintData> GetSubFingerprintsForTrack(ModelReference<int> trackReference, int candidatesCount)
        {
            const double OneFingerprintLength = 0.256;
            int lastIndex = candidatesCount - 1;
            var generated = new List<SubFingerprintData>();

            int index = 0;
            while (index < lastIndex)
            {
                var altered = new SubFingerprintData(GenericHashBuckets(), index, OneFingerprintLength * index, new ModelReference<int>(index), trackReference);
                altered.Hashes[0] = 0;
                generated.Add(altered);
                ++index;
            }

            // NOTE(review): the last entry uses candidatesCount (not candidatesCount - 1) for its
            // time offset and reference id - confirm this gap is intended.
            generated.Add(new SubFingerprintData(GenericHashBuckets(), lastIndex, OneFingerprintLength * candidatesCount, new ModelReference<int>(candidatesCount), trackReference));
            return generated;
        }
        /// <summary>
        /// Checks that a query configured with a track group id reads candidates through the
        /// group-aware model service call and reports the matching track.
        /// </summary>
        public void OnlyTracksWithGroupIdAreConsideredAsPotentialCandidatesTest()
        {
            const int VotesThreshold = 5;
            const int TrackId = 20;
            const int SubFingerprintId = 10;
            const string GroupId = "group-id";

            // Arrange: the query signature differs from the stored one only in its last byte.
            long[] buckets = new long[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
            var query = new HashedFingerprint(new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 11 }, buckets, 0, 0);
            var track = new ModelReference<int>(TrackId);
            var stored = new SubFingerprintData(
                new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 },
                1,
                0.928,
                new ModelReference<int>(SubFingerprintId),
                track);

            modelService
                .Setup(service => service.ReadSubFingerprintDataByHashBucketsThresholdWithGroupId(buckets, VotesThreshold, GroupId))
                .Returns(new List<SubFingerprintData> { stored });
            modelService
                .Setup(service => service.ReadTrackByReference(track))
                .Returns(new TrackData { ISRC = "isrc", TrackReference = track });

            // Act
            var queryResult = queryFingerprintService.Query(
                modelService.Object,
                new List<HashedFingerprint> { query },
                new CustomQueryConfiguration { TrackGroupId = GroupId });

            // Assert
            Assert.IsTrue(queryResult.IsSuccessful);
            Assert.AreEqual("isrc", queryResult.BestMatch.Track.ISRC);
            Assert.AreEqual(track, queryResult.BestMatch.Track.TrackReference);
        }
        /// <summary>
        /// Checks that the similarities of two sub-fingerprints of the same track are summed into
        /// one result entry.
        /// </summary>
        public void HammingSimilarityIsSummedUpAccrossAllSubFingerprintsTest()
        {
            const int VotesThreshold = 5;
            const int TrackId = 20;
            const int SubFingerprintIdA = 10;
            const int SubFingerprintIdB = 11;

            // Arrange: both stored signatures belong to the same track.
            long[] buckets = new long[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
            var query = new HashedFingerprint(new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 11 }, buckets, 0, 0);
            var track = new ModelReference<int>(TrackId);
            var storedA = new SubFingerprintData(new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }, 1, 0, new ModelReference<int>(SubFingerprintIdA), track);
            var storedB = new SubFingerprintData(new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 10, 12 }, 2, 0.928, new ModelReference<int>(SubFingerprintIdB), track);

            modelService
                .Setup(service => service.ReadSubFingerprintDataByHashBucketsWithThreshold(buckets, VotesThreshold))
                .Returns(new List<SubFingerprintData> { storedA, storedB });
            modelService
                .Setup(service => service.ReadTrackByReference(track))
                .Returns(new TrackData { ISRC = "isrc", TrackReference = track });

            // Act
            var queryResult = queryFingerprintService.Query(
                modelService.Object,
                new List<HashedFingerprint> { query },
                new DefaultQueryConfiguration());

            // Assert: the two per-candidate similarities (9 and 8) are summed for the single track.
            Assert.IsTrue(queryResult.IsSuccessful);
            Assert.AreEqual("isrc", queryResult.BestMatch.Track.ISRC);
            Assert.AreEqual(track, queryResult.BestMatch.Track.TrackReference);
            Assert.AreEqual(9 + 8, queryResult.BestMatch.Similarity);
            Assert.AreEqual(1, queryResult.AnalyzedCandidatesCount);
            Assert.AreEqual(1, queryResult.ResultEntries.Count);
        }