/// <summary>
/// Two sub-fingerprints returned by the model service reference the same track;
/// the query is expected to report a single result entry for that track with a score of 200.
/// </summary>
public void HammingSimilarityIsSummedUpAcrossAllSubFingerprintsTest()
{
    const int trackId = 20;
    const int subFingerprintIdA = 10;
    const int subFingerprintIdB = 11;

    // Arrange: one query fingerprint and two stored sub-fingerprints owned by the same track.
    var query = new HashedFingerprint(GenericHashBuckets(), 0, 0, Array.Empty<byte>());
    var trackReference = new ModelReference<int>(trackId);
    var storedA = new SubFingerprintData(
        GenericHashBuckets(),
        1,
        0,
        new ModelReference<int>(subFingerprintIdA),
        trackReference);
    var storedB = new SubFingerprintData(
        GenericHashBuckets(),
        2,
        0.928f,
        new ModelReference<int>(subFingerprintIdB),
        trackReference);
    var configuration = new DefaultQueryConfiguration();

    modelService
        .Setup(service => service.Query(It.IsAny<Hashes>(), It.IsAny<QueryConfiguration>()))
        .Returns(new[] { storedA, storedB });
    modelService
        .Setup(service => service.ReadTracksByReferences(new[] { trackReference }))
        .Returns(new List<TrackData>
        {
            new TrackData("id", string.Empty, string.Empty, 0d, trackReference)
        });

    var hashes = new Hashes(
        new List<HashedFingerprint> { query },
        1.48f,
        DateTime.Now,
        Enumerable.Empty<string>());

    // Act
    var outcome = queryFingerprintService.Query(hashes, configuration, modelService.Object);

    // Assert
    Assert.IsTrue(outcome.ContainsMatches);
    Assert.AreEqual("id", outcome.BestMatch.Track.Id);
    Assert.AreEqual(trackReference, outcome.BestMatch.Track.TrackReference);
    Assert.AreEqual(200, outcome.BestMatch.Score);
    Assert.AreEqual(1, outcome.ResultEntries.Count());
}
/// <summary>
/// Creates a query command that keeps the supplied collaborators and starts out with
/// freshly constructed fingerprint and query configurations.
/// </summary>
/// <param name="fingerprintCommandBuilder">Fingerprint command builder stored on this instance.</param>
/// <param name="queryFingerprintService">Query fingerprint service stored on this instance.</param>
public QueryCommand(
    IFingerprintCommandBuilder fingerprintCommandBuilder,
    IQueryFingerprintService queryFingerprintService)
{
    // Collaborators.
    this.fingerprintCommandBuilder = fingerprintCommandBuilder;
    this.queryFingerprintService = queryFingerprintService;

    // Initial configurations.
    FingerprintConfiguration = new EfficientFingerprintConfigurationForQuerying();
    QueryConfiguration = new DefaultQueryConfiguration();
}
/// <summary>
/// With batched querying reported as unsupported, two sub-fingerprints read from the model
/// service reference the same track; the query is expected to report a single result entry
/// for that track with a Hamming similarity sum of 200.
/// </summary>
public void HammingSimilarityIsSummedUpAccrossAllSubFingerprintsTest()
{
    const int trackId = 20;
    const int subFingerprintIdA = 10;
    const int subFingerprintIdB = 11;

    // Arrange
    var query = new HashedFingerprint(GenericHashBuckets(), 0, 0, Enumerable.Empty<string>());
    var trackReference = new ModelReference<int>(trackId);
    var storedA = new SubFingerprintData(
        GenericHashBuckets(),
        1,
        0,
        new ModelReference<int>(subFingerprintIdA),
        trackReference);
    var storedB = new SubFingerprintData(
        GenericHashBuckets(),
        2,
        0.928f,
        new ModelReference<int>(subFingerprintIdB),
        trackReference);
    var configuration = new DefaultQueryConfiguration();

    modelService
        .Setup(service => service.SupportsBatchedSubFingerprintQuery)
        .Returns(false);
    modelService
        .Setup(service => service.ReadSubFingerprints(It.IsAny<int[]>(), configuration))
        .Returns(new List<SubFingerprintData> { storedA, storedB });
    modelService
        .Setup(service => service.ReadTracksByReferences(new[] { trackReference }))
        .Returns(new List<TrackData>
        {
            new TrackData { ISRC = "isrc", TrackReference = trackReference }
        });

    // Act
    var outcome = queryFingerprintService.Query(
        new List<HashedFingerprint> { query },
        configuration,
        modelService.Object);

    // Assert
    Assert.IsTrue(outcome.ContainsMatches);
    Assert.AreEqual("isrc", outcome.BestMatch.Track.ISRC);
    Assert.AreEqual(trackReference, outcome.BestMatch.Track.TrackReference);
    Assert.AreEqual(200, outcome.BestMatch.HammingSimilaritySum);
    Assert.AreEqual(1, outcome.ResultEntries.Count());
}
/// <summary>
/// Initializes the configuration by copying four settings from a new
/// <c>DefaultQueryConfiguration</c> instance.
/// </summary>
public CustomQueryConfiguration()
{
    var defaults = new DefaultQueryConfiguration();

    // Hashing layout.
    NumberOfLSHTables = defaults.NumberOfLSHTables;
    NumberOfMinHashesPerTable = defaults.NumberOfMinHashesPerTable;

    // Result filtering.
    ThresholdVotes = defaults.ThresholdVotes;
    MaximumNumberOfTracksToReturnAsResult = defaults.MaximumNumberOfTracksToReturnAsResult;
}
/// <summary>
/// A freshly constructed <c>CustomQueryConfiguration</c> must expose the same threshold votes
/// and maximum number of returned tracks as a freshly constructed <c>DefaultQueryConfiguration</c>.
/// </summary>
public void CustomQueryConfigurationInheritsDefaultValuesTest()
{
    var custom = new CustomQueryConfiguration();
    var reference = new DefaultQueryConfiguration();

    Assert.AreEqual(reference.ThresholdVotes, custom.ThresholdVotes);
    Assert.AreEqual(
        reference.MaximumNumberOfTracksToReturnAsResult,
        custom.MaximumNumberOfTracksToReturnAsResult);
}
/// <summary>
/// Verifies that the configuration handed to <c>WithQueryConfig</c> is the very same instance
/// that the built command exposes through its <c>QueryConfiguration</c> property.
/// </summary>
public void QueryCommandIsBuiltWithCustomQueryConfigCorrectly()
{
    var customConfig = new DefaultQueryConfiguration();

    var command = queryCommandBuilder.BuildQueryCommand()
        .From("path-to-file")
        .WithQueryConfig(customConfig)
        .UsingServices(modelService.Object, audioService.Object);

    // Expected reference goes first so a failure message labels the two values correctly.
    Assert.AreSame(customConfig, command.QueryConfiguration);
}
/// <summary>
/// Inserts two tracks whose "group-id" meta field differs, each with hashes obtained from
/// <c>GetHashedFingerprints</c>, then checks for every fingerprint of the first track that:
/// filtering by the first group id yields exactly the first track, filtering by the second
/// group id yields exactly the second track, and querying without a filter yields two entries.
/// </summary>
public async Task ReadByTrackGroupIdWorksAsExpectedTest()
{
    const int thresholdVotes = 25;

    // Arrange: two tracks in different groups.
    var referenceTracker = new UIntModelReferenceTracker();

    var trackA = new TrackInfo(
        "id-1",
        string.Empty,
        string.Empty,
        new Dictionary<string, string> { { "group-id", "first-group-id" } },
        MediaType.Audio);
    var hashesA = await GetHashedFingerprints();
    var storedA = InsertTrackAndHashes(trackA, hashesA, referenceTracker);

    var trackB = new TrackInfo(
        "id-2",
        string.Empty,
        string.Empty,
        new Dictionary<string, string> { { "group-id", "second-group-id" } },
        MediaType.Audio);
    var hashesB = await GetHashedFingerprints();
    var storedB = InsertTrackAndHashes(trackB, hashesB, referenceTracker);

    var firstGroupOnly = new DefaultQueryConfiguration
    {
        ThresholdVotes = thresholdVotes,
        MetaFieldsFilter = new Dictionary<string, string> { { "group-id", "first-group-id" } }
    };
    var secondGroupOnly = new DefaultQueryConfiguration
    {
        ThresholdVotes = thresholdVotes,
        MetaFieldsFilter = new Dictionary<string, string> { { "group-id", "second-group-id" } }
    };

    // Act + Assert, one fingerprint at a time.
    foreach (var fingerprint in hashesA)
    {
        var fromFirstGroup = subFingerprintDao
            .ReadSubFingerprints(new[] { fingerprint.HashBins }, firstGroupOnly)
            .ToList();
        Assert.AreEqual(1, fromFirstGroup.Count);
        Assert.AreEqual(storedA.TrackReference, fromFirstGroup[0].TrackReference);

        var fromSecondGroup = subFingerprintDao
            .ReadSubFingerprints(new[] { fingerprint.HashBins }, secondGroupOnly)
            .ToList();
        Assert.AreEqual(1, fromSecondGroup.Count);
        Assert.AreEqual(storedB.TrackReference, fromSecondGroup[0].TrackReference);

        // No meta-field filter: the assertion below expects two entries.
        var unfiltered = subFingerprintDao
            .ReadSubFingerprints(
                new[] { fingerprint.HashBins },
                new DefaultQueryConfiguration { ThresholdVotes = thresholdVotes })
            .ToList();
        Assert.AreEqual(2, unfiltered.Count);
    }
}
/// <summary>
/// The model service returns three sub-fingerprints that reference three different tracks while
/// the configuration caps the result at two tracks; the query is expected to return exactly the
/// first and second track, in that order, with the first one as best match scoring 100.
/// </summary>
public void MaximumNumberOfReturnedTracksIsLessThanAnalyzedCandidatesResultsTest()
{
    const int votesThreshold = 5;
    const int trackIdA = 20;
    const int trackIdB = 21;
    const int trackIdC = 22;
    const int subFingerprintIdA = 10;
    const int subFingerprintIdB = 11;

    // Arrange
    var query = new HashedFingerprint(GenericHashBuckets(), 1, 0, Array.Empty<byte>());
    var trackReferenceA = new ModelReference<int>(trackIdA);
    var trackReferenceB = new ModelReference<int>(trackIdB);

    var storedA = new SubFingerprintData(
        GenericHashBuckets(),
        1,
        0,
        new ModelReference<int>(subFingerprintIdA),
        trackReferenceA);
    var storedB = new SubFingerprintData(
        GenericHashBuckets(),
        2,
        0.928f,
        new ModelReference<int>(subFingerprintIdB),
        trackReferenceB);

    // NOTE(review): the third entry reuses the second sub-fingerprint id - confirm this is intentional.
    var storedC = new SubFingerprintData(
        GenericHashBuckets(),
        3,
        0.928f * 2,
        new ModelReference<int>(subFingerprintIdB),
        new ModelReference<int>(trackIdC));

    var configuration = new DefaultQueryConfiguration
    {
        MaxTracksToReturn = 2,
        ThresholdVotes = votesThreshold
    };

    modelService
        .Setup(service => service.Query(It.IsAny<Hashes>(), configuration))
        .Returns(new[] { storedA, storedB, storedC });
    modelService
        .Setup(service => service.ReadTracksByReferences(new[] { trackReferenceA, trackReferenceB }))
        .Returns(new List<TrackData>
        {
            new TrackData("id", string.Empty, string.Empty, 0d, trackReferenceA),
            new TrackData("id_1", string.Empty, string.Empty, 0d, trackReferenceB)
        });

    var hashes = new Hashes(
        new List<HashedFingerprint> { query },
        1.48f,
        DateTime.Now,
        Enumerable.Empty<string>());

    // Act
    var outcome = queryFingerprintService.Query(hashes, configuration, modelService.Object);

    // Assert
    Assert.IsTrue(outcome.ContainsMatches);
    Assert.AreEqual("id", outcome.BestMatch.Track.Id);
    Assert.AreEqual(trackReferenceA, outcome.BestMatch.Track.TrackReference);
    Assert.AreEqual(100, outcome.BestMatch.Score);
    Assert.AreEqual(2, outcome.ResultEntries.Count());

    var entries = outcome.ResultEntries.ToList();
    Assert.AreEqual(trackReferenceA, entries[0].Track.TrackReference);
    Assert.AreEqual(trackReferenceB, entries[1].Track.TrackReference);
}
/// <summary>
/// Builds a query command for a slice of an audio file with a default query configuration,
/// runs it against <c>mModelService</c> / <c>mAudioService</c> and returns its result.
/// </summary>
/// <param name="queryAudioFile">Audio file passed to the command's <c>From</c> step.</param>
/// <param name="secondsToAnalyze">Second argument of the <c>From</c> step.</param>
/// <param name="startAtSecond">Third argument of the <c>From</c> step.</param>
/// <returns>The result of the query task.</returns>
private QueryResult GetQueryResultForSong(string queryAudioFile, double secondsToAnalyze, int startAtSecond)
{
    var configuration = new DefaultQueryConfiguration();

    var pendingQuery = QueryCommandBuilder.Instance
        .BuildQueryCommand()
        .From(queryAudioFile, secondsToAnalyze, startAtSecond)
        .WithQueryConfig(configuration)
        .UsingServices(mModelService, mAudioService)
        .Query();

    // Reading Result blocks the calling thread until the task has finished.
    return pendingQuery.Result;
}
/// <summary>
/// When the model service answers the query with an empty list of sub-fingerprints,
/// the query result must report no matches and a null best match.
/// </summary>
public void NoResultsReturnedFromUnderlyingStorageTest()
{
    // Arrange
    var query = new HashedFingerprint(
        new[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 },
        0,
        0,
        Array.Empty<byte>());
    var configuration = new DefaultQueryConfiguration
    {
        MaxTracksToReturn = 1,
        ThresholdVotes = 10,
        FingerprintConfiguration = new DefaultFingerprintConfiguration()
    };

    modelService
        .Setup(service => service.Query(It.IsAny<Hashes>(), configuration))
        .Returns(new List<SubFingerprintData>());

    // NOTE(review): 148f differs from the 1.48f used by sibling tests - confirm it is intended.
    var hashes = new Hashes(
        new List<HashedFingerprint> { query },
        148f,
        DateTime.Now,
        Enumerable.Empty<string>());

    // Act
    var outcome = queryFingerprintService.Query(hashes, configuration, modelService.Object);

    // Assert
    Assert.IsFalse(outcome.ContainsMatches);
    Assert.IsNull(outcome.BestMatch);
}
/// <summary>
/// Feeds <c>GetBestCandidates</c> three accumulators keyed by track references 1, 2 and 3,
/// constructed with the values 100, 99 and 101 (presumably Hamming similarity sums), and a
/// limit of one track. Expects the single returned entry to be the track with reference 3,
/// with a query length of about 9.48 and a track start of 0.
/// </summary>
public void ShouldGetBestCandidatesByHammingDistance()
{
    // Arrange: a strict mock that only knows how to resolve track reference 3.
    var modelService = new Mock<IModelService>(MockBehavior.Strict);
    var trackReference = new ModelReference<int>(3);
    modelService
        .Setup(service => service.ReadTracksByReferences(new[] { trackReference }))
        .Returns(new List<TrackData>
        {
            new TrackData { ISRC = "isrc-1234-1234", TrackReference = trackReference }
        });

    var queryConfiguration = new DefaultQueryConfiguration { MaxTracksToReturn = 1 };

    var query = new List<HashedFingerprint>
    {
        new HashedFingerprint(null, 1, 0, Enumerable.Empty<string>()),
        new HashedFingerprint(null, 1, 4, Enumerable.Empty<string>()),
        new HashedFingerprint(null, 1, 8, Enumerable.Empty<string>())
    };

    var accumulatorA = new ResultEntryAccumulator(query[0], new SubFingerprintData(null, 1, 0, null, null), 100);
    var accumulatorB = new ResultEntryAccumulator(query[1], new SubFingerprintData(null, 1, 4, null, null), 99);
    var accumulatorC = new ResultEntryAccumulator(query[2], new SubFingerprintData(null, 1, 8, null, null), 101);

    var hammingSimilarities = new Dictionary<IModelReference, ResultEntryAccumulator>();
    hammingSimilarities.Add(new ModelReference<int>(1), accumulatorA);
    hammingSimilarities.Add(new ModelReference<int>(2), accumulatorB);
    hammingSimilarities.Add(new ModelReference<int>(3), accumulatorC);

    // Act
    var best = queryMath.GetBestCandidates(
        query,
        hammingSimilarities,
        queryConfiguration.MaxTracksToReturn,
        modelService.Object,
        queryConfiguration.FingerprintConfiguration);

    // Assert
    Assert.AreEqual(1, best.Count);
    Assert.AreEqual("isrc-1234-1234", best[0].Track.ISRC);
    Assert.AreEqual(9.48d, best[0].QueryLength, 0.01);
    Assert.AreEqual(0d, best[0].TrackStartsAt);
    modelService.VerifyAll();
}
/// <summary>
/// With batched querying reported as unsupported and the model service returning an empty list
/// of sub-fingerprints, the query result must report no matches, a null best match and zero
/// result entries.
/// </summary>
public void NoResultsReturnedFromUnderlyingStorageTest()
{
    // Arrange
    var query = new HashedFingerprint(
        new int[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 },
        0,
        0,
        Enumerable.Empty<string>());
    var configuration = new DefaultQueryConfiguration
    {
        MaxTracksToReturn = 1,
        ThresholdVotes = 10,
        FingerprintConfiguration = new DefaultFingerprintConfiguration()
    };

    modelService
        .Setup(service => service.SupportsBatchedSubFingerprintQuery)
        .Returns(false);
    modelService
        .Setup(service => service.ReadSubFingerprints(It.IsAny<int[]>(), configuration))
        .Returns(new List<SubFingerprintData>());

    // Act
    var outcome = queryFingerprintService.Query(
        new List<HashedFingerprint> { query },
        configuration,
        modelService.Object);

    // Assert
    Assert.IsFalse(outcome.ContainsMatches);
    Assert.IsNull(outcome.BestMatch);
    Assert.AreEqual(0, outcome.ResultEntries.Count());
}
/// <summary>
/// Creates a query command that keeps the supplied collaborators and starts out with a
/// freshly constructed <c>DefaultQueryConfiguration</c>.
/// </summary>
/// <param name="fingerprintCommandBuilder">Fingerprint command builder stored on this instance.</param>
/// <param name="queryFingerprintService">Query fingerprint service stored on this instance.</param>
internal QueryCommand(
    IFingerprintCommandBuilder fingerprintCommandBuilder,
    IQueryFingerprintService queryFingerprintService)
{
    // Collaborators.
    this.fingerprintCommandBuilder = fingerprintCommandBuilder;
    this.queryFingerprintService = queryFingerprintService;

    // Initial configuration.
    QueryConfiguration = new DefaultQueryConfiguration();
}
/// <summary>
/// With batched querying reported as unsupported, the model service returns three
/// sub-fingerprints that reference three different tracks while the configuration caps the
/// result at two tracks; the query is expected to return exactly the first and second track,
/// in that order, with the first one as best match and a Hamming similarity sum of 100.
/// </summary>
public void MaximumNumberOfReturnedTracksIsLessThanAnalyzedCandidatesResultsTest()
{
    const int votesThreshold = 5;
    const int trackIdA = 20;
    const int trackIdB = 21;
    const int trackIdC = 22;
    const int subFingerprintIdA = 10;
    const int subFingerprintIdB = 11;

    // Arrange
    var query = new HashedFingerprint(GenericHashBuckets(), 1, 0, Enumerable.Empty<string>());
    var trackReferenceA = new ModelReference<int>(trackIdA);
    var trackReferenceB = new ModelReference<int>(trackIdB);

    var storedA = new SubFingerprintData(
        GenericHashBuckets(),
        1,
        0,
        new ModelReference<int>(subFingerprintIdA),
        trackReferenceA);
    var storedB = new SubFingerprintData(
        GenericHashBuckets(),
        2,
        0.928f,
        new ModelReference<int>(subFingerprintIdB),
        trackReferenceB);

    // NOTE(review): the third entry reuses the second sub-fingerprint id - confirm this is intentional.
    var storedC = new SubFingerprintData(
        GenericHashBuckets(),
        3,
        0.928f * 2,
        new ModelReference<int>(subFingerprintIdB),
        new ModelReference<int>(trackIdC));

    var configuration = new DefaultQueryConfiguration
    {
        MaxTracksToReturn = 2,
        ThresholdVotes = votesThreshold
    };

    modelService
        .Setup(service => service.SupportsBatchedSubFingerprintQuery)
        .Returns(false);
    modelService
        .Setup(service => service.ReadSubFingerprints(It.IsAny<int[]>(), configuration))
        .Returns(new List<SubFingerprintData> { storedA, storedB, storedC });
    modelService
        .Setup(service => service.ReadTracksByReferences(new[] { trackReferenceA, trackReferenceB }))
        .Returns(new List<TrackData>
        {
            new TrackData { ISRC = "isrc", TrackReference = trackReferenceA },
            new TrackData { ISRC = "isrc_1", TrackReference = trackReferenceB }
        });

    // Act
    var outcome = queryFingerprintService.Query(
        new List<HashedFingerprint> { query },
        configuration,
        modelService.Object);

    // Assert
    Assert.IsTrue(outcome.ContainsMatches);
    Assert.AreEqual("isrc", outcome.BestMatch.Track.ISRC);
    Assert.AreEqual(trackReferenceA, outcome.BestMatch.Track.TrackReference);
    Assert.AreEqual(100, outcome.BestMatch.HammingSimilaritySum);
    Assert.AreEqual(2, outcome.ResultEntries.Count());

    var entries = outcome.ResultEntries.ToList();
    Assert.AreEqual(trackReferenceA, entries[0].Track.TrackReference);
    Assert.AreEqual(trackReferenceB, entries[1].Track.TrackReference);
}
/// <summary>
///   Find duplicates between existing tracks in the database
/// </summary>
/// <param name="callback">
///   Callback invoked after each processed track with the track, the total number of tracks and
///   the number of tracks processed so far. May be null.
/// </param>
/// <returns>
///   Sets of duplicates. Sets that share at least one track are collapsed into a single set.
/// </returns>
public HashSet<TrackData>[] FindDuplicates(Action<TrackData, int, int> callback)
{
    var tracks = modelService.ReadAllTracks().ToList();
    var duplicates = new List<HashSet<TrackData>>();
    int total = tracks.Count, current = 0;
    var queryConfiguration = new DefaultQueryConfiguration { MaxTracksToReturn = int.MaxValue, ThresholdVotes = 4 };

    foreach (var track in tracks)
    {
        // Materialize once: the sequence is walked several times below (Max, Min, Hashes).
        var hashedFingerprints = modelService.ReadHashedFingerprintsByTrack(track.TrackReference).ToList();

        // A track without fingerprints has nothing to query with, and Max/Min would throw
        // on an empty sequence, aborting the whole scan.
        if (hashedFingerprints.Count > 0)
        {
            var max = hashedFingerprints.Max(_ => _.StartsAt);
            var min = hashedFingerprints.Min(_ => _.StartsAt);
            var hashes = new Hashes(
                hashedFingerprints,
                GetLength(min, max, queryConfiguration.FingerprintConfiguration.FingerprintLengthInSeconds));
            var result = queryFingerprintService.Query(hashes, queryConfiguration, modelService);

            if (result.ContainsMatches)
            {
                var trackDuplicates = new HashSet<TrackData>();
                foreach (var resultEntry in result.ResultEntries)
                {
                    // Skip low-confidence matches and the track matching itself.
                    if (resultEntry.Confidence < FalsePositivesThreshold || track.Equals(resultEntry.Track))
                    {
                        continue;
                    }

                    trackDuplicates.Add(resultEntry.Track);
                }

                if (trackDuplicates.Any())
                {
                    // The queried track belongs to its own duplicate set as well.
                    duplicates.Add(new HashSet<TrackData>(trackDuplicates) { track });
                }
            }
        }

        callback?.Invoke(track, total, ++current);
    }

    CollapseOverlappingSets(duplicates);
    return duplicates.ToArray();
}

/// <summary>
///   Merges, in place, all sets that share at least one track so that the remaining sets are pairwise disjoint.
/// </summary>
/// <param name="sets">Duplicate sets to collapse; merged-away sets are removed from the list.</param>
private static void CollapseOverlappingSets(List<HashSet<TrackData>> sets)
{
    for (int i = 0; i < sets.Count - 1; i++)
    {
        HashSet<TrackData> target = sets[i];
        int j = i + 1;
        while (j < sets.Count)
        {
            if (target.Overlaps(sets[j]))
            {
                target.UnionWith(sets[j]);
                sets.RemoveAt(j);

                // The target just grew, so sets that were skipped earlier may overlap it now:
                // rescan from the first candidate. Every merge removes a set, so this terminates.
                j = i + 1;
            }
            else
            {
                j++;
            }
        }
    }
}
/// <summary>
/// Initializes the configuration by copying the LSH table count and the number of min-hashes
/// per table from a new <c>DefaultQueryConfiguration</c> instance.
/// </summary>
public CustomQueryConfiguration()
{
    var defaults = new DefaultQueryConfiguration();

    NumberOfLSHTables = defaults.NumberOfLSHTables;
    NumberOfMinHashesPerTable = defaults.NumberOfMinHashesPerTable;
}