/// <summary>
/// Adds several database points per query fingerprint from parallel workers, all pointing
/// to the same track, and asserts that every added entry is returned by
/// <c>GetMatchesForTrack</c> and that each of the <c>runs</c> query sequence numbers is present.
/// </summary>
public void SameQueryHashGeneratesMultipleTrackMatches()
{
    var groupedQueryResults = new GroupedQueryResults(10d, DateTime.Now);
    int runs = 100;
    var trackRef = new ModelReference<uint>(1);

    // System.Random is not thread-safe, so the per-query match counts are drawn
    // sequentially before the parallel section instead of inside it.
    var random = new Random(1);
    int[] counts = new int[runs];
    for (int i = 0; i < runs; ++i)
    {
        counts[i] = random.Next(5, 10);
    }

    int k = 0;
    Parallel.For(0, runs, i =>
    {
        var queryPoint = new HashedFingerprint(new int[25], (uint)i, i * 1.48f, Array.Empty<byte>());
        for (int j = 0; j < counts[i]; ++j)
        {
            // Take the counter exactly once, atomically; reading k separately for the sequence
            // number and the offset could observe values changed by other workers in between.
            int id = Interlocked.Increment(ref k);
            int sequenceNumber = id - 1;
            var dbPoint = new SubFingerprintData(
                new int[25],
                (uint)sequenceNumber,
                sequenceNumber * 0.01f,
                new ModelReference<uint>((uint)id),
                trackRef);
            groupedQueryResults.Add(queryPoint, dbPoint, i);
        }
    });

    var allMatches = groupedQueryResults.GetMatchesForTrack(trackRef).ToList();

    Assert.AreEqual(counts.Sum(), allMatches.Count);
    Assert.AreEqual(runs, allMatches.Select(m => m.QuerySequenceNumber).Distinct().Count());
}
/// <summary>
/// Reads candidate sub-fingerprints from the model service separately for every query
/// fingerprint and records each candidate together with its Hamming similarity.
/// </summary>
/// <param name="queryFingerprints">Query fingerprints; materialized into a list before processing.</param>
/// <param name="configuration">Query configuration; supplies the number of min-hashes per table and is forwarded to the model service.</param>
/// <param name="modelService">Service used to read candidate sub-fingerprints.</param>
/// <returns>The grouped results accumulated over all query fingerprints.</returns>
private GroupedQueryResults GetSimilaritiesUsingNonBatchedStrategy(IEnumerable<HashedFingerprint> queryFingerprints, QueryConfiguration configuration, IModelService modelService)
{
    var queries = queryFingerprints.ToList();
    var accumulated = new GroupedQueryResults(queries);
    int minHashesPerTable = configuration.FingerprintConfiguration.HashingConfig.NumberOfMinHashesPerTable;

    // Each query fingerprint is handled by a Parallel.ForEach worker, so Add is invoked concurrently.
    Parallel.ForEach(queries, query =>
    {
        foreach (var candidate in modelService.ReadSubFingerprints(query.HashBins, configuration))
        {
            int similarity = similarityUtility.CalculateHammingSimilarity(query.HashBins, candidate.Hashes, minHashesPerTable);
            accumulated.Add(query, candidate, similarity);
        }
    });

    return accumulated;
}
/// <summary>
/// Reads candidates for all query fingerprints with a single model service call, then, for
/// every query fingerprint, keeps the candidates that pass the threshold-votes check and
/// records them together with their Hamming similarity.
/// </summary>
/// <param name="queryFingerprints">Query fingerprints; reused as-is when already a list, otherwise copied into one.</param>
/// <param name="configuration">Query configuration; supplies threshold votes and the number of min-hashes per table.</param>
/// <param name="modelService">Service used to read candidate sub-fingerprints.</param>
/// <returns>The grouped results accumulated over all query fingerprints.</returns>
private GroupedQueryResults GetSimilaritiesUsingBatchedStrategy(IEnumerable<HashedFingerprint> queryFingerprints, QueryConfiguration configuration, IModelService modelService)
{
    var queries = queryFingerprints as List<HashedFingerprint>;
    if (queries == null)
    {
        queries = queryFingerprints.ToList();
    }

    var candidates = modelService.ReadSubFingerprints(queries.Select(query => query.HashBins), configuration);
    var accumulated = new GroupedQueryResults(queries);
    int minHashesPerTable = configuration.FingerprintConfiguration.HashingConfig.NumberOfMinHashesPerTable;

    // The shared candidate set is filtered independently by every Parallel.ForEach worker.
    Parallel.ForEach(queries, query =>
    {
        var passing = candidates.Where(candidate => queryMath.IsCandidatePassingThresholdVotes(query, candidate, configuration.ThresholdVotes));
        foreach (var candidate in passing)
        {
            int similarity = similarityUtility.CalculateHammingSimilarity(query.HashBins, candidate.Hashes, minHashesPerTable);
            accumulated.Add(query, candidate, similarity);
        }
    });

    return accumulated;
}
/// <summary>
/// Builds coverage information for a track from the matches stored in the grouped query results.
/// </summary>
/// <param name="trackData">Track whose reference is used to look up matches.</param>
/// <param name="groupedQueryResults">Accumulated query results providing the matches and the query length.</param>
/// <param name="configuration">Query configuration; decides whether multiple matches of the same track are reported.</param>
/// <returns>
/// One coverage per filtered increasing sequence when multiple matches of the same track
/// are allowed; otherwise a single-element list with the coverage of all matches.
/// </returns>
public IEnumerable<Coverage> GetCoverages(TrackData trackData, GroupedQueryResults groupedQueryResults, QueryConfiguration configuration)
{
    var fingerprintConfiguration = configuration.FingerprintConfiguration;
    var matches = groupedQueryResults.GetMatchesForTrackOrderedByQueryAt(trackData.TrackReference);
    double queryLength = groupedQueryResults.GetQueryLength(fingerprintConfiguration);

    if (!configuration.AllowMultipleMatchesOfTheSameTrackInQuery)
    {
        var single = GetCoverage(matches, queryLength, fingerprintConfiguration.FingerprintLengthInSeconds);
        return new List<Coverage> { single };
    }

    // Split the matches into increasing sequences and drop the overlapping ones
    // before turning each remaining sequence into a coverage.
    var increasingSequences = longestIncreasingTrackSequence.FindAllIncreasingTrackSequences(matches);
    var nonOverlapping = OverlappingRegionFilter.FilterOverlappingSequences(increasingSequences);
    return nonOverlapping.Select(sequence => GetCoverage(sequence, queryLength, fingerprintConfiguration.FingerprintLengthInSeconds));
}
/// <summary>
/// Creates <c>runs</c> hashed fingerprints spaced <c>delta</c> seconds apart from parallel
/// workers and asserts that the computed query length equals the start of the last
/// fingerprint plus the configured fingerprint length.
/// </summary>
public void ShouldCalculateQueryLengthCorrectly()
{
    var configuration = new DefaultFingerprintConfiguration();
    float delta = 0.05f;
    int runs = 1000;
    var bag = new ConcurrentBag<HashedFingerprint>();

    Parallel.For(0, runs, i =>
    {
        var hashed = new HashedFingerprint(new int[0], (uint)i, i * delta, new string[0]);
        bag.Add(hashed);
    });

    var groupedQueryResult = new GroupedQueryResults(bag);

    double length = groupedQueryResult.GetQueryLength(configuration);

    // Expected value goes first, actual second, so a failure message reports them correctly.
    Assert.AreEqual(delta * (runs - 1) + configuration.FingerprintLengthInSeconds, length, 0.0001);
}
/// <summary>
/// Queries the model service once for all query fingerprints, then, for every query
/// fingerprint, keeps the returned entries that pass the threshold-votes check and records
/// them together with the score produced by the score algorithm.
/// </summary>
/// <param name="queryFingerprints">Query fingerprints; reused as-is when already a list, otherwise copied into one.</param>
/// <param name="configuration">Query configuration; supplies threshold votes and the fingerprint configuration used for the query length.</param>
/// <param name="relativeTo">Passed through to the <c>GroupedQueryResults</c> constructor.</param>
/// <param name="modelService">Service that executes the query.</param>
/// <returns>The grouped results accumulated over all query fingerprints.</returns>
private GroupedQueryResults GetSimilaritiesUsingBatchedStrategy(IEnumerable<HashedFingerprint> queryFingerprints, QueryConfiguration configuration, DateTime relativeTo, IModelService modelService)
{
    var queries = queryFingerprints as List<HashedFingerprint>;
    if (queries == null)
    {
        queries = queryFingerprints.ToList();
    }

    var candidates = modelService.Query(queries.Select(query => query.HashBins), configuration);
    double queryLength = queries.QueryLength(configuration.FingerprintConfiguration);
    var accumulated = new GroupedQueryResults(queryLength, relativeTo);

    // The shared query result is filtered independently by every Parallel.ForEach worker.
    Parallel.ForEach(queries, query =>
    {
        var passing = candidates.Where(candidate => QueryMath.IsCandidatePassingThresholdVotes(query.HashBins, candidate.Hashes, configuration.ThresholdVotes));
        foreach (var candidate in passing)
        {
            double score = scoreAlgorithm.GetScore(query, candidate, configuration);
            accumulated.Add(query, candidate, score);
        }
    });

    return accumulated;
}
/// <summary>
/// Estimates coverage for a track from the matches stored in the grouped query results.
/// </summary>
/// <param name="trackData">Track whose reference and length are used.</param>
/// <param name="groupedQueryResults">Accumulated query results providing the matches and the query length.</param>
/// <param name="configuration">Query configuration; supplies the permitted gap and decides whether multiple matches of the same track are reported.</param>
/// <returns>
/// An empty sequence when the track has no matches; the increasing coverages when multiple
/// matches of the same track are allowed; otherwise a single-element array.
/// </returns>
public IEnumerable<Coverage> GetCoverages(TrackData trackData, GroupedQueryResults groupedQueryResults, QueryConfiguration configuration)
{
    var fingerprintConfiguration = configuration.FingerprintConfiguration;
    var trackMatches = groupedQueryResults.GetMatchesForTrack(trackData.TrackReference).ToList();
    if (trackMatches.Count == 0)
    {
        return Enumerable.Empty<Coverage>();
    }

    double queryLength = groupedQueryResults.QueryLength;
    if (!configuration.AllowMultipleMatchesOfTheSameTrackInQuery)
    {
        return new[]
        {
            trackMatches.EstimateCoverage(queryLength, trackData.Length, fingerprintConfiguration.FingerprintLengthInSeconds, configuration.PermittedGap)
        };
    }

    return trackMatches.EstimateIncreasingCoverages(queryLength, trackData.Length, fingerprintConfiguration.FingerprintLengthInSeconds, configuration.PermittedGap);
}
/// <summary>
/// Adds <c>runs</c> matches for a single track from parallel workers and asserts that
/// <c>GetMatchesForTrackOrderedByQueryAt</c> returns them with query positions 0..runs-1 in order.
/// </summary>
public void MatchesShouldBeOrderedByQueryAt()
{
    int runs = 1000;
    var groupedQueryResults = new GroupedQueryResults(Enumerable.Empty<HashedFingerprint>());
    var trackReference = new ModelReference<int>(1);

    Parallel.For(0, runs, index =>
    {
        // The query position grows with the index while the result position shrinks.
        var query = new HashedFingerprint(new int[0], (uint)index, index, new string[0]);
        var subFingerprint = new SubFingerprintData(new int[0], (uint)index, runs - index, new ModelReference<uint>((uint)index), trackReference);
        groupedQueryResults.Add(query, subFingerprint, index);
    });

    var orderedMatches = groupedQueryResults.GetMatchesForTrackOrderedByQueryAt(trackReference);
    var queryPositions = orderedMatches.Select(match => (int)match.QueryAt).ToList();

    CollectionAssert.AreEqual(Enumerable.Range(0, runs), queryPositions);
}
/// <summary>
/// Adds <c>runs</c> matches spread round-robin over five track references from parallel
/// workers, then checks the per-track Hamming similarity sums, the top-track ordering,
/// the best match for the last reference, and the per-track match counts.
/// </summary>
public void ShouldAccumulateResults()
{
    int runs = 1000;
    var groupedQueryResults = new GroupedQueryResults(Enumerable.Empty<HashedFingerprint>());
    var references = new[] { 1, 2, 3, 4, 5 }.Select(id => new ModelReference<int>(id)).ToArray();

    Parallel.For(0, runs, index =>
    {
        var query = new HashedFingerprint(new int[0], (uint)index, index * 0.05f, new string[0]);
        var subFingerprint = new SubFingerprintData(new int[0], (uint)index, index * 0.07f, new ModelReference<uint>((uint)index), references[index % references.Length]);
        groupedQueryResults.Add(query, subFingerprint, index);
    });

    Assert.IsTrue(groupedQueryResults.ContainsMatches);

    // Reference r receives the similarities r, r + 5, r + 10, ...; their sum is
    // perTrack * r + runs * (perTrack - 1) / 2.
    int perTrack = runs / references.Length;
    for (int r = 0; r < references.Length; ++r)
    {
        int expectedSum = (perTrack - 1) * runs / 2 + perTrack * r;
        Assert.AreEqual(expectedSum, groupedQueryResults.GetHammingSimilaritySumForTrack(references[r]));
    }

    // Top tracks are expected in reverse order of the references array.
    var topTracks = groupedQueryResults.GetTopTracksByHammingSimilarity(references.Length * 2).ToList();
    for (int r = 0; r < references.Length; ++r)
    {
        Assert.AreEqual(references[references.Length - r - 1], topTracks[r]);
    }

    var lastReference = references[references.Length - 1];
    var bestMatch = groupedQueryResults.GetBestMatchForTrack(lastReference);
    Assert.AreEqual((runs - 1) * 0.05f, bestMatch.QueryAt, 0.000001);
    Assert.AreEqual((runs - 1) * 0.07f, bestMatch.ResultAt, 0.000001);

    foreach (var reference in references)
    {
        var matches = groupedQueryResults.GetMatchesForTrackOrderedByQueryAt(reference).ToList();
        Assert.AreEqual(runs / references.Length, matches.Count);
    }
}
/// <summary>
/// Builds coverage information for a track from the matches stored in the grouped query results.
/// </summary>
/// <param name="trackData">Track whose reference and length are used.</param>
/// <param name="groupedQueryResults">Accumulated query results providing the matches and the query length.</param>
/// <param name="configuration">Query configuration; supplies the permitted gap and decides whether multiple matches of the same track are reported.</param>
/// <returns>
/// An empty sequence when the track has no matches; one coverage per increasing sequence
/// when multiple matches of the same track are allowed; otherwise a single-element array.
/// </returns>
public IEnumerable<Coverage> GetCoverages(TrackData trackData, GroupedQueryResults groupedQueryResults, QueryConfiguration configuration)
{
    var fingerprintConfiguration = configuration.FingerprintConfiguration;
    var trackMatches = groupedQueryResults.GetMatchesForTrack(trackData.TrackReference).ToList();
    if (trackMatches.Count == 0)
    {
        return Enumerable.Empty<Coverage>();
    }

    double queryLength = groupedQueryResults.QueryLength;
    if (!configuration.AllowMultipleMatchesOfTheSameTrackInQuery)
    {
        return new[]
        {
            trackMatches.EstimateCoverage(queryLength, trackData.Length, fingerprintConfiguration.FingerprintLengthInSeconds, configuration.PermittedGap)
        };
    }

    // The gap passed to the sequence finder is the shorter of the track length and the query length.
    double allowedGap = Math.Min(trackData.Length, queryLength);
    var increasingSequences = longestIncreasingTrackSequence.FindAllIncreasingTrackSequences(trackMatches, allowedGap);
    return increasingSequences.Select(sequence => new Coverage(sequence, queryLength, trackData.Length, fingerprintConfiguration.FingerprintLengthInSeconds, configuration.PermittedGap));
}