/// <summary>
/// Repeatedly generates pairs of vectors with a requested similarity and checks that,
/// averaged over all runs, the pairs show the requested agreement and Jaccard similarity.
/// </summary>
public void ShouldBeAbleToGenerateMultipleTimesDifferentSignatures()
{
    double howSimilarAreVectors = 0.4;
    int topWavelets = 200, vectorLength = 8192;
    var similarityUtility = new SimilarityUtility();
    double similarity = 0;
    int simulationRuns = 20000, agreeOn = 0;

    for (int i = 0; i < simulationRuns; ++i)
    {
        var arrays = this.GenerateVectors(howSimilarAreVectors, topWavelets, vectorLength);

        // Each generated vector must report exactly topWavelets from TrueCounts().
        Assert.AreEqual(topWavelets, arrays.Item1.TrueCounts());
        Assert.AreEqual(topWavelets, arrays.Item2.TrueCounts());

        agreeOn += arrays.Item1.AgreeOn(arrays.Item2);
        similarity += similarityUtility.CalculateJaccardSimilarity(arrays.Item1.ToBools(), arrays.Item2.ToBools());
    }

    double averageSimilarityOnTrueBits = (double)agreeOn / simulationRuns;

    // Expected value goes first, measured value second, like every other assertion in this test,
    // so a failure report labels the two numbers correctly.
    Assert.AreEqual(
        howSimilarAreVectors * topWavelets,
        averageSimilarityOnTrueBits,
        1.0,
        "Actual Average Similarity on True bits: " + averageSimilarityOnTrueBits);

    // Values that match are counted one time, values that don't match count twice:
    // (1 0 | 1 0) differs on 2 bits even though both come from 1 wavelet.
    // With the constants above this evaluates to 80 / (400 - 80) = 0.25.
    double jaccardSimilarity = (howSimilarAreVectors * topWavelets) / ((2 * topWavelets) - (howSimilarAreVectors * topWavelets));
    Assert.AreEqual(
        jaccardSimilarity,
        similarity / simulationRuns,
        0.1,
        "Jaccard Similarity is not as requested: " + (similarity / simulationRuns));
}
/// <summary>
/// Checks the Jaccard similarity computed for two 10-element boolean vectors
/// that differ only in their first element.
/// </summary>
public void CalculateJaccardSimilarityCorrect()
{
    bool[] first = new[] { true, true, false, true, false, true, false, false, true, true };
    bool[] second = new[] { false, true, false, true, false, true, false, false, true, true };

    var result = SimilarityUtility.CalculateJaccardSimilarity(first, second);

    // 5 positions are true in both vectors and 6 are true in at least one of them.
    // The expected value is computed in double precision and compared with a tolerance:
    // an exact comparison against the single-precision 5f / 6 depends on how the
    // implementation happens to round and is fragile for floating-point results.
    Assert.AreEqual(5.0 / 6, result, 1e-6);
}
/// <summary>
/// Checks the Hamming similarity reported for two 10-byte signatures.
/// </summary>
public void CalculateHammingSimilarityCorrect()
{
    // The two signatures hold equal bytes at 6 positions (indices 0, 1, 2, 4, 6 and 7),
    // which is the value the assertion below expects.
    byte[] left = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
    byte[] right = { 1, 2, 3, 8, 5, 9, 7, 8, 11, 13 };

    var similarity = SimilarityUtility.CalculateHammingSimilarity(left, right);

    Assert.AreEqual(6, similarity);
}
/// <summary>
/// Compares every fingerprint of <paramref name="databaseSong"/> with every fingerprint of
/// <paramref name="querySong"/> using Jaccard similarity and folds the sum, average, maximum
/// and minimum of those comparisons into <paramref name="results"/>.
/// </summary>
/// <param name="databaseSong">Command producing the fingerprints of the database song.</param>
/// <param name="querySong">Command producing the fingerprints of the query song.</param>
/// <param name="results">
/// Accumulator that is updated in place. Sum and average are added to the existing values,
/// max/min are only replaced by a more extreme value, and the comparison count is overwritten.
/// </param>
private void GetFingerprintSimilarity(IFingerprintCommand databaseSong, IFingerprintCommand querySong, SimilarityResult results)
{
    // NOTE(review): .Result presumably blocks on a Task here; kept because this method is
    // synchronous and changing that would alter its signature for callers.
    List<bool[]> fingerprintsDatabaseSong = databaseSong.Fingerprint().Result.ToList();
    List<bool[]> fingerprintsQuerySong = querySong.Fingerprint().Result.ToList();

    double sum = 0;
    double max = double.MinValue;
    double min = double.MaxValue;
    int comparisonsCount = 0;

    foreach (bool[] databaseFingerprint in fingerprintsDatabaseSong)
    {
        foreach (bool[] queryFingerprint in fingerprintsQuerySong)
        {
            double value = SimilarityUtility.CalculateJaccardSimilarity(databaseFingerprint, queryFingerprint);
            if (value > max)
            {
                max = value;
            }

            if (value < min)
            {
                min = value;
            }

            sum += value;
            comparisonsCount++;
        }
    }

    results.NumberOfAnalizedFingerprints = comparisonsCount;

    if (comparisonsCount == 0)
    {
        // Nothing was compared (at least one song produced no fingerprints). Dividing the sum
        // by zero would yield NaN and permanently poison the accumulated average, and the
        // max/min sentinels carry no information, so the statistics are left untouched.
        return;
    }

    results.SumJaqSimilarityBetweenDatabaseAndQuerySong += sum;
    results.AverageJaqSimilarityBetweenDatabaseAndQuerySong += sum / comparisonsCount;

    if (max > results.MaxJaqSimilarityBetweenDatabaseAndQuerySong)
    {
        results.MaxJaqSimilarityBetweenDatabaseAndQuerySong = max;
    }

    if (min < results.MinJaqSimilarityBetweenDatabaseAndQuerySong)
    {
        results.MinJaqSimilarityBetweenDatabaseAndQuerySong = min;
    }
}
/// <summary>
/// Looks up candidate sub-fingerprints for every query hash, accumulates the Hamming similarity
/// per track reference and returns the tracks with the highest accumulated similarity.
/// </summary>
/// <param name="modelService">Service used to fetch candidates and to read the matched tracks.</param>
/// <param name="hashes">Hashes of the query; enumerated once.</param>
/// <param name="queryConfiguration">
/// Query settings; <c>MaximumNumberOfTracksToReturnAsResult</c> caps the number of returned entries.
/// </param>
/// <returns>
/// A successful result with the top matches ordered by descending similarity, or an unsuccessful
/// result with an empty entry list when no candidate was found.
/// </returns>
public QueryResult Query(IModelService modelService, IEnumerable<HashData> hashes, IQueryConfiguration queryConfiguration)
{
    var hammingSimilarities = new Dictionary<IModelReference, int>();
    foreach (var hash in hashes)
    {
        var subFingerprints = GetSubFingerprints(modelService, hash, queryConfiguration);
        foreach (var subFingerprint in subFingerprints)
        {
            int similarity = SimilarityUtility.CalculateHammingSimilarity(hash.SubFingerprint, subFingerprint.Signature);
            var trackReference = subFingerprint.TrackReference;

            // Single lookup: TryGetValue leaves the running total at 0 for a track seen
            // for the first time, so both the "new" and "existing" cases share one path.
            int accumulated;
            hammingSimilarities.TryGetValue(trackReference, out accumulated);
            hammingSimilarities[trackReference] = accumulated + similarity;
        }
    }

    if (hammingSimilarities.Count == 0)
    {
        return new QueryResult
        {
            ResultEntries = new List<ResultEntry>(),
            IsSuccessful = false,
            AnalyzedCandidatesCount = 0
        };
    }

    var resultSet = hammingSimilarities
        .OrderByDescending(pair => pair.Value)
        .Take(queryConfiguration.MaximumNumberOfTracksToReturnAsResult)
        .Select(match => new ResultEntry
        {
            Track = modelService.ReadTrackByReference(match.Key),
            Similarity = match.Value
        })
        .ToList();

    return new QueryResult
    {
        ResultEntries = resultSet,
        IsSuccessful = true,
        AnalyzedCandidatesCount = hammingSimilarities.Count
    };
}