示例#1
0
        /// <summary>
        /// Hashes <c>runs</c> randomly generated 128x32 fingerprints with the default hashing configuration
        /// and asserts that each of the 25 tracked hash tables ends up with more than 90% distinct keys.
        /// </summary>
        public void ShouldNotGenerateAudioCollisions()
        {
            var lshAlgorithm = LocalitySensitiveHashingAlgorithm.Instance;

            // Fixed seed: the generated fingerprints are the same on every run.
            var random = new Random(1);
            int width = 128, height = 32;

            var hashingConfig = new DefaultHashingConfig();

            // One dictionary per hash table, mapping hash bin value -> number of fingerprints that produced it.
            var tables = Enumerable.Range(0, 25).Select(_ => new ConcurrentDictionary<int, int>()).ToArray();

            int runs = 10000;

            // The loop bound and the assertion threshold must both derive from 'runs'.
            for (int i = 0; i < runs; ++i)
            {
                var tinyFingerprint = TestUtilities.GenerateRandomFingerprint(random, 200, width, height);

                var fingerprint = new Fingerprint(tinyFingerprint, 0, 0, Array.Empty<byte>());

                var hashedFingerprint = lshAlgorithm.Hash(fingerprint, hashingConfig);

                for (int table = 0; table < tables.Length; ++table)
                {
                    int key = hashedFingerprint.HashBins[table];
                    tables[table].AddOrUpdate(key, 1, (k, old) => old + 1);
                }
            }

            double minimumDistinctKeys = runs * 0.9;
            for (int table = 0; table < tables.Length; ++table)
            {
                // Count is the number of distinct keys seen in this table.
                int distinctKeys = tables[table].Count;
                Assert.IsTrue(
                    distinctKeys > minimumDistinctKeys,
                    "Table " + table + " holds " + distinctKeys + " distinct keys, expected more than " + minimumDistinctKeys);
            }
        }
示例#2
0
        /// <summary>
        /// Hashes 100,000 randomly generated fingerprints, stores them in a <c>RAMStorage</c>, and asserts that for
        /// every value reported by <c>HashCountsPerTable</c> the share <c>(total - count) / total</c> does not exceed 1%.
        /// </summary>
        public void DistributionOfHashesHasToBeUniform()
        {
            var lshAlgorithm = LocalitySensitiveHashingAlgorithm.Instance;

            // Seeded so that a failing run can be reproduced exactly; an unseeded generator
            // would make any failure impossible to replay.
            var random = new Random(1);

            var storage = new RAMStorage(25);

            float one    = 8192f / 5512;
            var   config = new DefaultHashingConfig {
                NumberOfLSHTables = 25, NumberOfMinHashesPerTable = 4, HashBuckets = 0
            };

            var track = new ModelReference<int>(1);
            int l     = 100000;

            for (int i = 0; i < l; ++i)
            {
                var schema         = TestUtilities.GenerateRandomFingerprint(random, 200, 128, 32);
                var hash           = lshAlgorithm.Hash(new Fingerprint(schema, i * one, (uint)i, Array.Empty<byte>()), config);
                var subFingerprint = new SubFingerprintData(hash.HashBins, hash.SequenceNumber, hash.StartsAt, new ModelReference<uint>((uint)i), track);
                storage.AddSubFingerprint(subFingerprint);
            }

            var distribution = storage.HashCountsPerTable;

            foreach (var hashPerTable in distribution)
            {
                // Fraction of inserted fingerprints not accounted for by the per-table count.
                double collisions = (double)(l - hashPerTable) / l;
                Assert.IsTrue(collisions <= 0.01d, $"Less than 1% of collisions across 100K hashes: {collisions}");
            }
        }
示例#3
0
        /// <summary>
        /// Stores 100 hashed random fingerprints, then hashes 10 further random fingerprints and asserts that
        /// looking up each of their hash bins in the corresponding table returns no stored sub-fingerprints.
        /// </summary>
        public void FingerprintsCantMatchUniformlyAtRandom()
        {
            var lshAlgorithm = LocalitySensitiveHashingAlgorithm.Instance;

            // Seeded so the outcome is deterministic; with an unseeded generator a rare
            // accidental match would fail the test without any way to reproduce it.
            var random = new Random(1);

            var storage = new RAMStorage(25);

            float one    = 8192f / 5512;
            var   config = new DefaultHashingConfig {
                NumberOfLSHTables = 25, NumberOfMinHashesPerTable = 4, HashBuckets = 0
            };

            var track = new ModelReference<int>(1);

            for (int i = 0; i < 100; ++i)
            {
                var schema         = TestUtilities.GenerateRandomFingerprint(random, 200, 128, 32);
                var hash           = lshAlgorithm.Hash(new Fingerprint(schema, i * one, (uint)i, Array.Empty<byte>()), config);
                var subFingerprint = new SubFingerprintData(hash.HashBins, hash.SequenceNumber, hash.StartsAt, new ModelReference<uint>((uint)i), track);
                storage.AddSubFingerprint(subFingerprint);
            }

            for (int i = 0; i < 10; ++i)
            {
                var schema = TestUtilities.GenerateRandomFingerprint(random, 200, 128, 32);
                var hash   = lshAlgorithm.Hash(new Fingerprint(schema, i * one, (uint)i, Array.Empty<byte>()), config);

                // Iterate over exactly the number of tables the fingerprints were hashed with.
                for (int j = 0; j < config.NumberOfLSHTables; ++j)
                {
                    var ids = storage.GetSubFingerprintsByHashTableAndHash(j, hash.HashBins[j]);
                    Assert.IsFalse(ids.Any(), "Random query " + i + " unexpectedly matched a stored fingerprint in table " + j);
                }
            }
        }
示例#4
0
        /// <summary>
        /// For several similarity levels, simulates pairs of similar fingerprints, hashes both, and measures how
        /// often the two hashes agree on at least one hash bin. The measured rate is compared (tolerance 0.05) with
        /// <c>1 - (1 - s^rows)^bands</c>, where <c>s</c> is the Jaccard similarity derived from the similarity level.
        /// </summary>
        public void ShouldMatchAccordingToTheTheory()
        {
            const int bands          = 25; // segments
            const int rows           = 4;
            const int topWavelets    = 200;
            const int vectorLength   = 128 * 32 * 2;
            const int simulationRuns = 10000;

            var lsh           = LocalitySensitiveHashingAlgorithm.Instance;
            var hashingConfig = new DefaultHashingConfig();

            double[] similarityLevels = { 0.3, 0.5, 0.6, 0.7, 0.75, 0.8, 0.85, 0.9 };
            int levelCount            = similarityLevels.Length;

            // Each parallel iteration writes only to its own index of these arrays.
            var theoreticalProbability = new double[levelCount];
            var observedProbability    = new double[levelCount];
            var averageCandidates      = new double[levelCount];

            Parallel.For(0, levelCount, level =>
            {
                double howSimilar = similarityLevels[level];
                var random        = new Random(level);

                int totalAgreements    = 0;
                int pairsWithCandidate = 0;
                for (int run = 0; run < simulationRuns; ++run)
                {
                    var pair    = TestUtilities.GenerateSimilarFingerprints(random, howSimilar, topWavelets, vectorLength);
                    var first   = lsh.Hash(new Fingerprint(pair.Item1, 0, 0), hashingConfig);
                    var second  = lsh.Hash(new Fingerprint(pair.Item2, 0, 0), hashingConfig);
                    int matches = AgreeOn(first.HashBins, second.HashBins);

                    totalAgreements += matches;
                    if (matches > 0)
                    {
                        ++pairsWithCandidate;
                    }
                }

                // Theoretical probability that at least one band matches.
                double jaccardSimilarity = howSimilar * topWavelets / (2 * topWavelets - howSimilar * topWavelets);
                double noBandMatches     = Math.Pow(1 - Math.Pow(jaccardSimilarity, rows), bands);

                theoreticalProbability[level] = Math.Round(1 - noBandMatches, 4);
                averageCandidates[level]      = Math.Round((double)totalAgreements / simulationRuns, 4);
                observedProbability[level]    = Math.Round((double)pairsWithCandidate / simulationRuns, 4);
            });

            Console.WriteLine("Bands {0}, Rows {1}, Top Wavelets {2}", bands, rows, topWavelets);
            Console.WriteLine($"{"Actual Similarity",5}{"Th. At Least One",19}{"Pr. At Least One",18}{"Avg. Candidates Found",25}");

            // Print the whole table first so it is available even when an assertion below fails.
            for (int level = 0; level < levelCount; ++level)
            {
                Console.WriteLine(
                    "{0,5:0.0000}{1,20:0.0000}{2,18:0.0000}{3,20:0.0000}",
                    similarityLevels[level],
                    theoreticalProbability[level],
                    observedProbability[level],
                    averageCandidates[level]);
            }

            for (int level = 0; level < levelCount; ++level)
            {
                Assert.AreEqual(theoreticalProbability[level], observedProbability[level], 0.05);
            }
        }
示例#5
0
        /// <summary>
        /// Stores the supplied collaborators and sets <c>fingerprintLength</c> to the product of the default
        /// hashing configuration's LSH table count and min-hashes-per-table count.
        /// </summary>
        /// <param name="solr">Solr operations object kept by this instance.</param>
        /// <param name="dictionaryToHashConverter">Dictionary-to-hash converter kept by this instance.</param>
        /// <param name="hashConverter">Hash converter kept by this instance.</param>
        /// <param name="solrQueryBuilder">Solr query builder kept by this instance.</param>
        internal SubFingerprintDao(ISolrOperations<SubFingerprintDTO> solr, IDictionaryToHashConverter dictionaryToHashConverter, IHashConverter hashConverter, ISolrQueryBuilder solrQueryBuilder)
        {
            var defaults = new DefaultHashingConfig();
            int tableCount = defaults.NumberOfLSHTables;
            int hashesPerTable = defaults.NumberOfMinHashesPerTable;
            fingerprintLength = tableCount * hashesPerTable;

            this.solrQueryBuilder = solrQueryBuilder;
            this.hashConverter = hashConverter;
            this.dictionaryToHashConverter = dictionaryToHashConverter;
            this.solr = solr;
        }
示例#6
0
        /// <summary>
        /// For several similarity levels, simulates pairs of similar image fingerprints and checks two things:
        /// the average Hamming distance between the pair matches the requested distance (tolerance 1), and the
        /// floor of the average number of agreeing hash bins equals the expected threshold for that level.
        /// </summary>
        /// <remarks>
        /// Measurements are gathered inside the parallel loop and asserted afterwards on the calling thread.
        /// A failure is then reported as a plain assertion failure instead of an <c>AggregateException</c>,
        /// and the diagnostic table is printed for every similarity level first.
        /// </remarks>
        public void ShouldBeAbleToControlReturnedCandidatesWithThresholdParameter()
        {
            int l = 25, k = 4, width = 128, height = 72;

            var hashingConfig = new DefaultHashingConfig
            {
                Width = width, Height = height, NumberOfLSHTables = l, NumberOfMinHashesPerTable = k
            };

            var lsh = LocalitySensitiveHashingAlgorithm.Instance;

            double[] howSimilarly       = { 0.3, 0.5, 0.6, 0.7, 0.75, 0.8, 0.85, 0.9 };
            int[]    expectedThresholds = { 0, 0, 0, 2, 3, 5, 7, 11 };

            const int simulations = 10000;

            // Loop-invariant: depends only on the fingerprint dimensions.
            int topWavelets = (int)(0.035 * width * height);

            // Each parallel iteration writes only to its own index of these arrays.
            double[] averageHammingDistance = new double[howSimilarly.Length];
            double[] averageTableMatches    = new double[howSimilarly.Length];

            Parallel.For(0, howSimilarly.Length, r =>
            {
                var random           = new Random((r + 1) * 100);
                double howSimilar    = howSimilarly[r];
                var agreeOn          = new List<int>(simulations);
                var hammingDistances = new List<int>(simulations);
                for (int i = 0; i < simulations; ++i)
                {
                    var fingerprints    = TestUtilities.GenerateSimilarFingerprints(random, howSimilar, topWavelets, width * height * 2);
                    int hammingDistance = similarity.CalculateHammingDistance(fingerprints.Item1.ToBools(), fingerprints.Item2.ToBools());
                    hammingDistances.Add(hammingDistance);
                    var hashed1    = lsh.HashImage(new Fingerprint(fingerprints.Item1, 0, 0, Array.Empty<byte>()), hashingConfig);
                    var hashed2    = lsh.HashImage(new Fingerprint(fingerprints.Item2, 0, 0, Array.Empty<byte>()), hashingConfig);
                    int agreeCount = AgreeOn(hashed1.HashBins, hashed2.HashBins);
                    agreeOn.Add(agreeCount);
                }

                averageHammingDistance[r] = hammingDistances.Average();
                averageTableMatches[r]    = agreeOn.Average();
            });

            for (int r = 0; r < howSimilarly.Length; ++r)
            {
                Console.WriteLine($"Similarity: {howSimilarly[r]: 0.00}, Avg. Table Matches {averageTableMatches[r]: 0.000}");
            }

            for (int r = 0; r < howSimilarly.Length; ++r)
            {
                int requested = (int)((1 - howSimilarly[r]) * topWavelets * 2);
                Assert.AreEqual(requested, averageHammingDistance[r], 1);
                Assert.AreEqual(expectedThresholds[r], Math.Floor(averageTableMatches[r]));
            }
        }
示例#7
0
        /// <summary>
        /// Hashes <c>runs</c> randomly generated 128x72 fingerprints with <c>HashImage</c> and asserts that each
        /// of the 25 tracked hash tables ends up with more than 90% distinct keys.
        /// </summary>
        public void ShouldNotGenerateVideoCollisions()
        {
            var lshAlgorithm = LocalitySensitiveHashingAlgorithm.Instance;

            // Fixed seed: the generated fingerprints are the same on every run.
            var    random = new Random(1);
            int    width = 128, height = 72;
            double topWaveletsPercentage = 0.04;

            // Loop-invariant count handed to the fingerprint generator.
            int topWavelets = (int)(width * height * topWaveletsPercentage);

            var hashingConfig = new DefaultHashingConfig()
            {
                Width = width, Height = height
            };

            // One dictionary per hash table, mapping hash bin value -> number of fingerprints that produced it.
            var tables = Enumerable.Range(0, 25).Select(_ => new ConcurrentDictionary<int, int>()).ToArray();

            int runs = 10000;

            // The loop bound and the assertion threshold must both derive from 'runs'.
            for (int i = 0; i < runs; ++i)
            {
                var tinyFingerprint = TestUtilities.GenerateRandomFingerprint(random, topWavelets, width, height);

                var fingerprint = new Fingerprint(tinyFingerprint, 0, 0, Array.Empty<byte>());

                var hashedFingerprint = lshAlgorithm.HashImage(fingerprint, hashingConfig);

                for (int table = 0; table < tables.Length; ++table)
                {
                    int key = hashedFingerprint.HashBins[table];
                    tables[table].AddOrUpdate(key, 1, (k, old) => old + 1);
                }
            }

            Console.WriteLine(string.Join(",", tables.Select(t => t.Count)));

            double minimumDistinctKeys = runs * 0.9;
            for (int table = 0; table < tables.Length; ++table)
            {
                // Count is the number of distinct keys seen in this table.
                int distinctKeys = tables[table].Count;
                Assert.IsTrue(
                    distinctKeys > minimumDistinctKeys,
                    "Table " + table + " holds " + distinctKeys + " distinct keys, expected more than " + minimumDistinctKeys);
            }
        }