Ejemplo n.º 1
0
        public void ShouldCorrectlyCalculateHammingDistanceBetweenLongs()
        {
            // Verifies that the Hamming similarity computed directly on the arrays produced by
            // GenerateByteArray equals the similarity computed on the same data after it has
            // been packed by hashConverter.ToInts (4 passed as the last argument for the packed form).
            int byteCount = 50000;
            int intCount = byteCount / 4;

            for (int iteration = 0; iteration < 1000; ++iteration)
            {
                // Generation and conversion order is kept per input in case the generator is stateful.
                var firstBytes = GenerateByteArray(byteCount);
                var firstInts = hashConverter.ToInts(firstBytes, intCount);

                var secondBytes = GenerateByteArray(byteCount);
                var secondInts = hashConverter.ToInts(secondBytes, intCount);

                var expected = similarityUtility.CalculateHammingSimilarity(firstBytes, secondBytes);
                var actual = similarityUtility.CalculateHammingSimilarity(firstInts, secondInts, 4);

                Assert.AreEqual(expected, actual);
            }
        }
        /// <summary>
        ///   Converts the min-hash signature of a fingerprint into LSH bucket values.
        ///   The bytes are packed into integers via <c>hashConverter.ToInts</c>; when a bucket
        ///   limit is supplied, every value is multiplied by <c>LargePrime</c>, reduced modulo
        ///   the limit and made non-negative.
        /// </summary>
        /// <param name = "minHashes">Min hashes gathered from a fingerprint [N = 100]</param>
        /// <param name = "numberOfHashTables">Number of hash tables [L = 25]; forwarded to the converter as the requested count</param>
        /// <param name = "numberOfHashesPerTable">Number of min hashes per key [N = 4]; not read by this implementation</param>
        /// <param name = "hashBucketsCount">Max number of hash buckets per hash table; 0 disables the bucket reduction</param>
        /// <returns>
        ///   The array returned by the converter (presumably one entry per hash table - confirm against
        ///   <c>ToInts</c>), untouched when <paramref name = "hashBucketsCount"/> is 0 and reduced in place otherwise.
        /// </returns>
        protected virtual int[] GroupIntoHashTables(byte[] minHashes, int numberOfHashTables, int numberOfHashesPerTable, int hashBucketsCount)
        {
            int[] buckets = hashConverter.ToInts(minHashes, numberOfHashTables);

            if (hashBucketsCount != 0)
            {
                for (int index = 0; index < buckets.Length; index++)
                {
                    // Same evaluation order as a single expression: multiply first, then take the remainder.
                    var scaled = buckets[index] * LargePrime;
                    var remainder = scaled % hashBucketsCount;

                    // The remainder may be negative, so fold it to a non-negative value.
                    buckets[index] = System.Math.Abs(remainder);
                }
            }

            return buckets;
        }