예제 #1
0
 /// <summary>
 /// Creates a fingerprint from its encoded schema, start position, sequence number and original point data.
 /// </summary>
 /// <param name="schema">Encoded fingerprint schema stored in <c>Schema</c>.</param>
 /// <param name="startAt">Start position stored in <c>StartsAt</c> (unit is not visible here; presumably seconds, to be confirmed).</param>
 /// <param name="sequenceNumber">Sequence number stored in <c>SequenceNumber</c>.</param>
 /// <param name="originalPoint">Raw bytes stored in <c>OriginalPoint</c>; the array reference is kept as-is, not copied.</param>
 public Fingerprint(IEncodedFingerprintSchema schema, float startAt, uint sequenceNumber, byte[] originalPoint)
 {
     this.Schema = schema;
     this.StartsAt = startAt;
     this.SequenceNumber = sequenceNumber;
     this.OriginalPoint = originalPoint;
 }
예제 #2
0
        /// <summary>
        /// Computes an <paramref name="n"/>-element min-hash signature of the given schema.
        /// </summary>
        /// <param name="schema">Encoded schema probed through <c>IsTrueAt</c>.</param>
        /// <param name="n">Number of permutations (hash functions) to apply.</param>
        /// <returns>
        /// An array where element <c>i</c> is the position inside permutation <c>i</c> of the first index
        /// for which the schema is true, or the permutation length when no such index exists.
        /// </returns>
        /// <exception cref="ArgumentException">Thrown when <paramref name="n"/> exceeds <c>PermutationsCount</c>.</exception>
        public int[] Hash(IEncodedFingerprintSchema schema, int n)
        {
            if (n > PermutationsCount)
            {
                throw new ArgumentException("n should not exceed number of available hash functions: " + PermutationsCount);
            }

            int[][] allPermutations = permutations.GetPermutations();
            int[] signature = new int[n];
            for (int hashIndex = 0; hashIndex < n; hashIndex++)
            {
                int[] permutation = allPermutations[hashIndex];

                // Advance until the first permuted index that is set; stops at the permutation length if none is.
                int position = 0;
                while (position < permutation.Length && !schema.IsTrueAt(permutation[position]))
                {
                    position++;
                }

                signature[hashIndex] = position;
            }

            return signature;
        }
예제 #3
0
        /// <summary>
        /// Compute Min Hash signature of a fingerprint
        /// </summary>
        /// <param name="fingerprint">
        /// Encoded fingerprint whose bits are probed through <c>IsTrueAt</c>
        /// </param>
        /// <param name="n">
        /// The number of hash functions (permutations) to use
        /// </param>
        /// <returns>
        /// An array of <paramref name="n"/> bytes. Element <c>i</c> is the position, within permutation <c>i</c>,
        /// of the first index at which the fingerprint is true, or 255 when no such index is found
        /// among the first 256 positions of that permutation.
        /// </returns>
        /// <exception cref="ArgumentException">
        /// Thrown when <paramref name="n"/> exceeds <c>PermutationsCount</c>.
        /// </exception>
        /// <remarks>
        /// The basic idea in the Min Hashing scheme is to randomly permute the rows and for each
        /// column c(i) compute its hash value h(c(i)) as the index of the first row under the permutation that has a 1 in that column.
        /// See http://infolab.stanford.edu/~ullman/mmds/book.pdf s.3.3.4
        /// </remarks>
        private byte[] ComputeMinHashSignature(IEncodedFingerprintSchema fingerprint, int n)
        {
            if (n > PermutationsCount)
            {
                throw new ArgumentException("n should not exceed number of available hash functions: " + PermutationsCount);
            }

            int[][] perms   = permutations.GetPermutations();
            byte[]  minHash = new byte[n];
            for (int i = 0; i < n; i++)
            {
                // Sentinel: no set bit was found within the positions representable in a byte.
                minHash[i] = byte.MaxValue;

                // The counter must be an int: a byte counter wraps from 255 back to 0, so with a
                // permutation of 256 or more entries the loop condition could never become false.
                // The scan is capped at byte.MaxValue because later positions do not fit the result type.
                for (int j = 0; j < perms[i].Length && j <= byte.MaxValue; j++)
                {
                    int indexAt = perms[i][j];
                    if (fingerprint.IsTrueAt(indexAt))
                    {
                        minHash[i] = (byte)j; // first occurrence of a set bit under this permutation
                        break;
                    }
                }
            }

            return(minHash);
        }
예제 #4
0
 /// <summary>
 /// Computes the min-hash signature of a fingerprint by delegating to <c>ComputeMinHashSignature</c>.
 /// </summary>
 /// <param name="fingerprint">Encoded fingerprint to hash.</param>
 /// <param name="n">Number of hash functions to use.</param>
 /// <returns>The byte array produced by <c>ComputeMinHashSignature</c>.</returns>
 public byte[] Hash(IEncodedFingerprintSchema fingerprint, int n)
 {
     byte[] signature = ComputeMinHashSignature(fingerprint, n);
     return signature;
 }
예제 #5
0
 /// <summary>
 /// Creates a fingerprint from its encoded schema, start position and sequence number.
 /// </summary>
 /// <param name="schema">Encoded fingerprint schema stored in <c>Schema</c>.</param>
 /// <param name="startAt">Start position stored in <c>StartsAt</c> (unit is not visible here; presumably seconds, to be confirmed).</param>
 /// <param name="sequenceNumber">Sequence number stored in <c>SequenceNumber</c>.</param>
 public Fingerprint(IEncodedFingerprintSchema schema, float startAt, uint sequenceNumber)
 {
     this.Schema = schema;
     this.StartsAt = startAt;
     this.SequenceNumber = sequenceNumber;
 }
예제 #6
0
 /// <summary>
 /// Creates a fingerprint from its encoded signature, start position and sequence number.
 /// </summary>
 /// <param name="signature">Encoded fingerprint schema stored in <c>Signature</c>.</param>
 /// <param name="startAt">Start position stored in <c>StartsAt</c> (unit is not visible here; presumably seconds, to be confirmed).</param>
 /// <param name="sequenceNumber">Sequence number stored in <c>SequenceNumber</c>.</param>
 public Fingerprint(IEncodedFingerprintSchema signature, float startAt, uint sequenceNumber)
 {
     this.Signature = signature;
     this.StartsAt = startAt;
     this.SequenceNumber = sequenceNumber;
 }