A keyed universal hash function is a function that produces up to 32 bits that meet the definition of a universal hash (and an additional 32 bits that do not meet the requirement and so, in theory, could be biased), guaranteeing that: (1) an attacker cannot construct inputs that are likely to have a bias of 0/1 at any bit position, and (2) an attacker cannot construct two values that will collide with a greater-than-random chance, so long as that attacker (a) does not know the key, and (b) is unable to observe the outputs of the function. In contrast, standard hash functions do not guarantee that attackers cannot easily generate collisions, even if the application using the function prefixes or postfixes the value to be hashed with a key. This leaves systems that use standard (non-universal) hash functions vulnerable to algorithmic complexity attacks. See: https://www.usenix.org/legacy/events/sec03/tech/full_papers/crosby/crosby.pdf When generating a hash, the lower 32 bits generated by the function should be thrown out unless you are certain that using bits that may be subject to attacker bias will cause no harm. (For example, for a hash ring that uses all 64-bit points, where the first 32 are the universal hash bits, collisions should be strictly less likely than in a ring that only uses the 32 safe bits---though your ring entries will require twice as much space.) An instance of a universal hash function is re-entrant, so once an instance has been constructed, the hash function can be safely called from multiple threads to perform more than one hash at a time. For more information on the construction used in this implementation, see https://en.wikipedia.org/wiki/Universal_hashing (this construction is the last one in the "hashing vectors" section, labeled with the superscript "strong") and http://comjnl.oxfordjournals.org/content/57/11/1624.full.pdf (this is the simple "Multilinear" construction).
        public void UniversalHashTestBias()
        {
            // Number of random strings to hash.
            const ulong trialCount = 100000000;
            // Width of the hash value being examined.
            const int totalBits = 64;
            // Only this many of the most significant bits are required to be unbiased.
            const int unbiasedBitCount = 32;
            // Largest tolerated relative deviation from a 50/50 split of ones and zeros.
            const double maxAllowedBias = 0.0005d;
            // Mask selecting the most significant bit of a 64-bit value.
            const ulong highBitMask = 0x8000000000000000ul;

            Pseudorandom generator = new Pseudorandom();
            UniversalHashFunction hashFunction = new UniversalHashFunction("Louis Tully as played by Rick Moranis!");

            // onesSeen[i] counts how often the bit i positions below the most significant bit was 1.
            ulong[] onesSeen = new ulong[totalBits];

            ulong completed = 0;
            while (completed < trialCount)
            {
                UInt64 hash = hashFunction.Hash(
                    generator.GetString(8),
                    UniversalHashFunction.MaximumNumberOfResultBitsAllowing32BiasedBits);

                // Walk the mask from the most significant bit downwards, one position per counter.
                ulong mask = highBitMask;
                for (int position = 0; position < onesSeen.Length; position++, mask >>= 1)
                {
                    if ((hash & mask) != 0ul)
                    {
                        onesSeen[position]++;
                    }
                }

                completed++;
            }

            // Relative bias is (0.5 - fraction of ones) / 0.5: zero for a perfectly balanced bit,
            // +/-1 for a bit that is stuck at a single value.
            for (int position = 0; position < unbiasedBitCount; position++)
            {
                double fractionOfOnes = ((double)onesSeen[position]) / ((double)trialCount);
                double relativeBias = (0.5d - fractionOfOnes) / 0.5d;
                Assert.True(Math.Abs(relativeBias) < maxAllowedBias);
            }

        }
Esempio n. 2
0
        /// <summary>
        /// Creates a filter array together with the hash functions that map elements to bit indexes in it.
        /// </summary>
        /// <param name="numberOfBitsInArray">The size of the array in bits. The allocation is rounded up to a whole
        /// number of bytes, so the array may hold up to seven more bits than requested.</param>
        /// <param name="maximumBitIndexesPerElement">The maximum (and default) number of indexes (bits) in the array
        /// to associate with elements. One hash function is created per index.</param>
        /// <param name="initilizeBitsOfArrayAtRandom">If true, the bits of the filter array are filled from the strong
        /// random number generator (each bit independently 0 or 1 with probability 0.5);
        /// otherwise every bit starts at zero.</param>
        /// <param name="saltForHashFunctions">A salt used to generate the hash functions.
        /// Any two filter arrays generated with the same salt will use the same hash functions.
        /// The salt should be kept secret from attackers who might try to manipulate the selection of elements,
        /// such as to intentionally cause bit collisions within the array.</param>
        public FilterArray(int numberOfBitsInArray, int maximumBitIndexesPerElement, bool initilizeBitsOfArrayAtRandom,
            string saltForHashFunctions = "")
        {
            // Round the bit count up to whole bytes so that at least numberOfBitsInArray bits are available.
            int byteCount = (numberOfBitsInArray + 7) / 8;

            // Build one hash function per bit index, each keyed by its position followed by the shared salt.
            UniversalHashFunction[] hashFunctions = new UniversalHashFunction[maximumBitIndexesPerElement];
            for (int index = 0; index < hashFunctions.Length; index++)
            {
                string key = index.ToString() + ":" + saltForHashFunctions;
                hashFunctions[index] = new UniversalHashFunction(key, 64);
            }
            HashFunctionsMappingElementsToBitsInTheArray = hashFunctions;

            if (!initilizeBitsOfArrayAtRandom)
            {
                // Every bit of a freshly sized bit array starts out cleared.
                BitArray = new BitArray(byteCount * 8);
                return;
            }

            // Fill the backing bytes from the cryptographic random number generator so that
            // roughly half of the bits start out set.
            byte[] randomBytes = new byte[byteCount];
            StrongRandomNumberGenerator.GetBytes(randomBytes);
            BitArray = new BitArray(randomBytes);
        }
Esempio n. 3
0
        /// <summary>
        /// Construct a binomial sketch, in which a set of k hash functions (k=NumberOfIndexes) will map any
        /// key to k points with an array of n bits (sizeInBits).
        /// When one Adds a key to a binomial sketch, a random bit among the subset of k that are currently 0 will be set to 1.
        /// To ensure roughly half the bits remain zero, a random index from the subset of all k bits that are currently 1 will be set to 0.
        /// 
        /// Over time, popular keys will have almost all of their bits set and unpopular keys will be expected to have roughly half their bits set.
        /// </summary>
        /// <param name="sizeInBits">The total number of bits to maintain in the table.  In theoretical discussions of bloom filters and sketches
        /// in general, this is usually referred to by the letter n.  The value is rounded up to the next multiple of 8
        /// so that the sketch occupies a whole number of bytes.</param>
        /// <param name="numberOfIndexes">The number of indexes to map each key to, each of which is assigned a unique pseudorandom
        /// hash function.</param>
        /// <param name="keyToPreventAlgorithmicComplexityAttacks">A pseudorandom seed that allows the same sketch to be created
        /// twice, but (if kept secret) prevents an attacker from knowing the distribution of hashes and thus counters
        /// algorithmic complexity attacks.  A null key is treated as the empty string.</param>
        public BinomialSketch(int sizeInBits, int numberOfIndexes, string keyToPreventAlgorithmicComplexityAttacks)
        {
            NumberOfIndexes = numberOfIndexes;
            string hashKeySuffix = keyToPreventAlgorithmicComplexityAttacks ?? "";

            // The aging bound is derived from the size the caller requested, before any rounding.
            _maxNumberOfObservationsAccountingForAging = (ulong) sizeInBits/(ulong) (NumberOfIndexes*2);

            // Round up to the next byte boundary.  Adding 7 and then clearing the low three bits
            // leaves exact multiples of 8 unchanged and lifts everything else to the next multiple.
            // (The previous "(sizeInBits + 8) ^ 0x7" toggled the low bits instead of clearing them,
            // which could leave SizeInBits larger than the number of bits actually allocated below.)
            SizeInBits = (sizeInBits + 7) & ~0x7;
            int capacityInBytes = SizeInBits / 8;

            // One hash function per index, each keyed by its position followed by the caller's key.
            _universalHashFunctions = new UniversalHashFunction[numberOfIndexes];
            for (int i = 0; i < _universalHashFunctions.Length; i++)
            {
                _universalHashFunctions[i] =
                    new UniversalHashFunction(i.ToString() + hashKeySuffix, 64);
            }

            // Initialize the sketch setting ~half the bits randomly to zero by using the
            // cryptographic random number generator.
            byte[] initialSketchValues = new byte[capacityInBytes];
            StrongRandomNumberGenerator.GetBytes(initialSketchValues);
            _sketch = new BitArray(initialSketchValues);

            // binomialProbability[k] = (n choose k) * (p)^k * (1-p)^(n-k)
            // since p=.5, this is (n choose k) 0.5^(n)
            double[] binomialProbability = new double[numberOfIndexes + 1];
            double probabilityOfAnyGivenValue = Math.Pow(0.5d, numberOfIndexes);
            double nChooseK = 1d;
            for (int k = 0; k <= numberOfIndexes/2; k++)
            {
                // The distribution is symmetric around n/2, so fill both tails at once.
                binomialProbability[k] = binomialProbability[numberOfIndexes-k] =
                    nChooseK * probabilityOfAnyGivenValue;
                // Step from (n choose k) to (n choose k+1).
                nChooseK *= (numberOfIndexes - k)/(1d + k);
            }

            // _cumulativeProbabilitySetByChance[k] accumulates binomialProbability[k..n],
            // built from the top index downwards.
            _cumulativeProbabilitySetByChance = new double[numberOfIndexes + 1];
            _cumulativeProbabilitySetByChance[numberOfIndexes] = binomialProbability[numberOfIndexes];
            for (int k = numberOfIndexes; k > 0; k--)
            {
                _cumulativeProbabilitySetByChance[k-1] =
                    _cumulativeProbabilitySetByChance[k] + binomialProbability[k-1];
            }
        }
 /// <summary>
 /// Creates a client for a binomial ladder filter whose bit array is distributed across shards.
 /// </summary>
 /// <param name="numberOfShards">The number of shards that the bit array of the binomial ladder filter will be divided into.
 /// The greater the number of shards, the more evenly it can be distributed.  However, the number of shards should still
 /// be a few orders of magnitude smaller than the ladder height.</param>
 /// <param name="defaultHeightOfLadder">The default ladder height for elements on the ladder.</param>
 /// <param name="shardToHostMapping">An object that maps each shard number to the host responsible for that shard.</param>
 /// <param name="configurationKey">A key used to protect the hashing from algorithmic complexity attacks.
 /// This key should not be unique to the application using the filter and should not be known to any untrusted
 /// systems that might control which elements get sent to the filter.  If an attacker could submit elements to the filter
 /// and knew this key, the attacker could arrange for all elements to go to the same shard and in so doing overload that shard.</param>
 /// <param name="mininmumCacheFreshnessRequired">The maximum time that an element should be kept in the cache of elements at the top of their ladder.
 /// In other words, how long to bound the possible time that an element may still appear to be at the top of its ladder in the cache
 /// when it is no longer at the top of the ladder based on the filter array.  Defaults to one second when null.</param>
 public DistributedBinomialLadderFilterClient(int numberOfShards, int defaultHeightOfLadder, IDistributedResponsibilitySet<RemoteHost> shardToHostMapping, string configurationKey, TimeSpan? mininmumCacheFreshnessRequired = null)
 {
     NumberOfShards = numberOfShards;
     MaxLadderHeight = defaultHeightOfLadder;

     // NOTE(review): earlier documentation described this default as one minute, but the value
     // constructed here is one second -- confirm which of the two is intended.
     if (mininmumCacheFreshnessRequired.HasValue)
     {
         MinimumCacheFreshnessRequired = mininmumCacheFreshnessRequired.Value;
     }
     else
     {
         MinimumCacheFreshnessRequired = new TimeSpan(hours: 0, minutes: 0, seconds: 1);
     }

     // The cache is sized at two entries per shard.
     int cacheCapacity = 2*NumberOfShards;
     CacheOfElementsAtTopOfLadder = new FixedSizeLruCache<string, DateTime>(cacheCapacity);

     ShardHashFunction = new UniversalHashFunction(configurationKey);
     ShardToHostMapping = shardToHostMapping;
 }