Example #1
0
        /// <include file="documentation.xml" path="/Collections/BloomFilter/TuningConstructor/*"/>
        public BloomFilter(int itemCount, float falsePositiveRate, IMultiHashProvider<T> hashProvider, int maxHashCount)
        {
            // Sizes the filter from an expected item count and a target false positive rate (exclusive range 0..1).
            // A null hashProvider selects MultiHashProvider<T>.Default, and a maxHashCount of zero means "use as many hash
            // functions as the provider offers". Throws ArgumentOutOfRangeException for out-of-range numeric arguments and
            // ArgumentException if the provider has no hash functions or the required bit array would be too large.
            if (itemCount < 0)
            {
                throw new ArgumentOutOfRangeException("itemCount");
            }
            // written as a negated range test so that NaN, which fails every comparison, is rejected too
            if (!(falsePositiveRate > 0 && falsePositiveRate < 1))
            {
                throw new ArgumentOutOfRangeException("falsePositiveRate");
            }
            if (maxHashCount < 0)
            {
                throw new ArgumentOutOfRangeException("maxHashCount");
            }
            if (itemCount == 0)
            {
                itemCount = 1;        // prevent a bit count of zero
            }
            if (hashProvider == null)
            {
                hashProvider = MultiHashProvider<T>.Default;
            }
            if (hashProvider.HashCount <= 0)
            {
                throw new ArgumentException("The hash provider does not support any hash functions.");
            }
            maxHashCount = maxHashCount == 0 ? hashProvider.HashCount : Math.Min(maxHashCount, hashProvider.HashCount);

            // assuming an optimal number of hash functions, the required bit count is -(itemCount * ln(falsePositiveRate) / ln(2)^2).
            // given that number of bits, the optimal number of hash functions is bitCount * ln(2) / itemCount. we can factor out ln(2)
            // and itemCount, giving hashFunctions = -ln(falsePositiveRate) / ln(2). we'll multiply by the reciprocal and then round.
            // for false positive rates above about 0.707 the rounded value is zero, so we clamp it to at least one hash function;
            // a hash count of zero would otherwise lead to a division by zero and a NaN bit count below
            int hashCount = (int)(Math.Log(falsePositiveRate) * -1.4426950408890 + 0.5);
            hashCount = Math.Max(1, Math.Min(maxHashCount, hashCount));

            // since the hash count wasn't an exact integer, and may have been clipped by maxHashCount, we'll recalculate the bit count
            // to be optimal for the actual number of hash functions. the optimal number of bits for a given false positive rate and
            // number of hash functions can be computed using the following general formula:
            // falsePositiveRate ~= (1 - e^(-hashCount * itemCount / bitCount)) ^ hashCount
            // if we use p = falsePositiveRate, k = hashCount, n = itemCount, and m = bitCount, we can solve for m:
            // p ~= (1 - e^(-kn/m))^k
            // p^(1/k) ~= 1 - e^(-kn/m)
            // 1 - p^(1/k) ~= e^(-kn/m)
            // ln(1 - p^(1/k)) ~= -kn / m
            // -kn / ln(1 - p^(1/k)) ~= m
            double logOfZeroBitFraction = Math.Log(1 - Math.Pow(falsePositiveRate, 1.0 / hashCount));

            // if p^(1/k) is too small to register when subtracted from 1, the logarithm is exactly zero. the true bit count is
            // then astronomically large, so we treat it as infinite rather than dividing by zero. otherwise we round the result up
            double requiredBits = logOfZeroBitFraction < 0
                ? Math.Ceiling(-(double)hashCount * itemCount / logOfZeroBitFraction)
                : double.PositiveInfinity;

            // 4294967264 is the largest number of bits that won't round up to a number greater than 2^32 when we do the rounding below
            // TODO: with this limit, we only get Bloom filters up to 512MB. we should increase this, especially on 64-bit architectures
            // where we have native 64-bit ints
            const double MaxBitCount = 4294967264;
            if (requiredBits > MaxBitCount)
            {
                throw new ArgumentException("Too many bits (" + requiredBits.ToString() + ") would be required.");
            }

            long bitCount = (long)requiredBits; // safe: the value is a finite whole number no greater than MaxBitCount

            this.bits         = new uint[(int)((bitCount + 31) / 32)]; // round up to the nearest 32 bits
            this.hashProvider = hashProvider;
            this.hashCount    = hashCount;
        }
Example #2
0
        /// <include file="documentation.xml" path="/Collections/BloomFilter/DirectConstructor/*"/>
        public BloomFilter(int bitCount, int maxHashCount, IMultiHashProvider<T> hashProvider)
        {
            // Builds a filter whose size is given directly instead of being derived from a target error rate.
            // Both numeric arguments must be strictly positive.
            if (!(bitCount > 0 && maxHashCount > 0))
            {
                throw new ArgumentOutOfRangeException();
            }

            // a null provider falls back to the default provider for T
            hashProvider = hashProvider ?? MultiHashProvider<T>.Default;
            if (hashProvider.HashCount <= 0)
            {
                throw new ArgumentException("The hash provider does not support any hash functions.");
            }

            // each uint stores 32 bits, so a partially used trailing word still needs a whole array slot
            int wordCount = bitCount >> 5;
            if ((bitCount & 31) != 0)
            {
                wordCount++;
            }

            // never use more hash functions than the provider can supply
            this.hashCount    = Math.Min(hashProvider.HashCount, maxHashCount);
            this.hashProvider = hashProvider;
            this.bits         = new uint[wordCount];
        }
Example #3
0
 /// <include file="documentation.xml" path="/Collections/BloomFilter/TuningConstructor/*[not(@name='maxHashCount')]"/>
 public BloomFilter(int itemCount, float falsePositiveRate, IMultiHashProvider <T> hashProvider)
     : this(itemCount, falsePositiveRate, hashProvider, 0)
 {
     // Intentionally empty: all work is delegated to the four-argument overload. The trailing 0 is its maxHashCount
     // argument, which that overload replaces with hashProvider.HashCount, i.e. no extra cap on the number of hash functions.
 }