// Tuning constructor: sizes the filter from an expected item count and a target false positive rate.
//   itemCount         - expected number of items; must be non-negative (zero is treated as one).
//   falsePositiveRate - desired false positive rate; must be strictly between 0 and 1 (NaN is rejected).
//   hashProvider      - source of hash functions; null selects MultiHashProvider<T>.Default.
//   maxHashCount      - upper bound on the number of hash functions used; zero means "as many as the provider offers".
// Throws ArgumentOutOfRangeException for an out-of-range numeric argument, and ArgumentException if the provider
// offers no hash functions or if the filter would need more bits than can be allocated.
/// <include file="documentation.xml" path="/Collections/BloomFilter/TuningConstructor/*"/>
public BloomFilter(int itemCount, float falsePositiveRate, IMultiHashProvider<T> hashProvider, int maxHashCount)
{
    if (itemCount < 0)
    {
        throw new ArgumentOutOfRangeException("itemCount");
    }
    // written as a negated range test so that NaN, which fails every comparison, is rejected as well
    if (!(falsePositiveRate > 0 && falsePositiveRate < 1))
    {
        throw new ArgumentOutOfRangeException("falsePositiveRate");
    }
    if (maxHashCount < 0)
    {
        throw new ArgumentOutOfRangeException("maxHashCount");
    }

    if (itemCount == 0)
    {
        itemCount = 1; // prevent a bit count of zero
    }

    if (hashProvider == null)
    {
        hashProvider = MultiHashProvider<T>.Default;
    }
    if (hashProvider.HashCount <= 0)
    {
        throw new ArgumentException("The hash provider does not support any hash functions.");
    }

    // a maxHashCount of zero means "no explicit cap", so only the provider limits the hash count
    maxHashCount = maxHashCount == 0 ? hashProvider.HashCount : Math.Min(maxHashCount, hashProvider.HashCount);

    // assuming an optimal number of hash functions, the required bit count is -(itemCount * ln(falsePositiveRate) / ln(2)^2).
    // given that number of bits, the optimal number of hash functions is bitCount * ln(2) / itemCount. we can factor out ln(2)
    // and itemCount, giving hashFunctions = -ln(falsePositiveRate) / ln(2). we'll multiply by the reciprocal and then round
    int idealHashCount = (int)(Math.Log(falsePositiveRate) * -1.4426950408890 + 0.5);

    // for false positive rates above roughly 0.707 the ideal count rounds down to zero, but a filter needs at least one
    // hash function (a zero count would also make the bit count formula below evaluate to 0/0)
    int hashCount = Math.Max(1, Math.Min(maxHashCount, idealHashCount));

    // since the hash count wasn't an exact integer, and may have been clipped by maxHashCount, we'll recalculate the bit count
    // to be optimal for the actual number of hash functions. the optimal number of bits for a given false positive rate and
    // number of hash functions can be computed using the following general formula:
    //   falsePositiveRate ~= (1 - e^(-hashCount * itemCount / bitCount)) ^ hashCount
    // if we use p = falsePositiveRate, k = hashCount, n = itemCount, and m = bitCount, we can solve for m:
    //   p ~= (1 - e^(-kn/m))^k
    //   p^(1/k) ~= 1 - e^(-kn/m)
    //   1 - p^(1/k) ~= e^(-kn/m)
    //   ln(1 - p^(1/k)) ~= -kn / m
    //   -kn / ln(1 - p^(1/k)) ~= m
    // the result is rounded up so that the filter is never smaller than the formula requires
    double requiredBits = Math.Ceiling(-(double)hashCount * itemCount / Math.Log(1 - Math.Pow(falsePositiveRate, 1.0 / hashCount)));

    // 4294967264 is the largest number of bits that won't round up to a number greater than 2^32 when we do the rounding below
    // TODO: with this limit, we only get Bloom filters up to 512MB. we should increase this, especially on 64-bit architectures
    // where we have native 64-bit ints
    const double MaxBitCount = 4294967264;

    // validate while the value is still a double: a tiny false positive rate combined with a small hash count can produce
    // a huge or infinite result (1 - p^(1/k) rounds to exactly 1, whose logarithm is 0), and converting such a value to an
    // integer first would yield an unspecified number that could slip past the limit
    if (!(requiredBits >= 1 && requiredBits <= MaxBitCount))
    {
        string description = double.IsNaN(requiredBits) || double.IsInfinity(requiredBits)
            ? "an unrepresentably large number"
            : requiredBits.ToString("F0");
        throw new ArgumentException("Too many bits (" + description + ") would be required.");
    }

    long bitCount = (long)requiredBits;
    this.bits = new uint[(int)((bitCount + 31) / 32)]; // round up to the nearest 32 bits
    this.hashProvider = hashProvider;
    this.hashCount = hashCount;
}
// Direct constructor: builds the filter from an explicit bit count rather than deriving the size from a target
// false positive rate.
//   bitCount     - number of bits requested; must be positive. Storage is allocated in whole 32-bit words.
//   maxHashCount - upper bound on the number of hash functions used; must be positive.
//   hashProvider - source of hash functions; null selects MultiHashProvider<T>.Default.
// Throws ArgumentOutOfRangeException if either count is not positive, and ArgumentException if the provider
// reports no hash functions.
/// <include file="documentation.xml" path="/Collections/BloomFilter/DirectConstructor/*"/>
public BloomFilter(int bitCount, int maxHashCount, IMultiHashProvider<T> hashProvider)
{
    if (bitCount <= 0)
    {
        throw new ArgumentOutOfRangeException();
    }
    if (maxHashCount <= 0)
    {
        throw new ArgumentOutOfRangeException();
    }

    // fall back to the default provider when the caller did not supply one
    IMultiHashProvider<T> provider = hashProvider ?? MultiHashProvider<T>.Default;
    if (provider.HashCount <= 0)
    {
        throw new ArgumentException("The hash provider does not support any hash functions.");
    }

    // one uint holds 32 bits, so take the number of full words and add one more for any leftover bits
    int wordCount = bitCount >> 5;
    if ((bitCount & 31) != 0)
    {
        wordCount++;
    }

    this.bits = new uint[wordCount];
    this.hashProvider = provider;
    // never use more hash functions than the provider offers or than the caller allows
    this.hashCount = Math.Min(provider.HashCount, maxHashCount);
}
// Convenience overload of the tuning constructor that omits maxHashCount. It forwards to the four-argument
// constructor with a maxHashCount of 0, which that constructor treats as "limited only by the hash provider".
/// <include file="documentation.xml" path="/Collections/BloomFilter/TuningConstructor/*[not(@name='maxHashCount')]"/>
public BloomFilter(int itemCount, float falsePositiveRate, IMultiHashProvider<T> hashProvider)
    : this(itemCount, falsePositiveRate, hashProvider, 0)
{
    // all initialization is performed by the constructor this one delegates to
}