Beispiel #1
0
        /// <summary>
        /// Gets an enumerable whose items are probabilistically distinct. Because a Bloom filter is used to
        /// decide whether an item has been seen, this may exclude some distinct items that a normal
        /// <see cref="Enumerable.Distinct{TSource}(IEnumerable{TSource})"/> would include.
        /// </summary>
        /// <typeparam name="T">Item type</typeparam>
        /// <param name="enumerable">Enumerable to operate over</param>
        /// <param name="expectedItems">How many distinct items you expect to process</param>
        /// <param name="errorRate">Desired error (false positive) rate expressed as a value between 0.0 and 1.0</param>
        /// <param name="h1">First hash function handed to the underlying filter</param>
        /// <param name="h2">Second hash function handed to the underlying filter</param>
        /// <returns>
        /// A <see cref="ProbabilisticDistinctEnumerable{T}"/> wrapping <paramref name="enumerable"/>, supplied with a
        /// factory that creates <see cref="SparseFastBloomFilter{T}"/> instances
        /// </returns>
        public static IEnumerable<T> ProbabilisticDistinct<T>(this IEnumerable<T> enumerable, long expectedItems, double errorRate, Func<T, int> h1, Func<T, int> h2)
        {
            // Filter sizing is computed once, up front, and shared by every filter the factory produces
            IBloomFilterParameters filterParameters = BloomUtils.CalculateBloomParameters(expectedItems, errorRate);

            // The wrapper receives a factory rather than a filter instance
            Func<IBloomFilter<T>> createFilter = () => new SparseFastBloomFilter<T>(filterParameters, h1, h2);
            return new ProbabilisticDistinctEnumerable<T>(enumerable, createFilter);
        }
Beispiel #2
0
        /// <summary>
        /// Creates a new filter
        /// </summary>
        /// <param name="storage">Bloom Filter storage</param>
        /// <param name="parameters">Parameters</param>
        /// <param name="hashFunctions">Hash Functions, null entries are discarded</param>
        /// <exception cref="ArgumentNullException">
        /// Thrown if <paramref name="parameters"/> or <paramref name="hashFunctions"/> is null
        /// </exception>
        /// <exception cref="ArgumentException">
        /// Thrown if the number of bits is less than 1, fewer than 2 non-null hash functions are supplied,
        /// or the number of bits is not bigger than the number of hash functions
        /// </exception>
        protected BaseHybridBloomFilter(IBloomFilterStorage storage, IBloomFilterParameters parameters, IEnumerable<Func<T, int>> hashFunctions)
            : base(storage)
        {
            // Guard before the first dereference so callers get an argument exception
            // rather than a NullReferenceException
            if (parameters == null)
            {
                throw new ArgumentNullException("parameters");
            }
            if (parameters.NumberOfBits < 1)
            {
                throw new ArgumentException("Number of bits must be >= 1", "parameters");
            }
            if (hashFunctions == null)
            {
                throw new ArgumentNullException("hashFunctions");
            }

            // Take a private copy and drop null entries before counting the usable functions
            this._hashFunctions = new List<Func<T, int>>(hashFunctions);
            this._hashFunctions.RemoveAll(f => f == null);
            if (this._hashFunctions.Count <= 1)
            {
                throw new ArgumentException("A bloom filter requires at least 2 hash functions", "hashFunctions");
            }
            if (parameters.NumberOfBits <= this._hashFunctions.Count)
            {
                throw new ArgumentException("Number of bits must be bigger than the number of hash functions", "parameters");
            }

            this.NumberOfBits = parameters.NumberOfBits;
            this._parameters  = parameters;
        }
        /// <summary>
        /// Creates a new filter that uses two hash functions
        /// </summary>
        /// <param name="storage">Bloom Filter Storage</param>
        /// <param name="parameters">Parameters</param>
        /// <param name="h1">First hash function</param>
        /// <param name="h2">Second hash function</param>
        /// <exception cref="ArgumentNullException">Thrown if <paramref name="parameters"/> is null</exception>
        /// <exception cref="ArgumentException">
        /// Thrown if either hash function is null, or the number of bits is not bigger than the number of hash functions
        /// </exception>
        protected BaseFastBloomFilter(IBloomFilterStorage storage, IBloomFilterParameters parameters, Func<T, int> h1, Func<T, int> h2)
            : base(storage)
        {
            if (parameters == null)
            {
                throw new ArgumentNullException("parameters", "Parameters cannot be null");
            }
            // Null hash functions keep raising ArgumentException (not ArgumentNullException)
            // so callers matching on the exact exception type are unaffected
            if (h1 == null)
            {
                throw new ArgumentException("Hash functions cannot be null", "h1");
            }
            if (h2 == null)
            {
                throw new ArgumentException("Hash functions cannot be null", "h2");
            }
            if (parameters.NumberOfBits <= parameters.NumberOfHashFunctions)
            {
                throw new ArgumentException("Number of bits must be bigger than the number of hash functions", "parameters");
            }

            this._parameters  = parameters;
            this.NumberOfBits = parameters.NumberOfBits;
            this._h1          = h1;
            this._h2          = h2;
        }
Beispiel #4
0
        /// <summary>
        /// Prints the calculated number of bits and number of hash functions for 100 expected items
        /// at an error rate of 0.01; makes no assertions
        /// </summary>
        public void TestMethod1()
        {
            var calculated = BloomUtils.CalculateBloomParameters(100, 0.01);

            // Output only, values are for manual inspection
            Console.WriteLine(calculated.NumberOfBits);
            Console.WriteLine(calculated.NumberOfHashFunctions);
        }
        /// <summary>
        /// Checks that the parameters calculated for the given inputs match the expected number of bits and
        /// hash functions, then checks the resulting error rate
        /// </summary>
        /// <param name="expectedItems">Expected number of items</param>
        /// <param name="errorRate">Error rate passed to the calculation and to the error rate check</param>
        /// <param name="expectedNumBits">Number of bits the calculation should produce</param>
        /// <param name="expectedNumHashFunctions">Number of hash functions the calculation should produce</param>
        public void CheckParameterCalculation(long expectedItems, long errorRate, int expectedNumBits, int expectedNumHashFunctions)
        {
            IBloomFilterParameters calculated = BloomUtils.CalculateBloomParameters(expectedItems, errorRate);

            // Sizing first: bits, then hash functions
            Assert.AreEqual(expectedNumBits, calculated.NumberOfBits);
            Assert.AreEqual(expectedNumHashFunctions, calculated.NumberOfHashFunctions);

            // Then confirm the error rate those parameters yield
            CheckErrorRate(expectedItems, errorRate, calculated);
        }
 /// <summary>
 /// Creates new storage sized to the number of bits given by the parameters
 /// </summary>
 /// <param name="parameters">Parameters</param>
 /// <exception cref="ArgumentNullException">Thrown if <paramref name="parameters"/> is null</exception>
 /// <exception cref="ArgumentException">Thrown if the number of bits is not greater than zero</exception>
 public SparseArrayStorage(IBloomFilterParameters parameters)
 {
     if (null == parameters)
     {
         throw new ArgumentNullException("parameters");
     }

     // A filter needs at least one bit to be usable
     if (!(parameters.NumberOfBits > 0))
     {
         throw new ArgumentException("Number of bits must be > 0", "parameters");
     }

     this._bits = new BlockSparseArray<bool>(parameters.NumberOfBits);
 }
Beispiel #7
0
        /*
         * ln p = -(m/n) * ((ln 2)^2).
         */

        /// <summary>
        /// Given some parameters and the expected number of items calculates the error rate using
        /// ln p = -(m/n) * (ln 2)^2, where m is the number of bits and n is the expected number of items
        /// </summary>
        /// <param name="expectedItems">Expected number of items that will be added to the filter</param>
        /// <param name="parameters">Bloom Filter Parameters, only the number of bits is read</param>
        /// <returns>Error Rate as a value between 0 and 1.0</returns>
        /// <exception cref="ArgumentException">Thrown if <paramref name="expectedItems"/> is less than 1</exception>
        /// <exception cref="ArgumentNullException">Thrown if <paramref name="parameters"/> is null</exception>
        public static double CalculateErrorRate(long expectedItems, IBloomFilterParameters parameters)
        {
            if (expectedItems < 1)
            {
                throw new ArgumentException("expectedItems must be >= 1", "expectedItems");
            }
            if (parameters == null)
            {
                throw new ArgumentNullException("parameters");
            }

            // m / n, forced to floating point so the ratio is not truncated
            double bitsPerItem = (double)parameters.NumberOfBits / expectedItems;
            double ln2Squared  = Math.Pow(Math.Log(2), 2d);
            double lnP         = (-1d * bitsPerItem) * ln2Squared;

            // p = e^(ln p)
            return Math.Pow(Math.E, lnP);
        }
        // Test cases are based on values calculated at http://hur.st/bloomfilter

        /// <summary>
        /// Checks that the error rate calculated for the parameters matches the expected value, and that it
        /// moves in the right direction when the number of items is halved and doubled
        /// </summary>
        /// <param name="expectedItems">Expected number of items</param>
        /// <param name="expectedErrorRate">Expected error rate expressed as 1 in p</param>
        /// <param name="parameters">Bloom Filter Parameters</param>
        private void CheckErrorRate(long expectedItems, long expectedErrorRate, IBloomFilterParameters parameters)
        {
            long rateAtExpected = CalculateErrorRate(expectedItems, parameters);
            Console.WriteLine("n = {0}, p = 1 in {1}", expectedItems, rateAtExpected);
            Assert.AreEqual(expectedErrorRate, rateAtExpected);

            // If we halve the number of items we add, the error rate should decrease
            // NB - Since the error rate is expressed as 1 in p, the actual value will increase
            long halvedItems  = expectedItems / 2;
            long rateAtHalved = CalculateErrorRate(halvedItems, parameters);
            Console.WriteLine("n = {0}, p = 1 in {1}", halvedItems, rateAtHalved);
            Assert.IsTrue(rateAtHalved > expectedErrorRate);

            // If we double the number of items we add, the error rate should increase
            // NB - Since the error rate is expressed as 1 in p, the actual value will decrease
            long doubledItems  = expectedItems * 2;
            long rateAtDoubled = CalculateErrorRate(doubledItems, parameters);
            Console.WriteLine("n = {0}, p = 1 in {1}", doubledItems, rateAtDoubled);
            Assert.IsTrue(rateAtDoubled < expectedErrorRate);
        }
 /// <summary>
 /// Creates a new filter backed by array storage sized to the number of bits given by the parameters
 /// </summary>
 /// <param name="parameters">Parameters</param>
 /// <param name="hashFunctions">Hash functions</param>
 /// <exception cref="ArgumentNullException">Thrown if <paramref name="parameters"/> is null</exception>
 public HybridBloomFilter(IBloomFilterParameters parameters, IEnumerable<Func<T, int>> hashFunctions)
     : base(CreateArrayStorage(parameters), parameters, hashFunctions)
 {
 }

 /// <summary>
 /// Builds the array storage for the filter, rejecting null parameters first because the storage is
 /// created inside the base constructor call, before any constructor body can validate arguments
 /// </summary>
 /// <param name="parameters">Parameters</param>
 /// <returns>Array storage sized to the number of bits given by the parameters</returns>
 private static ArrayStorage CreateArrayStorage(IBloomFilterParameters parameters)
 {
     if (parameters == null)
     {
         throw new ArgumentNullException("parameters");
     }
     return new ArrayStorage(parameters.NumberOfBits);
 }
 /// <summary>
 /// Creates a new filter, forwarding to the base constructor with a new
 /// <see cref="SparseArrayStorage"/> built from the given parameters
 /// </summary>
 /// <param name="parameters">Parameters, used both to create the storage and passed on to the base constructor</param>
 /// <param name="h1">Hash function 1</param>
 /// <param name="h2">Hash function 2</param>
 public SparseFastBloomFilter(IBloomFilterParameters parameters, Func <T, int> h1, Func <T, int> h2)
     : base(new SparseArrayStorage(parameters), parameters, h1, h2)
 {
 }
        /// <summary>
        /// Calculates the error rate for the given parameters and converts it to the "1 in p" form
        /// </summary>
        /// <param name="expectedItems">Expected number of items</param>
        /// <param name="parameters">Bloom Filter Parameters</param>
        /// <returns>The reciprocal of the calculated error rate, converted to a 64-bit integer</returns>
        private static long CalculateErrorRate(long expectedItems, IBloomFilterParameters parameters)
        {
            double probability = BloomUtils.CalculateErrorRate(expectedItems, parameters);

            // Express the probability as "1 in p"
            double oneInP = 1 / probability;
            return Convert.ToInt64(oneInP);
        }
Beispiel #12
0
 /// <summary>
 /// Creates a new filter backed by array storage sized to the number of bits given by the parameters
 /// </summary>
 /// <param name="parameters">Parameters</param>
 /// <param name="h1">Hash function 1</param>
 /// <param name="h2">Hash function 2</param>
 /// <exception cref="ArgumentNullException">Thrown if <paramref name="parameters"/> is null</exception>
 public FastBloomFilter(IBloomFilterParameters parameters, Func<T, int> h1, Func<T, int> h2)
     : base(CreateArrayStorage(parameters), parameters, h1, h2)
 {
 }

 /// <summary>
 /// Builds the array storage for the filter, rejecting null parameters first because the storage is
 /// created inside the base constructor call, before any constructor body can validate arguments
 /// </summary>
 /// <param name="parameters">Parameters</param>
 /// <returns>Array storage sized to the number of bits given by the parameters</returns>
 private static ArrayStorage CreateArrayStorage(IBloomFilterParameters parameters)
 {
     if (parameters == null)
     {
         throw new ArgumentNullException("parameters");
     }
     return new ArrayStorage(parameters.NumberOfBits);
 }
 /// <summary>
 /// Creates a new filter, forwarding to the base constructor with a new
 /// <see cref="SparseArrayStorage"/> built from the given parameters
 /// </summary>
 /// <param name="parameters">Parameters, used both to create the storage and passed on to the base constructor</param>
 /// <param name="hashFunctions">Hash functions</param>
 public SparseHybridBloomFilter(IBloomFilterParameters parameters, IEnumerable <Func <T, int> > hashFunctions)
     : base(new SparseArrayStorage(parameters), parameters, hashFunctions)
 {
 }