/// <summary>
/// Creates a new HyperLogLog with m registers.
/// </summary>
/// <param name="m">Number of registers (must be a non-zero power of two)</param>
/// <exception cref="ArgumentException">
/// Thrown when m is zero or is not a power of two.
/// </exception>
public HyperLogLog(uint m)
{
    // Zero slips through the (m & (m - 1)) bit trick, so it is rejected explicitly;
    // otherwise the estimator would be built with no registers at all.
    if (m == 0 || (m & (m - 1)) != 0)
    {
        throw new ArgumentException(String.Format("{0} is not a power of two", m));
    }

    // m is an exact power of two at this point, so log2(m) equals the number of
    // right shifts needed to reach 1. Integer counting avoids the floating-point
    // rounding of Math.Log(m, 2), which Math.Ceiling could bump up by one.
    uint registerBits = 0;
    for (uint remaining = m; remaining > 1; remaining >>= 1)
    {
        registerBits++;
    }

    this.Registers = new byte[m];
    this.M = m;
    this.B = registerBits;
    this.Alpha = CalculateAlpha(m);
    this.Hash = Defaults.GetDefaultHashAlgorithm();
}
/// <summary>
/// Creates a new DeletableBloomFilter optimized to store n items with a specified
/// target false-positive rate. The r value determines the number of bits to use to
/// store collision information. This controls the deletability of an element.
/// Refer to the paper for selecting an optimal value.
/// </summary>
/// <param name="n">Number of items</param>
/// <param name="r">
/// Number of bits to use to store collision information. Must be greater than zero
/// and smaller than the optimal filter size computed for n and fpRate.
/// </param>
/// <param name="fpRate">Desired false positive rate</param>
/// <exception cref="System.ArgumentOutOfRangeException">
/// Thrown when r is zero or is not smaller than the computed filter size.
/// </exception>
public DeletableBloomFilter(uint n, uint r, double fpRate)
{
    var m = Utils.OptimalM(n, fpRate);
    var k = Utils.OptimalK(fpRate);

    // r is the divisor of the region-size computation below; zero would raise a
    // DivideByZeroException after part of the filter had already been allocated.
    if (r == 0)
    {
        throw new System.ArgumentOutOfRangeException("r", r, "r must be greater than zero");
    }

    // The filter proper gets m - r bits. With r >= m that subtraction would wrap
    // around (unsigned) or leave no bits for the filter itself.
    if (r >= m)
    {
        throw new System.ArgumentOutOfRangeException(
            "r", r, "r must be smaller than the optimal filter size for the given n and fpRate");
    }

    var filterBits = m - r;

    this.Buckets = new Buckets(filterBits, 1);
    this.Collisions = new Buckets(r, 1);
    this.Hash = Defaults.GetDefaultHashAlgorithm();
    this.M = filterBits;
    this.RegionSize = filterBits / r;
    this.k = k;
    this.IndexBuffer = new uint[k];
}
/// <summary>
/// Builds the special case of a Stable Bloom Filter that behaves like a
/// traditional Bloom filter: m one-bit cells and an optimal number of hash
/// functions for the target false-positive rate. Unlike the stable variant,
/// data is not evicted and a cell contains a maximum of 1 hash value.
/// </summary>
/// <param name="m">Number of cells in the filter</param>
/// <param name="fpRate">Desired false-positive rate</param>
/// <returns>A StableBloomFilter configured with p set to 0 and one-bit cells</returns>
public static StableBloomFilter NewUnstableBloomFilter(uint m, double fpRate)
{
    var bitCells = new Buckets(m, 1);
    var hashCount = Utils.OptimalK(fpRate);

    var filter = new StableBloomFilter();
    filter.Hash = Defaults.GetDefaultHashAlgorithm();
    filter.M = m;
    filter.k = hashCount;
    // p is fixed at zero for this variant.
    filter.p = 0;
    filter.Max = bitCells.MaxBucketValue();
    filter.cells = bitCells;
    filter.IndexBuffer = new uint[hashCount];

    return filter;
}
/// <summary>
/// Builds a partitioned Bloom filter sized for n items at the requested
/// false-positive rate. The optimal size m is split across k partitions of
/// ceil(m / k) buckets each.
/// </summary>
/// <param name="n">Number of items</param>
/// <param name="fpRate">Desired false-positive rate</param>
public PartitionedBloomFilter(uint n, double fpRate)
{
    var totalSize = Utils.OptimalM(n, fpRate);
    var partitionCount = Utils.OptimalK(fpRate);

    var slices = new Buckets[partitionCount];

    // Round up so the partitions together cover at least totalSize buckets.
    var sliceSize = (uint)Math.Ceiling((double)totalSize / (double)partitionCount);

    for (var idx = 0; idx < slices.Length; idx++)
    {
        slices[idx] = new Buckets(sliceSize, 1);
    }

    this.Partitions = slices;
    this.Hash = Defaults.GetDefaultHashAlgorithm();
    this.M = totalSize;
    this.k = partitionCount;
    this.S = sliceSize;
}
/// <summary>
/// Creates a new Count-Min Sketch whose relative accuracy is within a factor of
/// epsilon with probability delta. Both of these parameters affect the space and
/// time complexity: the matrix has ceil(e / epsilon) columns and
/// ceil(ln(1 / delta)) rows.
/// </summary>
/// <param name="epsilon">Relative-accuracy factor; must be finite and greater than zero</param>
/// <param name="delta">Relative-accuracy probability; must be strictly between zero and one</param>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown when epsilon is not a finite positive number, or when delta is not
/// strictly between zero and one.
/// </exception>
public CountMinSketch(double epsilon, double delta)
{
    // The comparisons are written in negated form so that NaN is rejected too.
    // A non-positive or infinite epsilon would yield a zero or unspecified width.
    if (!(epsilon > 0) || double.IsInfinity(epsilon))
    {
        throw new ArgumentOutOfRangeException(
            "epsilon", epsilon, "epsilon must be a finite number greater than zero");
    }

    // delta >= 1 makes ln(1 / delta) <= 0, i.e. a sketch with no rows;
    // delta <= 0 makes the logarithm infinite or undefined.
    if (!(delta > 0 && delta < 1))
    {
        throw new ArgumentOutOfRangeException(
            "delta", delta, "delta must be strictly between zero and one");
    }

    var width = (uint)(Math.Ceiling(Math.E / epsilon));
    var depth = (uint)(Math.Ceiling(Math.Log(1 / delta)));

    // One counter row per unit of depth, each row holding width counters.
    this.Matrix = new UInt64[depth][];
    for (int i = 0; i < depth; i++)
    {
        this.Matrix[i] = new UInt64[width];
    }

    this.Width = width;
    this.Depth = depth;
    this.epsilon = epsilon;
    this.delta = delta;
    this.Hash = Defaults.GetDefaultHashAlgorithm();
}
/// <summary>
/// Builds a Cuckoo Bloom filter sized for n items at the requested
/// false-positive rate. Every bucket is allocated with four entry slots.
/// </summary>
/// <param name="n">Number of items to store</param>
/// <param name="fpRate">Target false-positive rate</param>
public CuckooBloomFilter(uint n, double fpRate)
{
    uint slotsPerBucket = 4;
    var computedF = CalculateF(slotsPerBucket, fpRate);
    var bucketCount = Power2(n / computedF * 8);

    // Allocate the bucket table; the individual slots start out null.
    var table = new byte[bucketCount][][];
    for (var idx = 0; idx < table.Length; idx++)
    {
        table[idx] = new byte[slotsPerBucket][];
    }

    this.Buckets = table;
    this.Hash = Defaults.GetDefaultHashAlgorithm();
    this.M = bucketCount;
    this.B = slotsPerBucket;
    this.F = computedF;
    this.N = n;
}
/// <summary>
/// Builds a Stable Bloom Filter with m cells and d bits allocated per cell,
/// optimized for the target false-positive rate. Use NewDefaultStableFilter if
/// you don't want to calculate d.
/// </summary>
/// <param name="m">Number of cells</param>
/// <param name="d">Bits per cell</param>
/// <param name="fpRate">Desired false-positive rate</param>
public StableBloomFilter(uint m, byte d, double fpRate)
{
    // Start from half of the optimal k, then clamp: at least 1, at most m.
    var clampedK = Utils.OptimalK(fpRate) / 2;
    if (clampedK <= 0)
    {
        clampedK = 1;
    }
    else if (clampedK > m)
    {
        clampedK = m;
    }

    var cellStore = new Buckets(m, d);

    this.Hash = Defaults.GetDefaultHashAlgorithm();
    this.M = m;
    this.k = clampedK;
    this.p = OptimalStableP(m, clampedK, d, fpRate);
    this.Max = cellStore.MaxBucketValue();
    this.cells = cellStore;
    this.IndexBuffer = new uint[clampedK];
}
/// <summary>
/// Builds an InverseBloomFilter whose backing array has the given number of slots.
/// </summary>
/// <param name="capacity">Number of slots allocated for the filter</param>
public InverseBloomFilter(uint capacity)
{
    this.capacity = capacity;

    // Every slot starts out null.
    this.Array = new byte[capacity][];

    this.Hash = Defaults.GetDefaultHashAlgorithm();
}