/// <summary>
/// Builds the hash function implementation selected by <paramref name="id" />
/// </summary>
/// <param name="id">Identifier of the hash function implementation to instantiate</param>
/// <returns>A newly constructed hash function matching <paramref name="id" /></returns>
/// <exception cref="NotImplementedException">Thrown when <paramref name="id" /> is neither Murmur3 nor Fnv1A</exception>
/// <remarks>Every call allocates a fresh instance; hold on to the result and reuse it where appropriate</remarks>
internal static IHashFunction GetHashFunction(HashFunctionId id)
{
    if (id == HashFunctionId.Murmur3)
    {
        return new Murmur3();
    }

    if (id == HashFunctionId.Fnv1A)
    {
        return new Fnv1A();
    }

    // Any other identifier has no implementation wired up here
    throw new NotImplementedException($"Support not implemented for hash function of type {id}");
}
/// <summary>
/// Resolves the implementation id <paramref name="id" /> to a concrete hash function
/// </summary>
/// <param name="id">Selects which hash function implementation is constructed</param>
/// <returns>A new instance of the hash function implementation that <paramref name="id" /> names</returns>
/// <exception cref="NotImplementedException">Thrown when <paramref name="id" /> is neither Murmur3 nor Fnv1A</exception>
/// <remarks>A new object is created on every invocation, so reuse the returned instance when appropriate</remarks>
internal static IHashFunction GetHashFunction(HashFunctionId id)
{
    IHashFunction implementation;

    switch (id)
    {
        case HashFunctionId.Murmur3:
            implementation = new Murmur3();
            break;

        case HashFunctionId.Fnv1A:
            implementation = new Fnv1A();
            break;

        default:
            // Unknown identifier: nothing to construct
            throw new NotImplementedException(string.Format("Support not implemented for hash function of type {0}", id));
    }

    return implementation;
}
/// <summary>
/// Creates a CardinalityEstimator with the given <paramref name="state" />
/// </summary>
/// <param name="state">
/// Snapshot to restore from. BitsPerIndex, HashFunctionId, DirectCount, IsSparse, LookupSparse, LookupDense and
/// CountAdditions are read from it.
/// </param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="state" /> is null</exception>
/// <remarks>
/// DirectCount and LookupSparse are copied into new collections; LookupDense is assigned as-is (no copy is made here).
/// When DirectCount is present, the sparse lookup is rebuilt from its elements rather than taken from the state.
/// </remarks>
internal CardinalityEstimator(CardinalityEstimatorState state)
{
    // Fail with a descriptive exception instead of a NullReferenceException on the first property access
    if (state == null)
    {
        throw new ArgumentNullException(nameof(state));
    }

    this.bitsPerIndex = state.BitsPerIndex;
    this.bitsForHll = 64 - this.bitsPerIndex;
    this.m = (int)Math.Pow(2, this.bitsPerIndex);
    this.alphaM = GetAlphaM(this.m);
    this.subAlgorithmSelectionThreshold = GetSubAlgorithmSelectionThreshold(this.bitsPerIndex);

    // Init the hash function
    this.hashFunctionId = state.HashFunctionId;
    this.hashFunction = HashFunctionFactory.GetHashFunction(this.hashFunctionId);

    // Init the direct count (copied, so later changes to the state's set do not affect this instance)
    this.directCount = state.DirectCount != null ? new HashSet<ulong>(state.DirectCount) : null;

    // Init the sparse representation
    this.isSparse = state.IsSparse;
    this.lookupSparse = state.LookupSparse != null ? new Dictionary<ushort, byte>(state.LookupSparse) : null;
    this.lookupDense = state.LookupDense;
    this.CountAdditions = state.CountAdditions;

    // Each element in the sparse representation takes 15 bytes, and there is some constant overhead
    this.sparseMaxElements = Math.Max(0, this.m / 15 - 10);

    // If necessary, switch to the dense representation
    if (this.sparseMaxElements <= 0)
    {
        SwitchToDenseRepresentation();
    }

    // If DirectCount is not null, populate the HLL lookup with its elements.
    // This allows serialization to include only directCount.
    if (this.directCount != null)
    {
        // Since we are re-initializing the object, reset isSparse to true and start from an empty sparse lookup
        this.isSparse = true;
        this.lookupSparse = new Dictionary<ushort, byte>();

        foreach (ulong element in this.directCount)
        {
            AddElementHash(element);
        }
    }
}
/// <summary>
/// Builds the state of a brand-new, empty CardinalityEstimator: DirectCount and LookupSparse are empty collections, LookupDense is null.
/// </summary>
/// <param name="b"><see cref="CardinalityEstimator(int, HashFunctionId)" /></param>
/// <param name="hashFunctionId"><see cref="CardinalityEstimator(int, HashFunctionId)" /></param>
/// <returns>A sparse state with zero recorded additions, carrying the requested bits-per-index and hash function id</returns>
/// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="b" /> is smaller than 4 or greater than 16</exception>
private static CardinalityEstimatorState CreateEmptyState(int b, HashFunctionId hashFunctionId)
{
    bool accuracyInRange = b >= 4 && b <= 16;
    if (!accuracyInRange)
    {
        throw new ArgumentOutOfRangeException("b", b, "Accuracy out of range, legal range is 4 <= BitsPerIndex <= 16");
    }

    CardinalityEstimatorState emptyState = new CardinalityEstimatorState
    {
        BitsPerIndex = b,
        DirectCount = new HashSet<ulong>(),
        IsSparse = true,
        LookupSparse = new Dictionary<ushort, byte>(),
        LookupDense = null,
        HashFunctionId = hashFunctionId,
        CountAdditions = 0,
    };

    return emptyState;
}
/// <summary>
/// Creates an empty CardinalityEstimator with the requested accuracy and hash function.
/// The initial state comes from CreateEmptyState and is passed to the state-based constructor.
/// </summary>
/// <param name="b">
/// Number of bits determining accuracy and memory consumption, in the range [4, 16] (higher = greater accuracy and memory usage).
/// For large cardinalities, the standard error is 1.04 * 2^(-b/2), and the memory consumption is bounded by roughly 2^b bytes
/// (matching the figures below: ~16kB for b=14, ~64kB for b=16).
/// The default value of 14 typically yields 3% error or less across the entire range of cardinalities (usually much less),
/// and uses up to ~16kB of memory. b=4 yields less than ~100% error and uses less than 1kB. b=16 uses up to ~64kB and usually yields 1%
/// error or less
/// </param>
/// <param name="hashFunctionId">Type of hash function to use. Default is Murmur3, and FNV-1a is provided for legacy support</param>
/// <exception cref="ArgumentOutOfRangeException">Thrown by CreateEmptyState when <paramref name="b" /> is outside [4, 16]</exception>
public CardinalityEstimator(int b = 14, HashFunctionId hashFunctionId = HashFunctionId.Murmur3) : this(CreateEmptyState(b, hashFunctionId)) { }
/// <summary>
/// Creates an empty CardinalityEstimator with the requested accuracy, hash function and direct-counting mode.
/// The initial state comes from CreateEmptyState and is passed to the state-based constructor.
/// </summary>
/// <param name="b">
/// Number of bits determining accuracy and memory consumption, in the range [4, 16] (higher = greater accuracy and memory usage).
/// For large cardinalities, the standard error is 1.04 * 2^(-b/2), and the memory consumption is bounded by roughly 2^b bytes
/// (matching the figures below: ~16kB for b=14, ~64kB for b=16).
/// The default value of 14 typically yields 3% error or less across the entire range of cardinalities (usually much less),
/// and uses up to ~16kB of memory. b=4 yields less than ~100% error and uses less than 1kB. b=16 uses up to ~64kB and usually yields 1%
/// error or less
/// </param>
/// <param name="hashFunctionId">Type of hash function to use. Default is Murmur3, and FNV-1a is provided for legacy support</param>
/// <param name="useDirectCounting">
/// True if direct count should be used for up to <see cref="DirectCounterMaxElements"/> elements.
/// False if direct count should be avoided and estimation should always be used, even for low cardinalities.
/// </param>
/// <remarks>
/// NOTE(review): the range check on <paramref name="b" /> presumably lives in the three-argument CreateEmptyState overload — confirm.
/// </remarks>
public CardinalityEstimator(int b = 14, HashFunctionId hashFunctionId = HashFunctionId.Murmur3, bool useDirectCounting = true) : this(CreateEmptyState(b, hashFunctionId, useDirectCounting)) { }