/// <summary>
 ///     Resolves <paramref name="id" /> to a freshly constructed hash function implementation
 /// </summary>
 /// <param name="id">Identifier of the hash function implementation to create</param>
 /// <returns>A new instance of the implementation selected by <paramref name="id" /></returns>
 /// <exception cref="NotImplementedException">
 ///     Thrown when <paramref name="id" /> is neither <c>HashFunctionId.Murmur3</c> nor <c>HashFunctionId.Fnv1A</c>
 /// </exception>
 /// <remarks>Every call allocates a new object; callers that need the same function repeatedly should keep the instance</remarks>
 internal static IHashFunction GetHashFunction(HashFunctionId id)
 {
     if (id == HashFunctionId.Murmur3)
     {
         return new Murmur3();
     }

     if (id == HashFunctionId.Fnv1A)
     {
         return new Fnv1A();
     }

     // Any other identifier has no implementation wired up here
     throw new NotImplementedException($"Support not implemented for hash function of type {id}");
 }
示例#2
0
        /// <summary>
        ///     Builds the hash function implementation that corresponds to <paramref name="id" />
        /// </summary>
        /// <param name="id">Selects which hash function implementation to build</param>
        /// <returns>A newly allocated implementation matching <paramref name="id" /></returns>
        /// <exception cref="NotImplementedException">
        ///     Thrown when <paramref name="id" /> does not match any of the implementations handled below
        /// </exception>
        /// <remarks>No caching is done: each invocation constructs a new object, so reuse the result where possible</remarks>
        internal static IHashFunction GetHashFunction(HashFunctionId id)
        {
            IHashFunction created;

            switch (id)
            {
                case HashFunctionId.Murmur3:
                    created = new Murmur3();
                    break;

                case HashFunctionId.Fnv1A:
                    created = new Fnv1A();
                    break;

                default:
                    string message = string.Format("Support not implemented for hash function of type {0}", id);
                    throw new NotImplementedException(message);
            }

            return created;
        }
        /// <summary>
        ///     Rebuilds a CardinalityEstimator from the values captured in <paramref name="state" />
        /// </summary>
        /// <param name="state">
        ///     Source of the index bit count, hash function id, direct-count set, sparse/dense lookups and addition counter.
        ///     DirectCount and LookupSparse are copied when present; LookupDense is stored by reference.
        /// </param>
        internal CardinalityEstimator(CardinalityEstimatorState state)
        {
            // Values derived from the number of index bits
            bitsPerIndex = state.BitsPerIndex;
            bitsForHll = 64 - bitsPerIndex;
            m = (int)Math.Pow(2, bitsPerIndex);
            alphaM = GetAlphaM(m);
            subAlgorithmSelectionThreshold = GetSubAlgorithmSelectionThreshold(bitsPerIndex);

            // Hash function: remember the id and instantiate the matching implementation
            hashFunctionId = state.HashFunctionId;
            hashFunction = HashFunctionFactory.GetHashFunction(hashFunctionId);

            // Direct count: take a private copy of the set, or keep null when the state has none
            directCount = state.DirectCount == null ? null : new HashSet<ulong>(state.DirectCount);

            // Sparse / dense representation and the running addition counter
            isSparse = state.IsSparse;
            lookupSparse = state.LookupSparse == null ? null : new Dictionary<ushort, byte>(state.LookupSparse);
            lookupDense = state.LookupDense;
            CountAdditions = state.CountAdditions;

            // Each element in the sparse representation takes 15 bytes, and there is some constant overhead
            sparseMaxElements = Math.Max(0, m / 15 - 10);

            // With no room for sparse elements, go straight to the dense representation
            if (sparseMaxElements <= 0)
            {
                SwitchToDenseRepresentation();
            }

            // Without a direct-count set there is nothing to replay
            if (directCount == null)
            {
                return;
            }

            // Replay the direct-count elements into the HLL lookup, which lets serialization carry only directCount.
            // Because the object is being re-initialized, start again from an empty sparse lookup.
            isSparse = true;
            lookupSparse = new Dictionary<ushort, byte>();
            foreach (ulong hash in directCount)
            {
                AddElementHash(hash);
            }
        }
        /// <summary>
        ///     Creates state for an empty CardinalityEstimator : LookupSparse is empty, LookupDense is null, and DirectCount is
        ///     an empty set when <paramref name="useDirectCounting" /> is true or null when it is false.
        /// </summary>
        /// <param name="b">Number of index bits (stored as BitsPerIndex); must be in the range [4, 16]</param>
        /// <param name="hashFunctionId">Identifier of the hash function, stored unchanged in the returned state</param>
        /// <param name="useDirectCounting">
        ///     True (the default) to start with an empty DirectCount set; false to leave DirectCount null, in which case the
        ///     state-based constructor keeps its direct-count set null as well.
        /// </param>
        /// <returns>A new sparse state with no elements and a zero addition counter</returns>
        /// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="b" /> is less than 4 or greater than 16</exception>
        private static CardinalityEstimatorState CreateEmptyState(int b, HashFunctionId hashFunctionId, bool useDirectCounting = true)
        {
            if (b < 4 || b > 16)
            {
                throw new ArgumentOutOfRangeException(nameof(b), b, "Accuracy out of range, legal range is 4 <= BitsPerIndex <= 16");
            }

            return new CardinalityEstimatorState
            {
                BitsPerIndex = b,
                // Optional so that both the two-argument and the three-argument constructor calls resolve to this method
                DirectCount = useDirectCounting ? new HashSet<ulong>() : null,
                IsSparse = true,
                LookupSparse = new Dictionary<ushort, byte>(),
                LookupDense = null,
                HashFunctionId = hashFunctionId,
                CountAdditions = 0,
            };
        }
 /// <summary>
 ///     Creates an empty estimator with the requested accuracy and hash function
 /// </summary>
 /// <param name="b">
 ///     Number of bits determining accuracy and memory consumption, in the range [4, 16] (higher = greater accuracy and memory usage).
 ///     For large cardinalities, the standard error is 1.04 * 2^(-b/2), and the memory consumption is bounded by roughly 2^b bytes.
 ///     The default value of 14 typically yields 3% error or less across the entire range of cardinalities (usually much less),
 ///     and uses up to ~16kB of memory.  b=4 yields less than ~100% error and uses less than 1kB. b=16 uses up to ~64kB and usually yields 1%
 ///     error or less
 /// </param>
 /// <param name="hashFunctionId">Type of hash function to use. Default is Murmur3, and FNV-1a is provided for legacy support</param>
 public CardinalityEstimator(int b = 14, HashFunctionId hashFunctionId = HashFunctionId.Murmur3)
     : this(CreateEmptyState(b, hashFunctionId))
 {
     // Intentionally empty: the state-based constructor performs all initialization
 }
 /// <summary>
 ///     Creates an empty estimator with the requested accuracy and hash function, optionally without direct counting
 /// </summary>
 /// <param name="b">
 ///     Number of bits determining accuracy and memory consumption, in the range [4, 16] (higher = greater accuracy and memory usage).
 ///     For large cardinalities, the standard error is 1.04 * 2^(-b/2), and the memory consumption is bounded by roughly 2^b bytes.
 ///     The default value of 14 typically yields 3% error or less across the entire range of cardinalities (usually much less),
 ///     and uses up to ~16kB of memory.  b=4 yields less than ~100% error and uses less than 1kB. b=16 uses up to ~64kB and usually yields 1%
 ///     error or less
 /// </param>
 /// <param name="hashFunctionId">Type of hash function to use. Default is Murmur3, and FNV-1a is provided for legacy support</param>
 /// <param name="useDirectCounting">
 ///     True if direct count should be used for up to <see cref="DirectCounterMaxElements"/> elements.
 ///     False if direct counting should be avoided so that estimation is always used, even for low cardinalities.
 /// </param>
 public CardinalityEstimator(
     int b = 14,
     HashFunctionId hashFunctionId = HashFunctionId.Murmur3,
     bool useDirectCounting = true)
     : this(CreateEmptyState(b, hashFunctionId, useDirectCounting))
 {
     // Intentionally empty: the state-based constructor performs all initialization
 }