Beispiel #1
0
 /// <summary>
 /// Deserialization constructor: restores the instance from the "b" and "registers"
 /// entries stored in <paramref name="info"/>.
 /// </summary>
 /// <param name="info">Serialized data to restore the instance from.</param>
 /// <param name="context">Streaming context; not used by this constructor.</param>
 /// <exception cref="ArgumentNullException"><paramref name="info"/> is null.</exception>
 /// <exception cref="SerializationException">
 /// The stored "b" is outside the range 4..16, or the stored register array is missing
 /// or does not contain exactly 2^b entries.
 /// </exception>
 public HyperLogLogCore(SerializationInfo info, StreamingContext context)
 {
     if (info == null)
     {
         throw new ArgumentNullException(nameof(info));
     }

     // Enforce the same range the public constructor accepts, so a corrupted or
     // hand-crafted payload cannot produce an instance in an unsupported state.
     byte b = info.GetByte("b");
     if (b < 4 || b > 16)
     {
         throw new SerializationException("Serialized value 'b' must be between 4 inclusive and 16 inclusive");
     }

     int m = 1 << b;

     // The register array is indexed with values derived from b, so its length must be 2^b.
     byte[] registers = (byte[])info.GetValue("registers", typeof(byte[]));
     if (registers == null || registers.Length != m)
     {
         throw new SerializationException("Serialized value 'registers' must be a byte array with exactly 2^b elements");
     }

     _b         = b;
     _m         = m;
     _alpha     = HyperLogLogInternals.CalculateConstantAlphaCorrectionFactor(_b);
     _registers = registers;
 }
Beispiel #2
0
        /// <summary>
        /// Records a hash in the structure: the register selected by the hash is raised
        /// when the hash yields a larger value than the one currently stored there.
        /// The hash should come from a 'good' hash function; in practice it does not
        /// have to be cryptographically secure.
        /// </summary>
        /// <param name="hash">Hash of the object being added.</param>
        public void AddHash(ulong hash)
        {
            uint slot      = HyperLogLogInternals.CalculateRegisterIndex(hash, _b);
            byte candidate = HyperLogLogInternals.PositionOfLeftMostOne(hash, _b);

            // A register only ever grows: keep the largest value seen for this slot.
            if (candidate > _registers[slot])
            {
                _registers[slot] = candidate;
            }
        }
Beispiel #3
0
        /// <summary>
        /// Initializes an empty HyperLogLogCore with 2^b registers.
        /// </summary>
        /// <param name="b">
        /// Number of hash bits used to select a register; 2^b registers are allocated.
        /// A larger b gives a more accurate count at the cost of more memory for the registers.
        /// Must be between 4 and 16, both inclusive.
        /// </param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="b"/> is smaller than 4 or greater than 16.
        /// </exception>
        public HyperLogLogCore(byte b)
        {
            const byte smallestAllowed = 4;
            const byte largestAllowed  = 16;

            bool withinRange = b >= smallestAllowed && b <= largestAllowed;
            if (!withinRange)
            {
                throw new ArgumentOutOfRangeException(nameof(b), "Parameter 'b' must have value between 4 inclusive and 16 inclusive");
            }

            _b     = b;
            _alpha = HyperLogLogInternals.CalculateConstantAlphaCorrectionFactor(_b);

            // One register per possible value of the b index bits.
            _m         = 1 << _b;
            _registers = new byte[_m];
        }
Beispiel #4
0
        /// <summary>
        /// Returns the estimated count of distinct hashes added to this HyperLogLogCore.
        /// There is no caching: the estimate is recalculated on every call.
        /// </summary>
        /// <returns>
        /// The raw estimate after it has been passed through
        /// HyperLogLogInternals.AdjustForSmallOrBigRanges.
        /// </returns>
        public int CalculateEstimatedCount()
        {
            double sumOfInversePowers = 0;

            for (int j = 0; j < _m; j++)
            {
                // Compute 2^-register in floating point. The integer form `1 << register`
                // is a 32-bit shift whose count is masked to 5 bits, so a register value
                // of 31 gives a negative number and values of 32 or more wrap around.
                sumOfInversePowers += Math.Pow(2.0, -_registers[j]);
            }

            double z = 1 / sumOfInversePowers;

            double rawEstimate = _alpha * _m * _m * z;

            return HyperLogLogInternals.AdjustForSmallOrBigRanges(rawEstimate, _registers);
        }