/// <summary>
        /// Adds a single entity to the estimator.
        /// </summary>
        /// <param name="item">The entity to add.</param>
        /// <remarks>
        /// The identifier hash and entity hash of the item are first offered to the strata estimator.
        /// When the strata estimator does not accept them, the hash pair is added to the b-bit minwise
        /// estimator; if no minwise estimator is present, only the replacement counter is incremented.
        /// </remarks>
        public void Add(TEntity item)
        {
            var id = _configuration.GetId(item);
            var idHash = _configuration.IdHash(id);
            var entityHash = _configuration.EntityHash(item);

            // The strata estimator gets first refusal; nothing else to do when it accepts the pair.
            if (_strataEstimator.ConditionalAdd(idHash, entityHash))
            {
                return;
            }

            // Without a minwise estimator the item is only counted.
            if (_minwiseEstimator == null)
            {
                _minwiseReplacementCount++;
                return;
            }

            _minwiseEstimator.Add(new KeyValuePair<int, int>(idHash, entityHash));
        }
Example #2
0
 /// <summary>
 /// Creates a b-bit minwise hash estimator.
 /// </summary>
 /// <param name="configuration">The configuration supplying the identifier, the hash functions and the optional folding strategy.</param>
 /// <param name="bitSize">The number of bits to store per hash.</param>
 /// <param name="hashCount">The number of hash functions to use.</param>
 /// <param name="capacity">The capacity (should closely approximate the number of elements that will be added).</param>
 /// <remarks>
 /// A small <paramref name="bitSize"/> (1 or 2) decreases accuracy, so <paramref name="hashCount"/> needs to be
 /// increased to compensate. When resemblance is not too small (for example &gt; 0.5), bitSize = 1 can yield
 /// results similar to bitSize = 64 with only 3 times the hash count.
 /// When the configuration has a folding strategy, the capacity actually used is the value returned by its
 /// <c>ComputeFoldableSize</c>; otherwise <paramref name="capacity"/> is used as given.
 /// The min-hash slots are wrapped in a <see cref="Lazy{T}"/> and are therefore not allocated by the constructor.
 /// </remarks>
 public BitMinwiseHashEstimator(
     IInvertibleBloomFilterConfiguration<TEntity, TId, int, TCount> configuration,
     byte bitSize,
     int hashCount,
     long capacity)
 {
     _hashCount = hashCount;
     _configuration = configuration;

     // Generated after the hash count and configuration are stored; the assignment order is kept deliberately.
     _hashFunctions = GenerateHashes();
     _bitSize = bitSize;

     // Let the folding strategy (when there is one) decide the effective capacity.
     var foldableSize = _configuration.FoldingStrategy?.ComputeFoldableSize(capacity, 0);
     _capacity = foldableSize ?? capacity;

     // Combined hash: entity hash plus identifier hash, with overflow allowed to wrap.
     _entityHash = entity => unchecked((int)(ulong)(
         _configuration.EntityHash(entity) +
         configuration.IdHash(_configuration.GetId(entity))));

     _slots = new Lazy<int[]>(() => GetMinHashSlots(_hashCount, _capacity));
 }