/// <summary>
/// Adds a single entity to the estimator.
/// </summary>
/// <param name="item">The entity to add.</param>
/// <remarks>
/// The identifier hash and entity hash are first offered to the strata estimator through
/// <c>ConditionalAdd</c>. Only when that call returns <c>false</c> is the hash pair passed on to
/// the b-bit minwise estimator; when no minwise estimator is present, the replacement counter
/// is incremented instead.
/// </remarks>
public void Add(TEntity item)
{
    var id = _configuration.GetId(item);
    var keyHash = _configuration.IdHash(id);
    var valueHash = _configuration.EntityHash(item);

    // The strata estimator took the item: nothing left to do.
    if (_strataEstimator.ConditionalAdd(keyHash, valueHash))
    {
        return;
    }

    if (_minwiseEstimator != null)
    {
        _minwiseEstimator.Add(new KeyValuePair<int, int>(keyHash, valueHash));
        return;
    }

    // No minwise estimator available: only keep track of how many items it would have received.
    _minwiseReplacementCount++;
}
/// <summary>
/// Creates a b-bit minwise hash estimator.
/// </summary>
/// <param name="configuration">The configuration that supplies the identifier, the hash functions and the optional folding strategy.</param>
/// <param name="bitSize">The number of bits to store per hash.</param>
/// <param name="hashCount">The number of hash functions to use.</param>
/// <param name="capacity">The capacity (should be a close approximation of the number of elements added).</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="configuration"/> is <c>null</c>.</exception>
/// <remarks>
/// By using bitSize = 1 or bitSize = 2, the accuracy is decreased, thus the hashCount needs to be increased.
/// However, when resemblance is not too small, for example &gt; 0.5, bitSize = 1 can yield similar results as
/// bitSize = 64 with only 3 times the hash count.
/// </remarks>
public BitMinwiseHashEstimator(
    IInvertibleBloomFilterConfiguration<TEntity, TId, int, TCount> configuration,
    byte bitSize,
    int hashCount,
    long capacity)
{
    // Fail fast with a descriptive exception: the configuration is dereferenced unconditionally below.
    if (configuration == null)
    {
        throw new ArgumentNullException(nameof(configuration));
    }

    _hashCount = hashCount;
    _configuration = configuration;

    // Kept after the two assignments above, as in the original ordering.
    // NOTE(review): GenerateHashes presumably reads _hashCount and/or _configuration - confirm before reordering.
    _hashFunctions = GenerateHashes();
    _bitSize = bitSize;

    // When a folding strategy is configured, let it compute the capacity; otherwise use the capacity as given.
    _capacity = _configuration.FoldingStrategy?.ComputeFoldableSize(capacity, 0) ?? capacity;

    // The hash of an entity combines its entity hash and its identifier hash; overflow is deliberately unchecked.
    _entityHash = e => unchecked((int)(ulong)(_configuration.EntityHash(e) + configuration.IdHash(_configuration.GetId(e))));

    // The slots are not allocated here, but on first access of the lazy value.
    _slots = new Lazy<int[]>(() => GetMinHashSlots(_hashCount, _capacity));
}