/// <summary>
/// Fold the hybrid estimator data: the strata estimator is folded by <paramref name="factor"/>
/// and the bit minwise estimator by a factor obtained from the folding strategy.
/// </summary>
/// <typeparam name="TEntity">The entity type</typeparam>
/// <typeparam name="TId">The identifier type</typeparam>
/// <typeparam name="TCount">The count type</typeparam>
/// <param name="estimatorData">The hybrid estimator data to fold</param>
/// <param name="configuration">The Bloom filter configuration (supplies the folding strategy and the estimator configuration)</param>
/// <param name="factor">The factor to fold by</param>
/// <returns>The <paramref name="estimatorData"/> folded by <paramref name="factor"/>, or <c>null</c> when <paramref name="estimatorData"/> is <c>null</c>.</returns>
internal static HybridEstimatorFullData<int, TCount> Fold<TEntity, TId, TCount>(
    this IHybridEstimatorFullData<int, TCount> estimatorData,
    IInvertibleBloomFilterConfiguration<TEntity, TId, int, TCount> configuration,
    uint factor)
    where TCount : struct
    where TId : struct
{
    if (estimatorData == null)
    {
        return null;
    }

    // Smallest fold factor (for the minwise capacity) that is strictly greater than the requested factor.
    // Stays null when there is no folding strategy.
    var nextLargerFactor = configuration
        .FoldingStrategy?
        .GetAllFoldFactors(estimatorData.BitMinwiseEstimator?.Capacity ?? 1L)
        .OrderBy(candidate => candidate)
        .FirstOrDefault(candidate => candidate > factor);

    // Never fold the minwise estimator by less than 1 (covers "no strategy" and "no larger factor found").
    var minWiseFold = Math.Max(1L, nextLargerFactor ?? 1L);

    var itemCount = estimatorData.ItemCount;
    var foldedMinwise = estimatorData.BitMinwiseEstimator?.Fold((uint)minWiseFold);
    var foldedStrata = estimatorData.StrataEstimator?.Fold(configuration.ConvertToEstimatorConfiguration(), factor);

    return new HybridEstimatorFullData<int, TCount>
    {
        ItemCount = itemCount,
        BitMinwiseEstimator = foldedMinwise,
        StrataEstimator = foldedStrata
    };
}
/// <summary>
/// Decode the given strata estimators into an estimate, walking the strata from the highest shared stratum down to zero.
/// </summary>
/// <typeparam name="TEntity">The entity type</typeparam>
/// <typeparam name="TId">The type of the entity identifier</typeparam>
/// <typeparam name="TCount">The type of the Bloom filter occurrence count</typeparam>
/// <param name="data">Estimator data</param>
/// <param name="otherEstimatorData">The other estimate</param>
/// <param name="configuration">The Bloom filter configuration</param>
/// <param name="maxStrata">The maximum strata</param>
/// <param name="destructive">When <c>true</c> the <paramref name="data"/> will be altered and no longer usable, else <c>false</c></param>
/// <returns>
/// <c>null</c> when either estimator is <c>null</c> or when no stratum was decoded before the first failure;
/// otherwise the number of decoded identifiers scaled by the decode count factor (and by a power of two when a stratum failed to decode).
/// </returns>
internal static long? Decode<TEntity, TId, TCount>(
    this IStrataEstimatorData<int, TCount> data,
    IStrataEstimatorData<int, TCount> otherEstimatorData,
    IInvertibleBloomFilterConfiguration<TEntity, TId, int, TCount> configuration,
    byte maxStrata,
    bool destructive = false)
    where TId : struct
    where TCount : struct
{
    if (data == null || otherEstimatorData == null)
    {
        return null;
    }

    var estimatorConfiguration = configuration.ConvertToEstimatorConfiguration();
    var countFactor = Math.Max(data.DecodeCountFactor, otherEstimatorData.DecodeCountFactor);
    var anyStrataDecoded = false;
    // One set collects every identifier reported by the decode, regardless of which side it came from.
    var decodedIds = new HashSet<int>();
    var sharedStrataCount = Math.Min(data.StrataCount, otherEstimatorData.StrataCount);

    for (var strata = sharedStrataCount - 1; strata >= 0; strata--)
    {
        var filter = data.GetFilterForStrata(strata);
        var otherFilter = strata < otherEstimatorData.StrataCount
            ? otherEstimatorData.GetFilterForStrata(strata)
            : null;

        if (filter == null && otherFilter == null)
        {
            // Nothing stored on either side: counts as decoded only below the maximum strata.
            if (strata < maxStrata)
            {
                anyStrataDecoded = true;
            }

            continue;
        }

        var decoded = filter.SubtractAndDecode(
            otherFilter,
            estimatorConfiguration,
            decodedIds,
            decodedIds,
            decodedIds,
            destructive);

        if (decoded == true)
        {
            anyStrataDecoded = true;
            continue;
        }

        if (!anyStrataDecoded)
        {
            return null;
        }

        // Compensate for the fact that a failed decode can still contribute counts:
        // the extra exponent shrinks as more strata were decoded successfully.
        double exponentCorrection = 1;
        if (decoded.HasValue)
        {
            exponentCorrection = 1 / Math.Pow(2, data.StrataCount - (strata + 1));
        }

        return (long)(Math.Pow(2, strata + exponentCorrection) * countFactor * decodedIds.Count);
    }

    return anyStrataDecoded
        ? (long)(countFactor * decodedIds.Count)
        : (long?)null;
}
/// <summary>
/// Intersect the given strata estimators.
/// </summary>
/// <typeparam name="TEntity">The entity type</typeparam>
/// <typeparam name="TId">The type of the entity identifier</typeparam>
/// <typeparam name="TCount">The type of the Bloom filter occurrence count</typeparam>
/// <param name="data">Estimator data</param>
/// <param name="otherEstimatorData">The other estimate</param>
/// <param name="configuration">The Bloom filter configuration</param>
/// <returns>
/// <c>null</c> when both estimators are <c>null</c>. When exactly one of them is <c>null</c>, a result without
/// Bloom filters that carries the block size, decode count factor, hash function count and strata count of the
/// estimator that is present. Otherwise a copy of <paramref name="data"/> in which every stratum shared with
/// <paramref name="otherEstimatorData"/> has been replaced by the intersection of both filters.
/// </returns>
internal static StrataEstimatorData<int, TCount> Intersect<TEntity, TId, TCount>(
    this IStrataEstimatorData<int, TCount> data,
    IStrataEstimatorData<int, TCount> otherEstimatorData,
    IInvertibleBloomFilterConfiguration<TEntity, TId, int, TCount> configuration)
    where TId : struct
    where TCount : struct
{
    if (data == null && otherEstimatorData == null)
    {
        return null;
    }

    if (data == null || otherEstimatorData == null)
    {
        // Only one side is available, so the result holds no filters.
        // Previously a null otherEstimatorData fell through and caused a NullReferenceException further down.
        var available = data ?? otherEstimatorData;
        return new StrataEstimatorData<int, TCount>
        {
            BlockSize = available.BlockSize,
            DecodeCountFactor = available.DecodeCountFactor,
            HashFunctionCount = available.HashFunctionCount,
            StrataCount = available.StrataCount
        };
    }

    var strataConfig = configuration.ConvertToEstimatorConfiguration();
    var fold = configuration.FoldingStrategy?.GetFoldFactors(
        data.BlockSize,
        otherEstimatorData?.BlockSize ?? data.BlockSize);

    var res = new StrataEstimatorData<int, TCount>
    {
        // Without a folding strategy (or fold factors) the block size is kept as is.
        BlockSize = data.BlockSize / (fold?.Item1 ?? 1L),
        BloomFilterStrataIndexes = data.BloomFilterStrataIndexes,
        BloomFilters = data.BloomFilters?.Select(b => b.ConvertToBloomFilterData(strataConfig)).ToArray(),
        DecodeCountFactor = data.DecodeCountFactor,
        HashFunctionCount = data.HashFunctionCount,
        StrataCount = data.StrataCount
    };

    var minStrata = Math.Min(data.StrataCount, otherEstimatorData.StrataCount);
    for (var i = minStrata - 1; i >= 0; i--)
    {
        var ibf = data.GetFilterForStrata(i);
        // i is always below otherEstimatorData.StrataCount here, so no range check is needed.
        var estimatorIbf = otherEstimatorData.GetFilterForStrata(i);
        if (ibf == null && estimatorIbf == null)
        {
            continue;
        }

        // NOTE(review): the result array is indexed by strata number; presumably this matches how
        // GetFilterForStrata locates filters and res.BloomFilters is non-null here - confirm.
        res.BloomFilters[i] = ibf.Intersect(strataConfig, estimatorIbf);
    }

    return res;
}
/// <summary>
/// Initialize: creates the bit minwise estimator and resets the minwise replacement count.
/// </summary>
/// <param name="capacity">The capacity (number of items to be added)</param>
/// <param name="bitSize">The bit size for the bit minwise estimator</param>
/// <param name="minWiseHashCount">The minwise hash count</param>
public void Initialize(
    long capacity,
    byte bitSize,
    int minWiseHashCount)
{
    var total = Math.Pow(2, _strataEstimator.StrataLimit);
    var remainder = Math.Pow(2, _strataEstimator.StrataLimit - _strataEstimator.MaxStrata);

    // Share of the capacity assigned to the minwise estimator; the resulting capacity is at least 1.
    var share = 1 - (total - remainder) / total;
    var minWiseCapacity = Math.Max((uint)(capacity * share), 1);

    var foldingStrategy = _configuration.FoldingStrategy;
    if (foldingStrategy != null)
    {
        // Let the folding strategy pick a foldable size for the computed capacity.
        minWiseCapacity = (uint)foldingStrategy.ComputeFoldableSize(minWiseCapacity, 2);
    }

    var estimatorConfiguration = _configuration.ConvertToEstimatorConfiguration();
    _minwiseEstimator = new BitMinwiseHashEstimator<KeyValuePair<int, int>, int, TCount>(
        estimatorConfiguration,
        bitSize,
        minWiseHashCount,
        minWiseCapacity);
    _minwiseReplacementCount = 0L;
}
/// <summary>
/// Fold the strata estimator data.
/// </summary>
/// <typeparam name="TEntity">The entity type</typeparam>
/// <typeparam name="TId">The identifier type</typeparam>
/// <typeparam name="TCount">The count type</typeparam>
/// <param name="estimatorData">The strata estimator data to fold</param>
/// <param name="configuration">The Bloom filter configuration; it is converted to the estimator configuration used for folding</param>
/// <param name="factor">The factor to fold each Bloom filter by; the block size is divided by it</param>
/// <returns>
/// The folded estimator data, or <c>null</c> when <paramref name="estimatorData"/> or its Bloom filters are <c>null</c>.
/// </returns>
/// <remarks>
/// <c>SyncCompressionProviders</c> is invoked on every Bloom filter of <paramref name="estimatorData"/> before it is folded.
/// </remarks>
internal static StrataEstimatorData<int, TCount> Fold<TEntity, TId, TCount>(
    this IStrataEstimatorData<int, TCount> estimatorData,
    IInvertibleBloomFilterConfiguration<TEntity, TId, int, TCount> configuration,
    uint factor)
    where TCount : struct
    where TId : struct
{
    if (estimatorData?.BloomFilters == null)
    {
        return null;
    }

    var filterConfig = configuration.ConvertToEstimatorConfiguration();
    var sourceFilters = estimatorData.BloomFilters;
    var res = new StrataEstimatorData<int, TCount>
    {
        BloomFilters = new InvertibleBloomFilterData<int, int, TCount>[sourceFilters.Length],
        // Copy the indexes so the folded data does not share the array with the source.
        BloomFilterStrataIndexes = estimatorData.BloomFilterStrataIndexes?.ToArray(),
        BlockSize = estimatorData.BlockSize / factor,
        DecodeCountFactor = estimatorData.DecodeCountFactor,
        // Carried over like the other estimator parameters; it was previously left at its default value.
        HashFunctionCount = estimatorData.HashFunctionCount,
        StrataCount = estimatorData.StrataCount
    };

    for (var j = 0; j < res.BloomFilters.Length; j++)
    {
        var source = sourceFilters[j];
        source.SyncCompressionProviders(filterConfig);
        res.BloomFilters[j] = source
            .Fold(filterConfig, factor)
            .ConvertToBloomFilterData(filterConfig);
    }

    return res;
}