/// <summary>
/// Get the Bloom filter for the given strata.
/// </summary>
/// <typeparam name="TId">Type of the identifier</typeparam>
/// <typeparam name="TCount">The type for the occurrence count</typeparam>
/// <param name="estimatorData">The estimator data to read the filter from. May be <c>null</c>.</param>
/// <param name="strata">The strata to look up.</param>
/// <returns>
/// The filter stored for <paramref name="strata"/>, or <c>null</c> when the estimator data or its filters
/// are <c>null</c>, or when no filter is stored for that strata (including a negative or too large strata).
/// </returns>
/// <remarks>Some serializers (*cough* protobuf) simply drop null values from the array. This is mostly harmless work-around.</remarks>
internal static IInvertibleBloomFilterData<TId, int, TCount> GetFilterForStrata<TId, TCount>(
    this IStrataEstimatorData<TId, TCount> estimatorData,
    int strata)
    where TCount : struct
    where TId : struct
{
    var filters = estimatorData?.BloomFilters;
    if (filters == null)
    {
        return null;
    }

    var indexes = estimatorData.BloomFilterStrataIndexes;
    if (indexes != null && indexes.Length > 0)
    {
        // The index array maps a position in the filter array to a strata. Never look at positions
        // that have no matching filter, so a longer index array cannot cause an out of range access.
        var usable = indexes.Length < filters.Length ? indexes.Length : filters.Length;
        for (var j = usable - 1; j >= 0; j--)
        {
            if (indexes[j] == strata)
            {
                return filters[j];
            }
        }

        return null;
    }

    // Without an index array the strata is the position itself; reject values outside of the array.
    if (strata >= 0 && strata < filters.Length)
    {
        return filters[strata];
    }

    return null;
}
/// <summary>
/// Decode the difference between two strata estimators into an estimated count.
/// </summary>
/// <typeparam name="TEntity">The entity type</typeparam>
/// <typeparam name="TId">The type of the entity identifier</typeparam>
/// <typeparam name="TCount">The type of the Bloom filter occurrence count</typeparam>
/// <param name="data">Estimator data</param>
/// <param name="otherEstimatorData">The other estimator data</param>
/// <param name="configuration">The Bloom filter configuration</param>
/// <param name="maxStrata">A strata that is missing on both sides only counts as decoded when its index is below this value</param>
/// <param name="destructive">When <c>true</c> the <paramref name="data"/> will be altered and no longer usable, else <c>false</c></param>
/// <returns>
/// The estimated count, or <c>null</c> when either estimator is <c>null</c> or when nothing was decoded
/// before the first strata that did not decode.
/// </returns>
internal static long? Decode<TEntity, TId, TCount>(
    this IStrataEstimatorData<int, TCount> data,
    IStrataEstimatorData<int, TCount> otherEstimatorData,
    IInvertibleBloomFilterConfiguration<TEntity, TId, int, TCount> configuration,
    byte maxStrata,
    bool destructive = false)
    where TId : struct
    where TCount : struct
{
    if (data == null || otherEstimatorData == null)
    {
        return null;
    }

    var estimatorConfiguration = configuration.ConvertToEstimatorConfiguration();
    var countFactor = Math.Max(data.DecodeCountFactor, otherEstimatorData.DecodeCountFactor);
    var anyStrataDecoded = false;
    // A single set receives every identifier reported by the decode, whichever side it came from.
    var decodedIds = new HashSet<int>();
    var sharedStrataCount = Math.Min(data.StrataCount, otherEstimatorData.StrataCount);

    // Walk from the highest shared strata down to strata zero.
    for (var level = sharedStrataCount - 1; level >= 0; level--)
    {
        var ownFilter = data.GetFilterForStrata(level);
        var otherFilter = level < otherEstimatorData.StrataCount
            ? otherEstimatorData.GetFilterForStrata(level)
            : null;

        if (ownFilter == null && otherFilter == null)
        {
            // Nothing stored on either side for this strata.
            anyStrataDecoded = anyStrataDecoded || level < maxStrata;
            continue;
        }

        var outcome = ownFilter.SubtractAndDecode(
            otherFilter,
            estimatorConfiguration,
            decodedIds,
            decodedIds,
            decodedIds,
            destructive);
        if (outcome == true)
        {
            anyStrataDecoded = true;
            continue;
        }

        // This strata did not decode: stop here and extrapolate from what was collected so far.
        if (!anyStrataDecoded)
        {
            return null;
        }

        // Compensate for the fact that a failed decode can still contribute counts: when the decode
        // reported a value the exponent is raised by a fraction that shrinks as more strata decoded,
        // otherwise it is raised by a full step.
        double addedFactor = 1;
        if (outcome.HasValue)
        {
            addedFactor = 1 / Math.Pow(2, data.StrataCount - (level + 1));
        }

        var scale = Math.Pow(2, level + addedFactor);
        return (long)(scale * countFactor * decodedIds.Count);
    }

    if (anyStrataDecoded)
    {
        return (long)(countFactor * decodedIds.Count);
    }

    return null;
}
/// <summary>
/// Restore the strata estimator from the given data.
/// </summary>
/// <param name="data">The estimator data to restore from. When <c>null</c> the estimator is left untouched.</param>
public void Rehydrate(IStrataEstimatorData<int, TCount> data)
{
    if (data != null)
    {
        // Take over the settings first: the filters are created based upon them.
        BlockSize = data.BlockSize;
        MaxStrata = data.StrataCount;
        ErrorRate = data.ErrorRate;
        HashFunctionCount = data.HashFunctionCount;
        DecodeCountFactor = data.DecodeCountFactor;

        CreateFilters(data);
    }
}
/// <summary>
/// Compress the strata estimator data.
/// </summary>
/// <typeparam name="TEntity">The entity type</typeparam>
/// <typeparam name="TId">The type of the entity identifier</typeparam>
/// <typeparam name="TCount">The type of the Bloom filter occurrence count</typeparam>
/// <param name="estimatorData">The estimator data to compress</param>
/// <param name="configuration">The Bloom filter configuration; its folding strategy determines the compression factor</param>
/// <returns>
/// The folded estimator data, or <c>null</c> when there is no folding strategy, no estimator data,
/// or when the folding strategy does not provide a compression factor.
/// </returns>
internal static StrataEstimatorData<int, TCount> Compress<TEntity, TId, TCount>(
    this IStrataEstimatorData<int, TCount> estimatorData,
    IInvertibleBloomFilterConfiguration<TEntity, TId, int, TCount> configuration)
    where TCount : struct
    where TId : struct
{
    if (configuration?.FoldingStrategy == null)
    {
        return null;
    }

    if (estimatorData == null)
    {
        return null;
    }

    // The block size is passed for both size arguments of the strategy.
    var compressionFactor = configuration.FoldingStrategy?.FindCompressionFactor(
        configuration,
        estimatorData.BlockSize,
        estimatorData.BlockSize,
        estimatorData.ItemCount);
    if (!compressionFactor.HasValue)
    {
        return null;
    }

    return estimatorData.Fold(configuration, compressionFactor.Value);
}
/// <summary>
/// Create the (lazy) filter for each strata.
/// </summary>
/// <param name="estimatorData">Filter data to rehydrate. Optional; the per strata filter data is looked up with GetFilterForStrata.</param>
/// <remarks>
/// The hash function count is recalculated from the estimator configuration, the block size and the error rate.
/// Positions at or beyond the maximum strata are cleared.
/// </remarks>
private void CreateFilters(IStrataEstimatorData<int, TCount> estimatorData = null)
{
    var estimatorConfiguration = Configuration.ConvertToEstimatorConfiguration();
    HashFunctionCount = estimatorConfiguration.BestHashFunctionCount(BlockSize, ErrorRate);

    for (var level = 0; level < StrataFilters.Length; level++)
    {
        if (level < MaxStrata)
        {
            var strataData = estimatorData.GetFilterForStrata(level);

            // The filter is only built when its value is first requested.
            StrataFilters[level] = new Lazy<InvertibleBloomFilter<KeyValuePair<int, int>, int, TCount>>(() =>
            {
                var filter = new InvertibleBloomFilter<KeyValuePair<int, int>, int, TCount>(estimatorConfiguration);

                // Capacity doesn't really matter, the capacity is basically the block size.
                filter.Initialize(BlockSize, BlockSize, HashFunctionCount);
                filter.Rehydrate(strataData);
                return filter;
            });
        }
        else
        {
            StrataFilters[level] = null;
        }
    }
}
/// <summary>
/// Fold the strata estimator data.
/// </summary>
/// <typeparam name="TEntity">The entity type</typeparam>
/// <typeparam name="TId">The type of the entity identifier</typeparam>
/// <typeparam name="TCount">The type of the Bloom filter occurrence count</typeparam>
/// <param name="estimatorData">The estimator data to fold</param>
/// <param name="configuration">The Bloom filter configuration</param>
/// <param name="factor">The factor to fold by: the block size is divided by it and each filter is folded with it</param>
/// <returns>
/// New estimator data holding the folded filters, or <c>null</c> when the estimator data or its filters are <c>null</c>.
/// </returns>
internal static StrataEstimatorData<int, TCount> Fold<TEntity, TId, TCount>(
    this IStrataEstimatorData<int, TCount> estimatorData,
    IInvertibleBloomFilterConfiguration<TEntity, TId, int, TCount> configuration,
    uint factor)
    where TCount : struct
    where TId : struct
{
    if (estimatorData?.BloomFilters == null)
    {
        return null;
    }

    var filterConfig = configuration.ConvertToEstimatorConfiguration();
    var res = new StrataEstimatorData<int, TCount>
    {
        // The guard above established that there are filters, so no further null check is needed.
        BloomFilters = new InvertibleBloomFilterData<int, int, TCount>[estimatorData.BloomFilters.Length],
        BloomFilterStrataIndexes = estimatorData.BloomFilterStrataIndexes?.ToArray(),
        BlockSize = estimatorData.BlockSize / factor,
        DecodeCountFactor = estimatorData.DecodeCountFactor,
        // Folding does not alter the hash function count: carry it over so the result does not report the default value.
        HashFunctionCount = estimatorData.HashFunctionCount,
        StrataCount = estimatorData.StrataCount
    };

    for (var j = 0L; j < res.BloomFilters.Length; j++)
    {
        var filter = estimatorData.BloomFilters[j];
        filter.SyncCompressionProviders(filterConfig);
        res.BloomFilters[j] = filter
            .Fold(filterConfig, factor)
            .ConvertToBloomFilterData(filterConfig);
    }

    return res;
}
/// <summary>
/// Intersect this estimator with the given estimator data.
/// </summary>
/// <param name="estimator">Estimator data to intersect with.</param>
/// <param name="destructive">Not used by this method.</param>
/// <remarks>The extracted data of this estimator is intersected and the outcome is rehydrated into this estimator.</remarks>
private void Intersect(IStrataEstimatorData<int, TCount> estimator, bool destructive = false)
{
    var current = Extract();
    var intersection = current.Intersect(estimator, Configuration);
    Rehydrate(intersection);
}
/// <summary>
/// Decode this estimator against the given estimator data.
/// </summary>
/// <param name="estimator">Estimator data to subtract.</param>
/// <param name="destructive">When <c>true</c> the values in this estimator will be altered and rendered useless, else <c>false</c>.</param>
/// <returns>The outcome of decoding the extracted data of this estimator against <paramref name="estimator"/>; can be <c>null</c>.</returns>
private long? Decode(IStrataEstimatorData<int, TCount> estimator, bool destructive = false)
{
    var current = Extract();
    var estimate = current.Decode(estimator, Configuration, MaxStrata, destructive);
    return estimate;
}
/// <summary>
/// Intersect the given strata estimators.
/// </summary>
/// <typeparam name="TEntity">The entity type</typeparam>
/// <typeparam name="TId">The type of the entity identifier</typeparam>
/// <typeparam name="TCount">The type of the Bloom filter occurrence count</typeparam>
/// <param name="data">Estimator data</param>
/// <param name="otherEstimatorData">The other estimator data</param>
/// <param name="configuration">The Bloom filter configuration</param>
/// <returns>
/// <c>null</c> when both estimators are <c>null</c>. When only one of them is <c>null</c>: estimator data
/// without filters that carries the settings of the estimator that is available. Otherwise a copy of
/// <paramref name="data"/> where the filter of each strata both estimators share is replaced by its intersection.
/// </returns>
internal static StrataEstimatorData<int, TCount> Intersect<TEntity, TId, TCount>(
    this IStrataEstimatorData<int, TCount> data,
    IStrataEstimatorData<int, TCount> otherEstimatorData,
    IInvertibleBloomFilterConfiguration<TEntity, TId, int, TCount> configuration)
    where TId : struct
    where TCount : struct
{
    if (data == null && otherEstimatorData == null)
    {
        return null;
    }

    if (data == null || otherEstimatorData == null)
    {
        // One side is missing, so there is nothing in common: answer with data that has no filters.
        // Both sides are treated the same way, which avoids dereferencing a missing other estimator.
        var available = data ?? otherEstimatorData;
        return new StrataEstimatorData<int, TCount>
        {
            BlockSize = available.BlockSize,
            DecodeCountFactor = available.DecodeCountFactor,
            HashFunctionCount = available.HashFunctionCount,
            StrataCount = available.StrataCount
        };
    }

    var strataConfig = configuration.ConvertToEstimatorConfiguration();
    var fold = configuration.FoldingStrategy?.GetFoldFactors(data.BlockSize, otherEstimatorData.BlockSize);
    var res = new StrataEstimatorData<int, TCount>
    {
        BlockSize = data.BlockSize / (fold?.Item1 ?? 1L),
        // Copy the indexes so the result does not share the array with the input.
        BloomFilterStrataIndexes = data.BloomFilterStrataIndexes?.ToArray(),
        BloomFilters = data.BloomFilters?.Select(b => b.ConvertToBloomFilterData(strataConfig)).ToArray(),
        DecodeCountFactor = data.DecodeCountFactor,
        HashFunctionCount = data.HashFunctionCount,
        StrataCount = data.StrataCount
    };
    if (res.BloomFilters == null)
    {
        // No filters to replace.
        return res;
    }

    var indexes = res.BloomFilterStrataIndexes;
    var hasIndexes = indexes != null && indexes.Length > 0;
    var minStrata = Math.Min(data.StrataCount, otherEstimatorData.StrataCount);
    for (var i = minStrata - 1; i >= 0; i--)
    {
        var ibf = data.GetFilterForStrata(i);
        var estimatorIbf = otherEstimatorData.GetFilterForStrata(i);
        if (ibf == null && estimatorIbf == null)
        {
            continue;
        }

        // Find the position in the filter array that belongs to strata i. With an index array that is
        // the position whose index equals the strata, without one the strata is the position itself.
        var slot = -1;
        if (hasIndexes)
        {
            for (var j = Math.Min(indexes.Length, res.BloomFilters.Length) - 1; j >= 0; j--)
            {
                if (indexes[j] == i)
                {
                    slot = j;
                    break;
                }
            }
        }
        else if (i < res.BloomFilters.Length)
        {
            slot = i;
        }

        if (slot < 0)
        {
            // The result has no position to hold a filter for this strata.
            continue;
        }

        res.BloomFilters[slot] = ibf.Intersect(strataConfig, estimatorIbf);
    }

    return res;
}