/// <summary>
/// Decode the hybrid estimator data instances.
/// </summary>
/// <typeparam name="TEntity">The type of the entity</typeparam>
/// <typeparam name="TId">The type of the entity identifier</typeparam>
/// <typeparam name="TCount">The type of the occurrence count for the Bloom filter.</typeparam>
/// <param name="estimator">The estimator</param>
/// <param name="otherEstimatorData">The other estimator</param>
/// <param name="configuration">Configuration</param>
/// <param name="destructive">When <c>true</c> the values of <paramref name="estimator"/> will be altered rendering it useless, otherwise <c>false</c></param>
/// <returns>
/// An estimate of the difference between two sets based upon the estimators:
/// <c>0</c> when both are <c>null</c>, the item count of the other side when one side is
/// <c>null</c> or empty (<c>0</c> when that other side is <c>null</c> as well), and
/// <c>null</c> when the strata estimator decode yields no value.
/// </returns>
internal static long? Decode<TEntity, TId, TCount>(
    this IHybridEstimatorData<int, TCount> estimator,
    IHybridEstimatorData<int, TCount> otherEstimatorData,
    IInvertibleBloomFilterConfiguration<TEntity, TId, int, TCount> configuration,
    bool destructive = false)
    where TCount : struct
    where TId : struct
{
    if (estimator == null && otherEstimatorData == null)
    {
        return 0L;
    }

    if (estimator == null || estimator.ItemCount <= 0L)
    {
        // The other side can still be null here (estimator present but empty),
        // so guard the dereference and treat a missing estimator as zero items.
        return otherEstimatorData?.ItemCount ?? 0L;
    }

    if (otherEstimatorData == null || otherEstimatorData.ItemCount <= 0)
    {
        return estimator.ItemCount;
    }

    // Both estimators are non-null and non-empty from here on.
    var totalItemCount = estimator.ItemCount + otherEstimatorData.ItemCount;

    // Use the largest decode count factor of both sides, defaulting to 1.0 when absent.
    var decodeFactor = Math.Max(
        estimator.StrataEstimator?.DecodeCountFactor ?? 1.0D,
        otherEstimatorData.StrataEstimator?.DecodeCountFactor ?? 1.0D);

    var strataDecode = estimator
        .StrataEstimator
        .Decode(
            otherEstimatorData.StrataEstimator,
            configuration,
            estimator.StrataEstimator.StrataCount,
            destructive);
    if (!strataDecode.HasValue)
    {
        return null;
    }

    var similarity = estimator.BitMinwiseEstimator?.Similarity(otherEstimatorData.BitMinwiseEstimator);
    if (similarity.HasValue)
    {
        // Add the difference estimated from the bit minwise similarity:
        // factor * (1 - s) / (1 + s) * (items in both bit minwise estimators).
        strataDecode += (long)(decodeFactor *
                               ((1 - similarity) / (1 + similarity)) *
                               (estimator.BitMinwiseEstimator.ItemCount + otherEstimatorData.BitMinwiseEstimator.ItemCount));
    }

    var strataMin = Math.Min(
        otherEstimatorData.StrataEstimator?.StrataCount ?? 0,
        estimator.StrataEstimator?.StrataCount ?? 0);

    // Number of items that actually took part in the decode above.
    var decodedItemCount =
        estimator.StrataEstimator.StrataItemCount(strataMin) +
        (similarity.HasValue ? (estimator.BitMinwiseEstimator?.ItemCount ?? 0L) : 0L) +
        otherEstimatorData.StrataEstimator.StrataItemCount(strataMin) +
        (similarity.HasValue ? (otherEstimatorData.BitMinwiseEstimator?.ItemCount ?? 0L) : 0L);
    if (decodedItemCount > 0)
    {
        // Assume differences for the items counted, but not in the strata estimator
        // or bit minwise estimator, contribute proportionally.
        strataDecode = (long)Math.Ceiling(1.0D * strataDecode.Value * totalItemCount / decodedItemCount);
    }

    // Use upper bound on set difference.
    return Math.Min(strataDecode.Value, totalItemCount);
}
/// <summary>
/// Restore this hybrid estimator from previously extracted estimator data.
/// </summary>
/// <param name="data">The data to restore; when <c>null</c> nothing is changed.</param>
/// <remarks>This rehydrate is lossy, since it can't restore the bit minwise estimator.</remarks>
public void Rehydrate(IHybridEstimatorData<int, TCount> data)
{
    if (data == null)
    {
        return;
    }

    // The bit minwise estimator is not restored from the data, so it is dropped.
    _minwiseEstimator = null;
    _strataEstimator.Rehydrate(data.StrataEstimator);

    // Items reported by the data that the restored estimators do not account for.
    // The minwise term evaluates to zero here because the estimator was just cleared.
    var restoredItemCount = _strataEstimator.ItemCount + (_minwiseEstimator?.ItemCount ?? 0L);
    var unaccountedItemCount = data.ItemCount - restoredItemCount;
    _minwiseReplacementCount = Math.Max(0, unaccountedItemCount);
}
/// <summary>
/// Decode the given hybrid estimator data.
/// </summary>
/// <param name="estimator">The estimator for the other set.</param>
/// <param name="destructive">When <c>true</c> the values in this estimator will be altered and rendered useless, else <c>false</c>.</param>
/// <returns>
/// An estimate of the number of differences between the two sets that the estimators are based upon.
/// When <paramref name="estimator"/> is <c>null</c>, the item count of this estimator is returned.
/// </returns>
public long? Decode(IHybridEstimatorData<int, TCount> estimator, bool destructive = false)
{
    if (estimator == null)
    {
        return ItemCount;
    }

    // Extract is reached through the interface (presumably an explicit interface implementation).
    IHybridEstimator<TEntity, int, TCount> self = this;

    // Forward the destructive flag so the documented behavior is honored
    // instead of always decoding non-destructively.
    return self
        .Extract()
        .Decode(estimator, _configuration, destructive: destructive);
}
/// <summary>
/// Build a new estimator whose dimensions match the given <paramref name="data"/> estimator.
/// </summary>
/// <typeparam name="TEntity">The type of the entity</typeparam>
/// <typeparam name="TId">The type of the identifier</typeparam>
/// <typeparam name="TCount">The type of the occurrence count</typeparam>
/// <param name="data">The estimator data to match</param>
/// <param name="configuration">The Bloom filter configuration</param>
/// <param name="setSize">The (estimated) size of the set to add to the estimator.</param>
/// <returns>
/// An estimator created with the block size, strata count and decode count factor of the
/// strata estimator in <paramref name="data"/>; it is additionally initialized with the bit size
/// and hash count of the bit minwise estimator when <paramref name="data"/> has one.
/// </returns>
public IHybridEstimator<TEntity, int, TCount> CreateMatchingEstimator<TEntity, TId, TCount>(
    IHybridEstimatorData<int, TCount> data,
    IInvertibleBloomFilterConfiguration<TEntity, TId, int, TCount> configuration,
    long setSize)
    where TCount : struct
    where TId : struct
{
    var strataData = data.StrataEstimator;

    // Same strata dimensions as the source data, with a fixed block size.
    var estimator = new HybridEstimator<TEntity, TId, TCount>(
        strataData.BlockSize,
        strataData.StrataCount,
        configuration,
        fixedBlockSize: true);
    estimator.DecodeCountFactor = strataData.DecodeCountFactor;

    var minwiseData = data.BitMinwiseEstimator;
    if (minwiseData == null)
    {
        return estimator;
    }

    // Only initialize when the source data carries a bit minwise estimator to mirror.
    estimator.Initialize(setSize, minwiseData.BitSize, minwiseData.HashCount);
    return estimator;
}