Exemplo n.º 1
0
        /// <summary>
        /// Estimate the number of differences between two sets from their hybrid estimator data.
        /// </summary>
        /// <typeparam name="TEntity">The type of the entity</typeparam>
        /// <typeparam name="TId">The type of the entity identifier</typeparam>
        /// <typeparam name="TCount">The type of the occurrence count for the Bloom filter.</typeparam>
        /// <param name="estimator">The estimator data for the first set. May be <c>null</c>.</param>
        /// <param name="otherEstimatorData">The estimator data for the second set. May be <c>null</c>.</param>
        /// <param name="configuration">The Bloom filter configuration, handed to the strata estimator decode.</param>
        /// <param name="destructive">When <c>true</c> the values of <paramref name="estimator"/> will be altered rendering it useless, otherwise <c>false</c></param>
        /// <returns>
        /// An estimate of the difference between two sets based upon the estimators, never larger than the
        /// combined item count of both estimators. <c>null</c> when decoding the strata estimator yields no value.
        /// </returns>
        /// <remarks>
        /// When one side is missing or has no items, the item count of the other side is returned
        /// (zero when that side is missing as well).
        /// </remarks>
        internal static long?Decode <TEntity, TId, TCount>(this IHybridEstimatorData <int, TCount> estimator,
                                                           IHybridEstimatorData <int, TCount> otherEstimatorData,
                                                           IInvertibleBloomFilterConfiguration <TEntity, TId, int, TCount> configuration,
                                                           bool destructive = false)
            where TCount : struct
            where TId : struct
        {
            if (estimator == null ||
                estimator.ItemCount <= 0L)
            {
                // Nothing on this side: everything on the other side is a difference. The other side
                // may be missing too (including when this side is present but empty), which means zero.
                return otherEstimatorData == null ? 0L : otherEstimatorData.ItemCount;
            }
            if (otherEstimatorData == null ||
                otherEstimatorData.ItemCount <= 0)
            {
                return estimator.ItemCount;
            }

            // Use the larger decode factor of the two sides; a missing strata estimator counts as 1.
            var decodeFactor = Math.Max(estimator.StrataEstimator?.DecodeCountFactor ?? 1.0D,
                                        otherEstimatorData.StrataEstimator?.DecodeCountFactor ?? 1.0D);
            var strataDecode = estimator
                               .StrataEstimator
                               .Decode(otherEstimatorData.StrataEstimator, configuration, estimator.StrataEstimator.StrataCount, destructive);

            if (!strataDecode.HasValue)
            {
                return null;
            }
            var similarity = estimator.BitMinwiseEstimator?.Similarity(otherEstimatorData.BitMinwiseEstimator);

            if (similarity.HasValue)
            {
                // A similarity value implies estimator.BitMinwiseEstimator is not null. The other side is
                // read null-safe, matching how its item count is read for decodedItemCount below.
                var minwiseItemCount = estimator.BitMinwiseEstimator.ItemCount +
                                       (otherEstimatorData.BitMinwiseEstimator?.ItemCount ?? 0L);

                strataDecode += (long)(decodeFactor * ((1 - similarity) / (1 + similarity)) * minwiseItemCount);
            }
            var strataMin = Math.Min(
                otherEstimatorData.StrataEstimator?.StrataCount ?? 0,
                estimator.StrataEstimator?.StrataCount ?? 0);

            // Number of items that actually took part in the estimate above. The bit minwise
            // estimators only count when a similarity was obtained from them.
            var decodedItemCount = estimator.StrataEstimator.StrataItemCount(strataMin) +
                                   (similarity.HasValue ? (estimator.BitMinwiseEstimator?.ItemCount ?? 0L) : 0L) +
                                   otherEstimatorData.StrataEstimator.StrataItemCount(strataMin) +
                                   (similarity.HasValue ? (otherEstimatorData.BitMinwiseEstimator?.ItemCount ?? 0L) : 0L);

            if (decodedItemCount > 0)
            {
                //assume differences for the items counted, but not in the strata estimator or bit minwise estimator, contribute proportionally.
                strataDecode = (long)Math.Ceiling(1.0D * strataDecode.Value * (estimator.ItemCount + otherEstimatorData.ItemCount) / decodedItemCount);
            }
            //use upperbound on set difference.
            return Math.Min(strataDecode.Value, estimator.ItemCount + otherEstimatorData.ItemCount);
        }
 /// <summary>
 /// Restore the state of this hybrid estimator from extracted estimator data.
 /// </summary>
 /// <param name="data">The data to restore from. When <c>null</c>, nothing is changed.</param>
 /// <remarks>This rehydrate is lossy, since it can't restore the bit minwise estimator.</remarks>
 public void Rehydrate(IHybridEstimatorData <int, TCount> data)
 {
     if (data != null)
     {
         // The bit minwise estimator is not restored, so it is dropped.
         _minwiseEstimator = null;
         _strataEstimator.Rehydrate(data.StrataEstimator);

         // Whatever the data counted beyond what the estimators hold is kept as a plain count,
         // never below zero.
         var accountedFor = _strataEstimator.ItemCount + (_minwiseEstimator?.ItemCount ?? 0L);

         _minwiseReplacementCount = Math.Max(0, data.ItemCount - accountedFor);
     }
 }
        /// <summary>
        /// Decode the given hybrid estimator data against this estimator.
        /// </summary>
        /// <param name="estimator">The estimator for the other set. When <c>null</c>, the item count of this estimator is returned.</param>
        /// <param name="destructive">When <c>true</c> the values in this estimator will be altered and rendered useless, else <c>false</c>.</param>
        /// <returns>An estimate of the number of differences between the two sets that the estimators are based upon.</returns>
        public long?Decode(IHybridEstimatorData <int, TCount> estimator,
                           bool destructive = false)
        {
            if (estimator == null)
            {
                // Nothing to compare against: every item in this estimator is a difference.
                return ItemCount;
            }
            // Extract is invoked through the interface view of this instance.
            IHybridEstimator <TEntity, int, TCount> self = this;

            // Pass destructive along explicitly: the data-level Decode defaults it to false,
            // so leaving it out would silently ignore the caller's choice.
            return self
                   .Extract()
                   .Decode(estimator, _configuration, destructive);
        }
        /// <summary>
        /// Build a new hybrid estimator that is set up like the estimator described by <paramref name="data"/>.
        /// </summary>
        /// <typeparam name="TEntity">The type of the entity</typeparam>
        /// <typeparam name="TId">The type of the identifier</typeparam>
        /// <typeparam name="TCount">The type of the occurrence count</typeparam>
        /// <param name="data">The estimator data to match</param>
        /// <param name="configuration">The Bloom filter configuration</param>
        /// <param name="setSize">The (estimated) size of the set to add to the estimator.</param>
        /// <returns>An estimator</returns>
        public IHybridEstimator <TEntity, int, TCount> CreateMatchingEstimator <TEntity, TId, TCount>(
            IHybridEstimatorData <int, TCount> data,
            IInvertibleBloomFilterConfiguration <TEntity, TId, int, TCount> configuration,
            long setSize)
            where TCount : struct
            where TId : struct
        {
            // Take over the block size and strata count of the strata estimator in the data.
            var strataData = data.StrataEstimator;
            var matching = new HybridEstimator <TEntity, TId, TCount>(
                strataData.BlockSize,
                strataData.StrataCount,
                configuration,
                fixedBlockSize: true);

            matching.DecodeCountFactor = strataData.DecodeCountFactor;

            var minwiseData = data.BitMinwiseEstimator;

            if (minwiseData == null)
            {
                return matching;
            }
            // The data has a bit minwise estimator: initialize with its bit size and hash count.
            matching.Initialize(setSize, minwiseData.BitSize, minwiseData.HashCount);
            return matching;
        }