コード例 #1
0
        /// <summary>
        /// Fold hybrid estimator data: the strata estimator part is folded by <paramref name="factor"/>, while the
        /// bit minwise estimator part is folded by a factor selected from the configured folding strategy.
        /// </summary>
        /// <typeparam name="TEntity">The entity type</typeparam>
        /// <typeparam name="TId">The identifier type</typeparam>
        /// <typeparam name="TCount">The count type</typeparam>
        /// <param name="estimatorData">The hybrid estimator data to fold; may be <c>null</c></param>
        /// <param name="configuration">The Bloom filter configuration providing the folding strategy and the estimator configuration</param>
        /// <param name="factor">The factor to fold by</param>
        /// <returns>
        /// New data holding the copied item count and the folded estimators,
        /// or <c>null</c> when <paramref name="estimatorData"/> is <c>null</c>.
        /// </returns>
        internal static HybridEstimatorFullData<int, TCount> Fold<TEntity, TId, TCount>(
            this IHybridEstimatorFullData<int, TCount> estimatorData,
            IInvertibleBloomFilterConfiguration<TEntity, TId, int, TCount> configuration,
            uint factor)
            where TCount : struct
            where TId : struct
        {
            if (estimatorData == null)
            {
                return null;
            }

            // The minwise estimator is folded by the smallest strategy-provided factor that is strictly larger
            // than the requested factor. Without a strategy, or without such a factor, the fold stays at 1.
            var minWiseFold = 1L;
            var foldingStrategy = configuration.FoldingStrategy;
            if (foldingStrategy != null)
            {
                var nextLargerFactor = foldingStrategy
                    .GetAllFoldFactors(estimatorData.BitMinwiseEstimator?.Capacity ?? 1L)
                    .OrderBy(candidate => candidate)
                    .FirstOrDefault(candidate => candidate > factor);
                minWiseFold = Math.Max(1L, nextLargerFactor);
            }

            var folded = new HybridEstimatorFullData<int, TCount>
            {
                ItemCount = estimatorData.ItemCount,
                BitMinwiseEstimator = estimatorData.BitMinwiseEstimator?.Fold((uint)minWiseFold),
                StrataEstimator = estimatorData.StrataEstimator?.Fold(
                    configuration.ConvertToEstimatorConfiguration(),
                    factor)
            };
            return folded;
        }
コード例 #2
0
        /// <summary>
        /// Decode the given strata estimators against each other, walking the strata they share from the highest
        /// index down to zero and collecting the decoded identifiers into a single set.
        /// </summary>
        /// <typeparam name="TEntity">The entity type</typeparam>
        /// <typeparam name="TId">The type of the entity identifier</typeparam>
        /// <typeparam name="TCount">The type of the Bloom filter occurrence count</typeparam>
        /// <param name="data">Estimator data</param>
        /// <param name="otherEstimatorData">The other estimator data</param>
        /// <param name="configuration">The Bloom filter configuration</param>
        /// <param name="maxStrata">The maximum strata; a stratum missing on both sides only counts as decoded when its index is below this value</param>
        /// <param name="destructive">Forwarded to the per-stratum subtract-and-decode. When <c>true</c> the <paramref name="data"/> will be altered and no longer usable, else <c>false</c></param>
        /// <returns>
        /// The number of collected identifiers multiplied by the larger of the two decode count factors when every
        /// shared stratum was processed; a value additionally scaled by a power of two when a stratum failed to
        /// decode after an earlier success; <c>null</c> when either input is <c>null</c> or nothing was decoded.
        /// </returns>
        internal static long? Decode<TEntity, TId, TCount>(
            this IStrataEstimatorData<int, TCount> data,
            IStrataEstimatorData<int, TCount> otherEstimatorData,
            IInvertibleBloomFilterConfiguration<TEntity, TId, int, TCount> configuration,
            byte maxStrata,
            bool destructive = false)
            where TId : struct
            where TCount : struct
        {
            if (data == null || otherEstimatorData == null)
            {
                return null;
            }

            var estimatorConfiguration = configuration.ConvertToEstimatorConfiguration();
            var countFactor = Math.Max(data.DecodeCountFactor, otherEstimatorData.DecodeCountFactor);
            var sharedStrataCount = Math.Min(data.StrataCount, otherEstimatorData.StrataCount);
            // One set receives all three decode outputs, so it accumulates every decoded identifier.
            var decodedIds = new HashSet<int>();
            var anyDecoded = false;

            for (var strata = sharedStrataCount - 1; strata >= 0; strata--)
            {
                var ownFilter = data.GetFilterForStrata(strata);
                var otherFilter = strata < otherEstimatorData.StrataCount
                    ? otherEstimatorData.GetFilterForStrata(strata)
                    : null;

                if (ownFilter == null && otherFilter == null)
                {
                    // Nothing stored on either side: counts as decoded only below the maximum strata.
                    anyDecoded = anyDecoded || strata < maxStrata;
                    continue;
                }

                var outcome = ownFilter.SubtractAndDecode(
                    otherFilter,
                    estimatorConfiguration,
                    decodedIds,
                    decodedIds,
                    decodedIds,
                    destructive);
                if (outcome == true)
                {
                    anyDecoded = true;
                    continue;
                }

                // The decode of this stratum did not succeed.
                if (!anyDecoded)
                {
                    return null;
                }

                // Compensate for the fact that a failed decode can still contribute counts: the exponent
                // adjustment shrinks as more strata were decoded successfully before the failure.
                double exponentAdjustment = 1;
                if (outcome.HasValue)
                {
                    exponentAdjustment = 1 / Math.Pow(2, data.StrataCount - (strata + 1));
                }
                return (long)(Math.Pow(2, strata + exponentAdjustment) * countFactor * decodedIds.Count);
            }

            return anyDecoded
                ? (long)(countFactor * decodedIds.Count)
                : (long?)null;
        }
コード例 #3
0
        /// <summary>
        /// Intersect the given strata estimators.
        /// </summary>
        /// <typeparam name="TEntity">The entity type</typeparam>
        /// <typeparam name="TId">The type of the entity identifier</typeparam>
        /// <typeparam name="TCount">The type of the Bloom filter occurrence count</typeparam>
        /// <param name="data">Estimator data</param>
        /// <param name="otherEstimatorData">The other estimator data</param>
        /// <param name="configuration">The Bloom filter configuration</param>
        /// <returns>
        /// <c>null</c> when both inputs are <c>null</c>. When exactly one input is <c>null</c>, a result without
        /// Bloom filters that carries the block size, decode count factor, hash function count and strata count of
        /// the input that is present. Otherwise a copy of <paramref name="data"/> in which the filter of every
        /// stratum shared with <paramref name="otherEstimatorData"/> is replaced by the intersection of both filters.
        /// </returns>
        internal static StrataEstimatorData<int, TCount> Intersect<TEntity, TId, TCount>(
            this IStrataEstimatorData<int, TCount> data,
            IStrataEstimatorData<int, TCount> otherEstimatorData,
            IInvertibleBloomFilterConfiguration<TEntity, TId, int, TCount> configuration)
            where TId : struct
            where TCount : struct
        {
            if (data == null || otherEstimatorData == null)
            {
                // At least one side is missing. Previously only a missing 'data' was handled and a missing
                // 'otherEstimatorData' ended in a NullReferenceException; both cases are now treated alike.
                var present = data ?? otherEstimatorData;
                if (present == null)
                {
                    return null;
                }
                return new StrataEstimatorData<int, TCount>
                {
                    BlockSize = present.BlockSize,
                    DecodeCountFactor = present.DecodeCountFactor,
                    HashFunctionCount = present.HashFunctionCount,
                    StrataCount = present.StrataCount
                };
            }

            var strataConfig = configuration.ConvertToEstimatorConfiguration();
            var fold = configuration.FoldingStrategy?.GetFoldFactors(data.BlockSize, otherEstimatorData.BlockSize);
            var res = new StrataEstimatorData<int, TCount>
            {
                // Without a folding strategy the block size is kept as is.
                BlockSize = data.BlockSize / (fold?.Item1 ?? 1L),
                BloomFilterStrataIndexes = data.BloomFilterStrataIndexes,
                BloomFilters = data.BloomFilters?.Select(b => b.ConvertToBloomFilterData(strataConfig)).ToArray(),
                DecodeCountFactor = data.DecodeCountFactor,
                HashFunctionCount = data.HashFunctionCount,
                StrataCount = data.StrataCount
            };

            // Only strata present in both estimators are intersected, so every index visited below is
            // smaller than the strata count of both inputs.
            var minStrata = Math.Min(data.StrataCount, otherEstimatorData.StrataCount);
            for (var i = minStrata - 1; i >= 0; i--)
            {
                var ibf = data.GetFilterForStrata(i);
                var estimatorIbf = otherEstimatorData.GetFilterForStrata(i);
                if (ibf == null &&
                    estimatorIbf == null)
                {
                    continue;
                }
                // NOTE(review): the result is stored at array position i, which presumes the filter array is
                // indexed by stratum and is not null here - confirm against BloomFilterStrataIndexes.
                res.BloomFilters[i] = ibf.Intersect(strataConfig, estimatorIbf);
            }
            return res;
        }
コード例 #4
0
        /// <summary>
        /// Initialize the bit minwise estimator and reset the minwise replacement count.
        /// </summary>
        /// <remarks>
        /// The minwise estimator capacity is <paramref name="capacity"/> multiplied by
        /// <c>1 - (2^StrataLimit - 2^(StrataLimit - MaxStrata)) / 2^StrataLimit</c>, truncated and kept at a
        /// minimum of 1. When a folding strategy is configured, the capacity is replaced by the foldable size
        /// the strategy computes for it.
        /// </remarks>
        /// <param name="capacity">The capacity (number of items to be added)</param>
        /// <param name="bitSize">The bit size for the bit minwise estimator</param>
        /// <param name="minWiseHashCount">The minwise hash count</param>
        public void Initialize(long capacity, byte bitSize, int minWiseHashCount)
        {
            var limitPower = Math.Pow(2, _strataEstimator.StrataLimit);
            var remainderPower = Math.Pow(2, _strataEstimator.StrataLimit - _strataEstimator.MaxStrata);
            var minWiseCapacity = Math.Max(
                (uint)(capacity * (1 - (limitPower - remainderPower) / limitPower)),
                1);

            var foldingStrategy = _configuration.FoldingStrategy;
            if (foldingStrategy != null)
            {
                minWiseCapacity = (uint)foldingStrategy.ComputeFoldableSize(minWiseCapacity, 2);
            }

            var estimatorConfiguration = _configuration.ConvertToEstimatorConfiguration();
            _minwiseEstimator = new BitMinwiseHashEstimator<KeyValuePair<int, int>, int, TCount>(
                estimatorConfiguration,
                bitSize,
                minWiseHashCount,
                minWiseCapacity);
            _minwiseReplacementCount = 0L;
        }
コード例 #5
0
        /// <summary>
        /// Fold the strata estimator data: every Bloom filter is folded by <paramref name="factor"/> and the
        /// block size is divided by it.
        /// </summary>
        /// <typeparam name="TEntity">The entity type</typeparam>
        /// <typeparam name="TId">The type of the entity identifier</typeparam>
        /// <typeparam name="TCount">The type of the Bloom filter occurrence count</typeparam>
        /// <param name="estimatorData">The strata estimator data to fold</param>
        /// <param name="configuration">The Bloom filter configuration; its estimator configuration is used for the fold</param>
        /// <param name="factor">The factor to fold by</param>
        /// <returns>
        /// New folded estimator data, or <c>null</c> when <paramref name="estimatorData"/> is <c>null</c>
        /// or has no Bloom filters.
        /// </returns>
        internal static StrataEstimatorData<int, TCount> Fold<TEntity, TId, TCount>(
            this IStrataEstimatorData<int, TCount> estimatorData,
            IInvertibleBloomFilterConfiguration<TEntity, TId, int, TCount> configuration,
            uint factor)
            where TCount : struct
            where TId : struct
        {
            if (estimatorData?.BloomFilters == null)
            {
                return null;
            }
            var filterConfig = configuration.ConvertToEstimatorConfiguration();
            var res = new StrataEstimatorData<int, TCount>
            {
                // The guard above ensures the source filters exist, so the array can be sized directly.
                BloomFilters = new InvertibleBloomFilterData<int, int, TCount>[estimatorData.BloomFilters.Length],
                BloomFilterStrataIndexes = estimatorData.BloomFilterStrataIndexes?.ToArray(),
                BlockSize = estimatorData.BlockSize / factor,
                DecodeCountFactor = estimatorData.DecodeCountFactor,
                // Carried over so the folded data does not fall back to the default hash function count.
                HashFunctionCount = estimatorData.HashFunctionCount,
                StrataCount = estimatorData.StrataCount
            };

            for (var j = 0L; j < res.BloomFilters.Length; j++)
            {
                // The source filter is synchronized with the configuration before it is folded.
                estimatorData
                    .BloomFilters[j]
                    .SyncCompressionProviders(filterConfig);
                res.BloomFilters[j] = estimatorData
                    .BloomFilters[j]
                    .Fold(filterConfig, factor)
                    .ConvertToBloomFilterData(filterConfig);
            }
            return res;
        }