/// <summary>
        /// Look up the Bloom filter data that belongs to the given strata.
        /// </summary>
        /// <typeparam name="TCount">The type for the occurrence count</typeparam>
        /// <typeparam name="TId">Type of the identifier</typeparam>
        /// <param name="estimatorData">The estimator data to search. May be <c>null</c>.</param>
        /// <param name="strata">The strata to find the filter for.</param>
        /// <returns>
        /// The filter data stored for <paramref name="strata"/>, or <c>null</c> when the estimator data (or its
        /// filter array) is <c>null</c>, when no filter is registered for the strata, or when the strata falls
        /// outside the filter array.
        /// </returns>
        /// <remarks>
        /// Some serializers (*cough* protobuf) simply drop null values from the array. When strata indexes are
        /// present they map each array position to its strata; otherwise the strata is used as the array position.
        /// </remarks>
        internal static IInvertibleBloomFilterData<TId, int, TCount> GetFilterForStrata<TId, TCount>(
            this IStrataEstimatorData<TId, TCount> estimatorData, int strata)
            where TCount : struct
            where TId : struct
        {
            var filters = estimatorData?.BloomFilters;
            if (filters == null)
            {
                return null;
            }

            var indexes = estimatorData.BloomFilterStrataIndexes;
            if (indexes != null && indexes.Length > 0)
            {
                // Sparse layout: position j in the filter array holds the strata recorded at indexes[j].
                for (var j = indexes.Length - 1; j >= 0; j--)
                {
                    // Ignore index entries that point past the end of the filter array instead of throwing.
                    if (indexes[j] == strata && j < filters.Length)
                    {
                        return filters[j];
                    }
                }
                return null;
            }

            // Dense layout: the strata is the array position. Reject negative values as well as overruns.
            if (strata >= 0 && strata < filters.Length)
            {
                return filters[strata];
            }
            return null;
        }
        /// <summary>
        /// Estimate the number of differences between two strata estimators by decoding them strata by strata,
        /// starting at the highest shared strata and working down.
        /// </summary>
        /// <typeparam name="TEntity">The entity type</typeparam>
        /// <typeparam name="TId">The type of the entity identifier</typeparam>
        /// <typeparam name="TCount">The type of the Bloom filter occurrence count</typeparam>
        /// <param name="data">Estimator data</param>
        /// <param name="otherEstimatorData">The other estimate</param>
        /// <param name="configuration">The Bloom filter configuration</param>
        /// <param name="maxStrata">A strata below this value that has no filter on either side counts as decoded.</param>
        /// <param name="destructive">When <c>true</c> the <paramref name="data"/> will be altered and no longer usable, else <c>false</c></param>
        /// <returns>
        /// The estimated difference count, or <c>null</c> when either estimator is <c>null</c> or when a strata
        /// fails to decode before any strata has decoded.
        /// </returns>
        internal static long? Decode<TEntity, TId, TCount>(this IStrataEstimatorData<int, TCount> data,
                                                           IStrataEstimatorData<int, TCount> otherEstimatorData,
                                                           IInvertibleBloomFilterConfiguration<TEntity, TId, int, TCount> configuration,
                                                           byte maxStrata,
                                                           bool destructive = false)
            where TId : struct
            where TCount : struct
        {
            if (data == null || otherEstimatorData == null)
            {
                return null;
            }

            var estimatorConfiguration = configuration.ConvertToEstimatorConfiguration();
            var countFactor            = Math.Max(data.DecodeCountFactor, otherEstimatorData.DecodeCountFactor);
            var anyStrataDecoded       = false;
            // A single set collects every identifier reported by the decode of each strata.
            var differences            = new HashSet<int>();
            var sharedStrataCount      = Math.Min(data.StrataCount, otherEstimatorData.StrataCount);

            for (var strata = sharedStrataCount - 1; strata >= 0; strata--)
            {
                var ownFilter   = data.GetFilterForStrata(strata);
                var otherFilter = strata >= otherEstimatorData.StrataCount
                    ? null
                    : otherEstimatorData.GetFilterForStrata(strata);

                if (ownFilter == null && otherFilter == null)
                {
                    // Nothing stored on either side for this strata.
                    anyStrataDecoded = anyStrataDecoded || strata < maxStrata;
                    continue;
                }

                var decoded = ownFilter.SubtractAndDecode(otherFilter, estimatorConfiguration, differences, differences, differences, destructive);
                if (decoded == true)
                {
                    anyStrataDecoded = true;
                    continue;
                }

                // The decode did not succeed: without any earlier success there is nothing to extrapolate from.
                if (!anyStrataDecoded)
                {
                    return null;
                }

                // Compensate for the fact that a failed decode can still contribute counts by lowering the
                // exponent (strata + 1) as more decodes succeeded.
                var correction = decoded.HasValue ? 1 / Math.Pow(2, data.StrataCount - (strata + 1)) : 1;
                return (long)(Math.Pow(2, strata + correction) * countFactor * differences.Count);
            }

            if (anyStrataDecoded)
            {
                return (long)(countFactor * differences.Count);
            }
            return null;
        }
Ejemplo n.º 3
0
 /// <summary>
 /// Restore this strata estimator from the given data. Does nothing when <paramref name="data"/> is <c>null</c>.
 /// </summary>
 /// <param name="data">The estimator data to copy the settings from and to rebuild the filters with.</param>
 public void Rehydrate(IStrataEstimatorData <int, TCount> data)
 {
     if (data != null)
     {
         // Copy the settings first: the filters are created from them afterwards.
         BlockSize         = data.BlockSize;
         MaxStrata         = data.StrataCount;
         ErrorRate         = data.ErrorRate;
         HashFunctionCount = data.HashFunctionCount;
         DecodeCountFactor = data.DecodeCountFactor;
         // NOTE(review): the CreateFilters visible in this file assigns HashFunctionCount again - confirm
         // whether the value copied above is meant to survive.
         CreateFilters(data);
     }
 }
        /// <summary>
        /// Compress the strata estimator data by folding it with the factor chosen by the configured folding strategy.
        /// </summary>
        /// <typeparam name="TEntity">The entity type</typeparam>
        /// <typeparam name="TId">The type of the entity identifier</typeparam>
        /// <typeparam name="TCount">The type of the Bloom filter occurrence count</typeparam>
        /// <param name="estimatorData">The estimator data to compress.</param>
        /// <param name="configuration">The configuration that supplies the folding strategy.</param>
        /// <returns>
        /// The folded estimator data, or <c>null</c> when there is no folding strategy, no estimator data, or the
        /// folding strategy does not provide a compression factor.
        /// </returns>
        internal static StrataEstimatorData<int, TCount> Compress<TEntity, TId, TCount>(
            this IStrataEstimatorData<int, TCount> estimatorData,
            IInvertibleBloomFilterConfiguration<TEntity, TId, int, TCount> configuration)
            where TCount : struct
            where TId : struct
        {
            if (configuration?.FoldingStrategy == null || estimatorData == null)
            {
                return null;
            }

            // The block size is passed twice to the strategy, exactly as before.
            var blockSize = estimatorData.BlockSize;
            var factor    = configuration.FoldingStrategy?.FindCompressionFactor(
                configuration,
                blockSize,
                blockSize,
                estimatorData.ItemCount);

            if (!factor.HasValue)
            {
                return null;
            }
            return estimatorData.Fold(configuration, factor.Value);
        }
Ejemplo n.º 5
0
        /// <summary>
        /// (Re)build the per-strata filters: slots below <c>MaxStrata</c> get a lazily created filter, all other
        /// slots are cleared.
        /// </summary>
        /// <param name="estimatorData">Filter data to rehydrate the filters with. Optional.</param>
        private void CreateFilters(IStrataEstimatorData <int, TCount> estimatorData = null)
        {
            var estimatorConfiguration = Configuration.ConvertToEstimatorConfiguration();

            HashFunctionCount = estimatorConfiguration.BestHashFunctionCount(BlockSize, ErrorRate);
            for (var strata = 0; strata < StrataFilters.Length; strata++)
            {
                if (strata < MaxStrata)
                {
                    // Resolved eagerly; the filter itself is only built when the lazy value is first requested.
                    var strataData = estimatorData.GetFilterForStrata(strata);
                    StrataFilters[strata] = new Lazy <InvertibleBloomFilter <KeyValuePair <int, int>, int, TCount> >(() =>
                    {
                        var filter = new InvertibleBloomFilter <KeyValuePair <int, int>, int, TCount>(estimatorConfiguration);
                        // Capacity doesn't really matter, the capacity is basically the block size.
                        filter.Initialize(BlockSize, BlockSize, HashFunctionCount);
                        filter.Rehydrate(strataData);
                        return filter;
                    });
                }
                else
                {
                    // Beyond the maximum strata: no filter for this slot.
                    StrataFilters[strata] = null;
                }
            }
        }
        /// <summary>
        /// Fold the strata estimator data: every filter is folded by <paramref name="factor"/> and the block
        /// size of the result is divided by the same factor.
        /// </summary>
        /// <typeparam name="TEntity">The entity type</typeparam>
        /// <typeparam name="TId">The type of the entity identifier</typeparam>
        /// <typeparam name="TCount">The type of the Bloom filter occurrence count</typeparam>
        /// <param name="estimatorData">The estimator data to fold.</param>
        /// <param name="configuration">The Bloom filter configuration.</param>
        /// <param name="factor">The factor to fold by. The block size is divided by this value, so it should not be zero.</param>
        /// <returns>
        /// New, folded estimator data, or <c>null</c> when <paramref name="estimatorData"/> or its filter array
        /// is <c>null</c>.
        /// </returns>
        /// <remarks>
        /// Filter positions are preserved, so the (copied) strata indexes remain valid for the result. Positions
        /// without a filter stay empty in the result.
        /// </remarks>
        internal static StrataEstimatorData<int, TCount> Fold<TEntity, TId, TCount>(
            this IStrataEstimatorData<int, TCount> estimatorData,
            IInvertibleBloomFilterConfiguration<TEntity, TId, int, TCount> configuration,
            uint factor)
            where TCount : struct
            where TId : struct
        {
            var sourceFilters = estimatorData?.BloomFilters;
            if (sourceFilters == null)
            {
                return null;
            }

            var filterConfig = configuration.ConvertToEstimatorConfiguration();
            var res          = new StrataEstimatorData<int, TCount>
            {
                BloomFilters             = new InvertibleBloomFilterData<int, int, TCount>[sourceFilters.Length],
                BloomFilterStrataIndexes = estimatorData.BloomFilterStrataIndexes?.ToArray(),
                BlockSize                = estimatorData.BlockSize / factor,
                DecodeCountFactor        = estimatorData.DecodeCountFactor,
                // Carried over so the folded data describes itself the same way intersected data does.
                HashFunctionCount        = estimatorData.HashFunctionCount,
                StrataCount              = estimatorData.StrataCount
            };

            for (var j = 0; j < sourceFilters.Length; j++)
            {
                var source = sourceFilters[j];
                if (source == null)
                {
                    // No filter stored at this position: nothing to fold.
                    continue;
                }
                source.SyncCompressionProviders(filterConfig);
                res.BloomFilters[j] = source
                                      .Fold(filterConfig, factor)
                                      .ConvertToBloomFilterData(filterConfig);
            }
            return res;
        }
Ejemplo n.º 7
0
 /// <summary>
 /// Intersect this estimator with the given estimator data and load the outcome back into this estimator.
 /// </summary>
 /// <param name="estimator">Estimator data to intersect with.</param>
 /// <param name="destructive">Not read by this method.</param>
 private void Intersect(IStrataEstimatorData <int, TCount> estimator,
                        bool destructive = false)
 {
     var current      = Extract();
     var intersection = current.Intersect(estimator, Configuration);

     Rehydrate(intersection);
 }
Ejemplo n.º 8
0
 /// <summary>
 /// Decode this estimator against the given estimator data.
 /// </summary>
 /// <param name="estimator">Estimator data to subtract.</param>
 /// <param name="destructive">When <c>true</c> the values in this estimator will be altered and rendered useless, else <c>false</c>.</param>
 /// <returns>The outcome of decoding the extracted data of this estimator against <paramref name="estimator"/>; may be <c>null</c>.</returns>
 private long?Decode(IStrataEstimatorData <int, TCount> estimator,
                     bool destructive = false)
 {
     var current = Extract();

     return current.Decode(estimator, Configuration, MaxStrata, destructive);
 }
        /// <summary>
        /// Intersect the given strata estimators.
        /// </summary>
        /// <typeparam name="TEntity">The entity type</typeparam>
        /// <typeparam name="TId">The type of the entity identifier</typeparam>
        /// <typeparam name="TCount">The type of the Bloom filter occurrence count</typeparam>
        /// <param name="data">Estimator data</param>
        /// <param name="otherEstimatorData">The other estimate</param>
        /// <param name="configuration">The Bloom filter configuration</param>
        /// <returns>
        /// <c>null</c> when both estimators are <c>null</c>. When exactly one of them is <c>null</c>, estimator
        /// data without filters that carries the settings of the estimator that is present. Otherwise a copy of
        /// <paramref name="data"/> in which the filter of every shared strata is replaced by its intersection
        /// with the matching filter of <paramref name="otherEstimatorData"/>.
        /// </returns>
        internal static StrataEstimatorData<int, TCount> Intersect<TEntity, TId, TCount>(this IStrataEstimatorData<int, TCount> data,
                                                                                         IStrataEstimatorData<int, TCount> otherEstimatorData,
                                                                                         IInvertibleBloomFilterConfiguration<TEntity, TId, int, TCount> configuration)
            where TId : struct
            where TCount : struct
        {
            if (data == null && otherEstimatorData == null)
            {
                return null;
            }
            if (data == null || otherEstimatorData == null)
            {
                // One side is missing: the result has no filters, only the settings of the side that is present.
                // Handling both cases here keeps the operation symmetric and avoids dereferencing a null estimator.
                var present = data ?? otherEstimatorData;
                return new StrataEstimatorData<int, TCount>
                {
                    BlockSize         = present.BlockSize,
                    DecodeCountFactor = present.DecodeCountFactor,
                    HashFunctionCount = present.HashFunctionCount,
                    StrataCount       = present.StrataCount
                };
            }

            var strataConfig = configuration.ConvertToEstimatorConfiguration();
            var fold         = configuration.FoldingStrategy?.GetFoldFactors(data.BlockSize, otherEstimatorData.BlockSize);
            var res          = new StrataEstimatorData<int, TCount>
            {
                BlockSize                = data.BlockSize / (fold?.Item1 ?? 1L),
                BloomFilterStrataIndexes = data.BloomFilterStrataIndexes,
                BloomFilters             = data.BloomFilters?.Select(b => b.ConvertToBloomFilterData(strataConfig)).ToArray(),
                DecodeCountFactor        = data.DecodeCountFactor,
                HashFunctionCount        = data.HashFunctionCount,
                StrataCount              = data.StrataCount
            };

            var resultFilters = res.BloomFilters;
            var indexes       = data.BloomFilterStrataIndexes;
            var hasIndexes    = indexes != null && indexes.Length > 0;
            var minStrata     = Math.Min(data.StrataCount, otherEstimatorData.StrataCount);

            for (var i = minStrata - 1; i >= 0; i--)
            {
                var ibf          = data.GetFilterForStrata(i);
                var estimatorIbf = otherEstimatorData.GetFilterForStrata(i);
                if (ibf == null &&
                    estimatorIbf == null)
                {
                    continue;
                }

                // The result shares the layout of data: when strata indexes are present the filter for strata i
                // lives at the position whose index entry equals i, not at position i itself.
                var slot = i;
                if (hasIndexes)
                {
                    slot = -1;
                    for (var j = indexes.Length - 1; j >= 0; j--)
                    {
                        if (indexes[j] == i)
                        {
                            slot = j;
                            break;
                        }
                    }
                }
                if (resultFilters == null || slot < 0 || slot >= resultFilters.Length)
                {
                    // The result has no position for this strata, so there is nowhere to store an intersection.
                    continue;
                }
                resultFilters[slot] = ibf.Intersect(strataConfig, estimatorIbf);
            }
            return res;
        }