Esempio n. 1
0
 /// <summary>
 /// Initializes the hybrid Bloom filter from the given configuration and creates the
 /// accompanying reverse Bloom filter.
 /// </summary>
 /// <param name="bloomFilterConfiguration">The Bloom filter configuration. It is handed to the base constructor and, after conversion, to the reverse Bloom filter.</param>
 public InvertibleHybridBloomFilter(
     IInvertibleBloomFilterConfiguration<TEntity, TId, int, TCount> bloomFilterConfiguration)
     : base(bloomFilterConfiguration)
 {
     // The reverse filter is typed on KeyValuePair<TId, int> entities, so it needs the
     // converted form of the configuration rather than the entity configuration itself.
     var keyValueConfiguration = bloomFilterConfiguration.ConvertToKeyValueHash();

     _reverseBloomFilter =
         new InvertibleReverseBloomFilter<KeyValuePair<TId, int>, TId, TCount>(keyValueConfiguration);
     ValidateConfiguration = false;
 }
Esempio n. 2
0
 /// <summary>
 /// Subtracts the given Bloom filter from this one and decodes the result.
 /// </summary>
 /// <param name="filter">Bloom filter to subtract</param>
 /// <param name="listA">Receives the items in this filter, but not in <paramref name="filter"/></param>
 /// <param name="listB">Receives the items not in this filter, but in <paramref name="filter"/></param>
 /// <param name="modifiedEntities">Receives the entities in both filters, but with a different value</param>
 /// <returns>
 /// The outcome of the data-based overload: <c>true</c> when the decode was successful, otherwise <c>false</c>.
 /// NOTE(review): the return type is nullable; the meaning of <c>null</c> is defined by that overload - confirm there.
 /// </returns>
 public bool? SubtractAndDecode(
     IInvertibleBloomFilter<TEntity, TId, TCount> filter,
     HashSet<TId> listA,
     HashSet<TId> listB,
     HashSet<TId> modifiedEntities)
 {
     // Work on the raw data of the other filter; the overload taking data does the actual work.
     var otherData = filter.Extract();

     return SubtractAndDecode(listA, listB, modifiedEntities, otherData);
 }
        /// <summary>
        /// Extracts the data of <paramref name="precalculatedFilter"/> and, when the folding strategy of the
        /// <paramref name="configuration"/> yields a compression factor above one, folds it for <paramref name="capacity"/>.
        /// </summary>
        /// <typeparam name="TEntity">The entity type</typeparam>
        /// <typeparam name="TId">The identifier type</typeparam>
        /// <typeparam name="TCount">The occurrence count type</typeparam>
        /// <param name="configuration">Configuration; supplies the (optional) folding strategy</param>
        /// <param name="precalculatedFilter">The pre-calculated filter; when <c>null</c>, <c>null</c> is returned</param>
        /// <param name="capacity">The targeted capacity. A missing value, or a value below 10, is replaced by 10.</param>
        /// <returns>The data of <paramref name="precalculatedFilter"/>, folded when a compression factor greater than one was found; <c>null</c> when no filter was given.</returns>
        public IInvertibleBloomFilterData<TId, int, TCount> Extract<TEntity, TId, TCount>(
            IInvertibleBloomFilterConfiguration<TEntity, TId, int, TCount> configuration,
            IInvertibleBloomFilter<TEntity, TId, TCount> precalculatedFilter,
            long? capacity)
            where TCount : struct
            where TId : struct
        {
            if (precalculatedFilter == null)
            {
                return null;
            }

            // Enforce an arbitrary lower bound of 10; an absent capacity counts as 0 here and is raised too.
            if (capacity.GetValueOrDefault() < 10)
            {
                capacity = 10;
            }

            var extracted = precalculatedFilter.Extract();
            var compression = configuration.FoldingStrategy?.FindCompressionFactor(
                configuration,
                extracted.BlockSize,
                extracted.Capacity,
                capacity);

            // A missing factor (for instance when no folding strategy is configured) or a factor
            // of at most one means there is nothing to fold.
            if (!(compression > 1))
            {
                return extracted;
            }

            return extracted.Fold(configuration, (uint)compression);
        }
Esempio n. 4
0
        /// <summary>
        /// Intersect a Bloom filter with the current Bloom filter. The data of both filters is extracted,
        /// intersected using the current configuration, and the result is loaded back into this filter.
        /// </summary>
        /// <param name="bloomFilter">Bloom filter to intersect with</param>
        /// <exception cref="ArgumentNullException"><paramref name="bloomFilter"/> is <c>null</c></exception>
        /// <exception cref="ArgumentException">Bloom filter is not compatible</exception>
        public void Intersect(IInvertibleBloomFilter<TEntity, TId, TCount> bloomFilter)
        {
            // Fail with a descriptive argument exception instead of a NullReferenceException further down.
            // ArgumentNullException derives from ArgumentException, so callers that already handle the
            // incompatible-filter case also handle this one.
            if (bloomFilter == null)
            {
                throw new ArgumentNullException(nameof(bloomFilter));
            }

            var result = Extract().Intersect(Configuration, bloomFilter.Extract());

            // A null result signals that the two data sets could not be intersected.
            if (result == null)
            {
                throw new ArgumentException("An incompatible Bloom filter cannot be intersected.", nameof(bloomFilter));
            }
            Rehydrate(result);
        }
Esempio n. 5
0
        /// <summary>
        /// Adds the content of another Bloom filter to this one. The data of both filters is extracted,
        /// added using the current configuration, and the result is loaded back into this filter.
        /// </summary>
        /// <param name="bloomFilter">Bloom filter to add; <c>null</c> is ignored</param>
        /// <exception cref="ArgumentException">Bloom filter is not compatible</exception>
        public void Add(IInvertibleBloomFilter<TEntity, TId, TCount> bloomFilter)
        {
            // Nothing to add when no filter is supplied.
            if (bloomFilter == null)
            {
                return;
            }

            var ownData = Extract();
            var combined = ownData.Add(Configuration, bloomFilter.Extract());

            // A null result signals that the two data sets could not be added together.
            if (combined == null)
            {
                throw new ArgumentException("An incompatible Bloom filter cannot be added.", nameof(bloomFilter));
            }

            Rehydrate(combined);
        }
Esempio n. 6
0
 /// <summary>
 /// Creates the actor and pre-calculates both its estimator and its Bloom filter from the data set.
 /// </summary>
 /// <param name="dataSet">The data set for this actor; every item is added to the estimator and to the filter</param>
 /// <param name="hybridEstimatorFactory">Factory for creating estimators</param>
 /// <param name="bloomFilterFactory">Factory for creating Bloom filters</param>
 /// <param name="configuration">Bloom filter configuration to use</param>
 public PrecalculatedActor(
     IList<TestEntity> dataSet,
     IHybridEstimatorFactory hybridEstimatorFactory,
     IInvertibleBloomFilterFactory bloomFilterFactory,
     IInvertibleBloomFilterConfiguration<TestEntity, long, int, TCount> configuration)
 {
     // Deliberately far larger than needed for the estimator.
     const int estimatorSize = 100000;
     // Sized to the number of differences the filter can handle, not to the size of the data.
     const int filterSize = 5000;

     _protobufModel = TypeModel.Create();
     _protobufModel.UseImplicitZeroDefaults = true;
     _hybridEstimatorFactory = hybridEstimatorFactory;
     _configuration = configuration;

     _estimator = _hybridEstimatorFactory.Create(_configuration, estimatorSize);
     foreach (var entity in dataSet)
     {
         _estimator.Add(entity);
     }

     _filter = bloomFilterFactory.Create(_configuration, filterSize, 0.001F, true);
     foreach (var entity in dataSet)
     {
         _filter.Add(entity);
     }
 }
Esempio n. 7
0
        /// <summary>
        /// Quasi decode a given <paramref name="filter">filter</paramref> against a sample of another set.
        /// </summary>
        /// <typeparam name="TEntity">The entity type</typeparam>
        /// <typeparam name="TId">The identifier type</typeparam>
        /// <typeparam name="TCount">The count type</typeparam>
        /// <param name="filter">The Bloom filter; may be <c>null</c></param>
        /// <param name="otherSetSample">Items of the other set; their membership is tested against <paramref name="filter"/></param>
        /// <param name="otherSetSize">Optional size of the other set</param>
        /// <returns>
        /// The result of the quasi estimator. Without a filter: <paramref name="otherSetSize"/> when given,
        /// otherwise the number of items in <paramref name="otherSetSample"/> (zero when that is <c>null</c>).
        /// </returns>
        public static long? QuasiDecode<TEntity, TId, TCount>(
            this IInvertibleBloomFilter<TEntity, TId, TCount> filter,
            IEnumerable<TEntity> otherSetSample,
            long? otherSetSize = null)
            where TId : struct
            where TCount : struct
        {
            if (filter == null)
            {
                // No filter to compare against: fall back to the known size, or count the sample.
                if (otherSetSize.HasValue)
                {
                    return otherSetSize;
                }
                return otherSetSample?.LongCount() ?? 0L;
            }

            // Estimators can show extremely high error rates; without this adjustment the
            // difference goes to infinity.
            var adjustment = QuasiEstimator.GetAdjustmentFactor(
                filter.Configuration,
                filter.BlockSize,
                filter.ItemCount,
                filter.HashFunctionCount,
                filter.ErrorRate);

            return QuasiEstimator.Decode(
                filter.ItemCount,
                adjustment.Item1,
                filter.Contains,
                otherSetSample,
                otherSetSize,
                adjustment.Item2);
        }