Beispiel #1
0
        /// <summary>
        /// Removes an identifier and its hash value from a single cell of the filter.
        /// </summary>
        /// <typeparam name="TEntity">The entity type</typeparam>
        /// <typeparam name="TId">The type of the entity identifier</typeparam>
        /// <typeparam name="TCount">The type of the Bloom filter occurrence count</typeparam>
        /// <param name="filter">The filter data to update. When <c>null</c>, nothing is changed.</param>
        /// <param name="configuration">The configuration that supplies the count, hash and identifier operations</param>
        /// <param name="idValue">The identifier to remove</param>
        /// <param name="hashValue">The hash value to remove</param>
        /// <param name="position">The position of the cell to remove the identifier and hash from.</param>
        /// <returns>
        /// The value reported by <c>configuration.IsPure</c> for the cell at <paramref name="position"/> after the removal;
        /// <c>false</c> when <paramref name="filter"/> is <c>null</c>.
        /// </returns>
        internal static bool Remove<TEntity, TId, TCount>(
            this IInvertibleBloomFilterData<TId, int, TCount> filter,
            IInvertibleBloomFilterConfiguration<TEntity, TId, int, TCount> configuration,
            TId idValue,
            int hashValue,
            long position)
            where TCount : struct
            where TId : struct
        {
            var cellIsPure = false;

            if (filter != null)
            {
                // The whole cell update, including the purity check, is handed to
                // ExecuteExclusively for this position.
                filter.ExecuteExclusively(position, () =>
                {
                    filter.Counts[position] =
                        configuration.CountConfiguration.Decrease(filter.Counts[position]);
                    filter.HashSumProvider[position] =
                        configuration.HashRemove(filter.HashSumProvider[position], hashValue);
                    filter.IdSumProvider[position] =
                        configuration.IdRemove(filter.IdSumProvider[position], idValue);

                    // Report whether the cell is pure once the value has been taken out.
                    cellIsPure = configuration.IsPure(filter, position);
                });
            }

            return cellIsPure;
        }
Beispiel #2
0
        /// <summary>
        /// Subtract the Bloom filter data, cell by cell, folding either side first when the
        /// configured folding strategy provides fold factors.
        /// </summary>
        /// <typeparam name="TEntity">The entity type</typeparam>
        /// <typeparam name="TId">The entity identifier type</typeparam>
        /// <typeparam name="TCount">The occurrence count type</typeparam>
        /// <typeparam name="THash">The hash type.</typeparam>
        /// <param name="filterData">The filter data</param>
        /// <param name="subtractedFilterData">The Bloom filter data to subtract</param>
        /// <param name="configuration">The Bloom filter configuration</param>
        /// <param name="listA">Items in <paramref name="filterData"/>, but not in <paramref name="subtractedFilterData"/></param>
        /// <param name="listB">Items in <paramref name="subtractedFilterData"/>, but not in <paramref name="filterData"/></param>
        /// <param name="pureList">Optional stack that receives the positions of the result cells that the configuration reports as pure.</param>
        /// <param name="destructive">
        /// When <c>true</c> and <paramref name="filterData"/> does not need to be folded, the result is written
        /// into <paramref name="filterData"/> itself, so it will no longer be valid after the subtract operation.
        /// When <c>false</c> a separate data instance is used for the result.
        /// </param>
        /// <returns>
        /// The resulting Bloom filter data, or <c>null</c> when no fold factors make the block sizes of both filters match.
        /// </returns>
        /// <exception cref="ArgumentException">
        /// Thrown when <paramref name="subtractedFilterData"/> is not compatible with <paramref name="filterData"/>.
        /// </exception>
        private static IInvertibleBloomFilterData<TId, THash, TCount> Subtract<TEntity, TId, THash, TCount>(
            this IInvertibleBloomFilterData<TId, THash, TCount> filterData,
            IInvertibleBloomFilterData<TId, THash, TCount> subtractedFilterData,
            IInvertibleBloomFilterConfiguration<TEntity, TId, THash, TCount> configuration,
            HashSet<TId> listA,
            HashSet<TId> listB,
            Stack<long> pureList = null,
            bool destructive = false)
            where TCount : struct
            where TId : struct
            where THash : struct
        {
            if (!filterData.IsCompatibleWith(subtractedFilterData, configuration))
            {
                throw new ArgumentException("Subtracted invertible Bloom filters are not compatible.", nameof(subtractedFilterData));
            }

            var foldFactors = configuration.FoldingStrategy?.GetFoldFactors(filterData.BlockSize, subtractedFilterData.BlockSize);

            // Without a folding strategy (or without fold factors) neither side is folded: use factor 1.
            // Normalising here avoids lifted nullable comparisons, which evaluate to false for null and
            // previously caused the destructive flag to be ignored whenever foldFactors was null.
            var filterFoldFactor     = foldFactors?.Item1 ?? 1L;
            var subtractedFoldFactor = foldFactors?.Item2 ?? 1L;

            if (filterData.BlockSize / filterFoldFactor != subtractedFilterData.BlockSize / subtractedFoldFactor)
            {
                // Failed to find folding factors that will make the size of the filters match.
                return null;
            }

            IInvertibleBloomFilterData<TId, THash, TCount> result;
            if (filterFoldFactor > 1)
            {
                // The filter has to be folded: the result needs its own, smaller, data instance.
                result = configuration.DataFactory.Create(
                    configuration,
                    filterData.Capacity / filterFoldFactor,
                    filterData.BlockSize / filterFoldFactor,
                    filterData.HashFunctionCount);
            }
            else if (destructive)
            {
                // No folding needed and the caller allows overwriting the source data.
                result = filterData;
            }
            else
            {
                result = filterData.CreateDummy(configuration);
            }

            var idIdentity   = configuration.IdIdentity;
            var hashIdentity = configuration.HashIdentity;

            // Concurrent placeholders: the cells are processed in parallel, while the caller's
            // collections are plain HashSet/Stack instances.
            var listABag    = new ConcurrentBag<TId>();
            var listBBag    = new ConcurrentBag<TId>();
            var pureListBag = pureList == null ? null : new ConcurrentBag<long>();

            Parallel.ForEach(
                Partitioner.Create(0L, result.BlockSize),
                (range, state) =>
                {
                    for (var i = range.Item1; i < range.Item2; i++)
                    {
                        var filterCount     = filterData.Counts.GetFolded(i, filterFoldFactor, configuration.CountConfiguration.Add);
                        var subtractedCount = subtractedFilterData.Counts.GetFolded(i, subtractedFoldFactor, configuration.CountConfiguration.Add);
                        var hashSum         = configuration.HashRemove(
                            filterData.HashSumProvider.GetFolded(i, filterData.BlockSize, filterFoldFactor, configuration.HashAdd),
                            subtractedFilterData.HashSumProvider.GetFolded(i, subtractedFilterData.BlockSize, subtractedFoldFactor, configuration.HashAdd));
                        var filterIdSum     = filterData.IdSumProvider.GetFolded(i, filterData.BlockSize, filterFoldFactor, configuration.IdAdd);
                        var subtractedIdSum = subtractedFilterData.IdSumProvider.GetFolded(i, subtractedFilterData.BlockSize, subtractedFoldFactor, configuration.IdAdd);
                        var idXorResult     = configuration.IdRemove(filterIdSum, subtractedIdSum);

                        var sumsCancelOut = configuration.IdEqualityComparer.Equals(idIdentity, idXorResult) &&
                                            configuration.HashEqualityComparer.Equals(hashIdentity, hashSum);
                        if (!sumsCancelOut &&
                            configuration.CountConfiguration.IsPure(filterCount) &&
                            configuration.CountConfiguration.IsPure(subtractedCount))
                        {
                            // Both source cells are pure, yet their identifier or hash sums do not cancel out:
                            // record each side's identifier as a difference and reset the cell to the identity values.
                            listABag.Add(filterIdSum);
                            listBBag.Add(subtractedIdSum);
                            idXorResult = idIdentity;
                            hashSum     = hashIdentity;
                        }

                        result.Counts[i]          = configuration.CountConfiguration.Subtract(filterCount, subtractedCount);
                        result.HashSumProvider[i] = hashSum;
                        result.IdSumProvider[i]   = idXorResult;
                        if (configuration.IsPure(result, i))
                        {
                            pureListBag?.Add(i);
                        }
                    }
                });

            // Move back to the non-concurrent data types supplied by the caller.
            foreach (var itm in listABag)
            {
                listA.Add(itm);
            }
            foreach (var itm in listBBag)
            {
                listB.Add(itm);
            }
            if (pureList != null)
            {
                foreach (var item in pureListBag)
                {
                    pureList.Push(item);
                }
            }

            result.ItemCount = configuration.CountConfiguration.GetEstimatedCount(result.Counts, result.HashFunctionCount);
            return result;
        }