Ejemplo n.º 1
0
        /// <summary>
        /// Subtract the given filter and decode for any changes.
        /// </summary>
        /// <remarks>
        /// A <c>null</c> filter on either side is replaced by a dummy created from the other filter, and the
        /// operation is then forced to be destructive since only that temporary dummy is affected.
        /// When either filter has a sub filter, the sub filters are subtracted and decoded as well and
        /// <paramref name="modifiedEntities"/> is only handed to that sub filter pass.
        /// </remarks>
        /// <typeparam name="TEntity">The entity type</typeparam>
        /// <typeparam name="TId">The type of the entity identifier</typeparam>
        /// <typeparam name="TCount">The type of the Bloom filter occurrence count</typeparam>
        /// <param name="filter">Filter</param>
        /// <param name="subtractedFilter">The Bloom filter to subtract</param>
        /// <param name="configuration">The Bloom filter configuration</param>
        /// <param name="listA">Items in <paramref name="filter"/>, but not in <paramref name="subtractedFilter"/></param>
        /// <param name="listB">Items in <paramref name="subtractedFilter"/>, but not in <paramref name="filter"/></param>
        /// <param name="modifiedEntities">Items in both filters, but with a different value.</param>
        /// <param name="destructive">Optional parameter, when <c>true</c> the filter <paramref name="filter"/> will be modified, and thus rendered useless, by the decoding.</param>
        /// <returns>
        /// <c>true</c> when both filters are <c>null</c> or when every decode step reported success,
        /// <c>false</c> when a decode step reported failure, and <c>null</c> when the filters are not
        /// compatible, could not be subtracted, or a decode step did not produce a result.
        /// </returns>
        public static bool? SubtractAndDecode<TEntity, TId, TCount>(
            this IInvertibleBloomFilterData<TId, int, TCount> filter,
            IInvertibleBloomFilterData<TId, int, TCount> subtractedFilter,
            IInvertibleBloomFilterConfiguration<TEntity, TId, int, TCount> configuration,
            HashSet<TId> listA,
            HashSet<TId> listB,
            HashSet<TId> modifiedEntities,
            bool destructive = false)
            where TId : struct
            where TCount : struct
        {
            if (filter == null && subtractedFilter == null)
            {
                // Nothing on either side, so there is nothing to decode.
                return true;
            }
            if (filter == null)
            {
                // Handle a null filter as elegantly as possible: subtract from a dummy instead.
                filter = subtractedFilter.CreateDummy(configuration);
                destructive = true;
            }
            else
            {
                filter.SyncCompressionProviders(configuration);
            }
            if (subtractedFilter == null)
            {
                // Swap the filters and the sets so the destructive setting can still be applied to the
                // temporarily created filter data.
                subtractedFilter = filter;
                filter = subtractedFilter.CreateDummy(configuration);
                var swap = listA;
                listA = listB;
                listB = swap;
                destructive = true;
            }
            else
            {
                subtractedFilter.SyncCompressionProviders(configuration);
            }
            if (!filter.IsCompatibleWith(subtractedFilter, configuration))
            {
                return null;
            }
            bool? valueRes = true;
            var pureList = new Stack<long>();
            var hasSubFilter = filter.SubFilter != null || subtractedFilter.SubFilter != null;
            var difference = filter.Subtract(subtractedFilter, configuration, listA, listB, pureList, destructive);
            // Subtract yields null when the block sizes cannot be made to match by folding; in that case
            // there is nothing to decode and the id result is left without a value.
            // When a sub filter is present, the modified entities are collected by the sub filter pass
            // below, so no set is handed to this decode.
            var idRes = difference?.Decode(
                configuration,
                listA,
                listB,
                hasSubFilter ? null : modifiedEntities,
                pureList);

            if (hasSubFilter)
            {
                valueRes = filter
                    .SubFilter
                    .SubtractAndDecode(
                        subtractedFilter.SubFilter,
                        configuration.SubFilterConfiguration,
                        listA,
                        listB,
                        modifiedEntities,
                        destructive);
            }
            if (!valueRes.HasValue || !idRes.HasValue)
            {
                return null;
            }
            return idRes.Value && valueRes.Value;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Subtract the Bloom filter data.
        /// </summary>
        /// <remarks>
        /// Both filters are folded (using the factors supplied by the configured folding strategy, or a
        /// factor of 1 when there is none) so that they have the same block size, and are then subtracted
        /// position by position in parallel. Positions where both folded counts are pure but the id or
        /// hash sums differ are reported directly to <paramref name="listA"/> and <paramref name="listB"/>.
        /// </remarks>
        /// <typeparam name="TEntity">The entity type</typeparam>
        /// <typeparam name="TId">The entity identifier type</typeparam>
        /// <typeparam name="TCount">The occurrence count type</typeparam>
        /// <typeparam name="THash">The hash type.</typeparam>
        /// <param name="filterData">The filter data</param>
        /// <param name="subtractedFilterData">The Bloom filter data to subtract</param>
        /// <param name="configuration">The Bloom filter configuration</param>
        /// <param name="listA">Items in <paramref name="filterData"/>, but not in <paramref name="subtractedFilterData"/></param>
        /// <param name="listB">Items in <paramref name="subtractedFilterData"/>, but not in <paramref name="filterData"/></param>
        /// <param name="pureList">Optional list that receives the positions that are pure after the subtraction.</param>
        /// <param name="destructive">When <c>true</c> the <paramref name="filterData"/> will no longer be valid after the subtract operation, otherwise <c>false</c></param>
        /// <returns>The resulting Bloom filter data, or <c>null</c> when no fold factors could be found that make the block sizes match.</returns>
        /// <exception cref="ArgumentException">The two filters are not compatible.</exception>
        private static IInvertibleBloomFilterData<TId, THash, TCount> Subtract<TEntity, TId, THash, TCount>(
            this IInvertibleBloomFilterData<TId, THash, TCount> filterData,
            IInvertibleBloomFilterData<TId, THash, TCount> subtractedFilterData,
            IInvertibleBloomFilterConfiguration<TEntity, TId, THash, TCount> configuration,
            HashSet<TId> listA,
            HashSet<TId> listB,
            Stack<long> pureList = null,
            bool destructive = false)
            where TCount : struct
            where TId : struct
            where THash : struct
        {
            if (!filterData.IsCompatibleWith(subtractedFilterData, configuration))
            {
                throw new ArgumentException("Subtracted invertible Bloom filters are not compatible.", nameof(subtractedFilterData));
            }
            var foldFactors = configuration.FoldingStrategy?.GetFoldFactors(filterData.BlockSize, subtractedFilterData.BlockSize);

            if (filterData.BlockSize / (foldFactors?.Item1 ?? 1L) !=
                subtractedFilterData.BlockSize / (foldFactors?.Item2 ?? 1L))
            {
                // Failed to find folding factors that will make the size of the filters match.
                return null;
            }
            // Absent fold factors mean "no folding", exactly like a factor of 1. A lifted comparison such
            // as 'foldFactors?.Item1 <= 1' is false for null, which would silently ignore 'destructive'.
            var noFoldingNeeded = foldFactors == null || foldFactors.Item1 <= 1;
            var result = destructive && noFoldingNeeded ?
                filterData :
                (noFoldingNeeded ?
                    filterData.CreateDummy(configuration) :
                    configuration.DataFactory.Create(
                        configuration,
                        filterData.Capacity / foldFactors.Item1,
                        filterData.BlockSize / foldFactors.Item1,
                        filterData.HashFunctionCount));

            foldFactors = foldFactors ?? new Tuple<long, long>(1, 1);
            var idIdentity = configuration.IdIdentity;
            var hashIdentity = configuration.HashIdentity;
            // Concurrent placeholders: the parallel loop cannot write to the caller's collections directly.
            var listABag = new ConcurrentBag<TId>();
            var listBBag = new ConcurrentBag<TId>();
            var pureListBag = pureList == null ? null : new ConcurrentBag<long>();

            Parallel.ForEach(
                Partitioner.Create(0L, result.BlockSize),
                (range, state) =>
                {
                    for (var i = range.Item1; i < range.Item2; i++)
                    {
                        var filterCount = filterData.Counts.GetFolded(i, foldFactors.Item1, configuration.CountConfiguration.Add);
                        var subtractedCount = subtractedFilterData.Counts.GetFolded(i, foldFactors.Item2, configuration.CountConfiguration.Add);
                        var hashSum = configuration.HashRemove(
                            filterData.HashSumProvider.GetFolded(i, filterData.BlockSize, foldFactors.Item1, configuration.HashAdd),
                            subtractedFilterData.HashSumProvider.GetFolded(i, subtractedFilterData.BlockSize, foldFactors.Item2, configuration.HashAdd));
                        var filterIdSum = filterData.IdSumProvider.GetFolded(i, filterData.BlockSize, foldFactors.Item1, configuration.IdAdd);
                        var subtractedIdSum = subtractedFilterData.IdSumProvider.GetFolded(i, subtractedFilterData.BlockSize, foldFactors.Item2, configuration.IdAdd);
                        var idXorResult = configuration.IdRemove(filterIdSum, subtractedIdSum);
                        var differs = !configuration.IdEqualityComparer.Equals(idIdentity, idXorResult) ||
                                      !configuration.HashEqualityComparer.Equals(hashIdentity, hashSum);
                        if (differs &&
                            configuration.CountConfiguration.IsPure(filterCount) &&
                            configuration.CountConfiguration.IsPure(subtractedCount))
                        {
                            // Both filters were pure at this position, yet id or hash differ: report
                            // each id to its own list and reset the position to the identity values.
                            listABag.Add(filterIdSum);
                            listBBag.Add(subtractedIdSum);
                            idXorResult = idIdentity;
                            hashSum = hashIdentity;
                        }
                        result.Counts[i] = configuration.CountConfiguration.Subtract(filterCount, subtractedCount);
                        result.HashSumProvider[i] = hashSum;
                        result.IdSumProvider[i] = idXorResult;
                        if (configuration.IsPure(result, i))
                        {
                            pureListBag?.Add(i);
                        }
                    }
                });
            // Move back to the non concurrent data types supplied by the caller.
            foreach (var itm in listABag)
            {
                listA.Add(itm);
            }
            foreach (var itm in listBBag)
            {
                listB.Add(itm);
            }
            if (pureList != null)
            {
                foreach (var item in pureListBag)
                {
                    pureList.Push(item);
                }
            }
            result.ItemCount = configuration.CountConfiguration.GetEstimatedCount(result.Counts, result.HashFunctionCount);
            return result;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Intersect Bloom filter data.
        /// </summary>
        /// <remarks>
        /// Both filters are folded (using the factors supplied by the configured folding strategy, or a
        /// factor of 1 when there is none) and intersected position by position in parallel: the count is
        /// the smaller of the two folded counts, the id and hash sums are combined through the
        /// configuration's intersect operations. The sub filters are intersected as well.
        /// </remarks>
        /// <typeparam name="TEntity">The entity type</typeparam>
        /// <typeparam name="TId">The entity identifier type</typeparam>
        /// <typeparam name="THash">The hash type</typeparam>
        /// <typeparam name="TCount">The occurrence count type</typeparam>
        /// <param name="filterData">The filter data to intersect; may be <c>null</c>.</param>
        /// <param name="configuration">The Bloom filter configuration</param>
        /// <param name="otherFilterData">The filter data to intersect with; may be <c>null</c>.</param>
        /// <param name="inPlace">
        /// When <c>true</c> and <paramref name="filterData"/> does not need to be folded, the result is
        /// written to <paramref name="filterData"/> instead of to newly created filter data. When
        /// <paramref name="otherFilterData"/> is <c>null</c>, <paramref name="filterData"/> is cleared.
        /// </param>
        /// <returns>
        /// The intersected filter data. Newly created filter data with the dimensions of the non-null
        /// filter when exactly one of the filters is <c>null</c> (or the cleared
        /// <paramref name="filterData"/> when <paramref name="inPlace"/> is set). <c>null</c> when both
        /// filters are <c>null</c> or when the filters are not compatible.
        /// </returns>
        internal static InvertibleBloomFilterData<TId, THash, TCount> Intersect<TEntity, TId, THash, TCount>(
            this IInvertibleBloomFilterData<TId, THash, TCount> filterData,
            IInvertibleBloomFilterConfiguration<TEntity, TId, THash, TCount> configuration,
            IInvertibleBloomFilterData<TId, THash, TCount> otherFilterData,
            bool inPlace = false)
            where TId : struct
            where THash : struct
            where TCount : struct
        {
            if (filterData == null && otherFilterData == null)
            {
                return null;
            }
            if (filterData == null)
            {
                // Nothing to intersect with on this side: hand back freshly created data that has the
                // dimensions of the other filter.
                return configuration
                    .DataFactory
                    .Create(configuration, otherFilterData.Capacity, otherFilterData.BlockSize, otherFilterData.HashFunctionCount);
            }
            filterData.SyncCompressionProviders(configuration);
            if (otherFilterData == null)
            {
                if (inPlace)
                {
                    filterData.Clear(configuration);
                    return filterData.ConvertToBloomFilterData(configuration);
                }
                return configuration
                    .DataFactory
                    .Create(configuration, filterData.Capacity, filterData.BlockSize, filterData.HashFunctionCount);
            }
            otherFilterData.SyncCompressionProviders(configuration);
            if (!filterData.IsCompatibleWith(otherFilterData, configuration))
            {
                return null;
            }
            var foldFactors = configuration.FoldingStrategy?.GetFoldFactors(filterData.BlockSize, otherFilterData.BlockSize);
            // Absent fold factors mean "no folding", exactly like a factor of 1. A lifted comparison such
            // as 'foldFactors?.Item1 <= 1' is false for null, which would silently ignore 'inPlace'.
            var noFoldingNeeded = foldFactors == null || foldFactors.Item1 <= 1;
            var res = inPlace && noFoldingNeeded ?
                filterData.ConvertToBloomFilterData(configuration) :
                (noFoldingNeeded ?
                    filterData.CreateDummy(configuration) :
                    configuration.DataFactory.Create(
                        configuration,
                        filterData.Capacity / foldFactors.Item1,
                        filterData.BlockSize / foldFactors.Item1,
                        filterData.HashFunctionCount));

            foldFactors = foldFactors ?? new Tuple<long, long>(1, 1);
            res.IsReverse = filterData.IsReverse;
            Parallel.ForEach(
                Partitioner.Create(0L, res.BlockSize),
                (range, state) =>
                {
                    for (var i = range.Item1; i < range.Item2; i++)
                    {
                        var filterDataCount = filterData.Counts.GetFolded(i, foldFactors.Item1, configuration.CountConfiguration.Add);
                        var otherFilterDataCount = otherFilterData.Counts.GetFolded(i, foldFactors.Item2, configuration.CountConfiguration.Add);
                        // Keep the smaller of the two counts according to the configured comparer.
                        res.Counts[i] = configuration.CountConfiguration.Comparer.Compare(filterDataCount, otherFilterDataCount) < 0 ?
                            filterDataCount :
                            otherFilterDataCount;
                        res.IdSumProvider[i] = configuration.IdIntersect(
                            filterData.IdSumProvider.GetFolded(i, filterData.BlockSize, foldFactors.Item1, configuration.IdAdd),
                            otherFilterData.IdSumProvider.GetFolded(i, otherFilterData.BlockSize, foldFactors.Item2, configuration.IdAdd));
                        res.HashSumProvider[i] = configuration.HashIntersect(
                            filterData.HashSumProvider.GetFolded(i, filterData.BlockSize, foldFactors.Item1, configuration.HashAdd),
                            otherFilterData.HashSumProvider.GetFolded(i, otherFilterData.BlockSize, foldFactors.Item2, configuration.HashAdd));
                    }
                });
            res.SubFilter = filterData
                .SubFilter
                .Intersect(configuration.SubFilterConfiguration, otherFilterData.SubFilter, inPlace)
                .ConvertToBloomFilterData(configuration);
            res.ItemCount = configuration.CountConfiguration.GetEstimatedCount(res.Counts, res.HashFunctionCount);
            return res;
        }