/// <summary>
/// Subtracts <paramref name="subtractedFilter"/> from <paramref name="filter"/> and decodes the difference into the supplied sets.
/// </summary>
/// <typeparam name="TEntity">The entity type</typeparam>
/// <typeparam name="TId">The type of the entity identifier</typeparam>
/// <typeparam name="TCount">The type of the Bloom filter occurrence count</typeparam>
/// <param name="filter">The filter to subtract from. May be <c>null</c>, in which case a dummy filter takes its place.</param>
/// <param name="subtractedFilter">The Bloom filter to subtract. May be <c>null</c>, in which case a dummy filter takes its place.</param>
/// <param name="configuration">The Bloom filter configuration</param>
/// <param name="listA">Receives items in <paramref name="filter"/>, but not in <paramref name="subtractedFilter"/></param>
/// <param name="listB">Receives items in <paramref name="subtractedFilter"/>, but not in <paramref name="filter"/></param>
/// <param name="modifiedEntities">Receives items in both filters, but with a different value.</param>
/// <param name="destructive">Optional parameter, when <c>true</c> the filter <paramref name="filter"/> may be modified, and thus rendered useless, by the decoding. Forced to <c>true</c> when either filter is <c>null</c>.</param>
/// <returns>
/// <c>true</c> when both filters are <c>null</c> or when every decode step succeeded, <c>false</c> when a decode step failed,
/// and <c>null</c> when the filters are not compatible or a decode step produced no result.
/// </returns>
public static bool? SubtractAndDecode<TEntity, TId, TCount>(
    this IInvertibleBloomFilterData<TId, int, TCount> filter,
    IInvertibleBloomFilterData<TId, int, TCount> subtractedFilter,
    IInvertibleBloomFilterConfiguration<TEntity, TId, int, TCount> configuration,
    HashSet<TId> listA,
    HashSet<TId> listB,
    HashSet<TId> modifiedEntities,
    bool destructive = false)
    where TId : struct
    where TCount : struct
{
    if (filter == null && subtractedFilter == null)
    {
        // Neither side holds data, so there is nothing to decode.
        return true;
    }

    if (filter != null)
    {
        filter.SyncCompressionProviders(configuration);
    }
    else
    {
        // Substitute a dummy shaped after the other filter. It is created here, so it may be consumed freely.
        filter = subtractedFilter.CreateDummy(configuration);
        destructive = true;
    }

    if (subtractedFilter != null)
    {
        subtractedFilter.SyncCompressionProviders(configuration);
    }
    else
    {
        // Exchange the operands (and the result sets with them) so that the temporary dummy is the
        // one on the left-hand side, which is the side the destructive setting applies to.
        subtractedFilter = filter;
        filter = subtractedFilter.CreateDummy(configuration);
        var originalListA = listA;
        listA = listB;
        listB = originalListA;
        destructive = true;
    }

    if (!filter.IsCompatibleWith(subtractedFilter, configuration))
    {
        return null;
    }

    var pureCells = new Stack<long>();
    var anySubFilter = filter.SubFilter != null || subtractedFilter.SubFilter != null;

    // With a sub filter present the identifier decode is not given the modified set:
    // the sub filter decode below reports modified entities instead.
    var modifiedTarget = anySubFilter ? null : modifiedEntities;
    var difference = filter.Subtract(subtractedFilter, configuration, listA, listB, pureCells, destructive);
    var idDecoded = difference.Decode(configuration, listA, listB, modifiedTarget, pureCells);

    bool? valueDecoded = true;
    if (anySubFilter)
    {
        valueDecoded = filter
            .SubFilter
            .SubtractAndDecode(
                subtractedFilter.SubFilter,
                configuration.SubFilterConfiguration,
                listA,
                listB,
                modifiedEntities,
                destructive);
    }

    if (valueDecoded.HasValue && idDecoded.HasValue)
    {
        return idDecoded.Value && valueDecoded.Value;
    }

    return null;
}
/// <summary>
/// Subtract the Bloom filter data.
/// </summary>
/// <typeparam name="TEntity">The entity type</typeparam>
/// <typeparam name="TId">The entity identifier type</typeparam>
/// <typeparam name="TCount">The occurrence count type</typeparam>
/// <typeparam name="THash">The hash type.</typeparam>
/// <param name="filterData">The filter data</param>
/// <param name="subtractedFilterData">The Bloom filter data to subtract</param>
/// <param name="configuration">The Bloom filter configuration</param>
/// <param name="listA">Receives identifiers found only in <paramref name="filterData"/> at positions where both filters were pure</param>
/// <param name="listB">Receives identifiers found only in <paramref name="subtractedFilterData"/> at positions where both filters were pure</param>
/// <param name="pureList">Optional stack that receives the positions of the result that are pure after the subtraction.</param>
/// <param name="destructive">When <c>true</c> and no folding of <paramref name="filterData"/> is needed, the result is written into <paramref name="filterData"/> itself, which is then no longer valid as the original filter; otherwise a new data instance is produced.</param>
/// <returns>The resulting Bloom filter data, or <c>null</c> when no fold factors make the block sizes of both filters match.</returns>
/// <exception cref="ArgumentException">The two filters are not compatible.</exception>
private static IInvertibleBloomFilterData<TId, THash, TCount> Subtract<TEntity, TId, THash, TCount>(
    this IInvertibleBloomFilterData<TId, THash, TCount> filterData,
    IInvertibleBloomFilterData<TId, THash, TCount> subtractedFilterData,
    IInvertibleBloomFilterConfiguration<TEntity, TId, THash, TCount> configuration,
    HashSet<TId> listA,
    HashSet<TId> listB,
    Stack<long> pureList = null,
    bool destructive = false)
    where TCount : struct
    where TId : struct
    where THash : struct
{
    if (!filterData.IsCompatibleWith(subtractedFilterData, configuration))
    {
        throw new ArgumentException("Subtracted invertible Bloom filters are not compatible.", nameof(subtractedFilterData));
    }

    var foldFactors = configuration.FoldingStrategy?.GetFoldFactors(filterData.BlockSize, subtractedFilterData.BlockSize);
    if (filterData.BlockSize / (foldFactors?.Item1 ?? 1L) != subtractedFilterData.BlockSize / (foldFactors?.Item2 ?? 1L))
    {
        //failed to find folding factors that will make the size of the filters match.
        return null;
    }

    // Missing fold factors mean "no folding". The check is deliberately written with ?? rather than as
    // 'foldFactors?.Item1 <= 1': that lifted comparison is false for null, which caused the destructive
    // flag to be ignored whenever no fold factors were available.
    var isFolded = (foldFactors?.Item1 ?? 1L) > 1;
    var result = destructive && !isFolded
        ? filterData
        : (!isFolded
            ? filterData.CreateDummy(configuration)
            : configuration.DataFactory.Create(
                configuration,
                filterData.Capacity / foldFactors.Item1,
                filterData.BlockSize / foldFactors.Item1,
                filterData.HashFunctionCount));
    foldFactors = foldFactors ?? new Tuple<long, long>(1, 1);

    var idIdentity = configuration.IdIdentity;
    var hashIdentity = configuration.HashIdentity;

    //concurrent place holders: the caller's collections are not written to from inside the parallel loop.
    var listABag = new ConcurrentBag<TId>();
    var listBBag = new ConcurrentBag<TId>();
    var pureListBag = pureList == null ? default(ConcurrentBag<long>) : new ConcurrentBag<long>();

    Parallel.ForEach(
        Partitioner.Create(0L, result.BlockSize),
        (range, state) =>
        {
            for (var i = range.Item1; i < range.Item2; i++)
            {
                var filterCount = filterData.Counts.GetFolded(i, foldFactors.Item1, configuration.CountConfiguration.Add);
                var subtractedCount = subtractedFilterData.Counts.GetFolded(i, foldFactors.Item2, configuration.CountConfiguration.Add);
                var hashSum = configuration.HashRemove(
                    filterData.HashSumProvider.GetFolded(i, filterData.BlockSize, foldFactors.Item1, configuration.HashAdd),
                    subtractedFilterData.HashSumProvider.GetFolded(i, subtractedFilterData.BlockSize, foldFactors.Item2, configuration.HashAdd));
                var filterIdSum = filterData.IdSumProvider.GetFolded(i, filterData.BlockSize, foldFactors.Item1, configuration.IdAdd);
                var subtractedIdSum = subtractedFilterData.IdSumProvider.GetFolded(i, subtractedFilterData.BlockSize, foldFactors.Item2, configuration.IdAdd);
                var idXorResult = configuration.IdRemove(filterIdSum, subtractedIdSum);

                var leavesResidue =
                    !configuration.IdEqualityComparer.Equals(idIdentity, idXorResult) ||
                    !configuration.HashEqualityComparer.Equals(hashIdentity, hashSum);
                if (leavesResidue &&
                    configuration.CountConfiguration.IsPure(filterCount) &&
                    configuration.CountConfiguration.IsPure(subtractedCount))
                {
                    //pure count went to zero: both filters were pure at the given position.
                    //The identifiers did not cancel out, so each is reported on its own side and the position is reset.
                    listABag.Add(filterIdSum);
                    listBBag.Add(subtractedIdSum);
                    idXorResult = idIdentity;
                    hashSum = hashIdentity;
                }

                result.Counts[i] = configuration.CountConfiguration.Subtract(filterCount, subtractedCount);
                result.HashSumProvider[i] = hashSum;
                result.IdSumProvider[i] = idXorResult;
                if (configuration.IsPure(result, i))
                {
                    pureListBag?.Add(i);
                }
            }
        });

    //move back to non concurrent data types.
    foreach (var itm in listABag)
    {
        listA.Add(itm);
    }

    foreach (var itm in listBBag)
    {
        listB.Add(itm);
    }

    if (pureList != null)
    {
        foreach (var item in pureListBag)
        {
            pureList.Push(item);
        }
    }

    result.ItemCount = configuration.CountConfiguration.GetEstimatedCount(result.Counts, result.HashFunctionCount);
    return result;
}
/// <summary>
/// Intersect Bloom filter data.
/// </summary>
/// <typeparam name="TEntity">The entity type</typeparam>
/// <typeparam name="TId">The entity identifier type</typeparam>
/// <typeparam name="THash">The hash type</typeparam>
/// <typeparam name="TCount">The occurrence count type</typeparam>
/// <param name="filterData">The filter data to intersect. May be <c>null</c>.</param>
/// <param name="configuration">The Bloom filter configuration</param>
/// <param name="otherFilterData">The filter data to intersect with. May be <c>null</c>.</param>
/// <param name="inPlace">When <c>true</c> and no folding of <paramref name="filterData"/> is needed, the intersection is written to the data obtained from converting <paramref name="filterData"/> instead of to newly created data.</param>
/// <returns>
/// The intersected data; <c>null</c> when both inputs are <c>null</c> or when they are not compatible.
/// When exactly one input is <c>null</c>, data newly created by the data factory with the dimensions of the
/// other input is returned (or, for <paramref name="inPlace"/> with a <c>null</c> <paramref name="otherFilterData"/>,
/// the cleared <paramref name="filterData"/>).
/// </returns>
internal static InvertibleBloomFilterData<TId, THash, TCount> Intersect<TEntity, TId, THash, TCount>(
    this IInvertibleBloomFilterData<TId, THash, TCount> filterData,
    IInvertibleBloomFilterConfiguration<TEntity, TId, THash, TCount> configuration,
    IInvertibleBloomFilterData<TId, THash, TCount> otherFilterData,
    bool inPlace = false)
    where TId : struct
    where THash : struct
    where TCount : struct
{
    if (filterData == null && otherFilterData == null)
    {
        return null;
    }

    if (filterData == null)
    {
        return configuration
            .DataFactory
            .Create(configuration, otherFilterData.Capacity, otherFilterData.BlockSize, otherFilterData.HashFunctionCount);
    }

    filterData.SyncCompressionProviders(configuration);

    if (otherFilterData == null)
    {
        if (inPlace)
        {
            filterData.Clear(configuration);
            return filterData.ConvertToBloomFilterData(configuration);
        }

        return configuration
            .DataFactory
            .Create(configuration, filterData.Capacity, filterData.BlockSize, filterData.HashFunctionCount);
    }

    otherFilterData.SyncCompressionProviders(configuration);

    if (!filterData.IsCompatibleWith(otherFilterData, configuration))
    {
        return null;
    }

    var foldFactors = configuration.FoldingStrategy?.GetFoldFactors(filterData.BlockSize, otherFilterData.BlockSize);

    // Missing fold factors mean "no folding". The check is deliberately written with ?? rather than as
    // 'foldFactors?.Item1 <= 1': that lifted comparison is false for null, which caused the inPlace
    // flag to be ignored whenever no fold factors were available.
    var isFolded = (foldFactors?.Item1 ?? 1L) > 1;
    var res = inPlace && !isFolded
        ? filterData.ConvertToBloomFilterData(configuration)
        : (!isFolded
            ? filterData.CreateDummy(configuration)
            : configuration.DataFactory.Create(
                configuration,
                filterData.Capacity / foldFactors.Item1,
                filterData.BlockSize / foldFactors.Item1,
                filterData.HashFunctionCount));
    foldFactors = foldFactors ?? new Tuple<long, long>(1, 1);
    res.IsReverse = filterData.IsReverse;

    Parallel.ForEach(
        Partitioner.Create(0L, res.BlockSize),
        (range, state) =>
        {
            for (var i = range.Item1; i < range.Item2; i++)
            {
                var filterDataCount = filterData.Counts.GetFolded(i, foldFactors.Item1, configuration.CountConfiguration.Add);
                var otherFilterDataCount = otherFilterData.Counts.GetFolded(i, foldFactors.Item2, configuration.CountConfiguration.Add);

                // Keep the smaller of the two counts, as ordered by the configured count comparer.
                res.Counts[i] = configuration.CountConfiguration.Comparer.Compare(filterDataCount, otherFilterDataCount) < 0
                    ? filterDataCount
                    : otherFilterDataCount;
                res.IdSumProvider[i] = configuration.IdIntersect(
                    filterData.IdSumProvider.GetFolded(i, filterData.BlockSize, foldFactors.Item1, configuration.IdAdd),
                    otherFilterData.IdSumProvider.GetFolded(i, otherFilterData.BlockSize, foldFactors.Item2, configuration.IdAdd));
                res.HashSumProvider[i] = configuration.HashIntersect(
                    filterData.HashSumProvider.GetFolded(i, filterData.BlockSize, foldFactors.Item1, configuration.HashAdd),
                    otherFilterData.HashSumProvider.GetFolded(i, otherFilterData.BlockSize, foldFactors.Item2, configuration.HashAdd));
            }
        });

    // The sub filters are intersected with the same inPlace setting.
    // NOTE(review): this relies on ConvertToBloomFilterData accepting a null receiver when both sub filters are null - confirm.
    res.SubFilter = filterData
        .SubFilter
        .Intersect(configuration.SubFilterConfiguration, otherFilterData.SubFilter, inPlace)
        .ConvertToBloomFilterData(configuration);
    res.ItemCount = configuration.CountConfiguration.GetEstimatedCount(res.Counts, res.HashFunctionCount);
    return res;
}