/// <summary>
/// Remove an identifier and its hash value from the cell at the given position.
/// </summary>
/// <typeparam name="TEntity">The entity type</typeparam>
/// <typeparam name="TId">The type of the entity identifier</typeparam>
/// <typeparam name="TCount">The type of the Bloom filter occurrence count</typeparam>
/// <param name="filter">The filter</param>
/// <param name="configuration">The configuration</param>
/// <param name="idValue">The identifier to remove</param>
/// <param name="hashValue">The hash value to remove</param>
/// <param name="position">The position of the cell to remove the identifier and hash from.</param>
/// <returns>
/// <c>true</c> when the configuration reports the cell as pure after the removal;
/// <c>false</c> otherwise, including when <paramref name="filter"/> is <c>null</c>.
/// </returns>
internal static bool Remove<TEntity, TId, TCount>(
    this IInvertibleBloomFilterData<TId, int, TCount> filter,
    IInvertibleBloomFilterConfiguration<TEntity, TId, int, TCount> configuration,
    TId idValue,
    int hashValue,
    long position)
    where TCount : struct
    where TId : struct
{
    // Nothing to remove from a missing filter.
    if (filter == null)
    {
        return false;
    }

    var pureAfterRemoval = false;

    // The whole cell update is handed to ExecuteExclusively for this position
    // (presumably so the three sums are updated as one unit - confirm against its implementation).
    filter.ExecuteExclusively(position, () =>
    {
        var decreasedCount = configuration.CountConfiguration.Decrease(filter.Counts[position]);
        filter.Counts[position] = decreasedCount;

        var reducedHashSum = configuration.HashRemove(filter.HashSumProvider[position], hashValue);
        filter.HashSumProvider[position] = reducedHashSum;

        var reducedIdSum = configuration.IdRemove(filter.IdSumProvider[position], idValue);
        filter.IdSumProvider[position] = reducedIdSum;

        // Purity is evaluated only after count, hash sum and id sum have all been updated.
        pureAfterRemoval = configuration.IsPure(filter, position);
    });

    return pureAfterRemoval;
}
/// <summary>
/// Subtract the Bloom filter data.
/// </summary>
/// <typeparam name="TEntity">The entity type</typeparam>
/// <typeparam name="TId">The entity identifier type</typeparam>
/// <typeparam name="TCount">The occurrence count type</typeparam>
/// <typeparam name="THash">The hash type.</typeparam>
/// <param name="filterData">The filter data</param>
/// <param name="subtractedFilterData">The Bloom filter data to subtract</param>
/// <param name="configuration">The Bloom filter configuration</param>
/// <param name="listA">Items in <paramref name="filterData"/>, but not in <paramref name="subtractedFilterData"/></param>
/// <param name="listB">Items in <paramref name="subtractedFilterData"/>, but not in <paramref name="filterData"/></param>
/// <param name="pureList">Optional list of pure items. When provided, the positions of all pure cells in the result are pushed onto it.</param>
/// <param name="destructive">When <c>true</c> the <paramref name="filterData"/> will no longer be valid after the subtract operation, otherwise <c>false</c></param>
/// <returns>
/// The resulting Bloom filter data, or <c>null</c> when no fold factors make the block sizes of both filters match.
/// </returns>
/// <exception cref="ArgumentException">
/// Thrown when <paramref name="subtractedFilterData"/> is not compatible with <paramref name="filterData"/>.
/// </exception>
private static IInvertibleBloomFilterData<TId, THash, TCount> Subtract<TEntity, TId, THash, TCount>(
    this IInvertibleBloomFilterData<TId, THash, TCount> filterData,
    IInvertibleBloomFilterData<TId, THash, TCount> subtractedFilterData,
    IInvertibleBloomFilterConfiguration<TEntity, TId, THash, TCount> configuration,
    HashSet<TId> listA,
    HashSet<TId> listB,
    Stack<long> pureList = null,
    bool destructive = false)
    where TCount : struct
    where TId : struct
    where THash : struct
{
    if (!filterData.IsCompatibleWith(subtractedFilterData, configuration))
    {
        throw new ArgumentException("Subtracted invertible Bloom filters are not compatible.", nameof(subtractedFilterData));
    }

    // A missing fold-factor pair (no folding strategy, or the strategy returned nothing) means
    // "do not fold": treat it as (1, 1) right away. Resolving this before picking the result
    // target matters: a lifted comparison such as 'foldFactors?.Item1 <= 1' evaluates to false
    // for null, which would make the destructive flag silently ineffective in the no-fold case.
    var foldFactors =
        configuration.FoldingStrategy?.GetFoldFactors(filterData.BlockSize, subtractedFilterData.BlockSize)
        ?? new Tuple<long, long>(1, 1);

    if (filterData.BlockSize / foldFactors.Item1 != subtractedFilterData.BlockSize / foldFactors.Item2)
    {
        // Failed to find folding factors that will make the size of the filters match.
        return null;
    }

    // Pick where the result is written:
    //  - filterData is not folded and may be destroyed: write in place;
    //  - filterData is not folded but must be preserved: write to a dummy created from it;
    //  - filterData is folded: write to freshly created data of the folded size.
    IInvertibleBloomFilterData<TId, THash, TCount> result;
    if (foldFactors.Item1 <= 1)
    {
        result = destructive ? filterData : filterData.CreateDummy(configuration);
    }
    else
    {
        result = configuration.DataFactory.Create(
            configuration,
            filterData.Capacity / foldFactors.Item1,
            filterData.BlockSize / foldFactors.Item1,
            filterData.HashFunctionCount);
    }

    var idIdentity = configuration.IdIdentity;
    var hashIdentity = configuration.HashIdentity;

    // Concurrent place holders: the cells are processed in parallel, while the caller's
    // HashSet/Stack instances are only touched after the parallel loop has finished.
    var listABag = new ConcurrentBag<TId>();
    var listBBag = new ConcurrentBag<TId>();
    var pureListBag = pureList == null ? null : new ConcurrentBag<long>();

    Parallel.ForEach(
        Partitioner.Create(0L, result.BlockSize),
        (range, state) =>
        {
            for (var i = range.Item1; i < range.Item2; i++)
            {
                // Read the (possibly folded) values of both filters for cell i before anything is written.
                var filterCount = filterData.Counts.GetFolded(i, foldFactors.Item1, configuration.CountConfiguration.Add);
                var subtractedCount = subtractedFilterData.Counts.GetFolded(i, foldFactors.Item2, configuration.CountConfiguration.Add);
                var hashSum = configuration.HashRemove(
                    filterData.HashSumProvider.GetFolded(i, filterData.BlockSize, foldFactors.Item1, configuration.HashAdd),
                    subtractedFilterData.HashSumProvider.GetFolded(i, subtractedFilterData.BlockSize, foldFactors.Item2, configuration.HashAdd));
                var filterIdSum = filterData.IdSumProvider.GetFolded(i, filterData.BlockSize, foldFactors.Item1, configuration.IdAdd);
                var subtractedIdSum = subtractedFilterData.IdSumProvider.GetFolded(i, subtractedFilterData.BlockSize, foldFactors.Item2, configuration.IdAdd);
                var idXorResult = configuration.IdRemove(filterIdSum, subtractedIdSum);

                var sumsDiffer =
                    !configuration.IdEqualityComparer.Equals(idIdentity, idXorResult) ||
                    !configuration.HashEqualityComparer.Equals(hashIdentity, hashSum);
                if (sumsDiffer &&
                    configuration.CountConfiguration.IsPure(filterCount) &&
                    configuration.CountConfiguration.IsPure(subtractedCount))
                {
                    // Both filters were pure at this position but hold different content:
                    // record each side's identifier and reset the cell's sums to identity.
                    listABag.Add(filterIdSum);
                    listBBag.Add(subtractedIdSum);
                    idXorResult = idIdentity;
                    hashSum = hashIdentity;
                }

                result.Counts[i] = configuration.CountConfiguration.Subtract(filterCount, subtractedCount);
                result.HashSumProvider[i] = hashSum;
                result.IdSumProvider[i] = idXorResult;

                if (configuration.IsPure(result, i))
                {
                    pureListBag?.Add(i);
                }
            }
        });

    // Move back to the caller's non-concurrent data types.
    foreach (var itm in listABag)
    {
        listA.Add(itm);
    }

    foreach (var itm in listBBag)
    {
        listB.Add(itm);
    }

    if (pureList != null)
    {
        foreach (var item in pureListBag)
        {
            pureList.Push(item);
        }
    }

    result.ItemCount = configuration.CountConfiguration.GetEstimatedCount(result.Counts, result.HashFunctionCount);
    return result;
}
/// <summary>
/// Decode the filter.
/// </summary>
/// <typeparam name="TEntity">The type of the entity</typeparam>
/// <typeparam name="TId">The type of the entity identifier</typeparam>
/// <typeparam name="TCount">The type of the occurrence count for the invertible Bloom filter.</typeparam>
/// <param name="filter">The Bloom filter data to decode. Cells are added to / removed from while decoding.</param>
/// <param name="configuration">The Bloom filter configuration</param>
/// <param name="listA">Items in the original set, but not in the subtracted set.</param>
/// <param name="listB">Items not in the original set, but in the subtracted set.</param>
/// <param name="modifiedEntities">Items in both sets, but with a different value.</param>
/// <param name="pureList">Optional list of pure items. When omitted, it is built by scanning every position of the filter.</param>
/// <returns>
/// <c>null</c> when <paramref name="filter"/> is <c>null</c>; otherwise the outcome of the
/// completeness check on the filter after decoding (<c>true</c> when the decode was successful, else <c>false</c>).
/// </returns>
private static bool? Decode<TEntity, TId, TCount>(
    this IInvertibleBloomFilterData<TId, int, TCount> filter,
    IInvertibleBloomFilterConfiguration<TEntity, TId, int, TCount> configuration,
    HashSet<TId> listA,
    HashSet<TId> listB,
    HashSet<TId> modifiedEntities = null,
    Stack<long> pureList = null)
    where TId : struct
    where TCount : struct
{
    if (filter == null)
    {
        return null;
    }

    // NOTE(review): the sign test below uses the default comparer, while the zero test inside
    // the probe loop uses configuration.CountConfiguration.Comparer - confirm both agree.
    var defaultCountComparer = Comparer<TCount>.Default;

    if (pureList == null)
    {
        // No pure positions supplied: collect every position the configuration considers pure.
        var purePositions = LongEnumerable
            .Range(0L, filter.BlockSize)
            .Where(candidate => configuration.IsPure(filter, candidate));
        pureList = new Stack<long>(purePositions);
    }

    var zeroCount = configuration.CountConfiguration.Identity;

    while (pureList.Count > 0)
    {
        var pureIdx = pureList.Pop();

        // The cell may have stopped being pure since it was pushed.
        if (!configuration.IsPure(filter, pureIdx))
        {
            continue;
        }

        // Capture the cell's content before the probe loop starts changing cells.
        var id = filter.IdSumProvider[pureIdx];
        var hashSum = filter.HashSumProvider[pureIdx];
        var count = filter.Counts[pureIdx];
        var countIsNegative = defaultCountComparer.Compare(count, zeroCount) < 0;
        var sawModification = false;

        foreach (var position in configuration.Probe(filter, hashSum))
        {
            var countWasZero =
                configuration.CountConfiguration.Comparer.Compare(filter.Counts[position], zeroCount) == 0;

            // A pure cell holding the same identifier under a different hash marks a modified entity.
            var sameIdOtherHash =
                configuration.IsPure(filter, position) &&
                !configuration.HashEqualityComparer.Equals(filter.HashSumProvider[position], hashSum) &&
                configuration.IdEqualityComparer.Equals(id, filter.IdSumProvider[position]);

            var hashToApply = hashSum;
            if (sameIdOtherHash)
            {
                modifiedEntities?.Add(id);
                sawModification = true;

                // For a modified entity the cell's own hash sum is taken out instead of the decoded one.
                hashToApply = filter.HashSumProvider[position];
            }

            // A negative count is cancelled by adding, a non-negative count by removing.
            if (countIsNegative)
            {
                filter.Add(configuration, id, hashToApply, position);
            }
            else
            {
                filter.Remove(configuration, id, hashToApply, position);
            }

            if (!countWasZero && configuration.IsPure(filter, position))
            {
                // Count became pure, add to the list.
                pureList.Push(position);
            }
        }

        // Modified entities are reported through modifiedEntities only, not through listA / listB.
        if (!sawModification)
        {
            var target = countIsNegative ? listB : listA;
            target.Add(id);
        }
    }

    modifiedEntities?.MoveModified(listA, listB);
    return filter.IsCompleteDecode(configuration);
}