/// <summary>
/// Add two estimators.
/// </summary>
/// <param name="estimator">The estimator to add to.</param>
/// <param name="otherEstimator">The other estimator to add.</param>
/// <param name="foldingStrategy">The folding strategy used to obtain fold factors when the capacities differ. May be <c>null</c>, in which case no folding is applied.</param>
/// <param name="inPlace">When <c>true</c> and neither estimator has to be folded, the data is added to the given <paramref name="estimator"/>; otherwise a new estimator is created.</param>
/// <returns>
/// The combined estimator data, or <c>null</c> when either estimator is <c>null</c>.
/// When the result has no slot values (null or empty) it is returned without merging values or item counts.
/// </returns>
/// <exception cref="ArgumentException">
/// The hash counts differ, or the capacities differ and the folding strategy did not yield a fold factor greater than one.
/// </exception>
internal static IBitMinwiseHashEstimatorFullData Add(
    this IBitMinwiseHashEstimatorFullData estimator,
    IBitMinwiseHashEstimatorFullData otherEstimator,
    IFoldingStrategy foldingStrategy,
    bool inPlace = false)
{
    if (estimator == null || otherEstimator == null)
    {
        return null;
    }

    var foldingFactors = foldingStrategy?.GetFoldFactors(estimator.Capacity, otherEstimator.Capacity);
    if ((estimator.Capacity != otherEstimator.Capacity &&
         (foldingFactors?.Item1 ?? 0L) <= 1 &&
         (foldingFactors?.Item2 ?? 0L) <= 1) ||
        estimator.HashCount != otherEstimator.HashCount)
    {
        throw new ArgumentException("Minwise estimators with different capacity or hash count cannot be added.");
    }

    // The given estimator can only be reused when neither side is folded;
    // a folded result has a different capacity and needs its own instance.
    var reuseEstimator = inPlace &&
                         ((foldingFactors?.Item1 ?? 1L) == 1L) &&
                         ((foldingFactors?.Item2 ?? 1L) == 1L);
    var res = reuseEstimator
        ? estimator
        : new BitMinwiseHashEstimatorFullData
        {
            Capacity = estimator.Capacity / (foldingFactors?.Item1 ?? 1L),
            HashCount = estimator.HashCount,
            BitSize = estimator.BitSize,
            ItemCount = estimator.ItemCount,
        };

    // NOTE(review): SetValues is defined elsewhere; presumably it prepares the slot array of the result - confirm.
    if (estimator.Values != null && otherEstimator.Values != null)
    {
        res.SetValues(false);
    }

    // Nothing to merge. The empty case must be excluded as well, because
    // Partitioner.Create rejects a range whose upper bound is not greater than its lower bound.
    if (res.Values == null || res.Values.LongLength == 0L)
    {
        return res;
    }

    Parallel.ForEach(
        Partitioner.Create(0L, res.Values.LongLength),
        (range, state) =>
        {
            for (var i = range.Item1; i < range.Item2; i++)
            {
                // Each slot of the result keeps the smallest (folded) value of both inputs.
                res.Values[i] = Math.Min(
                    GetFolded(estimator.Values, i, foldingFactors?.Item1, Math.Min, int.MaxValue),
                    GetFolded(otherEstimator.Values, i, foldingFactors?.Item2, Math.Min, int.MaxValue));
            }
        });

    res.ItemCount += otherEstimator.ItemCount;
    return res;
}
/// <summary>
/// Compress the estimator data by folding it with the factor found by the configured folding strategy.
/// </summary>
/// <typeparam name="TId">The identifier type of the configuration.</typeparam>
/// <typeparam name="TCount">Type parameter that is not referenced by the parameters or the body of this method.</typeparam>
/// <param name="estimator">The estimator data to compress.</param>
/// <param name="configuration">The configuration providing the folding strategy.</param>
/// <returns>
/// The folded estimator data, or <c>null</c> when the configuration, its folding strategy or the estimator
/// is <c>null</c>, or when the folding strategy does not provide a compression factor.
/// </returns>
internal static IBitMinwiseHashEstimatorFullData Compress<TId, TCount>(
    this IBitMinwiseHashEstimatorFullData estimator,
    IBloomFilterConfiguration<TId, int> configuration)
    where TId : struct
    where TCount : struct
{
    if (configuration?.FoldingStrategy == null)
    {
        return null;
    }

    if (estimator == null)
    {
        return null;
    }

    // The capacity is passed for both capacity arguments of the strategy.
    var compressionFactor = configuration.FoldingStrategy.FindCompressionFactor(
        configuration,
        estimator.Capacity,
        estimator.Capacity,
        estimator.ItemCount);

    if (!compressionFactor.HasValue)
    {
        return null;
    }

    return estimator.Fold(compressionFactor.Value);
}
/// <summary>
/// Rehydrate the estimator from the given data.
/// </summary>
/// <param name="data">The data to take capacity, hash count, bit size, item count and slot values from. Ignored when <c>null</c>.</param>
public void Rehydrate(IBitMinwiseHashEstimatorFullData data)
{
    if (data == null)
    {
        return;
    }

    _capacity = data.Capacity;
    _hashCount = data.HashCount;
    _bitSize = data.BitSize;
    _itemCount = data.ItemCount;

    if (data.Values == null)
    {
        // No values supplied: the slots are created on first access from the hash count and capacity fields.
        _slots = new Lazy<int[]>(() => GetMinHashSlots(_hashCount, _capacity));
    }
    else
    {
        // Values supplied: the slots resolve to the values of the given data on first access.
        _slots = new Lazy<int[]>(() => data.Values);
    }
}
/// <summary>
/// Fold the minwise estimator data into a new instance with a capacity reduced by the given factor.
/// </summary>
/// <param name="estimator">The estimator data to fold.</param>
/// <param name="factor">The folding factor. Must be non-zero and must divide the capacity of the estimator.</param>
/// <returns>The folded estimator data, or <c>null</c> when <paramref name="estimator"/> is <c>null</c>.</returns>
/// <exception cref="ArgumentException">
/// The factor is zero, or the capacity of the estimator is not a multiple of the factor.
/// </exception>
internal static BitMinwiseHashEstimatorFullData Fold(
    this IBitMinwiseHashEstimatorFullData estimator,
    uint factor)
{
    // The factor is validated before the estimator, so a zero factor throws even for a null estimator.
    if (factor == 0)
    {
        throw new ArgumentException($"Fold factor should be a positive number (given value was {factor}).");
    }

    if (estimator == null)
    {
        return null;
    }

    if (estimator.Capacity % factor != 0)
    {
        throw new ArgumentException($"Bit minwise filter data cannot be folded by {factor}.", nameof(factor));
    }

    var folded = new BitMinwiseHashEstimatorFullData
    {
        Capacity = estimator.Capacity / factor,
        HashCount = estimator.HashCount,
        BitSize = estimator.BitSize,
        ItemCount = estimator.ItemCount
    };

    // NOTE(review): SetValues is defined elsewhere; presumably it prepares the slot array of the result - confirm.
    if (estimator.Values != null)
    {
        folded.SetValues(false);
    }

    // Without slots there is nothing to fold.
    if (folded.Values == null || folded.Values.Length == 0)
    {
        return folded;
    }

    Parallel.ForEach(
        Partitioner.Create(0L, folded.Values.LongLength),
        (segment, loopState) =>
        {
            var position = segment.Item1;
            while (position < segment.Item2)
            {
                // Math.Min is handed to GetFolded as the function that combines the folded source slots.
                folded.Values[position] = estimator.Values.GetFolded(position, factor, Math.Min);
                position++;
            }
        });

    return folded;
}
/// <summary>
/// Get the block size of the estimator data: the hash count multiplied by the capacity.
/// </summary>
/// <param name="data">The estimator data.</param>
/// <returns>The hash count times the capacity, or zero when <paramref name="data"/> is <c>null</c>.</returns>
internal static long GetBlockSize(this IBitMinwiseHashEstimatorFullData data)
{
    if (data == null)
    {
        return 0L;
    }

    return data.HashCount * data.Capacity;
}
/// <summary>
/// Intersect two estimators.
/// </summary>
/// <param name="estimator">The estimator to intersect.</param>
/// <param name="otherEstimator">The other estimator to intersect with.</param>
/// <param name="foldingStrategy">The folding strategy used to obtain fold factors when the capacities differ. May be <c>null</c>, in which case no folding is applied.</param>
/// <returns>
/// A new estimator with the intersected data, or <c>null</c> when both estimators are <c>null</c>.
/// When only one estimator is <c>null</c>, the result takes its dimensions from the other one and has an item count of zero.
/// When the result has no slot values (null or empty) it is returned without intersecting values.
/// </returns>
/// <exception cref="ArgumentException">
/// The hash counts differ, or the capacities differ and the folding strategy did not yield a fold factor greater than one.
/// </exception>
/// <remarks>Logically possible, but the item count is pretty much useless after this operation.</remarks>
internal static BitMinwiseHashEstimatorFullData Intersect(
    this IBitMinwiseHashEstimatorFullData estimator,
    IBitMinwiseHashEstimatorFullData otherEstimator,
    IFoldingStrategy foldingStrategy)
{
    if (estimator == null && otherEstimator == null)
    {
        return null;
    }

    // Fold factors are only requested when both estimators are available.
    var foldingFactors = estimator == null || otherEstimator == null
        ? null
        : foldingStrategy?.GetFoldFactors(estimator.Capacity, otherEstimator.Capacity);

    if (estimator == null)
    {
        return new BitMinwiseHashEstimatorFullData
        {
            BitSize = otherEstimator.BitSize,
            Capacity = otherEstimator.Capacity,
            HashCount = otherEstimator.HashCount,
            ItemCount = 0
        };
    }

    if (otherEstimator != null &&
        ((estimator.Capacity != otherEstimator.Capacity &&
          (foldingFactors?.Item1 ?? 0L) <= 1 &&
          (foldingFactors?.Item2 ?? 0L) <= 1) ||
         estimator.HashCount != otherEstimator.HashCount))
    {
        throw new ArgumentException("Minwise estimators with different capacity or hash count cannot be intersected.");
    }

    var res = new BitMinwiseHashEstimatorFullData
    {
        Capacity = estimator.Capacity / (foldingFactors?.Item1 ?? 1L),
        HashCount = estimator.HashCount,
        BitSize = estimator.BitSize,
        ItemCount = otherEstimator == null ? 0 : estimator.ItemCount
    };

    // NOTE(review): SetValues is defined elsewhere; presumably it prepares the slot array of the result - confirm.
    if (estimator.Values != null && otherEstimator?.Values != null)
    {
        res.SetValues(false);
    }

    // Nothing to intersect. The empty case must be excluded as well, because
    // Partitioner.Create rejects a range whose upper bound is not greater than its lower bound.
    if (res.Values == null || res.Values.LongLength == 0L)
    {
        return res;
    }

    // Number of slots that are unset on either side or differ between both sides.
    var dropped = 0;
    Parallel.ForEach(
        Partitioner.Create(0L, res.Values.LongLength),
        (range, state) =>
        {
            for (var i = range.Item1; i < range.Item2; i++)
            {
                var estimatorValue = GetFolded(
                    estimator.Values,
                    i,
                    foldingFactors?.Item1,
                    Math.Min,
                    int.MaxValue);
                var otherEstimatorValue = GetFolded(
                    otherEstimator.Values,
                    i,
                    foldingFactors?.Item2,
                    Math.Min,
                    int.MaxValue);

                if (estimatorValue == int.MaxValue ||
                    otherEstimatorValue == int.MaxValue ||
                    otherEstimatorValue != estimatorValue)
                {
                    // The counter is shared between the parallel partitions, hence the interlocked increment.
                    Interlocked.Increment(ref dropped);
                }

                res.Values[i] = Math.Max(estimatorValue, otherEstimatorValue);
            }
        });

    //wildly wrong, but about as good as it gets.
    res.ItemCount = Math.Max(
        0,
        Math.Min(estimator.ItemCount, otherEstimator.ItemCount) -
        (long)Math.Ceiling(dropped / (0.5D * res.HashCount)));
    return res;
}
/// <summary>
/// Intersect this estimator with the given estimator data and rehydrate this estimator from the result.
/// </summary>
/// <param name="estimator">The estimator data to intersect with.</param>
public void Intersect(IBitMinwiseHashEstimatorFullData estimator)
{
    var extracted = FullExtract();
    var intersected = extracted.Intersect(estimator, _configuration.FoldingStrategy);
    Rehydrate(intersected);
}
/// <summary>
/// Add the given estimator data to this estimator and rehydrate this estimator from the result.
/// </summary>
/// <param name="estimator">The estimator data to add.</param>
public void Add(IBitMinwiseHashEstimatorFullData estimator)
{
    var extracted = FullExtract();

    // The in-place flag is passed so the extracted data may be reused by the addition.
    var combined = extracted.Add(estimator, _configuration.FoldingStrategy, inPlace: true);
    Rehydrate(combined);
}