Пример #1
0
        /// <summary>
        /// Add two estimators by keeping, for every slot, the smaller of the two (optionally folded) values.
        /// </summary>
        /// <param name="estimator">The estimator to add to.</param>
        /// <param name="otherEstimator">The other estimator to add.</param>
        /// <param name="foldingStrategy">The folding strategy used to obtain fold factors for the two capacities. May be <c>null</c>.</param>
        /// <param name="inPlace">When <c>true</c> and neither estimator needs folding, the data is added to the given <paramref name="estimator"/>; otherwise a new estimator is created.</param>
        /// <returns>
        /// The combined estimator data, or <c>null</c> when either estimator is <c>null</c>.
        /// When the result has no values, it is returned without the item count of <paramref name="otherEstimator"/> added.
        /// </returns>
        /// <exception cref="ArgumentException">
        /// The hash counts differ, or the capacities differ and no fold factor greater than one is available.
        /// </exception>
        internal static IBitMinwiseHashEstimatorFullData Add(
            this IBitMinwiseHashEstimatorFullData estimator,
            IBitMinwiseHashEstimatorFullData otherEstimator,
            IFoldingStrategy foldingStrategy,
            bool inPlace = false)
        {
            if (estimator == null || otherEstimator == null)
            {
                return null;
            }

            var factors = foldingStrategy?.GetFoldFactors(estimator.Capacity, otherEstimator.Capacity);
            var ownFactor = factors?.Item1;
            var otherFactor = factors?.Item2;

            // Differing capacities are only acceptable when at least one side can be folded.
            var capacityMismatch = estimator.Capacity != otherEstimator.Capacity &&
                                   (ownFactor ?? 0L) <= 1 &&
                                   (otherFactor ?? 0L) <= 1;
            if (capacityMismatch || estimator.HashCount != otherEstimator.HashCount)
            {
                throw new ArgumentException("Minwise estimators with different capacity or hash count cannot be added.");
            }

            // The given estimator can only be reused when no folding takes place on either side.
            IBitMinwiseHashEstimatorFullData result;
            if (inPlace && (ownFactor ?? 1L) == 1L && (otherFactor ?? 1L) == 1L)
            {
                result = estimator;
            }
            else
            {
                result = new BitMinwiseHashEstimatorFullData
                {
                    Capacity = estimator.Capacity / (ownFactor ?? 1L),
                    HashCount = estimator.HashCount,
                    BitSize = estimator.BitSize,
                    ItemCount = estimator.ItemCount,
                };
            }

            if (estimator.Values != null && otherEstimator.Values != null)
            {
                result.SetValues(false);
            }

            if (result.Values == null)
            {
                return result;
            }

            Parallel.ForEach(
                Partitioner.Create(0L, result.Values.LongLength),
                (slice, loopState) =>
                {
                    for (var index = slice.Item1; index < slice.Item2; index++)
                    {
                        var ownValue = GetFolded(estimator.Values, index, ownFactor, Math.Min, int.MaxValue);
                        var otherValue = GetFolded(otherEstimator.Values, index, otherFactor, Math.Min, int.MaxValue);
                        result.Values[index] = Math.Min(ownValue, otherValue);
                    }
                });

            result.ItemCount += otherEstimator.ItemCount;
            return result;
        }
Пример #2
0
        /// <summary>
        /// Compress the estimator data by folding it with the factor chosen by the configured folding strategy.
        /// </summary>
        /// <typeparam name="TId">The identifier type of the configuration.</typeparam>
        /// <typeparam name="TCount">Not referenced by the parameters or the body of this method.</typeparam>
        /// <param name="estimator">The estimator data to compress.</param>
        /// <param name="configuration">The configuration providing the folding strategy.</param>
        /// <returns>
        /// The folded estimator data, or <c>null</c> when the estimator, the configuration or its folding strategy
        /// is <c>null</c>, or when the strategy finds no compression factor.
        /// </returns>
        internal static IBitMinwiseHashEstimatorFullData Compress <TId, TCount>(
            this IBitMinwiseHashEstimatorFullData estimator,
            IBloomFilterConfiguration <TId, int> configuration)
            where TId : struct
            where TCount : struct
        {
            var strategy = configuration?.FoldingStrategy;
            if (strategy == null || estimator == null)
            {
                return null;
            }

            var factor = strategy.FindCompressionFactor(
                configuration,
                estimator.Capacity,
                estimator.Capacity,
                estimator.ItemCount);
            if (!factor.HasValue)
            {
                return null;
            }

            return estimator.Fold(factor.Value);
        }
Пример #3
0
 /// <summary>
 /// Rehydrate this estimator from the given data. Does nothing when <paramref name="data"/> is <c>null</c>.
 /// </summary>
 /// <param name="data">The estimator data to take capacity, hash count, bit size, item count and values from.</param>
 public void Rehydrate(IBitMinwiseHashEstimatorFullData data)
 {
     if (data != null)
     {
         _capacity = data.Capacity;
         _hashCount = data.HashCount;
         _bitSize = data.BitSize;
         _itemCount = data.ItemCount;

         if (data.Values == null)
         {
             // No values supplied: the slots are created on first access from the hash count and capacity.
             _slots = new Lazy <int[]>(() => GetMinHashSlots(_hashCount, _capacity));
         }
         else
         {
             // The values are read from the data on first access, not copied here.
             _slots = new Lazy <int[]>(() => data.Values);
         }
     }
 }
Пример #4
0
        /// <summary>
        /// Fold the minwise estimator data into a new estimator whose capacity is divided by the given factor.
        /// </summary>
        /// <param name="estimator">The estimator data. May be <c>null</c>.</param>
        /// <param name="factor">The folding factor. Must be non-zero and divide the estimator capacity evenly.</param>
        /// <returns>
        /// New estimator data with the folded values, or <c>null</c> when <paramref name="estimator"/> is <c>null</c>.
        /// When the estimator has no values, the result carries the folded capacity but no folded values.
        /// </returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="factor"/> is zero, or the estimator capacity is not a multiple of <paramref name="factor"/>.
        /// </exception>
        internal static BitMinwiseHashEstimatorFullData Fold(
            this IBitMinwiseHashEstimatorFullData estimator,
            uint factor)
        {
            // The factor is unsigned, so zero is the only invalid (non-positive) value.
            // The factor is validated before the null check, so a zero factor throws even for a null estimator.
            if (factor == 0)
            {
                throw new ArgumentException($"Fold factor should be a positive number (given value was {factor}).", nameof(factor));
            }
            if (estimator == null)
            {
                return(null);
            }
            if (estimator.Capacity % factor != 0)
            {
                throw new ArgumentException($"Bit minwise filter data cannot be folded by {factor}.", nameof(factor));
            }
            var res = new BitMinwiseHashEstimatorFullData
            {
                BitSize   = estimator.BitSize,
                Capacity  = estimator.Capacity / factor,
                HashCount = estimator.HashCount,
                ItemCount = estimator.ItemCount
            };

            if (estimator.Values != null)
            {
                res.SetValues(false);
            }
            // Nothing to fold when the result has no (or empty) values.
            if ((res.Values?.Length ?? 0L) == 0L)
            {
                return(res);
            }
            Parallel.ForEach(
                Partitioner.Create(0L, res.Values.LongLength),
                (range, state) =>
            {
                for (var i = range.Item1; i < range.Item2; i++)
                {
                    // Each result slot is produced by GetFolded, which is given Math.Min as the combining function.
                    res.Values[i] = estimator.Values.GetFolded(i, factor, Math.Min);
                }
            });
            return(res);
        }
Пример #5
0
 /// <summary>
 /// Get the block size of the estimator data: the hash count multiplied by the capacity.
 /// </summary>
 /// <param name="data">The estimator data. May be <c>null</c>.</param>
 /// <returns>The hash count times the capacity, or zero when <paramref name="data"/> is <c>null</c>.</returns>
 internal static long GetBlockSize(this IBitMinwiseHashEstimatorFullData data)
 {
     if (data == null)
     {
         return 0L;
     }

     return data.HashCount * data.Capacity;
 }
Пример #6
0
        /// <summary>
        /// Intersect two estimators by keeping, for every slot, the larger of the two (optionally folded) values.
        /// </summary>
        /// <param name="estimator">The first estimator. May be <c>null</c>.</param>
        /// <param name="otherEstimator">The estimator to intersect with. May be <c>null</c>.</param>
        /// <param name="foldingStrategy">The folding strategy used to obtain fold factors for the two capacities. May be <c>null</c>.</param>
        /// <returns>
        /// New estimator data, or <c>null</c> when both estimators are <c>null</c>. When only one estimator is given,
        /// the result takes its capacity, hash count and bit size from that estimator and has an item count of zero.
        /// </returns>
        /// <exception cref="ArgumentException">
        /// The hash counts differ, or the capacities differ and no fold factor greater than one is available.
        /// </exception>
        /// <remarks>Logically possible, but the item count is pretty much useless after this operation.</remarks>
        internal static BitMinwiseHashEstimatorFullData Intersect(
            this IBitMinwiseHashEstimatorFullData estimator,
            IBitMinwiseHashEstimatorFullData otherEstimator,
            IFoldingStrategy foldingStrategy)
        {
            if (estimator == null)
            {
                if (otherEstimator == null)
                {
                    return null;
                }

                // Only the other estimator is available: return an empty shell with its dimensions.
                return new BitMinwiseHashEstimatorFullData
                {
                    BitSize = otherEstimator.BitSize,
                    Capacity = otherEstimator.Capacity,
                    HashCount = otherEstimator.HashCount,
                    ItemCount = 0
                };
            }

            var factors = otherEstimator == null
                ? null
                : foldingStrategy?.GetFoldFactors(estimator.Capacity, otherEstimator.Capacity);
            var ownFactor = factors?.Item1;
            var otherFactor = factors?.Item2;

            if (otherEstimator != null)
            {
                // Differing capacities are only acceptable when at least one side can be folded.
                var capacityMismatch = estimator.Capacity != otherEstimator.Capacity &&
                                       (ownFactor ?? 0L) <= 1 &&
                                       (otherFactor ?? 0L) <= 1;
                if (capacityMismatch || estimator.HashCount != otherEstimator.HashCount)
                {
                    throw new ArgumentException("Minwise estimators with different capacity or hash count cannot be intersected.");
                }
            }

            var result = new BitMinwiseHashEstimatorFullData
            {
                Capacity = estimator.Capacity / (ownFactor ?? 1L),
                HashCount = estimator.HashCount,
                BitSize = estimator.BitSize,
                ItemCount = otherEstimator == null ? 0 : estimator.ItemCount
            };

            if (estimator.Values != null && otherEstimator?.Values != null)
            {
                result.SetValues(false);
            }

            if (result.Values == null)
            {
                return result;
            }

            // Number of slots where the two estimators disagree or where either side holds int.MaxValue.
            var dropped = 0;

            Parallel.ForEach(
                Partitioner.Create(0L, result.Values.LongLength),
                (slice, loopState) =>
                {
                    for (var index = slice.Item1; index < slice.Item2; index++)
                    {
                        var ownValue = GetFolded(estimator.Values, index, ownFactor, Math.Min, int.MaxValue);
                        var otherValue = GetFolded(otherEstimator.Values, index, otherFactor, Math.Min, int.MaxValue);

                        var retained = ownValue != int.MaxValue &&
                                       otherValue != int.MaxValue &&
                                       otherValue == ownValue;
                        if (!retained)
                        {
                            Interlocked.Increment(ref dropped);
                        }

                        result.Values[index] = Math.Max(ownValue, otherValue);
                    }
                });

            // Wildly wrong, but about as good as it gets: start from the smaller item count
            // and subtract a penalty derived from the number of dropped slots, never going below zero.
            var smallestCount = Math.Min(estimator.ItemCount, otherEstimator.ItemCount);
            var penalty = (long)Math.Ceiling(dropped / (0.5D * result.HashCount));
            result.ItemCount = Math.Max(0, smallestCount - penalty);
            return result;
        }
Пример #7
0
 /// <summary>
 /// Intersect this estimator with the given estimator data and rehydrate this estimator from the result.
 /// </summary>
 /// <param name="estimator">The estimator data to intersect with.</param>
 public void Intersect(IBitMinwiseHashEstimatorFullData estimator)
 {
     var current = FullExtract();
     var intersected = current.Intersect(estimator, _configuration.FoldingStrategy);

     Rehydrate(intersected);
 }
Пример #8
0
 /// <summary>
 /// Add the given estimator data to this estimator and rehydrate this estimator from the result.
 /// </summary>
 /// <param name="estimator">The estimator to add.</param>
 public void Add(IBitMinwiseHashEstimatorFullData estimator)
 {
     var current = FullExtract();
     // Request an in-place addition on the extracted data.
     var combined = current.Add(estimator, _configuration.FoldingStrategy, true);

     Rehydrate(combined);
 }