Пример #1
0
        /// <summary>
        /// Adds two minwise hash estimators: every slot of the result receives the smaller of the
        /// two values obtained through <c>GetFolded</c> from each estimator.
        /// </summary>
        /// <param name="estimator">The estimator to add to.</param>
        /// <param name="otherEstimator">The estimator whose data is added.</param>
        /// <param name="foldingStrategy">Supplies the fold factors for the two capacities; may be <c>null</c>, in which case no fold factors are used.</param>
        /// <param name="inPlace">
        /// When <c>true</c> and both fold factors are absent or equal to 1, the result is written into
        /// <paramref name="estimator"/>; in every other case a new estimator is created.
        /// </param>
        /// <returns>
        /// <c>null</c> when either estimator is <c>null</c>; otherwise the resulting estimator. When the result
        /// has no value array, it is returned without merging and without adding the other item count.
        /// </returns>
        /// <exception cref="ArgumentException">
        /// The hash counts differ, or the capacities differ and neither fold factor is greater than 1.
        /// </exception>
        internal static IBitMinwiseHashEstimatorFullData Add(
            this IBitMinwiseHashEstimatorFullData estimator,
            IBitMinwiseHashEstimatorFullData otherEstimator,
            IFoldingStrategy foldingStrategy,
            bool inPlace = false)
        {
            if (estimator == null || otherEstimator == null)
            {
                return null;
            }

            var factors = foldingStrategy?.GetFoldFactors(estimator.Capacity, otherEstimator.Capacity);
            var ownFactor = factors?.Item1;
            var otherFactor = factors?.Item2;

            // Differing capacities are only acceptable when at least one side can be folded.
            var capacityMismatch = estimator.Capacity != otherEstimator.Capacity
                                   && (ownFactor ?? 0L) <= 1
                                   && (otherFactor ?? 0L) <= 1;
            if (capacityMismatch || estimator.HashCount != otherEstimator.HashCount)
            {
                throw new ArgumentException("Minwise estimators with different capacity or hash count cannot be added.");
            }

            // Re-use the target only when requested and no folding takes place.
            IBitMinwiseHashEstimatorFullData result;
            if (inPlace && (ownFactor ?? 1L) == 1L && (otherFactor ?? 1L) == 1L)
            {
                result = estimator;
            }
            else
            {
                result = new BitMinwiseHashEstimatorFullData
                {
                    Capacity = estimator.Capacity / (ownFactor ?? 1L),
                    HashCount = estimator.HashCount,
                    BitSize = estimator.BitSize,
                    ItemCount = estimator.ItemCount,
                };
            }

            if (estimator.Values != null && otherEstimator.Values != null)
            {
                result.SetValues(false);
            }

            if (result.Values == null)
            {
                return result;
            }

            Parallel.ForEach(
                Partitioner.Create(0L, result.Values.LongLength),
                (chunk, loopState) =>
                {
                    for (var slot = chunk.Item1; slot < chunk.Item2; slot++)
                    {
                        var ownValue = GetFolded(estimator.Values, slot, ownFactor, Math.Min, int.MaxValue);
                        var otherValue = GetFolded(otherEstimator.Values, slot, otherFactor, Math.Min, int.MaxValue);
                        result.Values[slot] = Math.Min(ownValue, otherValue);
                    }
                });

            result.ItemCount += otherEstimator.ItemCount;
            return result;
        }
Пример #2
0
        /// <summary>
        /// Intersects two minwise hash estimators: every slot of the result receives the larger of the
        /// two values obtained through <c>GetFolded</c> from each estimator.
        /// </summary>
        /// <param name="estimator">The first estimator; when <c>null</c> an estimator with the other estimator's dimensions and an item count of 0 is returned.</param>
        /// <param name="otherEstimator">The second estimator; may be <c>null</c>.</param>
        /// <param name="foldingStrategy">Supplies the fold factors for the two capacities; may be <c>null</c>, in which case no fold factors are used.</param>
        /// <returns>
        /// <c>null</c> when both estimators are <c>null</c>; otherwise a new estimator. When the result has no
        /// value array, it is returned without comparing any slots.
        /// </returns>
        /// <exception cref="ArgumentException">
        /// Both estimators are given and either their hash counts differ, or their capacities differ and
        /// neither fold factor is greater than 1.
        /// </exception>
        /// <remarks>Logically possible, but the item count is pretty much useless after this operation.</remarks>
        internal static BitMinwiseHashEstimatorFullData Intersect(
            this IBitMinwiseHashEstimatorFullData estimator,
            IBitMinwiseHashEstimatorFullData otherEstimator,
            IFoldingStrategy foldingStrategy)
        {
            if (estimator == null)
            {
                if (otherEstimator == null)
                {
                    return null;
                }

                // Nothing to intersect with: hand back an estimator shaped like the other one.
                return new BitMinwiseHashEstimatorFullData
                {
                    BitSize = otherEstimator.BitSize,
                    Capacity = otherEstimator.Capacity,
                    HashCount = otherEstimator.HashCount,
                    ItemCount = 0
                };
            }

            var factors = otherEstimator == null
                ? null
                : foldingStrategy?.GetFoldFactors(estimator.Capacity, otherEstimator.Capacity);
            var ownFactor = factors?.Item1;
            var otherFactor = factors?.Item2;

            if (otherEstimator != null)
            {
                // Differing capacities are only acceptable when at least one side can be folded.
                var capacityMismatch = estimator.Capacity != otherEstimator.Capacity
                                       && (ownFactor ?? 0L) <= 1
                                       && (otherFactor ?? 0L) <= 1;
                if (capacityMismatch || estimator.HashCount != otherEstimator.HashCount)
                {
                    throw new ArgumentException("Minwise estimators with different capacity or hash count cannot be intersected.");
                }
            }

            var result = new BitMinwiseHashEstimatorFullData
            {
                Capacity = estimator.Capacity / (ownFactor ?? 1L),
                HashCount = estimator.HashCount,
                BitSize = estimator.BitSize,
                ItemCount = otherEstimator == null ? 0 : estimator.ItemCount
            };

            if (estimator.Values != null && otherEstimator?.Values != null)
            {
                result.SetValues(false);
            }

            if (result.Values == null)
            {
                return result;
            }

            // Number of slots that hold no value shared by both estimators.
            var droppedSlots = 0;

            Parallel.ForEach(
                Partitioner.Create(0L, result.Values.LongLength),
                (chunk, loopState) =>
                {
                    for (var slot = chunk.Item1; slot < chunk.Item2; slot++)
                    {
                        var ownValue = GetFolded(estimator.Values, slot, ownFactor, Math.Min, int.MaxValue);
                        var otherValue = GetFolded(otherEstimator.Values, slot, otherFactor, Math.Min, int.MaxValue);

                        // A slot only survives when both sides agree on a value other than int.MaxValue.
                        var shared = ownValue == otherValue && ownValue != int.MaxValue;
                        if (!shared)
                        {
                            Interlocked.Increment(ref droppedSlots);
                        }

                        result.Values[slot] = Math.Max(ownValue, otherValue);
                    }
                });

            // Wildly wrong, but about as good as it gets.
            var smallestCount = Math.Min(estimator.ItemCount, otherEstimator.ItemCount);
            var penalty = (long)Math.Ceiling(droppedSlots / (0.5D * result.HashCount));
            result.ItemCount = Math.Max(0, smallestCount - penalty);
            return result;
        }