/// <summary>
/// Buckets the points by their Hilbert position computed with one bit per dimension and counts how many
/// points land in each bucket. While counting, it refreshes LargestClusterMembership, LargeClusterCount,
/// LargeClusterMembership, OutlierMembership and OutlierCount on this instance.
/// A bucket is counted as "large" from the moment its tally reaches OutlierSize.
/// </summary>
/// <param name="points">Points to tally.</param>
/// <returns>Map from Hilbert position to the number of points sharing that position.</returns>
private Dictionary <BigInteger, int> Analyze(IReadOnlyList <UnsignedPoint> points)
        {
            var pointBalancer = new PointBalancer(points);
            var bucketSizes   = new Dictionary <BigInteger, int>();

            // Start every statistic from scratch for this set of points.
            LargestClusterMembership = 0;
            LargeClusterCount        = 0;
            LargeClusterMembership   = 0;

            foreach (var p in points)
            {
                var bucketKey = pointBalancer.ToHilbertPosition(p, 1);

                // A missing key leaves 'previous' at zero, so the first point in a bucket yields a size of one.
                bucketSizes.TryGetValue(bucketKey, out int previous);
                var bucketSize = previous + 1;

                LargestClusterMembership = Max(LargestClusterMembership, bucketSize);

                if (bucketSize > OutlierSize)
                {
                    // The bucket was already large; only the new point needs to be added.
                    LargeClusterMembership = LargeClusterMembership + 1;
                }
                else if (bucketSize == OutlierSize)
                {
                    // The bucket just became large: count it and credit all of its points at once.
                    LargeClusterCount      = LargeClusterCount + 1;
                    LargeClusterMembership = LargeClusterMembership + bucketSize;
                }

                bucketSizes[bucketKey] = bucketSize;
            }

            // Whatever is not in a large bucket is treated as an outlier.
            OutlierMembership = points.Count - LargeClusterMembership;
            OutlierCount      = bucketSizes.Count - LargeClusterCount;
            return bucketSizes;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Sorts points in-place according to the Hilbert curve, applying the transform to balance points, using less memory.
        /// As a side effect, RelativeSortCost is set to the cost reported by the segment sort divided by
        /// (number of points * balancer.BitsPerDimension).
        /// </summary>
        /// <param name="points">Points to sort in-place.</param>
        /// <param name="balancer">If supplied, this point balancer is reused.
        /// Otherwise, a new one is built and returned.
        /// Reuse is appropriate if sorting of subsets of data is performed but you want all sorting to be conducted on a uniform basis,
        /// which using the same balancer ensures.</param>
        /// <param name="perm">Optional permutation that is passed through to the Hilbert position computation. Defaults to null.</param>
        /// <returns>The same points array passed in as argument, with values sorted.</returns>
        public static UnsignedPoint[] SmallBalancedSort(UnsignedPoint[] points, ref PointBalancer balancer, Permutation <uint> perm = null)
        {
            balancer = balancer ?? new PointBalancer(points);
            var hilbertPositions    = new BigInteger[points.Length];
            var allPoints           = new ArraySegment <UnsignedPoint>(points, 0, points.Length);
            var allHilbertPositions = new ArraySegment <BigInteger>(hilbertPositions, 0, hilbertPositions.Length);

            // Start at one bit per dimension; the segment sort adds precision only where ties remain.
            var cost = SortSegment(allPoints, allHilbertPositions, balancer, 1, false, perm);

            // Multiply in floating point: the integer product points.Length * BitsPerDimension
            // can silently wrap around for very large inputs before it is converted to double.
            var fullPrecisionCost = (double)points.Length * balancer.BitsPerDimension;
            RelativeSortCost = cost / fullPrecisionCost;
            return(points);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Sort points according to the Hilbert curve order, potentially using a lower precision for all coordinate values.
        /// If points are sorted with lower precision, the ordering is consistent with sorts conducted at higher precision.
        /// This means that if two points receive DIFFERING Hilbert positions, they will appear in the sorted list in the
        /// proper relative order. However if two points receive the SAME Hilbert positions at the lower precision, they likely
        /// would have different positions at the full precision. Such points may appear out of order, but will be grouped together
        /// by that position.
        ///
        /// Sorting using fewer bits of precision is faster and uses less memory. If data is nearly random, it may sort points entirely
        /// in the correct order, or close to it, permitting subsequent passes to re-sort the groups of points with like positions
        /// in separate batches.
        /// </summary>
        /// <param name="points">Points to sort.</param>
        /// <param name="bitsPerDimension">Number of bits to use per dimension when computing each point's Hilbert position.</param>
        /// <param name="balancer">If supplied, this point balancer is reused.
        /// Otherwise, a new one is built and returned.
        /// Reuse is appropriate if sorting of subsets of data is performed but you want all sorting to be conducted on a uniform basis,
        /// which using the same balancer ensures.</param>
        /// <returns>Sorted list of points.
        /// Points are not sorted in-place.</returns>
        public static List <UnsignedPoint> BalancedSort(IReadOnlyList <UnsignedPoint> points, int bitsPerDimension, ref PointBalancer balancer)
        {
            // Publish the balancer through the ref parameter so that a newly built one really is
            // handed back to the caller, as the documentation promises.
            balancer = balancer ?? new PointBalancer(points);

            // A ref parameter cannot be captured by a lambda, hence the local copy.
            var pointBalancer = balancer;

            return(points.OrderBy(
                       point => pointBalancer.ToHilbertPosition(point, bitsPerDimension)
                       ).ToList());
        }
Ejemplo n.º 4
0
 /// <summary>
 /// Builds a deferred action that sorts one bucket of points, i.e. a run of points that share a Hilbert position.
 /// Nothing is sorted until the returned action is invoked.
 /// When invoked, the action carves the bucket out of the given segments, asks a GridCoarseness for the number
 /// of bits to use next (always at least one more than <paramref name="bits"/>), sorts the bucket with
 /// SortSegment without parallel execution, and reports the resulting cost through <paramref name="costUpdater"/>.
 /// </summary>
 /// <param name="points">Segment of points that contains the bucket.</param>
 /// <param name="hilbertPositions">Segment of Hilbert sort keys that parallels <paramref name="points"/>.</param>
 /// <param name="balancer">Balancer handed on to SortSegment; its BitsPerDimension also configures the grid.</param>
 /// <param name="bits">Number of bits per dimension used by the pass that produced this bucket.</param>
 /// <param name="iStart">Start of the bucket, relative to the start of the segments.</param>
 /// <param name="segmentLength">Number of points in the bucket.</param>
 /// <param name="perm">Optional permutation handed on to SortSegment.</param>
 /// <param name="costUpdater">Callback that receives the cost returned by SortSegment for this bucket.</param>
 /// <returns>An action that performs the sort when invoked.</returns>
 private static Action SortRecursion(ArraySegment <UnsignedPoint> points, ArraySegment <BigInteger> hilbertPositions, PointBalancer balancer, int bits, int iStart, int segmentLength, Permutation <uint> perm, Action <int> costUpdater)
 {
     void SortBucket()
     {
         // Views over just this bucket, expressed against the underlying arrays.
         var bucketPoints = new ArraySegment <UnsignedPoint>(points.Array, points.Offset + iStart, segmentLength);
         var bucketKeys   = new ArraySegment <BigInteger>(hilbertPositions.Array, hilbertPositions.Offset + iStart, segmentLength);

         // The bucket holds more than one point, so it must be sorted again with more bits.
         var coarseness   = new GridCoarseness((IList <UnsignedPoint>)bucketPoints, balancer.BitsPerDimension);
         var desiredCount = segmentLength >= 50 ? segmentLength / 10 : 0;
         var suggested    = coarseness.BitsToDivide(desiredCount, segmentLength * 2);

         // The grid estimate sometimes repeats the bit count of the previous pass,
         // so force the precision to grow by at least one bit.
         var nextBits = suggested > bits ? suggested : bits + 1;

         // Normally false; the flag exists so the effect of parallel recursion can be tested.
         const bool recurseInParallel = false;
         costUpdater(SortSegment(bucketPoints, bucketKeys, balancer, nextBits, recurseInParallel, perm));
     }

     return SortBucket;
 }
Ejemplo n.º 5
0
        /// <summary>
        /// Recursively sort the array in place into smaller and smaller segments, until each segment is one item long.
        ///
        /// Each segment is distinguished by sharing the same value for the sort key (the Hilbert position).
        /// At each recursion step, we add to the number of bits in the Hilbert transform until we run out of sorting or reach BitsPerDimension,
        /// the maximum. By adding bits, the Hilbert sort key becomes more specific, thus shortening the segments sharing the same key.
        /// </summary>
        /// <param name="points">Points to sort. An empty segment is left untouched and costs nothing.</param>
        /// <param name="hilbertPositions">Will hold the Hilbert sort keys, which will change at each level of recursion.
        /// The keys are sorted using the offset and count of <paramref name="points"/>, so both segments are expected
        /// to cover the same range of their arrays.</param>
        /// <param name="balancer">Used to shift the coordinates of each dimension so that the median falls in the middle of the range.</param>
        /// <param name="bits">Number of bits to use per coordinate when computing the Hilbert position.
        /// Each recursive level increases the number of bits.</param>
        /// <param name="executeParallel">If true, the Hilbert positions are computed with Parallel.For and the sorting of the
        /// sub-segments is postponed until the whole segment has been scanned (the sub-segments are still sorted one after another).
        /// If false, everything runs sequentially and each sub-segment is sorted as soon as it is found.</param>
        /// <param name="perm">Optional permutation.</param>
        /// <returns>The recursive cost of the operation, which is governed by how many Hilbert transformation were required,
        /// times the number of bits per transform.
        /// If we sort N points with B bits in the straightforward way (not preserving memory), the cost would be N*B.
        /// If the cost comes in below that, we have improved on the simple, non-recursive quicksort.</returns>
        private static int SortSegment(ArraySegment <UnsignedPoint> points, ArraySegment <BigInteger> hilbertPositions, PointBalancer balancer, int bits, bool executeParallel, Permutation <uint> perm = null)
        {
            // Nothing to sort. This also keeps the scan below from reading the first key of an empty segment.
            if (points.Count == 0)
            {
                return(0);
            }

            var          cost        = 0;
            Action <int> costUpdater = (int costIncrement) => {
                Interlocked.Add(ref cost, costIncrement);
            };
            var pointsList = (IList <UnsignedPoint>)points;
            var hpList     = (IList <BigInteger>)hilbertPositions;
            var count      = pointsList.Count;

            // Prepare the sort keys - the Hilbert positions.
            if (executeParallel)
            {
                Parallel.For(0, count, i => { hpList[i] = balancer.ToHilbertPosition(pointsList[i], bits, perm); });
            }
            else
            {
                for (var i = 0; i < count; i++)
                {
                    hpList[i] = balancer.ToHilbertPosition(pointsList[i], bits, perm);
                }
            }

            // Array.Sort takes a single start index for keys and items alike, which is why both segments
            // must share the same offset (SmallBalancedSort and SortRecursion construct them that way).
            Array.Sort(hilbertPositions.Array, points.Array, points.Offset, points.Count);
            cost += points.Count * bits;

            // If we are already at the highest number of bits, even if two points have the same
            // Hilbert position, we can sort them no further.
            if (bits >= balancer.BitsPerDimension)
            {
                return(cost);
            }

            var deferredSorts = new List <Action>();

            // Walk the sorted keys and find runs of equal keys. Every run longer than one point is a tie
            // that has to be sorted again with more bits.
            var runStart = 0;
            var runKey   = hpList[0];

            for (var i = 1; i <= count; i++)
            {
                var atEnd = i == count;

                // Read the key before any recursion happens, so it is the key produced by this pass.
                var currentKey = atEnd ? runKey : hpList[i];
                if (!atEnd && currentKey == runKey)
                {
                    continue;
                }

                // The run [runStart, i) ended, either because the key changed or the segment is exhausted.
                var runLength = i - runStart;
                if (runLength > 1)
                {
                    Action sortRun = SortRecursion(points, hilbertPositions, balancer, bits, runStart, runLength, perm, costUpdater);
                    if (executeParallel)
                    {
                        deferredSorts.Add(sortRun);
                    }
                    else
                    {
                        sortRun.Invoke();
                    }
                }
                runStart = i;
                runKey   = currentKey;
            }

            if (executeParallel)
            {
                //TODO: Running these actions as concurrent tasks failed to produce correct results
                // (some shared state is likely being altered), so they are invoked one after another.
                foreach (var sortRun in deferredSorts)
                {
                    sortRun.Invoke();
                }
            }
            return(cost);
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Sort the points (not in-place) and return a List of Arrays, where all points in a given array share the same
        /// value for the sort key (the Hilbert position) and the arrays are sorted by the Hilbert position.
        ///
        /// All points are sorted in a single pass using the specified precision of each number in constructing the Hilbert curve.
        /// Consequently, if the full precision is used, this uses more memory than the in-place, recursive sort,
        /// because all the Hilbert positions are computed and held in memory simultaneously.
        /// </summary>
        /// <param name="points">Points to sort.</param>
        /// <param name="bitsPerDimension">Number of bits to use per dimension in constructing the Hilbert position.
        /// If less than the number required for full precision, this will reduce the precision used in sorting.</param>
        /// <param name="balancer">If passed in, this balancer is reused, otherwise one is created and returned.</param>
        /// <returns>A List of arrays of points where each array holds points that share the same value of Hilbert position
        /// at the given precision. The arrays themselves are sorted by Hilbert position.</returns>
        public static List <UnsignedPoint[]> SortWithTies(IReadOnlyList <UnsignedPoint> points, int bitsPerDimension, ref PointBalancer balancer)
        {
            // Publish the balancer through the ref parameter so that a newly created one really is
            // handed back to the caller, as the documentation promises.
            balancer = balancer ?? new PointBalancer(points);

            // A ref parameter cannot be captured by a lambda, hence the local copy.
            var pointBalancer  = balancer;
            var pointsWithTies = points
                                 .GroupBy(point => pointBalancer.ToHilbertPosition(point, bitsPerDimension))
                                 .OrderBy(g => g.Key)
                                 .Select(g => g.ToArray())
                                 .ToList();

            return(pointsWithTies);
        }