/// <summary>
        /// Tally how many points share each Hilbert position (computed with a bits argument of 1)
        /// and refresh the cluster statistics kept on this instance.
        ///
        /// A position becomes a "large cluster" the moment its tally reaches OutlierSize; every point
        /// at such a position is counted in LargeClusterMembership. Whatever is not counted as large
        /// is reported through OutlierMembership (points) and OutlierCount (positions).
        /// </summary>
        /// <param name="points">Points to tally.</param>
        /// <returns>Map from Hilbert position to the number of points found at that position.</returns>
        private Dictionary<BigInteger, int> Analyze(IReadOnlyList<UnsignedPoint> points)
        {
            var pointBalancer = new PointBalancer(points);
            var talliesByPosition = new Dictionary<BigInteger, int>();

            // Start every statistic from scratch; they are accumulated while scanning the points.
            LargestClusterMembership = 0;
            LargeClusterCount = 0;
            LargeClusterMembership = 0;

            foreach (var candidate in points)
            {
                var position = pointBalancer.ToHilbertPosition(candidate, 1);

                // A position not seen before starts at one member.
                var members = talliesByPosition.TryGetValue(position, out int seenSoFar) ? seenSoFar + 1 : 1;

                LargestClusterMembership = Max(LargestClusterMembership, members);

                if (members > OutlierSize)
                {
                    // Already a large cluster: only the newly added point needs counting.
                    LargeClusterMembership += 1;
                }
                else if (members == OutlierSize)
                {
                    // The position has just crossed the threshold: count the cluster itself
                    // and all of the points gathered at it so far.
                    LargeClusterCount += 1;
                    LargeClusterMembership += members;
                }

                talliesByPosition[position] = members;
            }

            // Everything that is not part of a large cluster is an outlier.
            OutlierMembership = points.Count - LargeClusterMembership;
            OutlierCount = talliesByPosition.Count - LargeClusterCount;

            return talliesByPosition;
        }
Example #2
0
        /// <summary>
        /// Recursively sort the array in place into smaller and smaller segments, until each segment is one item long.
        ///
        /// Each segment is distinguished by sharing the same value for the sort key (the Hilbert position).
        /// At each recursion step, we add to the number of bits in the Hilbert transform until we run out of sorting or reach BitsPerDimension,
        /// the maximum. By adding bits, the Hilbert sort key becomes more specific, thus shortening the segments sharing the same key.
        /// </summary>
        /// <param name="points">Points to sort. The underlying array is reordered in place.</param>
        /// <param name="hilbertPositions">Will hold the Hilbert sort keys, which will change at each level of recursion.
        /// Must start at the same offset as <paramref name="points"/> and hold at least as many items, because the keys and
        /// the points are sorted together using a single start index.</param>
        /// <param name="balancer">Used to shift the coordinates of each dimension so that the median falls in the middle of the range.</param>
        /// <param name="bits">Number of bits to use per coordinate when computing the Hilbert position.
        /// Each recursive level increases the number of bits.</param>
        /// <param name="executeParallel">If true, the Hilbert positions are computed with Parallel.For and the recursive sorting of
        /// the sub-segments is deferred until the whole segment has been scanned. The deferred work is still run sequentially (see the TODO in the body).</param>
        /// <param name="perm">Optional permutation, passed through to the Hilbert transform and to the recursion.</param>
        /// <returns>The recursive cost of the operation, which is governed by how many Hilbert transformations were required,
        /// times the number of bits per transform.
        /// If we sort N points with B bits in the straightforward way (not preserving memory), the cost would be N*B.
        /// If the cost comes in below that, we have improved on the simple, non-recursive quicksort.
        /// An empty segment costs zero.</returns>
        /// <exception cref="ArgumentException">If <paramref name="hilbertPositions"/> does not line up with <paramref name="points"/>.</exception>
        private static int SortSegment(ArraySegment<UnsignedPoint> points, ArraySegment<BigInteger> hilbertPositions, PointBalancer balancer, int bits, bool executeParallel, Permutation<uint> perm = null)
        {
            var count = points.Count;

            // An empty segment needs no sorting; without this guard the scan below would fail reading the first key.
            if (count == 0)
            {
                return 0;
            }

            // Array.Sort(keys, items, index, length) applies ONE index to both arrays, so the two segments
            // must be aligned. Fail loudly instead of silently sorting the wrong range of keys.
            if (hilbertPositions.Offset != points.Offset || hilbertPositions.Count < count)
            {
                throw new ArgumentException("hilbertPositions must start at the same offset as points and hold at least as many items.", nameof(hilbertPositions));
            }

            var cost = 0;
            // Handed to SortRecursion so that deeper levels can add their cost to this level's total.
            Action<int> costUpdater = costIncrement => Interlocked.Add(ref cost, costIncrement);
            var pointsList = (IList<UnsignedPoint>)points;
            var hpList = (IList<BigInteger>)hilbertPositions;

            // Prepare the sort keys - the Hilbert positions.
            if (executeParallel)
            {
                Parallel.For(0, count, i => { hpList[i] = balancer.ToHilbertPosition(pointsList[i], bits, perm); });
            }
            else
            {
                for (var i = 0; i < count; i++)
                {
                    hpList[i] = balancer.ToHilbertPosition(pointsList[i], bits, perm);
                }
            }

            // Sort the points by their Hilbert position (keys and items are reordered together).
            Array.Sort(hilbertPositions.Array, points.Array, points.Offset, count);
            cost += count * bits;

            // If we are already at the highest number of bits, even if two points have the same
            // Hilbert position, we can sort them no further.
            if (bits >= balancer.BitsPerDimension)
            {
                return cost;
            }

            // Only needed when the recursion is deferred.
            var deferredActions = executeParallel ? new List<Action>() : null;

            // Scan for runs of equal keys. Each run longer than one item is sorted further by the recursion.
            var runStart = 0;
            var runKey = hpList[0];
            for (var i = 1; i <= count; i++)
            {
                var atEnd = i == count;
                // The key at i is read BEFORE any recursion on the run that ends at i.
                var nextKey = atEnd ? default(BigInteger) : hpList[i];
                if (!atEnd && nextKey == runKey)
                {
                    continue;
                }

                var segmentLength = i - runStart;
                if (segmentLength > 1)
                {
                    Action taskAction = SortRecursion(points, hilbertPositions, balancer, bits, runStart, segmentLength, perm, costUpdater);
                    if (executeParallel)
                    {
                        deferredActions.Add(taskAction);
                    }
                    else
                    {
                        taskAction.Invoke();
                    }
                }
                runStart = i;
                runKey = nextKey;
            }

            if (executeParallel)
            {
                //TODO: Running these actions concurrently (one Task per action) failed to produce correct results.
                //      Some shared state is likely being altered. Until that is understood, run them one after another.
                foreach (var action in deferredActions)
                {
                    action.Invoke();
                }
            }
            return cost;
        }