コード例 #1
0
        /// <summary>
        /// Sorts the points with <c>HilbertSort.SortWithTies</c> at the given number of bits
        /// and reports how the points are spread across the resulting buckets of ties.
        /// </summary>
        /// <param name="points">Points to sort.</param>
        /// <param name="lowresBits">Number of bits handed to <c>HilbertSort.SortWithTies</c>.</param>
        /// <param name="smallBucketSize">Largest bucket length that still counts as a small bucket.</param>
        /// <param name="balancer">Balancer handed to the sort; when null, one is created from <paramref name="points"/> and returned to the caller.</param>
        /// <param name="pointsInSmallBuckets">Receives the total number of points held in buckets whose length does not exceed <paramref name="smallBucketSize"/>.</param>
        /// <returns>The length of the largest bucket.</returns>
        public int MaxBucketSizePerBits(List <UnsignedPoint> points, int lowresBits, int smallBucketSize, ref PointBalancer balancer, out int pointsInSmallBuckets)
        {
            balancer = balancer ?? new PointBalancer(points);
            var lowresSort = HilbertSort.SortWithTies(points, lowresBits, ref balancer);

            // Only the bucket lengths are needed, so no point-to-position lookup is built here.
            var largestBucket = lowresSort.Max(bucket => bucket.Length);

            // Buckets longer than smallBucketSize contribute nothing to the small-bucket total.
            pointsInSmallBuckets = lowresSort.Sum(bucket => bucket.Length > smallBucketSize ? 0 : bucket.Length);

            return(largestBucket);
        }
コード例 #2
0
        /// <summary>
        /// Builds a GridCoarseness over the largest bucket of a one-bit Hilbert sort.
        ///
        /// This generates an assemblage of Gaussian clusters, sorts all of their points with
        /// <c>HilbertSort.SortWithTies</c> using a single bit, picks the bucket of ties holding
        /// the most points, and composes a GridCoarseness for the points in that bucket.
        /// </summary>
        /// <param name="numPoints">Number of points; divided by <paramref name="clusterCount"/> to get the average cluster size.</param>
        /// <param name="dimensions">Number of dimensions</param>
        /// <param name="clusterCount">Number of clusters</param>
        /// <param name="maxCoordinate">Largest value any coordinate of any dimension can hold</param>
        /// <param name="minStdDeviation">Minimum standard deviation among coordinate values relative to the center of each Gaussian cluster generated.</param>
        /// <param name="maxStdDeviation">Maximum standard deviation among coordinate values relative to the center of each Gaussian cluster generated.</param>
        /// <returns>The GridCoarseness.</returns>
        GridCoarseness MakeTestGrid(int numPoints, int dimensions, int clusterCount, int maxCoordinate, int minStdDeviation = 10, int maxStdDeviation = 30)
        {
            var avgClusterSize = numPoints / clusterCount;
            var data           = new GaussianClustering
            {
                ClusterCount      = clusterCount,
                Dimensions        = dimensions,
                MaxCoordinate     = maxCoordinate,
                // Cluster sizes are allowed to vary by 100 points either side of the average.
                MinClusterSize    = avgClusterSize - 100,
                MaxClusterSize    = avgClusterSize + 100,
                MaxDistanceStdDev = maxStdDeviation,
                MinDistanceStdDev = minStdDeviation
            };
            var           clusters     = data.MakeClusters();
            var           points       = clusters.Points().ToList();
            PointBalancer balancer     = null;
            // NOTE(review): presumably the number of bits needed to represent maxCoordinate - confirm against SmallestPowerOfTwo.
            var           bitsRequired = (maxCoordinate + 1).SmallestPowerOfTwo();

            var lowresSort = HilbertSort.SortWithTies(points, 1, ref balancer);

            // First() fails with a descriptive InvalidOperationException if the sort produced no
            // buckets, instead of a null reference surfacing later.
            var largestBucket = lowresSort.OrderByDescending(bucket => bucket.Length).First();

            return(new GridCoarseness(largestBucket, bitsRequired));
        }
コード例 #3
0
        /// <summary>
        /// Times sorting the same test points two ways - by building a HilbertIndex and by calling
        /// <c>HilbertSort.Sort</c> - prints both durations and asserts that the HilbertIndex
        /// approach took longer.
        /// </summary>
        public void CompareSpeedOfSorting_Unbalanced_vs_HilbertIndex()
        {
            var testPoints = TestData(20000, 50, 20, 1000000, 100, 500, out int bitsPerDimension);

            // 1. HilbertIndex: the timed region covers converting every point to a HilbertPoint,
            //    constructing the index and reading its SortedPoints.
            var indexWatch    = Stopwatch.StartNew();
            var hilbertPoints = testPoints.Select(p => new HilbertPoint(p.Coordinates, bitsPerDimension));
            var index         = new HilbertIndex(hilbertPoints);
            var indexOrder    = index.SortedPoints;

            indexWatch.Stop();
            long indexMillis = indexWatch.ElapsedMilliseconds;

            // 2. HilbertSort.Sort: the timed region includes copying the points into a new list.
            var sortWatch = Stopwatch.StartNew();

            HilbertSort.Sort(testPoints.ToList(), bitsPerDimension);
            sortWatch.Stop();
            long sortMillis = sortWatch.ElapsedMilliseconds;

            var summary = $"HilbertIndex required {indexMillis / 1000.0} sec.  Unbalanced Sort required {sortMillis / 1000.0} sec.";

            Console.WriteLine(summary);
            Assert.Greater(indexMillis, sortMillis, summary);
        }
コード例 #4
0
        /// <summary>
        /// Times sorting the same test points two ways - by building a HilbertIndex and by calling
        /// <c>HilbertSort.BalancedSort</c> (including construction of its PointBalancer) - prints
        /// the durations and asserts that the HilbertIndex approach took longer.
        /// </summary>
        public void CompareSpeedOfSorting_Balanced_vs_HilbertIndex()
        {
            var testPoints = TestData(20000, 50, 20, 1000000, 100, 500, out int bitsPerDimension);

            // 1. HilbertIndex: the timed region covers converting every point to a HilbertPoint,
            //    constructing the index and reading its SortedPoints.
            var indexWatch    = Stopwatch.StartNew();
            var hilbertPoints = testPoints.Select(p => new HilbertPoint(p.Coordinates, bitsPerDimension));
            var index         = new HilbertIndex(hilbertPoints);
            var indexOrder    = index.SortedPoints;

            indexWatch.Stop();
            long indexMillis = indexWatch.ElapsedMilliseconds;

            // 2. HilbertSort.BalancedSort: the outer watch spans balancer construction plus the
            //    sort, while the inner watch isolates the balancer constructor alone.
            var sortWatch     = Stopwatch.StartNew();
            var balancerWatch = Stopwatch.StartNew();
            var balancer      = new PointBalancer(testPoints);

            balancerWatch.Stop();
            HilbertSort.BalancedSort(testPoints.ToList(), ref balancer);
            sortWatch.Stop();
            long sortMillis     = sortWatch.ElapsedMilliseconds;
            long balancerMillis = balancerWatch.ElapsedMilliseconds;

            var summary = $"HilbertIndex required {indexMillis / 1000.0} sec.  Balanced Sort required {sortMillis / 1000.0} sec, of which {balancerMillis / 1000.0} sec is Balancer ctor.  Relative Cost = {HilbertSort.RelativeSortCost}";

            Console.WriteLine(summary);
            Assert.Greater(indexMillis, sortMillis, summary);
        }
コード例 #5
0
        /// <summary>
        /// Checks that <c>HilbertSort.SmallBalancedSort</c>, applied directly to the test points,
        /// leaves them in the same order that <c>HilbertSort.BalancedSort</c> produces for a copy
        /// of those points.
        /// </summary>
        public void InPlaceSort_NarrowClusters()
        {
            var testPoints = TestData(20000, 50, 20, 1000000, 10, 30, out int bitsPerDimension);
            var balancer   = new PointBalancer(testPoints);

            // Establish the reference ordering from a copy, so the original collection is
            // untouched until SmallBalancedSort is given it.
            var copyOfPoints  = testPoints.ToList();
            var expectedOrder = HilbertSort.BalancedSort(copyOfPoints, ref balancer).ToArray();

            HilbertSort.SmallBalancedSort(testPoints, ref balancer);

            CollectionAssert.AreEqual(expectedOrder, testPoints, "Not in same order");
        }
コード例 #6
0
        /// <summary>
        /// Runs <c>HilbertSort.SmallBalancedSort</c> over test data for every combination of
        /// cluster count and standard deviation listed below, and prints a CSV-style report of
        /// <c>HilbertSort.RelativeSortCost</c> as read after each sort. Makes no assertions.
        /// </summary>
        public void InPlaceSortRelativeCost()
        {
            const int dimensionCount = 50;
            const int pointCount     = 20000;
            var clusterCounts = new[] { 10, 20, 50, 100 };
            var deviations    = new[] { 20, 100, 200, 1000, 2000 };

            // The report starts with the header row; one data row is appended per combination.
            var reportText = new System.Text.StringBuilder("Clusters,Standard Deviation,Relative Cost\n");

            foreach (var clusterCount in clusterCounts)
            {
                foreach (var deviation in deviations)
                {
                    // The same value is passed for both standard-deviation arguments.
                    var testPoints = TestData(pointCount, dimensionCount, clusterCount, 1000000, deviation, deviation, out int bitsPerDimension);
                    var balancer   = new PointBalancer(testPoints);

                    HilbertSort.SmallBalancedSort(testPoints, ref balancer);

                    // Read the cost straight after the sort that is being measured.
                    var relativeCost = HilbertSort.RelativeSortCost;
                    reportText.Append($"{clusterCount},{deviation},{relativeCost}\n");
                }
            }

            Console.WriteLine($"\n\nFinal report:\n\n{reportText.ToString()}");
        }
コード例 #7
0
        /// <summary>
        /// Generates Gaussian clusters, sorts their points with both
        /// <c>HilbertSort.BalancedSort</c> (the "hires" ordering) and
        /// <c>HilbertSort.SortWithTies</c> at <paramref name="lowresBits"/> bits (the "lowres"
        /// ordering), and asserts that no pair of points falling in different lowres buckets
        /// appears in opposite relative order in the two orderings.
        /// </summary>
        /// <param name="numPoints">Number of points; divided by <paramref name="clusterCount"/> to get the nominal cluster size.</param>
        /// <param name="dimensions">Number of dimensions given to the cluster generator.</param>
        /// <param name="clusterCount">Number of clusters given to the cluster generator.</param>
        /// <param name="lowresBits">Number of bits handed to <c>HilbertSort.SortWithTies</c>.</param>
        public void LowresVersusHiresCase(int numPoints, int dimensions, int clusterCount, int lowresBits)
        {
            var maxCoordinate        = 1000;
            var clusterSizeVariation = 100;
            var minClusterSize       = (numPoints / clusterCount) - clusterSizeVariation;
            var maxClusterSize       = (numPoints / clusterCount) + clusterSizeVariation;
            var data = new GaussianClustering
            {
                ClusterCount   = clusterCount,
                Dimensions     = dimensions,
                MaxCoordinate  = maxCoordinate,
                MinClusterSize = minClusterSize,
                MaxClusterSize = maxClusterSize
            };
            var           clusters = data.MakeClusters();
            var           points   = clusters.Points().ToList();
            PointBalancer balancer = null;

            var hiresSort  = HilbertSort.BalancedSort(points, ref balancer);
            var lowresSort = HilbertSort.SortWithTies(points, lowresBits, ref balancer);

            // Position of each point in the hires ordering.
            var hiresPosition = new Dictionary <UnsignedPoint, int>();
            var hiresIndex    = 0;

            foreach (var point in hiresSort)
            {
                hiresPosition[point] = hiresIndex;
                hiresIndex++;
            }

            // Position of each point in the lowres ordering; all points tied in the same
            // bucket share that bucket's position.
            var lowresPositions = new Dictionary <UnsignedPoint, int>();
            var bucketIndex     = 0;

            foreach (var ties in lowresSort)
            {
                foreach (var point in ties)
                {
                    lowresPositions[point] = bucketIndex;
                }
                bucketIndex++;
            }

            var actualNumPoints = points.Count;
            var largestBucket   = lowresSort.Select(bucket => bucket.Length).Max();
            var caseDescription = $"N = {actualNumPoints}  D = {dimensions}  K = {clusterCount}  B = {lowresBits}";

            Console.WriteLine(caseDescription);
            Console.WriteLine($"Lowres buckets = {lowresSort.Count}  Largest bucket = {largestBucket}");

            // Resolve both positions once per point, so the quadratic pair loop below works on
            // plain arrays instead of repeating dictionary lookups for every pair.
            var lowresOf = new int[actualNumPoints];
            var hiresOf  = new int[actualNumPoints];

            for (var n = 0; n < actualNumPoints; n++)
            {
                lowresOf[n] = lowresPositions[points[n]];
                hiresOf[n]  = hiresPosition[points[n]];
            }

            //      Compare the positions of every pair of points in the two orderings to see that
            //      they are either in the same relative order
            //      or tied for position in the lowres ordering.
            int outOfPlaceCount = 0;

            for (var i = 0; i < actualNumPoints - 1; i++)
            {
                for (var j = i + 1; j < actualNumPoints; j++)
                {
                    if (lowresOf[i] == lowresOf[j])
                    {
                        // Tied in the lowres ordering: any hires order is acceptable.
                        continue;
                    }
                    if ((lowresOf[i] < lowresOf[j]) != (hiresOf[i] < hiresOf[j]))
                    {
                        outOfPlaceCount++;
                    }
                }
            }
            var msg = $"Out of place count = {outOfPlaceCount}";

            Console.WriteLine(msg);
            Assert.AreEqual(0, outOfPlaceCount, msg);
        }