/// <summary>
        /// Compare the exact coarseness with an estimate for all numbers of bits.
        ///
        /// This takes an assemblage of many clusters and finds the most concentrated
        /// cluster according to a single bit Hilbert curve.
        /// Then it composes a GridCoarseness for the points in that cluster.
        /// </summary>
        /// <param name="numPoints">Number of points</param>
        /// <param name="dimensions">Number of dimensions</param>
        /// <param name="clusterCount">Number of clusters</param>
        /// <param name="maxCoordinate">Largest value any coordinate of any dimension can hold</param>
        /// <param name="maxStdDeviation">Maximum standard deviation among coordinate values relative to the center of each Gaussian cluster generated.</param>
        /// <param name="minStdDeviation">Minimum standard deviation among coordinate values relative to the center of each Gaussian cluster generated.</param>
        /// <returns>The GridCoarseness.</returns>
        GridCoarseness MakeTestGrid(int numPoints, int dimensions, int clusterCount, int maxCoordinate, int minStdDeviation = 10, int maxStdDeviation = 30)
        {
            // Cluster sizes are allowed to vary by 100 points either side of the average.
            var avgClusterSize = numPoints / clusterCount;
            var data           = new GaussianClustering
            {
                ClusterCount      = clusterCount,
                Dimensions        = dimensions,
                MaxCoordinate     = maxCoordinate,
                MinClusterSize    = avgClusterSize - 100,
                MaxClusterSize    = avgClusterSize + 100,
                MaxDistanceStdDev = maxStdDeviation,
                MinDistanceStdDev = minStdDeviation
            };
            var points = data.MakeClusters().Points().ToList();

            // NOTE(review): presumably the number of bits needed to represent maxCoordinate;
            // confirm against the SmallestPowerOfTwo extension.
            var bitsRequired = (maxCoordinate + 1).SmallestPowerOfTwo();

            // Sort using a single bit so that many points tie and fall into the same bucket.
            // The balancer starts out null and is handed to the sort by reference.
            PointBalancer balancer   = null;
            var           lowresSort = HilbertSort.SortWithTies(points, 1, ref balancer);

            // First() (rather than FirstOrDefault()) fails with a clear InvalidOperationException
            // when the sort yields no buckets, instead of a NullReferenceException further on.
            var largestBucket = lowresSort.OrderByDescending(bucket => bucket.Length).First();

            return new GridCoarseness(largestBucket, bitsRequired);
        }
// Example #2
// 0
        /// <summary>
        /// Sort the points with HilbertSort.SortWithTies at the given number of bits and
        /// measure the resulting buckets of tied points.
        /// </summary>
        /// <param name="points">Points to sort. An empty list yields zero for both results.</param>
        /// <param name="lowresBits">Number of bits passed to the low-resolution sort.</param>
        /// <param name="smallBucketSize">A bucket counts as small if it holds no more than this many points.</param>
        /// <param name="balancer">Balancer handed to the sort. If null on entry, one is created from the points.</param>
        /// <param name="pointsInSmallBuckets">Receives the total number of points that landed in small buckets.</param>
        /// <returns>The number of points in the largest bucket.</returns>
        public int MaxBucketSizePerBits(List <UnsignedPoint> points, int lowresBits, int smallBucketSize, ref PointBalancer balancer, out int pointsInSmallBuckets)
        {
            if (points == null)
            {
                throw new System.ArgumentNullException(nameof(points));
            }
            if (points.Count == 0)
            {
                // No points means no buckets; report zero rather than failing inside Max().
                pointsInSmallBuckets = 0;
                return 0;
            }

            balancer = balancer ?? new PointBalancer(points);
            var lowresSort = HilbertSort.SortWithTies(points, lowresBits, ref balancer);

            // One pass over the buckets gathers both statistics.
            var largestBucket = 0;
            var smallTotal    = 0;
            foreach (var bucket in lowresSort)
            {
                var size = bucket.Length;
                if (size > largestBucket)
                {
                    largestBucket = size;
                }
                if (size <= smallBucketSize)
                {
                    smallTotal += size;
                }
            }

            pointsInSmallBuckets = smallTotal;
            return largestBucket;
        }
// Example #3
// 0
        /// <summary>
        /// Check that the ordering from HilbertSort.SortWithTies (the "lowres" ordering) never
        /// contradicts the ordering from HilbertSort.BalancedSort (the "hires" ordering).
        ///
        /// Every pair of points must either be tied in the lowres ordering or appear in the
        /// same relative order in both orderings. The test fails if any pair is out of place.
        /// </summary>
        /// <param name="numPoints">Approximate number of points to generate</param>
        /// <param name="dimensions">Number of dimensions</param>
        /// <param name="clusterCount">Number of clusters</param>
        /// <param name="lowresBits">Number of bits used for the lowres sort</param>
        public void LowresVersusHiresCase(int numPoints, int dimensions, int clusterCount, int lowresBits)
        {
            var maxCoordinate        = 1000;
            var clusterSizeVariation = 100;
            var minClusterSize       = (numPoints / clusterCount) - clusterSizeVariation;
            var maxClusterSize       = (numPoints / clusterCount) + clusterSizeVariation;
            var data = new GaussianClustering
            {
                ClusterCount   = clusterCount,
                Dimensions     = dimensions,
                MaxCoordinate  = maxCoordinate,
                MinClusterSize = minClusterSize,
                MaxClusterSize = maxClusterSize
            };
            var           clusters = data.MakeClusters();
            var           points   = clusters.Points().ToList();
            PointBalancer balancer = null;

            var hiresSort  = HilbertSort.BalancedSort(points, ref balancer);
            var lowresSort = HilbertSort.SortWithTies(points, lowresBits, ref balancer);

            // Record each point's position in the hires ordering.
            var hiresPosition = new Dictionary <UnsignedPoint, int>();
            var hiresIndex    = 0;
            foreach (var point in hiresSort)
            {
                hiresPosition[point] = hiresIndex;
                hiresIndex++;
            }

            // Record each point's bucket position in the lowres ordering; tied points share a position.
            var lowresPositions = new Dictionary <UnsignedPoint, int>();
            var bucketIndex     = 0;
            foreach (var bucket in lowresSort)
            {
                foreach (var point in bucket)
                {
                    lowresPositions[point] = bucketIndex;
                }
                bucketIndex++;
            }

            var actualNumPoints = points.Count;
            var largestBucket   = lowresSort.Select(bucket => bucket.Length).Max();
            var caseDescription = $"N = {actualNumPoints}  D = {dimensions}  K = {clusterCount}  B = {lowresBits}";

            Console.WriteLine(caseDescription);
            Console.WriteLine($"Lowres buckets = {lowresSort.Count}  Largest bucket = {largestBucket}");

            // Resolve both positions once per point so the quadratic pair loop below
            // compares plain integers instead of repeating dictionary lookups.
            var lowresRank = new int[actualNumPoints];
            var hiresRank  = new int[actualNumPoints];
            for (var i = 0; i < actualNumPoints; i++)
            {
                var point = points[i];
                lowresRank[i] = lowresPositions[point];
                hiresRank[i]  = hiresPosition[point];
            }

            // Compare the positions of all pairs of points in the two orderings to see that
            // they are either in the same relative order
            // or tied for position in the lowres ordering.
            int outOfPlaceCount = 0;

            for (var i = 0; i < actualNumPoints - 1; i++)
            {
                for (var j = i + 1; j < actualNumPoints; j++)
                {
                    if (lowresRank[i] == lowresRank[j])
                    {
                        // Tied in the lowres ordering: any hires order is acceptable.
                        continue;
                    }
                    var lowresAscending = lowresRank[i] < lowresRank[j];
                    var hiresAscending  = hiresRank[i] < hiresRank[j];
                    if (lowresAscending != hiresAscending)
                    {
                        outOfPlaceCount++;
                    }
                }
            }
            var msg = $"Out of place count = {outOfPlaceCount}";

            Console.WriteLine(msg);
            Assert.AreEqual(0, outOfPlaceCount, msg);
        }