/// <summary>
/// Build a GridCoarseness for the most populated bucket of a one-bit Hilbert sort.
///
/// This generates an assemblage of many Gaussian clusters, sorts all of their points
/// with a single bit Hilbert curve, and picks the bucket holding the most points.
/// Then it composes a GridCoarseness for the points in that bucket.
/// </summary>
/// <param name="numPoints">Approximate number of points; each cluster holds the average size (numPoints / clusterCount) plus or minus 100.</param>
/// <param name="dimensions">Number of dimensions</param>
/// <param name="clusterCount">Number of clusters</param>
/// <param name="maxCoordinate">Largest value any coordinate of any dimension can hold</param>
/// <param name="minStdDeviation">Minimum standard deviation among coordinate values relative to the center of each Gaussian cluster generated.</param>
/// <param name="maxStdDeviation">Maximum standard deviation among coordinate values relative to the center of each Gaussian cluster generated.</param>
/// <returns>The GridCoarseness.</returns>
GridCoarseness MakeTestGrid(int numPoints, int dimensions, int clusterCount, int maxCoordinate, int minStdDeviation = 10, int maxStdDeviation = 30)
{
    var avgClusterSize = numPoints / clusterCount;
    var data = new GaussianClustering
    {
        ClusterCount = clusterCount,
        Dimensions = dimensions,
        MaxCoordinate = maxCoordinate,
        MinClusterSize = avgClusterSize - 100,
        MaxClusterSize = avgClusterSize + 100,
        MaxDistanceStdDev = maxStdDeviation,
        MinDistanceStdDev = minStdDeviation
    };
    var clusters = data.MakeClusters();
    var points = clusters.Points().ToList();
    PointBalancer balancer = null;

    // NOTE(review): presumably the number of bits needed to represent maxCoordinate;
    // confirm against the SmallestPowerOfTwo extension.
    var bitsRequired = (maxCoordinate + 1).SmallestPowerOfTwo();

    // Sort at one bit of resolution so that many points tie and share a bucket.
    var lowresSort = HilbertSort.SortWithTies(points, 1, ref balancer);

    // First() fails fast with a descriptive InvalidOperationException when the sort yields
    // no buckets, rather than a NullReferenceException from dereferencing a default value.
    // OrderByDescending is stable, so among equally large buckets the earliest one wins.
    var largestBucket = lowresSort.OrderByDescending(bucket => bucket.Length).First();

    return new GridCoarseness(largestBucket, bitsRequired);
}
/// <summary>
/// Sort the points with <c>HilbertSort.SortWithTies</c> at the given number of bits and
/// measure the buckets of tied points that result.
/// </summary>
/// <param name="points">Points to sort.</param>
/// <param name="lowresBits">Number of bits passed to <c>HilbertSort.SortWithTies</c>.</param>
/// <param name="smallBucketSize">A bucket counts as small when its length does not exceed this value.</param>
/// <param name="balancer">Balancer handed to the sort; a new one is created from <paramref name="points"/> when null is supplied.</param>
/// <param name="pointsInSmallBuckets">Receives the total number of points held in small buckets.</param>
/// <returns>Length of the largest bucket, or zero if the sort yields no buckets.</returns>
public int MaxBucketSizePerBits(List <UnsignedPoint> points, int lowresBits, int smallBucketSize, ref PointBalancer balancer, out int pointsInSmallBuckets)
{
    balancer = balancer ?? new PointBalancer(points);
    var lowresSort = HilbertSort.SortWithTies(points, lowresBits, ref balancer);

    // Gather both statistics in a single pass over the buckets.
    var largestBucket = 0;
    var smallBucketTotal = 0;
    foreach (var bucket in lowresSort)
    {
        var size = bucket.Length;
        if (size > largestBucket)
        {
            largestBucket = size;
        }
        if (size <= smallBucketSize)
        {
            smallBucketTotal += size;
        }
    }

    pointsInSmallBuckets = smallBucketTotal;
    return largestBucket;
}
/// <summary>
/// Check that a low resolution sort with ties never contradicts the balanced (high resolution) sort.
///
/// Random Gaussian clusters are generated and sorted both ways. For every pair of points
/// that land in different low resolution buckets, their relative order must be the same
/// in both orderings. Pairs tied in the low resolution ordering are ignored.
/// </summary>
/// <param name="numPoints">Approximate number of points; each cluster holds the average size (numPoints / clusterCount) plus or minus 100.</param>
/// <param name="dimensions">Number of dimensions</param>
/// <param name="clusterCount">Number of clusters</param>
/// <param name="lowresBits">Number of bits passed to <c>HilbertSort.SortWithTies</c> for the low resolution sort.</param>
public void LowresVersusHiresCase(int numPoints, int dimensions, int clusterCount, int lowresBits)
{
    var maxCoordinate = 1000;
    var clusterSizeVariation = 100;
    var minClusterSize = (numPoints / clusterCount) - clusterSizeVariation;
    var maxClusterSize = (numPoints / clusterCount) + clusterSizeVariation;
    var data = new GaussianClustering
    {
        ClusterCount = clusterCount,
        Dimensions = dimensions,
        MaxCoordinate = maxCoordinate,
        MinClusterSize = minClusterSize,
        MaxClusterSize = maxClusterSize
    };
    var clusters = data.MakeClusters();
    var points = clusters.Points().ToList();
    PointBalancer balancer = null;

    var hiresSort = HilbertSort.BalancedSort(points, ref balancer);
    var lowresSort = HilbertSort.SortWithTies(points, lowresBits, ref balancer);

    // Position of each point in the high resolution ordering.
    var hiresPosition = new Dictionary <UnsignedPoint, int>();
    var hiresIndex = 0;
    foreach (var point in hiresSort)
    {
        hiresPosition[point] = hiresIndex;
        hiresIndex++;
    }

    // Position of each point's bucket in the low resolution ordering; tied points share a position.
    var lowresPositions = new Dictionary <UnsignedPoint, int>();
    var bucketIndex = 0;
    foreach (var bucket in lowresSort)
    {
        foreach (var point in bucket)
        {
            lowresPositions[point] = bucketIndex;
        }
        bucketIndex++;
    }

    var actualNumPoints = points.Count;
    var largestBucket = lowresSort.Select(bucket => bucket.Length).Max();
    var caseDescription = $"N = {actualNumPoints} D = {dimensions} K = {clusterCount} B = {lowresBits}";
    Console.WriteLine(caseDescription);
    Console.WriteLine($"Lowres buckets = {lowresSort.Count} Largest bucket = {largestBucket}");

    // Resolve every point's positions once, so the quadratic pair loop below works on
    // plain arrays instead of repeating dictionary lookups.
    var lowresRank = new int[actualNumPoints];
    var hiresRank = new int[actualNumPoints];
    for (var i = 0; i < actualNumPoints; i++)
    {
        lowresRank[i] = lowresPositions[points[i]];
        hiresRank[i] = hiresPosition[points[i]];
    }

    // Compare the positions of all pairs of points in the two orderings to see that
    // they are either in the same relative order
    // or tied for position in the lowres ordering.
    int outOfPlaceCount = 0;
    for (var i = 0; i < actualNumPoints - 1; i++)
    {
        for (var j = i + 1; j < actualNumPoints; j++)
        {
            if (lowresRank[i] == lowresRank[j])
            {
                continue;
            }
            var lowresAscending = lowresRank[i] < lowresRank[j];
            var hiresAscending = hiresRank[i] < hiresRank[j];
            if (lowresAscending != hiresAscending)
            {
                outOfPlaceCount++;
            }
        }
    }

    var msg = $"Out of place count = {outOfPlaceCount}";
    Console.WriteLine(msg);
    Assert.AreEqual(0, outOfPlaceCount, msg);
}