/// <summary>
/// Build a (line text, Hilbert index, line number) tuple for one input line.
///
/// Each value returned by parseCommandLine is parsed as a floating point number, multiplied by 1000,
/// truncated to an int and used as one coordinate of a HilbertPoint.
/// </summary>
/// <param name="command_line">Fields of the input line.</param>
/// <param name="index">Caller-supplied number stored unchanged as the third tuple item.</param>
/// <returns>Tuple of lineToString(command_line), the HilbertIndex of the scaled point, and index.</returns>
public static Tuple <string, BigInteger, int> commandLine_HI_initIndex(string[] command_line, int index)
{
    int bpd = FindBitsPerDimension(6);
    float SCALAR = (float)Math.Pow(10, 3);
    var coords = parseCommandLine(command_line);

    // NOTE(review): the array is sized by command_line.Length while the loop runs over coords.Length.
    // If parseCommandLine returns fewer values the trailing coordinates stay zero; if it returns more,
    // the loop throws. Left as is because changing the size changes the resulting Hilbert index.
    int[] scaledIntCoordinates = new int[command_line.Length];
    for (int i = 0; i < coords.Length; i++)
    {
        // Parse with the invariant culture so "1.5" means the same on every machine,
        // regardless of the current culture's decimal separator.
        float floatCoordinate = (float)Double.Parse(
            coords[i],
            System.Globalization.NumberStyles.Float,
            System.Globalization.CultureInfo.InvariantCulture);
        float scaledCoordinate = floatCoordinate * SCALAR;
        scaledIntCoordinates[i] = (int)scaledCoordinate;
    }

    HilbertPoint hilbertPoint = new HilbertPoint(scaledIntCoordinates, bpd);
    return new Tuple <string, BigInteger, int>(lineToString(command_line), hilbertPoint.HilbertIndex, index);
}
/// <summary>
/// Look up a point by its id.
/// </summary>
/// <returns>The point stored in IdsToPoints under the given id, or null if there is no such entry.</returns>
/// <param name="id">UniqueId of a HilbertPoint.</param>
public HilbertPoint FindById(int id)
{
    HilbertPoint match;
    return IdsToPoints.TryGetValue(id, out match) ? match : null;
}
/// <summary>
/// Count how many recorded neighbors of the point lie inside the window around it along the Hilbert curve.
///
/// Only positions present in Distances for this point are counted, and only those whose sorted position
/// falls inside the window of WindowRadius positions to either side of the (clamped) window center.
/// </summary>
/// <returns>Count of neighbors.</returns>
/// <param name="point">Point whose neighbors are to be counted.</param>
public int NeighborsInWindow(HilbertPoint point)
{
    MeasureWindow();
    var position = Index.SortedPosition(point);

    // Keep the window inside the list. The low-end clamp wins when both would apply.
    var highestCenter = Count - WindowRadius - 1;
    var windowCenter = position < WindowRadius
        ? WindowRadius
        : (position > highestCenter ? highestCenter : position);

    var first = windowCenter - WindowRadius;
    var last = windowCenter + WindowRadius;

    // Counting every point in the neighborhood (Distances[position].Count) instead of only those in
    // the window might be more accurate for some points, but it would worsen the correlation:
    // most points undercounting while a few are exact is inferior to all undercounting alike.
    return Distances[position].Keys.Where(i => first <= i && i <= last).Count();
}
/// <summary>
/// Compose an enumerable over a run of rangeLength consecutive points in sorted order near the given point.
/// The run would begin rangeLength / 2 positions before the point, but its start is clamped so that it is
/// not negative and not greater than Count - rangeLength.
/// </summary>
/// <param name="p">Point that anchors the range.</param>
/// <param name="rangeLength">Range length.</param>
public IEnumerable <HilbertPoint> Range(HilbertPoint p, int rangeLength)
{
    var centeredStart = SortedPosition(p) - rangeLength / 2;
    var notBeforeBeginning = Math.Max(0, centeredStart);
    var rangeStart = Math.Min(notBeforeBeginning, Count - rangeLength);
    return SortedPoints
        .Skip(rangeStart)
        .Take(rangeLength);
}
/// <summary>
/// Generate Gaussian clusters, search for an optimal Hilbert index and assert that the estimated
/// cluster count does not exceed acceptableClusterCount.
/// </summary>
private void OptimalIndexTestCase(
    int hilbertTries, int minClusterSize, int maxClusterSize, int dimensions, int clusterCount,
    int acceptableClusterCount, int bitsPerDimension, int outlierSize, int noiseSkipBy)
{
    var generator = new GaussianClustering
    {
        ClusterCount = clusterCount,
        Dimensions = dimensions,
        MaxCoordinate = 1000,
        MinClusterSize = minClusterSize,
        MaxClusterSize = maxClusterSize
    };
    var hilbertPoints = generator
        .MakeClusters()
        .Points()
        .Select(p => HilbertPoint.CastOrConvert(p, bitsPerDimension, true))
        .ToList();

    var searchResults = OptimalIndex.Search(
        hilbertPoints,
        outlierSize,
        noiseSkipBy,
        hilbertTries, // maxTrials
        4             // maxIterationsWithoutImprovement
    );

    var message = $"Estimated cluster count = {searchResults.EstimatedClusterCount}, actual = {clusterCount}, acceptable = {acceptableClusterCount}";
    Console.WriteLine(message);
    Assert.LessOrEqual(
        searchResults.EstimatedClusterCount,
        acceptableClusterCount,
        $"HilbertIndex fragmented by more than 50%: {message}");
}
/// <summary>
/// Measure how well NearestFromRange approximates the neighbors of one reference point.
/// The points returned by Nearest(point, k) are collected into a set, then the points returned by
/// NearestFromRange(point, rangeLength) that belong to that set are counted.
/// </summary>
/// <param name="point">Reference point whose neighbors are sought.</param>
/// <param name="k">Number of nearest neighbors sought.</param>
/// <param name="rangeLength">Number of points in the Hilbert index to sample.</param>
/// <returns>The number of matches divided by k.</returns>
public double Accuracy(HilbertPoint point, int k, int rangeLength)
{
    var trueNeighbors = new HashSet <HilbertPoint>(Nearest(point, k));
    var hits = 0;
    foreach (var candidate in NearestFromRange(point, rangeLength))
    {
        if (trueNeighbors.Contains(candidate))
        {
            hits++;
        }
    }
    return hits / (double)k;
}
/// <summary>
/// For every cluster (color), compare the exhaustively found closest cluster with the ranking produced by
/// FindAllClustersApproximately, and require that the true closest cluster is ranked no worse than third.
/// </summary>
/// <param name="nPoints">Number of points in each cluster.</param>
/// <param name="dimensions">Number of dimensions in each point.</param>
/// <param name="numClusters">Number of clusters to create.</param>
/// <param name="numCurvesToTry">Passed as the last argument to OptimalIndex.Search.</param>
public void AllColorPairsClosestClusterTest(int nPoints, int dimensions, int numClusters, int numCurvesToTry)
{
    // Element zero is unused so that ranks can be used directly as one-based subscripts.
    var rankHistogram = new int[numClusters + 1];
    var data = new GaussianClustering
    {
        ClusterCount = numClusters,
        Dimensions = dimensions,
        MaxCoordinate = 1000,
        MinClusterSize = nPoints,
        MaxClusterSize = nPoints
    };
    var worstDistanceRatio = 1.0;
    var ratioSum = 0.0;
    var ratioCount = 0;

    var clusters = data.MakeClusters();
    var bitsPerDimension = (1 + data.MaxCoordinate).SmallestPowerOfTwo();
    var hilbertPoints = clusters
        .Points()
        .Select(up => HilbertPoint.CastOrConvert(up, bitsPerDimension, true))
        .ToList();
    var results = OptimalIndex.Search(
        hilbertPoints,
        5 /*outlier size */,
        10 /* NoiseSkipBy */,
        1 /* ReducedNoiseSkipBy */,
        numCurvesToTry
    );
    var pccp1 = new PolyChromaticClosestPoint <string>(clusters, results.Index);
    var allColorPairs = pccp1.FindAllClustersApproximately();

    foreach (var color1 in clusters.ClassLabels())
    {
        var exact = pccp1.FindClusterExhaustively(color1).Swap(color1);

        // All approximate pairs that involve color1, oriented by Swap(color1).
        var pairsForColor = allColorPairs
            .Where(cp => cp.Color1.Equals(color1) || cp.Color2.Equals(color1))
            .Select(cp => cp.Swap(color1))
            .ToList();
        var approximateColor2Distance = pairsForColor
            .First(cp => cp.Color2.Equals(exact.Color2))
            .SquareDistance;

        // Rank = one plus the number of pairs that the approximation considers strictly closer.
        var approximateRank = 1 + pairsForColor.Count(cp => cp.SquareDistance < approximateColor2Distance);
        rankHistogram[approximateRank]++;

        double ratio;
#pragma warning disable RECS0018 // Comparison of floating point numbers with equality operator
        if (exact.SquareDistance == 0.0)
#pragma warning restore RECS0018 // Comparison of floating point numbers with equality operator
        {
            ratio = 0;
        }
        else
        {
            ratio = approximateColor2Distance / (double)exact.SquareDistance;
        }
        ratioSum += ratio;
        ratioCount++;
        worstDistanceRatio = Math.Max(worstDistanceRatio, ratio);
    }

    Debug.WriteLine(string.Format("Worst distance overage   = {0:N3}%", (worstDistanceRatio - 1.0) * 100.0));
    Debug.WriteLine(string.Format("Average distance overage = {0:N3}%", ((ratioSum / ratioCount) - 1.0) * 100.0));
    for (var iRank = 1; iRank <= numClusters; iRank++)
    {
        // Always report the top three ranks; report lower ranks only when they occurred.
        if (iRank < 4 || rankHistogram[iRank] > 0)
        {
            Debug.WriteLine(string.Format("For {0} Clusters the closest cluster found was Ranked #{1}.", rankHistogram[iRank], iRank));
        }
    }

    // Accept a win, place or show: the true closest cluster must be ranked first, second or third.
    var topThree = rankHistogram[1] + rankHistogram[2] + rankHistogram[3];
    Assert.IsTrue(
        topThree == numClusters,
        string.Format("Found the closest cluster for {0} colors", rankHistogram[1])
    );
}
/// <summary>
/// Find the points adjacent to the given point in the Hilbert ordering, then sort them by the cartesian distance,
/// from nearest to farthest.
/// </summary>
/// <param name="point">Reference point to seek in the index.</param>
/// <param name="rangeLength">Number of points to retrieve from the index. Half of these points will precede and half succeed
/// the given point in the index, unless we are near the beginning or end of the index, in which case the range is shifted
/// so that it stays inside the index. If the index holds fewer points than requested, all of them are used.</param>
/// <param name="includePointItself">If false, the reference point will not be present in the results
/// (one extra point is retrieved to make up for it).
/// If true, the point will be present in the results.</param>
/// <returns>The points of the sampled range, sorted from nearest to farthest from the reference point.</returns>
public IEnumerable <HilbertPoint> NearestFromRange(HilbertPoint point, int rangeLength, bool includePointItself = false)
{
    // The reference point is filtered out below, so widen the window by one to compensate.
    rangeLength = includePointItself ? rangeLength : rangeLength + 1;
    var middlePosition = SortedPosition(point);

    // Center the window on the point, then clamp its start at BOTH ends of the index.
    // Without the upper clamp a point near the end would yield a truncated window.
    var latestStart = Math.Max(0, SortedPoints.Count - rangeLength);
    var rangeStart = Math.Min(Math.Max(0, middlePosition - rangeLength / 2), latestStart);

    return SortedPoints
        .Skip(rangeStart)
        .Take(rangeLength)
        .Where(p => includePointItself || !p.Equals(point))
        .OrderBy(p => p.Measure(point));
}
/// <summary>
/// Sort the points of Clusters by their position along a Hilbert curve and store the result in SortedPoints.
/// Each point is wrapped in a HilbertPoint, the HilbertPoints are sorted, and each is mapped back to the
/// UnsignedPoint it was made from.
/// </summary>
private void SortPoints()
{
    var largestCoordinate = Clusters.Points().Select(p => p.MaxCoordinate).Max();
    var bitsPerDimension = ((int)largestCoordinate + 1).SmallestPowerOfTwo();

    var originalOf = new Dictionary <HilbertPoint, UnsignedPoint>();
    var hilbertPoints = new List <HilbertPoint>();
    foreach (UnsignedPoint original in Clusters.Points())
    {
        var converted = new HilbertPoint(original.Coordinates, bitsPerDimension);
        hilbertPoints.Add(converted);
        originalOf[converted] = original;
    }
    hilbertPoints.Sort();

    var sorted = new List <UnsignedPoint>();
    foreach (var hilbertPoint in hilbertPoints)
    {
        sorted.Add(originalOf[hilbertPoint]);
    }
    SortedPoints = sorted;
}
/// <summary>
/// Test if two points are adjacent, meaning that exactly one coordinate differs between them and
/// that coordinate differs by exactly one.
/// </summary>
/// <returns><c>true</c>, if points are adjacent, <c>false</c> otherwise.</returns>
/// <param name="p1">First point. Its Dimensions value decides how many coordinates are compared.</param>
/// <param name="p2">Second point.</param>
static bool ArePointsAdjacent(HilbertPoint p1, HilbertPoint p2)
{
    var largestGap = 0;
    var differingDimensions = 0;
    for (var dim = 0; dim < p1.Dimensions; dim++)
    {
        var gap = Math.Abs(p1[dim] - p2[dim]);
        if (gap == 0)
        {
            continue;
        }
        differingDimensions++;
        largestGap = Math.Max(gap, largestGap);
    }
    return differingDimensions == 1 && largestGap == 1;
}
/// <summary>
/// Unioning the results of several different indices, find the composite accuracy of using them all
/// in combination to find the nearest neighbors.
/// </summary>
/// <param name="indices">Indices to combine. The first one is used as the frame of reference.</param>
/// <param name="point">Point whose nearest neighbors are sought.</param>
/// <param name="k">Number of nearest neighbors that are sought.</param>
/// <param name="rangeLength">Number of points to draw from each index.</param>
/// <returns>The number of distinct true neighbors found by any index, divided by k.</returns>
public static double CompositeAccuracy(IList <HilbertIndex> indices, HilbertPoint point, int k, int rangeLength)
{
    // Note the tricky use of Equivalent. Points from different indices should not be directly compared,
    // so the reference point is mapped into each index before searching it, and every candidate is mapped
    // back into the first index before it is tested and tallied.
    var firstIndex = indices[0];
    var allNeighbors = new HashSet <HilbertPoint>(firstIndex.Nearest(firstIndex.Equivalent(point), k));

    // Store the first-index equivalent rather than the candidate itself: otherwise the same neighbor
    // found through two indices could be tallied twice and push the result above 1.0.
    var fromRange = new HashSet <HilbertPoint>(
        indices.SelectMany(i => i.NearestFromRange(i.Equivalent(point), rangeLength)
                                 .Select(p => firstIndex.Equivalent(p))
                                 .Where(p => allNeighbors.Contains(p)))
    );
    return fromRange.Count / (double)k;
}
/// <summary>
/// Create an index of all the points in a Classification, optionally appending one extra coordinate to each
/// point that holds the number of the cluster the point came from.
/// </summary>
/// <param name="clusters">Clusters of points to index.</param>
/// <param name="bitsPerDimension">Bits per dimension handed to HilbertPoint.CastOrConvert.
/// If not positive, the value is obtained from FindBitsPerDimension applied to all the points.</param>
/// <param name="addClassificationDimension">If set to <c>true</c>, append a classification coordinate to each point.
/// The value is the zero-based position of the point's cluster in the order that
/// Classification.LabelToPoints.Values iterates over the clusters.</param>
public HilbertIndex(Classification <UnsignedPoint, string> clusters, int bitsPerDimension = 0, bool addClassificationDimension = false)
{
    if (bitsPerDimension <= 0)
    {
        bitsPerDimension = FindBitsPerDimension(clusters.Points());
    }

    UnsortedPoints = new List <HilbertPoint>();
    uint nextClusterNumber = 0;
    foreach (var cluster in clusters.LabelToPoints.Values)
    {
        var clusterNumber = nextClusterNumber++;
        foreach (var p in cluster)
        {
            var source = addClassificationDimension ? p.AppendCoordinate(clusterNumber) : p;
            UnsortedPoints.Add(HilbertPoint.CastOrConvert(source, bitsPerDimension, true));
        }
    }
    InitIndexing();
}
/// <summary>
/// Count how many neighbors have been recorded for the given point.
/// </summary>
/// <param name="point">Point whose neighbors are to be counted.</param>
/// <param name="allNeighbors">If false, only report the neighbors already recorded by previous measurements.
/// If true and the point is not yet flagged in AllMeasured, first measure it against every other point
/// that is not itself flagged in AllMeasured, then mark the point complete.</param>
/// <returns>The number of entries in Distances for the point's sorted position.</returns>
public int Neighbors(HilbertPoint point, bool allNeighbors = true)
{
    var position = Index.SortedPosition(point);
    var mustMeasureEverything = !AllMeasured[position] && allNeighbors;
    if (mustMeasureEverything)
    {
        for (var other = 0; other < Count; other++)
        {
            // A point is never measured against itself, and a point whose distances are all
            // already known does not need that particular distance recomputed.
            if (other == position || AllMeasured[other])
            {
                continue;
            }
            Measure(position, other, true);
        }
        Complete(point);
    }
    return Distances[position].Count;
}
/// <summary>
/// Walk every Hilbert index for the given dimensions and bits, convert each to a point, and check that
/// consecutive points are adjacent and that each point maps back to the index it came from.
///
/// This evaluates 2^(dims*bits) points, so be careful or the test will run for a long time and consume a lot of memory.
/// </summary>
/// <param name="dims">Dimensions for each point.</param>
/// <param name="bits">Bits per dimension.</param>
public void AdjacentPointsCase(int dims, int bits)
{
    var pointCount = 1 << (bits * dims);
    var points = new HilbertPoint[pointCount];
    for (var i = 0; i < pointCount; i++)
    {
        var hilbertIndex = new BigInteger(i);
        var current = new HilbertPoint(hilbertIndex, dims, bits);
        points[i] = current;

        // The first point has no predecessor to compare against.
        if (i != 0)
        {
            var previous = points[i - 1];
            Assert.IsTrue(
                ArePointsAdjacent(previous, current),
                string.Format("Points {0} and {1}", FormatPoint(previous), FormatPoint(current)));
        }
        AssertPointMapsToHilbertIndex(current.Coordinates, hilbertIndex, dims, bits);
    }
}
/// <summary>
/// Estimate the density of points local to the given point.
///
/// Without an Estimator, the result is Distances.NeighborsInWindow for the point, after making sure
/// Distances uses the requested window radius. With an Estimator, the distances from the point to the
/// other points in a window along the Hilbert curve are measured and handed to the Estimator.
/// </summary>
/// <returns>The density.</returns>
/// <param name="point">Point whose local density is sought.</param>
/// <param name="windowRadius">Up to this many points on each side of the given point along the Hilbert curve
/// are considered. Near either end of the curve the window is shifted to stay in range.</param>
public long EstimatedDensity(HilbertPoint point, int windowRadius)
{
    if (Estimator == null)
    {
        // If our windowRadius and the one used by Distances are not the same, adjust the memo.
        if (Distances.WindowRadius != windowRadius)
        {
            Distances.WindowRadius = windowRadius;
        }
        return Distances.NeighborsInWindow(point);
    }

    var windowSize = Math.Min(Count, 2 * windowRadius + 1);
    var center = Index.SortedPosition(point);
    var start = Math.Min(Math.Max(0, center - windowRadius), Count - windowSize);
    var distancesInWindow = Enumerable
        .Range(start, windowSize)
        .Where(i => i != center)
        .Select(other => Distances.Measure(center, other, false));
    return Estimator(distancesInWindow);
}
/// <summary>
/// Print the Hilbert index of every point of a two-dimensional grid that uses two bits per dimension.
/// This is a diagnostic dump; nothing is asserted.
/// </summary>
public void CartesianToHilbert_Dim2Bits2()
{
    var bits = 2;
    var size = 1 << bits;
    var report = new StringBuilder();
    for (var row = 0; row < size; row++)
    {
        for (var column = 0; column < size; column++)
        {
            var cartesianPoint = new int[] { row, column };
            var hilbertIndex = new HilbertPoint(cartesianPoint, bits).HilbertIndex;
            report.AppendLine(
                "Cart = [" + string.Join(",", cartesianPoint) + "] Hilbert = " + hilbertIndex.ToString());
        }
    }
    Console.WriteLine(report.ToString());
}
/// <summary>
/// Measure the square distance between the specified points, possibly reusing a memoized value.
/// Both points are translated to their sorted positions and the work is done by the position-based overload.
/// </summary>
/// <param name="point1">First point.</param>
/// <param name="point2">Second point.</param>
/// <param name="limitToNeighborhood">Forwarded unchanged to the position-based overload, which is documented as follows.
/// If false, return the correct square distance in all cases, and record its value
/// if it does not exceed the NeighborhoodRadius and has not yet been recorded.
/// If true and the distance has already been measured and recorded, return the recorded square distance.
/// If true and the distance has not yet been measured and AllMeasured is not set for either point,
/// compute and return the proper square distance and record it if it does not exceed the NeighborhoodRadius.
/// Otherwise, AllMeasured is true for one of the points and the value was not recorded because it exceeds
/// NeighborhoodRadius, therefore return long.MaxValue.
/// </param>
/// <returns>The result of the position-based Measure overload.</returns>
public long Measure(HilbertPoint point1, HilbertPoint point2, bool limitToNeighborhood = false)
{
    var firstPosition = Index.SortedPosition(point1);
    var secondPosition = Index.SortedPosition(point2);
    return Measure(firstPosition, secondPosition, limitToNeighborhood);
}
/// <summary>
/// Find the bits per dimension for a collection of points by passing the largest MaxCoordinate among them,
/// cast to int, to HilbertPoint.FindBitsPerDimension.
/// </summary>
/// <param name="points">Points to study. The maximum is taken over the whole sequence.</param>
/// <returns>The value returned by HilbertPoint.FindBitsPerDimension.</returns>
private static int FindBitsPerDimension(IEnumerable <UnsignedPoint> points)
{
    var largestCoordinate = points.Select(p => p.MaxCoordinate).Max();
    return HilbertPoint.FindBitsPerDimension((int)largestCoordinate);
}
/// <summary>
/// If two indices were composed from the same points but with their coordinates differently permuted,
/// the corresponding points retain the same UniqueId (which isn't so unique after all).
/// This looks up, in this index, the point that corresponds to a point from another index.
/// </summary>
/// <param name="p">Point, possibly from another index, whose UniqueId is used for the lookup.</param>
/// <returns>The result of FindById for the point's UniqueId.</returns>
public HilbertPoint Equivalent(HilbertPoint p)
{
    var sharedId = p.UniqueId;
    return FindById(sharedId);
}
/// <summary>
/// Pretty print a HilbertPoint as its Hilbert index followed by its comma-separated coordinates.
/// </summary>
/// <returns>Formatted point.</returns>
/// <param name="p">Point to pretty print.</param>
static string FormatPoint(HilbertPoint p)
{
    var coordinateList = string.Join(",", p.Coordinates);
    return string.Format("Index: {0} Coords: [{1}]", p.HilbertIndex, coordinateList);
}
/// <summary>
/// Count exactly the number of points that are near the given point, that is, have a square distance
/// that does not exceed NeighborhoodRadius. The count comes from Distances.Neighbors with its second
/// argument set to true.
///
/// This may require comparing the point to every other point, which is expensive.
/// However, if this is performed for several points, many of the distance computations will be reused.
///
/// This value can be used as a density for density-based clustering.
/// </summary>
/// <returns>The count of neighbors.</returns>
/// <param name="point">Point whose neighbors we must count.</param>
public int ExactNeighbors(HilbertPoint point)
{
    var neighborCount = Distances.Neighbors(point, true);
    return neighborCount;
}
/// <summary>
/// Get the zero-based position of the point in UnsortedPoints, read from the Original member
/// of the point's entry in Index.
/// </summary>
/// <param name="p">Point to look up.</param>
/// <returns>A zero-based position into the UnsortedPoints list.</returns>
public int UnsortedPosition(HilbertPoint p)
{
    var positions = Index[p];
    return positions.Original;
}
/// <summary>
/// Print, as CSV, the percentile distribution of the distance between points that are consecutive in an
/// optimized Hilbert index next to the distribution of the distance between randomly chosen pairs of points.
/// This is a diagnostic report; nothing is asserted.
/// </summary>
public void DistanceDistribution()
{
    /*
     * Sample output from an earlier run:
     *
     * Percentile,By Index,By Random
     * -----------------------------
     * 0%,111.35,146.55
     * 1%,142.06,255.96
     * 2%,147.21,2163.43
     * 3%,151.2,2214.15
     * 4%,154.06,2245.2
     * 5%,156.24,2271.37
     * 6%,158.38,2292.29
     * 7%,160.42,2313.55
     * 8%,162.29,2327.14
     * 9%,164.07,2345.25
     * 10%,165.41,2359.95
     * 11%,166.72,2372.83
     * 12%,167.99,2386.15
     * 13%,169.29,2398.47
     * 14%,170.43,2410.01
     * 15%,171.53,2422.34
     * 16%,172.48,2432.43
     * 17%,173.58,2443.08
     * 18%,174.73,2454.27
     * 19%,175.56,2463.71
     * 20%,176.35,2472.97
     * 21%,177.35,2483.24
     * 22%,178.3,2491.9
     * 23%,179.1,2501.44
     * 24%,179.82,2510.26
     * 25%,180.64,2517.73
     * 26%,181.55,2524.97
     * 27%,182.33,2531.58
     * 28%,182.98,2538.08
     * 29%,183.67,2543.83
     * 30%,184.33,2550.93
     * 31%,185.09,2556.59
     * 32%,185.7,2563.37
     * 33%,186.41,2570.29
     * 34%,187.09,2577.29
     * 35%,187.7,2583.56
     * 36%,188.43,2589.95
     * 37%,189.07,2596.13
     * 38%,189.71,2602.24
     * 39%,190.46,2608.28
     * 40%,191.08,2615.25
     * 41%,191.79,2620.81
     * 42%,192.46,2626.02
     * 43%,193.09,2632.7
     * 44%,193.71,2638.18
     * 45%,194.31,2643.35
     * 46%,194.98,2648.69
     * 47%,195.65,2655.47
     * 48%,196.3,2660.26
     * 49%,196.96,2666.37
     * 50%,197.66,2670.94
     * 51%,198.34,2677.09
     * 52%,199.07,2681.9
     * 53%,199.72,2687.11
     * 54%,200.3,2692.42
     * 55%,201.06,2697.92
     * 56%,201.71,2703.76
     * 57%,202.4,2710.17
     * 58%,203.16,2715.06
     * 59%,203.82,2720.25
     * 60%,204.51,2725.99
     * 61%,205.32,2731.6
     * 62%,206.08,2736.59
     * 63%,206.79,2741.72
     * 64%,207.58,2746.59
     * 65%,208.29,2754.03
     * 66%,209.07,2760.81
     * 67%,209.8,2766.65
     * 68%,210.68,2771.98
     * 69%,211.71,2778.27
     * 70%,212.38,2784.23
     * 71%,213.19,2790.71
     * 72%,213.92,2796.42
     * 73%,214.82,2802.84
     * 74%,215.68,2809.36
     * 75%,216.54,2814.55
     * 76%,217.48,2821.32
     * 77%,218.43,2827.56
     * 78%,219.35,2833.35
     * 79%,220.28,2840.72
     * 80%,221.33,2848.87
     * 81%,222.31,2856.89
     * 82%,223.42,2864
     * 83%,224.46,2872.51
     * 84%,225.83,2881.09
     * 85%,227.06,2891.57
     * 86%,228.27,2900.46
     * 87%,229.63,2910.46
     * 88%,231.55,2919.5
     * 89%,233.59,2933.76
     * 90%,235.6,2944.88
     * 91%,237.25,2959.45
     * 92%,239.83,2976.08
     * 93%,241.88,2990.4
     * 94%,244.97,3010.08
     * 95%,248.23,3029.15
     * 96%,252.34,3052.37
     * 97%,260.68,3074.84
     * 98%,282.76,3112.43  *** Note the jump from 282 to 2550, which shows that the characteristic distance is about 282.
     * 99%,2550.87,3170.93
     * 100%,3114.89,3412.57
     */
    var data = new GaussianClustering
    {
        ClusterCount = 100,
        Dimensions = 50,
        MaxCoordinate = 1000,
        MinClusterSize = 50,
        MaxClusterSize = 150
    };
    var clusters = data.MakeClusters();
    var bitsPerDimension = 10;
    var points = clusters.Points().Select(p => HilbertPoint.CastOrConvert(p, bitsPerDimension, true)).ToList();
    var results = OptimalIndex.Search(
        points,
        5,    // outlierSize
        10,   // noiseSkipBy
        1000, // maxTrials
        4     // maxIterationsWithoutImprovement
    );
    var pointsFromIndex = results.Index.SortedPoints;
    var distancesRandom = new List <long>();
    var distancesHilbert = new List <long>();
    var n = pointsFromIndex.Count;
    var rng = new FastRandom();

    // One sample of each kind per consecutive pair, so both lists end up with n - 1 entries.
    for (var i = 0; i < n - 1; i++)
    {
        var p1 = pointsFromIndex[i];
        var p2 = pointsFromIndex[i + 1];
        distancesHilbert.Add(p1.Measure(p2));

        var p3 = pointsFromIndex[rng.Next(n)];
        var p4 = pointsFromIndex[rng.Next(n)];
        distancesRandom.Add(p3.Measure(p4));
    }
    distancesHilbert.Sort();
    distancesRandom.Sort();

    Console.WriteLine("Percentile,By Index,By Random");
    for (var percentile = 0; percentile <= 100; percentile++)
    {
        // The lists hold n - 1 values, so the last valid subscript is n - 2.
        var i = Math.Min(n - 2, (n - 1) * percentile / 100);
        // Measure yields square distances; report their square roots.
        var distHilbert = Math.Round(Math.Sqrt(distancesHilbert[i]), 2);
        var distRandom = Math.Round(Math.Sqrt(distancesRandom[i]), 2);
        // One CSV row per line. Console.Write would run all the rows together.
        Console.WriteLine($"{percentile}%,{distHilbert},{distRandom}");
    }
}
/// <summary>
/// Find the K-nearest neighbors of a given point according to the cartesian distance between the point and its neighbors.
///
/// NOTE: This considers every point in SortedPoints, hence is more costly than NearestFromRange,
/// but it does not miss near neighbors that lie far away along the curve.
/// </summary>
/// <param name="point">Reference point whose neighbors are sought.</param>
/// <param name="k">Number of nearest neighbors to find.</param>
/// <param name="includePointItself">If false, points equal to the reference point are removed before the search.
/// If true, the point competes like any other candidate; when enough other points are at zero distance
/// it might not make the cut.</param>
/// <returns>The nearest neighbors of the given point, as selected by BottomN.</returns>
public IEnumerable <HilbertPoint> Nearest(HilbertPoint point, int k, bool includePointItself = false)
{
    var candidates = SortedPoints.Where(p => includePointItself || !p.Equals(point));
    return candidates.BottomN <HilbertPoint, long>(point, k);
}
/// <summary>
/// Flag a point in AllMeasured, meaning that the distance from that point to all other points has been
/// measured and the smaller distances of interest have been recorded.
/// </summary>
/// <param name="point">Point to flag.</param>
public void Complete(HilbertPoint point)
{
    var position = Index.SortedPosition(point);
    AllMeasured[position] = true;
}
/// <summary>
/// Build fifty Gaussian clusters of one hundred points in one hundred dimensions and, for every cluster,
/// compare the exhaustive, approximate and iterative searches for its closest cluster.
/// The test passes only if the approximate search finds both the right cluster and a distance
/// no greater than the exact one for every cluster.
/// </summary>
public void ClosestOfFiftyClusters()
{
    int hilbertTries = 1000;
    var nPoints = 100;
    var dimensions = 100;
    var clusterCount = 50;
    var correctColorCount = 0;
    var correctCrosscheckCount = 0;
    var correctDistanceCount = 0;

    var data = new GaussianClustering
    {
        ClusterCount = clusterCount,
        Dimensions = dimensions,
        MaxCoordinate = 1000,
        MinClusterSize = nPoints,
        MaxClusterSize = nPoints
    };
    var closestExact = new PolyChromaticClosestPoint <string> .ClosestPair();
    var closestApproximate = new PolyChromaticClosestPoint <string> .ClosestPair();
    var bitsPerDimension = (1 + data.MaxCoordinate).SmallestPowerOfTwo();
    var clusters = data.MakeClusters();
    Assert.AreEqual(clusterCount, clusters.NumPartitions, "Test data are grouped into fewer clusters than requested.");

    PolyChromaticClosestPoint <string> pccp;
    if (hilbertTries > 1)
    {
        var reducedNoiseSkipBy = 1;
        var hilbertPoints = clusters
            .Points()
            .Select(up => HilbertPoint.CastOrConvert(up, bitsPerDimension, true))
            .ToList();
        var results = OptimalIndex.Search(
            hilbertPoints,
            5 /*outlier size */,
            10 /* NoiseSkipBy */,
            reducedNoiseSkipBy,
            hilbertTries
        );
        pccp = new PolyChromaticClosestPoint <string>(clusters, results.Index);
    }
    else
    {
        pccp = new PolyChromaticClosestPoint <string>(clusters);
    }

    foreach (var color in pccp.Clusters.ClassLabels())
    {
        var exact = pccp.FindClusterExhaustively(color);
        var approximate = pccp.FindClusterApproximately(color);
        var crosscheck = pccp.FindClusterIteratively(color);

        if (exact.SquareDistance >= approximate.SquareDistance)
        {
            correctDistanceCount++;
        }
        if (exact.Color2.Equals(approximate.Color2))
        {
            correctColorCount++;
        }
        if (exact.Color2.Equals(crosscheck.Color2))
        {
            correctCrosscheckCount++;
        }

        // Track the overall closest pair seen by each method.
        if (exact.SquareDistance < closestExact.SquareDistance)
        {
            closestExact = exact;
        }
        if (approximate.SquareDistance < closestApproximate.SquareDistance)
        {
            closestApproximate = approximate;
        }

        var ratio = approximate.SquareDistance / (double)exact.SquareDistance;
        var overagePercent = (ratio - 1.0) * 100.0;
        Console.WriteLine(string.Format("Exact {0} vs Approx. {1} vs Cross {2}. Over by {3:N3}%", exact, approximate, crosscheck, overagePercent));
    }

    var overallVerdict = closestExact.SquareDistance >= closestApproximate.SquareDistance
        ? "DID FIND the closest pair of points overall. Exact {0}. Approx {1}"
        : "DID NOT FIND the closest pair of points overall. Exact {0}. Approx {1}";
    Console.WriteLine(overallVerdict, closestExact, closestApproximate);

    Assert.IsTrue(
        correctColorCount == clusterCount && correctDistanceCount == clusterCount,
        string.Format("Of {0} clusters, only {1} searches found the closest cluster and {2} found the shortest distance. Crosscheck = {3}",
            clusterCount,
            correctColorCount,
            correctDistanceCount,
            correctCrosscheckCount
        )
    );
}
/// <summary>
/// Get the zero-based position of the point in SortedPoints, read from the Sorted member
/// of the point's entry in Index.
/// </summary>
/// <param name="p">Point to look up.</param>
/// <returns>A zero-based position into the SortedPoints list.</returns>
public int SortedPosition(HilbertPoint p)
{
    var positions = Index[p];
    return positions.Sorted;
}
/// <summary>
/// A test case for PolyChromaticClosestPoint.FindPairApproximately where clusters conform to a Gaussian distribution.
/// The cluster labelled "0" is paired with every other cluster, and the approximate closest pair must be
/// no farther apart than the exhaustively found pair every time.
/// </summary>
/// <param name="nPoints">Number of points in each cluster.</param>
/// <param name="dimensions">Number of Dimensions in each point.</param>
/// <param name="numClusters">Number of clusters to create.</param>
/// <param name="hilbertsToTry">Number of Hilbert curves to try. If one or less, no optimal index search is
/// performed and the PolyChromaticClosestPoint is built from the clusters alone.</param>
public void GaussianPolyChromaticPairTestCase(int nPoints, int dimensions, int numClusters, int hilbertsToTry = 1)
{
    var successes = 0;
    var worstRatio = 1.0;
    var color1 = "0";
    var data = new GaussianClustering
    {
        ClusterCount = numClusters,
        Dimensions = dimensions,
        MaxCoordinate = 1000,
        MinClusterSize = nPoints,
        MaxClusterSize = nPoints
    };
    var clusters = data.MakeClusters();

    PolyChromaticClosestPoint <string> pccp;
    if (hilbertsToTry > 1)
    {
        var bitsPerDimension = (1 + data.MaxCoordinate).SmallestPowerOfTwo();
        var hilbertPoints = clusters
            .Points()
            .Select(up => HilbertPoint.CastOrConvert(up, bitsPerDimension, true))
            .ToList();
        var results = OptimalIndex.Search(
            hilbertPoints,
            5 /*outlier size */,
            10 /* NoiseSkipBy */,
            1 /* ReducedNoiseSkipBy */,
            hilbertsToTry
        );
        pccp = new PolyChromaticClosestPoint <string>(clusters, results.Index);
    }
    else
    {
        pccp = new PolyChromaticClosestPoint <string>(clusters);
    }

    for (var iColor2 = 1; iColor2 < numClusters; iColor2++)
    {
        var color2 = iColor2.ToString();
        var exact = pccp.FindPairExhaustively(color1, color2);
        var approximate = pccp.FindPairApproximately(color1, color2);
        var expectedDistance = exact.SquareDistance;
        var actualDistance = approximate.SquareDistance;

        if (actualDistance <= expectedDistance)
        {
            successes++;
            Console.WriteLine("FindPairApproximately CORRECT.   Exact {0}. Approx {1}", exact, approximate);
        }
        else
        {
            worstRatio = Math.Max(worstRatio, actualDistance / (double)expectedDistance);
            Console.WriteLine("FindPairApproximately INCORRECT. Exact {0}. Approx {1}. Too high by {2:N3}%",
                exact, approximate, 100.0 * (actualDistance / (double)expectedDistance - 1.0));
        }
    }

    var pairsTested = numClusters - 1;
    Assert.AreEqual(pairsTested, successes,
        string.Format("Did not succeed every time. Failed {0} of {1} times. Worst distance ratio is {2:N4}. {3} points of {4} dimensions.",
            numClusters - successes - 1,
            numClusters - 1,
            worstRatio,
            nPoints,
            dimensions
        )
    );
}
/// <summary>
/// Find the bits per dimension for a list of points by passing the largest MaxCoordinate among them,
/// cast to int, to HilbertPoint.FindBitsPerDimension.
/// </summary>
/// <param name="points">Points to study. The maximum is taken over the whole list.</param>
/// <returns>The value returned by HilbertPoint.FindBitsPerDimension.</returns>
public static int FindBitsPerDimension(IReadOnlyList <UnsignedPoint> points)
{
    var largestCoordinate = points.Select(p => p.MaxCoordinate).Max();
    return HilbertPoint.FindBitsPerDimension((int)largestCoordinate);
}
/// <summary>
/// For every cluster, compare the exhaustively found closest cluster with the best answer produced by
/// FindClusterApproximately across several Hilbert indices obtained from OptimalIndex.SearchMany.
/// The test passes only if, for every cluster, the best approximate answer names the right cluster
/// and its distance is no greater than the exact one.
/// </summary>
/// <param name="nPoints">Number of points in each cluster.</param>
/// <param name="dimensions">Number of dimensions in each point.</param>
/// <param name="numClusters">Number of clusters to create.</param>
/// <param name="numCurvesToTry">Passed as the last argument to OptimalIndex.SearchMany.</param>
/// <param name="numCurvesToKeep">Passed as the second argument to OptimalIndex.SearchMany.</param>
public void ClosestClusterTest(int nPoints, int dimensions, int numClusters, int numCurvesToTry, int numCurvesToKeep)
{
    var correctColorCount = 0;
    var correctDistanceCount = 0;
    var data = new GaussianClustering
    {
        ClusterCount = numClusters,
        Dimensions = dimensions,
        MaxCoordinate = 1000,
        MinClusterSize = nPoints,
        MaxClusterSize = nPoints
    };
    var closestExact = new PolyChromaticClosestPoint <string> .ClosestPair();
    var closestApproximate = new PolyChromaticClosestPoint <string> .ClosestPair();
    var clusters = data.MakeClusters();
    var bitsPerDimension = (1 + data.MaxCoordinate).SmallestPowerOfTwo();
    var hilbertPoints = clusters
        .Points()
        .Select(up => HilbertPoint.CastOrConvert(up, bitsPerDimension, true))
        .ToList();
    var bestIndices = OptimalIndex.SearchMany(
        hilbertPoints,
        numCurvesToKeep,
        5 /*outlier size */,
        10 /* NoiseSkipBy */,
        1 /* ReducedNoiseSkipBy */,
        numCurvesToTry
    );

    // One closest-point searcher per retained index, built from the index rather than from its sorted point list.
    var pccps = bestIndices
        .Select(result => new PolyChromaticClosestPoint <string>(clusters, result.Index))
        .ToList();

    var pccp1 = pccps[0];
    foreach (var color in pccp1.Clusters.ClassLabels())
    {
        var exact = pccp1.FindClusterExhaustively(color);
        // Take the smallest answer, by the pairs' own ordering, among all the indices.
        var approximate = pccps
            .Select(pccp => pccp.FindClusterApproximately(color))
            .OrderBy(cp => cp)
            .First();

        if (exact.SquareDistance >= approximate.SquareDistance)
        {
            correctDistanceCount++;
        }
        if (exact.Color2.Equals(approximate.Color2))
        {
            correctColorCount++;
        }

        // Track the overall closest pair seen by each method.
        if (exact.SquareDistance < closestExact.SquareDistance)
        {
            closestExact = exact;
        }
        if (approximate.SquareDistance < closestApproximate.SquareDistance)
        {
            closestApproximate = approximate;
        }

        var ratio = approximate.SquareDistance / (double)exact.SquareDistance;
        var overagePercent = (ratio - 1.0) * 100.0;
        Console.WriteLine(string.Format("Exact {0} vs Approx. {1}. Over by {2:N3}%", exact, approximate, overagePercent));
    }

    var overallVerdict = closestExact.SquareDistance >= closestApproximate.SquareDistance
        ? "DID FIND the closest pair of points overall. Exact {0}. Approx {1}"
        : "DID NOT FIND the closest pair of points overall. Exact {0}. Approx {1}";
    Console.WriteLine(overallVerdict, closestExact, closestApproximate);

    Assert.IsTrue(
        correctColorCount == numClusters && correctDistanceCount == numClusters,
        string.Format("Of {0} clusters, only {1} searches found the closest cluster and {2} found the shortest distance.",
            numClusters,
            correctColorCount,
            correctDistanceCount
        )
    );
}