/// <summary>
/// Decides whether two points share a cell when D-dimensional space is partitioned into a grid of
/// hypercubes whose edge length is cellSize, with the grid anchored at the origin.
///
/// Example: for a cellSize of 32 the first cell spans coordinates 0 to 31, the second 32 to 63, and so on.
/// Coordinates 37 and 70 are separated by the grid line at 64, so they do NOT share a cell.
/// </summary>
/// <param name="p1">First point to compare.</param>
/// <param name="p2">Second point to compare.</param>
/// <param name="cellSize">Edge length of a grid cell.</param>
/// <returns>True if both points fall in the same cell, false otherwise.</returns>
private bool InSameCell(UnsignedPoint p1, UnsignedPoint p2, int cellSize)
{
    // A single dimension in which a grid line separates the two coordinates is enough to split the
    // points apart, no matter how well the remaining coordinates agree.
    for (var dimension = 0; dimension < Dimensions; dimension++)
    {
        var first = p1[dimension];
        var second = p2[dimension];
        if (first != second)
        {
            var larger = Max(first, second);
            var smaller = Min(first, second);

            // Index of the highest grid line that does not exceed the larger coordinate.
            var multiplier = larger / cellSize;

            // A multiplier of zero means both coordinates lie inside the first cell of this dimension.
            if (multiplier != 0)
            {
                var gridLine = multiplier * cellSize;

                // gridLine <= larger already holds; if additionally smaller < gridLine, the line
                // falls between the coordinates (or on the larger one) and the points are separated.
                if (gridLine > smaller)
                {
                    return false;
                }
            }
        }
    }
    return true;
}
/// <summary>
/// Approximates the closest pair of points (one of each color) by walking SortedPoints and measuring
/// only those points of the two colors that follow one another in that ordering.
///
/// Points whose label is neither color1 nor color2 are skipped entirely.
///
/// NOTE: This was a good idea, but yields results too poor to be used.
/// </summary>
/// <param name="color1">Label of the first cluster.</param>
/// <param name="color2">Label of the second cluster.</param>
/// <returns>The point with Color1, the point with Color2 and the square distance between them.</returns>
public ClosestPair FindPairApproximately(TLabel color1, TLabel color2)
{
    var best = new ClosestPair();
    UnsignedPoint previousPoint = null;
    var previousColor = default(TLabel);

    foreach (var point in SortedPoints)
    {
        var color = Clusters.GetClassLabel(point);
        if (!color.Equals(color1) && !color.Equals(color2))
        {
            // Belongs to some other cluster; it takes no part in the comparison.
            continue;
        }

        // Only a change of color between consecutive retained points forms a candidate pair.
        if (previousPoint != null && !previousColor.Equals(color))
        {
            var squareDistance = point.Measure(previousPoint);
            if (squareDistance < best.SquareDistance)
            {
                best.SquareDistance = squareDistance;
                best.Color1 = previousColor;
                best.Point1 = previousPoint;
                best.Color2 = color;
                best.Point2 = point;
            }
        }

        previousPoint = point;
        previousColor = color;
    }

    // Orient the result so that Color1 corresponds to the caller's color1.
    return best.Swap(color1);
}
/// <summary>
/// Compose an enumerable covering a window of up to rangeLength points taken from SortedPoints.
/// The window is positioned so that about half of it precedes the sorted position of the given point;
/// its start is never placed before the first point, and it is pulled back so that it does not begin
/// later than Count - rangeLength.
/// </summary>
/// <param name="p">Point around which the window is positioned.</param>
/// <param name="rangeLength">Maximum number of points to return.</param>
/// <returns>A lazily evaluated sequence of at most rangeLength points in sorted order.</returns>
public IEnumerable<UnsignedPoint> Range(UnsignedPoint p, int rangeLength)
{
    var center = SortedPosition(p);
    var idealStart = center - rangeLength / 2;

    // Clamp at the front of the list first, then make sure the window does not begin too late.
    var notBeforeFirst = Math.Max(0, idealStart);
    var start = Math.Min(notBeforeFirst, Count - rangeLength);

    return SortedPoints.Skip(start).Take(rangeLength);
}
/// <summary>
/// Walk the points in Hilbert curve order and merge the clusters of each pair of neighboring points
/// that MergeIfNear accepts as close enough.
///
/// If the ideal number of clusters is K, this first pass often reduces the points to 2K clusters or fewer,
/// excluding the outliers.
/// </summary>
/// <param name="sortedPoints">Points arranged in Hilbert curve order.</param>
private void MergeByHilbertIndex(IList<UnsignedPoint> sortedPoints)
{
    // "Revisitations": the Hilbert order may leave a cluster to visit an outlier and then return to
    // the same cluster. When two consecutive points cannot be joined, the current point is also tried
    // against the most recently merged point, which sometimes captures such a return.
    // The extra test only runs when consecutive points could not be joined.
    UnsignedPoint lastMerged = null;
    var revisitations = 0;

    for (var position = 1; position < sortedPoints.Count; position++)
    {
        var previous = sortedPoints[position - 1];
        var current = sortedPoints[position];
        if (previous == null)
        {
            continue;
        }

        if (MergeIfNear(previous, current))
        {
            lastMerged = current;
        }
        else if (lastMerged != null && MergeIfNear(lastMerged, current))
        {
            lastMerged = current;
            revisitations++;
        }
    }

    var suffix = revisitations == 1 ? ' ' : 's';
    Logger.Debug($"{revisitations} Revisitation{suffix} in MergeByHilbertIndex");
}
/// <summary>
/// Look up a point in IdsToPoints by its id.
/// </summary>
/// <param name="id">UniqueId of a point.</param>
/// <returns>The point registered under the given id, or null if there is none.</returns>
public UnsignedPoint FindById(int id)
{
    UnsignedPoint match;
    return IdsToPoints.TryGetValue(id, out match) ? match : null;
}
/// <summary>
/// Build (once) and return the point for this reviewer from its sparse (MovieId, Rating) pairs.
/// Each MovieId is reduced by one to obtain a zero-based dimension index and each Rating becomes the
/// unsigned coordinate value for that dimension. The point is cached in Point and reused on later calls.
/// </summary>
/// <param name="dimensions">Total number of dimensions, including those which are missing a value, hence have
/// no corresponding pair (MovieId,Rating).</param>
/// <returns>A new HyperContrastedPoint or SparsePoint, whose UniqueId is the ReviewerId.</returns>
public UnsignedPoint ToPoint(int dimensions)
{
    if (Point != null)
    {
        return Point;
    }

    // Hard-wired choice between the two point representations.
    var useHyperContrastedPoints = true;

    var zeroBasedMovieIds = MovieIds.Select(movieId => movieId - 1).ToList();
    var ratingValues = Ratings.Select(rating => (uint)rating).ToList();

    if (useHyperContrastedPoints)
    {
        // NOTE(review): the meaning of the pair { 0, 6 } is defined by HyperContrastedPoint - confirm there.
        Point = new HyperContrastedPoint(zeroBasedMovieIds, ratingValues, dimensions, new[] { 0U, 6U }, ReviewerId);
    }
    else
    {
        Point = new SparsePoint(zeroBasedMovieIds, ratingValues, dimensions, 0U, ReviewerId);
    }
    return Point;
}
/// <summary>
/// Create a pair from two labeled points whose square distance has already been computed.
/// </summary>
/// <param name="color1">Label of the first point.</param>
/// <param name="p1">First point.</param>
/// <param name="color2">Label of the second point.</param>
/// <param name="p2">Second point.</param>
/// <param name="sqDist">Square distance between the two points, as supplied by the caller.</param>
public ClosestPair(TLabel color1, UnsignedPoint p1, TLabel color2, UnsignedPoint p2, long sqDist)
{
    // Points first, then their labels, then the precomputed distance.
    Point1 = p1;
    Point2 = p2;
    Color1 = color1;
    Color2 = color2;
    SquareDistance = sqDist;

    Validate();
}
/// <summary>
/// Create a pair from two labeled points, measuring the square distance between them.
/// </summary>
/// <param name="color1">Label of the first point.</param>
/// <param name="p1">First point.</param>
/// <param name="color2">Label of the second point.</param>
/// <param name="p2">Second point.</param>
public ClosestPair(TLabel color1, UnsignedPoint p1, TLabel color2, UnsignedPoint p2)
    : this(color1, p1, color2, p2, p1.Measure(p2))
{
    // The chained constructor stores the fields and performs validation.
}
/// <summary>
/// Measure how well NearestFromRange approximates the neighbors of one reference point.
/// The neighbors reported by Nearest(point, k) serve as the reference answer; the result is the number
/// of points returned by NearestFromRange(point, rangeLength) that belong to that answer, divided by k.
/// </summary>
/// <param name="point">Reference point whose neighbors are sought.</param>
/// <param name="k">Number of nearest neighbors sought.</param>
/// <param name="rangeLength">Number of points in the Hilbert index to sample.</param>
/// <returns>A value from zero to 1.0, where 1.0 means perfectly accurate.</returns>
public double Accuracy(UnsignedPoint point, int k, int rangeLength)
{
    var trueNeighbors = new HashSet<UnsignedPoint>(Nearest(point, k));

    var hits = 0;
    foreach (var candidate in NearestFromRange(point, rangeLength))
    {
        if (trueNeighbors.Contains(candidate))
        {
            hits++;
        }
    }

    return hits / (double)k;
}
/// <summary>
/// Compute a centroid record for every class label currently present in Clusters.
/// </summary>
/// <returns>One ClusterCentroid per label, holding the label, the centroid of its points and the point count.</returns>
public List<ClusterCentroid> GetCentroids()
{
    var centroids = new List<ClusterCentroid>();
    foreach (var label in Clusters.ClassLabels())
    {
        var members = Clusters.PointsInClass(label);
        var summary = new ClusterCentroid();
        summary.ClusterLabel = label;
        summary.Centroid = UnsignedPoint.Centroid(members);
        summary.Count = members.Count;
        centroids.Add(summary);
    }
    return centroids;
}
/// <summary>
/// Generate random points grouped around separately generated cluster centers, then add the configured
/// percentage of noise points via AddNoise.
/// </summary>
/// <returns>Points that are grouped into clusters and stored in a Classification.</returns>
public Classification<UnsignedPoint, string> MakeClusters()
{
    var clusters = new Classification<UnsignedPoint, string>();
    r = new FastRandom();

    var farthestDistanceFromClusterCenter = 0.0;
    var minDistance = EllipsoidalGenerator.MinimumSeparation(MaxDistanceStdDev, Dimensions);

    // Keep the centers of the clusters away from the edge, so that points do not go out of bounds
    // and have their coordinates truncated.
    var centerGenerator = new DiffuseGenerator(Dimensions, minDistance)
    {
        Minimum = MaxDistanceStdDev,
        Maximum = MaxCoordinate - MaxDistanceStdDev
    };

    var clusterCenters = new Dictionary<string, UnsignedPoint>();
    var clusterIndex = 0;

    foreach (var center in centerGenerator.Take(ClusterCount))
    {
        if (center == null)
        {
            continue;
        }
        var centerPoint = new UnsignedPoint(center);

        // The cluster size is taken from ClusterSizes (cycling through it) when sizes were supplied,
        // otherwise it is drawn at random.
        int clusterSize = ClusterSizes.Length > 0
            ? ClusterSizes[clusterIndex % ClusterSizes.Length]
            : r.Next(MinClusterSize, MaxClusterSize);

        var deviations = RandomDoubles(Dimensions, MinDistanceStdDev, MaxDistanceStdDev, r);
        var pointGenerator = new EllipsoidalGenerator(center, deviations, Dimensions);
        var clusterId = clusterIndex.ToString();

        foreach (var pointNumber in Enumerable.Range(1, clusterSize))
        {
            var p = new UnsignedPoint(pointGenerator.Generate(new int[Dimensions]));
            clusters.Add(p, clusterId);

            // Track the widest spread seen, purely for the diagnostic message below.
            var distance = Math.Sqrt(centerPoint.Measure(p));
            farthestDistanceFromClusterCenter = Math.Max(farthestDistanceFromClusterCenter, distance);
        }

        clusterCenters[clusterId] = centerPoint;
        clusterIndex++;
    }

    var noisePointCount = (int)Math.Floor(clusters.NumPoints * NoisePercentage / 100);
    AddNoise(noisePointCount, clusterCenters, clusters);

    Debug.WriteLine(
        "Test data: Farthest Distance from center = {0:N2}. Minimum Distance Permitted between Clusters = {1:N2}. Max Standard Deviation = {2}",
        farthestDistanceFromClusterCenter,
        minDistance,
        MaxDistanceStdDev
    );
    return clusters;
}
/// <summary>
/// Add noise points to the data. Each noise point is a weighted average of one random point from each
/// of two neighboring clusters and is classified into whichever of those two clusters contains the
/// point closest to it. Every noise point is also recorded in Noise.
/// </summary>
/// <param name="noisePointsToAdd">Number of noise points to add; nothing happens if this is not positive.</param>
/// <param name="clusterCenters">Cluster centers for each cluster, where the key is the cluster id.
/// (Not read by this method.)</param>
/// <param name="clusters">The noise points will be added to these clusters.</param>
private void AddNoise(int noisePointsToAdd, Dictionary<string, UnsignedPoint> clusterCenters, Classification<UnsignedPoint, string> clusters)
{
    if (noisePointsToAdd <= 0)
    {
        return;
    }

    // For each cluster, find an (approximately) nearest neighboring cluster. Noise will be placed in
    // the space between such neighboring pairs.
    var pccp = new PolyChromaticClosestPoint<string>(clusters);
    var neighborPairs = clusters.ClassLabels()
        .Select(clusterId =>
        {
            var pair = pccp.FindClusterApproximately(clusterId).Swap(clusterId);
            return new Tuple<string, string>(pair.Color1, pair.Color2);
        })
        .ToList();

    // Random access into each cluster is needed below, so snapshot every cluster as a list.
    var membersByLabel = new Dictionary<string, List<UnsignedPoint>>();
    foreach (var entry in clusters.LabelToPoints)
    {
        membersByLabel[entry.Key] = entry.Value.ToList();
    }

    // Blend random points from neighboring clusters. The resulting noise tends to form a filament
    // between two clusters - the situation most likely to make a clustering algorithm merge two
    // distinct clusters by mistake.
    for (var added = 0; added < noisePointsToAdd; added++)
    {
        var chosenPair = neighborPairs[r.Next(neighborPairs.Count)];

        // The first weight ranges from 0.18 to 0.82 so that most noise points stay outside both clusters.
        var firstWeight = r.NextDouble() * 0.64 + 0.18;
        var secondWeight = 1.0 - firstWeight;

        var firstMembers = membersByLabel[chosenPair.Item1];
        var secondMembers = membersByLabel[chosenPair.Item2];
        var firstSample = firstMembers[r.Next(firstMembers.Count)];
        var secondSample = secondMembers[r.Next(secondMembers.Count)];

        var blended = new int[Dimensions];
        for (var dimension = 0; dimension < blended.Length; dimension++)
        {
            blended[dimension] = (int)(firstWeight * firstSample.Coordinates[dimension] + secondWeight * secondSample.Coordinates[dimension]);
        }
        var noisePoint = new UnsignedPoint(blended);

        // Assign the noise point to the cluster holding its nearest member (ties go to the second cluster).
        var nearestInFirst = firstMembers.Min(member => noisePoint.Measure(member));
        var nearestInSecond = secondMembers.Min(member => noisePoint.Measure(member));
        var assignedLabel = nearestInFirst < nearestInSecond ? chosenPair.Item1 : chosenPair.Item2;

        clusters.Add(noisePoint, assignedLabel);
        Noise.Add(noisePoint);
    }
}
/// <summary>
/// Make a Classification of N-Dimensional data where the inputs are arrays of integers and the final element
/// in each array is the number of its category.
///
/// The number of dimensions is taken from the first array; every array is expected to have that same length.
/// An empty input list produces an empty Classification.
/// </summary>
/// <param name="pointsPlusClass">Data to classify. Each array holds the coordinates followed by the category number.</param>
/// <returns>A Classification of the points, labeled by the invariant-culture text of the category number.</returns>
public static Classification<UnsignedPoint, string> MakeClassification(IList<int[]> pointsPlusClass)
{
    var classification = new Classification<UnsignedPoint, string>();

    // Without a first row there is nothing to derive the dimensionality from - and nothing to classify.
    if (pointsPlusClass.Count == 0)
    {
        return classification;
    }

    // The last number for each point is its category.
    var dimensions = pointsPlusClass[0].Length - 1;
    foreach (var pointPlusClass in pointsPlusClass)
    {
        var point = new UnsignedPoint(pointPlusClass.Take(dimensions).ToArray());
        var category = pointPlusClass[dimensions].ToString(CultureInfo.InvariantCulture);
        classification.Add(point, category);
    }
    return classification;
}
/// <summary>
/// Find the points adjacent to the given point in the Hilbert ordering, then sort them by the cartesian distance, from nearest to farthest.
/// </summary>
/// <param name="point">Reference point to seek in the index.</param>
/// <param name="rangeLength">Number of points to retrieve from the index. Half of these points will precede and half succeed the given point
/// in the index, unless we are near the beginning or end of the index, in which case the range will be shifted
/// so that it still covers rangeLength points whenever the index holds that many.</param>
/// <param name="includePointItself">If false, the reference point will not be present in the results.
/// If true, the point will be present in the results.</param>
/// <returns>The points nearest to the reference point in both Hilbert and Cartesian ordering, sorted from nearest to farthest.</returns>
public IEnumerable<UnsignedPoint> NearestFromRange(UnsignedPoint point, int rangeLength, bool includePointItself = false)
{
    // When the reference point is to be filtered out, widen the window by one to make up for it.
    var windowLength = includePointItself ? rangeLength : rangeLength + 1;
    var middlePosition = SortedPosition(point);

    // Shift the window at BOTH ends of the index: it may not start before the first point, nor so
    // late that it would run past the last point and come back short.
    var latestStart = SortedPoints.Count() - windowLength;
    var centeredStart = middlePosition - windowLength / 2;
    var rangeStart = Math.Max(0, Math.Min(centeredStart, latestStart));

    return SortedPoints
        .Skip(rangeStart)
        .Take(windowLength)
        .Where(p => includePointItself || !p.Equals(point))
        .OrderBy(p => p.Measure(point));
}
/// <summary>
/// Approximates the closest pair of points between two clusters in three steps: compute the centroid C1
/// of the first cluster, pick the point P2 of the second cluster nearest to C1, then pick the point P1 of
/// the first cluster nearest to P2.
///
/// NOTE: If the two clusters overlap or are shaped irregularly, this is likely to return a poor result.
/// If the clusters are spherical, the results are likely to be very good.
/// </summary>
/// <param name="color1">Indicates the first cluster to be searched.</param>
/// <param name="color2">Indicates the second cluster to be searched.</param>
/// <returns>An approximate result, including one point from each cluster and the square of the distance between them.</returns>
public ClosestPair FindPairByCentroids(TLabel color1, TLabel color2)
{
    var firstCluster = Clusters.PointsInClass(color1);
    var secondCluster = Clusters.PointsInClass(color2);
    var firstCentroid = UnsignedPoint.Centroid(firstCluster);

    // P2: the member of the second cluster nearest to the first cluster's centroid.
    var anchor = (from candidate in secondCluster
                  orderby firstCentroid.Measure(candidate)
                  select candidate).First();

    // P1: the member of the first cluster nearest to P2.
    var pairings = from member in firstCluster
                   select new ClosestPair(color1, member, color2, anchor, member.Measure(anchor));
    var nearestPairing = pairings.OrderBy(pairing => pairing.SquareDistance).First();

    return nearestPairing.Swap(color1);
}
/// <summary>
/// Generate n points with an EllipsoidalGenerator centered at maxCoordinate / 2 in every dimension,
/// using sigma as the deviation for every dimension, and report how far each point lies from that center.
/// </summary>
/// <param name="n">Number of points to generate.</param>
/// <param name="dimensions">Number of dimensions per point.</param>
/// <param name="maxCoordinate">Coordinate range; half of it is used as the center in each dimension.</param>
/// <param name="sigma">Deviation passed to the generator for every dimension.</param>
/// <returns>The distances from the center to each generated point, sorted in ascending order.</returns>
static List<double> GaussianRadiusDistances(int n, int dimensions, int maxCoordinate, int sigma)
{
    var center = Enumerable.Repeat(maxCoordinate / 2, dimensions).ToArray();
    var deviations = Enumerable.Repeat((double)sigma, dimensions).ToArray();
    var affectedIndices = Enumerable.Range(0, dimensions).ToArray();
    var generator = new EllipsoidalGenerator(center, deviations, affectedIndices);

    var centerPoint = new UnsignedPoint(center);

    // One scratch buffer is handed to the generator for every point. All points are materialized
    // before any distance is measured.
    var scratch = new int[dimensions];
    var points = new List<UnsignedPoint>();
    foreach (var sample in Enumerable.Range(0, n))
    {
        points.Add(new UnsignedPoint(generator.Generate(scratch)));
    }

    return points
        .Select(point => centerPoint.Distance(point))
        .OrderBy(distance => distance)
        .ToList();
}
/// <summary>
/// Orient this pair so that Color1 matches the given color: if it does not match already,
/// Color1 is exchanged with Color2 and Point1 with Point2. The pair is modified in place.
/// </summary>
/// <param name="color1">The color that should match Color1.</param>
/// <returns>This same pair, to allow call chaining.</returns>
public ClosestPair Swap(TLabel color1)
{
    ValidateColor(Color1, "Swapping with null Color1");

    if (Color1.Equals(color1))
    {
        // Already oriented as requested.
        return this;
    }

    var formerColor1 = Color1;
    var formerPoint1 = Point1;
    Color1 = Color2;
    Point1 = Point2;
    Color2 = formerColor1;
    Point2 = formerPoint1;
    return this;
}
/// <summary>
/// UnsignedPoint.SquareDistanceCompare has an optimization.
/// This tests if that optimization can be used in a given case, which is when the given square distance
/// lies below the floor of (|p1| - |p2|)^2 or above |p1|^2 + |p2|^2, both computed from the points' magnitudes.
/// </summary>
/// <returns><c>true</c>, if distance optimization is usable, <c>false</c> otherwise.</returns>
/// <param name="p1">First point to compare.</param>
/// <param name="p2">Second point to compare.</param>
/// <param name="squareDistance">Test if the distance between the points is less than, equal to or greater than this given distance.</param>
private bool IsDistanceOptimizationUsable(UnsignedPoint p1, UnsignedPoint p2, long squareDistance)
{
    var magnitudeGap = p1.Magnitude - p2.Magnitude;
    var lowerBound = (long)Math.Floor(magnitudeGap * magnitudeGap);

    // The upper bound is only evaluated when the lower bound does not already decide the question.
    return squareDistance < lowerBound
        || squareDistance > p1.SquareMagnitude + p2.SquareMagnitude;
}
/// <summary>
/// Format a point as a delimited string record, without the terminating newline.
/// The fields are, in order: the id from InputDataIds, the class label from FinalClassification,
/// and then every coordinate of the point.
/// </summary>
/// <returns>The record.</returns>
/// <param name="point">Point to format.</param>
/// <param name="fieldDelimiter">Field delimiter.</param>
string PointToRecord(UnsignedPoint point, string fieldDelimiter = ",")
{
    var category = FinalClassification.GetClassLabel(point);
    var id = InputDataIds[point];

    var record = new StringBuilder();
    record.Append(id);
    record.Append(fieldDelimiter);
    record.Append(category);

    foreach (var coordinate in point.LazyCoordinates())
    {
        record.Append(fieldDelimiter);
        record.Append(coordinate);
    }
    return record.ToString();
}
/// <summary>
/// For a pair of points, try successively smaller grid cells (the cell size halves with every additional bit)
/// and record for how many bit counts the two points still share a cell.
/// </summary>
/// <param name="point1">First point to test.</param>
/// <param name="point2">Second point to test.</param>
/// <param name="sameCellPerBits">Accumulate the results here, adding to what has been recorded for other pairs of points.
/// The entry at index "bits" is incremented when the points share a cell at that number of bits.</param>
private void LoopOverBits(UnsignedPoint point1, UnsignedPoint point2, int[] sameCellPerBits)
{
    var bits = 1;
    while (bits < BitsPerDimension)
    {
        var cellSize = 1 << (BitsPerDimension - bits);
        if (!InSameCell(point1, point2, cellSize))
        {
            // All subsequent values of bits will necessarily split the points apart as well.
            return;
        }
        sameCellPerBits[bits]++;
        bits++;
    }
}
/// <summary>
/// Get the Hilbert position for a given point after balancing it and, if a permutation is supplied,
/// rearranging the balanced coordinates with it.
/// The point may have its coordinates reduced in precision if bitsPerDimension is lower than the required value.
/// </summary>
/// <param name="unbalancedPoint">Point prior to balancing.</param>
/// <param name="bitsPerDimension">Number of bits per dimension to use in forming the Hilbert position,
/// which may be lower than the number of bits required to faithfully represent all coordinate values,
/// causing the coordinate values of all coordinates to be reduced in precision.</param>
/// <param name="perm">Permutation to apply to coordinates, scrambling their order in a consistent way for all points.
/// If null, the balanced coordinates are used in their original order.</param>
/// <returns>The Hilbert position.</returns>
public BigInteger ToHilbertPosition(UnsignedPoint unbalancedPoint, int bitsPerDimension, Permutation<uint> perm = null)
{
    // Balancing is needed regardless of whether a permutation follows.
    uint[] balanced = Balance(unbalancedPoint.Coordinates, bitsPerDimension);

    if (perm != null)
    {
        return perm.ApplyToArray(balanced).HilbertIndex(bitsPerDimension);
    }
    return balanced.HilbertIndex(bitsPerDimension);
}
public void SparseToUnsignedMeasureWhereMissingValueIsPositive()
{
    // Checks the square distance between a SparsePoint whose missing coordinates default to 1
    // and a dense UnsignedPoint, both having 20 dimensions.
    uint missingValue = 1U;
    var sparseCoordinates = new Dictionary<int, uint>
    {
        { 5, 1 },
        { 7, 2 },
        { 10, 3 },
        { 15, 4 }
    };
    var sparsePoint = new SparsePoint(sparseCoordinates, 20, missingValue);

    // Dense point: zero everywhere except at the four positions set below.
    var denseCoordinates = new int[20];
    denseCoordinates[4] = 4;
    denseCoordinates[7] = 3;
    denseCoordinates[10] = 2;
    denseCoordinates[18] = 1;
    var densePoint = new UnsignedPoint(denseCoordinates);

    // |p1|^2 + |p2|^2 - 2 * (p1 . p2) = 46 + 30 - 2 * (4 + 6 + 6 + 1) = 42
    var expectedSquareDistance = 42L;

    var actualSquareDistance = sparsePoint.Measure(densePoint);

    Assert.AreEqual(expectedSquareDistance, actualSquareDistance, $"Sparse-to-unsigned Distances with MissingValue={missingValue} do not match.");
}
/// <summary>
/// Compute the centroid of the given points together with the largest and the average distance
/// from that centroid to any of the points.
/// </summary>
/// <param name="points">Points of the cluster. If the list is empty, MeanRadius is not assigned.</param>
public ClusterRadius(IList<UnsignedPoint> points)
{
    Centroid = UnsignedPoint.Centroid(points);
    MaximumRadius = 0;

    var totalOfRadii = 0.0;
    for (var index = 0; index < points.Count; index++)
    {
        var radius = Centroid.Distance(points[index]);
        totalOfRadii += radius;
        MaximumRadius = Math.Max(MaximumRadius, radius);
    }

    if (points.Count == 0)
    {
        // No points, hence no average to compute.
        return;
    }
    MeanRadius = totalOfRadii / points.Count;
}
/// <summary>
/// Merge the clusters containing two points if the distance separating them does not exceed the permitted maximum.
/// The points given here may be HilbertPoints from a HilbertIndex or UnsignedPoints already present in the Classification.
/// Either way, each point is looked up by its UniqueId in IdsToPoints to obtain the point that is used in the Classification.
/// </summary>
/// <returns><c>true</c>, if a new merge performed, <c>false</c> if too far to merge or already merged.</returns>
/// <param name="p1">First point to compare.</param>
/// <param name="p2">Second point.</param>
/// <param name="maxSquareDistance">If a positive value, use this as the maximum square distance permitted between points.
/// Otherwise, use MergeSquareDistance.</param>
private bool MergeIfNear(UnsignedPoint p1, UnsignedPoint p2, long maxSquareDistance = 0)
{
    var first = IdsToPoints[p1.UniqueId];
    var second = IdsToPoints[p2.UniqueId];

    long limit = maxSquareDistance > 0 ? maxSquareDistance : MergeSquareDistance;

    if (first.SquareDistanceCompare(second, limit) > 0)
    {
        // Farther apart than permitted.
        return false;
    }

    var firstLabel = Clusters.GetClassLabel(first);
    var secondLabel = Clusters.GetClassLabel(second);
    return Clusters.Merge(firstLabel, secondLabel);
}
/// <summary>
/// Unioning the results of several different indices, find the composite accuracy of using them all
/// in combination to find the nearest neighbors.
///
/// The reference answer is the set of k nearest neighbors reported by the first index. Every candidate
/// drawn from any index is translated to its equivalent point in the first index before it is tallied,
/// so a true neighbor that is found through several indices is counted only once.
/// </summary>
/// <param name="indices">Indices to combine. The first one supplies the reference answer.</param>
/// <param name="point">Point whose nearest neighbors are sought.</param>
/// <param name="k">Number of nearest neighbors who are sought.</param>
/// <param name="rangeLength">Number of points to draw from each index.</param>
/// <returns>A value from zero to 1.0, where 1.0 means perfectly accurate.</returns>
public static double CompositeAccuracy(IList<HilbertOrderedIndex> indices, UnsignedPoint point, int k, int rangeLength)
{
    // Note the tricky use of Equivalent. The points from different indices should not be directly compared,
    // so each index is queried with its own equivalent of the reference point, and every candidate it
    // returns is mapped back to the first index for both the membership test and the final tally.
    var firstIndex = indices[0];
    var allNeighbors = new HashSet<UnsignedPoint>();
    allNeighbors.UnionWith(firstIndex.Nearest(firstIndex.Equivalent(point), k));

    var foundNeighbors = new HashSet<UnsignedPoint>();
    foundNeighbors.UnionWith(
        indices
            .SelectMany(index => index.NearestFromRange(index.Equivalent(point), rangeLength))
            .Select(candidate => firstIndex.Equivalent(candidate))
            .Where(candidate => allNeighbors.Contains(candidate))
    );

    return foundNeighbors.Count / (double)k;
}
/// <summary>
/// Create two random clusters whose centers differ only in the first coordinate. Depending on
/// overlapPercent the clusters end up apart from one another, partly overlapping, or fully overlapping.
///
/// NOTE: This type of setup is to test divisive clustering, that divides two partly mixed gaussians.
/// </summary>
/// <param name="overlapPercent">A number from zero to 100.
/// If zero, the clusters do not overlap at all.
/// If fifty, then the clusters partly overlap.
/// If 100, the clusters have the same center, so are indistinguishable.</param>
/// <returns>The two clusters.</returns>
public Classification<UnsignedPoint, string> TwoClusters(double overlapPercent)
{
    var clusters = new Classification<UnsignedPoint, string>();
    r = new FastRandom();

    var farthestDistanceFromClusterCenter = 0.0;
    var minDistance = EllipsoidalGenerator.MinimumSeparation(MaxDistanceStdDev, Dimensions);

    // Keep the centers of the clusters away from the edge, so that points do not go out of bounds
    // and have their coordinates truncated. The maximum is kept even farther away because the second
    // center is obtained by shifting one coordinate of the first center in the higher direction.
    var centerGenerator = new DiffuseGenerator(Dimensions, minDistance)
    {
        Minimum = MaxDistanceStdDev,
        Maximum = MaxCoordinate - MaxDistanceStdDev - (int)minDistance
    };

    var firstCenter = centerGenerator.Take(1).FirstOrDefault();
    var secondCenter = (int[])firstCenter.Clone();
    secondCenter[0] += (int)(minDistance * (100.0 - overlapPercent) / 100.0);
    var centers = new[] { firstCenter, secondCenter };

    for (var clusterIndex = 0; clusterIndex < centers.Length; clusterIndex++)
    {
        var center = centers[clusterIndex];
        var centerPoint = new UnsignedPoint(center);
        var clusterSize = r.Next(MinClusterSize, MaxClusterSize);
        var deviations = RandomDoubles(Dimensions, MinDistanceStdDev, MaxDistanceStdDev, r);
        var pointGenerator = new EllipsoidalGenerator(center, deviations, Dimensions);
        var clusterId = clusterIndex.ToString();

        foreach (var pointNumber in Enumerable.Range(1, clusterSize))
        {
            var p = new UnsignedPoint(pointGenerator.Generate(new int[Dimensions]));
            clusters.Add(p, clusterId);

            var distance = Math.Sqrt(centerPoint.Measure(p));
            farthestDistanceFromClusterCenter = Math.Max(farthestDistanceFromClusterCenter, distance);
        }
    }

    //TODO: Go back and recluster the points. Put each point into the cluster whose centroid
    //      it is nearest. Thus, if two clusters partly overlap, the points from one will be pushed into the other.
    return clusters;
}
/// <summary>
/// Merge the clusters to which the two points belong, if their sizes permit.
///
/// The merge is refused when both clusters have a size greater than or equal to UnmergeableSize,
/// unless forceMerge is set. Points that already share a cluster are never merged.
/// </summary>
/// <param name="p1">Point belonging to first cluster to merge.</param>
/// <param name="p2">Point belonging to second cluster to merge.</param>
/// <param name="forceMerge">If true and UnmergeableSize is the sole obstacle to the merge, perform the merge anyways.
/// If false, honor UnmergeableSize.</param>
/// <returns>True if the merge was performed successfully, false otherwise.</returns>
private bool Merge(UnsignedPoint p1, UnsignedPoint p2, bool forceMerge = false)
{
    var firstLabel = Clusters.GetClassLabel(p1);
    var secondLabel = Clusters.GetClassLabel(p2);
    if (firstLabel.Equals(secondLabel))
    {
        return false;
    }

    var firstSize = Clusters.PointsInClass(firstLabel).Count;
    var secondSize = Clusters.PointsInClass(secondLabel).Count;
    var blockedBySize = !forceMerge && firstSize >= UnmergeableSize && secondSize >= UnmergeableSize;

    return !blockedBySize && Clusters.Merge(firstLabel, secondLabel);
}
public void MeasureDistanceSquared()
{
    // Compares UnsignedPoint.Measure against a square distance summed coordinate by coordinate
    // for two 100-dimensional points filled with deterministic patterns.
    const int dims = 100;
    var firstCoordinates = new uint[dims];
    var secondCoordinates = new uint[dims];
    for (var i = 0; i < dims; i++)
    {
        firstCoordinates[i] = (uint)(i % 37) * 10;
        secondCoordinates[i] = (uint)(i % 18) * 17;
    }

    // Use signed 64-bit arithmetic so that a smaller-minus-larger difference does not wrap around.
    var expectedSquareDistance = 0L;
    for (var i = 0; i < dims; i++)
    {
        long difference = (long)firstCoordinates[i] - (long)secondCoordinates[i];
        expectedSquareDistance += difference * difference;
    }

    var firstPoint = new UnsignedPoint(firstCoordinates);
    var secondPoint = new UnsignedPoint(secondCoordinates);
    var actualSquareDistance = firstPoint.Measure(secondPoint);

    Assert.AreEqual(expectedSquareDistance, actualSquareDistance, "Distances do not match");
}
/// <summary>
/// Finds exactly the two closest points (one of each color) and their square distance by brute force:
/// every point of the first cluster is measured against every point of the second cluster.
///
/// Points in all other clusters are ignored.
/// </summary>
/// <param name="color1">Label of the first cluster.</param>
/// <param name="color2">Label of the second cluster.</param>
/// <returns>The point with Color1, the point with Color2 and the square distance between them.</returns>
public ClosestPair FindPairExhaustively(TLabel color1, TLabel color2)
{
    UnsignedPoint nearestInFirst = null;
    UnsignedPoint nearestInSecond = null;
    var smallestSquareDistance = long.MaxValue;

    foreach (var candidate1 in Clusters.PointsInClass(color1))
    {
        foreach (var candidate2 in Clusters.PointsInClass(color2))
        {
            var squareDistance = candidate1.Measure(candidate2);
            if (squareDistance >= smallestSquareDistance)
            {
                // Not an improvement; on ties the earliest pair found is kept.
                continue;
            }
            smallestSquareDistance = squareDistance;
            nearestInFirst = candidate1;
            nearestInSecond = candidate2;
        }
    }

    var closest = new ClosestPair(color1, nearestInFirst, color2, nearestInSecond, smallestSquareDistance);
    return closest.Swap(color1);
}
/// <summary>
/// UnsignedPoint.SquareDistanceCompare has an optimization.
/// This tests if an extension of that optimization can be used in a given case.
///
/// The basic test (IsDistanceOptimizationUsable) is tried first. If it fails, the same kind of bounds
/// are computed again from the distances of both points to the corner of the space at which every
/// coordinate equals the largest value representable in bitsPerDimension bits.
/// </summary>
/// <returns><c>true</c>, if distance optimization is usable, <c>false</c> otherwise.</returns>
/// <param name="p1">First point to compare.</param>
/// <param name="p2">Second point to compare.</param>
/// <param name="squareDistance">Test if the distance between the points is less than, equal to or greater than this given distance.</param>
/// <param name="bitsPerDimension">Number of bits needed to represent the largest value of any coordinate of any point.</param>
private bool IsExtendedDistanceOptimizationUsable(UnsignedPoint p1, UnsignedPoint p2, long squareDistance, int bitsPerDimension)
{
    if (IsDistanceOptimizationUsable(p1, p2, squareDistance))
    {
        return true;
    }

    // Shift a 64-bit one: a 32-bit shift masks its count to five bits, so 32 bits per dimension
    // would yield 1 << 0 and a maximum coordinate of zero.
    var maxCoordinate = (1L << bitsPerDimension) - 1;

    var cornerSqDistance1 = 0L;
    var cornerSqDistance2 = 0L;
    for (var d = 0; d < p1.Dimensions; d++)
    {
        long delta1 = maxCoordinate - p1[d];
        cornerSqDistance1 += delta1 * delta1;

        long delta2 = maxCoordinate - p2[d];
        cornerSqDistance2 += delta2 * delta2;
    }

    // Lower bound: the squared difference of the two corner distances.
    var cornerDistanceGap = Math.Sqrt(cornerSqDistance1) - Math.Sqrt(cornerSqDistance2);
    var low = (long)Math.Floor(cornerDistanceGap * cornerDistanceGap);
    if (squareDistance < low)
    {
        return true;
    }

    // Upper bound: the sum of the two squared corner distances.
    var high = cornerSqDistance1 + cornerSqDistance2;
    return squareDistance > high;
}