Esempio n. 1
0
 /// <summary>
 /// Decide whether two points land in the same cell of a grid of hypercubes.
 ///
 /// The grid is rooted at the origin and every cell has an edge length of cellSize. With a cellSize of 32,
 /// the first cell spans coordinates 0 through 31, the next 32 through 63, and so on.
 /// For coordinates 37 and 70 the grid line at 64 lies between them, so those points are NOT in the same cell.
 /// </summary>
 /// <param name="p1">First point to compare.</param>
 /// <param name="p2">Second point to compare.</param>
 /// <param name="cellSize">Edge length of a grid cell.</param>
 /// <returns>True if the points fall in the same cell, false otherwise.</returns>
 private bool InSameCell(UnsignedPoint p1, UnsignedPoint p2, int cellSize)
 {
     // A single dimension with a grid line between the two coordinates is enough to separate the points.
     // A grid line sits at multiplier * cellSize; the points are split when smaller < grid <= larger.
     for (var dim = 0; dim < Dimensions; dim++)
     {
         var a = p1[dim];
         var b = p2[dim];
         if (a != b)
         {
             var larger     = Max(a, b);
             var smaller    = Min(a, b);
             var multiplier = larger / cellSize;
             // A zero multiplier means both coordinates are below cellSize, hence in the first cell.
             // Otherwise the grid line is already known to be <= larger, so only smaller needs checking.
             if (multiplier != 0 && multiplier * cellSize > smaller)
             {
                 return false;
             }
         }
     }
     return true;
 }
        /// <summary>
        /// Approximate the closest pair of points, one from each of two clusters, by walking SortedPoints in order
        /// and measuring only consecutive points (among those two clusters) whose colors differ.
        ///
        /// Points belonging to any other cluster are skipped.
        ///
        /// NOTE: This was a good idea, but yields results too poor to be used.
        /// </summary>
        /// <param name="color1">Label of the first cluster.</param>
        /// <param name="color2">Label of the second cluster.</param>
        /// <returns>The best pair found, passed through Swap(color1) before being returned.
        /// </returns>
        public ClosestPair FindPairApproximately(TLabel color1, TLabel color2)
        {
            var           best          = new ClosestPair();
            UnsignedPoint previousPoint = null;
            TLabel        previousColor = default(TLabel);

            foreach (var current in SortedPoints)
            {
                var currentColor = Clusters.GetClassLabel(current);
                // Ignore points from clusters other than the two being compared.
                if (!currentColor.Equals(color1) && !currentColor.Equals(color2))
                {
                    continue;
                }
                // Only a change of color between consecutive points yields a candidate pair.
                if (previousPoint != null && !previousColor.Equals(currentColor))
                {
                    var squareDistance = current.Measure(previousPoint);
                    if (squareDistance < best.SquareDistance)
                    {
                        best.SquareDistance = squareDistance;
                        best.Color1         = previousColor;
                        best.Point1         = previousPoint;
                        best.Color2         = currentColor;
                        best.Point2         = current;
                    }
                }
                previousPoint = current;
                previousColor = currentColor;
            }
            return best.Swap(color1);
        }
Esempio n. 3
0
        /// <summary>
        /// Compose an enumerable covering a window of points in sorted order around the given point.
        /// The window start is rangeLength / 2 positions before the point, clamped to be no less than zero
        /// and no greater than Count - rangeLength.
        /// Fewer than rangeLength items are returned if the sorted list does not hold that many from the start position.
        /// </summary>
        /// <param name="p">Point whose sorted position anchors the range.</param>
        /// <param name="rangeLength">Range length.</param>
        public IEnumerable <UnsignedPoint> Range(UnsignedPoint p, int rangeLength)
        {
            var centeredStart = SortedPosition(p) - rangeLength / 2;
            var lastFullStart = Count - rangeLength;
            var rangeStart    = Math.Min(Math.Max(0, centeredStart), lastFullStart);

            return SortedPoints.Skip(rangeStart).Take(rangeLength);
        }
        /// <summary>
        /// Merge into one cluster all pairs of points that are adjacent to one another in Hilbert curve order
        /// if they are not too far apart (as decided by MergeIfNear).
        ///
        /// If the ideal number of clusters is K, this first pass often reduces the points to 2K clusters or fewer,
        /// excluding the outliers.
        /// </summary>
        /// <param name="sortedPoints">Points arranged in Hilbert curve order.</param>
        private void MergeByHilbertIndex(IList <UnsignedPoint> sortedPoints)
        {
            UnsignedPoint prevPoint  = null;
            UnsignedPoint lastMerged = null;
            // About "revisitations". If the Hilbert order leaves a cluster to visit an outlier,
            // then returns to "revisit" the same cluster, the revisitation logic may sometimes capture this and perform a merge.
            // This test is only performed when consecutive points could not be joined, hence is
            // proportional to K, the number of clusters, not N, the number of points.
            var revisitations = 0;

            foreach (var currPoint in sortedPoints)
            {
                if (prevPoint != null)
                {
                    if (MergeIfNear(prevPoint, currPoint))
                    {
                        lastMerged = currPoint;
                    }
                    else if (lastMerged != null && MergeIfNear(lastMerged, currPoint))
                    {
                        // The immediate predecessor could not be joined, but the most recently merged point could.
                        lastMerged = currPoint;
                        revisitations++;
                    }
                }
                prevPoint = currPoint;
            }
            // Use an empty suffix (not a blank character) for the singular so the message has no doubled space.
            var plural = revisitations != 1 ? "s" : "";

            Logger.Debug($"{revisitations} Revisitation{plural} in MergeByHilbertIndex");
        }
Esempio n. 5
0
        /// <summary>
        /// Look up a point in IdsToPoints by its id.
        /// </summary>
        /// <returns>The point registered under the given id, or null if there is none.</returns>
        /// <param name="id">UniqueId of a point.</param>
        public UnsignedPoint FindById(int id)
        {
            UnsignedPoint match;

            if (!IdsToPoints.TryGetValue(id, out match))
            {
                return null;
            }
            return match;
        }
Esempio n. 6
0
 /// <summary>
 /// Build (once) and return the point for this reviewer from its sparse (MovieId, Rating) pairs.
 /// Each MovieId is reduced by one to make it a zero-based dimension index and each Rating becomes the
 /// unsigned coordinate value. The point is stored in Point, so later calls return the same instance.
 /// </summary>
 /// <param name="dimensions">Total number of dimensions, including those which are missing a value, hence have
 /// no corresponding pair (MovieId,Rating).</param>
 /// <returns>A HyperContrastedPoint (or SparsePoint, if the local switch is flipped) constructed with ReviewerId
 /// as its final argument.</returns>
 public UnsignedPoint ToPoint(int dimensions)
 {
     if (Point != null)
     {
         return Point;
     }

     // Local switch selecting which point implementation is built; currently always HyperContrastedPoint.
     var useHyperContrastedPoints = true;
     var zeroBasedMovieIds        = MovieIds.Select(movieId => movieId - 1).ToList();
     var unsignedRatings          = Ratings.Select(rating => (uint)rating).ToList();

     if (useHyperContrastedPoints)
     {
         Point = new HyperContrastedPoint(zeroBasedMovieIds, unsignedRatings, dimensions, new[] { 0U, 6U }, ReviewerId);
     }
     else
     {
         Point = new SparsePoint(zeroBasedMovieIds, unsignedRatings, dimensions, 0U, ReviewerId);
     }
     return Point;
 }
 /// <summary>
 /// Create a pair from two labeled points and an already-known square distance, then run Validate.
 /// </summary>
 /// <param name="color1">Label of the first point.</param>
 /// <param name="p1">First point.</param>
 /// <param name="color2">Label of the second point.</param>
 /// <param name="p2">Second point.</param>
 /// <param name="sqDist">Square distance between the two points, stored as given.</param>
 public ClosestPair(TLabel color1, UnsignedPoint p1, TLabel color2, UnsignedPoint p2, long sqDist)
 {
     // First member of the pair.
     Point1 = p1;
     Color1 = color1;

     // Second member of the pair.
     Point2 = p2;
     Color2 = color2;

     SquareDistance = sqDist;
     Validate();
 }
 /// <summary>
 /// Create a pair from two labeled points, measuring the square distance between them with Measure.
 /// </summary>
 /// <param name="color1">Label of the first point.</param>
 /// <param name="p1">First point.</param>
 /// <param name="color2">Label of the second point.</param>
 /// <param name="p2">Second point.</param>
 public ClosestPair(TLabel color1, UnsignedPoint p1, TLabel color2, UnsignedPoint p2)
     : this(color1, p1, color2, p2, p1.Measure(p2))
 {
     // All assignments and the call to Validate happen in the five-argument constructor.
 }
Esempio n. 9
0
        /// <summary>
        /// Measure how well NearestFromRange approximates Nearest for a single reference point.
        /// The k neighbors returned by Nearest are taken as the truth; every point returned by
        /// NearestFromRange that is also among them counts as a match.
        /// </summary>
        /// <param name="point">Reference point whose neighbors are sought.</param>
        /// <param name="k">Number of nearest neighbors sought.</param>
        /// <param name="rangeLength">Number of points in the Hilbert index to sample.</param>
        /// <returns>The number of matches divided by k; 1.0 means perfectly accurate.</returns>
        public double Accuracy(UnsignedPoint point, int k, int rangeLength)
        {
            var trueNeighbors = new HashSet <UnsignedPoint>(Nearest(point, k));
            var hits          = 0;

            foreach (var candidate in NearestFromRange(point, rangeLength))
            {
                if (trueNeighbors.Contains(candidate))
                {
                    hits++;
                }
            }
            return hits / (double)k;
        }
 /// <summary>
 /// Gets the centroid and point count for each cluster in Clusters.
 /// </summary>
 /// <returns>One ClusterCentroid per class label, holding the label, the centroid of the points
 /// in that class and the number of points in that class.</returns>
 public List <ClusterCentroid> GetCentroids()
 {
     return(Clusters
            .ClassLabels()
            .Select(label =>
            {
                // Fetch the members once per label and reuse them for both the centroid and the count.
                var members = Clusters.PointsInClass(label);
                return new ClusterCentroid {
                    ClusterLabel = label,
                    Centroid = UnsignedPoint.Centroid(members),
                    Count = members.Count
                };
            }).ToList());
 }
Esempio n. 11
0
        /// <summary>
        /// Generate random points clumped into individual Gaussian clusters, then add noise points via AddNoise.
        ///
        /// Cluster centers come from a DiffuseGenerator configured with the minimum separation reported by
        /// EllipsoidalGenerator.MinimumSeparation. Each cluster is labeled with its zero-based ordinal as a string.
        /// </summary>
        /// <returns>Points that are grouped into clusters and stored in a Classification.</returns>
        public Classification <UnsignedPoint, string> MakeClusters()
        {
            var result = new Classification <UnsignedPoint, string>();

            r = new FastRandom();
            var maxDistanceFromCenter = 0.0;
            var minDistance           = EllipsoidalGenerator.MinimumSeparation(MaxDistanceStdDev, Dimensions);

            // Keep the centers of the clusters away from the edge, so that points do not go out of bounds and have their coordinates truncated.
            var centerGenerator = new DiffuseGenerator(Dimensions, minDistance);
            centerGenerator.Minimum = MaxDistanceStdDev;
            centerGenerator.Maximum = MaxCoordinate - MaxDistanceStdDev;

            var centersById  = new Dictionary <string, UnsignedPoint> ();
            var clusterIndex = 0;

            foreach (var rawCenter in centerGenerator.Take(ClusterCount))
            {
                // The generator may yield a null center; such entries are skipped without consuming a cluster id.
                if (rawCenter == null)
                {
                    continue;
                }
                var centerPoint = new UnsignedPoint(rawCenter);

                // Sizes cycle through ClusterSizes when any are configured; otherwise a random size is drawn.
                var clusterSize = ClusterSizes.Length > 0
                    ? ClusterSizes[clusterIndex % ClusterSizes.Length]
                    : r.Next(MinClusterSize, MaxClusterSize);

                var deviations     = RandomDoubles(Dimensions, MinDistanceStdDev, MaxDistanceStdDev, r);
                var pointGenerator = new EllipsoidalGenerator(rawCenter, deviations, Dimensions);
                var clusterId      = clusterIndex.ToString();

                foreach (var ordinal in Enumerable.Range(1, clusterSize))
                {
                    var member = new UnsignedPoint(pointGenerator.Generate(new int[Dimensions]));
                    result.Add(member, clusterId);

                    // Track the largest center-to-member distance for the diagnostic message below.
                    var distance = Math.Sqrt(centerPoint.Measure(member));
                    maxDistanceFromCenter = Math.Max(maxDistanceFromCenter, distance);
                }
                centersById[clusterId] = centerPoint;
                clusterIndex++;
            }

            var noiseCount = (int)Math.Floor(result.NumPoints * NoisePercentage / 100);
            AddNoise(noiseCount, centersById, result);

            Debug.WriteLine("Test data: Farthest Distance from center = {0:N2}. Minimum Distance Permitted between Clusters = {1:N2}. Max Standard Deviation = {2}",
                            maxDistanceFromCenter,
                            minDistance,
                            MaxDistanceStdDev
                            );
            return result;
        }
Esempio n. 12
0
        /// <summary>
        /// Add noise points to the data, labeling each one with whichever of two neighboring clusters
        /// holds the member point nearest to it.
        /// </summary>
        /// <param name="noisePointsToAdd">Number of noise points to add. Nothing happens if this is zero or negative.</param>
        /// <param name="clusterCenters">Cluster centers for each cluster, where the key is the cluster id.
        /// This argument is not read by the method body.</param>
        /// <param name="clusters">The noise points will be added to these clusters. Each noise point is also added to Noise.</param>
        private void AddNoise(int noisePointsToAdd, Dictionary <string, UnsignedPoint> clusterCenters, Classification <UnsignedPoint, string> clusters)
        {
            if (noisePointsToAdd <= 0)
            {
                return;
            }

            // For every cluster, find a nearby neighboring cluster.
            // Noise points will be positioned in the space between such neighboring clusters.
            var closestPointFinder = new PolyChromaticClosestPoint <string> (clusters);
            var neighborPairs      = new List <Tuple <String, String> > ();

            foreach (var clusterId in clusters.ClassLabels())
            {
                var nearest = closestPointFinder.FindClusterApproximately(clusterId).Swap(clusterId);
                neighborPairs.Add(new Tuple <string, string>(nearest.Color1, nearest.Color2));
            }

            // Random members are picked by index below, so snapshot each cluster's points as a List.
            var memberLists = new Dictionary <string, List <UnsignedPoint> > ();

            foreach (var labelAndPoints in clusters.LabelToPoints)
            {
                memberLists [labelAndPoints.Key] = labelAndPoints.Value.ToList();
            }

            // Each noise point is a weighted average of one random member from each of two neighboring clusters.
            // Such points tend to form a filament between the two clusters, the situation most likely to
            // cause two distinct clusters to be merged into one by mistake.
            for (var added = 0; added < noisePointsToAdd; added++)
            {
                var pairing = neighborPairs [r.Next(neighborPairs.Count)];

                // The weight ranges from 0.18 to 0.82 so as to keep most noise points from being inside a cluster,
                // which would make them non-noisy.
                var weight1 = r.NextDouble() * 0.64 + 0.18;
                var weight2 = 1.0 - weight1;

                var members1 = memberLists[pairing.Item1];
                var members2 = memberLists[pairing.Item2];
                var sample1  = members1[r.Next(members1.Count)];
                var sample2  = members2[r.Next(members2.Count)];

                var blended = new int[Dimensions];
                for (var iDim = 0; iDim < blended.Length; iDim++)
                {
                    blended [iDim] = (int)(weight1 * sample1.Coordinates [iDim] + weight2 * sample2.Coordinates [iDim]);
                }
                var noisePoint = new UnsignedPoint(blended);

                // Label the noise point with the cluster whose closest member is nearer (ties go to the second cluster).
                var nearest1 = members1.Min(member => noisePoint.Measure(member));
                var nearest2 = members2.Min(member => noisePoint.Measure(member));
                var label    = nearest1 < nearest2 ? pairing.Item1 : pairing.Item2;

                clusters.Add(noisePoint, label);
                Noise.Add(noisePoint);
            }
        }
        /// <summary>
        /// Make a Classification of N-Dimensional data where the inputs are arrays of integers and the final element in each array
        /// is the number of its category.
        ///
        /// The number of dimensions is taken from the first array (its length minus one) and applied to every array.
        /// </summary>
        /// <param name="pointsPlusClass">Data to classify. May be empty, in which case an empty Classification is returned.</param>
        /// <returns>A Classification of the points, labeled with the category number formatted using the invariant culture.</returns>
        public static Classification <UnsignedPoint, string> MakeClassification(IList <int[]> pointsPlusClass)
        {
            var c = new Classification <UnsignedPoint, string>();

            // With no rows there is no first row to take the dimension count from; nothing to classify.
            if (pointsPlusClass.Count == 0)
            {
                return(c);
            }

            var dimensions = pointsPlusClass[0].Length - 1; // The last number for each point is its category.

            foreach (var pointPlusClass in pointsPlusClass)
            {
                var point = new UnsignedPoint(pointPlusClass.Take(dimensions).ToArray());
                c.Add(point, pointPlusClass[dimensions].ToString(CultureInfo.InvariantCulture));
            }
            return(c);
        }
Esempio n. 14
0
        /// <summary>
        /// Find the points adjacent to the given point in the Hilbert ordering, then sort them by the cartesian distance, from nearest to farthest.
        /// </summary>
        /// <param name="point">Reference point to seek in the index.</param>
        /// <param name="rangeLength">Number of points to retrieve from the index. Half of these points will precede and half succeed the given point
        /// in the index, unless we are near the beginning or end of the index, in which case the range will be shifted
        /// so that it still covers as many points as the index can supply.</param>
        /// <param name="includePointItself">If false, the reference point will not be present in the results
        /// (one extra point is drawn from the index to compensate).
        /// If true, the point will be present in the results.</param>
        /// <returns>The points nearest to the reference point in both Hilbert and Cartesian ordering, sorted from nearest to farthest.</returns>
        public IEnumerable <UnsignedPoint> NearestFromRange(UnsignedPoint point, int rangeLength, bool includePointItself = false)
        {
            // Draw one extra point when the reference point is going to be filtered out of the window.
            var windowLength   = includePointItself ? rangeLength : rangeLength + 1;
            var middlePosition = SortedPosition(point);

            // Shift the window back when it would run past the end of the index, then forward
            // if that (or a position near the beginning) would place the start before zero.
            var lastFullStart = SortedPoints.Count() - windowLength;
            var rangeStart    = Math.Max(0, Math.Min(middlePosition - windowLength / 2, lastFullStart));

            return(SortedPoints
                   .Skip(rangeStart)
                   .Take(windowLength)
                   .Where(p => includePointItself || !p.Equals(point))
                   .OrderBy(p => p.Measure(point)));
        }
        /// <summary>
        /// Approximate the closest pair of points between two clusters using the first cluster's centroid.
        ///
        /// The centroid C1 of the first cluster is computed, then the point P2 of the second cluster nearest to C1
        /// is chosen, and finally the point P1 of the first cluster nearest to P2 is chosen.
        ///
        /// NOTE: If the two clusters overlap or are shaped irregularly, this is likely to return a poor result.
        /// If the clusters are spherical, the results are likely to be very good.
        /// </summary>
        /// <param name="color1">Indicates the first cluster to be searched.</param>
        /// <param name="color2">Indicates the second cluster to be searched.</param>
        /// <returns>An approximate result, including one point from each cluster and the square of the distance between them.</returns>
        public ClosestPair FindPairByCentroids(TLabel color1, TLabel color2)
        {
            var firstCluster  = Clusters.PointsInClass(color1);
            var secondCluster = Clusters.PointsInClass(color2);
            var centroid1     = UnsignedPoint.Centroid(firstCluster);

            // P2: the member of the second cluster nearest to the first cluster's centroid.
            var nearestToCentroid = secondCluster.OrderBy(candidate => centroid1.Measure(candidate)).First();

            // P1: the member of the first cluster nearest to P2, wrapped as a pair with its square distance.
            var candidatePairs = firstCluster.Select(
                candidate => new ClosestPair(color1, candidate, color2, nearestToCentroid, candidate.Measure(nearestToCentroid)));
            var bestPair = candidatePairs.OrderBy(pair => pair.SquareDistance).First();

            return bestPair.Swap(color1);
        }
Esempio n. 16
0
        /// <summary>
        /// Generate n points with an EllipsoidalGenerator centered at maxCoordinate / 2 in every dimension,
        /// using sigma as the deviation in every dimension, and return their distances from that center.
        /// </summary>
        /// <param name="n">Number of points to generate.</param>
        /// <param name="dimensions">Number of dimensions per point.</param>
        /// <param name="maxCoordinate">Half of this value (integer division) is used as each coordinate of the center.</param>
        /// <param name="sigma">Deviation passed to the generator for every dimension.</param>
        /// <returns>The distances from the center to each generated point, sorted in ascending order.</returns>
        static List <double> GaussianRadiusDistances(int n, int dimensions, int maxCoordinate, int sigma)
        {
            var centerCoordinates = Enumerable.Repeat(maxCoordinate / 2, dimensions).ToArray();
            var sigmas            = Enumerable.Repeat((double)sigma, dimensions).ToArray();
            var allDimensions     = Enumerable.Range(0, dimensions).ToArray();
            var generator         = new EllipsoidalGenerator(centerCoordinates, sigmas, allDimensions);

            // NOTE(review): one scratch array is handed to every Generate call; this presumes the
            // UnsignedPoint constructor does not retain a reference to it - confirm.
            var scratch     = new int[dimensions];
            var centerPoint = new UnsignedPoint(centerCoordinates);

            var generated = Enumerable.Range(0, n)
                            .Select(ordinal => new UnsignedPoint(generator.Generate(scratch)))
                            .ToList();

            return generated
                   .Select(point => centerPoint.Distance(point))
                   .OrderBy(distance => distance)
                   .ToList();
        }
 /// <summary>
 /// Ensure that Color1 matches the given color: if it does not, exchange Color1 with Color2 and Point1 with Point2.
 /// Color1 is first checked with ValidateColor.
 /// </summary>
 /// <param name="color1">The color that should match Color1.</param>
 /// <returns>This same ClosestPair, after any exchange.</returns>
 public ClosestPair Swap(TLabel color1)
 {
     ValidateColor(Color1, "Swapping with null Color1");

     // Already in the requested orientation; nothing to exchange.
     if (Color1.Equals(color1))
     {
         return this;
     }

     var originalPoint1 = Point1;
     Point1 = Point2;
     Point2 = originalPoint1;

     var originalColor1 = Color1;
     Color1 = Color2;
     Color2 = originalColor1;

     return this;
 }
        /// <summary>
        /// UnsignedPoint.SquareDistanceCompare has an optimization.
        /// This tests if that optimization can be used in a given case: it can when the given square distance
        /// is below the floor of the squared difference of the two magnitudes, or above the sum of the two square magnitudes.
        /// </summary>
        /// <returns><c>true</c>, if distance optimization is usable, <c>false</c> otherwise.</returns>
        /// <param name="p1">First point to compare.</param>
        /// <param name="p2">Second point to compare.</param>
        /// <param name="squareDistance">Test if the distance between the points is less than, equal to or greater than this given distance.</param>
        private bool IsDistanceOptimizationUsable(UnsignedPoint p1, UnsignedPoint p2, long squareDistance)
        {
            var magnitudeGap = p1.Magnitude - p2.Magnitude;
            var lowerBound   = (long)Math.Floor(magnitudeGap * magnitudeGap);

            // The upper limit is only evaluated when the lower limit did not already decide the matter.
            return squareDistance < lowerBound
                   || squareDistance > p1.SquareMagnitude + p2.SquareMagnitude;
        }
        /// <summary>
        /// Format a point as a delimited string record, without the terminating newline.
        /// The fields are the id from InputDataIds, the class label from FinalClassification,
        /// then each coordinate of the point in order.
        /// </summary>
        /// <returns>The record.</returns>
        /// <param name="point">Point to format.</param>
        /// <param name="fieldDelimiter">Field delimiter.</param>
        string PointToRecord(UnsignedPoint point, string fieldDelimiter = ",")
        {
            var label    = FinalClassification.GetClassLabel(point);
            var recordId = InputDataIds[point];
            var record   = new StringBuilder();

            record.Append(recordId);
            record.Append(fieldDelimiter);
            record.Append(label);

            foreach (var value in point.LazyCoordinates())
            {
                record.Append(fieldDelimiter);
                record.Append(value);
            }
            return record.ToString();
        }
Esempio n. 20
0
 /// <summary>
 /// For a pair of points, step through progressively smaller grid cell sizes and record for how many of them
 /// the two points share a cell. The cell size for a given number of bits is 2 raised to (BitsPerDimension - bits).
 /// </summary>
 /// <param name="point1">First point to test.</param>
 /// <param name="point2">Second point to test.</param>
 /// <param name="sameCellPerBits">Accumulate the results here, adding to what has been recorded for other pairs of points.
 /// The entry at index bits is incremented when the points share a cell at that number of bits.</param>
 private void LoopOverBits(UnsignedPoint point1, UnsignedPoint point2, int[] sameCellPerBits)
 {
     var bits = 1;
     // Stop at the first cell size that separates the points:
     // all subsequent values of bits will necessarily split them apart as well.
     while (bits < BitsPerDimension && InSameCell(point1, point2, 1 << (BitsPerDimension - bits)))
     {
         sameCellPerBits[bits]++;
         bits++;
     }
 }
Esempio n. 21
0
 /// <summary>
 /// Get the Hilbert position for a given point: its coordinates are first passed through Balance,
 /// then optionally permuted, and finally converted with HilbertIndex.
 /// The point may have its coordinates reduced in precision if bitsPerDimension is lower than the required value.
 /// </summary>
 /// <param name="unbalancedPoint">Point prior to balancing.</param>
 /// <param name="bitsPerDimension">Number of bits per dimension to use in forming the Hilbert position,
 /// which may be lower than the number of bits required to faithfully represent all coordinate values,
 /// causing the coordinate values of all coordinates to be reduced in precision.</param>
 /// <param name="perm">Permutation to apply to coordinates, scrambling their order in a consistent way for all points.
 /// If null, the balanced coordinates are used in their original order.</param>
 /// <returns>The Hilbert position.</returns>
 public BigInteger ToHilbertPosition(UnsignedPoint unbalancedPoint, int bitsPerDimension, Permutation <uint> perm = null)
 {
     // Balancing is needed whether or not a permutation follows.
     var balanced = Balance(unbalancedPoint.Coordinates, bitsPerDimension);

     if (perm != null)
     {
         var permuted = perm.ApplyToArray(balanced);
         return permuted.HilbertIndex(bitsPerDimension);
     }
     return balanced.HilbertIndex(bitsPerDimension);
 }
Esempio n. 22
0
        /// <summary>
        /// Measure from a 20-dimensional SparsePoint, constructed with a missing value of one, to a dense
        /// UnsignedPoint and verify the square distance.
        /// </summary>
        public void SparseToUnsignedMeasureWhereMissingValueIsPositive()
        {
            var missingValue      = 1U;
            var sparseCoordinates = new Dictionary <int, uint>
            {
                { 5,  1U },
                { 7,  2U },
                { 10, 3U },
                { 15, 4U }
            };
            var sparse = new SparsePoint(sparseCoordinates, 20, missingValue);
            var dense  = new UnsignedPoint(new[] { 0, 0, 0, 0, 4, 0, 0, 3, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0 });

            // Sum of square magnitudes minus twice the dot product: 46 + 30 - 2(4+6+6+1) = 42
            var expectedSquareDistance = 42L;
            var actualSquareDistance   = sparse.Measure(dense);

            Assert.AreEqual(expectedSquareDistance, actualSquareDistance, $"Sparse-to-unsigned Distances with MissingValue={missingValue} do not match.");
        }
Esempio n. 23
0
        /// <summary>
        /// Compute the centroid of the given points along with the largest and the mean distance
        /// from that centroid to any of the points.
        /// </summary>
        /// <param name="points">Points forming the cluster. If empty, MeanRadius is not assigned.</param>
        public ClusterRadius(IList <UnsignedPoint> points)
        {
            Centroid      = UnsignedPoint.Centroid(points);
            MaximumRadius = 0;

            var totalRadius = 0.0;
            foreach (var member in points)
            {
                var radius = Centroid.Distance(member);
                totalRadius  += radius;
                MaximumRadius = Math.Max(MaximumRadius, radius);
            }

            // Avoid dividing by zero when there are no points.
            if (points.Count <= 0)
            {
                return;
            }
            MeanRadius = totalRadius / points.Count;
        }
        /// <summary>
        /// Merge the clusters containing two points if the distance separating them does not exceed the permitted maximum.
        /// The points given here may be HilbertPoints from a HilbertIndex or UnsignedPoints already present in the Classification.
        /// Either way, each point is looked up in IdsToPoints by its UniqueId and the point found there is the one
        /// that is measured and whose cluster is merged.
        /// </summary>
        /// <returns><c>true</c>, if a new merge performed, <c>false</c> if too far to merge or already merged.</returns>
        /// <param name="p1">First point to compare.</param>
        /// <param name="p2">Second point.</param>
        /// <param name="maxSquareDistance">If a positive value, use this as the maximum square distance permitted between points.
        /// Otherwise, use MergeSquareDistance.</param>
        private bool MergeIfNear(UnsignedPoint p1, UnsignedPoint p2, long maxSquareDistance = 0)
        {
            var first  = IdsToPoints[p1.UniqueId];
            var second = IdsToPoints[p2.UniqueId];
            var limit  = maxSquareDistance > 0 ? maxSquareDistance : MergeSquareDistance;

            // A positive comparison result is treated as "too far apart to merge".
            if (first.SquareDistanceCompare(second, limit) > 0)
            {
                return false;
            }
            return Clusters.Merge(Clusters.GetClassLabel(first), Clusters.GetClassLabel(second));
        }
Esempio n. 25
0
        /// <summary>
        /// Unioning the results of several different indices, find the composite accuracy of using them all
        /// in combination to find the nearest neighbors.
        ///
        /// The true neighbors are the k points returned by Nearest on the first index. Every index then contributes
        /// the points from its NearestFromRange; those that are true neighbors are tallied once each.
        /// </summary>
        /// <param name="indices">Indices to combine. The first one supplies the true neighbors.</param>
        /// <param name="point">Point whose nearest neighbors are sought.</param>
        /// <param name="k">Number of nearest neighbors who are sought.</param>
        /// <param name="rangeLength">Number of points to draw from each index.</param>
        /// <returns>A value from zero to 1.0, where 1.0 means perfectly accurate.</returns>
        public static double CompositeAccuracy(IList <HilbertOrderedIndex> indices, UnsignedPoint point, int k, int rangeLength)
        {
            // Note the tricky use of Equivalent. The points from different indices should not be directly compared,
            // so every point is mapped to its equivalent in the first index before it is tested or tallied.
            var firstIndex   = indices[0];
            var allNeighbors = new HashSet <UnsignedPoint>();

            allNeighbors.UnionWith(firstIndex.Nearest(firstIndex.Equivalent(point), k));
            var fromRange = new HashSet <UnsignedPoint>();

            // Tally the first-index equivalents, so that a neighbor found through more than one index
            // is counted a single time.
            fromRange.UnionWith(
                indices.SelectMany(i => i.NearestFromRange(i.Equivalent(point), rangeLength)
                                   .Select(p => firstIndex.Equivalent(p))
                                   .Where(p => allNeighbors.Contains(p)))
                );
            return(fromRange.Count / (double)k);
        }
Esempio n. 26
0
        /// <summary>
        /// Create two random clusters that may be separated from one another by enough distance
        /// that they do not overlap, or be partly overlapping, or fully overlapping.
        ///
        /// The second cluster's center is a copy of the first, shifted along the first coordinate by
        /// the minimum separation scaled by (100 - overlapPercent) / 100. The clusters are labeled "0" and "1".
        ///
        /// NOTE: This type of setup is to test divisive clustering, that divides two partly mixed gaussians.
        /// </summary>
        /// <param name="overlapPercent">A number from zero to 100.
        /// If zero, the clusters do not overlap at all.
        /// If fifty, then the clusters partly overlap.
        /// If 100, the clusters have the same center, so are indistinguishable.</param>
        /// <returns>The two clusters.</returns>
        /// <exception cref="InvalidOperationException">The center generator did not produce a center for the first cluster.</exception>
        public Classification <UnsignedPoint, string> TwoClusters(double overlapPercent)
        {
            var clusters = new Classification <UnsignedPoint, string>();

            r = new FastRandom();

            var minDistance     = EllipsoidalGenerator.MinimumSeparation(MaxDistanceStdDev, Dimensions);
            var centerGenerator = new DiffuseGenerator(Dimensions, minDistance)
            {
                // Keep the centers of the clusters away from the edge, so that points do not go out of bounds and have their coordinates truncated.
                // Keep the maximum coordinate farther away, because we will pick the second point by shifting one coordinate
                // in the higher direction.
                Minimum = MaxDistanceStdDev,
                Maximum = MaxCoordinate - MaxDistanceStdDev - (int)minDistance
            };
            var iCluster       = 0;
            var clusterCenter1 = centerGenerator.Take(1).FirstOrDefault();

            // Fail with a descriptive error instead of a NullReferenceException on the Clone call below.
            if (clusterCenter1 == null)
            {
                throw new InvalidOperationException("Unable to generate a center for the first cluster.");
            }
            var clusterCenter2 = (int[])clusterCenter1.Clone();

            clusterCenter2[0] += (int)(minDistance * (100.0 - overlapPercent) / 100.0);
            var centers = new[] { clusterCenter1, clusterCenter2 };

            foreach (var clusterCenter in centers)
            {
                var clusterSize    = r.Next(MinClusterSize, MaxClusterSize);
                var pointGenerator = new EllipsoidalGenerator(clusterCenter, RandomDoubles(Dimensions, MinDistanceStdDev, MaxDistanceStdDev, r), Dimensions);
                var clusterId      = iCluster.ToString();
                foreach (var iPoint in Enumerable.Range(1, clusterSize))
                {
                    clusters.Add(
                        new UnsignedPoint(pointGenerator.Generate(new int[Dimensions])),
                        clusterId
                        );
                }
                iCluster++;
            }
            //TODO: Go back and recluster the points. Put each point into the cluster whose centroid
            //      it is nearest. Thus, if two clusters partly overlap, the points from one will be pushed into the other.
            return(clusters);
        }
        /// <summary>
        /// Combines the cluster containing p1 with the cluster containing p2, subject to a size restriction.
        ///
        /// The merge is refused when both clusters have reached UnmergeableSize (unless forced),
        /// or when the two points already carry the same class label.
        /// </summary>
        /// <param name="p1">A point from the first cluster to merge.</param>
        /// <param name="p2">A point from the second cluster to merge.</param>
        /// <param name="forceMerge">If true, the UnmergeableSize restriction is ignored.
        /// If false, the restriction is honored.</param>
        /// <returns>False if the points share a label or the size restriction blocks the merge;
        /// otherwise the result reported by Clusters.Merge.</returns>
        private bool Merge(UnsignedPoint p1, UnsignedPoint p2, bool forceMerge = false)
        {
            var label1 = Clusters.GetClassLabel(p1);
            var label2 = Clusters.GetClassLabel(p2);

            // Points that already share a label leave nothing to merge.
            if (label1.Equals(label2))
            {
                return false;
            }

            var count1 = Clusters.PointsInClass(label1).Count;
            var count2 = Clusters.PointsInClass(label2).Count;

            // Only the case where BOTH clusters have reached the limit is an obstacle;
            // a single large cluster may still absorb a smaller one.
            var bothAtLimit = count1 >= UnmergeableSize && count2 >= UnmergeableSize;
            if (bothAtLimit && !forceMerge)
            {
                return false;
            }

            return Clusters.Merge(label1, label2);
        }
        /// <summary>
        /// Verifies that UnsignedPoint.Measure returns the sum of the squared coordinate differences
        /// for two 100-dimensional points, by comparing it against an independently accumulated total.
        /// </summary>
        public void MeasureDistanceSquared()
        {
            const int dimensionCount = 100;
            var firstCoordinates  = new uint[dimensionCount];
            var secondCoordinates = new uint[dimensionCount];
            long expected = 0L;

            for (var d = 0; d < dimensionCount; d++)
            {
                // Different moduli and multipliers keep the two coordinate sequences out of step.
                var a = (uint)(d % 37) * 10;
                var b = (uint)(d % 18) * 17;
                firstCoordinates[d]  = a;
                secondCoordinates[d] = b;

                // Widen to long before subtracting so the unsigned difference cannot wrap around.
                long difference = (long)a - (long)b;
                expected += difference * difference;
            }

            var firstPoint  = new UnsignedPoint(firstCoordinates);
            var secondPoint = new UnsignedPoint(secondCoordinates);
            var actual      = firstPoint.Measure(secondPoint);

            Assert.AreEqual(expected, actual, "Distances do not match");
        }
        /// <summary>
        /// Brute-force search for the closest pair of points drawn from two clusters, one point from each.
        /// Every point labeled color1 is measured against every point labeled color2.
        ///
        /// Points belonging to any other cluster are ignored. When several pairs tie for the
        /// smallest square distance, the first pair encountered is kept.
        /// </summary>
        /// <param name="color1">Label of the first cluster.</param>
        /// <param name="color2">Label of the second cluster.</param>
        /// <returns>A ClosestPair built from the point with color1, the point with color2 and the square
        /// distance between them, after Swap(color1) has been applied to it.
        /// If no pair is examined, it is built from null points and a distance of long.MaxValue.</returns>
        public ClosestPair FindPairExhaustively(TLabel color1, TLabel color2)
        {
            UnsignedPoint bestFirst  = null;
            UnsignedPoint bestSecond = null;
            var           bestSoFar  = long.MaxValue;

            foreach (var candidate1 in Clusters.PointsInClass(color1))
            {
                foreach (var candidate2 in Clusters.PointsInClass(color2))
                {
                    var separation = candidate1.Measure(candidate2);
                    if (separation >= bestSoFar)
                    {
                        // Not strictly better than the current best, so keep the earlier pair.
                        continue;
                    }
                    bestSoFar  = separation;
                    bestFirst  = candidate1;
                    bestSecond = candidate2;
                }
            }

            var closest = new ClosestPair(color1, bestFirst, color2, bestSecond, bestSoFar);
            return closest.Swap(color1);
        }
        /// <summary>
        /// UnsignedPoint.SquareDistanceCompare has an optimization.
        /// This tests if an extension of that optimization can be used in a given case.
        ///
        /// The extension measures how far each point is from the corner where every coordinate equals
        /// the largest value representable in bitsPerDimension bits. From those two corner distances it
        /// derives a lower and an upper bound on the square distance between the points. If squareDistance
        /// is strictly below the lower bound or strictly above the upper bound, the optimization is usable.
        /// </summary>
        /// <returns><c>true</c>, if distance optimization is usable, <c>false</c> otherwise.</returns>
        /// <param name="p1">First point to compare.</param>
        /// <param name="p2">Second point to compare.</param>
        /// <param name="squareDistance">Test if the distance between the points is less than, equal to or greater than this given distance.</param>
        /// <param name="bitsPerDimension">Number of bits needed to represent the largest value of any coordinate of any point.</param>
        private bool IsExtendedDistanceOptimizationUsable(UnsignedPoint p1, UnsignedPoint p2, long squareDistance, int bitsPerDimension)
        {
            // Try the basic optimization first; the corner-based bounds are only a fallback.
            if (IsDistanceOptimizationUsable(p1, p2, squareDistance))
            {
                return(true);
            }

            // Shift a long rather than an int. An int shift masks the shift count to five bits, so
            // (1 << 32) evaluates to 1 and the maximum coordinate would come out as zero for 32-bit
            // coordinates; (1 << 31) - 1 would also depend on unchecked integer wraparound.
            var maxCoordinate     = (1L << bitsPerDimension) - 1;
            var cornerSqDistance1 = 0L;
            var cornerSqDistance2 = 0L;

            // Accumulate the square distance from each point to the all-maximum corner.
            for (var d = 0; d < p1.Dimensions; d++)
            {
                long delta;
                delta              = maxCoordinate - p1[d];
                cornerSqDistance1 += delta * delta;
                delta              = maxCoordinate - p2[d];
                cornerSqDistance2 += delta * delta;
            }
            var cornerDistance1 = Math.Sqrt(cornerSqDistance1);
            var cornerDistance2 = Math.Sqrt(cornerSqDistance2);

            // Lower bound (reverse triangle inequality): the points can be no closer to each other
            // than the difference of their distances to the shared corner. Flooring keeps the bound
            // conservative after the floating point square roots.
            var delta2 = cornerDistance1 - cornerDistance2;
            var low    = (long)Math.Floor(delta2 * delta2);

            if (squareDistance < low)
            {
                return(true);
            }

            // Upper bound: assuming no coordinate exceeds maxCoordinate, both corner-to-point offset
            // vectors have only non-negative components, so their dot product is non-negative and
            // the square distance between the points cannot exceed the sum of the two corner square distances.
            var high = cornerSqDistance1 + cornerSqDistance2;

            if (squareDistance > high)
            {
                return(true);
            }

            return(false);
        }