Exemple #1
0
        /// <summary>
        /// Turns the coordinates found on one command line into a HilbertPoint and reports its Hilbert index.
        /// Each coordinate is parsed as a floating point number, multiplied by 1000 and truncated to an integer.
        /// </summary>
        /// <param name="command_line">Tokens of the line; parseCommandLine extracts the coordinate strings from them.</param>
        /// <param name="index">Caller-supplied value passed through unchanged as the third tuple item.</param>
        /// <returns>A tuple holding the line rendered by lineToString, the Hilbert index of the scaled point,
        /// and <paramref name="index"/>.</returns>
        public static Tuple <string, BigInteger, int> commandLine_HI_initIndex(string[] command_line, int index)
        {
            int   bpd    = FindBitsPerDimension(6);
            float SCALAR = (float)Math.Pow(10, 3);
            var   coords = parseCommandLine(command_line);

            // Still sized by command_line.Length (not coords.Length): the array length is what is handed to
            // HilbertPoint, and any entries beyond coords.Length stay zero.
            // NOTE(review): if parseCommandLine can ever return more items than command_line holds,
            // the loop below overruns this array - confirm against parseCommandLine.
            int[] scaledIntCoordinates = new int[command_line.Length];

            for (int i = 0; i < coords.Length; i++)
            {
                float floatCoordinate  = (float)Double.Parse(coords[i], System.Globalization.NumberStyles.Float);
                float scaledCoordinate = floatCoordinate * SCALAR;

                // The cast truncates toward zero; it does not round.
                scaledIntCoordinates[i] = (int)scaledCoordinate;
            }

            HilbertPoint hilbertPoint = new HilbertPoint(scaledIntCoordinates, bpd);

            return(new Tuple <string, BigInteger, int>(lineToString(command_line), hilbertPoint.HilbertIndex, index));
        }
        /// <summary>
        /// Looks up a point in IdsToPoints by its id.
        /// </summary>
        /// <param name="id">Id of the point being sought.</param>
        /// <returns>The point stored under <paramref name="id"/>, or null when no such entry exists.</returns>
        public HilbertPoint FindById(int id)
        {
            HilbertPoint match;

            return(IdsToPoints.TryGetValue(id, out match) ? match : null);
        }
Exemple #3
0
        /// <summary>
        /// Counts the recorded neighbors of a point whose sorted positions fall inside a window of
        /// WindowRadius positions to either side of the point along the Hilbert curve.
        ///
        /// Only entries already present in Distances for this point are counted; MeasureWindow is called
        /// first. When the point lies too close to either end of the index, the window is slid inward
        /// so that it keeps its full width.
        /// </summary>
        /// <param name="point">Point whose neighbors are to be counted.</param>
        /// <returns>Count of neighbors inside the window.</returns>
        public int NeighborsInWindow(HilbertPoint point)
        {
            MeasureWindow();
            var position = Index.SortedPosition(point);

            // Slide the window center inward if the point is within WindowRadius of either end.
            var windowCenter = position < WindowRadius
                ? WindowRadius
                : (position > Count - WindowRadius - 1 ? Count - WindowRadius - 1 : position);

            var first = windowCenter - WindowRadius;
            var last  = windowCenter + WindowRadius;

            // If we wanted all points in the neighborhood, not just those in the window, we could
            // return Distances[position].Count instead. We do not: though it may give a more accurate
            // value for some points, it would worsen the correlation. If most points undercount and
            // some are accurate, that would be inferior.
            return(Distances[position].Keys.Count(key => first <= key && key <= last));
        }
        /// <summary>
        /// Composes an enumerable over a run of points in sorted order. The run begins rangeLength / 2
        /// positions before the given point, clamped so that it starts no earlier than position zero
        /// and no later than Count - rangeLength.
        /// Fewer than rangeLength items come back when the index holds fewer points than that.
        /// </summary>
        /// <param name="p">Point around which the range is placed.</param>
        /// <param name="rangeLength">Maximum number of points to return.</param>
        /// <returns>A lazily evaluated slice of SortedPoints.</returns>
        public IEnumerable <HilbertPoint> Range(HilbertPoint p, int rangeLength)
        {
            var first = SortedPosition(p) - rangeLength / 2;

            if (first < 0)
            {
                first = 0;
            }

            // Pull the start back so a full-length range still fits before the end of the list.
            var latestStart = Count - rangeLength;
            if (latestStart < first)
            {
                first = latestStart;
            }

            return(SortedPoints.Skip(first).Take(rangeLength));
        }
Exemple #5
0
        /// <summary>
        /// Generates Gaussian clusters, searches for an optimal Hilbert index over their points, and asserts
        /// that the estimated cluster count does not exceed the acceptable limit.
        /// </summary>
        /// <param name="hilbertTries">Passed to OptimalIndex.Search as the maximum number of trials.</param>
        /// <param name="minClusterSize">Smallest cluster size to generate.</param>
        /// <param name="maxClusterSize">Largest cluster size to generate.</param>
        /// <param name="dimensions">Number of dimensions per point.</param>
        /// <param name="clusterCount">Number of clusters to generate.</param>
        /// <param name="acceptableClusterCount">Upper bound on the estimated cluster count for the test to pass.</param>
        /// <param name="bitsPerDimension">Bits per dimension used when converting points to HilbertPoints.</param>
        /// <param name="outlierSize">Outlier size passed to the search.</param>
        /// <param name="noiseSkipBy">Noise skip-by value passed to the search.</param>
        private void OptimalIndexTestCase(
            int hilbertTries, int minClusterSize, int maxClusterSize, int dimensions, int clusterCount, int acceptableClusterCount,
            int bitsPerDimension, int outlierSize, int noiseSkipBy)
        {
            const int maxIterationsWithoutImprovement = 4;

            var generator = new GaussianClustering
            {
                ClusterCount   = clusterCount,
                Dimensions     = dimensions,
                MaxCoordinate  = 1000,
                MinClusterSize = minClusterSize,
                MaxClusterSize = maxClusterSize
            };
            var hilbertPoints = generator
                                .MakeClusters()
                                .Points()
                                .Select(p => HilbertPoint.CastOrConvert(p, bitsPerDimension, true))
                                .ToList();
            var results = OptimalIndex.Search(hilbertPoints, outlierSize, noiseSkipBy, hilbertTries, maxIterationsWithoutImprovement);
            var message = $"Estimated cluster count = {results.EstimatedClusterCount}, actual = {clusterCount}, acceptable = {acceptableClusterCount}";

            Console.WriteLine(message);
            Assert.LessOrEqual(results.EstimatedClusterCount, acceptableClusterCount, $"HilbertIndex fragmented by more than 50%: {message}");
        }
        /// <summary>
        /// Measures how well NearestFromRange approximates the true nearest neighbors of one reference point.
        /// The true K-nearest neighbors come from Nearest; the approximate ones are drawn from the Hilbert
        /// index by NearestFromRange. The score is the number of approximate results that are true
        /// neighbors, divided by k.
        /// </summary>
        /// <param name="point">Reference point whose neighbors are sought.</param>
        /// <param name="k">Number of nearest neighbors sought.</param>
        /// <param name="rangeLength">Number of points in the Hilbert index to sample.</param>
        /// <returns>A value from zero to 1.0, where 1.0 means perfectly accurate.</returns>
        public double Accuracy(HilbertPoint point, int k, int rangeLength)
        {
            var trueNeighbors = new HashSet <HilbertPoint>(Nearest(point, k));
            var hits          = 0;

            foreach (var candidate in NearestFromRange(point, rangeLength))
            {
                if (trueNeighbors.Contains(candidate))
                {
                    hits++;
                }
            }
            return(hits / (double)k);
        }
Exemple #7
0
        /// <summary>
        /// For every cluster ("color"), compares the closest other cluster found exhaustively with the
        /// ranking produced by the approximate all-pairs search, and asserts that the exhaustively found
        /// cluster is ranked no worse than third by the approximate distances.
        /// </summary>
        /// <param name="nPoints">Number of points in each generated cluster.</param>
        /// <param name="dimensions">Number of dimensions per point.</param>
        /// <param name="numClusters">Number of clusters to generate.</param>
        /// <param name="numCurvesToTry">Last argument handed to OptimalIndex.Search.</param>
        public void AllColorPairsClosestClusterTest(int nPoints, int dimensions, int numClusters, int numCurvesToTry)
        {
            // Slot zero is never used, so a rank can index the array directly (one-based).
            var countsByRank = new int[numClusters + 1];
            var generator    = new GaussianClustering
            {
                ClusterCount   = numClusters,
                Dimensions     = dimensions,
                MaxCoordinate  = 1000,
                MinClusterSize = nPoints,
                MaxClusterSize = nPoints
            };
            var largestRatio = 1.0;
            var totalRatio   = 0.0;
            var colorsSeen   = 0;
            var clusters     = generator.MakeClusters();

            var bitsPerDimension = (1 + generator.MaxCoordinate).SmallestPowerOfTwo();
            var hilbertPoints    = clusters.Points().Select(up => HilbertPoint.CastOrConvert(up, bitsPerDimension, true)).ToList();
            var results          = OptimalIndex.Search(
                hilbertPoints,
                5 /*outlier size */, 10 /* NoiseSkipBy */, 1 /* ReducedNoiseSkipBy */, numCurvesToTry
                );

            var finder        = new PolyChromaticClosestPoint <string>(clusters, results.Index);
            var allColorPairs = finder.FindAllClustersApproximately();

            foreach (var color in clusters.ClassLabels())
            {
                // Swap(color) presumably orients each pair so that the given color comes first - TODO confirm.
                var exactPair     = finder.FindClusterExhaustively(color).Swap(color);
                var pairsForColor = allColorPairs
                                    .Where(pair => pair.Color1.Equals(color) || pair.Color2.Equals(color))
                                    .Select(pair => pair.Swap(color))
                                    .ToList();

                // Approximate distance to the cluster that the exhaustive search says is closest.
                var approximateDistance = pairsForColor.First(pair => pair.Color2.Equals(exactPair.Color2)).SquareDistance;

                // Rank = one more than the number of clusters the approximation places strictly closer.
                var rank = 1 + pairsForColor.Count(pair => pair.SquareDistance < approximateDistance);
                countsByRank[rank]++;
#pragma warning disable RECS0018 // Comparison of floating point numbers with equality operator
                var ratio = exactPair.SquareDistance == 0.0 ? 0 : approximateDistance / (double)exactPair.SquareDistance;
#pragma warning restore RECS0018 // Comparison of floating point numbers with equality operator
                totalRatio += ratio;
                colorsSeen++;
                largestRatio = Math.Max(largestRatio, ratio);
            }
            Debug.WriteLine(string.Format("Worst distance overage   = {0:N3}%", (largestRatio - 1.0) * 100.0));
            Debug.WriteLine(string.Format("Average distance overage = {0:N3}%", ((totalRatio / colorsSeen) - 1.0) * 100.0));
            for (var rank = 1; rank <= numClusters; rank++)
            {
                // Always report the top three ranks; report lower ranks only when they occurred.
                if (rank < 4 || countsByRank[rank] > 0)
                {
                    Debug.WriteLine(string.Format("For {0} Clusters the closest cluster found was Ranked #{1}.", countsByRank[rank], rank));
                }
            }
            // Accept a win, place or show: the true closest cluster must be ranked first, second or third by the approximate measure.
            var topThree = countsByRank[1] + countsByRank[2] + countsByRank[3];
            Assert.IsTrue(topThree == numClusters,
                          string.Format("Found the closest cluster for {0} colors", countsByRank[1])
                          );
        }
        /// <summary>
        /// Find the points adjacent to the given point in the Hilbert ordering, then sort them by the cartesian distance, from nearest to farthest.
        /// </summary>
        /// <param name="point">Reference point to seek in the index.</param>
        /// <param name="rangeLength">Number of points to retrieve from the index. Half of these points will precede and half succeed the given point
        /// in the index, unless we are near the beginning or end of the index, in which case the range will be shifted.</param>
        /// <param name="includePointItself">If false, the reference point will not be present in the results.
        /// If true, the point will be present in the results.</param>
        /// <returns>The points nearest to the reference point in both Hilbert and Cartesian ordering, sorted from nearest to farthest.</returns>
        public IEnumerable <HilbertPoint> NearestFromRange(HilbertPoint point, int rangeLength, bool includePointItself = false)
        {
            // When the reference point is filtered out, sample one extra position to make up for it.
            rangeLength = includePointItself ? rangeLength : rangeLength + 1;
            var middlePosition = SortedPosition(point);

            // Shift the window back when it would run off the end of the index, so that a full
            // rangeLength of points is still sampled there; then clamp at the beginning. The outer
            // Math.Max also covers an index holding fewer than rangeLength points.
            var latestStart = SortedPoints.Count - rangeLength;
            var rangeStart  = Math.Max(0, Math.Min(middlePosition - rangeLength / 2, latestStart));

            return(SortedPoints
                   .Skip(rangeStart)
                   .Take(rangeLength)
                   .Where(p => includePointItself || !p.Equals(point))
                   .OrderBy(p => p.Measure(point)));
        }
        /// <summary>
        /// Orders the points of Clusters by their position along a Hilbert curve and stores the result in SortedPoints.
        /// Each point is converted to a HilbertPoint for sorting, then mapped back to the original point.
        /// </summary>
        private void SortPoints()
        {
            // Use enough bits per dimension to hold the largest coordinate present in the data.
            var largestCoordinate = Clusters.Points().Max(p => p.MaxCoordinate);
            var bits              = ((int)largestCoordinate + 1).SmallestPowerOfTwo();
            var originals         = new Dictionary <HilbertPoint, UnsignedPoint> ();
            var transformed       = new List <HilbertPoint> ();

            foreach (UnsignedPoint original in Clusters.Points())
            {
                var hilbertPoint = new HilbertPoint(original.Coordinates, bits);
                originals [hilbertPoint] = original;
                transformed.Add(hilbertPoint);
            }
            transformed.Sort();

            var sorted = new List <UnsignedPoint> (transformed.Count);
            foreach (var hilbertPoint in transformed)
            {
                sorted.Add(originals [hilbertPoint]);
            }
            SortedPoints = sorted;
        }
        /// <summary>
        /// Tests whether two points are adjacent: exactly one coordinate differs between them,
        /// and it differs by exactly one.
        /// </summary>
        /// <param name="p1">First point; its Dimensions value bounds the comparison.</param>
        /// <param name="p2">Second point.</param>
        /// <returns><c>true</c> if the points are adjacent, <c>false</c> otherwise.</returns>
        static bool ArePointsAdjacent(HilbertPoint p1, HilbertPoint p2)
        {
            var changedDimensions = 0;
            var largestStep       = 0;

            for (var d = 0; d < p1.Dimensions; d++)
            {
                var step = Math.Abs(p1[d] - p2[d]);
                if (step == 0)
                {
                    continue;
                }
                changedDimensions++;
                if (step > largestStep)
                {
                    largestStep = step;
                }
            }
            return(changedDimensions == 1 && largestStep == 1);
        }
        /// <summary>
        /// Unioning the results of several different indices, find the composite accuracy of using them all
        /// in combination to find the nearest neighbors.
        /// </summary>
        /// <param name="indices">Indices to combine. The first one supplies the true nearest neighbors.</param>
        /// <param name="point">Point whose nearest neighbors are sought.</param>
        /// <param name="k">Number of nearest neighbors that are sought.</param>
        /// <param name="rangeLength">Number of points to draw from each index.</param>
        /// <returns>A value from zero to 1.0, where 1.0 means perfectly accurate.</returns>
        public static double CompositeAccuracy(IList <HilbertIndex> indices, HilbertPoint point, int k, int rangeLength)
        {
            // Note the tricky use of Equivalent. The points from different indices should not be directly compared,
            // so every candidate is mapped to the equivalent point of the first index BEFORE it is tested and
            // tallied. Tallying the per-index points themselves could count the same neighbor once for each
            // index that finds it, which would push the result above 1.0.
            var firstIndex   = indices[0];
            var allNeighbors = new HashSet <HilbertPoint>(firstIndex.Nearest(firstIndex.Equivalent(point), k));

            var found = new HashSet <HilbertPoint>(
                indices
                .SelectMany(i => i.NearestFromRange(i.Equivalent(point), rangeLength))
                .Select(p => firstIndex.Equivalent(p))
                .Where(p => allNeighbors.Contains(p))
                );

            return(found.Count / (double)k);
        }
        /// <summary>
        /// Builds an index over every point of a Classification, optionally extending each point with an
        /// extra dimension that holds the number of the cluster it belongs to.
        /// </summary>
        /// <param name="clusters">Clusters of points, which could be UnsignedPoints or HilbertPoints.</param>
        /// <param name="bitsPerDimension">Bits per dimension to use when transforming UnsignedPoints into HilbertPoints,
        /// should that be necessary. If non-positive, the value is computed from the data by FindBitsPerDimension.</param>
        /// <param name="addClassificationDimension">If <c>true</c>, a classification dimension is appended to each point.
        /// Its value is the zero-based position of the point's cluster in the order that
        /// Classification.LabelToPoints.Values iterates, which is arbitrary.</param>
        public HilbertIndex(Classification <UnsignedPoint, string> clusters, int bitsPerDimension = 0, bool addClassificationDimension = false)
        {
            var bits = bitsPerDimension > 0 ? bitsPerDimension : FindBitsPerDimension(clusters.Points());

            UnsortedPoints = new List <HilbertPoint>();

            uint clusterNumber = 0;
            foreach (var cluster in clusters.LabelToPoints.Values)
            {
                foreach (var member in cluster)
                {
                    var source = addClassificationDimension ? member.AppendCoordinate(clusterNumber) : member;
                    UnsortedPoints.Add(HilbertPoint.CastOrConvert(source, bits, true));
                }
                clusterNumber++;
            }
            InitIndexing();
        }
Exemple #13
0
        /// <summary>
        /// Counts the neighbors recorded for the given point in Distances.
        /// </summary>
        /// <param name="point">Point whose neighbors are to be counted.</param>
        /// <param name="allNeighbors">If false, only the neighbors already known from previous measurements are counted.
        /// If true and the point is not yet marked AllMeasured, the distance from this point to every other point
        /// that is not itself AllMeasured is measured first, after which the point is marked complete.</param>
        /// <returns>Number of entries recorded for the point in Distances.</returns>
        public int Neighbors(HilbertPoint point, bool allNeighbors = true)
        {
            var position = Index.SortedPosition(point);

            if (!AllMeasured[position] && allNeighbors)
            {
                var other = 0;
                while (other < Count)
                {
                    // A point that is already AllMeasured has had its distance to this point computed,
                    // so that particular distance need not be recomputed.
                    var skip = other == position || AllMeasured[other];
                    if (!skip)
                    {
                        Measure(position, other, true);
                    }
                    other++;
                }
                Complete(point);
            }
            return(Distances[position].Count);
        }
        /// <summary>
        /// Verify the transformation in both directions, from 1-Dimensional index to N-dimensional point and back,
        /// and verify that points with consecutive Hilbert indices are adjacent.
        ///
        /// This evaluates 2^(dims*bits) points, so be careful or the test will run for a long time.
        /// </summary>
        /// <param name="dims">Dimensions for each point.</param>
        /// <param name="bits">Bits per dimension.</param>
        /// <exception cref="System.ArgumentOutOfRangeException">If dims * bits is negative or exceeds 30,
        /// in which case the number of points cannot be held in an int.</exception>
        public void AdjacentPointsCase(int dims, int bits)
        {
            // C# masks an int shift count to its low five bits, so 1 << 32 equals 1 (the test would pass
            // vacuously) and 1 << 31 is negative. Refuse such sizes rather than silently testing nothing.
            var totalBits = bits * dims;
            if (totalBits < 0 || totalBits > 30)
            {
                throw new System.ArgumentOutOfRangeException("bits", "dims * bits must be between 0 and 30.");
            }
            var pointCount = 1 << totalBits;

            // Only the previous point is needed for the adjacency check, so the points are not all kept.
            HilbertPoint previous = null;

            for (var i = 0; i < pointCount; i++)
            {
                var hilbertIndex = new BigInteger(i);
                var current      = new HilbertPoint(hilbertIndex, dims, bits);
                if (i > 0)
                {
                    Assert.IsTrue(ArePointsAdjacent(previous, current),
                                  string.Format("Points {0} and {1}",
                                                FormatPoint(previous), FormatPoint(current)));
                }
                AssertPointMapsToHilbertIndex(current.Coordinates, hilbertIndex, dims, bits);
                previous = current;
            }
        }
Exemple #15
0
 /// <summary>
 /// Estimates the density of points local to the given point.
 /// When an Estimator is set, it is fed the distances from the point to the other points in a window
 /// along the Hilbert curve; otherwise the result is the neighbor count from Distances.NeighborsInWindow.
 /// </summary>
 /// <returns>The density.</returns>
 /// <param name="point">Point whose local density is sought.</param>
 /// <param name="windowRadius">The distance to twice this many points will be measured,
 /// half to the left and half to the right of the given point along the Hilbert curve.</param>
 public long EstimatedDensity(HilbertPoint point, int windowRadius)
 {
     if (Estimator == null)
     {
         // If our windowRadius and the one used by Distances are not the same, adjust the memo.
         if (Distances.WindowRadius != windowRadius)
         {
             Distances.WindowRadius = windowRadius;
         }
         return(Distances.NeighborsInWindow(point));
     }

     var span     = Math.Min(Count, 2 * windowRadius + 1);
     var position = Index.SortedPosition(point);

     // Center the window on the point, then slide it inward so it stays within [0, Count).
     var first = Math.Min(Math.Max(0, position - windowRadius), Count - span);

     var distances = Enumerable
                     .Range(first, span)
                     .Where(other => other != position)
                     .Select(other => Distances.Measure(position, other, false));

     return(Estimator(distances));
 }
        /// <summary>
        /// Prints the Hilbert index of every point of the two-dimensional grid that uses two bits per dimension.
        /// This is a diagnostic listing only; nothing is asserted.
        /// </summary>
        public void CartesianToHilbert_Dim2Bits2()
        {
            const int bits     = 2;
            const int sideSize = 1 << bits;
            var       report   = new StringBuilder();

            for (var cell = 0; cell < sideSize * sideSize; cell++)
            {
                // Row-major walk over the grid: the row varies slowest, the column fastest.
                var cartesianPoint = new[] { cell / sideSize, cell % sideSize };
                var hilbertIndex   = new HilbertPoint(cartesianPoint, bits).HilbertIndex;

                report.Append("Cart = [");
                report.Append(string.Join(",", cartesianPoint));
                report.Append("] Hilbert = ");
                report.Append(hilbertIndex.ToString());
                report.AppendLine();
            }

            Console.WriteLine(report.ToString());
        }
Exemple #17
0
 /// <summary>
 /// Measures the square distance between two points, possibly reusing a memoized value.
 /// The points are translated to their sorted positions in Index and the work is delegated
 /// to the position-based overload of Measure.
 /// </summary>
 /// <param name="point1">First point.</param>
 /// <param name="point2">Second point.</param>
 /// <param name="limitToNeighborhood">If false, return the correct square distance in all cases, and record its value
 /// if it does not exceed the NeighborhoodRadius and has not yet been recorded.
 /// If true and the distance has already been measured and recorded, return the recorded (and correct) square distance.
 /// If true and the distance has not yet been measured and AllMeasured is not set for either point,
 /// compute and return the proper square distance and record it if it does not exceed the NeighborhoodRadius.
 /// Otherwise, AllMeasured is true for one of the points and the value was not recorded because it exceeds
 /// NeighborhoodRadius, therefore return long.MaxValue.
 /// </param>
 /// <returns>The square distance, or long.MaxValue in the case described above.</returns>
 public long Measure(HilbertPoint point1, HilbertPoint point2, bool limitToNeighborhood = false)
 {
     var position1 = Index.SortedPosition(point1);
     var position2 = Index.SortedPosition(point2);

     return(Measure(position1, position2, limitToNeighborhood));
 }
 /// <summary>
 /// Finds the bits per dimension for a set of points by passing the largest MaxCoordinate
 /// among them to HilbertPoint.FindBitsPerDimension.
 /// </summary>
 /// <param name="points">Points to study. An empty sequence makes the Max call throw.</param>
 /// <returns>The value reported by HilbertPoint.FindBitsPerDimension.</returns>
 private static int FindBitsPerDimension(IEnumerable <UnsignedPoint> points)
 {
     var largestCoordinate = points.Max(p => p.MaxCoordinate);

     return(HilbertPoint.FindBitsPerDimension((int)largestCoordinate));
 }
 /// <summary>
 /// If two indices were composed from the same points but with their coordinates differently permuted,
 /// the corresponding points retain the same UniqueId (which isn't so unique after all).
 /// This looks up, in this index, the point that corresponds to a point from another index.
 /// </summary>
 /// <param name="p">Point taken from another index.</param>
 /// <returns>The result of FindById for the UniqueId of <paramref name="p"/>.</returns>
 public HilbertPoint Equivalent(HilbertPoint p)
 {
     var sharedId = p.UniqueId;

     return(FindById(sharedId));
 }
 /// <summary>
 /// Pretty prints a HilbertPoint as its Hilbert index followed by its comma-separated coordinates.
 /// </summary>
 /// <param name="p">Point to pretty print.</param>
 /// <returns>Formatted point.</returns>
 static string FormatPoint(HilbertPoint p)
 {
     var hilbertIndex = p.HilbertIndex;
     var coordinates  = string.Join(",", p.Coordinates);

     return(string.Format("Index: {0} Coords: [{1}]", hilbertIndex, coordinates));
 }
Exemple #21
0
 /// <summary>
 /// Counts exactly the points that are near the given point, that is, whose square distance
 /// does not exceed NeighborhoodRadius.
 ///
 /// This may require comparing the point to every other point, which is expensive.
 /// However, if it is performed for several points, many of the distance computations will be reused.
 ///
 /// The value can be used as a density for density-based clustering.
 /// </summary>
 /// <param name="point">Point whose neighbors we must count.</param>
 /// <returns>The count of neighbors.</returns>
 public int ExactNeighbors(HilbertPoint point)
 {
     // Ask Distances to consider all neighbors, not just those already known.
     const bool measureAll = true;

     return(Distances.Neighbors(point, measureAll));
 }
 /// <summary>
 /// Gets the zero-based position of the point in UnsortedPoints.
 /// </summary>
 /// <param name="p">Point to look up in Index.</param>
 /// <returns>A zero-based position into the UnsortedPoints list.</returns>
 public int UnsortedPosition(HilbertPoint p)
 {
     var entry = Index[p];

     return(entry.Original);
 }
Exemple #23
0
        /// <summary>
        /// Prints a CSV table comparing, percentile by percentile, the distance between points that are
        /// consecutive in the optimized Hilbert index with the distance between randomly chosen pairs of points.
        /// The value returned by Measure is treated as a square distance: its square root is printed.
        /// This is a diagnostic listing only; nothing is asserted.
        /// </summary>
        public void DistanceDistribution()
        {
            /*
             *      Percentile,By Index,By Random
             *      -----------------------------
             *      0%,111.35,146.55
             *      1%,142.06,255.96
             *      2%,147.21,2163.43
             *      3%,151.2,2214.15
             *      4%,154.06,2245.2
             *      5%,156.24,2271.37
             *      6%,158.38,2292.29
             *      7%,160.42,2313.55
             *      8%,162.29,2327.14
             *      9%,164.07,2345.25
             *      10%,165.41,2359.95
             *      11%,166.72,2372.83
             *      12%,167.99,2386.15
             *      13%,169.29,2398.47
             *      14%,170.43,2410.01
             *      15%,171.53,2422.34
             *      16%,172.48,2432.43
             *      17%,173.58,2443.08
             *      18%,174.73,2454.27
             *      19%,175.56,2463.71
             *      20%,176.35,2472.97
             *      21%,177.35,2483.24
             *      22%,178.3,2491.9
             *      23%,179.1,2501.44
             *      24%,179.82,2510.26
             *      25%,180.64,2517.73
             *      26%,181.55,2524.97
             *      27%,182.33,2531.58
             *      28%,182.98,2538.08
             *      29%,183.67,2543.83
             *      30%,184.33,2550.93
             *      31%,185.09,2556.59
             *      32%,185.7,2563.37
             *      33%,186.41,2570.29
             *      34%,187.09,2577.29
             *      35%,187.7,2583.56
             *      36%,188.43,2589.95
             *      37%,189.07,2596.13
             *      38%,189.71,2602.24
             *      39%,190.46,2608.28
             *      40%,191.08,2615.25
             *      41%,191.79,2620.81
             *      42%,192.46,2626.02
             *      43%,193.09,2632.7
             *      44%,193.71,2638.18
             *      45%,194.31,2643.35
             *      46%,194.98,2648.69
             *      47%,195.65,2655.47
             *      48%,196.3,2660.26
             *      49%,196.96,2666.37
             *      50%,197.66,2670.94
             *      51%,198.34,2677.09
             *      52%,199.07,2681.9
             *      53%,199.72,2687.11
             *      54%,200.3,2692.42
             *      55%,201.06,2697.92
             *      56%,201.71,2703.76
             *      57%,202.4,2710.17
             *      58%,203.16,2715.06
             *      59%,203.82,2720.25
             *      60%,204.51,2725.99
             *      61%,205.32,2731.6
             *      62%,206.08,2736.59
             *      63%,206.79,2741.72
             *      64%,207.58,2746.59
             *      65%,208.29,2754.03
             *      66%,209.07,2760.81
             *      67%,209.8,2766.65
             *      68%,210.68,2771.98
             *      69%,211.71,2778.27
             *      70%,212.38,2784.23
             *      71%,213.19,2790.71
             *      72%,213.92,2796.42
             *      73%,214.82,2802.84
             *      74%,215.68,2809.36
             *      75%,216.54,2814.55
             *      76%,217.48,2821.32
             *      77%,218.43,2827.56
             *      78%,219.35,2833.35
             *      79%,220.28,2840.72
             *      80%,221.33,2848.87
             *      81%,222.31,2856.89
             *      82%,223.42,2864
             *      83%,224.46,2872.51
             *      84%,225.83,2881.09
             *      85%,227.06,2891.57
             *      86%,228.27,2900.46
             *      87%,229.63,2910.46
             *      88%,231.55,2919.5
             *      89%,233.59,2933.76
             *      90%,235.6,2944.88
             *      91%,237.25,2959.45
             *      92%,239.83,2976.08
             *      93%,241.88,2990.4
             *      94%,244.97,3010.08
             *      95%,248.23,3029.15
             *      96%,252.34,3052.37
             *      97%,260.68,3074.84
             *      98%,282.76,3112.43      *** Note the jump from 282 to 2550, which shows that the characteristic distance is about 282.
             *      99%,2550.87,3170.93
             *      100%,3114.89,3412.57
             */
            var data = new GaussianClustering
            {
                ClusterCount   = 100,
                Dimensions     = 50,
                MaxCoordinate  = 1000,
                MinClusterSize = 50,
                MaxClusterSize = 150
            };
            var clusters         = data.MakeClusters();
            var bitsPerDimension = 10;
            var points           = clusters.Points().Select(p => HilbertPoint.CastOrConvert(p, bitsPerDimension, true)).ToList();
            var results          = OptimalIndex.Search(
                points,
                5,                     // outlierSize
                10,                    // noiseSkipBy
                1000,                  // maxTrials
                4                      // maxIterationsWithoutImprovement
                );
            var pointsFromIndex  = results.Index.SortedPoints;
            var distancesRandom  = new List <long>();
            var distancesHilbert = new List <long>();
            var n   = pointsFromIndex.Count;
            var rng = new FastRandom();

            // Collect n - 1 distances of each kind: neighbors in index order, and random pairs.
            for (var i = 0; i < n - 1; i++)
            {
                var p1 = pointsFromIndex[i];
                var p2 = pointsFromIndex[i + 1];
                distancesHilbert.Add(p1.Measure(p2));

                var p3 = pointsFromIndex[rng.Next(n)];
                var p4 = pointsFromIndex[rng.Next(n)];
                distancesRandom.Add(p3.Measure(p4));
            }
            distancesHilbert.Sort();
            distancesRandom.Sort();
            Console.WriteLine("Percentile,By Index,By Random");
            for (var percentile = 0; percentile <= 100; percentile++)
            {
                // Each list holds n - 1 entries, so the last valid position is n - 2.
                var i           = Math.Min(n - 2, (n - 1) * percentile / 100);
                var distHilbert = Math.Round(Math.Sqrt(distancesHilbert[i]), 2);
                var distRandom  = Math.Round(Math.Sqrt(distancesRandom[i]), 2);

                // One row per line, matching the header and the sample table above.
                Console.WriteLine($"{percentile}%,{distHilbert},{distRandom}");
            }
        }
 /// <summary>
 /// Finds the K-nearest neighbors of a given point according to the cartesian distance between the point
 /// and its neighbors.
 ///
 /// NOTE: This compares the point to all other points, hence is more costly than NearestFromRange,
 /// but it is guaranteed to find all near neighbors.
 /// </summary>
 /// <param name="point">Reference point whose neighbors are sought.</param>
 /// <param name="k">Number of nearest neighbors to find.</param>
 /// <param name="includePointItself">If false, the point is not considered its own neighbor and will not be present in the results.
 /// If true, the point is considered its own neighbor and will be present in the results,
 /// unless all the nearest neighbors are zero distance from this point, in which case it might not make the cut.</param>
 /// <returns>The nearest neighbors of the given point, sorted from nearest to farthest.</returns>
 public IEnumerable <HilbertPoint> Nearest(HilbertPoint point, int k, bool includePointItself = false)
 {
     var candidates = SortedPoints.Where(candidate => includePointItself || !candidate.Equals(point));

     return(candidates.BottomN <HilbertPoint, long>(point, k));
 }
Exemple #25
0
 /// <summary>
 /// Marks a point as AllMeasured, meaning that the distance from that point to all other points
 /// has been measured and the smaller distances of interest have been recorded.
 /// </summary>
 /// <param name="point">Point to mark; its sorted position in Index selects the flag to set.</param>
 public void Complete(HilbertPoint point)
 {
     var position = Index.SortedPosition(point);

     AllMeasured[position] = true;
 }
Exemple #26
0
        /// <summary>
        /// Generates 50 clusters of 100 points each in 100 dimensions using GaussianClustering, then for every
        /// cluster label compares the closest cluster found exhaustively against the ones found by the
        /// approximate and the iterative searches.
        ///
        /// The test passes only if, for every cluster, the approximate search names the same neighboring color as
        /// the exhaustive search and reports a square distance no greater than the exhaustive one.
        /// The crosscheck (iterative) tally is reported in the failure message but is not asserted.
        /// </summary>
        public void ClosestOfFiftyClusters()
        {
            const int outlierSize = 5;
            const int noiseSkipBy = 10;

            int curvesToTry       = 1000;
            var colorMatches      = 0;
            var crosscheckMatches = 0;
            var distanceMatches   = 0;
            var pointsPerCluster  = 100;
            var dimensionCount    = 100;
            var requestedClusters = 50;

            var generator = new GaussianClustering
            {
                ClusterCount   = requestedClusters,
                Dimensions     = dimensionCount,
                MaxCoordinate  = 1000,
                MinClusterSize = pointsPerCluster,
                MaxClusterSize = pointsPerCluster
            };

            // Running bests across all colors, starting from default-constructed pairs.
            var bestExact       = new PolyChromaticClosestPoint<string>.ClosestPair();
            var bestApproximate = new PolyChromaticClosestPoint<string>.ClosestPair();

            var bitsPerDimension = (1 + generator.MaxCoordinate).SmallestPowerOfTwo();
            var clusters         = generator.MakeClusters();

            Assert.AreEqual(requestedClusters, clusters.NumPartitions, "Test data are grouped into fewer clusters than requested.");

            PolyChromaticClosestPoint<string> pccp;

            if (curvesToTry > 1)
            {
                // Search for an index first and hand it to the closest-point finder.
                var reducedNoiseSkipBy = 1;
                var hilbertPoints      = clusters.Points()
                                                 .Select(up => HilbertPoint.CastOrConvert(up, bitsPerDimension, true))
                                                 .ToList();
                var searchResult = OptimalIndex.Search(hilbertPoints, outlierSize, noiseSkipBy, reducedNoiseSkipBy, curvesToTry);

                pccp = new PolyChromaticClosestPoint<string>(clusters, searchResult.Index);
            }
            else
            {
                pccp = new PolyChromaticClosestPoint<string>(clusters);
            }

            foreach (var color in pccp.Clusters.ClassLabels())
            {
                var exact       = pccp.FindClusterExhaustively(color);
                var approximate = pccp.FindClusterApproximately(color);
                var crosscheck  = pccp.FindClusterIteratively(color);

                // Tally agreement between the exhaustive answer and the two cheaper searches.
                if (approximate.SquareDistance <= exact.SquareDistance)
                {
                    distanceMatches++;
                }

                if (exact.Color2.Equals(approximate.Color2))
                {
                    colorMatches++;
                }

                if (exact.Color2.Equals(crosscheck.Color2))
                {
                    crosscheckMatches++;
                }

                // Track the closest pair seen so far by each method.
                if (bestExact.SquareDistance > exact.SquareDistance)
                {
                    bestExact = exact;
                }

                if (bestApproximate.SquareDistance > approximate.SquareDistance)
                {
                    bestApproximate = approximate;
                }

                var ratio       = approximate.SquareDistance / (double)exact.SquareDistance;
                var percentOver = (ratio - 1.0) * 100.0;
                var reportLine  = string.Format("Exact {0} vs Approx. {1} vs Cross {2}. Over by {3:N3}%", exact, approximate, crosscheck, percentOver);

                Console.WriteLine(reportLine);
            }

            var overallFormat = bestApproximate.SquareDistance <= bestExact.SquareDistance
                ? "DID FIND the closest pair of points overall. Exact {0}. Approx {1}"
                : "DID NOT FIND the closest pair of points overall. Exact {0}. Approx {1}";

            Console.WriteLine(overallFormat, bestExact, bestApproximate);

            var everySearchCorrect = colorMatches == requestedClusters && distanceMatches == requestedClusters;
            var failureMessage     = string.Format(
                "Of {0} clusters, only {1} searches found the closest cluster and {2} found the shortest distance. Crosscheck = {3}",
                requestedClusters,
                colorMatches,
                distanceMatches,
                crosscheckMatches
                );

            Assert.IsTrue(everySearchCorrect, failureMessage);
        }
 /// <summary>
 /// Look up where a point sits in SortedPoints.
 /// </summary>
 /// <param name="p">Point to look up.</param>
 /// <returns>The zero-based position of the point in the SortedPoints list.</returns>
 public int SortedPosition(HilbertPoint p)
 {
     var entry = Index[p];

     return entry.Sorted;
 }
Exemple #28
0
        /// <summary>
        /// A test case for PolyChromaticClosestPoint.FindPairApproximately where clusters conform to a Gaussian distribution.
        ///
        /// The color labelled "0" is paired with each of the colors "1" through numClusters - 1. For every pairing the
        /// approximate result must have a square distance no greater than the exhaustive result.
        /// </summary>
        /// <param name="nPoints">Number of points in each cluster.</param>
        /// <param name="dimensions">Number of Dimensions in each point.</param>
        /// <param name="numClusters">Number of clusters to create.</param>
        /// <param name="hilbertsToTry">Number of randomly generated Hilbert curves to try.
        /// When greater than one, an index is searched for via OptimalIndex.Search before building the closest-point finder.</param>
        public void GaussianPolyChromaticPairTestCase(int nPoints, int dimensions, int numClusters, int hilbertsToTry = 1)
        {
            const int outlierSize        = 5;
            const int noiseSkipBy        = 10;
            const int reducedNoiseSkipBy = 1;

            var passes       = 0;
            var largestRatio = 1.0;
            var firstColor   = "0";

            var generator = new GaussianClustering
            {
                ClusterCount   = numClusters,
                Dimensions     = dimensions,
                MaxCoordinate  = 1000,
                MinClusterSize = nPoints,
                MaxClusterSize = nPoints
            };
            var clusters = generator.MakeClusters();

            PolyChromaticClosestPoint<string> pccp;

            if (hilbertsToTry > 1)
            {
                var bitsPerDimension = (1 + generator.MaxCoordinate).SmallestPowerOfTwo();
                var hilbertPoints    = clusters.Points()
                                               .Select(up => HilbertPoint.CastOrConvert(up, bitsPerDimension, true))
                                               .ToList();
                var searchResult = OptimalIndex.Search(hilbertPoints, outlierSize, noiseSkipBy, reducedNoiseSkipBy, hilbertsToTry);

                pccp = new PolyChromaticClosestPoint<string>(clusters, searchResult.Index);
            }
            else
            {
                pccp = new PolyChromaticClosestPoint<string>(clusters);
            }

            for (var otherIndex = 1; otherIndex < numClusters; otherIndex++)
            {
                var secondColor = otherIndex.ToString();

                var exact       = pccp.FindPairExhaustively(firstColor, secondColor);
                var approximate = pccp.FindPairApproximately(firstColor, secondColor);

                var exactDistance       = exact.SquareDistance;
                var approximateDistance = approximate.SquareDistance;

                if (approximateDistance <= exactDistance)
                {
                    // The approximation is at least as good as the exhaustive answer.
                    passes++;
                    Console.WriteLine("FindPairApproximately CORRECT.   Exact {0}. Approx {1}", exact, approximate);
                }
                else
                {
                    // Remember the worst overshoot for the failure message.
                    var ratio = approximateDistance / (double)exactDistance;

                    largestRatio = Math.Max(largestRatio, ratio);
                    Console.WriteLine("FindPairApproximately INCORRECT. Exact {0}. Approx {1}. Too high by {2:N3}%",
                                      exact, approximate, 100.0 * (ratio - 1.0));
                }
            }

            var failureMessage = string.Format(
                "Did not succeed every time. Failed {0} of {1} times. Worst distance ratio is {2:N4}. {3} points of {4} dimensions.",
                numClusters - passes - 1,
                numClusters - 1,
                largestRatio,
                nPoints,
                dimensions
                );

            Assert.AreEqual(numClusters - 1, passes, failureMessage);
        }
Exemple #29
0
 /// <summary>
 /// Takes the largest MaxCoordinate among the given points, casts it to int, and passes it to
 /// HilbertPoint.FindBitsPerDimension.
 /// </summary>
 /// <param name="points">Points whose MaxCoordinate values are examined.</param>
 /// <returns>The value returned by HilbertPoint.FindBitsPerDimension for the largest coordinate.</returns>
 public static int FindBitsPerDimension(IReadOnlyList <UnsignedPoint> points)
 {
     var perPointMaxima    = points.Select(p => p.MaxCoordinate);
     var largestCoordinate = (int)perPointMaxima.Max();

     return HilbertPoint.FindBitsPerDimension(largestCoordinate);
 }
Exemple #30
0
        /// <summary>
        /// Generates clusters with GaussianClustering, obtains several indices from OptimalIndex.SearchMany, and
        /// builds one PolyChromaticClosestPoint per index. For every cluster label, the best approximate answer
        /// across all of those finders is compared with the exhaustive answer from the first finder.
        ///
        /// The test passes only if, for every cluster, the best approximate answer names the same neighboring color
        /// as the exhaustive search and reports a square distance no greater than the exhaustive one.
        /// </summary>
        /// <param name="nPoints">Number of points in each cluster (used as both minimum and maximum cluster size).</param>
        /// <param name="dimensions">Number of dimensions in each point.</param>
        /// <param name="numClusters">Number of clusters to create.</param>
        /// <param name="numCurvesToTry">Passed to OptimalIndex.SearchMany as its final argument.</param>
        /// <param name="numCurvesToKeep">Passed to OptimalIndex.SearchMany as its second argument.</param>
        public void ClosestClusterTest(int nPoints, int dimensions, int numClusters, int numCurvesToTry, int numCurvesToKeep)
        {
            const int outlierSize        = 5;
            const int noiseSkipBy        = 10;
            const int reducedNoiseSkipBy = 1;

            var colorMatches    = 0;
            var distanceMatches = 0;

            var generator = new GaussianClustering
            {
                ClusterCount   = numClusters,
                Dimensions     = dimensions,
                MaxCoordinate  = 1000,
                MinClusterSize = nPoints,
                MaxClusterSize = nPoints
            };

            // Running bests across all colors, starting from default-constructed pairs.
            var bestExact       = new PolyChromaticClosestPoint<string>.ClosestPair();
            var bestApproximate = new PolyChromaticClosestPoint<string>.ClosestPair();

            var clusters         = generator.MakeClusters();
            var bitsPerDimension = (1 + generator.MaxCoordinate).SmallestPowerOfTwo();

            var hilbertPoints = clusters.Points()
                                        .Select(up => HilbertPoint.CastOrConvert(up, bitsPerDimension, true))
                                        .ToList();
            var searchResults = OptimalIndex.SearchMany(
                hilbertPoints,
                numCurvesToKeep,
                outlierSize, noiseSkipBy, reducedNoiseSkipBy, numCurvesToTry
                );

            // One closest-point finder per index returned by the search.
            var indices = searchResults.Select(result => result.Index).ToList();
            var finders = indices
                          .Select(index => new PolyChromaticClosestPoint<string>(clusters, index))
                          .ToList();

            // The first finder supplies the class labels and the exhaustive answers.
            var primaryFinder = finders[0];

            foreach (var color in primaryFinder.Clusters.ClassLabels())
            {
                var exact       = primaryFinder.FindClusterExhaustively(color);
                var approximate = finders
                                  .Select(finder => finder.FindClusterApproximately(color))
                                  .OrderBy(cp => cp)
                                  .First();

                if (approximate.SquareDistance <= exact.SquareDistance)
                {
                    distanceMatches++;
                }

                if (exact.Color2.Equals(approximate.Color2))
                {
                    colorMatches++;
                }

                // Track the closest pair seen so far by each method.
                if (bestExact.SquareDistance > exact.SquareDistance)
                {
                    bestExact = exact;
                }

                if (bestApproximate.SquareDistance > approximate.SquareDistance)
                {
                    bestApproximate = approximate;
                }

                var ratio       = approximate.SquareDistance / (double)exact.SquareDistance;
                var percentOver = (ratio - 1.0) * 100.0;
                var reportLine  = string.Format("Exact {0} vs Approx. {1}. Over by {2:N3}%", exact, approximate, percentOver);

                Console.WriteLine(reportLine);
            }

            var overallFormat = bestApproximate.SquareDistance <= bestExact.SquareDistance
                ? "DID FIND the closest pair of points overall. Exact {0}. Approx {1}"
                : "DID NOT FIND the closest pair of points overall. Exact {0}. Approx {1}";

            Console.WriteLine(overallFormat, bestExact, bestApproximate);

            var everySearchCorrect = colorMatches == numClusters && distanceMatches == numClusters;
            var failureMessage     = string.Format(
                "Of {0} clusters, only {1} searches found the closest cluster and {2} found the shortest distance.",
                numClusters,
                colorMatches,
                distanceMatches
                );

            Assert.IsTrue(everySearchCorrect, failureMessage);
        }