/// <summary>
/// Compare the exact coarseness with an estimate for all numbers of bits.
///
/// This takes an assemblage of many clusters and finds the most concentrated
/// cluster according to a single bit Hilbert curve.
/// Then it composes a GridCoarseness for the points in that cluster.
/// </summary>
/// <param name="numPoints">Number of points</param>
/// <param name="dimensions">Number of dimensions</param>
/// <param name="clusterCount">Number of clusters</param>
/// <param name="maxCoordinate">Largest value any coordinate of any dimension can hold</param>
/// <param name="minStdDeviation">Minimum standard deviation among coordinate values relative to the center of each Gaussian cluster generated.</param>
/// <param name="maxStdDeviation">Maximum standard deviation among coordinate values relative to the center of each Gaussian cluster generated.</param>
/// <returns>The GridCoarseness.</returns>
GridCoarseness MakeTestGrid(int numPoints, int dimensions, int clusterCount, int maxCoordinate, int minStdDeviation = 10, int maxStdDeviation = 30)
{
    var avgClusterSize = numPoints / clusterCount;
    var data = new GaussianClustering
    {
        ClusterCount = clusterCount,
        Dimensions = dimensions,
        MaxCoordinate = maxCoordinate,
        MinClusterSize = avgClusterSize - 100,
        MaxClusterSize = avgClusterSize + 100,
        MaxDistanceStdDev = maxStdDeviation,
        MinDistanceStdDev = minStdDeviation
    };
    var clusters = data.MakeClusters();
    var points = clusters.Points().ToList();
    PointBalancer balancer = null;
    var bitsRequired = (maxCoordinate + 1).SmallestPowerOfTwo();
    // A one-bit Hilbert sort yields coarse buckets; the largest bucket holds the
    // most concentrated cluster of points.
    var lowresSort = HilbertSort.SortWithTies(points, 1, ref balancer);
    // Use First() rather than FirstOrDefault(): points is never empty here, and
    // FirstOrDefault would defer a null-reference failure to the GridCoarseness ctor.
    // (Also removed the unused local that held the bucket's length.)
    var largestBucket = lowresSort.OrderByDescending(bucket => bucket.Length).First();
    var grid = new GridCoarseness(largestBucket, bitsRequired);
    return (grid);
}
/// <summary>
/// Print, as CSV rows, the exact density next to the estimated density for every point,
/// so the two can be compared offline.
/// </summary>
public void DensityCompared()
{
    var bitsPerDimension = 10;
    var data = new GaussianClustering
    {
        ClusterCount = 50,
        Dimensions = 100,
        MaxCoordinate = (1 << bitsPerDimension) - 1,
        MinClusterSize = 100,
        MaxClusterSize = 500
    };
    var expectedClusters = data.MakeClusters();
    var hIndex = new HilbertIndex(expectedClusters, bitsPerDimension);
    var cc = new ClusterCounter
    {
        NoiseSkipBy = 10,
        OutlierSize = 5,
        ReducedNoiseSkipBy = 1
    };
    var count = cc.Count(hIndex.SortedPoints);
    // Shrink the neighborhood below MaximumSquareDistance so density estimates do not saturate.
    var neighborhoodDistance = count.MaximumSquareDistance * 2 / 5;
    var numPoints = hIndex.SortedPoints.Count;
    var windowRadius = (int)Math.Sqrt(numPoints / 2);
    var dMeter = new DensityMeter(hIndex, neighborhoodDistance, windowRadius);
    Console.WriteLine($"Window Radius = {windowRadius}. {hIndex.SortedPoints.Count} points");
    // BUG FIX: the CSV header and every data row were written with Console.Write,
    // which mashed the whole table onto a single line. Use WriteLine per row.
    Console.WriteLine("Exact,Estimated");
    for (var i = 0; i < numPoints; i++)
    {
        var p = hIndex.SortedPoints[i];
        var exact = dMeter.ExactNeighbors(p);
        var estimate = dMeter.EstimatedDensity(p, windowRadius);
        Console.WriteLine($"{exact},{estimate}");
    }
}
/// <summary>
/// Run a single OptimalIndex search over randomly clustered Gaussian data and assert that
/// the estimated cluster count does not exceed the acceptable limit (low fragmentation).
/// </summary>
private void OptimalIndexTestCase(
    int hilbertTries, int minClusterSize, int maxClusterSize, int dimensions,
    int clusterCount, int acceptableClusterCount, int bitsPerDimension,
    int outlierSize, int noiseSkipBy)
{
    var testData = new GaussianClustering
    {
        ClusterCount = clusterCount,
        Dimensions = dimensions,
        MaxCoordinate = 1000,
        MinClusterSize = minClusterSize,
        MaxClusterSize = maxClusterSize
    };
    var hilbertPoints = testData
        .MakeClusters()
        .Points()
        .Select(p => HilbertPoint.CastOrConvert(p, bitsPerDimension, true))
        .ToList();
    var results = OptimalIndex.Search(
        hilbertPoints,
        outlierSize,
        noiseSkipBy,
        hilbertTries, // maxTrials
        4             // maxIterationsWithoutImprovement
    );
    var message = $"Estimated cluster count = {results.EstimatedClusterCount}, actual = {clusterCount}, acceptable = {acceptableClusterCount}";
    Console.WriteLine(message);
    Assert.LessOrEqual(results.EstimatedClusterCount, acceptableClusterCount, $"HilbertIndex fragmented by more than 50%: {message}");
}
/// <summary>
/// For random clustered data, discover how unique shortened versions of the Hilbert index are.
/// </summary>
/// <param name="numPoints">Number of points.</param>
/// <param name="dimensions">Dimensions per point.</param>
/// <param name="clusterCount">Number of clusters.</param>
/// <param name="smallBucketSize">Count of items that constitutes a small bucket.</param>
/// <param name="maxCoordinate">Highest permitted coordinate value.</param>
public void ClusteredUniquenessByBits(int numPoints, int dimensions, int clusterCount, int smallBucketSize, int maxCoordinate)
{
    const int clusterSizeVariation = 100;
    var averageClusterSize = numPoints / clusterCount;
    var data = new GaussianClustering
    {
        ClusterCount = clusterCount,
        Dimensions = dimensions,
        MaxCoordinate = maxCoordinate,
        MinClusterSize = averageClusterSize - clusterSizeVariation,
        MaxClusterSize = averageClusterSize + clusterSizeVariation
    };
    var points = data.MakeClusters().Points().ToList();
    PointBalancer balancer = null;
    var bitsRequired = (maxCoordinate + 1).SmallestPowerOfTwo();
    // Sweep every precision from a single bit up to full resolution, reporting bucket statistics.
    for (var iBits = 1; iBits <= bitsRequired; iBits++)
    {
        var maxBucketSize = MaxBucketSizePerBits(points, iBits, smallBucketSize, ref balancer, out int pointsInSmallBuckets);
        var pctInSmallBuckets = 100.0 * pointsInSmallBuckets / points.Count;
        Console.WriteLine($"Bits: {iBits} Max Bucket: {maxBucketSize} # in Small Buckets: {pointsInSmallBuckets} - {pctInSmallBuckets} %");
    }
}
/// <summary>
/// Create test data in known clusters, perform unattended clustering, time the process.
/// Make no attempt to verify the correctness of the result.
/// The timing does not include the creation of the test data, just the clustering.
/// </summary>
/// <param name="numPoints">Number of points to cluster.</param>
/// <param name="clusterCount">Cluster count.</param>
/// <param name="dimensions">Dimensions per point.</param>
/// <param name="clusterSizeVariation">Cluster size variation.
/// The average number of points per cluster is numPoints/clusterCount.
/// The actual size of a given cluster will be permitted to vary by as much as ± clusterSizeVariation.
/// </param>
/// <param name="maxCoordinate">All points will have coordinate values in the range 0 to maxCoordinate.</param>
/// <returns>Time in seconds and a Boolean which is false if the clustering did not produce perfect results.</returns>
private Tuple<double, bool> ClassifyPerformance(int numPoints, int clusterCount, int dimensions, int clusterSizeVariation = 0, int maxCoordinate = 1000)
{
    var averageClusterSize = numPoints / clusterCount;
    var data = new GaussianClustering
    {
        ClusterCount = clusterCount,
        Dimensions = dimensions,
        MaxCoordinate = maxCoordinate,
        MinClusterSize = averageClusterSize - clusterSizeVariation,
        MaxClusterSize = averageClusterSize + clusterSizeVariation
    };
    var expectedClusters = data.MakeClusters();
    // Start the clock only once the test data exists.
    var timer = Stopwatch.StartNew();
    var classifier = new HilbertClassifier(expectedClusters.Points(), 10);
    classifier.IndexConfig.UseSample = true;
    var actualClusters = classifier.Classify();
    timer.Stop();
    var success = expectedClusters.IsSimilarTo(actualClusters);
    if (!success)
    {
        Console.WriteLine($"Clustering was not perfect. # of Clusters actual/expected: {actualClusters.NumPartitions}/{expectedClusters.NumPartitions}");
    }
    var seconds = timer.ElapsedMilliseconds / 1000.0;
    return new Tuple<double, bool>(seconds, success);
}
/// <summary>
/// Run the Slash cluster command entirely in memory (no input or output files)
/// and assert that the resulting classification is acceptable.
/// </summary>
public void ClusterWithoutFiles()
{
    var bitsPerDimension = 10;
    var data = new GaussianClustering
    {
        ClusterCount = 20,
        Dimensions = 50,
        MaxCoordinate = (1 << bitsPerDimension) - 1,
        MinClusterSize = 200,
        MaxClusterSize = 600
    };
    var expectedClassification = data.MakeClusters();
    var config = new SlashConfig { AcceptableBCubed = 0.98 };
    config.Index.BitsPerDimension = bitsPerDimension;
    config.UseNoFiles();
    var command = new SlashCommand(SlashCommand.CommandType.Cluster, config)
    {
        InputFile = null,
        OutputFile = null
    };
    command.Configuration.DensityClassifier.SkipDensityClassification = true;
    // The logger must be set up here, because the command initializes the logger differently.
    Logger.SetupForTests(null);
    command.LoadData(expectedClassification);
    command.Execute();
    Assert.IsTrue(command.IsClassificationAcceptable, $"The BCubed value of {command.MeasuredChange.BCubed} was not good enough.");
}
/// <summary>
/// For the same test data, create a single HilbertIndex many times and average the execution time across all indices.
///
/// The goal is to identify how the time depends on number of points N, number of dimensions D, and bits per coordinate B.
/// (It should be insensitive to cluster count K.)
/// </summary>
/// <param name="N">Number of points to index.</param>
/// <param name="K">Number of clusters of points to create.</param>
/// <param name="D">Number dimensions.</param>
/// <param name="B">Number bits.</param>
/// <param name="repeats">Number of times to repeat.</param>
/// <returns>Average number of seconds to create the index, averaged over several tries.
/// The time excludes the time to create the test data.
/// </returns>
private double SingleIndexCreationPerformanceCase(int N, int K, int D, int B, int repeats)
{
    var data = new GaussianClustering
    {
        ClusterCount = K,
        Dimensions = D,
        MaxCoordinate = (1 << B) - 1,
        MinClusterSize = N / K,
        MaxClusterSize = N / K
    };
    var clusters = data.MakeClusters();
    var timer = new Stopwatch();
    var totalTimeMilliseconds = 0L;
    for (var i = 0; i < repeats; i++)
    {
        timer.Reset();
        timer.Start();
        var hIndex = new HilbertIndex(clusters, B);
        timer.Stop();
        // BUG FIX: verify the index AFTER stopping the clock. The assertion previously ran
        // inside the timed region, inflating the measured index-creation time.
        Assert.AreEqual(N, hIndex.Count, "Index has wrong number of points");
        totalTimeMilliseconds += timer.ElapsedMilliseconds;
    }
    return ((double)totalTimeMilliseconds / (1000.0 * repeats));
}
/// <summary>
/// Cluster data whose cluster sizes form a wide arithmetic spread (100, 200, ..., clusterCount*100)
/// and require a high BCubed score, verifying the classifier copes with very uneven densities.
/// </summary>
public void Classify_DensitySpread()
{
    var clusterCount = 50;
    var dimensions = 100;
    var maxCoordinate = 1000;
    var acceptableBCubed = 0.99;
    // Sizes 100, 200, ..., clusterCount*100 — derived from clusterCount instead of
    // repeating the magic number 50 in the array length and the Range count.
    var clusterSizes = Enumerable.Range(0, clusterCount).Select(i => 100 + (100 * i)).ToArray();
    var data = new GaussianClustering
    {
        ClusterCount = clusterCount,
        Dimensions = dimensions,
        MaxCoordinate = maxCoordinate,
        MinClusterSize = clusterSizes.Min(),
        MaxClusterSize = clusterSizes.Max(),
        ClusterSizes = clusterSizes
    };
    ClusterCore(data, acceptableBCubed);
}
/// <summary>
/// A test case for PolyChromaticClosestPoint.FindPairByCentroids where clusters conform to a Gaussian distribution.
/// </summary>
/// <param name="nPoints">Number of points in each cluster.</param>
/// <param name="dimensions">Number of Dimensions in each point.</param>
/// <param name="numClusters">Number of clusters to create.</param>
public void FindPairByCentroidsTestCase(int nPoints, int dimensions, int numClusters)
{
    var successes = 0;
    var worstRatio = 1.0;
    var color1 = "0";
    var data = new GaussianClustering
    {
        ClusterCount = numClusters,
        Dimensions = dimensions,
        MaxCoordinate = 1000,
        MinClusterSize = nPoints,
        MaxClusterSize = nPoints
    };
    var clusters = data.MakeClusters();
    var pccp = new PolyChromaticClosestPoint<string>(clusters);
    for (var iColor2 = 1; iColor2 < numClusters; iColor2++)
    {
        var color2 = iColor2.ToString();
        var exact = pccp.FindPairExhaustively(color1, color2);
        var approximate = pccp.FindPairByCentroids(color1, color2);
        // The approximation succeeds when its pair is at least as close as the exhaustive answer.
        // (Single test replaces the two equivalent comparisons the original performed.)
        if (approximate.SquareDistance <= exact.SquareDistance)
        {
            successes++;
            Console.WriteLine("FindPairByCentroids CORRECT. Exact {0}. Approx {1}", exact, approximate);
        }
        else
        {
            worstRatio = Math.Max(worstRatio, approximate.SquareDistance / (double)exact.SquareDistance);
            Console.WriteLine("FindPairByCentroids INCORRECT. Exact {0}. Approx {1}. Too high by {2:N3}%",
                exact, approximate, 100.0 * (approximate.SquareDistance / (double)exact.SquareDistance - 1.0));
        }
    }
    Assert.AreEqual(numClusters - 1, successes,
        string.Format("Did not succeed every time. Failed {0} of {1} times. Worst distance ratio is {2:N4}. {3} points of {4} dimensions.",
            numClusters - successes - 1,
            numClusters - 1,
            worstRatio,
            nPoints,
            dimensions
        )
    );
}
/// <summary>
/// For every color (cluster label), find its closest cluster exhaustively and check where that
/// true answer ranks among the approximate all-pairs results. The test passes when the true
/// closest cluster is always ranked 1st, 2nd or 3rd by the approximation.
/// </summary>
/// <param name="nPoints">Number of points in each cluster.</param>
/// <param name="dimensions">Number of dimensions in each point.</param>
/// <param name="numClusters">Number of clusters to create.</param>
/// <param name="numCurvesToTry">Number of randomly generated Hilbert curves to try.</param>
public void AllColorPairsClosestClusterTest(int nPoints, int dimensions, int numClusters, int numCurvesToTry)
{
    // One-based tally: rankTally[r] counts colors whose true closest cluster ranked r-th.
    // (Index zero is deliberately unused.)
    var rankTally = new int[numClusters + 1];
    var data = new GaussianClustering
    {
        ClusterCount = numClusters,
        Dimensions = dimensions,
        MaxCoordinate = 1000,
        MinClusterSize = nPoints,
        MaxClusterSize = nPoints
    };
    var clusters = data.MakeClusters();
    var bitsPerDimension = (1 + data.MaxCoordinate).SmallestPowerOfTwo();
    var hilbertPoints = clusters.Points()
        .Select(up => HilbertPoint.CastOrConvert(up, bitsPerDimension, true))
        .ToList();
    var results = OptimalIndex.Search(
        hilbertPoints,
        5 /*outlier size */,
        10 /* NoiseSkipBy */,
        1 /* ReducedNoiseSkipBy */,
        numCurvesToTry
    );
    var pccp1 = new PolyChromaticClosestPoint<string>(clusters, results.Index);
    var allColorPairs = pccp1.FindAllClustersApproximately();
    var worstRatio = 1.0;
    var sumOfRatios = 0.0;
    var countOfRatios = 0;
    foreach (var color1 in clusters.ClassLabels())
    {
        var exact = pccp1.FindClusterExhaustively(color1).Swap(color1);
        var pairsForColor = allColorPairs
            .Where(cp => cp.Color1.Equals(color1) || cp.Color2.Equals(color1))
            .Select(cp => cp.Swap(color1))
            .ToList();
        var approximateColor2Distance = pairsForColor.First(cp => cp.Color2.Equals(exact.Color2)).SquareDistance;
        var approximateRank = pairsForColor.Count(cp => cp.SquareDistance < approximateColor2Distance) + 1;
        rankTally[approximateRank]++;
#pragma warning disable RECS0018 // Comparison of floating point numbers with equality operator
        var ratio = exact.SquareDistance == 0.0 ? 0 : approximateColor2Distance / (double)exact.SquareDistance;
#pragma warning restore RECS0018 // Comparison of floating point numbers with equality operator
        sumOfRatios += ratio;
        countOfRatios++;
        worstRatio = Math.Max(worstRatio, ratio);
    }
    Debug.WriteLine(string.Format("Worst distance overage = {0:N3}%", (worstRatio - 1.0) * 100.0));
    Debug.WriteLine(string.Format("Average distance overage = {0:N3}%", ((sumOfRatios / countOfRatios) - 1.0) * 100.0));
    for (var iRank = 1; iRank <= numClusters; iRank++)
    {
        if (rankTally[iRank] > 0 || iRank < 4)
        {
            Debug.WriteLine(string.Format("For {0} Clusters the closest cluster found was Ranked #{1}.", rankTally[iRank], iRank));
        }
    }
    // Accept a win, place or show: the true closest cluster shows up as no worse than the 3rd ranked cluster according to the approximate measure.
    Assert.IsTrue(rankTally[1] + rankTally[2] + rankTally[3] == numClusters,
        string.Format("Found the closest cluster for {0} colors", rankTally[1])
    );
}
/// <summary>
/// Create test data in known clusters, perform unattended clustering, and compare the results to the known clusters.
/// The test passes if the BCubed value is high enough.
/// </summary>
/// <param name="numPoints">Number of points to cluster.</param>
/// <param name="clusterCount">Cluster count.</param>
/// <param name="dimensions">Dimensions per point.</param>
/// <param name="clusterSizeVariation">Cluster size variation.
/// The average number of points per cluster is numPoints/clusterCount.
/// The actual size of a given cluster will be permitted to vary by as much as ± clusterSizeVariation.
/// </param>
/// <param name="maxCoordinate">All points will have coordinate values in the range 0 to maxCoordinate.</param>
/// <param name="acceptableBCubed">The comparison of the actual and expected clusters must yield a BCubed value
/// that is this high or higher. A value of 1.0 means a perfect clustering, with no points out of place.</param>
private void ClassifyCase(int numPoints, int clusterCount, int dimensions, int clusterSizeVariation = 0, int maxCoordinate = 1000, double acceptableBCubed = 0.99)
{
    var averageClusterSize = numPoints / clusterCount;
    var data = new GaussianClustering
    {
        ClusterCount = clusterCount,
        Dimensions = dimensions,
        MaxCoordinate = maxCoordinate,
        MinClusterSize = averageClusterSize - clusterSizeVariation,
        MaxClusterSize = averageClusterSize + clusterSizeVariation
    };
    ClusterCore(data, acceptableBCubed);
}
/// <summary>
/// Cluster the given test data with a HilbertClassifier and assert that the BCubed
/// similarity between the expected and actual clusterings meets the given threshold.
/// </summary>
/// <param name="data">Generator for the expected clusters.</param>
/// <param name="acceptableBCubed">Minimum acceptable BCubed value (1.0 = perfect).</param>
private void ClusterCore(GaussianClustering data, double acceptableBCubed)
{
    var expectedClusters = data.MakeClusters();
    var classifier = new HilbertClassifier(expectedClusters.Points(), 10);
    classifier.IndexConfig.UseSample = true;
    var actualClusters = classifier.Classify();
    var comparison = expectedClusters.Compare(actualClusters);
    var message = $" Comparison of clusters: {comparison}.\n Clusters expected/actual: {expectedClusters.NumPartitions}/{actualClusters.NumPartitions}.";
    Console.WriteLine(message);
    var largePartitionCount = actualClusters.NumLargePartitions(classifier.OutlierSize);
    Console.WriteLine($" Large clusters: {largePartitionCount}");
    Assert.GreaterOrEqual(comparison.BCubed, acceptableBCubed, $"Clustering was not good enough. BCubed = {comparison.BCubed}");
}
/// <summary>
/// Accumulate correlation statistics between exact and estimated density over a grid of
/// window radii and point counts, optionally repeating every case.
/// </summary>
/// <param name="varyWindowRadius">Window radii to test.</param>
/// <param name="varyNumPoints">Point counts to test.</param>
/// <param name="dimensions">Dimensions per point.</param>
/// <param name="clusterCount">Number of clusters to generate.</param>
/// <param name="repeats">How many times to repeat each case.</param>
/// <returns>Correlation statistics keyed by a label describing each case.</returns>
private Dictionary<string, CorrelationStats> DensityCorrelationCases(int[] varyWindowRadius, int[] varyNumPoints, int dimensions, int clusterCount, int repeats = 1)
{
    var stats = new Dictionary<string, CorrelationStats>();
    for (var iRepeat = 0; iRepeat < repeats; iRepeat++)
    {
        foreach (var numPoints in varyNumPoints)
        {
            var bitsPerDimension = 10;
            var uniformClusterSize = numPoints / clusterCount;
            var data = new GaussianClustering
            {
                ClusterCount = clusterCount,
                Dimensions = dimensions,
                MaxCoordinate = (1 << bitsPerDimension) - 1,
                MinClusterSize = uniformClusterSize,
                MaxClusterSize = uniformClusterSize
            };
            var hIndex = new HilbertIndex(data.MakeClusters(), bitsPerDimension);
            var cc = new ClusterCounter
            {
                NoiseSkipBy = 10,
                OutlierSize = 5,
                ReducedNoiseSkipBy = 1
            };
            var neighborhoodDistance = cc.Count(hIndex.SortedPoints).MaximumSquareDistance * 2 / 5;
            var dMeter = new DensityMeter(hIndex, neighborhoodDistance, varyWindowRadius[0]);
            // It is more efficient to process windowRadius in descending order, because the
            // DistanceMemo can reuse more work that way: once a larger window has been
            // processed, it includes all shorter windows as well.
            foreach (var windowRadius in varyWindowRadius.OrderByDescending(r => r))
            {
                var label = MakeLabel(numPoints, windowRadius, dimensions, clusterCount);
                if (!stats.TryGetValue(label, out var corStats))
                {
                    corStats = new CorrelationStats(label);
                    stats[label] = corStats;
                }
                corStats.Add(DensityCorrelationCase(dMeter, windowRadius));
                Console.Write(corStats);
            }
        }
    }
    return stats;
}
/// <summary>
/// Build a Gaussian test Classification whose clusters have exactly the given sizes.
/// </summary>
/// <param name="clusterSizes">Desired size of each cluster; its length sets the cluster count.</param>
/// <param name="dimensions">Dimensions per point.</param>
/// <param name="maxCoordinate">Highest permitted coordinate value.</param>
/// <returns>The generated clusters.</returns>
private Classification<UnsignedPoint, string> TestData(int[] clusterSizes, int dimensions, int maxCoordinate)
{
    var data = new GaussianClustering
    {
        ClusterCount = clusterSizes.Length,
        Dimensions = dimensions,
        MaxCoordinate = maxCoordinate,
        MinClusterSize = clusterSizes.Min(),
        MaxClusterSize = clusterSizes.Max(),
        ClusterSizes = clusterSizes
    };
    return data.MakeClusters();
}
/// <summary>
/// Create test data in known chained clusters, perform unattended clustering, and compare the results to the known clusters.
/// The test passes if the BCubed value is high enough.
/// </summary>
/// <param name="numPoints">Number of points to cluster.</param>
/// <param name="clusterCount">Cluster count.</param>
/// <param name="chainLength">Number of segments in each chain.</param>
/// <param name="dimensions">Dimensions per point.</param>
/// <param name="clusterSizeVariation">Cluster size variation.
/// The average number of points per cluster is numPoints/clusterCount.
/// The actual size of a given cluster will be permitted to vary by as much as ± clusterSizeVariation.
/// </param>
/// <param name="maxCoordinate">All points will have coordinate values in the range 0 to maxCoordinate.</param>
/// <param name="acceptableBCubed">The comparison of the actual and expected clusters must yield a BCubed value
/// that is this high or higher. A value of 1.0 means a perfect clustering, with no points out of place.</param>
private void ClassifyChainCase(int numPoints, int clusterCount, int chainLength, int dimensions, int clusterSizeVariation = 0, int maxCoordinate = 1000, double acceptableBCubed = 0.99)
{
    var averageClusterSize = numPoints / clusterCount;
    var data = new GaussianClustering
    {
        ClusterCount = clusterCount,
        Dimensions = dimensions,
        MaxCoordinate = maxCoordinate,
        MinClusterSize = averageClusterSize - clusterSizeVariation,
        MaxClusterSize = averageClusterSize + clusterSizeVariation,
        MaxDistanceStdDev = 300,
        MinDistanceStdDev = 150
    };
    ClusterChainCore(data, acceptableBCubed, chainLength);
}
/// <summary>
/// Generate Gaussian clustered test points, also reporting the bits per dimension
/// needed to represent maxCoordinate.
/// </summary>
/// <param name="numPoints">Approximate number of points to generate.</param>
/// <param name="dimensions">Dimensions per point.</param>
/// <param name="clusterCount">Number of clusters.</param>
/// <param name="maxCoordinate">Highest permitted coordinate value.</param>
/// <param name="minStdDeviation">Minimum standard deviation for each Gaussian cluster.</param>
/// <param name="maxStdDeviation">Maximum standard deviation for each Gaussian cluster.</param>
/// <param name="bitsPerDimension">Receives the bits needed to hold maxCoordinate.</param>
/// <returns>The generated points.</returns>
UnsignedPoint[] TestData(int numPoints, int dimensions, int clusterCount, int maxCoordinate, int minStdDeviation, int maxStdDeviation, out int bitsPerDimension)
{
    bitsPerDimension = (maxCoordinate + 1).SmallestPowerOfTwo();
    var meanClusterSize = numPoints / clusterCount;
    var data = new GaussianClustering
    {
        ClusterCount = clusterCount,
        Dimensions = dimensions,
        MaxCoordinate = maxCoordinate,
        MinClusterSize = meanClusterSize - 100,
        MaxClusterSize = meanClusterSize + 100,
        MaxDistanceStdDev = maxStdDeviation,
        MinDistanceStdDev = minStdDeviation
    };
    return data.MakeClusters().Points().ToArray();
}
/// <summary>
/// Verify that the estimated density correlates strongly (Kendall Tau-B of at least 0.90)
/// with the exact density over the first thousand sorted points.
/// </summary>
public void DensityCorrelation()
{
    var bitsPerDimension = 10;
    var data = new GaussianClustering
    {
        ClusterCount = 50,
        Dimensions = 100,
        MaxCoordinate = (1 << bitsPerDimension) - 1,
        MinClusterSize = 100,
        MaxClusterSize = 500
    };
    var hIndex = new HilbertIndex(data.MakeClusters(), bitsPerDimension);
    var cc = new ClusterCounter
    {
        NoiseSkipBy = 10,
        OutlierSize = 5,
        ReducedNoiseSkipBy = 1
    };
    var count = cc.Count(hIndex.SortedPoints);
    // The choice of neighborhoodDistance is crucial:
    //  - Too large, and a huge number of neighbors get caught up in the dragnet; estimating
    //    that value with a window into the Hilbert curve yields poor results. With 200 neighbors
    //    and a window of 100, many points saturate near 100 and density shows no variation.
    //  - Too small, and too few neighbors (or none!) are found — no meaningful density.
    //  - Almost every point has two neighbors within MaximumSquareDistance, so pick a value
    //    smaller than MaximumSquareDistance.
    var neighborhoodDistance = count.MaximumSquareDistance * 2 / 5;
    var numPoints = hIndex.SortedPoints.Count;
    var windowRadius = (int)Math.Sqrt(numPoints / 2);
    var dMeter = new DensityMeter(hIndex, neighborhoodDistance, windowRadius);
    Func<HilbertPoint, long> exactDensity = p => (long)dMeter.ExactNeighbors(p);
    Func<HilbertPoint, long> estimatedDensity = p => (long)dMeter.EstimatedDensity(p, windowRadius);
    var correlator = new KendallTauCorrelation<HilbertPoint, long>(exactDensity, estimatedDensity);
    var correlation = correlator.TauB(hIndex.SortedPoints.Take(1000));
    Console.WriteLine($"Correlation between exact and estimated density is: {correlation}");
    Assert.GreaterOrEqual(correlation, 0.90, $"Correlation {correlation} is not high enough");
}
/// <summary>
/// Cluster chained test data with a HilbertClassifier, log comparison details and outlier
/// counts, then assert that the BCubed similarity meets the acceptable threshold.
/// </summary>
/// <param name="data">Generator for the expected chained clusters.</param>
/// <param name="acceptableBCubed">Minimum acceptable BCubed value (1.0 = perfect).</param>
/// <param name="chainLength">Number of segments in each chain.</param>
private void ClusterChainCore(GaussianClustering data, double acceptableBCubed, int chainLength)
{
    var expectedClusters = data.MakeChains(chainLength);
    var classifier = new HilbertClassifier(expectedClusters.Points(), 10);
    classifier.IndexConfig.UseSample = true;
    var actualClusters = classifier.Classify();
    var comparison = expectedClusters.Compare(actualClusters);
    var message = $" Comparison of clusters: {comparison}.\n Clusters expected/actual: {expectedClusters.NumPartitions}/{actualClusters.NumPartitions}.";
    Logger.Info(message);
    var message2 = $" Large clusters: {actualClusters.NumLargePartitions(classifier.OutlierSize)}";
    Logger.Info(message2);
    // Tally every point that landed in a partition too small to escape outlier status.
    var pointsInOutliers = actualClusters.LabelToPoints.Values
        .Where(members => members.Count() < classifier.OutlierSize)
        .Sum(members => members.Count());
    var message3 = $" Points in Outliers/Total Point: {pointsInOutliers} / {actualClusters.NumPoints}";
    Logger.Info(message3);
    Assert.GreaterOrEqual(comparison.BCubed, acceptableBCubed, $"Clustering was not good enough. BCubed = {comparison.BCubed}");
}
/// <summary>
/// UnsignedPoint.SquareDistanceCompare has an optimization. This tests how often this optimization
/// can be exploited in a realistic test. The comparison will be against an estimated characteristic distance
/// between points. This distance is assumed to be close enough to trigger two points to be merged into a single cluster.
/// </summary>
/// <param name="totalComparisons">Number of random point pairs to compare.</param>
/// <param name="useExtendedOptimization">If true, exercise the extended optimization; otherwise the basic one.</param>
/// <returns>Percent of comparisons that could exploit the optimization at the short (merge) distance.</returns>
private double SquareDistanceCompareOptimizableCase(int totalComparisons, bool useExtendedOptimization = false)
{
    // 1. Make test data.
    var bitsPerDimension = 10;
    var data = new GaussianClustering
    {
        ClusterCount = 100,
        Dimensions = 100,
        MaxCoordinate = (1 << bitsPerDimension) - 1,
        MinClusterSize = 50,
        MaxClusterSize = 150
    };
    var clusters = data.MakeClusters();

    // 2. Create HilbertIndex for points.
    var hIndex = new HilbertIndex(clusters, bitsPerDimension);

    // 3. Deduce the characteristic distance.
    var counter = new ClusterCounter
    {
        OutlierSize = 5,
        NoiseSkipBy = 10
    };
    var mergeDistance = counter.Count(hIndex.SortedPoints).MaximumSquareDistance;
    var longDistance = 5 * mergeDistance;

    // 4. Select random pairs of points and see how many distance comparisons can exploit the optimization.
    var rng = new FastRandom();
    var points = clusters.Points().ToList();
    var shortDistanceHits = 0;
    var longDistanceHits = 0;
    for (var i = 0; i < totalComparisons; i++)
    {
        var p1 = points[rng.Next(points.Count)];
        var p2 = points[rng.Next(points.Count)];
        if (useExtendedOptimization)
        {
            if (IsExtendedDistanceOptimizationUsable(p1, p2, mergeDistance, bitsPerDimension))
                shortDistanceHits++;
            if (IsExtendedDistanceOptimizationUsable(p1, p2, longDistance, bitsPerDimension))
                longDistanceHits++;
        }
        else
        {
            if (IsDistanceOptimizationUsable(p1, p2, mergeDistance))
                shortDistanceHits++;
            if (IsDistanceOptimizationUsable(p1, p2, longDistance))
                longDistanceHits++;
        }
    }
    var percentOptimizable = 100.0 * shortDistanceHits / totalComparisons;
    var percentOptimizableLongDistance = 100.0 * longDistanceHits / totalComparisons;
    var message = $"Comparisons were {percentOptimizable} % Optimizable at short distance, {percentOptimizableLongDistance} % at long distance";
    Console.WriteLine(message);
    return percentOptimizable;
}
/// <summary>
/// Compare a full-resolution (hires) Hilbert sort against a low-resolution sort with ties.
/// For every pair of points, the two orderings must either agree on relative order or the
/// pair must be tied (share a bucket) in the lowres ordering.
/// </summary>
/// <param name="numPoints">Number of points to generate.</param>
/// <param name="dimensions">Dimensions per point.</param>
/// <param name="clusterCount">Number of Gaussian clusters.</param>
/// <param name="lowresBits">Bits per dimension used for the low-resolution sort.</param>
public void LowresVersusHiresCase(int numPoints, int dimensions, int clusterCount, int lowresBits)
{
    var maxCoordinate = 1000;
    var clusterSizeVariation = 100;
    var minClusterSize = (numPoints / clusterCount) - clusterSizeVariation;
    var maxClusterSize = (numPoints / clusterCount) + clusterSizeVariation;
    var data = new GaussianClustering
    {
        ClusterCount = clusterCount,
        Dimensions = dimensions,
        MaxCoordinate = maxCoordinate,
        MinClusterSize = minClusterSize,
        MaxClusterSize = maxClusterSize
    };
    var clusters = data.MakeClusters();
    var points = clusters.Points().ToList();
    PointBalancer balancer = null;
    var hiresSort = HilbertSort.BalancedSort(points, ref balancer);
    var lowresSort = HilbertSort.SortWithTies(points, lowresBits, ref balancer);
    var lowresPositions = new Dictionary<UnsignedPoint, int>();
    var hiresPosition = new Dictionary<UnsignedPoint, int>();
    // Record each point's rank in the hires ordering.
    // (Replaces a side-effecting Select that was consumed by an empty foreach loop.)
    var hiresRank = 0;
    foreach (var p in hiresSort)
    {
        hiresPosition[p] = hiresRank++;
    }
    // In the lowres ordering, every point in the same tie bucket shares a position.
    var lowresRank = 0;
    foreach (var ties in lowresSort)
    {
        foreach (var point in ties)
        {
            lowresPositions[point] = lowresRank;
        }
        lowresRank++;
    }
    // Compare the positions of many pairs of points in the two orderings to see that
    // they are either in the same relative order
    // or tied for position in the lowres ordering.
    var actualNumPoints = points.Count;
    var largestBucket = lowresSort.Select(bucket => bucket.Length).Max();
    var caseDescription = $"N = {actualNumPoints} D = {dimensions} K = {clusterCount} B = {lowresBits}";
    Console.WriteLine(caseDescription);
    Console.WriteLine($"Lowres buckets = {lowresSort.Count} Largest bucket = {largestBucket}");
    int outOfPlaceCount = 0;
    for (var i = 0; i < actualNumPoints - 1; i++)
    {
        var p1 = points[i];
        for (var j = i + 1; j < actualNumPoints; j++)
        {
            var p2 = points[j];
            var lowresPosition1 = lowresPositions[p1];
            var lowresPosition2 = lowresPositions[p2];
            // Tied pairs may appear in either order; only untied pairs must agree with hires.
            if (lowresPosition1 != lowresPosition2)
            {
                if (lowresPosition1 < lowresPosition2 != hiresPosition[p1] < hiresPosition[p2])
                {
                    outOfPlaceCount++;
                }
            }
        }
    }
    var msg = $"Out of place count = {outOfPlaceCount}";
    Console.WriteLine(msg);
    Assert.AreEqual(0, outOfPlaceCount, msg);
}
/// <summary>
/// Explore the distribution of distances between points that are adjacent in Hilbert curve
/// order versus randomly chosen pairs, printing a CSV table of percentiles.
///
/// Sample results (abridged) showed Hilbert-adjacent distances staying small through the 98th
/// percentile (111.35 at 0% up to 282.76 at 98%) and then jumping to 2550.87 at the 99th,
/// while random-pair distances exceeded 2163 from the 2nd percentile onward. The jump from
/// 282 to 2550 shows that the characteristic (intra-cluster) distance is about 282.
/// </summary>
public void DistanceDistribution()
{
    var data = new GaussianClustering
    {
        ClusterCount = 100,
        Dimensions = 50,
        MaxCoordinate = 1000,
        MinClusterSize = 50,
        MaxClusterSize = 150
    };
    var clusters = data.MakeClusters();
    var bitsPerDimension = 10;
    var points = clusters.Points().Select(p => HilbertPoint.CastOrConvert(p, bitsPerDimension, true)).ToList();
    var results = OptimalIndex.Search(
        points,
        5, // outlierSize
        10, // noiseSkipBy
        1000, // maxTrials
        4 // maxIterationsWithoutImprovement
    );
    var pointsFromIndex = results.Index.SortedPoints;
    var distancesRandom = new List<long>();
    var distancesHilbert = new List<long>();
    var n = pointsFromIndex.Count;
    var rng = new FastRandom();
    for (var i = 0; i < n - 1; i++)
    {
        // Distance between Hilbert-order neighbors ...
        var p1 = pointsFromIndex[i];
        var p2 = pointsFromIndex[i + 1];
        distancesHilbert.Add(p1.Measure(p2));
        // ... versus the distance between two randomly chosen points.
        var p3 = pointsFromIndex[rng.Next(n)];
        var p4 = pointsFromIndex[rng.Next(n)];
        distancesRandom.Add(p3.Measure(p4));
    }
    distancesHilbert.Sort();
    distancesRandom.Sort();
    Console.WriteLine("Percentile,By Index,By Random");
    for (var percentile = 0; percentile <= 100; percentile++)
    {
        var i = Math.Min(n - 2, (n - 1) * percentile / 100);
        var distHilbert = Math.Round(Math.Sqrt(distancesHilbert[i]), 2);
        var distRandom = Math.Round(Math.Sqrt(distancesRandom[i]), 2);
        // BUG FIX: was Console.Write, which ran every CSV row together on one line
        // even though the header above was written with WriteLine.
        Console.WriteLine($"{percentile}%,{distHilbert},{distRandom}");
    }
}
/// <summary>
/// A test case for PolyChromaticClosestPoint.FindPairApproximately where clusters conform to a Gaussian distribution.
/// </summary>
/// <param name="nPoints">Number of points in each cluster.</param>
/// <param name="dimensions">Number of Dimensions in each point.</param>
/// <param name="numClusters">Number of clusters to create.</param>
/// <param name="hilbertsToTry">Number of randomly generated Hilbert curves to try.</param>
public void GaussianPolyChromaticPairTestCase(int nPoints, int dimensions, int numClusters, int hilbertsToTry = 1)
{
    var successCount = 0;
    var worstRatio = 1.0;
    var color1 = "0";
    var data = new GaussianClustering
    {
        ClusterCount = numClusters,
        Dimensions = dimensions,
        MaxCoordinate = 1000,
        MinClusterSize = nPoints,
        MaxClusterSize = nPoints
    };
    var clusters = data.MakeClusters();

    // With a single curve there is nothing to optimize; otherwise search among
    // hilbertsToTry randomly generated Hilbert curves for the best index.
    PolyChromaticClosestPoint<string> pccp;
    if (hilbertsToTry <= 1)
    {
        pccp = new PolyChromaticClosestPoint<string>(clusters);
    }
    else
    {
        var bitsPerDimension = (1 + data.MaxCoordinate).SmallestPowerOfTwo();
        var hilbertPoints = clusters.Points()
            .Select(up => HilbertPoint.CastOrConvert(up, bitsPerDimension, true))
            .ToList();
        var searchResults = OptimalIndex.Search(
            hilbertPoints,
            5,  // outlier size
            10, // NoiseSkipBy
            1,  // ReducedNoiseSkipBy
            hilbertsToTry
        );
        pccp = new PolyChromaticClosestPoint<string>(clusters, searchResults.Index);
    }

    // Compare cluster "0" against every other cluster, tallying how often the
    // approximate search matches or beats the exhaustive answer.
    for (var iColor2 = 1; iColor2 < numClusters; iColor2++)
    {
        var color2 = iColor2.ToString();
        var exact = pccp.FindPairExhaustively(color1, color2);
        var approximate = pccp.FindPairApproximately(color1, color2);
        var expectedDistance = exact.SquareDistance;
        var actualDistance = approximate.SquareDistance;
        if (actualDistance <= expectedDistance)
        {
            successCount++;
        }
        else
        {
            worstRatio = Math.Max(worstRatio, actualDistance / (double)expectedDistance);
        }
        if (exact.SquareDistance >= approximate.SquareDistance)
        {
            Console.WriteLine("FindPairApproximately CORRECT. Exact {0}. Approx {1}", exact, approximate);
        }
        else
        {
            Console.WriteLine("FindPairApproximately INCORRECT. Exact {0}. Approx {1}. Too high by {2:N3}%",
                exact, approximate, 100.0 * (approximate.SquareDistance / (double)exact.SquareDistance - 1.0));
        }
    }
    Assert.AreEqual(numClusters - 1, successCount,
        string.Format("Did not succeed every time. Failed {0} of {1} times. Worst distance ratio is {2:N4}. {3} points of {4} dimensions.",
            numClusters - successCount - 1,
            numClusters - 1,
            worstRatio,
            nPoints,
            dimensions
        )
    );
}
/// <summary>
/// Verify that searching several retained Hilbert curves finds, for every cluster,
/// the same nearest neighboring cluster (by color and by distance) as an
/// exhaustive search does.
/// </summary>
/// <param name="nPoints">Number of points in each cluster.</param>
/// <param name="dimensions">Number of dimensions in each point.</param>
/// <param name="numClusters">Number of clusters to create.</param>
/// <param name="numCurvesToTry">Number of candidate Hilbert curves to generate.</param>
/// <param name="numCurvesToKeep">Number of best curves to retain and query.</param>
public void ClosestClusterTest(int nPoints, int dimensions, int numClusters, int numCurvesToTry, int numCurvesToKeep)
{
    var correctColorCount = 0;
    var correctDistanceCount = 0;
    var data = new GaussianClustering
    {
        ClusterCount = numClusters,
        Dimensions = dimensions,
        MaxCoordinate = 1000,
        MinClusterSize = nPoints,
        MaxClusterSize = nPoints
    };
    var bestExactPair = new PolyChromaticClosestPoint<string>.ClosestPair();
    var bestApproximatePair = new PolyChromaticClosestPoint<string>.ClosestPair();
    var clusters = data.MakeClusters();

    // Retain the best numCurvesToKeep of numCurvesToTry candidate curves and build
    // one PolyChromaticClosestPoint searcher per retained curve.
    var bitsPerDimension = (1 + data.MaxCoordinate).SmallestPowerOfTwo();
    var hilbertPoints = clusters.Points()
        .Select(up => HilbertPoint.CastOrConvert(up, bitsPerDimension, true))
        .ToList();
    var bestIndices = OptimalIndex.SearchMany(
        hilbertPoints,
        numCurvesToKeep,
        5,  // outlier size
        10, // NoiseSkipBy
        1,  // ReducedNoiseSkipBy
        numCurvesToTry
    );
    var pccps = bestIndices
        .Select(result => new PolyChromaticClosestPoint<string>(clusters, result.Index))
        .ToList();

    var pccp1 = pccps[0];
    foreach (var color in pccp1.Clusters.ClassLabels())
    {
        var exact = pccp1.FindClusterExhaustively(color);
        // Take the best answer produced by any of the retained curves.
        var approximate = pccps
            .Select(pccp => pccp.FindClusterApproximately(color))
            .OrderBy(cp => cp)
            .First();
        correctDistanceCount += exact.SquareDistance >= approximate.SquareDistance ? 1 : 0;
        correctColorCount += exact.Color2.Equals(approximate.Color2) ? 1 : 0;
        if (exact.SquareDistance < bestExactPair.SquareDistance)
        {
            bestExactPair = exact;
        }
        if (approximate.SquareDistance < bestApproximatePair.SquareDistance)
        {
            bestApproximatePair = approximate;
        }
        var ratio = approximate.SquareDistance / (double)exact.SquareDistance;
        Console.WriteLine(string.Format("Exact {0} vs Approx. {1}. Over by {2:N3}%", exact, approximate, (ratio - 1.0) * 100.0));
    }
    if (bestExactPair.SquareDistance >= bestApproximatePair.SquareDistance)
    {
        Console.WriteLine("DID FIND the closest pair of points overall. Exact {0}. Approx {1}", bestExactPair, bestApproximatePair);
    }
    else
    {
        Console.WriteLine("DID NOT FIND the closest pair of points overall. Exact {0}. Approx {1}", bestExactPair, bestApproximatePair);
    }
    Assert.IsTrue(correctColorCount == numClusters && correctDistanceCount == numClusters,
        string.Format("Of {0} clusters, only {1} searches found the closest cluster and {2} found the shortest distance.",
            numClusters,
            correctColorCount,
            correctDistanceCount
        )
    );
}
/// <summary>
/// Perform a classification of two clusters that are near enough to each other to partially overlap, causing problems.
///
/// From this we can deduce which of six cases obtain (the SplitQuality).
/// </summary>
/// <param name="numPoints">Number of points.</param>
/// <param name="dimensions">Number of Dimensions.</param>
/// <param name="overlapPercent">Overlap percent.</param>
/// <param name="clusterSizeVariation">Cluster size variation.</param>
/// <param name="maxCoordinate">Max value of any coordinate.</param>
/// <param name="acceptablePrecision">Acceptable precision.</param>
/// <param name="useDensityClassifier">If set to <c>true</c> use density classifier.</param>
/// <returns>A Tuple with these parts:
///   1) comparison of actual to expected (with its BCubed),
///   2) the expected number of clusters,
///   3) the actual number of clusters,
///   4) a qualitative assessment of the results.
/// </returns>
private Tuple<ClusterMetric<UnsignedPoint, string>, int, int, SplitQuality> ClassifyTwoClustersHelper(int numPoints, int dimensions, double overlapPercent,
    int clusterSizeVariation = 0, int maxCoordinate = 1000, double acceptablePrecision = 0.98, bool useDensityClassifier = true)
{
    Logger.SetupForTests();
    var bitsPerDimension = maxCoordinate.SmallestPowerOfTwo();
    var clusterCount = 2;
    var minClusterSize = (numPoints / clusterCount) - clusterSizeVariation;
    var maxClusterSize = (numPoints / clusterCount) + clusterSizeVariation;
    var outlierSize = 5;
    var radiusShrinkage = 0.6; // 0.7 merges too many that belong apart!
    var data = new GaussianClustering
    {
        ClusterCount = clusterCount,
        Dimensions = dimensions,
        MaxCoordinate = maxCoordinate,
        MinClusterSize = minClusterSize,
        MaxClusterSize = maxClusterSize
    };
    var expectedClusters = data.TwoClusters(overlapPercent);

    Classification<UnsignedPoint, string> actualClusters;
    if (useDensityClassifier)
    {
        var hIndex = new HilbertIndex(expectedClusters, bitsPerDimension);
        var cc = new ClusterCounter { NoiseSkipBy = 10, OutlierSize = outlierSize, ReducedNoiseSkipBy = 1 };
        var count = cc.Count(hIndex.SortedPoints);
        // Clusters at least this large may never be merged away.
        var unmergeableSize = expectedClusters.NumPoints / 6;
        var densityClassifier = new DensityClassifier(hIndex, count.MaximumSquareDistance, unmergeableSize)
        {
            MergeableShrinkage = radiusShrinkage
        };
        actualClusters = densityClassifier.Classify();
    }
    else
    {
        var classifier = new HilbertClassifier(expectedClusters.Points(), 10) { OutlierSize = outlierSize };
        classifier.IndexConfig.UseSample = false;
        actualClusters = classifier.Classify();
    }
    var comparison = expectedClusters.Compare(actualClusters);

    // Order matters in this cascade: each test assumes every test above it failed.
    SplitQuality qualitativeResult;
    if (comparison.BCubed >= 1.0)
    {
        qualitativeResult = SplitQuality.PerfectSplit;
    }
    else if (actualClusters.NumPartitions == 1)
    {
        qualitativeResult = SplitQuality.Unsplit;
    }
    else if (actualClusters.NumPartitions > expectedClusters.NumPartitions && comparison.Precision >= 1.0)
    {
        qualitativeResult = SplitQuality.GoodOverSplit;
    }
    else if (actualClusters.NumPartitions > expectedClusters.NumPartitions && comparison.Precision >= acceptablePrecision)
    {
        qualitativeResult = SplitQuality.FairOverSplit;
    }
    else if (actualClusters.NumPartitions == expectedClusters.NumPartitions && comparison.Precision >= acceptablePrecision)
    {
        qualitativeResult = SplitQuality.GoodSplit;
    }
    else if (actualClusters.NumPartitions > expectedClusters.NumPartitions && comparison.Precision < 1.0)
    {
        qualitativeResult = SplitQuality.BadOverSplit;
    }
    else // Assume correct number of clusters but an unacceptable split.
    {
        qualitativeResult = SplitQuality.BadSplit;
    }
    Logger.Info($" Quality: {qualitativeResult} Comparison: {comparison}");
    return new Tuple<ClusterMetric<UnsignedPoint, string>, int, int, SplitQuality>(
        comparison,
        expectedClusters.NumPartitions,
        actualClusters.NumPartitions,
        qualitativeResult
    );
}
/// <summary>
/// For fifty Gaussian clusters, verify that the approximate closest-cluster search
/// (and an iterative crosscheck) agree with an exhaustive search for every cluster.
/// </summary>
public void ClosestOfFiftyClusters()
{
    var hilbertTries = 1000;
    var correctColorCount = 0;
    var correctCrosscheckCount = 0;
    var correctDistanceCount = 0;
    var nPoints = 100;
    var dimensions = 100;
    var clusterCount = 50;
    var data = new GaussianClustering
    {
        ClusterCount = clusterCount,
        Dimensions = dimensions,
        MaxCoordinate = 1000,
        MinClusterSize = nPoints,
        MaxClusterSize = nPoints
    };
    var bestExactPair = new PolyChromaticClosestPoint<string>.ClosestPair();
    var bestApproximatePair = new PolyChromaticClosestPoint<string>.ClosestPair();
    var bitsPerDimension = (1 + data.MaxCoordinate).SmallestPowerOfTwo();
    var clusters = data.MakeClusters();
    Assert.AreEqual(clusterCount, clusters.NumPartitions, "Test data are grouped into fewer clusters than requested.");

    // With a single curve there is nothing to optimize; otherwise search among
    // hilbertTries candidate Hilbert curves for the best index.
    PolyChromaticClosestPoint<string> pccp;
    if (hilbertTries <= 1)
    {
        pccp = new PolyChromaticClosestPoint<string>(clusters);
    }
    else
    {
        var reducedNoiseSkipBy = 1;
        var searchResults = OptimalIndex.Search(
            clusters.Points().Select(up => HilbertPoint.CastOrConvert(up, bitsPerDimension, true)).ToList(),
            5,  // outlier size
            10, // NoiseSkipBy
            reducedNoiseSkipBy,
            hilbertTries
        );
        pccp = new PolyChromaticClosestPoint<string>(clusters, searchResults.Index);
    }
    foreach (var color in pccp.Clusters.ClassLabels())
    {
        var exact = pccp.FindClusterExhaustively(color);
        var approximate = pccp.FindClusterApproximately(color);
        var crosscheck = pccp.FindClusterIteratively(color);
        correctDistanceCount += exact.SquareDistance >= approximate.SquareDistance ? 1 : 0;
        correctColorCount += exact.Color2.Equals(approximate.Color2) ? 1 : 0;
        correctCrosscheckCount += exact.Color2.Equals(crosscheck.Color2) ? 1 : 0;
        if (exact.SquareDistance < bestExactPair.SquareDistance)
        {
            bestExactPair = exact;
        }
        if (approximate.SquareDistance < bestApproximatePair.SquareDistance)
        {
            bestApproximatePair = approximate;
        }
        var ratio = approximate.SquareDistance / (double)exact.SquareDistance;
        Console.WriteLine(string.Format("Exact {0} vs Approx. {1} vs Cross {2}. Over by {3:N3}%", exact, approximate, crosscheck, (ratio - 1.0) * 100.0));
    }
    if (bestExactPair.SquareDistance >= bestApproximatePair.SquareDistance)
    {
        Console.WriteLine("DID FIND the closest pair of points overall. Exact {0}. Approx {1}", bestExactPair, bestApproximatePair);
    }
    else
    {
        Console.WriteLine("DID NOT FIND the closest pair of points overall. Exact {0}. Approx {1}", bestExactPair, bestApproximatePair);
    }
    Assert.IsTrue(correctColorCount == clusterCount && correctDistanceCount == clusterCount,
        string.Format("Of {0} clusters, only {1} searches found the closest cluster and {2} found the shortest distance. Crosscheck = {3}",
            clusterCount,
            correctColorCount,
            correctDistanceCount,
            correctCrosscheckCount
        )
    );
}
/// <summary>
/// Validate that SquareDistanceCompare agrees with a full distance computation for
/// random pairs of points, at both a short (merge) and a long threshold distance,
/// and report how often the cheaper triangulation shortcut is usable.
/// </summary>
/// <param name="numTriangulationPoints">Number of reference points used for triangulation.</param>
/// <returns>Percentage of pairs that were triangulatable at the merge distance.</returns>
public double SquareDistanceCompareValidationCase(int numTriangulationPoints)
{
    var correctResult = 0;
    var wrongResult = 0;
    var totalComparisons = 10000;
    var extraShortTriangulatable = 0;
    var extraShortNotTriangulatable = 0;
    var shortTriangulatable = 0;
    var shortNotTriangulatable = 0;
    var longTriangulatable = 0;
    var longNotTriangulatable = 0;

    // 1. Make test data.
    var bitsPerDimension = 10;
    var data = new GaussianClustering
    {
        ClusterCount = 100,
        Dimensions = 100,
        MaxCoordinate = (1 << bitsPerDimension) - 1,
        MinClusterSize = 50,
        MaxClusterSize = 150
    };
    var clusters = data.MakeClusters();

    // 2. Create HilbertIndex for points.
    var hIndex = new HilbertIndex(clusters, bitsPerDimension);
    hIndex.SetTriangulation(numTriangulationPoints);

    // 3. Deduce the characteristic distance.
    var counter = new ClusterCounter { OutlierSize = 5, NoiseSkipBy = 10 };
    var count = counter.Count(hIndex.SortedPoints);
    var mergeDistance = count.MaximumSquareDistance;
    var longDistance = 5 * mergeDistance;

    // 4. Select random pairs of the HilbertPoints and see how many distance
    //    comparisons yield the correct result.
    var rng = new FastRandom();
    var points = hIndex.SortedPoints.ToList();
    for (var i = 0; i < totalComparisons; i++)
    {
        var p1 = points[rng.Next(points.Count)];
        var p2 = points[rng.Next(points.Count)];
        var d = p1.Measure(p2);
        // SquareDistanceCompare must agree with the full measurement at each threshold.
        if (d.CompareTo(mergeDistance) == p1.SquareDistanceCompare(p2, mergeDistance))
        {
            correctResult++;
        }
        else
        {
            wrongResult++;
        }
        if (d.CompareTo(longDistance) == p1.SquareDistanceCompare(p2, longDistance))
        {
            correctResult++;
        }
        else
        {
            wrongResult++;
        }
        // Tally triangulatability at three scales: half merge, merge, and long distance.
        if (p1.Triangulatable(p2, mergeDistance / 2))
        {
            extraShortTriangulatable++;
        }
        else
        {
            extraShortNotTriangulatable++;
        }
        if (p1.Triangulatable(p2, mergeDistance))
        {
            shortTriangulatable++;
        }
        else
        {
            shortNotTriangulatable++;
        }
        if (p1.Triangulatable(p2, longDistance))
        {
            longTriangulatable++;
        }
        else
        {
            longNotTriangulatable++;
        }
    }
    var extraShortPct = 100.0 * extraShortTriangulatable / (extraShortTriangulatable + extraShortNotTriangulatable);
    var shortPct = 100.0 * shortTriangulatable / (shortTriangulatable + shortNotTriangulatable);
    var longPct = 100.0 * longTriangulatable / (longTriangulatable + longNotTriangulatable);
    Console.WriteLine($"Triangulatable? \n XS: {extraShortPct} % \n Short: {shortPct} % Yes {shortTriangulatable}, No {shortNotTriangulatable}\n Long: {longPct} % Yes {longTriangulatable}, No {longNotTriangulatable}");
    // BUG FIX: NUnit's Assert.AreEqual takes (expected, actual); the original passed
    // them reversed as (wrongResult, 0), which yields a misleading failure message
    // ("expected N, was 0" instead of "expected 0, was N").
    Assert.AreEqual(0, wrongResult, $"{correctResult} correct, {wrongResult} wrong");
    return shortPct;
}