/// <summary>
/// Creates a histogram for the given bucket size from a set of buckets,
/// storing the buckets sorted by their lower bound.
/// </summary>
/// <param name="bucketSize">The bucket size all <paramref name="buckets"/> are expected to share.</param>
/// <param name="buckets">
/// The buckets of the histogram, keyed by their <c>LowerBound</c>. The sequence is enumerated exactly once.
/// </param>
/// <exception cref="System.ArgumentException">
/// Thrown by <c>ToDictionary</c> when two buckets have the same lower bound.
/// </exception>
public Histogram(BucketSize bucketSize, IEnumerable<HistogramBucket> buckets)
{
    // Enumerate the incoming sequence a single time. Asserting on the raw sequence and then
    // indexing it again would walk a lazy sequence twice, and the two passes could disagree.
    var bucketsByLowerBound = buckets.ToDictionary(b => b.LowerBound);

    // Debug-only sanity check over exactly the buckets that end up being stored.
    Debug.Assert(
        bucketsByLowerBound.Values.All(b => b.BucketSize.SnappedSize == bucketSize.SnappedSize),
        "Histogram buckets don't match given bucketsize");

    BucketSize = bucketSize;
    Buckets = new SortedList<decimal, HistogramBucket>(bucketsByLowerBound);
}
/// <summary>
/// Estimates candidate bucket resolutions from the number of samples and the range they span.
/// </summary>
/// <param name="numSamples">The number of samples; must be greater than zero.</param>
/// <param name="minSample">The smallest sample value.</param>
/// <param name="maxSample">The largest sample value.</param>
/// <param name="valuesPerBucketTarget">The desired number of values per bucket.</param>
/// <param name="isIntegerColumn">When true, the target bucket size is raised to at least 5.</param>
/// <returns>
/// The distinct snapped sizes of the estimated bucket size and of its
/// <c>Smaller(steps: 2)</c> and <c>Larger(steps: 2)</c> neighbours, skipping any that are null.
/// Returns a single resolution of 1 when <paramref name="maxSample"/> does not exceed
/// <paramref name="minSample"/>.
/// </returns>
/// <exception cref="System.ArgumentException">
/// Thrown when <paramref name="numSamples"/> is zero or negative.
/// </exception>
internal static decimal[] EstimateBucketResolutions(
    long numSamples,
    double minSample,
    double maxSample,
    long valuesPerBucketTarget,
    bool isIntegerColumn)
{
    if (numSamples <= 0)
    {
        throw new System.ArgumentException(
            $"Argument numSamples should always be greater than zero, got {numSamples}.");
    }

    var range = maxSample - minSample;
    if (range <= 0)
    {
        // No spread in the data: there is no density to derive a size from.
        return new decimal[] { 1M };
    }

    // Samples per unit of value range, and the bucket width that would hold the target count.
    var samplesPerUnit = numSamples / range;
    var idealSize = valuesPerBucketTarget / samplesPerUnit;
    if (isIntegerColumn)
    {
        idealSize = System.Math.Max(idealSize, 5);
    }

    var estimate = new BucketSize(idealSize);

    // Offer the estimate together with a finer and a coarser alternative.
    var candidates = new[]
    {
        estimate.Smaller(steps: 2),
        estimate,
        estimate.Larger(steps: 2),
    };

    var snappedSizes = candidates
        .Where(candidate => !(candidate is null))
        .Select(candidate => candidate!.SnappedSize);

    return snappedSizes.Distinct().ToArray();
}
/// <summary>
/// Initializes a histogram bucket from its lower bound, bucket size and noisy count.
/// </summary>
/// <param name="lowerBound">The value stored as the bucket's <c>LowerBound</c>.</param>
/// <param name="bucketSize">The value stored as the bucket's <c>BucketSize</c>.</param>
/// <param name="noisyCount">The noisy count kept in the bucket's private state.</param>
internal HistogramBucket(decimal lowerBound, BucketSize bucketSize, NoisyCount noisyCount)
{
    this.noisyCount = noisyCount;
    BucketSize = bucketSize;
    LowerBound = lowerBound;
}
/// <summary>
/// Determines whether this bucket is equal to another bucket.
/// </summary>
/// <param name="bucket">The bucket to compare against.</param>
/// <returns>
/// True when the noisy count, bucket size, lower bound, count and count noise all match;
/// otherwise false.
/// </returns>
public bool Equals(HistogramBucket bucket)
{
    // Checks run in a fixed order and stop at the first mismatch.
    if (!noisyCount.Equals(bucket.noisyCount))
    {
        return false;
    }

    if (!BucketSize.Equals(bucket.BucketSize))
    {
        return false;
    }

    return LowerBound == bucket.LowerBound
        && Count == bucket.Count
        && CountNoise == bucket.CountNoise;
}