/// <summary>
/// Queries histograms for the column at several estimated bucket resolutions and picks one of them.
/// </summary>
/// <returns>
/// The candidate with the smallest snapped bucket size; ties are broken by the lowest suppressed count.
/// </returns>
protected async override Task<Result> Explore()
{
    var columnStats = await statsResultProvider.ResultAsync;

    // Candidate bucket sizes derived from the column's count and value range.
    var resolutions = BucketUtils.EstimateBucketResolutions(
        columnStats.Count,
        columnStats.Min,
        columnStats.Max,
        ValuesPerBucketTarget,
        isIntegerColumn: ctx.ColumnType == DValueType.Integer);

    var histogramQuery = new SingleColumnHistogram(ctx.Table, ctx.Column, resolutions);
    var queryResult = await conn.Exec(histogramQuery);

    var histogramsBySize = Histogram.FromQueryRows(queryResult.Rows);

    // One value-count summary per distinct bucket size found in the raw rows.
    var countsBySize = queryResult.Rows.GroupBy(
        row => row.BucketSize,
        (size, group) => (BucketSize: new BucketSize(size), Rows: ValueCounts.Compute(group)));

    // Pair each summary with the histogram of the same snapped size, then rank the pairs.
    var ranked =
        from counts in countsBySize
        join histogram in histogramsBySize
            on counts.BucketSize.SnappedSize equals histogram.BucketSize.SnappedSize
        select new Result(counts.Rows, histogram) into candidate
        orderby candidate.BucketSize.SnappedSize, candidate.ValueCounts.SuppressedCount
        select candidate;

    return ranked.First();
}
// Checks that BucketUtils.BucketRange produces the expected text
// for the supplied index and bucket range.
public void BucketRange(int index, int bucketRange, string expected)
{
    // Arrange: all inputs arrive through the method parameters.

    // Act
    string actual = BucketUtils.BucketRange(index, bucketRange);

    // Assert
    Assert.Equal(expected, actual);
}
/// <summary>
/// Queries histograms for the numerical column at several estimated bucket resolutions and
/// pairs each histogram with the value counts computed for the same bucket size.
/// </summary>
/// <returns>
/// One <see cref="HistogramWithCounts"/> per matched bucket size, or <c>null</c> when the
/// statistics or distinct values are unavailable, or no usable (non-degenerate) value range
/// can be determined.
/// </returns>
protected async override Task<List<HistogramWithCounts>?> Explore()
{
    var stats = await statsResultProvider.ResultAsync;
    if (stats == null)
    {
        return null;
    }

    var (minBound, maxBound) = (stats.Min, stats.Max);
    if (!minBound.HasValue || !maxBound.HasValue)
    {
        // The stats did not provide both bounds; fall back to the distinct values.
        var distincts = await distinctValuesProvider.ResultAsync;
        if (distincts == null || distincts.ValueCounts.NonSuppressedNonNullCount == 0)
        {
            return null;
        }

        // Materialize once: the sequence may be consumed by both Min() and Max().
        var values = distincts.DistinctRows
            .Where(row => row.HasValue)
            .Select(row => row.Value.GetDouble())
            .ToList();

        minBound ??= values.Min();
        maxBound ??= values.Max();
    }

    if (!minBound.HasValue || !maxBound.HasValue || minBound == maxBound)
    {
        // Pass the column as a template argument; the previous message contained a
        // placeholder with no matching argument, so the column was never reported.
        Logger.LogWarning(
            "Unable to calculate suitable bounds for numerical column {Column}.",
            Context.Column);
        return null;
    }

    var bucketsToSample = BucketUtils.EstimateBucketResolutions(
        stats.Count,
        (double)minBound,
        (double)maxBound,
        ValuesPerBucketTarget,
        isIntegerColumn: Context.ColumnInfo.Type == DValueType.Integer);

    var histogramQ = await Context.Exec(new SingleColumnHistogram(bucketsToSample));

    var histograms = Histogram.FromQueryRows(histogramQ.Rows);

    // One value-count summary per distinct bucket size found in the raw rows.
    var valueCounts = histogramQ.Rows
        .GroupBy(
            row => row.BucketSize,
            (bs, rows) => (BucketSize: new BucketSize(bs), Rows: ValueCounts.Compute(rows)));

    // Match summaries to histograms on the snapped bucket size.
    return valueCounts
        .Join(
            histograms,
            v => v.BucketSize.SnappedSize,
            h => h.BucketSize.SnappedSize,
            (v, h) => new HistogramWithCounts(v.Rows, h))
        .ToList();
}
/// <summary>
/// Builds the header row: the fixed "Cohort" and "Customers" columns followed by one
/// "&lt;range&gt; days" column per bucket index, up to the largest bucket count of any group.
/// </summary>
/// <param name="cohortGroups">The cohort groups whose bucket counts determine the column count.</param>
/// <returns>
/// The header captions in column order. When <paramref name="cohortGroups"/> is empty,
/// only the two fixed headers are returned.
/// </returns>
private List<string> GenerateHeaders(IEnumerable<CohortGroup> cohortGroups)
{
    var headers = new List<string>
    {
        "Cohort",
        "Customers",
    };

    // DefaultIfEmpty keeps Max() from throwing when there are no cohort groups.
    var maximumBuckets = cohortGroups
        .Select(item => item.Buckets.Count())
        .DefaultIfEmpty(0)
        .Max();

    for (var i = 0; i < maximumBuckets; i++)
    {
        headers.Add($"{BucketUtils.BucketRange(i, _settings.LifeCycleRange)} days");
    }

    return headers;
}