/// <summary>
/// Queries a histogram of the column at several estimated bucket sizes and selects one of them.
/// </summary>
/// <returns>
/// The candidate with the smallest snapped bucket size; ties are broken by the lowest suppressed count.
/// </returns>
protected async override Task <Result> Explore()
        {
            var columnStats = await statsResultProvider.ResultAsync;

            // Candidate bucket sizes estimated from the column's count and value range.
            var candidateSizes = BucketUtils.EstimateBucketResolutions(
                columnStats.Count,
                columnStats.Min,
                columnStats.Max,
                ValuesPerBucketTarget,
                isIntegerColumn: ctx.ColumnType == DValueType.Integer);

            var histogramQuery = new SingleColumnHistogram(ctx.Table, ctx.Column, candidateSizes);
            var queryResult = await conn.Exec(histogramQuery);

            var histogramsBySize = Histogram.FromQueryRows(queryResult.Rows);

            // Value counts are computed separately for the rows of each bucket size.
            var countsBySize = queryResult.Rows.GroupBy(
                row => row.BucketSize,
                (size, group) => (BucketSize: new BucketSize(size), Rows: ValueCounts.Compute(group)));

            // Pair each bucket size's value counts with the histogram of the same snapped size.
            var candidates =
                from counts in countsBySize
                join histogram in histogramsBySize
                    on counts.BucketSize.SnappedSize equals histogram.BucketSize.SnappedSize
                select new Result(counts.Rows, histogram);

            return candidates
                .OrderBy(candidate => candidate.BucketSize.SnappedSize)
                .ThenBy(candidate => candidate.ValueCounts.SuppressedCount)
                .First();
        }
        /// <summary>
        /// Verifies that <c>BucketUtils.BucketRange</c> produces the expected text
        /// for the supplied index and bucket range.
        /// </summary>
        public void BucketRange(int index, int bucketRange, string expected)
        {
            // Act: there is nothing to arrange, the inputs come straight from the parameters.
            string actual = BucketUtils.BucketRange(index, bucketRange);

            // Assert
            Assert.Equal(expected, actual);
        }
Пример #3
0
        /// <summary>
        /// Queries histograms of the column at several estimated bucket sizes and pairs each
        /// histogram with the value counts computed from the rows of the same bucket size.
        /// </summary>
        /// <returns>
        /// One <c>HistogramWithCounts</c> per matched bucket size, or <c>null</c> when the stats
        /// result is unavailable or no usable (distinct) min/max bounds can be determined.
        /// </returns>
        protected async override Task <List <HistogramWithCounts>?> Explore()
        {
            var stats = await statsResultProvider.ResultAsync;

            if (stats == null)
            {
                return null;
            }

            var(minBound, maxBound) = (stats.Min, stats.Max);
            if (!minBound.HasValue || !maxBound.HasValue)
            {
                // The stats did not provide both bounds: fall back to the distinct values.
                var distincts = await distinctValuesProvider.ResultAsync;
                if (distincts == null || distincts.ValueCounts.NonSuppressedNonNullCount == 0)
                {
                    return null;
                }

                // Materialize once: the sequence may be read by both Min() and Max().
                var values = distincts.DistinctRows
                             .Where(row => row.HasValue)
                             .Select(row => row.Value.GetDouble())
                             .ToList();

                // Min()/Max() throw on an empty sequence; when there are no values the bounds
                // stay unset and the check below reports the problem instead.
                if (values.Count > 0)
                {
                    minBound ??= values.Min();
                    maxBound ??= values.Max();
                }
            }

            if (!minBound.HasValue || !maxBound.HasValue || minBound == maxBound)
            {
                // Pass the column as a template argument so its name actually appears in the log.
                Logger.LogWarning(
                    "Unable to calculate suitable bounds for numerical column {Column}.",
                    Context.Column);

                return null;
            }

            var bucketsToSample = BucketUtils.EstimateBucketResolutions(
                stats.Count,
                (double)minBound,
                (double)maxBound,
                ValuesPerBucketTarget,
                isIntegerColumn: Context.ColumnInfo.Type == DValueType.Integer);

            var histogramQ = await Context.Exec(new SingleColumnHistogram(bucketsToSample));

            var histograms = Histogram.FromQueryRows(histogramQ.Rows);

            // Value counts are computed separately for the rows of each bucket size.
            var valueCounts = histogramQ.Rows
                              .GroupBy(
                row => row.BucketSize,
                (bs, rows) => (BucketSize: new BucketSize(bs), Rows: ValueCounts.Compute(rows)));

            // Pair the value counts and the histogram that share the same snapped bucket size.
            return valueCounts
                   .Join(
                       histograms,
                       v => v.BucketSize.SnappedSize,
                       h => h.BucketSize.SnappedSize,
                       (v, h) => new HistogramWithCounts(v.Rows, h))
                   .ToList();
        }
        /// <summary>
        /// Builds the header row: the fixed "Cohort" and "Customers" columns followed by one
        /// "... days" column per bucket, up to the largest bucket count among the cohort groups.
        /// </summary>
        /// <param name="cohortGroups">Cohort groups whose bucket counts decide how many bucket columns are added.</param>
        /// <returns>The header captions in column order. An empty input yields only the two fixed columns.</returns>
        private List <string> GenerateHeaders(IEnumerable <CohortGroup> cohortGroups)
        {
            var headers = new List<string>
            {
                "Cohort",
                "Customers",
            };

            // Max() throws on an empty sequence; DefaultIfEmpty(0) makes an empty report
            // produce just the fixed headers instead of crashing.
            var maximumBuckets = cohortGroups
                                 .Select(item => item.Buckets.Count())
                                 .DefaultIfEmpty(0)
                                 .Max();

            for (int i = 0; i < maximumBuckets; i++)
            {
                headers.Add($"{BucketUtils.BucketRange(i, _settings.LifeCycleRange)} days");
            }

            return headers;
        }