Ejemplo n.º 1
0
        /// <summary>
        /// Shuffles the data randomly and then estimates the value at the given quantile.
        ///
        /// Inputs with fewer than 100 items are sorted and the quantile is read directly from the sorted array.
        /// For larger inputs, the seed of the estimate is the value found at the requested quantile position
        /// within the sorted first 21 items of the shuffled data.
        /// </summary>
        /// <param name="data">The data to analyze.
        /// Best results for randomly ordered, Gaussian distributed data.
        /// Poorest results for sorted data or data drawn from an unusual distribution.</param>
        /// <param name="quantileNumerator">Quantile numerator.
        /// For the third quartile, use three, for example.</param>
        /// <param name="quantileDenominator">Quantile denominator.
        /// For the third quartile, use four, for example.
        /// </param>
        /// <param name="stepAdjuster">Optional step adjuster assigned to the probe's <c>StepAdjuster</c>.
        /// When null, <c>FrugalQuantile.LinearStepAdjuster</c> is used.
        /// Only consulted when the data holds at least 100 items.</param>
        /// <returns>The quantile value, or zero when <paramref name="data"/> is empty.
        /// For example, if quantileNumerator / quantileDenominator is one half, the median value is returned.
        /// </returns>
        public static int ShuffledEstimate(IEnumerable<int> data, double quantileNumerator = 50, double quantileDenominator = 100, Func<int, int> stepAdjuster = null)
        {
            // Below this many items the quantile is read from a full sort instead of being estimated.
            const int ExactThreshold = 100;
            // Number of leading shuffled items that are sorted to pick the seed.
            const int SeedSampleSize = 21;

            var dataArray = data.ToArray();
            if (dataArray.Length == 0)
            {
                return 0;
            }

            if (dataArray.Length < ExactThreshold)
            {
                Array.Sort(dataArray);
                // Math.Round without a mode rounds midpoints to the nearest even integer.
                var position = Math.Round((dataArray.Length - 1) * quantileNumerator / quantileDenominator);
                return dataArray[ClampToIndex(position, dataArray.Length - 1)];
            }

            var shuffledData = dataArray.Shuffle();

            // Sorts the leading sample in place, so the probe below receives
            // those items in ascending order ahead of the rest of the shuffled data.
            Array.Sort(shuffledData, 0, SeedSampleSize);
            var seedPosition = (SeedSampleSize - 1) * quantileNumerator / quantileDenominator;
            var seed = shuffledData[ClampToIndex(seedPosition, SeedSampleSize - 1)];

            var probe = new FrugalQuantile(seed, quantileNumerator, quantileDenominator)
            {
                StepAdjuster = stepAdjuster ?? FrugalQuantile.LinearStepAdjuster
            };
            return probe.AddRange(shuffledData);
        }

        /// <summary>
        /// Truncates a fractional position to an array index, bounding it to the range zero through
        /// <paramref name="lastIndex"/>.
        /// </summary>
        /// <param name="position">Fractional position; may be NaN, infinite, or outside the valid range.</param>
        /// <param name="lastIndex">Largest valid index.</param>
        /// <returns>Zero for NaN or non-positive positions, <paramref name="lastIndex"/> for positions at or
        /// beyond it, and the truncated position otherwise.</returns>
        private static int ClampToIndex(double position, int lastIndex)
        {
            // Casting NaN or an out-of-range double to int is not a defined conversion,
            // so the bounds are checked before the cast.
            if (double.IsNaN(position) || position <= 0)
            {
                return 0;
            }

            if (position >= lastIndex)
            {
                return lastIndex;
            }

            return (int)position;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Feeds a collection of data through a frugal quantile probe and returns the value
        /// the probe reports for the requested quantile.
        /// </summary>
        /// <param name="data">The data to analyze.
        /// Randomly ordered, Gaussian distributed data gives the best results;
        /// sorted data or data from an unusual distribution gives the poorest.</param>
        /// <param name="quantileNumerator">Numerator of the quantile fraction
        /// (three for the third quartile, for example).</param>
        /// <param name="quantileDenominator">Denominator of the quantile fraction
        /// (four for the third quartile, for example).</param>
        /// <param name="seed">Initial value handed to the probe as the starting estimate.</param>
        /// <returns>The value returned by the probe after all of <paramref name="data"/> has been added.
        /// When quantileNumerator / quantileDenominator is one half, this is the median estimate.
        /// </returns>
        public static int Estimate(IEnumerable<int> data, double quantileNumerator = 50, double quantileDenominator = 100, int seed = 0)
        {
            var estimator = new FrugalQuantile(seed, quantileNumerator, quantileDenominator);
            estimator.StepAdjuster = LinearStepAdjuster;

            var result = estimator.AddRange(data);
            return result;
        }
Ejemplo n.º 3
0
 /// <summary>
 /// Builds the transform from a sequence of values: estimates Median via
 /// FrugalQuantile.ShuffledEstimate, tracks Minimum and Maximum along the way,
 /// and derives the bit counts from Maximum.
 /// </summary>
 /// <param name="values">The values to analyze.</param>
 public DimensionTransform(IEnumerable<int> values)
 {
     // The projection is deferred: Minimum and Maximum are only updated while the
     // estimator enumerates the sequence, which saves a separate pass over the data.
     var observed = values.Select(value =>
     {
         Minimum = Min(Minimum, value);
         Maximum = Max(Maximum, value);
         return value;
     });

     Median = FrugalQuantile.ShuffledEstimate(observed);

     // Both bit counts start out from the same value, computed from Maximum + 1.
     var bits = (Maximum + 1).SmallestPowerOfTwo();
     BitsPerDimension = bits;
     MinimumBitsRequired = bits;
 }