/// <summary>
/// Shuffles the data randomly and then estimates the value at the given quantile.
///
/// Inputs with fewer than 100 items are fully sorted and the quantile is read directly
/// from the sorted array. For larger inputs the data is shuffled, the first 21 shuffled
/// items are sorted, and the item at the requested quantile of that small sample is used
/// as the seed of a <see cref="FrugalQuantile"/> estimator, which is then fed the
/// shuffled data.
/// </summary>
/// <param name="data">The data to analyze.
/// Best results for randomly ordered, Gaussian distributed data.
/// Poorest results for sorted data or data drawn from an unusual distribution.</param>
/// <param name="quantileNumerator">Quantile numerator.
/// For the third quartile, use three, for example.</param>
/// <param name="quantileDenominator">Quantile denominator.
/// For the third quartile, use four, for example.
/// The ratio numerator / denominator is used to index into sorted data, so a ratio
/// outside the range zero to one can produce an out-of-range index.</param>
/// <param name="stepAdjuster">Optional function assigned to the estimator's
/// <c>StepAdjuster</c>. When null, <c>FrugalQuantile.LinearStepAdjuster</c> is used.
/// Ignored for inputs with fewer than 100 items.</param>
/// <returns>The quantile value, or zero if <paramref name="data"/> is empty.
/// For example, if quantileNumerator / quantileDenominator is one half, the median value is returned.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
public static int ShuffledEstimate(IEnumerable<int> data, double quantileNumerator = 50, double quantileDenominator = 100, Func<int, int> stepAdjuster = null)
{
    // Inputs smaller than this are answered exactly by sorting everything.
    const int ExactThreshold = 100;
    // Number of shuffled items that are sorted to pick the estimator's seed.
    const int SeedSampleSize = 21;

    if (data == null)
    {
        throw new ArgumentNullException(nameof(data));
    }

    var dataArray = data.ToArray();
    if (dataArray.Length == 0)
    {
        return 0;
    }

    if (dataArray.Length < ExactThreshold)
    {
        Array.Sort(dataArray);
        // Multiply before dividing (as the index has always been computed) so that
        // rounding behaves exactly as before. Math.Round rounds midpoints to even.
        var exactIndex = (int)Math.Round((dataArray.Length - 1) * quantileNumerator / quantileDenominator, 0);
        return dataArray[exactIndex];
    }

    var shuffledData = dataArray.Shuffle();

    // Sort only the leading sample; the rest of the shuffled data keeps its random order.
    Array.Sort(shuffledData, 0, SeedSampleSize);
    var seed = shuffledData[(int)((SeedSampleSize - 1) * quantileNumerator / quantileDenominator)];

    var probe = new FrugalQuantile(seed, quantileNumerator, quantileDenominator)
    {
        StepAdjuster = stepAdjuster ?? FrugalQuantile.LinearStepAdjuster
    };
    return probe.AddRange(shuffledData);
}
/// <summary>
/// Estimates the value at the given quantile for a collection of data by feeding the
/// data, in the order given, to a <see cref="FrugalQuantile"/> estimator that uses
/// <c>LinearStepAdjuster</c>.
/// </summary>
/// <param name="data">The data to analyze.
/// Best results for randomly ordered, Gaussian distributed data.
/// Poorest results for sorted data or data drawn from an unusual distribution.</param>
/// <param name="quantileNumerator">Quantile numerator.
/// For the third quartile, use three, for example.</param>
/// <param name="quantileDenominator">Quantile denominator.
/// For the third quartile, use four, for example.</param>
/// <param name="seed">Seed value handed to the estimator as the initial value of the estimate.</param>
/// <returns>The quantile value.
/// For example, if quantileNumerator / quantileDenominator is one half, the median value is returned.
/// </returns>
public static int Estimate(IEnumerable<int> data, double quantileNumerator = 50, double quantileDenominator = 100, int seed = 0)
{
    var estimator = new FrugalQuantile(seed, quantileNumerator, quantileDenominator)
    {
        StepAdjuster = LinearStepAdjuster
    };

    // AddRange consumes the data and yields the resulting estimate.
    var result = estimator.AddRange(data);
    return result;
}
/// <summary>
/// Builds the transform from the values of one dimension: estimates the median with
/// <c>FrugalQuantile.ShuffledEstimate</c>, tracks the minimum and maximum seen along
/// the way, and derives the bit counts from the maximum.
/// </summary>
/// <param name="values">The values to analyze. They are enumerated by the median
/// estimator; Minimum and Maximum are updated as each value passes through.</param>
public DimensionTransform(IEnumerable<int> values)
{
    // Since FrugalQuantile is going to iterate over the data once anyways,
    // piggyback on that pass to compute Minimum and Maximum. The wrapper is lazy:
    // nothing is updated until the estimator enumerates it. Minimum and Maximum are
    // folded starting from whatever values they hold before this constructor runs.
    var observedValues = values.Select(x =>
    {
        Minimum = Min(Minimum, x);
        Maximum = Max(Maximum, x);
        return x;
    });

    Median = FrugalQuantile.ShuffledEstimate(observedValues);

    // NOTE(review): presumably the number of bits needed to represent values up to
    // Maximum - confirm against the SmallestPowerOfTwo extension.
    var requiredBits = (Maximum + 1).SmallestPowerOfTwo();
    BitsPerDimension = requiredBits;
    MinimumBitsRequired = requiredBits;
}