Example #1
0
        /// <summary>
        /// Estimates the value at the specified quantile by walking the centroids in the
        /// order <c>_centroids</c> enumerates them and interpolating inside the first
        /// centroid whose cumulative weight range contains the target rank.
        /// </summary>
        /// <param name="quantile">The quantile to estimate. Must be between 0 and 1.</param>
        /// <returns>The value for the estimated quantile; never greater than <c>Max</c>.</returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="quantile"/> is NaN or lies outside the range [0, 1].
        /// </exception>
        /// <exception cref="InvalidOperationException">The digest contains no centroids yet.</exception>
        public double Quantile(double quantile)
        {
            // Negated range test: every comparison with NaN is false, so NaN is rejected too.
            if (!(quantile >= 0 && quantile <= 1))
            {
                // The single-string constructor interprets its argument as the parameter
                // name, so the name and the message are passed separately.
                throw new ArgumentOutOfRangeException("quantile", "Quantile must be between 0 and 1");
            }

            if (_centroids.Count == 0)
            {
                throw new InvalidOperationException("Cannot call Quantile() method until first Adding values to the digest");
            }

            if (_centroids.Count == 1)
            {
                // With a single centroid there is nothing to interpolate against.
                return _centroids.First().Value.Mean;
            }

            int      i    = 0;                 // position of the current centroid in enumeration order
            double   t    = 0;                 // summed weight of the centroids already passed
            double   q    = quantile * _count; // target rank expressed in units of weight
            Centroid last = null;

            foreach (Centroid centroid in _centroids.Values)
            {
                last = centroid;
                double k = centroid.Count;

                if (q < t + k)
                {
                    // delta is the width over which this centroid's weight is spread,
                    // derived from the distance to its neighbouring centroid(s).
                    double delta;
                    if (i == 0)
                    {
                        // First centroid: only a successor is available.
                        Centroid successor = _centroids.Successor(centroid.Mean).Value;
                        delta = successor.Mean - centroid.Mean;
                    }
                    else if (i == _centroids.Count - 1)
                    {
                        // Last centroid: only a predecessor is available.
                        Centroid predecessor = _centroids.Predecessor(centroid.Mean).Value;
                        delta = centroid.Mean - predecessor.Mean;
                    }
                    else
                    {
                        // Interior centroid: half the distance between both neighbours.
                        Centroid successor   = _centroids.Successor(centroid.Mean).Value;
                        Centroid predecessor = _centroids.Predecessor(centroid.Mean).Value;
                        delta = (successor.Mean - predecessor.Mean) / 2;
                    }

                    // (q - t) / k is the fraction of this centroid's weight lying below the
                    // target rank; subtracting .5 centres the interpolation on the mean.
                    double estimated = centroid.Mean + ((q - t) / k - .5) * delta;

                    // If estimated value is higher than max, return max
                    return Math.Min(estimated, this.Max);
                }

                t += k;
                i++;
            }

            // No centroid's cumulative weight exceeded the target rank (for example when
            // quantile is 1 and the centroid weights add up to _count): use the last mean.
            return last.Mean;
        }
Example #2
0
        /// <summary>
        /// Computes the quantile position used for a centroid: the summed weight of every
        /// centroid enumerated before the first one whose mean exceeds the mean of
        /// <paramref name="centroid"/>, plus half of the centroid's own weight, divided
        /// by <c>_count</c>.
        /// </summary>
        /// <param name="centroid">The centroid whose quantile position is wanted.</param>
        /// <returns>The computed position as a fraction of <c>_count</c>.</returns>
        private double ComputeCentroidQuantile(Centroid centroid)
        {
            // NOTE(review): the scan stops only at a strictly greater mean, so when the
            // centroid is itself stored in _centroids its weight is part of this total in
            // addition to the half weight added below - confirm this is intended.
            double weightUpToCentroid = 0;

            foreach (Centroid other in _centroids.Values)
            {
                bool pastCentroid = other.Mean > centroid.Mean;
                if (pastCentroid)
                {
                    break;
                }

                weightUpToCentroid += other.Count;
            }

            double numerator = centroid.Count / 2 + weightUpToCentroid;

            return numerator / _count;
        }
Example #3
0
        /// <summary>
        /// Re-keys a centroid: the entry stored under <paramref name="oldMean"/> is removed
        /// and the centroid is added again under its current mean.
        /// </summary>
        /// <param name="oldMean">The key under which the centroid was stored before its mean changed.</param>
        /// <param name="c">The centroid to store under <c>c.Mean</c>.</param>
        private void ReInsertCentroid(double oldMean, Centroid c)
        {
            // The result of Remove is not inspected; the add below happens regardless.
            _centroids.Remove(oldMean);

            var currentKey = c.Mean;
            _centroids.Add(currentKey, c);
        }
Example #4
0
        /// <summary>
        /// Estimates the value at the specified quantile. The target rank
        /// (<c>quantile * _count</c>) is located between the centres of two adjacent
        /// centroids, or between an extreme centroid and <c>Min</c>/<c>Max</c>, and the
        /// result is interpolated with <c>WeightedAvg</c>.
        /// </summary>
        /// <param name="quantile">The quantile to estimate. Must be between 0 and 1.</param>
        /// <returns>The value for the estimated quantile</returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="quantile"/> is NaN or lies outside the range [0, 1].
        /// </exception>
        /// <exception cref="InvalidOperationException">The digest contains no centroids yet.</exception>
        public double Quantile(double quantile)
        {
            // Negated range test: every comparison with NaN is false, so NaN is rejected too.
            if (!(quantile >= 0 && quantile <= 1))
            {
                // The single-string constructor interprets its argument as the parameter
                // name, so the name and the message are passed separately.
                throw new ArgumentOutOfRangeException("quantile", "Quantile must be between 0 and 1");
            }

            if (_centroids.Count == 0)
            {
                throw new InvalidOperationException("Cannot call Quantile() method until first Adding values to the digest");
            }

            if (_centroids.Count == 1)
            {
                return(_centroids.First().Value.Mean);
            }

            // Target rank expressed in units of weight.
            double index = quantile * _count;

            // Ranks within one unit of weight of either end map directly to the extremes.
            if (index < 1)
            {
                return(Min);
            }
            if (index > Count - 1)
            {
                return(Max);
            }

            Centroid currentNode   = _centroids.First().Value;
            Centroid lastNode      = _centroids.Last().Value;
            double   currentWeight = currentNode.Count;

            if (currentWeight == 2 && index <= 2)
            {
                // first node is a double weight with one sample at min, so we can infer location of other sample
                return(2 * currentNode.Mean - Min);
            }

            if (lastNode.Count == 2 && index > Count - 2)
            {
                // likewise for last centroid
                return(2 * lastNode.Mean - Max);
            }

            // Weight lying to the left of the centre of the current centroid.
            double weightSoFar = currentWeight / 2.0;

            if (index < weightSoFar)
            {
                // The rank falls in the left half of the first centroid: interpolate between Min and its mean.
                return(WeightedAvg(Min, weightSoFar - index, currentNode.Mean, index - 1));
            }

            foreach (Centroid nextNode in _centroids.Values.Skip(1))
            {
                double nextWeight = nextNode.Count;
                // Weight lying between the centre of the current centroid and the centre of the next one.
                double dw         = (currentWeight + nextWeight) / 2.0;

                if (index < weightSoFar + dw)
                {
                    double leftExclusion  = 0;
                    double rightExclusion = 0;
                    if (currentWeight == 1)
                    {
                        // A centroid of weight 1 is a single sample: within half a unit of
                        // its centre the answer is that sample itself.
                        if (index < weightSoFar + 0.5)
                        {
                            return(currentNode.Mean);
                        }
                        else
                        {
                            leftExclusion = 0.5;
                        }
                    }
                    if (nextWeight == 1)
                    {
                        if (index >= weightSoFar + dw - 0.5)
                        {
                            return(nextNode.Mean);
                        }
                        else
                        {
                            rightExclusion = 0.5;
                        }
                    }
                    // centroids i and i+1 bracket our current point
                    // we interpolate, but the weights are diminished if singletons are present
                    double weight1 = index - weightSoFar - leftExclusion;
                    double weight2 = weightSoFar + dw - index - rightExclusion;
                    return(WeightedAvg(currentNode.Mean, weight2, nextNode.Mean, weight1));
                }

                weightSoFar  += dw;
                currentNode   = nextNode;
                currentWeight = nextWeight;
            }

            // The rank lies to the right of the last centroid's centre: interpolate towards Max.
            double w1 = index - weightSoFar;
            double w2 = Count - 1 - index;

            return(WeightedAvg(currentNode.Mean, w2, Max, w1));
        }
Example #5
0
        /// <summary>
        /// Add a new value to the T-Digest. Note that this method is NOT thread safe.
        /// </summary>
        /// <remarks>
        /// Updates the total weight, the running weighted mean and <c>Min</c>/<c>Max</c>,
        /// then tries to merge the value into one of the closest centroids that can still
        /// take the weight without reaching its threshold. Any weight that could not be
        /// merged is stored in a centroid keyed by <paramref name="value"/>. Afterwards
        /// <c>Compress()</c> is called if the centroid count exceeds
        /// <c>CompressionConstant / Accuracy</c>.
        /// </remarks>
        /// <param name="value">The value to add</param>
        /// <param name="weight">The relative weight associated with this value. Default is 1 for all values.</param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="weight"/> is NaN, zero or negative.
        /// </exception>
        public void Add(double value, double weight = 1)
        {
            // Negated comparison: NaN is not greater than 0, so it is rejected as well.
            if (!(weight > 0))
            {
                // The single-string constructor interprets its argument as the parameter
                // name, so the name and the message are passed separately.
                throw new ArgumentOutOfRangeException("weight", "Weight must be greater than 0");
            }

            var first = _count == 0;

            _count += weight;

            if (first)
            {
                _oldAvg = value;
                _newAvg = value;
                Min     = value;
                Max     = value;
            }
            else
            {
                // Incremental weighted mean: the new value pulls the mean by its share
                // (weight / total weight) of the difference. Without the weight factor the
                // update is only correct when weight is 1.
                _newAvg = _oldAvg + weight * (value - _oldAvg) / _count;
                _oldAvg = _newAvg;
                Max     = value > Max ? value : Max;
                Min     = value < Min ? value : Min;
            }

            if (_centroids.Count == 0)
            {
                _centroids.Add(value, new Centroid(value, weight));
                return;
            }

            var closest = GetClosestCentroids(value);

            // Keep only the centroids that stay below their threshold after taking the full weight.
            var candidates = closest
                             .Select(c => new {
                Threshold = GetThreshold(ComputeCentroidQuantile(c)),
                Centroid  = c
            })
                             .Where(c => c.Centroid.Count + weight < c.Threshold)
                             .ToList();

            while (candidates.Count > 0 && weight > 0)
            {
                // Pick one of the remaining candidates at random.
                var cData   = candidates[_rand.Next() % candidates.Count];
                var delta_w = Math.Min(cData.Threshold - cData.Centroid.Count, weight);

                double oldMean;
                if (cData.Centroid.Update(delta_w, value, out oldMean))
                {
                    // Update returned true: re-key the centroid from oldMean to its current mean.
                    ReInsertCentroid(oldMean, cData.Centroid);
                }

                weight -= delta_w;
                candidates.Remove(cData);
            }

            if (weight > 0)
            {
                var toAdd = new Centroid(value, weight);

                // When FindOrAdd returns true, the remaining weight is applied to the
                // centroid held in toAdd after the call (presumably the existing entry
                // for this key - confirm against FindOrAdd).
                if (_centroids.FindOrAdd(value, ref toAdd))
                {
                    double oldMean;

                    if (toAdd.Update(weight, toAdd.Mean, out oldMean))
                    {
                        ReInsertCentroid(oldMean, toAdd);
                    }
                }
            }

            if (_centroids.Count > (CompressionConstant / Accuracy))
            {
                Compress();
            }
        }