/// <summary>
/// Estimates the value at the specified quantile by walking the centroids and
/// interpolating linearly inside the centroid that contains the target
/// cumulative weight.
/// </summary>
/// <param name="quantile">The quantile to estimate. Must be between 0 and 1.</param>
/// <returns>The value for the estimated quantile</returns>
/// <exception cref="System.ArgumentOutOfRangeException">
/// <paramref name="quantile"/> is NaN or lies outside the range [0, 1].
/// </exception>
/// <exception cref="System.InvalidOperationException">
/// No values have been added to the digest yet.
/// </exception>
public double Quantile(double quantile)
{
    // NaN fails both range comparisons, so it has to be rejected explicitly.
    if (double.IsNaN(quantile) || quantile < 0 || quantile > 1)
    {
        // The (paramName, message) overload is required: the single-string
        // overload treats its argument as the parameter name, not the message.
        throw new ArgumentOutOfRangeException("quantile", "Quantile must be between 0 and 1");
    }

    if (_centroids.Count == 0)
    {
        throw new InvalidOperationException("Cannot call Quantile() method until first Adding values to the digest");
    }

    // With a single centroid there is nothing to interpolate against.
    if (_centroids.Count == 1)
    {
        return _centroids.First().Value.Mean;
    }

    int i = 0;                      // position of the current centroid in iteration order
    double t = 0;                   // weight accumulated by the centroids already passed
    double q = quantile * _count;   // target position expressed in units of weight
    Centroid last = null;

    foreach (Centroid centroid in _centroids.Values)
    {
        last = centroid;
        double k = centroid.Count;

        if (q < t + k)
        {
            // The target falls inside this centroid. delta approximates the
            // width it covers, derived from the distance to its neighbours.
            double delta;
            if (i == 0)
            {
                // First centroid: only a successor is available.
                Centroid successor = _centroids.Successor(centroid.Mean).Value;
                delta = successor.Mean - centroid.Mean;
            }
            else if (i == _centroids.Count - 1)
            {
                // Last centroid: only a predecessor is available.
                Centroid predecessor = _centroids.Predecessor(centroid.Mean).Value;
                delta = centroid.Mean - predecessor.Mean;
            }
            else
            {
                Centroid successor = _centroids.Successor(centroid.Mean).Value;
                Centroid predecessor = _centroids.Predecessor(centroid.Mean).Value;
                delta = (successor.Mean - predecessor.Mean) / 2;
            }

            // (q - t) / k is the fraction of this centroid's weight lying below
            // the target; subtracting .5 re-centres that fraction on the mean.
            double estimated = centroid.Mean + ((q - t) / k - .5) * delta;

            // If estimated value is higher than max, return max
            return Math.Min(estimated, this.Max);
        }

        t += k;
        i++;
    }

    // The target lies at or beyond the accumulated weight of every centroid.
    return last.Mean;
}
/// <summary>
/// Computes the quantile at the centre of <paramref name="centroid"/>: the
/// weight of every centroid with a strictly smaller mean plus half of the
/// centroid's own weight, divided by the total weight of the digest.
/// </summary>
/// <param name="centroid">The centroid whose quantile position is wanted.</param>
/// <returns>The fraction of the total weight lying below the centroid's centre.</returns>
private double ComputeCentroidQuantile(Centroid centroid)
{
    double weightBelow = 0;

    // The early exit relies on the values being enumerated in ascending mean order.
    foreach (Centroid c in _centroids.Values)
    {
        // Stop AT the centroid itself, not after it. Including its own weight
        // here and then adding half of it again below would push the result
        // too high by Count / _count, and above 1 for the last centroid.
        if (c.Mean >= centroid.Mean)
        {
            break;
        }

        weightBelow += c.Count;
    }

    double totalWeight = _count;

    // 2.0 keeps the halving in floating point whatever the type of Count is.
    return (centroid.Count / 2.0 + weightBelow) / totalWeight;
}
/// <summary>
/// Re-keys a centroid whose mean has changed: the entry stored under the old
/// mean is removed and the centroid is added again under its current mean.
/// </summary>
/// <param name="oldMean">The key the centroid was stored under before its mean changed.</param>
/// <param name="c">The centroid to store under its current <c>Mean</c>.</param>
private void ReInsertCentroid(double oldMean, Centroid c)
{
    // The result of Remove is not needed, so it is discarded.
    _centroids.Remove(oldMean);

    double currentMean = c.Mean;
    _centroids.Add(currentMean, c);
}
/// <summary>
/// Estimates the value at the specified quantile by locating the pair of
/// adjacent centroids whose centres bracket the target weight and
/// interpolating between their means. The recorded minimum and maximum are
/// used at the two ends of the distribution.
/// </summary>
/// <param name="quantile">The quantile to estimate. Must be between 0 and 1.</param>
/// <returns>The value for the estimated quantile</returns>
/// <exception cref="System.ArgumentOutOfRangeException">
/// <paramref name="quantile"/> is NaN or lies outside the range [0, 1].
/// </exception>
/// <exception cref="System.InvalidOperationException">
/// No values have been added to the digest yet.
/// </exception>
public double Quantile(double quantile)
{
    // NaN fails both range comparisons, so it has to be rejected explicitly;
    // otherwise it would flow through every comparison below and yield NaN.
    if (double.IsNaN(quantile) || quantile < 0 || quantile > 1)
    {
        // The (paramName, message) overload is required: the single-string
        // overload treats its argument as the parameter name, not the message.
        throw new ArgumentOutOfRangeException("quantile", "Quantile must be between 0 and 1");
    }

    if (_centroids.Count == 0)
    {
        throw new InvalidOperationException("Cannot call Quantile() method until first Adding values to the digest");
    }

    if (_centroids.Count == 1)
    {
        return _centroids.First().Value.Mean;
    }

    // Position of the requested quantile expressed in units of weight.
    double index = quantile * _count;

    // Within one unit of weight of either end, the recorded extremes are returned.
    if (index < 1)
    {
        return Min;
    }

    if (index > Count - 1)
    {
        return Max;
    }

    Centroid currentNode = _centroids.First().Value;
    Centroid lastNode = _centroids.Last().Value;
    double currentWeight = currentNode.Count;

    if (currentWeight == 2 && index <= 2)
    {
        // first node is a double weight with one sample at min, so we can infer location of other sample
        return 2 * currentNode.Mean - Min;
    }

    if (lastNode.Count == 2 && index > Count - 2)
    {
        // likewise for last centroid
        return 2 * lastNode.Mean - Max;
    }

    // Weight lying to the left of the centre of the current centroid.
    double weightSoFar = currentWeight / 2.0;

    if (index < weightSoFar)
    {
        // Left half of the first centroid: interpolate between Min and its mean.
        return WeightedAvg(Min, weightSoFar - index, currentNode.Mean, index - 1);
    }

    foreach (Centroid nextNode in _centroids.Values.Skip(1))
    {
        double nextWeight = nextNode.Count;

        // Weight between the centre of the current centroid and the centre of the next.
        double dw = (currentWeight + nextWeight) / 2.0;

        if (index < weightSoFar + dw)
        {
            double leftExclusion = 0;
            double rightExclusion = 0;

            // A centroid of weight 1 is a single sample: inside its half unit
            // the mean is returned as is, otherwise that half unit is left
            // out of the interpolation weights.
            if (currentWeight == 1)
            {
                if (index < weightSoFar + 0.5)
                {
                    return currentNode.Mean;
                }

                leftExclusion = 0.5;
            }

            if (nextWeight == 1)
            {
                if (index >= weightSoFar + dw - 0.5)
                {
                    return nextNode.Mean;
                }

                rightExclusion = 0.5;
            }

            // centroids i and i+1 bracket our current point
            // we interpolate, but the weights are diminished if singletons are present
            double weight1 = index - weightSoFar - leftExclusion;
            double weight2 = weightSoFar + dw - index - rightExclusion;
            return WeightedAvg(currentNode.Mean, weight2, nextNode.Mean, weight1);
        }

        weightSoFar += dw;
        currentNode = nextNode;
        currentWeight = nextWeight;
    }

    // No pair of centroids bracketed the index, so it lies in the right half
    // of the last centroid: interpolate between its mean and Max.
    double w1 = index - weightSoFar;
    double w2 = Count - 1 - index;
    return WeightedAvg(currentNode.Mean, w2, Max, w1);
}
/// <summary>
/// Add a new value to the T-Digest. Note that this method is NOT thread safe.
/// </summary>
/// <param name="value">The value to add</param>
/// <param name="weight">The relative weight associated with this value. Default is 1 for all values.</param>
/// <exception cref="System.ArgumentOutOfRangeException">
/// <paramref name="weight"/> is less than or equal to 0.
/// </exception>
public void Add(double value, double weight = 1)
{
    if (weight <= 0)
    {
        // The (paramName, message) overload is required: the single-string
        // overload treats its argument as the parameter name, not the message.
        throw new ArgumentOutOfRangeException("weight", "Weight must be greater than 0");
    }

    var first = _count == 0;
    _count += weight;

    if (first)
    {
        _oldAvg = value;
        _newAvg = value;
        Min = value;
        Max = value;
    }
    else
    {
        // Incremental weighted mean. _count accumulates weight, so the step
        // towards the new value must be scaled by this value's weight; for
        // weight == 1 this reduces to the plain running average.
        _newAvg = _oldAvg + weight * (value - _oldAvg) / _count;
        _oldAvg = _newAvg;
        Max = value > Max ? value : Max;
        Min = value < Min ? value : Min;
    }

    // The very first value simply becomes the first centroid.
    if (_centroids.Count == 0)
    {
        _centroids.Add(value, new Centroid(value, weight));
        return;
    }

    // Keep only the nearby centroids that can take the whole weight while
    // staying below their threshold.
    var closest = GetClosestCentroids(value);
    var candidates = closest
        .Select(c => new
        {
            Threshold = GetThreshold(ComputeCentroidQuantile(c)),
            Centroid = c
        })
        .Where(c => c.Centroid.Count + weight < c.Threshold)
        .ToList();

    while (candidates.Count > 0 && weight > 0)
    {
        // Pick one of the remaining candidates at random.
        var cData = candidates[_rand.Next() % candidates.Count];
        var delta_w = Math.Min(cData.Threshold - cData.Centroid.Count, weight);

        double oldMean;
        if (cData.Centroid.Update(delta_w, value, out oldMean))
        {
            // Centroids are keyed by mean, so the centroid is re-inserted
            // under its current mean whenever Update returns true.
            ReInsertCentroid(oldMean, cData.Centroid);
        }

        weight -= delta_w;
        candidates.Remove(cData);
    }

    // Any weight that no candidate absorbed goes into a centroid keyed by the value.
    if (weight > 0)
    {
        var toAdd = new Centroid(value, weight);

        // NOTE(review): presumably FindOrAdd returns true when the key already
        // exists and then points toAdd at the stored centroid - confirm
        // against the collection's implementation.
        if (_centroids.FindOrAdd(value, ref toAdd))
        {
            double oldMean;
            if (toAdd.Update(weight, toAdd.Mean, out oldMean))
            {
                ReInsertCentroid(oldMean, toAdd);
            }
        }
    }

    // Compress once the number of centroids exceeds the configured bound.
    if (_centroids.Count > (CompressionConstant / Accuracy))
    {
        Compress();
    }
}