/// <summary>
/// Estimates the quantile at which the given centroid sits within the digest.
/// </summary>
/// <param name="centroid">The centroid to locate.</param>
/// <returns>
/// Half of the centroid's own count plus the summed counts of all stored centroids
/// visited before the first one whose mean exceeds the given centroid's mean,
/// divided by the digest's total count.
/// </returns>
private double ComputeCentroidQuantile(Centroid centroid)
{
    double weightUpToCentroid = 0;

    foreach (Centroid other in centroids.Values)
    {
        // Stop at the first stored centroid whose mean lies beyond the target's mean.
        if (other.Mean > centroid.Mean)
        {
            break;
        }

        weightUpToCentroid += other.Count;
    }

    double totalWeight = count;
    double numerator = centroid.Count / 2 + weightUpToCentroid;
    return numerator / totalWeight;
}
/// <summary>
/// Add a new value to the T-Digest. Note that this method is NOT thread safe.
/// </summary>
/// <param name="value">The value to add</param>
/// <param name="weight">The relative weight associated with this value. Default is 1 for all values.</param>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown when <paramref name="weight"/> is not greater than 0 (this includes NaN).
/// </exception>
public void Add(double value, double weight = 1)
{
    // Negated comparison on purpose: "weight <= 0" is false for NaN, which would
    // let a NaN weight through and turn the running total into NaN.
    if (!(weight > 0))
    {
        throw new ArgumentOutOfRangeException(nameof(weight), "must be greater than 0");
    }

    var first = count == 0;
    count += weight;

    if (first)
    {
        oldAvg = value;
        newAvg = value;
        Min = value;
        Max = value;
    }
    else
    {
        // Weighted incremental mean: the sample shifts the average by its share
        // (weight / total weight). For the default weight of 1 this evaluates to
        // exactly the same result as the unweighted update.
        newAvg = oldAvg + weight * (value - oldAvg) / count;
        oldAvg = newAvg;
        Max = value > Max ? value : Max;
        Min = value < Min ? value : Min;
    }

    // Very first sample: it becomes the only centroid.
    if (centroids.Count == 0)
    {
        centroids.Add(value, new Centroid(value, weight));
        return;
    }

    var closest = GetClosestCentroids(value);

    // Keep only the nearby centroids whose count plus the incoming weight stays below their threshold.
    var candidates = closest
        .Select(c => new { Threshold = GetThreshold(ComputeCentroidQuantile(c)), Centroid = c })
        .Where(c => c.Centroid.Count + weight < c.Threshold)
        .ToList();

    // Spread the weight over randomly chosen candidates until either runs out.
    while (candidates.Count > 0 && weight > 0)
    {
        var index = _rand.Next() % candidates.Count;
        var cData = candidates[index];
        var deltaW = Math.Min(cData.Threshold - cData.Centroid.Count, weight);

        double oldMean;
        if (cData.Centroid.Update(deltaW, value, out oldMean))
        {
            // Update reported true: re-key the centroid under its current mean.
            ReInsertCentroid(oldMean, cData.Centroid);
        }

        weight -= deltaW;

        // The index is already known, so remove by position instead of by an equality scan.
        candidates.RemoveAt(index);
    }

    // Whatever weight is left goes into a centroid keyed at the value itself.
    if (weight > 0)
    {
        var toAdd = new Centroid(value, weight);

        // When FindOrAdd returns true, the remaining weight is applied to toAdd
        // (passed by ref) at its own mean.
        if (centroids.FindOrAdd(value, ref toAdd))
        {
            double oldMean;
            if (toAdd.Update(weight, toAdd.Mean, out oldMean))
            {
                ReInsertCentroid(oldMean, toAdd);
            }
        }
    }

    if (centroids.Count > (CompressionConstant / Accuracy))
    {
        Compress();
    }
}
/// <summary>
/// Re-keys a centroid: removes the entry stored under <paramref name="oldMean"/>
/// and adds the centroid back under its current mean.
/// </summary>
/// <param name="oldMean">The key under which the centroid was previously stored.</param>
/// <param name="c">The centroid to store under its current mean.</param>
private void ReInsertCentroid(double oldMean, Centroid c)
{
    // Drop the stale key first, then file the centroid under its present mean.
    centroids.Remove(oldMean);

    var currentMean = c.Mean;
    centroids.Add(currentMean, c);
}
/// <summary>
/// Estimates the specified quantile
/// </summary>
/// <param name="quantile">The quantile to estimate. Must be between 0 and 1.</param>
/// <returns>The value for the estimated quantile</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown when <paramref name="quantile"/> is less than 0 or greater than 1.
/// </exception>
/// <exception cref="InvalidOperationException">
/// Thrown when the digest holds no centroids yet.
/// </exception>
public double Quantile(double quantile)
{
    if (quantile < 0 || quantile > 1)
    {
        throw new ArgumentOutOfRangeException(nameof(quantile), "must be between 0 and 1");
    }

    if (centroids.Count == 0)
    {
        throw new InvalidOperationException(
            "Cannot call Quantile() method until first Adding values to the digest");
    }

    // A single centroid: its mean is the only estimate available.
    if (centroids.Count == 1)
    {
        return centroids.First().Value.Mean;
    }

    // Position of the requested quantile on the cumulative-weight axis.
    double rank = quantile * count;

    // At the extremes the recorded minimum / maximum are returned directly.
    if (rank < 1)
    {
        return Min;
    }

    if (rank > Count - 1)
    {
        return Max;
    }

    Centroid left = centroids.First().Value;
    Centroid tail = centroids.Last().Value;
    double leftWeight = left.Count;

    // The first centroid has weight two with one sample at Min, so the other
    // sample's location can be inferred by mirroring Min around the mean.
    bool headIsPair = Math.Abs(leftWeight - 2) < Tolerance;
    if (headIsPair && rank <= 2)
    {
        return 2 * left.Mean - Min;
    }

    // Same reasoning for the last centroid, mirrored around Max.
    bool tailIsPair = Math.Abs(tail.Count - 2) < Tolerance;
    if (tailIsPair && rank > Count - 2)
    {
        return 2 * tail.Mean - Max;
    }

    double weightSoFar = leftWeight / 2.0;

    // Below the first centroid's midpoint: interpolate between Min and its mean.
    if (rank < weightSoFar)
    {
        return WeightedAvg(Min, weightSoFar - rank, left.Mean, rank - 1);
    }

    foreach (Centroid right in centroids.Values.Skip(1))
    {
        double rightWeight = right.Count;
        double dw = (leftWeight + rightWeight) / 2.0;
        double upperBound = weightSoFar + dw;

        // The rank is not inside this pair's span: slide the window one centroid
        // to the right. (Negated "<" keeps the comparison semantics unchanged.)
        if (!(rank < upperBound))
        {
            weightSoFar = upperBound;
            left = right;
            leftWeight = rightWeight;
            continue;
        }

        double leftExclusion = 0;
        double rightExclusion = 0;

        // A weight-one centroid on the left: return its mean when the rank is
        // within half a unit of it, otherwise exclude that half unit.
        if (Math.Abs(leftWeight - 1) < Tolerance)
        {
            if (rank < weightSoFar + 0.5)
            {
                return left.Mean;
            }

            leftExclusion = 0.5;
        }

        // Likewise for a weight-one centroid on the right.
        if (Math.Abs(rightWeight - 1) < Tolerance)
        {
            if (rank >= upperBound - 0.5)
            {
                return right.Mean;
            }

            rightExclusion = 0.5;
        }

        // The two centroids bracket the rank: interpolate between their means,
        // with the weights diminished when singletons are present.
        double towardRight = rank - weightSoFar - leftExclusion;
        double towardLeft = upperBound - rank - rightExclusion;
        return WeightedAvg(left.Mean, towardLeft, right.Mean, towardRight);
    }

    // Past the last pair: interpolate between the last centroid's mean and Max.
    double maxShare = rank - weightSoFar;
    double meanShare = Count - 1 - rank;
    return WeightedAvg(left.Mean, meanShare, Max, maxShare);
}