/// <summary>
/// Computes the mean absolute error between the exact quantiles of
/// <paramref name="all"/> and the digest's estimates, sampled at
/// q = 0.001, 0.002, ..., 0.999.
/// </summary>
/// <param name="all">The raw (sorted) samples, used as ground truth.</param>
/// <param name="digest">The digest whose quantile estimates are evaluated.</param>
/// <returns>The average absolute quantile error over the 999 sample points.</returns>
private double GetAvgPercentileError(IList<double> all, TDigest digest)
{
    double totalError = 0;
    for (int i = 1; i <= 999; i++)
    {
        double q = i / 1000.0;
        totalError += Math.Abs(all.Quantile(q) - digest.Quantile(q));
    }
    return totalError / 999;
}
/// <summary>
/// Max must reflect a lone negative sample (guards against a zero-initialized max).
/// </summary>
public void NegativeMax()
{
    var digest = new TDigest();

    digest.Add(-1);

    Assert.Equal(-1, digest.Max);
}
/// <summary>
/// Feeds 50k uniform [0,1) samples into a digest and checks that counts,
/// min/max, average, and quantile estimates all track the raw data.
/// </summary>
public void TestUniformDistribution()
{
    const int sampleCount = 50000;
    var rng = new Random();
    var digest = new TDigest();
    var samples = new List<double>();

    for (int i = 0; i < sampleCount; i++)
    {
        double value = rng.NextDouble();
        digest.Add(value);
        samples.Add(value);
    }
    samples.Sort();

    Assert.AreEqual(sampleCount, samples.Count);
    Assert.AreEqual(sampleCount, digest.Count);
    Assert.IsTrue(GetAvgError(samples, digest) < .01);
    Assert.IsTrue(MaxIsEqual(samples, digest));
    Assert.IsTrue(MinIsEqual(samples, digest));

    // Uniform data should give very accurate quantiles across the whole range.
    Assert.IsTrue(GetAvgPercentileError(samples, digest) < .0005);
}
/// <summary>
/// Converts to a <see cref="Distribution"/> by merging every per-thread digest
/// into one, compressing it if it has grown too many centroids, and projecting
/// the centroids to (value, count) pairs.
/// </summary>
/// <returns>The distribution for this minute.</returns>
public Distribution ToDistribution()
{
    TDigest merged = new TDigest(Accuracy, Compression);

    // Fold every centroid from every per-thread digest into a single digest.
    foreach (DistributionPoint point in PerThreadDist.Values.SelectMany(d => d.GetDistribution()))
    {
        merged.Add(point.Value, point.Count);
    }

    // Re-compress when the merged digest exceeds the centroid budget.
    if (merged.CentroidCount > Recompression_Threshold_Factor / Accuracy)
    {
        merged = Compress(merged);
    }

    var centroids = new List<KeyValuePair<double, int>>();
    foreach (DistributionPoint point in merged.GetDistribution())
    {
        // Counts are truncated to int for the Distribution contract.
        centroids.Add(new KeyValuePair<double, int>(point.Value, (int)point.Count));
    }

    return new Distribution(MinuteMillis, centroids);
}
/// <summary>
/// Gets a <see cref="Snapshot" /> of the contents of the histogram prior to the current
/// minute.
/// </summary>
/// <returns>The snapshot.</returns>
public Snapshot GetSnapshot()
{
    // Accumulate every centroid of every bin/thread into one fresh digest.
    var snapshot = new TDigest(Accuracy, Compression);
    // NOTE(review): an upgradeable read lock is taken but no upgrade to write is
    // visible in this block — presumably GetGlobalHistogramBinsList() needs it
    // internally (e.g. lazy bin creation); confirm before changing to EnterReadLock,
    // since upgradeable read locks are mutually exclusive and serialize snapshotters.
    globalHistogramBinsLock.EnterUpgradeableReadLock();
    try
    {
        foreach (var bin in GetGlobalHistogramBinsList())
        {
            foreach (var dist in bin.PerThreadDist.Values)
            {
                foreach (var centroid in dist.GetDistribution())
                {
                    snapshot.Add(centroid.Value, centroid.Count);
                }
            }
        }
    }
    finally
    {
        // Always released, even if GetDistribution()/Add throws.
        globalHistogramBinsLock.ExitUpgradeableReadLock();
    }
    return (new Snapshot(snapshot));
}
/// <summary>
/// Quantile edge cases: an empty digest must throw, and with one or two
/// elements the quantiles must collapse onto the stored values.
/// </summary>
public void TestEdgeCases()
{
    TDigest digest = new TDigest();

    // Empty digest: Quantile must refuse to answer.
    try
    {
        digest.Quantile(.5);
        Assert.Fail("Didn't throw exception when quantile() called before adding any elements");
    }
    catch (InvalidOperationException)
    {
        // Expected.
    }

    // Single element: every quantile is that element.
    digest.Add(50);
    Assert.AreEqual(50, digest.Quantile(.5));
    Assert.AreEqual(50, digest.Quantile(0));
    Assert.AreEqual(50, digest.Quantile(1));

    // Two elements: q=1 is the larger one.
    digest.Add(100);
    Assert.AreEqual(100, digest.Quantile(1));
}
/// <summary>
/// One-time benchmark fixture setup: builds the four (size x range) data sets
/// in double and long form, plus every percentile-algorithm instance under test.
/// Idempotent — subsequent calls return immediately.
/// </summary>
public void Setup()
{
    // Already initialized? Nothing to do.
    if (this._dataFewLow != null)
    {
        return;
    }

    // Fixed seed so every benchmark run sees identical data.
    Random rng = new Random(1234567);
    double[] Generate(int count, int maxValue) =>
        Enumerable.Range(0, count).Select(_ => (double)rng.Next(maxValue)).ToArray();

    this._dataFewLow = Generate(_few, _few);
    this._dataManyLow = Generate(_many, _few);
    this._dataFewHigh = Generate(_few, _many);
    this._dataManyHigh = Generate(_many, _many);

    // Long-typed mirrors of the same data sets.
    this._dataFewLowL = this._dataFewLow.Select(x => (long)x).ToArray();
    this._dataManyLowL = this._dataManyLow.Select(x => (long)x).ToArray();
    this._dataFewHighL = this._dataFewHigh.Select(x => (long)x).ToArray();
    this._dataManyHighL = this._dataManyHigh.Select(x => (long)x).ToArray();

    // Algorithms under comparison.
    this._p2_95Fast = new PsquareSinglePercentileAlgorithmBuilder(95, Precision.LessPreciseAndFaster);
    this._p2_99Fast = new PsquareSinglePercentileAlgorithmBuilder(99, Precision.LessPreciseAndFaster);
    this._p2_95Normal = new PsquareSinglePercentileAlgorithmBuilder(95, Precision.Normal);
    this._p2_99Normal = new PsquareSinglePercentileAlgorithmBuilder(99, Precision.Normal);
    this._ckms_95lowPrec = new ConstantErrorBasicCKMSBuilder(0.001, new double[] { 95 });
    this._ckms_95highPrec = new ConstantErrorBasicCKMSBuilder(0.000001, new double[] { 95 });
    this._hdr_low = new IntHistogram(Int32.MaxValue / 2, 0);
    this._hdr_high = new IntHistogram(Int32.MaxValue, 0);
    this._tDigest = new TDigest();
}
/// <summary>
/// Min must update as ever-smaller values arrive (100 down to 1).
/// </summary>
public void MinValue()
{
    var digest = new TDigest();
    for (var value = 100; value > 0; value--)
    {
        digest.Add(value);
        Assert.Equal(value, digest.Min);
    }
}
/// <summary>
/// Max must update as ever-larger values arrive (0 up to 99).
/// </summary>
public void MaxValue()
{
    var digest = new TDigest();
    for (var value = 0; value < 100; value++)
    {
        digest.Add(value);
        Assert.Equal(value, digest.Max);
    }
}
/// <summary>
/// Count must increase by one per Add, even for repeated identical values.
/// </summary>
public void Count()
{
    var digest = new TDigest();
    for (var added = 1; added <= 100; added++)
    {
        digest.Add(0);
        Assert.Equal(added, digest.Count);
    }
}
/// <summary>
/// Adds a set of centroids to the thread-local <see cref="TDigest"/> distribution.
/// Null inputs are ignored; when the two lists differ in length, only the
/// common prefix is applied.
/// </summary>
/// <param name="threadId">The thread id.</param>
/// <param name="means">The centroid values.</param>
/// <param name="counts">The centroid weights/sample counts.</param>
public void BulkUpdateByThreadId(int threadId, IList<double> means, IList<int> counts)
{
    if (means == null || counts == null)
    {
        return;
    }

    TDigest dist = GetDistByThreadId(threadId);
    int pairs = Math.Min(means.Count, counts.Count);
    for (int i = 0; i < pairs; ++i)
    {
        dist.Add(means[i], counts[i]);
    }
}
/// <summary>
/// Copy of TDigest's private Compress() method: re-inserts all centroids in a
/// shuffled order into a fresh digest, which reduces the centroid count.
/// </summary>
/// <param name="digest">The digest to compress.</param>
/// <returns>A new, compressed digest with the same distribution.</returns>
private static TDigest Compress(TDigest digest)
{
    List<DistributionPoint> shuffled = digest.GetDistribution().ToList();
    // Random insertion order is what lets adjacent centroids merge.
    shuffled.Shuffle();

    TDigest compressed = new TDigest(Accuracy, Compression);
    foreach (DistributionPoint point in shuffled)
    {
        compressed.Add(point.Value, point.Count);
    }
    return compressed;
}
/// <summary>
/// Allocation benchmark: builds <c>_allocCount</c> digests with two samples each.
/// Hash codes are folded into the return value so the JIT cannot elide the work.
/// </summary>
/// <returns>An opaque checksum of the allocated objects.</returns>
public int TDigest()
{
    int checksum = 0;
    for (int i = 0; i < _allocCount; i++)
    {
        var digest = new TDigest();
        digest.Add(1d);
        digest.Add(2d);
        checksum += digest.GetHashCode();
    }
    return checksum;
}
/// <summary>
/// The digest's Average must match the arithmetic mean of the inserted values
/// (to 10 decimal places).
/// </summary>
public void Average()
{
    var digest = new TDigest();
    // One Random for the whole loop: constructing `new Random()` per iteration
    // is time-seeded on .NET Framework and can yield identical values.
    var rng = new Random();
    var total = 0.0;

    for (var i = 0; i < 100; i++)
    {
        var value = rng.NextDouble();
        total += value;
        digest.Add(value);
    }

    Assert.Equal(total / 100, digest.Average, 10);
}
/// <summary>
/// With the values 0..9, quantile boundaries must fall exactly at multiples
/// of 0.1 (e.g. q just below .3 maps to 2, q = .3 maps to 3).
/// </summary>
public void Quantile()
{
    var digest = new TDigest();
    for (var value = 0; value <= 9; value++)
    {
        digest.Add(value);
    }

    Assert.Equal(2, digest.Quantile(.29999999));
    Assert.Equal(3, digest.Quantile(.3));
    Assert.Equal(3, digest.Quantile(.39999999));
    Assert.Equal(8, digest.Quantile(.8));
    Assert.Equal(9, digest.Quantile(.99999999));
}
/// <summary>
/// Regression test: with rare extreme outliers (10001 at ~0.1% rate) mixed
/// into small values, q99 must never go negative after any insertion.
/// </summary>
public void TestFixForNegativeQuantileBug()
{
    var rng = new Random();
    var inserted = new List<int>();
    var digest = new TDigest();

    for (var i = 0; i < 10 * 1000; i++)
    {
        // ~1 in 1000 samples is a large outlier; the rest are in [0, 100).
        var sample = rng.NextDouble() < 0.001 ? 10001 : rng.Next(0, 100);
        digest.Add(sample);
        inserted.Add(sample);

        var q99 = digest.Quantile(0.99);
        Assert.IsTrue(
            q99 >= 0,
            string.Format("q99: {0}, numbers: {1}", q99, string.Join(",", inserted)));
    }
}
/// <summary>
/// For a small, distinct-valued input each value must survive as its own
/// distribution point, in insertion (sorted) order.
/// </summary>
public void GetsAllDistributionPoints()
{
    var digest = new TDigest();
    var total = new Random().Next(10, 100);

    for (int value = 0; value < total; value++)
    {
        digest.Add(value);
    }

    var points = digest.GetDistribution();

    Assert.Equal(total, points.Length);
    for (int i = 0; i < total; i++)
    {
        Assert.Equal(i, points[i].Value);
    }
}
/// <summary>
/// Merging two independently-built digests must preserve the total count,
/// keep quantile error low against the combined raw data, and produce an
/// average matching a digest that saw all samples directly.
/// </summary>
public void TestMerge()
{
    Random r = new Random();
    TDigest digestA = new TDigest();
    TDigest digestAll = new TDigest();
    List<double> actual = new List<double>();

    // First population: roughly-normal values in [0, 98].
    for (int i = 0; i < 10000; i++)
    {
        var n = (r.Next() % 50) + (r.Next() % 50);
        digestA.Add(n);
        digestAll.Add(n);
        actual.Add(n);
    }

    // Second population: roughly-normal values in [0, 198].
    // (Removed unused locals `actualB` and `trueAvg` from the original.)
    TDigest digestB = new TDigest();
    for (int i = 0; i < 10000; i++)
    {
        var n = (r.Next() % 100) + (r.Next() % 100);
        digestB.Add(n);
        digestAll.Add(n);
        actual.Add(n);
    }
    actual.Sort();

    var merged = TDigest.Merge(digestA, digestB);

    Assert.AreEqual(actual.Count, merged.Count);

    var avgError = GetAvgPercentileError(actual, merged);
    Assert.IsTrue(avgError < .5);

    // The merged average must agree with the all-in-one digest's average.
    var deltaAvg = Math.Abs(digestAll.Average - merged.Average);
    Assert.IsTrue(deltaAvg < .01);
}
/// <summary>
/// Sequential (sorted-order) insertion is a worst case for many sketches;
/// the digest must still keep average, min, max, and quantiles accurate.
/// (Removed the unused `Random r` local from the original.)
/// </summary>
public void TestSequential()
{
    TDigest digest = new TDigest(.01);
    List<double> actual = new List<double>();

    for (int i = 0; i < 10000; i++)
    {
        digest.Add(i);
        actual.Add(i);
    }
    actual.Sort();

    Assert.IsTrue(GetAvgError(actual, digest) < .01);
    Assert.IsTrue(MaxIsEqual(actual, digest));
    Assert.IsTrue(MinIsEqual(actual, digest));

    var avgError = GetAvgPercentileError(actual, digest);
    Assert.IsTrue(avgError < 5);
}
/// <summary>
/// Round-tripping a digest through Serialize()/the byte[] constructor must
/// preserve every centroid, all summary statistics, and every quantile estimate.
/// </summary>
public void TestSerialization()
{
    Random r = new Random();
    TDigest original = new TDigest();
    for (int i = 0; i < 10000; i++)
    {
        var n = (r.Next() % 50) + (r.Next() % 50);
        original.Add(n);
    }

    byte[] bytes = original.Serialize();
    TDigest roundTripped = new TDigest(bytes);

    // Centroid-by-centroid equality. Exact == on doubles is intentional here:
    // serialization must be lossless.
    var a = original.GetDistribution();
    var b = roundTripped.GetDistribution();
    for (int i = 0; i < a.Length; i++)
    {
        bool countsEqual = a[i].Count == b[i].Count;
        bool meansEqual = a[i].Value == b[i].Value;
        Assert.IsTrue(countsEqual && meansEqual, "Centroid means or counts are not equal after serialization");
    }

    // Summary statistics and configuration.
    Assert.AreEqual(original.Average, roundTripped.Average, "Averages are not equal after serialization");
    Assert.AreEqual(original.Count, roundTripped.Count, "Counts are not equal after serialization");
    Assert.AreEqual(original.CentroidCount, roundTripped.CentroidCount, "Centroid Counts are not equal after serialization");
    Assert.AreEqual(original.CompressionConstant, roundTripped.CompressionConstant, "Compression Constants are not equal after serialization");
    Assert.AreEqual(original.Accuracy, roundTripped.Accuracy, "Accuracies are not equal after serialization");

    // Every sampled quantile must agree exactly.
    var differences = Enumerable.Range(1, 999)
        .Select(n => n / 1000.0)
        .Where(q => original.Quantile(q) != roundTripped.Quantile(q))
        .Select(q => new { q, A = original.Quantile(q), B = roundTripped.Quantile(q) })
        .ToList();
    Assert.IsTrue(!differences.Any(), "Serialized TDigest is not the same as original");
}
// Scratch/driver program exercising Merge by hand; mirrors TestMerge but with
// the assertions disabled. NOTE(review): digestA's samples are NOT fed into
// digestAll (the Add is commented out), so digestAll only reflects the second
// population while `actual` holds both — `deltaAvg` below compares unlike
// populations. Presumably intentional for debugging; confirm before relying on it.
static void Main(string[] args)
{
    Random r = new Random();
    TDigest digestA = new TDigest();
    TDigest digestAll = new TDigest();
    List<double> actual = new List<double>();

    // First population: values in [0, 98].
    for (int i = 0; i < 10000; i++)
    {
        var n = (r.Next() % 50) + (r.Next() % 50);
        digestA.Add(n);
        //digestAll.Add(n);
        actual.Add(n);
    }

    // Second population: values in [0, 198]. `actualB` is declared but never
    // filled — dead local kept as-is in this debugging scaffold.
    TDigest digestB = new TDigest();
    List<double> actualB = new List<double>();
    for (int i = 0; i < 10000; i++)
    {
        var n = (r.Next() % 100) + (r.Next() % 100);
        digestB.Add(n);
        digestAll.Add(n);
        actual.Add(n);
    }
    actual.Sort();

    var merged = TDigest.Merge(digestA, digestB);

    // Disabled checks from the test this was copied from:
    //Debug.Assert.AreEqual(actual.Count, merged.Count);
    //var avgError = GetAvgError(actual, merged);
    //Assert.IsTrue(avgError < .5);

    // Computed for inspection under a debugger; not asserted.
    var trueAvg = actual.Average();
    var deltaAvg = Math.Abs(digestAll.Average - merged.Average);
}
/// <summary>
/// The extreme quantiles must coincide with the tracked extrema:
/// q near 0 returns the minimum sample and q = 1 returns the maximum.
/// </summary>
public void QuantileEdges()
{
    var digest = new TDigest();
    // One Random for the whole loop: the original constructed `new Random()`
    // every iteration, which on .NET Framework is time-seeded and can produce
    // identical values — making min == max and the test vacuous.
    var rng = new Random();
    var min = Double.MaxValue;
    var max = Double.MinValue;

    for (var i = 0; i < 100; i++)
    {
        var value = rng.NextDouble();
        if (value > max)
        {
            max = value;
        }
        if (value < min)
        {
            min = value;
        }
        digest.Add(value);
    }

    Assert.Equal(min, digest.Quantile(0.001));
    Assert.Equal(max, digest.Quantile(1));
}
/// <summary>
/// Feeds 10k roughly-normal samples (sum of two uniforms) into a digest and
/// checks average, min/max, and quantile accuracy against the raw data.
/// </summary>
public void TestNormalDistribution()
{
    Random r = new Random();
    TDigest digest = new TDigest();
    List<double> actual = new List<double>();

    for (int i = 0; i < 10000; i++)
    {
        var n = (r.Next() % 100) + (r.Next() % 100);
        digest.Add(n);
        actual.Add(n);
    }
    actual.Sort();

    // Smoke check: Quantile(0) must not throw. (Was an unused local `z`.)
    _ = digest.Quantile(0);

    Assert.IsTrue(GetAvgError(actual, digest) < .01);
    Assert.IsTrue(MaxIsEqual(actual, digest));
    Assert.IsTrue(MinIsEqual(actual, digest));

    var avgError = GetAvgPercentileError(actual, digest);
    Assert.IsTrue(avgError < .5);
}
/// <summary>
/// Adding a sample with a negative weight must be rejected with
/// <see cref="ArgumentOutOfRangeException"/>.
/// </summary>
public void WeightMustBePositive()
{
    var digest = new TDigest();

    Assert.Throws<ArgumentOutOfRangeException>(() => digest.Add(1, -1));
}
/// <summary>
/// Wraps the given digest as an immutable snapshot.
/// </summary>
/// <param name="distribution">The digest backing this snapshot.</param>
internal Snapshot(TDigest distribution) => this.distribution = distribution;
/// <summary>
/// Absolute difference between the exact mean of <paramref name="actual"/>
/// and the digest's reported average.
/// </summary>
private double GetAvgError(IList<double> actual, TDigest digest)
{
    double exactMean = actual.Average();
    return Math.Abs(exactMean - digest.Average);
}
/// <summary>
/// True when the digest's Max exactly matches the largest raw sample.
/// (Exact == is intentional: Max must be tracked, not estimated.)
/// </summary>
private bool MaxIsEqual(IList<double> actual, TDigest digest) => actual.Max() == digest.Max;
/// <summary>
/// True when the digest's Min exactly matches the smallest raw sample.
/// (Exact == is intentional: Min must be tracked, not estimated.)
/// </summary>
private bool MinIsEqual(IList<double> actual, TDigest digest) => actual.Min() == digest.Min;