/// <summary>
/// Folds the centroids of every digest in <c>PerThreadDist</c> into one digest
/// and converts the result to a <see cref="Distribution"/>.
/// </summary>
/// <returns>
/// A <see cref="Distribution"/> constructed from <c>MinuteMillis</c> and the
/// merged (value, count) centroid pairs.
/// </returns>
public Distribution ToDistribution()
{
    TDigest combined = new TDigest(Accuracy, Compression);

    // Walk all per-thread digests as one flat sequence of centroids.
    foreach (DistributionPoint point in PerThreadDist.Values.SelectMany(d => d.GetDistribution()))
    {
        combined.Add(point.Value, point.Count);
    }

    // Rebuild the digest once it holds more centroids than the threshold allows.
    bool needsRecompression = combined.CentroidCount > Recompression_Threshold_Factor / Accuracy;
    if (needsRecompression)
    {
        combined = Compress(combined);
    }

    var pairs = new List<KeyValuePair<double, int>>();
    foreach (var point in combined.GetDistribution())
    {
        // NOTE(review): the count is narrowed to int here - confirm a centroid
        // count can never exceed int.MaxValue.
        pairs.Add(new KeyValuePair<double, int>(point.Value, (int)point.Count));
    }

    return new Distribution(MinuteMillis, pairs);
}
/// <summary>
/// Builds a fresh digest by re-adding the centroids of <paramref name="digest"/>
/// after shuffling them. Copy of TDigest's private Compress() method.
/// </summary>
/// <param name="digest">The digest whose centroids are re-added.</param>
/// <returns>
/// A new <c>TDigest</c> created with <c>Accuracy</c> and <c>Compression</c>
/// that contains the re-added centroids.
/// </returns>
private static TDigest Compress(TDigest digest)
{
    // Shuffle() is an extension defined elsewhere; presumably it randomises
    // the order in place - confirm against its definition.
    List<DistributionPoint> shuffled = digest.GetDistribution().ToList();
    shuffled.Shuffle();

    TDigest rebuilt = new TDigest(Accuracy, Compression);
    shuffled.ForEach(point => rebuilt.Add(point.Value, point.Count));
    return rebuilt;
}
/// <summary>
/// Adds the values 0..total-1 to a default digest and checks that
/// <c>GetDistribution()</c> returns one point per value, in ascending order.
/// </summary>
public void GetsAllDistributionPoints()
{
    // The sample size is picked at random in [10, 100) on every run.
    // NOTE(review): the generator is unseeded, so a failing size is not
    // reproducible from the test output alone.
    int total = new Random().Next(10, 100);

    var digest = new TDigest();
    for (int value = 0; value < total; value++)
    {
        digest.Add(value);
    }

    var points = digest.GetDistribution();

    Assert.Equal(total, points.Length);
    for (int index = 0; index < total; index++)
    {
        Assert.Equal(index, points[index].Value);
    }
}
/// <summary>
/// Round-trips a digest through <c>Serialize()</c> and the byte-array
/// constructor, then checks that the copy matches the original: centroids,
/// average, count, centroid count, compression constant, accuracy and the
/// quantiles 0.001 .. 0.999.
/// </summary>
public void TestSerialization()
{
    Random r = new Random();
    TDigest digestA = new TDigest();
    for (int i = 0; i < 10000; i++)
    {
        // Sum of two draws in [0, 50) gives a non-uniform sample set.
        var n = (r.Next() % 50) + (r.Next() % 50);
        digestA.Add(n);
    }

    byte[] s = digestA.Serialize();
    TDigest digestB = new TDigest(s);

    var a = digestA.GetDistribution();
    var b = digestB.GetDistribution();

    // The loop below indexes b with a's bounds. Without this check a shorter
    // b would raise IndexOutOfRangeException instead of a test failure, and
    // a longer b would leave its extra centroids uncompared.
    Assert.AreEqual(a.Length, b.Length, "Distribution lengths are not equal after serialization");

    for (int i = 0; i < a.Length; i++)
    {
        // Exact equality is intended: the round trip must not alter values.
        var ce = a[i].Count == b[i].Count;
        var me = a[i].Value == b[i].Value;
        Assert.IsTrue(ce && me, "Centroid means or counts are not equal after serialization");
    }

    Assert.AreEqual(digestA.Average, digestB.Average, "Averages are not equal after serialization");
    Assert.AreEqual(digestA.Count, digestB.Count, "Counts are not equal after serialization");
    Assert.AreEqual(digestA.CentroidCount, digestB.CentroidCount, "Centroid Counts are not equal after serialization");
    Assert.AreEqual(digestA.CompressionConstant, digestB.CompressionConstant, "Compression Constants are not equal after serialization");
    Assert.AreEqual(digestA.Accuracy, digestB.Accuracy, "Accuracies are not equal after serialization");

    // Materialised so the mismatching quantiles can be inspected in a debugger.
    var differences = Enumerable.Range(1, 999)
        .Select(n => n / 1000.0)
        .Where(q => digestA.Quantile(q) != digestB.Quantile(q))
        .Select(q => new { q, A = digestA.Quantile(q), B = digestB.Quantile(q) })
        .ToList();

    var areEqual = !differences.Any();
    Assert.IsTrue(areEqual, "Serialized TDigest is not the same as original");
}