[Test]
public void Merge_Sparse()
{
    const int numToMerge = 4;
    const int bits = 18;
    const int cardinality = 4000;

    // Build several sparse estimators over independent draws, plus a baseline
    // that observes every value offered to any of them.
    var hlls = new HyperLogLogPlus[numToMerge];
    var baseline = new HyperLogLogPlus(bits, 25);
    for (var i = 0; i < numToMerge; ++i)
    {
        hlls[i] = new HyperLogLogPlus(bits, 25);
        for (int j = 0; j < cardinality; ++j)
        {
            double val = Rnd.NextDouble();
            hlls[i].OfferHashed(Hash64(val));
            baseline.OfferHashed(Hash64(val));
        }
    }

    // Merge the remaining estimators into the first one.
    const long expectedCardinality = numToMerge * cardinality;
    HyperLogLogPlus hll = hlls[0];
    hlls = hlls.Skip(1).ToArray();
    long mergedEstimate = hll.Merge(hlls).Cardinality();

    // The standard error of HyperLogLog with m = 2^bits registers is
    // 1.04 / sqrt(m); the estimate should land within three standard errors
    // of the true count.
    double se = expectedCardinality * (1.04 / Math.Sqrt(Math.Pow(2, bits)));
    Console.WriteLine("Expected estimate {0} to be between {1} and {2}",
        mergedEstimate, expectedCardinality - (3 * se), expectedCardinality + (3 * se));

    double err = Math.Abs(mergedEstimate - expectedCardinality) / (double)expectedCardinality;
    Console.WriteLine("Percentage error: " + err);
    Assert.That(err, Is.LessThan(0.1));
    Assert.That(mergedEstimate, Is.InRange(expectedCardinality - (3 * se), expectedCardinality + (3 * se)));

    // The baseline saw exactly the same values as the merged parts, so the
    // merged estimate should reproduce its estimate.
    Assert.That(mergedEstimate, Is.EqualTo(baseline.Cardinality()));
}
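// Worked check of the bound above (a sketch, not executed by the test):
// with bits = 18 there are m = 2^18 = 262,144 registers, so sqrt(m) = 512 and
// the relative standard error is 1.04 / 512 ≈ 0.00203. For the expected
// cardinality of 16,000 that gives se ≈ 32.5, i.e. a 3-sigma window of
// roughly 16,000 ± 98.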
[Test]
public void MergeSelf()
{
    int[] cardinalities = { 0, 1, 10, 100, 1000, 10000, 100000 };
    uint[] ps = { 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20 };
    uint[] sps = { 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32 };

    foreach (int cardinality in cardinalities)
    {
        for (int j = 0; j < ps.Length; j++)
        {
            foreach (var sp in sps)
            {
                // The sparse precision must be at least the normal precision.
                if (sp < ps[j])
                {
                    continue;
                }

                var hll = new HyperLogLogPlus(ps[j], sp);
                for (int l = 0; l < cardinality; l++)
                {
                    hll.OfferHashed(Hash64(Rnd.Next()));
                }

                // Round-tripping through the serialized form must preserve the estimate...
                var deserialized = HyperLogLogPlus.FromBytes(hll.ToBytes());
                Console.WriteLine(ps[j] + "-" + sp + ": " + cardinality + " -> " + hll.Cardinality());
                Assert.That(deserialized.Cardinality(), Is.EqualTo(hll.Cardinality()));

                // ...and merging an estimator with a copy of itself must be a no-op.
                var merged = hll.Merge(deserialized);
                Assert.That(merged.Cardinality(), Is.EqualTo(hll.Cardinality()));
            }
        }
    }
}
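// Why the self-merge is a no-op: merging is a register-wise max in normal
// mode (and a set union in sparse mode), so merging an estimator with an
// exact copy of itself cannot change any register, and the estimate is
// unchanged by construction.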
[Test]
public void HighCardinalityMergeWithPool()
{
    var sw = Stopwatch.StartNew();

    // All estimators share one memory pool, so every instance must be
    // disposed once it is no longer needed.
    var pool = HyperLogLogPlus.CreateMemPool();
    var hll = new HyperLogLogPlus(18, 25, pool);
    var buf = new byte[8];
    const int size = 100000;

    // Offer the values in batches of 100, merging each batch into the
    // running estimator and returning the spent instances to the pool.
    for (int i = 0; i < size / 100; ++i)
    {
        var batch = new HyperLogLogPlus(18, 25, pool);
        for (int j = 0; j < 100; ++j)
        {
            Rnd.NextBytes(buf);
            batch.OfferHashed(Hash64(buf));
        }

        // Merge allocates a new estimator, so dispose both inputs afterwards.
        var merged = hll.Merge(batch);
        hll.Dispose();
        hll = merged;
        batch.Dispose();
    }

    Console.WriteLine("expected: {0}, estimate: {1}, time: {2}", size, hll.Cardinality(), sw.Elapsed);
    long estimate = hll.Cardinality();
    double err = Math.Abs(estimate - size) / (double)size;
    Console.WriteLine("Percentage error: " + err);
    Assert.That(err, Is.LessThan(0.1));
    hll.Dispose();
}
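// Interleaving Merge and Dispose by hand is easy to get wrong when every
// instance comes from a shared pool. A minimal sketch of a helper that
// centralizes the lifetime bookkeeping; MergeAndDispose is a hypothetical
// name, not part of the HyperLogLogPlus API:
private static HyperLogLogPlus MergeAndDispose(HyperLogLogPlus left, HyperLogLogPlus right)
{
    // Merge produces a fresh estimator (presumably drawn from the same pool
    // when one was supplied), so both inputs can be released immediately.
    var merged = left.Merge(right);
    left.Dispose();
    right.Dispose();
    return merged;
}

// With this helper, the three lifetime statements in the loop above would
// collapse to: hll = MergeAndDispose(hll, batch);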
[Test]
public void MergeSelf_ForceNormal()
{
    int[] cardinalities = { 0, 1, 10, 100, 1000, 10000, 100000, 1000000 };
    foreach (var cardinality in cardinalities)
    {
        for (uint j = 4; j < 24; ++j)
        {
            Console.WriteLine("p=" + j);

            // A sparse precision of 0 forces the estimator straight into
            // normal (dense) mode.
            var hll = new HyperLogLogPlus(j, 0);
            for (var l = 0; l < cardinality; l++)
            {
                hll.OfferHashed(Hash64(Rnd.Next()));
            }
            Console.WriteLine("hllcardinality={0} cardinality={1}", hll.Cardinality(), cardinality);

            // As in MergeSelf: serialization must round-trip the estimate, and
            // merging with an identical copy must leave it unchanged.
            var deserialized = HyperLogLogPlus.FromBytes(hll.ToBytes());
            Assert.That(deserialized.Cardinality(), Is.EqualTo(hll.Cardinality()));
            var merged = hll.Merge(deserialized);
            Console.WriteLine(merged.Cardinality() + " : " + hll.Cardinality());
            Assert.That(merged.Cardinality(), Is.EqualTo(hll.Cardinality()));
        }
    }
}
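// These tests rely on a shared Random instance and Hash64 overloads that are
// not part of this excerpt. A minimal sketch of what they could look like,
// assuming OfferHashed accepts an unsigned 64-bit hash (use long and an
// unchecked cast if the port takes a signed value). FNV-1a is used purely
// for illustration; a real fixture would typically use a stronger hash such
// as MurmurHash3.
private static readonly Random Rnd = new Random();

private static ulong Hash64(byte[] data)
{
    // 64-bit FNV-1a over the raw bytes.
    const ulong fnvOffsetBasis = 14695981039346656037UL;
    const ulong fnvPrime = 1099511628211UL;
    ulong hash = fnvOffsetBasis;
    foreach (byte b in data)
    {
        hash ^= b;
        hash *= fnvPrime;
    }
    return hash;
}

private static ulong Hash64(int value) => Hash64(BitConverter.GetBytes(value));

private static ulong Hash64(double value) => Hash64(BitConverter.GetBytes(value));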