// Merges 20 HyperLogLogPlus sketches, each fed 10,000 hashed random doubles, and verifies
// that the merged cardinality estimate lies within three standard errors of the total
// number of values offered.
// NOTE(review): the second constructor argument (25) is presumably the sparse-mode
// precision; confirm against the HyperLogLogPlus constructor.
public void Merge_ManySparse()
{
    const int numToMerge = 20;
    const int bits = 18;
    const int cardinality = 10000;

    // Build the independent sketches that will be merged together.
    var hlls = new HyperLogLogPlus[numToMerge];
    for (int i = 0; i < numToMerge; i++)
    {
        hlls[i] = new HyperLogLogPlus(bits, 25);
        for (int j = 0; j < cardinality; j++)
        {
            double val = Rnd.NextDouble();
            hlls[i].OfferHashed(Hash64(val));
        }
    }

    // The expected total treats every offered random value as distinct.
    const long expectedCardinality = numToMerge * cardinality;

    // Merge every remaining sketch into the first one.
    var hll = hlls[0];
    hlls = hlls.Skip(1).ToArray();
    long mergedEstimate = hll.Merge(hlls).Cardinality();

    // Standard error used by this test: expected * 1.04 / sqrt(2^bits).
    double se = expectedCardinality * (1.04 / Math.Sqrt(Math.Pow(2, bits)));
    double lowerBound = expectedCardinality - (3 * se);
    double upperBound = expectedCardinality + (3 * se);

    Console.WriteLine("Expect estimate: {0} is between {1} and {2}", mergedEstimate, lowerBound, upperBound);
    Assert.That(mergedEstimate, Is.InRange(lowerBound, upperBound));
}
// Merges 4 HyperLogLogPlus sketches, each fed 4,000 hashed random doubles, and verifies
// that the merged cardinality estimate (a) has a relative error below 10% and (b) lies
// within three standard errors of the total number of values offered.
// NOTE(review): the second constructor argument (25) is presumably the sparse-mode
// precision; confirm against the HyperLogLogPlus constructor.
public void Merge_Sparse()
{
    const int numToMerge = 4;
    const int bits = 18;
    const int cardinality = 4000;

    // Build the independent sketches that will be merged together.
    var hlls = new HyperLogLogPlus[numToMerge];
    for (var i = 0; i < numToMerge; ++i)
    {
        hlls[i] = new HyperLogLogPlus(bits, 25);
        for (int j = 0; j < cardinality; ++j)
        {
            double val = Rnd.NextDouble();
            hlls[i].OfferHashed(Hash64(val));
        }
    }

    // The expected total treats every offered random value as distinct.
    const long expectedCardinality = numToMerge * cardinality;

    // Merge every remaining sketch into the first one.
    HyperLogLogPlus hll = hlls[0];
    hlls = hlls.Skip(1).ToArray();
    long mergedEstimate = hll.Merge(hlls).Cardinality();

    // Standard error used by this test: expected * 1.04 / sqrt(2^bits).
    double se = expectedCardinality * (1.04 / Math.Sqrt(Math.Pow(2, bits)));
    double lowerBound = expectedCardinality - (3 * se);
    double upperBound = expectedCardinality + (3 * se);

    Console.WriteLine("Expect estimate: {0} is between {1} and {2}", mergedEstimate, lowerBound, upperBound);

    // Relative error of the merged estimate against the expected total.
    double err = Math.Abs(mergedEstimate - expectedCardinality) / (double)expectedCardinality;
    Console.WriteLine("Percentage error " + err);

    Assert.That(err, Is.LessThan(0.1));
    Assert.That(mergedEstimate, Is.InRange(lowerBound, upperBound));
}