private void CompareHLL(CardinalityEstimator hll1, CardinalityEstimator hll2)
{
    // Compare the exported states of the two estimators field by field.
    CardinalityEstimatorState left = hll1.GetState();
    CardinalityEstimatorState right = hll2.GetState();

    Assert.AreEqual(left.BitsPerIndex, right.BitsPerIndex);
    Assert.AreEqual(left.IsSparse, right.IsSparse);

    // Each internal structure must be present in both states or absent from both.
    Assert.IsTrue((left.DirectCount == null) == (right.DirectCount == null));
    Assert.IsTrue((left.LookupSparse == null) == (right.LookupSparse == null));
    Assert.IsTrue((left.LookupDense == null) == (right.LookupDense == null));

    if (left.DirectCount != null)
    {
        // Mutual subset relation == set equality
        Assert.IsTrue(left.DirectCount.SetEquals(right.DirectCount));
    }

    if (left.LookupSparse != null)
    {
        Assert.IsTrue(left.LookupSparse.DictionaryEqual(right.LookupSparse));
    }

    if (left.LookupDense != null)
    {
        Assert.IsTrue(left.LookupDense.SequenceEqual(right.LookupDense));
    }
}
private void TestDeserializer2(int cardinality)
{
    // Round-trip an estimator through the BinaryWriter/BinaryReader entry points
    CardinalityEstimator original = CreateAndFillCardinalityEstimator(cardinality);
    var serializer = new CardinalityEstimatorSerializer();

    byte[] payload;
    using (var outputStream = new MemoryStream())
    {
        using (var writer = new BinaryWriter(outputStream))
        {
            serializer.Write(writer, original);
        }
        payload = outputStream.ToArray();
    }

    CardinalityEstimator roundTripped;
    using (var inputStream = new MemoryStream(payload))
    using (var reader = new BinaryReader(inputStream))
    {
        roundTripped = serializer.Read(reader);
    }

    CompareHLL(original, roundTripped);
}
public void TestSerializerCardinality1000()
{
    CardinalityEstimator hll = CreateAndFillCardinalityEstimator(1000);
    var serializer = new CardinalityEstimatorSerializer();

    byte[] payload;
    using (var memoryStream = new MemoryStream())
    {
        serializer.Serialize(memoryStream, hll, false);
        payload = memoryStream.ToArray();
    }

    CardinalityEstimatorState state = hll.GetState();

    // Layout of the sparse representation:
    //   4 bytes   major and minor format versions
    //   1 byte    HashFunctionId
    //   4 bytes   bits per index
    //   1 byte    IsSparse / IsDirectCount flags
    //   4 bytes   number of elements in lookupSparse
    //   2+1 bytes per lookupSparse element (key + value)
    //   8 bytes   CountAdditions
    Assert.AreEqual(22 + 3 * state.LookupSparse.Count, payload.Length);
    Assert.AreEqual((byte)HashFunctionId.Murmur3, payload.Skip(4).Take(1).First());
    Assert.AreEqual(14, BitConverter.ToInt32(payload.Skip(5).Take(4).ToArray(), 0)); // Bits in Index = 14
    Assert.AreEqual(2, payload[9]); // IsSparse = true AND IsDirectCount = false
    Assert.AreEqual(state.LookupSparse.Count, BitConverter.ToInt32(payload.Skip(10).Take(4).ToArray(), 0));
    Assert.AreEqual(1000UL, BitConverter.ToUInt64(payload.Skip(14 + 3 * state.LookupSparse.Count).Take(8).ToArray(), 0)); // CountAdditions = 1000
}
public void ReportAccuracy()
{
    // Adds 10M random members and reports the worst sampled relative error.
    var hll = new CardinalityEstimator();
    double maxError = 0;
    var worstMember = 0;
    var nextMember = new byte[ElementSizeInBytes];
    for (var i = 0; i < 10000000; i++)
    {
        Rand.NextBytes(nextMember);
        hll.Add(nextMember);

        if (i % 1007 == 0) // just some interval to sample error at, can be any number
        {
            // FIX: take the absolute relative error. The original expression was signed,
            // so under-estimates (negative error) could never update maxError and the
            // report silently ignored half of the error distribution.
            double error = Math.Abs((hll.Count() - (double)(i + 1)) / ((double)i + 1));
            if (error > maxError)
            {
                maxError = error;
                worstMember = i + 1;
            }
        }
    }

    Console.WriteLine("Worst: {0}", worstMember);
    Console.WriteLine("Max error: {0}", maxError);
    Assert.True(true);
}
public void TestSerializerCardinality1000()
{
    CardinalityEstimator hll = CreateAndFillCardinalityEstimator(1000);
    var serializer = new CardinalityEstimatorSerializer();
    byte[] results;
    using (var memoryStream = new MemoryStream())
    {
        serializer.Serialize(memoryStream, hll);
        results = memoryStream.ToArray();
    }

    CardinalityEstimatorState data = hll.GetState();

    // Expected length is 13 + 3 bytes per sparse element (a previous comment here
    // claimed a fixed "2908", which was stale — the size depends on how many
    // entries the sparse lookup holds):
    // 4 bytes for the major and minor versions
    // 4 bytes for the Bits in Index
    // 1 byte for the IsSparse and IsDirectCount flags
    // 4 bytes for the number of elements in lookupSparse
    // 2+1 bytes for each element in lookupSparse (ushort key + byte value)
    Assert.AreEqual(13 + 3 * data.LookupSparse.Count, results.Length);
    Assert.AreEqual(14, BitConverter.ToInt32(results.Skip(4).Take(4).ToArray(), 0)); // Bits in Index = 14
    Assert.AreEqual(2, results[8]); // IsSparse = true AND IsDirectCount = false
    Assert.AreEqual(data.LookupSparse.Count, BitConverter.ToInt32(results.Skip(9).Take(4).ToArray(), 0));
}
public void DirectCountingIsResetWhenMergingAlmostFullEstimators()
{
    // An estimator filled by direct Add calls and one built by merging many
    // single-element estimators should serialize to the same length.
    var addedEstimator = new CardinalityEstimator();
    var mergedEstimator = new CardinalityEstimator();
    for (int i = 0; i < 10_000; i++)
    {
        var guid = Guid.NewGuid().ToString();
        addedEstimator.Add(guid);

        // Simulate some intermediate estimators being merged together
        var temporaryEstimator = new CardinalityEstimator();
        temporaryEstimator.Add(guid);
        mergedEstimator.Merge(temporaryEstimator);
    }

    var serializer = new CardinalityEstimatorSerializer();

    // FIX: dispose the MemoryStreams; the original leaked both of them.
    using (var stream1 = new MemoryStream())
    using (var stream2 = new MemoryStream())
    {
        serializer.Serialize(stream1, addedEstimator, true);
        serializer.Serialize(stream2, mergedEstimator, true);

        Assert.Equal(stream1.Length, stream2.Length);
    }
}
public void TestSerializerCardinality10()
{
    CardinalityEstimator hll = CreateAndFillCardinalityEstimator(10);
    var serializer = new CardinalityEstimatorSerializer();

    byte[] payload;
    using (var memoryStream = new MemoryStream())
    {
        serializer.Serialize(memoryStream, hll);
        payload = memoryStream.ToArray();
    }

    // Layout of the direct-count representation (93 bytes for 10 elements):
    //   4 bytes  major and minor format versions
    //   4 bytes  bits per index
    //   1 byte   IsSparse / IsDirectCount flags
    //   4 bytes  number of elements in DirectCount
    //   8 bytes  per DirectCount element (ulong)
    Assert.AreEqual(93, payload.Length);
    Assert.AreEqual(14, BitConverter.ToInt32(payload.Skip(4).Take(4).ToArray(), 0)); // Bits in Index = 14
    Assert.AreEqual(3, payload[8]); // IsSparse = true AND IsDirectCount = true
    Assert.AreEqual(10, BitConverter.ToInt32(payload.Skip(9).Take(4).ToArray(), 0)); // Count = 10
}
/// <summary>
/// Gets the number of indexing bits required to produce a given standard error
/// </summary>
/// <param name="stdError">
/// Standard error, which determines accuracy and memory consumption. For large cardinalities, the observed error is usually less than
/// 3 * <paramref name="stdError" />.
/// </param>
/// <returns>Number of indexing bits to request from the estimator</returns>
private static int GetAccuracyInBits(double stdError)
{
    // m registers give stdError ~= 1.04 / sqrt(m), so m = (1.04 / stdError)^2
    // and the index width is ceil(log2(m)).
    double sqrtRegisterCount = 1.04 / stdError;
    double registerCount = sqrtRegisterCount * sqrtRegisterCount;
    return (int)Math.Ceiling(CardinalityEstimator.Log2(registerCount));
}
private void TestSerializerCreatesSmallerData(int cardinality, out int customSize, out int defaultSize)
{
    // Verifies that the custom binary format is never larger than the
    // BinaryFormatter representation of the same estimator.
    CardinalityEstimator hll = CreateAndFillCardinalityEstimator(cardinality);

    byte[] customBytes;
    using (var customStream = new MemoryStream())
    {
        new CardinalityEstimatorSerializer().Serialize(customStream, hll, false);
        customBytes = customStream.ToArray();
    }
    customSize = customBytes.Length;

    // NOTE(review): BinaryFormatter is obsolete and insecure (removed in .NET 9);
    // it is kept here only because the test's purpose is to compare against it.
    byte[] defaultBytes;
    using (var formatterStream = new MemoryStream())
    {
        new BinaryFormatter().Serialize(formatterStream, hll);
        defaultBytes = formatterStream.ToArray();
    }
    defaultSize = defaultBytes.Length;

    Assert.IsTrue(customBytes.Length <= defaultBytes.Length);
}
public static byte[] SerializeCardinality(this CardinalityEstimator estimator, CardinalityEstimatorSerializer serializer)
{
    // Convenience extension: serialize the estimator into a fresh byte array.
    using (var buffer = new MemoryStream())
    {
        serializer.Serialize(buffer, estimator);
        return buffer.ToArray();
    }
}
public JsonResult Get()
{
    // Estimate the number of distinct fingerprint hashes in the repository
    // without materializing the full set in memory.
    ICardinalityEstimator<string> estimator = new CardinalityEstimator();
    foreach (Fingerprint fingerprint in _counterRepo.GetFingerprints())
    {
        estimator.Add(fingerprint.Hash);
    }
    return new JsonResult(new { Clicks = estimator.Count() });
}
public void StaticMergeHandlesNullElements()
{
    const int expectedBitsPerIndex = 11;

    // Null entries in the sequence must be skipped, not dereferenced
    var estimators = new List<CardinalityEstimator>
    {
        null,
        new CardinalityEstimator(expectedBitsPerIndex, HashFunctionId.Fnv1A),
        null
    };

    CardinalityEstimator merged = CardinalityEstimator.Merge(estimators);

    Assert.NotNull(merged);
    Assert.Equal(expectedBitsPerIndex, merged.GetState().BitsPerIndex);
}
public void TestGetSigma()
{
    // Simulate a 64-bit hash where 14 bits are consumed for bucket indexing,
    // leaving 50 bits for the sigma (leading-zero rank) computation.
    const int bitsToCount = 64 - 14;

    // All counted bits zero yields bitsToCount + 1
    Assert.Equal(51, CardinalityEstimator.GetSigma(0, bitsToCount));
    // Lowest bit set yields bitsToCount
    Assert.Equal(50, CardinalityEstimator.GetSigma(1, bitsToCount));
    Assert.Equal(47, CardinalityEstimator.GetSigma(8, bitsToCount));
    // All counted bits set yields 1
    Assert.Equal(1, CardinalityEstimator.GetSigma((ulong)(Math.Pow(2, bitsToCount) - 1), bitsToCount));
    // A bit set above the counted range behaves like an all-zero value
    Assert.Equal(51, CardinalityEstimator.GetSigma((ulong)(Math.Pow(2, bitsToCount + 1)), bitsToCount));
}
public void SerializerCanDeserializeVersion2Point0()
{
    // Regression check: payloads written by format version 2.0 must still load
    var serializer = new CardinalityEstimatorSerializer();

    CardinalityEstimator direct = serializer.Deserialize(new MemoryStream(Resources.serializedDirect_v2_0));
    CardinalityEstimator sparse = serializer.Deserialize(new MemoryStream(Resources.serializedSparse_v2_0));
    CardinalityEstimator dense = serializer.Deserialize(new MemoryStream(Resources.serializedDense_v2_0));

    // Expected counts recorded when the fixture resources were generated
    Assert.AreEqual(50UL, direct.Count());
    Assert.AreEqual(151UL, sparse.Count());
    Assert.AreEqual(5009UL, dense.Count());
}
private CardinalityEstimator CreateAndFillCardinalityEstimator(int cardinality = 1000000, int bits = 14)
{
    // Builds an estimator with the requested index width and feeds it
    // `cardinality` random members of ElementSizeInBytes bytes each.
    var estimator = new CardinalityEstimator(bits);
    var member = new byte[ElementSizeInBytes];
    for (var added = 0; added < cardinality; added++)
    {
        Rand.NextBytes(member);
        estimator.Add(member);
    }
    return estimator;
}
/// <summary>
/// Generates <paramref name="expectedCount" /> random (or sequential) elements and adds them to CardinalityEstimators, then asserts that
/// the observed error rate is no more than <paramref name="maxAcceptedError" />
/// </summary>
/// <param name="stdError">Expected standard error of the estimators (upper bound)</param>
/// <param name="expectedCount">number of elements to generate in total</param>
/// <param name="maxAcceptedError">Maximum allowed error rate. Default is 4 times <paramref name="stdError" /></param>
/// <param name="numHllInstances">Number of estimators to create. Generated elements will be assigned to one of the estimators at random</param>
/// <param name="sequential">When false, elements will be generated at random. When true, elements will be 0,1,2...</param>
private void RunTest(double stdError, long expectedCount, double? maxAcceptedError = null, int numHllInstances = 1, bool sequential = false)
{
    // Default acceptance threshold: 4 sigma — should fail once in A LOT of runs
    if (maxAcceptedError == null)
    {
        maxAcceptedError = 4 * stdError;
    }

    int indexBits = GetAccuracyInBits(stdError);
    var stopwatch = new Stopwatch();
    long gcMemoryAtStart = GetGcMemory();

    // Create the estimators
    var estimators = new CardinalityEstimator[numHllInstances];
    for (var i = 0; i < numHllInstances; i++)
    {
        estimators[i] = new CardinalityEstimator(indexBits);
    }

    var randomMember = new byte[ElementSizeInBytes];
    stopwatch.Start();
    for (long i = 0; i < expectedCount; i++)
    {
        // Route each element to a randomly chosen estimator
        int target = Rand.Next(numHllInstances);
        if (sequential)
        {
            estimators[target].Add(i);
        }
        else
        {
            Rand.NextBytes(randomMember);
            estimators[target].Add(randomMember);
        }
    }
    stopwatch.Stop();

    ReportMemoryCost(gcMemoryAtStart); // done here so references can't be GC'ed yet

    // Merge all instances and check the combined estimate
    CardinalityEstimator mergedHll = CardinalityEstimator.Merge(estimators);
    Console.WriteLine("Run time: {0}", stopwatch.Elapsed);
    Console.WriteLine("Expected {0}, got {1}", expectedCount, mergedHll.Count());

    double observedError = Math.Abs(mergedHll.Count() / (double)(expectedCount) - 1.0);
    Console.WriteLine("StdErr: {0}. Observed error: {1}", stdError, observedError);
    Assert.True(observedError <= maxAcceptedError, string.Format("Observed error was over {0}", maxAcceptedError));
    Console.WriteLine();
}
public void StaticMergeTest()
{
    const int expectedBitsPerIndex = 11;

    // Ten estimators, one random element added to each
    var sources = new CardinalityEstimator[10];
    for (var i = 0; i < sources.Length; i++)
    {
        sources[i] = new CardinalityEstimator(expectedBitsPerIndex);
        sources[i].Add(Rand.Next());
    }

    CardinalityEstimator merged = CardinalityEstimator.Merge(sources);

    Assert.Equal(10UL, merged.Count());
    Assert.Equal(expectedBitsPerIndex, merged.GetState().BitsPerIndex);
}
public void TestSize()
{
    // CountElementsAdded tracks total Add calls, including duplicates
    var first = new CardinalityEstimator();
    Assert.AreEqual(0UL, first.CountElementsAdded);

    first.Add(0);
    first.Add(0);
    Assert.AreEqual(2UL, first.CountElementsAdded);

    // Merging accumulates the other estimator's addition count
    var second = new CardinalityEstimator();
    second.Add(0);
    first.Merge(second);
    Assert.AreEqual(3UL, first.CountElementsAdded);
}
public void DeserializedEstimatorUsesSameHashAsOriginal()
{
    // Prepare a fixed batch of random elements
    IList<int> elements = new List<int>();
    for (int i = 0; i < 150; i++)
    {
        elements.Add(Rand.Next());
    }

    foreach (HashFunctionId hashFunctionId in Enum.GetValues(typeof(HashFunctionId)))
    {
        // Fill an estimator configured with this hash function
        var original = new CardinalityEstimator(hashFunctionId: hashFunctionId);
        foreach (int element in elements)
        {
            original.Add(element);
        }

        // Round-trip through the custom serializer
        var serializer = new CardinalityEstimatorSerializer();
        byte[] payload;
        using (var outputStream = new MemoryStream())
        {
            serializer.Serialize(outputStream, original, false);
            payload = outputStream.ToArray();
        }

        CardinalityEstimator roundTripped;
        using (var inputStream = new MemoryStream(payload))
        {
            roundTripped = serializer.Deserialize(inputStream, false);
        }

        // Re-adding the same elements must leave the state unchanged — this only
        // holds if the deserialized copy hashes with the original's function
        foreach (int element in elements)
        {
            roundTripped.Add(element);
        }

        Assert.AreEqual(original.Count(), roundTripped.Count());
    }
}
private void TestDeserializer(int cardinality)
{
    // Round-trip via the stream-based API and compare states field by field
    CardinalityEstimator original = CreateAndFillCardinalityEstimator(cardinality);
    var serializer = new CardinalityEstimatorSerializer();

    byte[] payload;
    using (var outputStream = new MemoryStream())
    {
        serializer.Serialize(outputStream, original);
        payload = outputStream.ToArray();
    }

    CardinalityEstimator roundTripped;
    using (var inputStream = new MemoryStream(payload))
    {
        roundTripped = serializer.Deserialize(inputStream);
    }

    CardinalityEstimatorState left = original.GetState();
    CardinalityEstimatorState right = roundTripped.GetState();

    Assert.AreEqual(left.BitsPerIndex, right.BitsPerIndex);
    Assert.AreEqual(left.IsSparse, right.IsSparse);

    // Each internal structure must be present in both states or absent from both
    Assert.IsTrue((left.DirectCount == null) == (right.DirectCount == null));
    Assert.IsTrue((left.LookupSparse == null) == (right.LookupSparse == null));
    Assert.IsTrue((left.LookupDense == null) == (right.LookupDense == null));

    if (left.DirectCount != null)
    {
        // Mutual subset relation == set equality
        Assert.IsTrue(left.DirectCount.SetEquals(right.DirectCount));
    }

    if (left.LookupSparse != null)
    {
        Assert.IsTrue(left.LookupSparse.DictionaryEqual(right.LookupSparse));
    }

    if (left.LookupDense != null)
    {
        Assert.IsTrue(left.LookupDense.SequenceEqual(right.LookupDense));
    }
}
public void TestCountAdditions()
{
    // CountAdditions tracks total Add calls, including duplicate elements
    var first = new CardinalityEstimator();
    Assert.Equal(0UL, first.CountAdditions);

    first.Add(0);
    first.Add(0);
    Assert.Equal(2UL, first.CountAdditions);

    // Merge accumulates the other estimator's addition count
    var second = new CardinalityEstimator();
    second.Add(0);
    first.Merge(second);
    Assert.Equal(3UL, first.CountAdditions);
}
private void TestSerializerCardinality100000Parameterized(bool useBinWriter)
{
    CardinalityEstimator hll = CreateAndFillCardinalityEstimator(100000);
    var serializer = new CardinalityEstimatorSerializer();

    byte[] payload;
    using (var memoryStream = new MemoryStream())
    {
        if (useBinWriter)
        {
            // Exercise the BinaryWriter-based entry point
            using (var writer = new BinaryWriter(memoryStream))
            {
                serializer.Write(writer, hll);
            }
        }
        else
        {
            serializer.Serialize(memoryStream, hll, false);
        }
        payload = memoryStream.ToArray();
    }

    CardinalityEstimatorState state = hll.GetState();

    // Layout of the dense representation:
    //   4 bytes  major and minor format versions
    //   1 byte   HashFunctionId
    //   4 bytes  bits per index
    //   1 byte   IsSparse / IsDirectCount flags
    //   4 bytes  lookupDense length
    //   1 byte   per lookupDense element
    //   8 bytes  CountAdditions
    Assert.Equal(22 + state.LookupDense.Length, payload.Length);
    Assert.Equal((byte)HashFunctionId.Murmur3, payload.Skip(4).Take(1).First());
    Assert.Equal(14, BitConverter.ToInt32(payload.Skip(5).Take(4).ToArray(), 0)); // Bits in Index = 14
    Assert.Equal(0, payload[9]); // IsSparse = false AND IsDirectCount = false
    Assert.Equal(state.LookupDense.Length, BitConverter.ToInt32(payload.Skip(10).Take(4).ToArray(), 0));
    Assert.Equal(100000UL, BitConverter.ToUInt64(payload.Skip(14 + state.LookupDense.Length).Take(8).ToArray(), 0)); // CountAdditions = 100000
}
/// <summary>
/// Serialize the given <paramref name="cardinalityEstimator" /> to <paramref name="stream" />
/// </summary>
/// <param name="stream">Destination stream for the binary representation</param>
/// <param name="cardinalityEstimator">Estimator whose state is written</param>
/// <param name="leaveOpen">
/// When true, <paramref name="stream" /> is left open after writing. The default (false)
/// preserves the historical behavior of closing the stream when the internal writer is disposed.
/// </param>
public void Serialize(Stream stream, CardinalityEstimator cardinalityEstimator, bool leaveOpen = false)
{
    // FIX/generalization: 'new BinaryWriter(stream)' always closed the caller's stream on
    // dispose, which prevented further use of the stream. The leaveOpen overload exposes
    // that choice; the encoding argument only affects string writes, and this format never
    // writes strings, so behavior of the emitted bytes is unchanged.
    using (var bw = new BinaryWriter(stream, System.Text.Encoding.UTF8, leaveOpen))
    {
        bw.Write(DataFormatMajorVersion);
        bw.Write(DataFormatMinorVersion);

        CardinalityEstimatorState data = cardinalityEstimator.GetState();

        bw.Write((byte)data.HashFunctionId);
        bw.Write(data.BitsPerIndex);

        // Flags byte: bit 1 = IsSparse, bit 0 = has direct-count set
        bw.Write((byte)(((data.IsSparse ? 1 : 0) << 1) + (data.DirectCount != null ? 1 : 0)));

        if (data.DirectCount != null)
        {
            bw.Write(data.DirectCount.Count);
            foreach (ulong element in data.DirectCount)
            {
                bw.Write(element);
            }
        }
        else if (data.IsSparse)
        {
            bw.Write(data.LookupSparse.Count);
            foreach (KeyValuePair<ushort, byte> element in data.LookupSparse)
            {
                bw.Write(element.Key);
                bw.Write(element.Value);
            }
        }
        else
        {
            bw.Write(data.LookupDense.Length);
            foreach (byte element in data.LookupDense)
            {
                bw.Write(element);
            }
        }

        bw.Write(data.CountAdditions);
        bw.Flush();
    }
}
private void RunRecreationFromData(int cardinality = 1000000)
{
    // Fill an estimator, rebuild a second one from its exported state,
    // and verify the two states are structurally identical.
    var original = new CardinalityEstimator();
    var member = new byte[ElementSizeInBytes];
    for (var i = 0; i < cardinality; i++)
    {
        Rand.NextBytes(member);
        original.Add(member);
    }

    CardinalityEstimatorState state = original.GetState();
    CardinalityEstimatorState recreated = new CardinalityEstimator(state).GetState();

    Assert.Equal(state.BitsPerIndex, recreated.BitsPerIndex);
    Assert.Equal(state.IsSparse, recreated.IsSparse);

    // Each internal structure must be present in both states or absent from both
    Assert.True((state.DirectCount == null) == (recreated.DirectCount == null));
    Assert.True((state.LookupSparse == null) == (recreated.LookupSparse == null));
    Assert.True((state.LookupDense == null) == (recreated.LookupDense == null));

    if (state.DirectCount != null)
    {
        // Mutual subset relation == set equality
        Assert.True(state.DirectCount.SetEquals(recreated.DirectCount));
    }

    if (state.LookupSparse != null)
    {
        Assert.True(state.LookupSparse.DictionaryEqual(recreated.LookupSparse));
    }

    if (state.LookupDense != null)
    {
        Assert.True(state.LookupDense.SequenceEqual(recreated.LookupDense));
    }
}
public void EstimatorWorksAfterDeserialization()
{
    ICardinalityEstimator<int> original = new CardinalityEstimator();
    original.Add(5);
    original.Add(7);
    Assert.Equal(2UL, original.Count());

    // NOTE(review): BinaryFormatter is obsolete and insecure (removed in .NET 9);
    // this test exists specifically to cover legacy serialization support.
    var binaryFormatter = new BinaryFormatter();
    using (var memoryStream = new MemoryStream())
    {
        binaryFormatter.Serialize(memoryStream, original);
        memoryStream.Seek(0, SeekOrigin.Begin);

        var copy = (CardinalityEstimator)binaryFormatter.Deserialize(memoryStream);
        Assert.Equal(2UL, copy.Count());

        // Re-adding already-seen elements must not change the estimate
        copy.Add(5);
        copy.Add(7);
        Assert.Equal(2UL, copy.Count());
    }
}
private void TestDeserializer(int cardinality)
{
    // Round-trip via the stream-based API (third argument false on both calls)
    // and delegate the state comparison to CompareHLL.
    CardinalityEstimator original = CreateAndFillCardinalityEstimator(cardinality);
    var serializer = new CardinalityEstimatorSerializer();

    byte[] payload;
    using (var outputStream = new MemoryStream())
    {
        serializer.Serialize(outputStream, original, false);
        payload = outputStream.ToArray();
    }

    CardinalityEstimator roundTripped;
    using (var inputStream = new MemoryStream(payload))
    {
        roundTripped = serializer.Deserialize(inputStream, false);
    }

    CompareHLL(original, roundTripped);
}
/// <summary>
/// Deserialize a <see cref="CardinalityEstimator" /> from the given <paramref name="stream" />
/// </summary>
/// <param name="stream">Stream positioned at the start of a serialized estimator</param>
/// <returns>A new estimator rebuilt from the serialized state</returns>
public CardinalityEstimator Deserialize(Stream stream)
{
    using (var br = new BinaryReader(stream))
    {
        int dataFormatMajorVersion = br.ReadUInt16();
        int dataFormatMinorVersion = br.ReadUInt16();
        AssertDataVersionCanBeRead(dataFormatMajorVersion, dataFormatMinorVersion);

        HashFunctionId hashFunctionId;
        if (dataFormatMajorVersion >= 2)
        {
            // Starting with version 2.0, the serializer writes the hash function ID
            hashFunctionId = (HashFunctionId)br.ReadByte();
        }
        else
        {
            // Versions before 2.0 all used FNV-1a
            hashFunctionId = HashFunctionId.Fnv1A;
        }

        int bitsPerIndex = br.ReadInt32();

        // Flags byte: bit 1 = IsSparse, bit 0 = direct-count set present
        byte flags = br.ReadByte();
        bool isSparse = ((flags & 2) == 2);
        bool isDirectCount = ((flags & 1) == 1);

        HashSet<ulong> directCount = null;
        IDictionary<ushort, byte> lookupSparse = isSparse ? new Dictionary<ushort, byte>() : null;
        byte[] lookupDense = null;

        if (isDirectCount)
        {
            int count = br.ReadInt32();
            directCount = new HashSet<ulong>();
            for (var i = 0; i < count; i++)
            {
                ulong element = br.ReadUInt64();
                directCount.Add(element);
            }
        }
        else if (isSparse)
        {
            int count = br.ReadInt32();
            for (var i = 0; i < count; i++)
            {
                ushort elementKey = br.ReadUInt16();
                byte elementValue = br.ReadByte();
                lookupSparse.Add(elementKey, elementValue);
            }
        }
        else
        {
            int count = br.ReadInt32();
            lookupDense = br.ReadBytes(count);
        }

        // Starting with version 2.1, the serializer writes CountAdditions.
        // FIX: the original check (major >= 2 && minor >= 1) would wrongly skip this
        // field for a hypothetical future version like 3.0; compare the (major, minor)
        // pair lexicographically instead. Behavior is unchanged for all 2.x streams.
        ulong countAdditions = 0UL;
        if (dataFormatMajorVersion > 2 ||
            (dataFormatMajorVersion == 2 && dataFormatMinorVersion >= 1))
        {
            countAdditions = br.ReadUInt64();
        }

        var data = new CardinalityEstimatorState
        {
            HashFunctionId = hashFunctionId,
            BitsPerIndex = bitsPerIndex,
            DirectCount = directCount,
            IsSparse = isSparse,
            LookupDense = lookupDense,
            LookupSparse = lookupSparse,
            CountAdditions = countAdditions,
        };

        var result = new CardinalityEstimator(data);
        return result;
    }
}
private CardinalityEstimator CreateAndFillCardinalityEstimator(int cardinality = 1000000)
{
    // Fills a default-configuration estimator with `cardinality` random members
    // of ElementSizeInBytes bytes each.
    var estimator = new CardinalityEstimator();
    var member = new byte[ElementSizeInBytes];
    for (var added = 0; added < cardinality; added++)
    {
        Rand.NextBytes(member);
        estimator.Add(member);
    }
    return estimator;
}
private void RunTest(double stdError, long expectedCount, double? maxAcceptedError = null, int numHllInstances = 1)
{
    // Default acceptance threshold: 5 sigma — should fail appx once in 1.7 million runs
    if (maxAcceptedError == null)
    {
        maxAcceptedError = 5 * stdError;
    }

    int indexBits = GetAccuracyInBits(stdError);
    var stopwatch = new Stopwatch();
    long gcMemoryAtStart = GetGcMemory();

    // Create the estimators
    var estimators = new CardinalityEstimator[numHllInstances];
    for (var i = 0; i < numHllInstances; i++)
    {
        estimators[i] = new CardinalityEstimator(indexBits);
    }

    var randomMember = new byte[ElementSizeInBytes];
    stopwatch.Start();
    for (long i = 0; i < expectedCount; i++)
    {
        // Route each element to a randomly chosen estimator
        int target = Rand.Next(numHllInstances);
        Rand.NextBytes(randomMember);
        estimators[target].Add(randomMember);
    }
    stopwatch.Stop();

    ReportMemoryCost(gcMemoryAtStart); // done here so references can't be GC'ed yet

    // Merge all instances and check the combined estimate
    CardinalityEstimator mergedHll = CardinalityEstimator.Merge(estimators);
    Console.WriteLine("Run time: {0}", stopwatch.Elapsed);
    Console.WriteLine("Expected {0}, got {1}", expectedCount, mergedHll.Count());

    double observedError = Math.Abs(mergedHll.Count() / (double)(expectedCount) - 1.0);
    Console.WriteLine("StdErr: {0}. Observed error: {1}", stdError, observedError);
    Assert.IsTrue(observedError <= maxAcceptedError, string.Format("Observed error was over {0}", maxAcceptedError));
    Console.WriteLine();
}
public void StaticMergeHandlesNullParameter()
{
    // A null sequence of estimators must merge to null rather than throw
    CardinalityEstimator merged = CardinalityEstimator.Merge((IEnumerable<CardinalityEstimator>)null);
    Assert.Null(merged);
}
/// <summary>
/// Creates a HyperLogLog wrapper whose inner estimator is deserialized from <paramref name="stream" />.
/// </summary>
/// <param name="stream">Stream containing a serialized <see cref="CardinalityEstimator" /></param>
public HyperLogLog(Stream stream)
{
    _hyperLogLog = Serializer.Deserialize(stream);
}
private void RunRecreationFromData(int cardinality = 1000000)
{
    // Fill an estimator, rebuild a second one from its exported state,
    // and verify the two states are structurally identical.
    var original = new CardinalityEstimator();
    var member = new byte[ElementSizeInBytes];
    for (var i = 0; i < cardinality; i++)
    {
        Rand.NextBytes(member);
        original.Add(member);
    }

    CardinalityEstimatorState state = original.GetState();
    CardinalityEstimatorState recreated = new CardinalityEstimator(state).GetState();

    Assert.AreEqual(state.BitsPerIndex, recreated.BitsPerIndex);
    Assert.AreEqual(state.IsSparse, recreated.IsSparse);

    // Each internal structure must be present in both states or absent from both
    Assert.IsTrue((state.DirectCount == null) == (recreated.DirectCount == null));
    Assert.IsTrue((state.LookupSparse == null) == (recreated.LookupSparse == null));
    Assert.IsTrue((state.LookupDense == null) == (recreated.LookupDense == null));

    if (state.DirectCount != null)
    {
        // Mutual subset relation == set equality
        Assert.IsTrue(state.DirectCount.SetEquals(recreated.DirectCount));
    }

    if (state.LookupSparse != null)
    {
        Assert.IsTrue(state.LookupSparse.DictionaryEqual(recreated.LookupSparse));
    }

    if (state.LookupDense != null)
    {
        Assert.IsTrue(state.LookupDense.SequenceEqual(recreated.LookupDense));
    }
}
[Ignore] // Test runtime is long
public void ReportAccuracy()
{
    // Adds 10M random members and reports the worst sampled relative error.
    var hll = new CardinalityEstimator();
    double maxError = 0;
    var worstMember = 0;
    var nextMember = new byte[ElementSizeInBytes];
    for (var i = 0; i < 10000000; i++)
    {
        Rand.NextBytes(nextMember);
        hll.Add(nextMember);

        if (i % 1007 == 0) // just some interval to sample error at, can be any number
        {
            // FIX: take the absolute relative error. The original expression was signed,
            // so under-estimates (negative error) could never update maxError and the
            // report silently ignored half of the error distribution.
            double error = Math.Abs((hll.Count() - (double)(i + 1)) / ((double)i + 1));
            if (error > maxError)
            {
                maxError = error;
                worstMember = i + 1;
            }
        }
    }

    Console.WriteLine("Worst: {0}", worstMember);
    Console.WriteLine("Max error: {0}", maxError);
    Assert.IsTrue(true);
}
/// <summary>
/// Creates a HyperLogLog wrapper around a fresh, empty <see cref="CardinalityEstimator" />.
/// </summary>
public HyperLogLog()
{
    _hyperLogLog = new CardinalityEstimator();
}
/// <summary>
/// Deserialize a <see cref="CardinalityEstimator" /> from the given <paramref name="stream" />
/// </summary>
/// <param name="stream">Stream positioned at the start of a serialized estimator</param>
/// <returns>A new estimator rebuilt from the serialized state</returns>
public CardinalityEstimator Deserialize(Stream stream)
{
    // NOTE(review): this reader does not read a hash-function ID or CountAdditions,
    // so it appears to target a pre-2.0 data format — confirm against the matching writer.
    using (var br = new BinaryReader(stream))
    {
        // Header: two 16-bit version fields, validated before any payload is read
        int dataFormatMajorVersion = br.ReadUInt16();
        int dataFormatMinorVersion = br.ReadUInt16();
        AssertDataVersionCanBeRead(dataFormatMajorVersion, dataFormatMinorVersion);

        int bitsPerIndex = br.ReadInt32();

        // Flags byte: bit 1 = IsSparse, bit 0 = direct-count set present
        byte flags = br.ReadByte();
        bool isSparse = ((flags & 2) == 2);
        bool isDirectCount = ((flags & 1) == 1);

        HashSet<ulong> directCount = null;
        IDictionary<ushort, byte> lookupSparse = isSparse ? new Dictionary<ushort, byte>() : null;
        byte[] lookupDense = null;

        if (isDirectCount)
        {
            // Direct-count payload: element count followed by one ulong per element
            int count = br.ReadInt32();
            directCount = new HashSet<ulong>();
            for (var i = 0; i < count; i++)
            {
                ulong element = br.ReadUInt64();
                directCount.Add(element);
            }
        }
        else if (isSparse)
        {
            // Sparse payload: entry count followed by (ushort key, byte value) pairs
            int count = br.ReadInt32();
            for (var i = 0; i < count; i++)
            {
                ushort elementKey = br.ReadUInt16();
                byte elementValue = br.ReadByte();
                lookupSparse.Add(elementKey, elementValue);
            }
        }
        else
        {
            // Dense payload: length-prefixed raw register bytes
            int count = br.ReadInt32();
            lookupDense = br.ReadBytes(count);
        }

        // Rebuild the estimator from the reconstructed internal state
        var data = new CardinalityEstimatorState
        {
            BitsPerIndex = bitsPerIndex,
            DirectCount = directCount,
            IsSparse = isSparse,
            LookupDense = lookupDense,
            LookupSparse = lookupSparse
        };

        var result = new CardinalityEstimator(data);
        return result;
    }
}