// Feeds the same 10,000 GUID strings into two estimators: one receives them through Add,
// the other through Merge of single-element temporary estimators. The test then asserts
// that both estimators serialize to the same number of bytes.
public void DirectCountingIsResetWhenMergingAlmostFullEstimators()
{
    var addedEstimator = new CardinalityEstimator();
    var mergedEstimator = new CardinalityEstimator();

    for (int i = 0; i < 10_000; i++)
    {
        var guid = Guid.NewGuid().ToString();

        addedEstimator.Add(guid);

        // Simulate some intermediate estimators being merged together
        var temporaryEstimator = new CardinalityEstimator();
        temporaryEstimator.Add(guid);
        mergedEstimator.Merge(temporaryEstimator);
    }

    var serializer = new CardinalityEstimatorSerializer();

    // Both streams are disposed deterministically; their lengths are read while they
    // are still in scope, so the comparison happens before disposal.
    using (var stream1 = new MemoryStream())
    using (var stream2 = new MemoryStream())
    {
        serializer.Serialize(stream1, addedEstimator, true);
        serializer.Serialize(stream2, mergedEstimator, true);

        Assert.Equal(stream1.Length, stream2.Length);
    }
}
// Serializes the estimator returned by CreateAndFillCardinalityEstimator(10) and checks
// the total size and the header fields of the produced byte array.
public void TestSerializerCardinality10()
{
    CardinalityEstimator estimator = CreateAndFillCardinalityEstimator(10);
    var serializer = new CardinalityEstimatorSerializer();

    byte[] serialized;
    using (var buffer = new MemoryStream())
    {
        serializer.Serialize(buffer, estimator);
        serialized = buffer.ToArray();
    }

    // Expected length is 93:
    //   4 bytes for the major and minor versions
    //   4 bytes for the Bits in Index
    //   1 byte for the IsSparse and IsDirectCount flags
    //   4 bytes for the number of elements in DirectCount
    //   8 bytes for each element (ulong) in DirectCount
    Assert.AreEqual(93, serialized.Length);

    // Bits in Index = 14 (little Int32 field starting at offset 4)
    int bitsInIndex = BitConverter.ToInt32(serialized, 4);
    Assert.AreEqual(14, bitsInIndex);

    // IsSparse = true AND IsDirectCount = true
    Assert.AreEqual(3, serialized[8]);

    // Count = 10 (Int32 field starting at offset 9)
    int directCountSize = BitConverter.ToInt32(serialized, 9);
    Assert.AreEqual(10, directCountSize);
}
// Round-trips an estimator of the given cardinality through serializer.Write and
// serializer.Read (BinaryWriter / BinaryReader) and compares the two with CompareHLL.
private void TestDeserializer2(int cardinality)
{
    CardinalityEstimator source = CreateAndFillCardinalityEstimator(cardinality);
    CardinalityEstimator roundTripped;
    var serializer = new CardinalityEstimatorSerializer();

    byte[] payload;
    using (var output = new MemoryStream())
    {
        using (var writer = new BinaryWriter(output))
        {
            serializer.Write(writer, source);
        }

        // The bytes are taken only after the writer has been disposed.
        payload = output.ToArray();
    }

    using (var input = new MemoryStream(payload))
    using (var reader = new BinaryReader(input))
    {
        roundTripped = serializer.Read(reader);
    }

    CompareHLL(source, roundTripped);
}
// Serializes the same estimator with CardinalityEstimatorSerializer and with BinaryFormatter,
// reports both sizes through the out parameters, and asserts that the custom serializer's
// output is not larger than BinaryFormatter's.
private void TestSerializerCreatesSmallerData(int cardinality, out int customSize, out int defaultSize)
{
    CardinalityEstimator estimator = CreateAndFillCardinalityEstimator(cardinality);

    // Custom serializer output
    var customSerializer = new CardinalityEstimatorSerializer();
    byte[] customBytes;
    using (var customStream = new MemoryStream())
    {
        customSerializer.Serialize(customStream, estimator, false);
        customBytes = customStream.ToArray();
    }
    customSize = customBytes.Length;

    // BinaryFormatter output, used here only as the size baseline
    var binaryFormatter = new BinaryFormatter();
    byte[] formatterBytes;
    using (var formatterStream = new MemoryStream())
    {
        binaryFormatter.Serialize(formatterStream, estimator);
        formatterBytes = formatterStream.ToArray();
    }
    defaultSize = formatterBytes.Length;

    Assert.IsTrue(customSize <= defaultSize);
}
// Serializes the estimator returned by CreateAndFillCardinalityEstimator(1000) and checks
// the byte layout against the estimator's own state (LookupSparse entry count).
public void TestSerializerCardinality1000()
{
    CardinalityEstimator estimator = CreateAndFillCardinalityEstimator(1000);
    var serializer = new CardinalityEstimatorSerializer();

    byte[] serialized;
    using (var buffer = new MemoryStream())
    {
        serializer.Serialize(buffer, estimator);
        serialized = buffer.ToArray();
    }

    CardinalityEstimatorState state = estimator.GetState();
    var sparseCount = state.LookupSparse.Count;

    // Expected length is a 13-byte header plus 3 bytes per sparse entry:
    //   4 bytes for the major and minor versions
    //   4 bytes for the Bits in Index
    //   1 byte for the IsSparse and IsDirectCount flags
    //   4 bytes for the number of elements in lookupSparse
    //   2+1 bytes for each element in lookupSparse
    Assert.AreEqual(13 + 3 * sparseCount, serialized.Length);

    // Bits in Index = 14 (Int32 field starting at offset 4)
    int bitsInIndex = BitConverter.ToInt32(serialized, 4);
    Assert.AreEqual(14, bitsInIndex);

    // IsSparse = true AND IsDirectCount = false
    Assert.AreEqual(2, serialized[8]);

    // Number of sparse entries (Int32 field starting at offset 9)
    int serializedSparseCount = BitConverter.ToInt32(serialized, 9);
    Assert.AreEqual(sparseCount, serializedSparseCount);
}
// Serializes the estimator returned by CreateAndFillCardinalityEstimator(1000) and checks
// every field of the produced byte array, including the hash function id and the
// trailing CountAdded value.
public void TestSerializerCardinality1000()
{
    CardinalityEstimator estimator = CreateAndFillCardinalityEstimator(1000);
    var serializer = new CardinalityEstimatorSerializer();

    byte[] serialized;
    using (var buffer = new MemoryStream())
    {
        serializer.Serialize(buffer, estimator, false);
        serialized = buffer.ToArray();
    }

    CardinalityEstimatorState state = estimator.GetState();
    var sparseCount = state.LookupSparse.Count;

    // Expected length is 22 fixed bytes plus 3 bytes per sparse entry:
    //   4 bytes for the major and minor versions
    //   1 byte for the HashFunctionId
    //   4 bytes for the Bits in Index
    //   1 byte for the IsSparse and IsDirectCount flags
    //   4 bytes for the number of elements in lookupSparse
    //   2+1 bytes for each element in lookupSparse
    //   8 bytes for CountAdded
    Assert.AreEqual(22 + 3 * sparseCount, serialized.Length);

    // Hash function id is the single byte at offset 4
    Assert.AreEqual((byte)HashFunctionId.Murmur3, serialized[4]);

    // Bits in Index = 14 (Int32 field starting at offset 5)
    int bitsInIndex = BitConverter.ToInt32(serialized, 5);
    Assert.AreEqual(14, bitsInIndex);

    // IsSparse = true AND IsDirectCount = false
    Assert.AreEqual(2, serialized[9]);

    // Number of sparse entries (Int32 field starting at offset 10)
    int serializedSparseCount = BitConverter.ToInt32(serialized, 10);
    Assert.AreEqual(sparseCount, serializedSparseCount);

    // CountAdditions = 1000 (UInt64 field following the sparse entries)
    ulong countAdded = BitConverter.ToUInt64(serialized, 14 + 3 * sparseCount);
    Assert.AreEqual(1000UL, countAdded);
}
// Deserializes the three v2.0 payloads stored in Resources (direct, sparse and dense)
// and checks the Count() reported by each resulting estimator.
public void SerializerCanDeserializeVersion2Point0()
{
    var serializer = new CardinalityEstimatorSerializer();

    // Each resource is read through its own stream, which is disposed as soon as the
    // estimator has been produced.
    CardinalityEstimator hllDirect;
    using (var directStream = new MemoryStream(Resources.serializedDirect_v2_0))
    {
        hllDirect = serializer.Deserialize(directStream);
    }

    CardinalityEstimator hllSparse;
    using (var sparseStream = new MemoryStream(Resources.serializedSparse_v2_0))
    {
        hllSparse = serializer.Deserialize(sparseStream);
    }

    CardinalityEstimator hllDense;
    using (var denseStream = new MemoryStream(Resources.serializedDense_v2_0))
    {
        hllDense = serializer.Deserialize(denseStream);
    }

    Assert.AreEqual(50UL, hllDirect.Count());
    Assert.AreEqual(151UL, hllSparse.Count());
    Assert.AreEqual(5009UL, hllDense.Count());
}
// For every HashFunctionId value: fills an estimator with 150 values drawn from Rand,
// round-trips it through the serializer, re-adds the same values to the deserialized
// copy, and asserts that both estimators report the same Count().
public void DeserializedEstimatorUsesSameHashAsOriginal()
{
    // Prepare some elements
    var elements = new List<int>();
    for (int n = 0; n < 150; n++)
    {
        elements.Add(Rand.Next());
    }

    foreach (HashFunctionId hashId in Enum.GetValues(typeof(HashFunctionId)))
    {
        // Add elements to an estimator using the given hash function id
        var original = new CardinalityEstimator(hashFunctionId: hashId);
        foreach (int value in elements)
        {
            original.Add(value);
        }

        // Serialize
        var serializer = new CardinalityEstimatorSerializer();
        byte[] payload;
        using (var output = new MemoryStream())
        {
            serializer.Serialize(output, original, false);
            payload = output.ToArray();
        }

        // Deserialize
        CardinalityEstimator deserialized;
        using (var input = new MemoryStream(payload))
        {
            deserialized = serializer.Deserialize(input, false);
        }

        // Add the elements again, should have no effect on state
        foreach (int value in elements)
        {
            deserialized.Add(value);
        }

        Assert.AreEqual(original.Count(), deserialized.Count());
    }
}
// Round-trips an estimator of the given cardinality through Serialize / Deserialize and
// compares the state of the original with the state of the deserialized copy.
private void TestDeserializer(int cardinality)
{
    CardinalityEstimator source = CreateAndFillCardinalityEstimator(cardinality);
    CardinalityEstimator roundTripped;
    var serializer = new CardinalityEstimatorSerializer();

    byte[] payload;
    using (var output = new MemoryStream())
    {
        serializer.Serialize(output, source);
        payload = output.ToArray();
    }

    using (var input = new MemoryStream(payload))
    {
        roundTripped = serializer.Deserialize(input);
    }

    CardinalityEstimatorState expected = source.GetState();
    CardinalityEstimatorState actual = roundTripped.GetState();

    Assert.AreEqual(expected.BitsPerIndex, actual.BitsPerIndex);
    Assert.AreEqual(expected.IsSparse, actual.IsSparse);

    // Each representation must be present in both states or absent from both.
    Assert.IsTrue((expected.DirectCount == null) == (actual.DirectCount == null));
    Assert.IsTrue((expected.LookupSparse == null) == (actual.LookupSparse == null));
    Assert.IsTrue((expected.LookupDense == null) == (actual.LookupDense == null));

    if (expected.DirectCount != null)
    {
        // DirectCount are subsets of each-other => they are the same set
        bool forward = expected.DirectCount.IsSubsetOf(actual.DirectCount);
        Assert.IsTrue(forward && actual.DirectCount.IsSubsetOf(expected.DirectCount));
    }

    if (expected.LookupSparse != null)
    {
        Assert.IsTrue(expected.LookupSparse.DictionaryEqual(actual.LookupSparse));
    }

    if (expected.LookupDense != null)
    {
        Assert.IsTrue(expected.LookupDense.SequenceEqual(actual.LookupDense));
    }
}
// For every bits value in 4..16 and every cardinality in 1..1000, serializes an estimator
// into a memory stream, deserializes it from the same stream, and asserts that Count()
// is unchanged by the round trip.
public void TestSerializerMultipleCardinalityAndBitsCombinations()
{
    for (int bits = 4; bits <= 16; bits++)
    {
        for (int cardinality = 1; cardinality <= 1000; cardinality++)
        {
            var original = CreateAndFillCardinalityEstimator(cardinality, bits);
            var serializer = new CardinalityEstimatorSerializer();

            using (var buffer = new MemoryStream())
            {
                serializer.Serialize(buffer, original, true);

                // Rewind so that deserialization starts at the first written byte.
                buffer.Position = 0;

                var roundTripped = serializer.Deserialize(buffer);

                Assert.AreEqual(
                    original.Count(),
                    roundTripped.Count(),
                    "Estimators should have same count before and after serialization");
            }
        }
    }
}
// Serializes the estimator returned by CreateAndFillCardinalityEstimator(100000), either
// through serializer.Write with a BinaryWriter (useBinWriter == true) or through
// serializer.Serialize, and checks every field of the produced byte array.
private void TestSerializerCardinality100000Parameterized(bool useBinWriter)
{
    CardinalityEstimator estimator = CreateAndFillCardinalityEstimator(100000);
    var serializer = new CardinalityEstimatorSerializer();

    byte[] serialized;
    using (var buffer = new MemoryStream())
    {
        if (!useBinWriter)
        {
            serializer.Serialize(buffer, estimator, false);
        }
        else
        {
            using (var writer = new BinaryWriter(buffer))
            {
                serializer.Write(writer, estimator);
            }
        }

        serialized = buffer.ToArray();
    }

    CardinalityEstimatorState state = estimator.GetState();
    int denseLength = state.LookupDense.Length;

    // Expected length is 22 fixed bytes plus one byte per dense entry:
    //   4 bytes for the major and minor versions
    //   1 byte for the HashFunctionId
    //   4 bytes for the Bits in Index
    //   1 byte for the IsSparse and IsDirectCount flags
    //   4 bytes for the number of elements in lookupDense
    //   1 byte for each element in lookupDense
    //   8 bytes for CountAdded
    Assert.Equal(22 + denseLength, serialized.Length);

    // Hash function id is the single byte at offset 4
    Assert.Equal((byte)HashFunctionId.Murmur3, serialized[4]);

    // Bits in Index = 14 (Int32 field starting at offset 5)
    int bitsInIndex = BitConverter.ToInt32(serialized, 5);
    Assert.Equal(14, bitsInIndex);

    // IsSparse = false AND IsDirectCount = false
    Assert.Equal(0, serialized[9]);

    // Number of dense entries (Int32 field starting at offset 10)
    int serializedDenseLength = BitConverter.ToInt32(serialized, 10);
    Assert.Equal(denseLength, serializedDenseLength);

    // CountAdditions = 100000 (UInt64 field following the dense entries)
    ulong countAdded = BitConverter.ToUInt64(serialized, 14 + denseLength);
    Assert.Equal(100000UL, countAdded);
}
// Round-trips an estimator of the given cardinality through Serialize / Deserialize
// (both called with false as the last argument) and compares the two with CompareHLL.
private void TestDeserializer(int cardinality)
{
    CardinalityEstimator source = CreateAndFillCardinalityEstimator(cardinality);
    CardinalityEstimator roundTripped;
    var serializer = new CardinalityEstimatorSerializer();

    byte[] payload;
    using (var output = new MemoryStream())
    {
        serializer.Serialize(output, source, false);
        payload = output.ToArray();
    }

    using (var input = new MemoryStream(payload))
    {
        roundTripped = serializer.Deserialize(input, false);
    }

    CompareHLL(source, roundTripped);
}
// Serializes the same estimator with CardinalityEstimatorSerializer and with BinaryFormatter,
// reports both sizes through the out parameters, and asserts that the custom serializer's
// output is not larger than BinaryFormatter's.
private void TestSerializerCreatesSmallerData(int cardinality, out int customSize, out int defaultSize)
{
    CardinalityEstimator estimator = CreateAndFillCardinalityEstimator(cardinality);

    // Custom serializer output
    var customSerializer = new CardinalityEstimatorSerializer();
    byte[] customBytes;
    using (var customStream = new MemoryStream())
    {
        customSerializer.Serialize(customStream, estimator);
        customBytes = customStream.ToArray();
    }
    customSize = customBytes.Length;

    // BinaryFormatter output, used here only as the size baseline
    var binaryFormatter = new BinaryFormatter();
    byte[] formatterBytes;
    using (var formatterStream = new MemoryStream())
    {
        binaryFormatter.Serialize(formatterStream, estimator);
        formatterBytes = formatterStream.ToArray();
    }
    defaultSize = formatterBytes.Length;

    Assert.IsTrue(customSize <= defaultSize);
}
// Extension method: serializes the estimator into an in-memory stream using the supplied
// serializer and returns the bytes that were written.
public static byte[] SerializeCardinality(this CardinalityEstimator estimator, CardinalityEstimatorSerializer serializer)
{
    using (var buffer = new MemoryStream())
    {
        serializer.Serialize(buffer, estimator);
        byte[] serialized = buffer.ToArray();
        return serialized;
    }
}
// Static constructor: assigns a new CardinalityEstimatorSerializer to the static
// Serializer member.
static HyperLogLog()
{
    Serializer = new CardinalityEstimatorSerializer();
}
// Serializes the estimator returned by CreateAndFillCardinalityEstimator(1000) and checks
// the byte layout against the estimator's own state (LookupSparse entry count).
public void TestSerializerCardinality1000()
{
    CardinalityEstimator estimator = CreateAndFillCardinalityEstimator(1000);
    var serializer = new CardinalityEstimatorSerializer();

    byte[] serialized;
    using (var buffer = new MemoryStream())
    {
        serializer.Serialize(buffer, estimator);
        serialized = buffer.ToArray();
    }

    CardinalityEstimatorState state = estimator.GetState();
    var sparseCount = state.LookupSparse.Count;

    // Expected length is a 13-byte header plus 3 bytes per sparse entry:
    //   4 bytes for the major and minor versions
    //   4 bytes for the Bits in Index
    //   1 byte for the IsSparse and IsDirectCount flags
    //   4 bytes for the number of elements in lookupSparse
    //   2+1 bytes for each element in lookupSparse
    Assert.AreEqual(13 + 3 * sparseCount, serialized.Length);

    // Bits in Index = 14 (Int32 field starting at offset 4)
    int bitsInIndex = BitConverter.ToInt32(serialized, 4);
    Assert.AreEqual(14, bitsInIndex);

    // IsSparse = true AND IsDirectCount = false
    Assert.AreEqual(2, serialized[8]);

    // Number of sparse entries (Int32 field starting at offset 9)
    int serializedSparseCount = BitConverter.ToInt32(serialized, 9);
    Assert.AreEqual(sparseCount, serializedSparseCount);
}
// Extension method: wraps the byte array in an in-memory stream and returns the estimator
// produced by the supplied serializer. The stream is disposed before returning.
public static CardinalityEstimator DeserializeCardinality(this byte[] estimator, CardinalityEstimatorSerializer serializer)
{
    using (var stream = new MemoryStream(estimator))
    {
        return serializer.Deserialize(stream);
    }
}
// Deserializes the three v1.0 payloads stored in Resources (direct, sparse and dense)
// and checks the Count() reported by each resulting estimator.
public void SerializerCanDeserializeVersion1Point0()
{
    var serializer = new CardinalityEstimatorSerializer();

    // Each resource is read through its own stream, which is disposed as soon as the
    // estimator has been produced.
    CardinalityEstimator hllDirect;
    using (var directStream = new MemoryStream(Resources.serializedDirect_v1_0))
    {
        hllDirect = serializer.Deserialize(directStream);
    }

    CardinalityEstimator hllSparse;
    using (var sparseStream = new MemoryStream(Resources.serializedSparse_v1_0))
    {
        hllSparse = serializer.Deserialize(sparseStream);
    }

    CardinalityEstimator hllDense;
    using (var denseStream = new MemoryStream(Resources.serializedDense_v1_0))
    {
        hllDense = serializer.Deserialize(denseStream);
    }

    Assert.AreEqual(50UL, hllDirect.Count());
    Assert.AreEqual(151UL, hllSparse.Count());
    Assert.AreEqual(5005UL, hllDense.Count());
}