/// <summary>
/// Asserts that the states returned by <c>GetState()</c> for the two estimators match:
/// same bits-per-index, same sparse flag, and equal contents for whichever of the
/// direct-count / sparse / dense representations are present.
/// </summary>
/// <param name="hll1">First estimator to compare</param>
/// <param name="hll2">Second estimator to compare</param>
private void CompareHLL(CardinalityEstimator hll1, CardinalityEstimator hll2)
{
    CardinalityEstimatorState left = hll1.GetState();
    CardinalityEstimatorState right = hll2.GetState();

    Assert.AreEqual(left.BitsPerIndex, right.BitsPerIndex);
    Assert.AreEqual(left.IsSparse, right.IsSparse);

    // Each representation must be present in both states or absent from both.
    Assert.IsTrue((left.DirectCount == null) == (right.DirectCount == null));
    Assert.IsTrue((left.LookupSparse == null) == (right.LookupSparse == null));
    Assert.IsTrue((left.LookupDense == null) == (right.LookupDense == null));

    if (left.DirectCount != null)
    {
        // Mutual subsets => the two sets hold exactly the same elements.
        bool sameSet = left.DirectCount.IsSubsetOf(right.DirectCount)
                       && right.DirectCount.IsSubsetOf(left.DirectCount);
        Assert.IsTrue(sameSet);
    }

    if (left.LookupSparse != null)
    {
        bool sameSparse = left.LookupSparse.DictionaryEqual(right.LookupSparse);
        Assert.IsTrue(sameSparse);
    }

    if (left.LookupDense != null)
    {
        bool sameDense = left.LookupDense.SequenceEqual(right.LookupDense);
        Assert.IsTrue(sameDense);
    }
}
public void TestSerializerCardinality1000()
{
    // Byte offsets of the fields inspected below, in the order they are serialized.
    const int hashFunctionOffset = 4;   // follows the 4 bytes of major + minor version
    const int bitsPerIndexOffset = 5;   // follows the 1-byte HashFunctionId
    const int flagsOffset = 9;          // follows the 4-byte Bits in Index
    const int elementCountOffset = 10;  // follows the 1-byte IsSparse / IsDirectCount flags
    const int elementsOffset = 14;      // follows the 4-byte lookupSparse element count

    CardinalityEstimator hll = CreateAndFillCardinalityEstimator(1000);
    var serializer = new CardinalityEstimatorSerializer();

    byte[] results;
    using (var memoryStream = new MemoryStream())
    {
        serializer.Serialize(memoryStream, hll, false);
        results = memoryStream.ToArray();
    }

    CardinalityEstimatorState data = hll.GetState();
    int sparseBytes = 3 * data.LookupSparse.Count; // 2+1 bytes for each element in lookupSparse

    // Expected length: 14 bytes of header + the sparse elements + 8 bytes for CountAdded
    Assert.AreEqual(22 + sparseBytes, results.Length);

    Assert.AreEqual((byte)HashFunctionId.Murmur3, results[hashFunctionOffset]);

    // Bits in Index = 14
    Assert.AreEqual(14, BitConverter.ToInt32(results, bitsPerIndexOffset));

    // IsSparse = true AND IsDirectCount = false
    Assert.AreEqual(2, results[flagsOffset]);

    Assert.AreEqual(data.LookupSparse.Count, BitConverter.ToInt32(results, elementCountOffset));

    // CountAdditions = 1000
    Assert.AreEqual(1000UL, BitConverter.ToUInt64(results, elementsOffset + sparseBytes));
}
public void TestSerializerCardinality1000()
{
    // Byte offsets of the fields inspected below, in the order they are serialized.
    const int bitsPerIndexOffset = 4;  // follows the 4 bytes of major + minor version
    const int flagsOffset = 8;         // follows the 4-byte Bits in Index
    const int elementCountOffset = 9;  // follows the 1-byte IsSparse / IsDirectCount flags

    CardinalityEstimator hll = CreateAndFillCardinalityEstimator(1000);
    var serializer = new CardinalityEstimatorSerializer();

    byte[] results;
    using (var memoryStream = new MemoryStream())
    {
        serializer.Serialize(memoryStream, hll);
        results = memoryStream.ToArray();
    }

    CardinalityEstimatorState data = hll.GetState();
    int sparseBytes = 3 * data.LookupSparse.Count; // 2+1 bytes for each element in lookupSparse

    // Expected length: 13 bytes of header (version, Bits in Index, flags, element count)
    // followed by the sparse elements
    Assert.AreEqual(13 + sparseBytes, results.Length);

    // Bits in Index = 14
    Assert.AreEqual(14, BitConverter.ToInt32(results, bitsPerIndexOffset));

    // IsSparse = true AND IsDirectCount = false
    Assert.AreEqual(2, results[flagsOffset]);

    Assert.AreEqual(data.LookupSparse.Count, BitConverter.ToInt32(results, elementCountOffset));
}
public void StaticMergeHandlesNullElements()
{
    const int expectedBitsPerIndex = 11;

    // One real estimator surrounded by null entries.
    var onlyEstimator = new CardinalityEstimator(expectedBitsPerIndex, HashFunctionId.Fnv1A);
    var estimators = new List<CardinalityEstimator>
    {
        null,
        onlyEstimator,
        null
    };

    CardinalityEstimator result = CardinalityEstimator.Merge(estimators);

    Assert.NotNull(result);
    var mergedState = result.GetState();
    Assert.Equal(expectedBitsPerIndex, mergedState.BitsPerIndex);
}
public void StaticMergeTest()
{
    const int expectedBitsPerIndex = 11;
    var estimators = new CardinalityEstimator[10];

    // Every estimator receives exactly one value from Rand before being merged.
    for (var slot = 0; slot < estimators.Length; slot++)
    {
        var estimator = new CardinalityEstimator(expectedBitsPerIndex);
        estimator.Add(Rand.Next());
        estimators[slot] = estimator;
    }

    CardinalityEstimator merged = CardinalityEstimator.Merge(estimators);

    Assert.Equal(10UL, merged.Count());
    var mergedState = merged.GetState();
    Assert.Equal(expectedBitsPerIndex, mergedState.BitsPerIndex);
}
/// <summary>
/// Serializes an estimator filled via <c>CreateAndFillCardinalityEstimator</c>, deserializes the
/// resulting bytes, and asserts that the original and round-tripped states match.
/// </summary>
/// <param name="cardinality">Value passed to <c>CreateAndFillCardinalityEstimator</c></param>
private void TestDeserializer(int cardinality)
{
    CardinalityEstimator original = CreateAndFillCardinalityEstimator(cardinality);
    var serializer = new CardinalityEstimatorSerializer();

    // Serialize to an in-memory buffer...
    byte[] payload;
    using (var output = new MemoryStream())
    {
        serializer.Serialize(output, original);
        payload = output.ToArray();
    }

    // ...and read it straight back.
    CardinalityEstimator roundTripped;
    using (var input = new MemoryStream(payload))
    {
        roundTripped = serializer.Deserialize(input);
    }

    CardinalityEstimatorState before = original.GetState();
    CardinalityEstimatorState after = roundTripped.GetState();

    Assert.AreEqual(before.BitsPerIndex, after.BitsPerIndex);
    Assert.AreEqual(before.IsSparse, after.IsSparse);

    // Each representation must be present in both states or absent from both.
    Assert.IsTrue((before.DirectCount == null) == (after.DirectCount == null));
    Assert.IsTrue((before.LookupSparse == null) == (after.LookupSparse == null));
    Assert.IsTrue((before.LookupDense == null) == (after.LookupDense == null));

    if (before.DirectCount != null)
    {
        // Mutual subsets => the two sets hold exactly the same elements.
        bool sameSet = before.DirectCount.IsSubsetOf(after.DirectCount)
                       && after.DirectCount.IsSubsetOf(before.DirectCount);
        Assert.IsTrue(sameSet);
    }

    if (before.LookupSparse != null)
    {
        bool sameSparse = before.LookupSparse.DictionaryEqual(after.LookupSparse);
        Assert.IsTrue(sameSparse);
    }

    if (before.LookupDense != null)
    {
        bool sameDense = before.LookupDense.SequenceEqual(after.LookupDense);
        Assert.IsTrue(sameDense);
    }
}
/// <summary>
/// Serializes an estimator created with <c>CreateAndFillCardinalityEstimator(100000)</c> and
/// checks the length and selected fields of the produced bytes.
/// </summary>
/// <param name="useBinWriter">
/// When true the estimator is written through <c>serializer.Write</c> with a
/// <see cref="BinaryWriter" />; otherwise through <c>serializer.Serialize</c> on the stream.
/// </param>
private void TestSerializerCardinality100000Parameterized(bool useBinWriter)
{
    // Byte offsets of the fields inspected below, in the order they are serialized.
    const int hashFunctionOffset = 4;   // follows the 4 bytes of major + minor version
    const int bitsPerIndexOffset = 5;   // follows the 1-byte HashFunctionId
    const int flagsOffset = 9;          // follows the 4-byte Bits in Index
    const int elementCountOffset = 10;  // follows the 1-byte IsSparse / IsDirectCount flags
    const int elementsOffset = 14;      // follows the 4-byte lookupDense element count

    CardinalityEstimator hll = CreateAndFillCardinalityEstimator(100000);
    var serializer = new CardinalityEstimatorSerializer();

    byte[] results;
    using (var memoryStream = new MemoryStream())
    {
        if (!useBinWriter)
        {
            serializer.Serialize(memoryStream, hll, false);
        }
        else
        {
            using (var bw = new BinaryWriter(memoryStream))
            {
                serializer.Write(bw, hll);
            }
        }

        results = memoryStream.ToArray();
    }

    CardinalityEstimatorState data = hll.GetState();
    int denseBytes = data.LookupDense.Length; // 1 byte for each element in lookupDense

    // Expected length: 14 bytes of header + the dense elements + 8 bytes for CountAdded
    Assert.Equal(22 + denseBytes, results.Length);

    Assert.Equal((byte)HashFunctionId.Murmur3, results[hashFunctionOffset]);

    // Bits in Index = 14
    Assert.Equal(14, BitConverter.ToInt32(results, bitsPerIndexOffset));

    // IsSparse = false AND IsDirectCount = false
    Assert.Equal(0, results[flagsOffset]);

    Assert.Equal(data.LookupDense.Length, BitConverter.ToInt32(results, elementCountOffset));

    // CountAdditions = 100000
    Assert.Equal(100000UL, BitConverter.ToUInt64(results, elementsOffset + denseBytes));
}
/// <summary>
/// Serialize the given <paramref name="cardinalityEstimator" /> to <paramref name="stream" />.
/// The stream is closed when serialization completes; use the overload that takes
/// <c>leaveOpen</c> to keep it open.
/// </summary>
/// <param name="stream">Destination stream</param>
/// <param name="cardinalityEstimator">Estimator whose state is written</param>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="stream" /> or <paramref name="cardinalityEstimator" /> is null
/// </exception>
public void Serialize(Stream stream, CardinalityEstimator cardinalityEstimator)
{
    Serialize(stream, cardinalityEstimator, false);
}

/// <summary>
/// Serialize the given <paramref name="cardinalityEstimator" /> to <paramref name="stream" />,
/// optionally leaving the stream open afterwards.
/// </summary>
/// <remarks>
/// Fields are written in this order: major version, minor version, hash function id (1 byte),
/// bits per index, a flags byte (bit 1 = IsSparse, bit 0 = direct count present), the element
/// count followed by the elements of the active representation, and finally CountAdditions.
/// </remarks>
/// <param name="stream">Destination stream</param>
/// <param name="cardinalityEstimator">Estimator whose state is written</param>
/// <param name="leaveOpen">True to keep <paramref name="stream" /> open after writing</param>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="stream" /> or <paramref name="cardinalityEstimator" /> is null
/// </exception>
public void Serialize(Stream stream, CardinalityEstimator cardinalityEstimator, bool leaveOpen)
{
    // Validate up front so that a bad argument never leaves a partially written stream behind.
    if (stream == null)
    {
        throw new System.ArgumentNullException("stream");
    }

    if (cardinalityEstimator == null)
    {
        throw new System.ArgumentNullException("cardinalityEstimator");
    }

    // Snapshot the state before any byte is written, for the same reason.
    CardinalityEstimatorState data = cardinalityEstimator.GetState();

    // The encoding only matters for char/string writes, none of which happen here; it is
    // passed solely because the leaveOpen constructor overload requires one.
    using (var bw = new BinaryWriter(stream, new System.Text.UTF8Encoding(false, true), leaveOpen))
    {
        bw.Write(DataFormatMajorVersion);
        bw.Write(DataFormatMinorVersion);

        bw.Write((byte)data.HashFunctionId);
        bw.Write(data.BitsPerIndex);

        // Flags byte: bit 1 = IsSparse, bit 0 = direct count present.
        bw.Write((byte)(((data.IsSparse ? 1 : 0) << 1) + (data.DirectCount != null ? 1 : 0)));

        if (data.DirectCount != null)
        {
            bw.Write(data.DirectCount.Count);
            foreach (ulong element in data.DirectCount)
            {
                bw.Write(element);
            }
        }
        else if (data.IsSparse)
        {
            bw.Write(data.LookupSparse.Count);
            foreach (KeyValuePair<ushort, byte> element in data.LookupSparse)
            {
                bw.Write(element.Key);
                bw.Write(element.Value);
            }
        }
        else
        {
            bw.Write(data.LookupDense.Length);
            foreach (byte element in data.LookupDense)
            {
                bw.Write(element);
            }
        }

        bw.Write(data.CountAdditions);

        // Flush explicitly so the bytes reach the stream before the writer is disposed.
        bw.Flush();
    }
}
/// <summary>
/// Fills an estimator with byte arrays produced by <c>Rand</c>, rebuilds a second estimator
/// from the first one's state, and asserts that both expose matching state.
/// </summary>
/// <param name="cardinality">Number of elements added to the estimator</param>
private void RunRecreationFromData(int cardinality = 1000000)
{
    var source = new CardinalityEstimator();
    var buffer = new byte[ElementSizeInBytes];

    // The same buffer is refilled and added once per element.
    for (int remaining = cardinality; remaining > 0; remaining--)
    {
        Rand.NextBytes(buffer);
        source.Add(buffer);
    }

    CardinalityEstimatorState sourceState = source.GetState();
    var recreated = new CardinalityEstimator(sourceState);
    CardinalityEstimatorState recreatedState = recreated.GetState();

    Assert.Equal(sourceState.BitsPerIndex, recreatedState.BitsPerIndex);
    Assert.Equal(sourceState.IsSparse, recreatedState.IsSparse);

    // Each representation must be present in both states or absent from both.
    Assert.True((sourceState.DirectCount == null) == (recreatedState.DirectCount == null));
    Assert.True((sourceState.LookupSparse == null) == (recreatedState.LookupSparse == null));
    Assert.True((sourceState.LookupDense == null) == (recreatedState.LookupDense == null));

    if (sourceState.DirectCount != null)
    {
        // Mutual subsets => the two sets hold exactly the same elements.
        bool sameSet = sourceState.DirectCount.IsSubsetOf(recreatedState.DirectCount)
                       && recreatedState.DirectCount.IsSubsetOf(sourceState.DirectCount);
        Assert.True(sameSet);
    }

    if (sourceState.LookupSparse != null)
    {
        bool sameSparse = sourceState.LookupSparse.DictionaryEqual(recreatedState.LookupSparse);
        Assert.True(sameSparse);
    }

    if (sourceState.LookupDense != null)
    {
        bool sameDense = sourceState.LookupDense.SequenceEqual(recreatedState.LookupDense);
        Assert.True(sameDense);
    }
}
/// <summary>
/// Fills an estimator with byte arrays produced by <c>Rand</c>, rebuilds a second estimator
/// from the first one's state, and asserts that both expose matching state.
/// </summary>
/// <param name="cardinality">Number of elements added to the estimator</param>
private void RunRecreationFromData(int cardinality = 1000000)
{
    var source = new CardinalityEstimator();
    var buffer = new byte[ElementSizeInBytes];

    // The same buffer is refilled and added once per element.
    for (int remaining = cardinality; remaining > 0; remaining--)
    {
        Rand.NextBytes(buffer);
        source.Add(buffer);
    }

    CardinalityEstimatorState sourceState = source.GetState();
    var recreated = new CardinalityEstimator(sourceState);
    CardinalityEstimatorState recreatedState = recreated.GetState();

    Assert.AreEqual(sourceState.BitsPerIndex, recreatedState.BitsPerIndex);
    Assert.AreEqual(sourceState.IsSparse, recreatedState.IsSparse);

    // Each representation must be present in both states or absent from both.
    Assert.IsTrue((sourceState.DirectCount == null) == (recreatedState.DirectCount == null));
    Assert.IsTrue((sourceState.LookupSparse == null) == (recreatedState.LookupSparse == null));
    Assert.IsTrue((sourceState.LookupDense == null) == (recreatedState.LookupDense == null));

    if (sourceState.DirectCount != null)
    {
        // Mutual subsets => the two sets hold exactly the same elements.
        bool sameSet = sourceState.DirectCount.IsSubsetOf(recreatedState.DirectCount)
                       && recreatedState.DirectCount.IsSubsetOf(sourceState.DirectCount);
        Assert.IsTrue(sameSet);
    }

    if (sourceState.LookupSparse != null)
    {
        bool sameSparse = sourceState.LookupSparse.DictionaryEqual(recreatedState.LookupSparse);
        Assert.IsTrue(sameSparse);
    }

    if (sourceState.LookupDense != null)
    {
        bool sameDense = sourceState.LookupDense.SequenceEqual(recreatedState.LookupDense);
        Assert.IsTrue(sameDense);
    }
}