// Feeds the same 10,000 GUID strings to two estimators: one receives them directly through
// Add, the other through Merge of a fresh single-element estimator per GUID. Both are then
// serialized and the test asserts that the two serialized lengths are equal.
public void DirectCountingIsResetWhenMergingAlmostFullEstimators()
        {
            var addedEstimator  = new CardinalityEstimator();
            var mergedEstimator = new CardinalityEstimator();

            for (int i = 0; i < 10_000; i++)
            {
                var guid = Guid.NewGuid().ToString();

                addedEstimator.Add(guid);

                // Simulate some intermediate estimators being merged together
                var temporaryEstimator = new CardinalityEstimator();
                temporaryEstimator.Add(guid);
                mergedEstimator.Merge(temporaryEstimator);
            }

            var serializer = new CardinalityEstimatorSerializer();

            // Both streams are IDisposable; scope them so they are released even when the assertion throws.
            using (var stream1 = new MemoryStream())
            using (var stream2 = new MemoryStream())
            {
                // NOTE(review): the third argument (true) presumably means "leave the stream open",
                // which is what allows Length to be read afterwards - confirm against the serializer.
                serializer.Serialize(stream1, addedEstimator, true);
                serializer.Serialize(stream2, mergedEstimator, true);

                Assert.Equal(stream1.Length, stream2.Length);
            }
        }
Exemple #2
0
        // Serializes the estimator returned by CreateAndFillCardinalityEstimator(10) and checks the
        // total size of the output as well as the header fields found at fixed byte offsets.
        public void TestSerializerCardinality10()
        {
            CardinalityEstimator estimator = CreateAndFillCardinalityEstimator(10);
            var serializer = new CardinalityEstimatorSerializer();

            byte[] payload;
            using (var buffer = new MemoryStream())
            {
                serializer.Serialize(buffer, estimator);
                payload = buffer.ToArray();
            }

            // Layout being verified (93 bytes in total):
            //   4 bytes - major and minor versions
            //   4 bytes - Bits in Index
            //   1 byte  - IsSparse and IsDirectCount flags
            //   4 bytes - number of elements in DirectCount
            //   8 bytes - each element (ulong) in DirectCount
            Assert.AreEqual(93, payload.Length);

            // The length check above guarantees every offset read below is in range.
            int bitsInIndex = BitConverter.ToInt32(payload, 4);
            byte flags = payload[8];
            int directCountSize = BitConverter.ToInt32(payload, 9);

            Assert.AreEqual(14, bitsInIndex);     // Bits in Index = 14
            Assert.AreEqual(3, flags);            // IsSparse = true AND IsDirectCount = true
            Assert.AreEqual(10, directCountSize); // Count = 10
        }
Exemple #3
0
        // Round-trips an estimator through the serializer's BinaryWriter/BinaryReader entry points
        // (Write and Read) and hands the original and the result to CompareHLL.
        // cardinality: value forwarded to CreateAndFillCardinalityEstimator.
        private void TestDeserializer2(int cardinality)
        {
            CardinalityEstimator original = CreateAndFillCardinalityEstimator(cardinality);
            var serializer = new CardinalityEstimatorSerializer();

            byte[] payload;
            using (var output = new MemoryStream())
            {
                using (var writer = new BinaryWriter(output))
                {
                    serializer.Write(writer, original);
                }

                // MemoryStream.ToArray remains usable after the writer has closed the stream.
                payload = output.ToArray();
            }

            CardinalityEstimator roundTripped;
            using (var input = new MemoryStream(payload))
            using (var reader = new BinaryReader(input))
            {
                roundTripped = serializer.Read(reader);
            }

            CompareHLL(original, roundTripped);
        }
Exemple #4
0
        // Serializes the same estimator with CardinalityEstimatorSerializer and with BinaryFormatter,
        // reports both byte counts through the out parameters and asserts that the custom
        // serializer's output is not larger.
        // cardinality: value forwarded to CreateAndFillCardinalityEstimator.
        // customSize:  number of bytes produced by CardinalityEstimatorSerializer.
        // defaultSize: number of bytes produced by BinaryFormatter.
        // NOTE(review): BinaryFormatter is obsolete in current .NET versions; this comparison may
        // need a different baseline when the project is retargeted.
        private void TestSerializerCreatesSmallerData(int cardinality, out int customSize, out int defaultSize)
        {
            CardinalityEstimator estimator = CreateAndFillCardinalityEstimator(cardinality);

            var customSerializer = new CardinalityEstimatorSerializer();
            using (var customStream = new MemoryStream())
            {
                customSerializer.Serialize(customStream, estimator, false);

                // ToArray (rather than Length) is used because it also works on a closed stream.
                customSize = customStream.ToArray().Length;
            }

            var binaryFormatter = new BinaryFormatter();
            using (var defaultStream = new MemoryStream())
            {
                binaryFormatter.Serialize(defaultStream, estimator);
                defaultSize = defaultStream.ToArray().Length;
            }

            Assert.IsTrue(customSize <= defaultSize);
        }
        // Checks the serialized form of the estimator produced by CreateAndFillCardinalityEstimator(10):
        // overall length first, then the individual header fields.
        public void TestSerializerCardinality10()
        {
            CardinalityEstimator estimator = CreateAndFillCardinalityEstimator(10);
            var serializer = new CardinalityEstimatorSerializer();

            byte[] serialized;
            using (var target = new MemoryStream())
            {
                serializer.Serialize(target, estimator);
                serialized = target.ToArray();
            }

            // 93 bytes expected:
            //   4 (major and minor versions)
            // + 4 (Bits in Index)
            // + 1 (IsSparse and IsDirectCount flags)
            // + 4 (number of elements in DirectCount)
            // + 8 for each element (ulong) in DirectCount
            Assert.AreEqual(93, serialized.Length);

            // Bits in Index = 14
            byte[] bitsField = serialized.Skip(4).Take(4).ToArray();
            Assert.AreEqual(14, BitConverter.ToInt32(bitsField, 0));

            // IsSparse = true AND IsDirectCount = true
            Assert.AreEqual(3, serialized[8]);

            // Count = 10
            byte[] countField = serialized.Skip(9).Take(4).ToArray();
            Assert.AreEqual(10, BitConverter.ToInt32(countField, 0));
        }
Exemple #6
0
        // Serializes the estimator returned by CreateAndFillCardinalityEstimator(1000) and verifies
        // the output size and header fields against the estimator's own state (LookupSparse).
        public void TestSerializerCardinality1000()
        {
            CardinalityEstimator estimator = CreateAndFillCardinalityEstimator(1000);
            var serializer = new CardinalityEstimatorSerializer();

            byte[] payload;
            using (var buffer = new MemoryStream())
            {
                serializer.Serialize(buffer, estimator);
                payload = buffer.ToArray();
            }

            CardinalityEstimatorState state = estimator.GetState();
            var sparseEntries = state.LookupSparse.Count;

            // Expected length is 13 fixed bytes plus 3 bytes per lookupSparse element:
            //   4 bytes - major and minor versions
            //   4 bytes - Bits in Index
            //   1 byte  - IsSparse and IsDirectCount flags
            //   4 bytes - number of elements in lookupSparse
            //   2+1 bytes - each element in lookupSparse
            Assert.AreEqual(13 + 3 * sparseEntries, payload.Length);

            // The length check above guarantees the fixed header offsets below are in range.
            Assert.AreEqual(14, BitConverter.ToInt32(payload, 4)); // Bits in Index = 14
            Assert.AreEqual(2, payload[8]);                        // IsSparse = true AND IsDirectCount = false
            Assert.AreEqual(sparseEntries, BitConverter.ToInt32(payload, 9));
        }
Exemple #7
0
        // Serializes the estimator returned by CreateAndFillCardinalityEstimator(1000) and verifies
        // the output: total size, HashFunctionId byte, header fields and the trailing
        // 8-byte count of additions.
        public void TestSerializerCardinality1000()
        {
            CardinalityEstimator estimator = CreateAndFillCardinalityEstimator(1000);
            var serializer = new CardinalityEstimatorSerializer();

            byte[] payload;
            using (var buffer = new MemoryStream())
            {
                serializer.Serialize(buffer, estimator, false);
                payload = buffer.ToArray();
            }

            CardinalityEstimatorState state = estimator.GetState();
            var sparseEntries = state.LookupSparse.Count;

            // Expected length is 22 fixed bytes plus 3 bytes per lookupSparse element:
            //   4 bytes - major and minor versions
            //   1 byte  - HashFunctionId
            //   4 bytes - Bits in Index
            //   1 byte  - IsSparse and IsDirectCount flags
            //   4 bytes - number of elements in lookupSparse
            //   2+1 bytes - each element in lookupSparse
            //   8 bytes - CountAdded
            Assert.AreEqual(22 + 3 * sparseEntries, payload.Length);

            // Offset of the trailing counter: 14 header bytes followed by the sparse entries.
            var countAddedOffset = 14 + 3 * sparseEntries;

            Assert.AreEqual((byte)HashFunctionId.Murmur3, payload[4]);
            Assert.AreEqual(14, BitConverter.ToInt32(payload, 5));                    // Bits in Index = 14
            Assert.AreEqual(2, payload[9]);                                           // IsSparse = true AND IsDirectCount = false
            Assert.AreEqual(sparseEntries, BitConverter.ToInt32(payload, 10));
            Assert.AreEqual(1000UL, BitConverter.ToUInt64(payload, countAddedOffset)); // CountAdditions = 1000
        }
Exemple #8
0
        // Deserializes the three stored v2.0 payloads from Resources (named direct, sparse and dense)
        // and asserts the Count() reported by each resulting estimator.
        public void SerializerCanDeserializeVersion2Point0()
        {
            var serializer = new CardinalityEstimatorSerializer();

            // Scope the streams so they are disposed even if deserialization or an assertion throws.
            using (var directStream = new MemoryStream(Resources.serializedDirect_v2_0))
            using (var sparseStream = new MemoryStream(Resources.serializedSparse_v2_0))
            using (var denseStream = new MemoryStream(Resources.serializedDense_v2_0))
            {
                CardinalityEstimator hllDirect = serializer.Deserialize(directStream);
                CardinalityEstimator hllSparse = serializer.Deserialize(sparseStream);
                CardinalityEstimator hllDense  = serializer.Deserialize(denseStream);

                Assert.AreEqual(50UL, hllDirect.Count());
                Assert.AreEqual(151UL, hllSparse.Count());
                Assert.AreEqual(5009UL, hllDense.Count());
            }
        }
Exemple #9
0
        // For every HashFunctionId value: fills an estimator with 150 random ints, round-trips it
        // through the serializer, adds the same ints to the deserialized copy and asserts that
        // the copy's Count() still equals the original's.
        public void DeserializedEstimatorUsesSameHashAsOriginal()
        {
            // Prepare some elements
            var samples = new List<int>();
            while (samples.Count < 150)
            {
                samples.Add(Rand.Next());
            }

            foreach (HashFunctionId hashFunctionId in Enum.GetValues(typeof(HashFunctionId)))
            {
                // Add elements to an estimator using the given hashFunctionId
                var original = new CardinalityEstimator(hashFunctionId: hashFunctionId);
                foreach (int sample in samples)
                {
                    original.Add(sample);
                }

                // Serialize
                var serializer = new CardinalityEstimatorSerializer();
                byte[] payload;
                using (var output = new MemoryStream())
                {
                    serializer.Serialize(output, original, false);
                    payload = output.ToArray();
                }

                // Deserialize
                CardinalityEstimator deserialized;
                using (var input = new MemoryStream(payload))
                {
                    deserialized = serializer.Deserialize(input, false);
                }

                // Add the elements again, should have no effect on state
                foreach (int sample in samples)
                {
                    deserialized.Add(sample);
                }

                Assert.AreEqual(original.Count(), deserialized.Count());
            }
        }
        // Round-trips an estimator through Serialize/Deserialize and compares the state of the
        // original with the state of the copy field by field.
        // cardinality: value forwarded to CreateAndFillCardinalityEstimator.
        private void TestDeserializer(int cardinality)
        {
            CardinalityEstimator original = CreateAndFillCardinalityEstimator(cardinality);
            var serializer = new CardinalityEstimatorSerializer();

            byte[] payload;
            using (var output = new MemoryStream())
            {
                serializer.Serialize(output, original);
                payload = output.ToArray();
            }

            CardinalityEstimator copy;
            using (var input = new MemoryStream(payload))
            {
                copy = serializer.Deserialize(input);
            }

            CardinalityEstimatorState expected = original.GetState();
            CardinalityEstimatorState actual   = copy.GetState();

            Assert.AreEqual(expected.BitsPerIndex, actual.BitsPerIndex);
            Assert.AreEqual(expected.IsSparse, actual.IsSparse);

            // Each representation must be present in both states or absent from both.
            Assert.IsTrue((expected.DirectCount == null) == (actual.DirectCount == null));
            Assert.IsTrue((expected.LookupSparse == null) == (actual.LookupSparse == null));
            Assert.IsTrue((expected.LookupDense == null) == (actual.LookupDense == null));

            if (expected.DirectCount != null)
            {
                // Mutual subset relation => both sets hold exactly the same elements
                Assert.IsTrue(expected.DirectCount.IsSubsetOf(actual.DirectCount)
                              && actual.DirectCount.IsSubsetOf(expected.DirectCount));
            }

            if (expected.LookupSparse != null)
            {
                Assert.IsTrue(expected.LookupSparse.DictionaryEqual(actual.LookupSparse));
            }

            if (expected.LookupDense != null)
            {
                Assert.IsTrue(expected.LookupDense.SequenceEqual(actual.LookupDense));
            }
        }
Exemple #11
0
 // For every bits value in 4..16 and every cardinality in 1..1000: builds an estimator,
 // serializes it to a memory stream, rewinds the stream, deserializes it and asserts
 // that Count() is the same before and after.
 public void TestSerializerMultipleCardinalityAndBitsCombinations()
 {
     for (int bits = 4; bits <= 16; bits++)
     {
         for (int cardinality = 1; cardinality <= 1000; cardinality++)
         {
             var source = CreateAndFillCardinalityEstimator(cardinality, bits);
             var serializer = new CardinalityEstimatorSerializer();

             using (var buffer = new MemoryStream())
             {
                 serializer.Serialize(buffer, source, true);

                 // Rewind so that deserialization starts at the first written byte.
                 buffer.Position = 0;

                 var restored = serializer.Deserialize(buffer);
                 Assert.AreEqual(source.Count(), restored.Count(), "Estimators should have same count before and after serialization");
             }
         }
     }
 }
Exemple #12
0
        // Serializes the estimator returned by CreateAndFillCardinalityEstimator(100000) and verifies
        // the output: total size, HashFunctionId byte, header fields and the trailing
        // 8-byte count of additions.
        // useBinWriter: when true the data is written through serializer.Write with a BinaryWriter;
        //               otherwise through serializer.Serialize directly on the stream.
        private void TestSerializerCardinality100000Parameterized(bool useBinWriter)
        {
            CardinalityEstimator estimator = CreateAndFillCardinalityEstimator(100000);
            var serializer = new CardinalityEstimatorSerializer();

            byte[] payload;
            using (var buffer = new MemoryStream())
            {
                if (!useBinWriter)
                {
                    serializer.Serialize(buffer, estimator, false);
                }
                else
                {
                    using (var writer = new BinaryWriter(buffer))
                    {
                        serializer.Write(writer, estimator);
                    }
                }

                // MemoryStream.ToArray remains usable even if the stream was closed above.
                payload = buffer.ToArray();
            }

            CardinalityEstimatorState state = estimator.GetState();
            var denseLength = state.LookupDense.Length;

            // Expected length is 22 fixed bytes plus 1 byte per lookupDense element:
            //   4 bytes - major and minor versions
            //   1 byte  - HashFunctionId
            //   4 bytes - Bits in Index
            //   1 byte  - IsSparse and IsDirectCount flags
            //   4 bytes - number of elements in lookupDense
            //   1 byte  - each element in lookupDense
            //   8 bytes - CountAdded
            Assert.Equal(22 + denseLength, payload.Length);

            // Offset of the trailing counter: 14 header bytes followed by the dense lookup.
            var countAddedOffset = 14 + denseLength;

            Assert.Equal((byte)HashFunctionId.Murmur3, payload[4]);
            Assert.Equal(14, BitConverter.ToInt32(payload, 5));                      // Bits in Index = 14
            Assert.Equal(0, payload[9]);                                             // IsSparse = false AND IsDirectCount = false
            Assert.Equal(denseLength, BitConverter.ToInt32(payload, 10));
            Assert.Equal(100000UL, BitConverter.ToUInt64(payload, countAddedOffset)); // CountAdditions = 100000
        }
Exemple #13
0
        // Round-trips an estimator through Serialize/Deserialize (both called with false as the
        // last argument) and hands the original and the result to CompareHLL.
        // cardinality: value forwarded to CreateAndFillCardinalityEstimator.
        private void TestDeserializer(int cardinality)
        {
            CardinalityEstimator original = CreateAndFillCardinalityEstimator(cardinality);
            var serializer = new CardinalityEstimatorSerializer();

            byte[] payload;
            using (var output = new MemoryStream())
            {
                serializer.Serialize(output, original, false);
                payload = output.ToArray();
            }

            CardinalityEstimator roundTripped;
            using (var input = new MemoryStream(payload))
            {
                roundTripped = serializer.Deserialize(input, false);
            }

            CompareHLL(original, roundTripped);
        }
        // Serializes the same estimator with CardinalityEstimatorSerializer and with BinaryFormatter,
        // reports both byte counts through the out parameters and asserts that the custom
        // serializer's output is not larger.
        // cardinality: value forwarded to CreateAndFillCardinalityEstimator.
        // customSize:  number of bytes produced by CardinalityEstimatorSerializer.
        // defaultSize: number of bytes produced by BinaryFormatter.
        // NOTE(review): BinaryFormatter is obsolete in current .NET versions; this comparison may
        // need a different baseline when the project is retargeted.
        private void TestSerializerCreatesSmallerData(int cardinality, out int customSize, out int defaultSize)
        {
            CardinalityEstimator estimator = CreateAndFillCardinalityEstimator(cardinality);

            var customSerializer = new CardinalityEstimatorSerializer();
            using (var customStream = new MemoryStream())
            {
                customSerializer.Serialize(customStream, estimator);

                // ToArray (rather than Length) is used because it also works on a closed stream.
                customSize = customStream.ToArray().Length;
            }

            var binaryFormatter = new BinaryFormatter();
            using (var defaultStream = new MemoryStream())
            {
                binaryFormatter.Serialize(defaultStream, estimator);
                defaultSize = defaultStream.ToArray().Length;
            }

            Assert.IsTrue(customSize <= defaultSize);
        }
Exemple #15
0
        // Extension method: serializes the estimator into an in-memory stream using the supplied
        // serializer and returns the written bytes.
        // estimator:  the estimator to serialize.
        // serializer: the serializer that writes the estimator to the stream.
        // Returns the contents of the stream as a byte array.
        public static byte[] SerializeCardinality(this CardinalityEstimator estimator, CardinalityEstimatorSerializer serializer)
        {
            byte[] payload;

            using (var buffer = new MemoryStream())
            {
                serializer.Serialize(buffer, estimator);
                payload = buffer.ToArray();
            }

            return payload;
        }
 // Static constructor: assigns a new CardinalityEstimatorSerializer instance to Serializer.
 static HyperLogLog()
 {
     Serializer = new CardinalityEstimatorSerializer();
 }
        // Checks the serialized form of the estimator produced by CreateAndFillCardinalityEstimator(1000):
        // overall length first, then the header fields, using the estimator's LookupSparse
        // state as the reference.
        public void TestSerializerCardinality1000()
        {
            CardinalityEstimator estimator = CreateAndFillCardinalityEstimator(1000);
            var serializer = new CardinalityEstimatorSerializer();

            byte[] serialized;
            using (var target = new MemoryStream())
            {
                serializer.Serialize(target, estimator);
                serialized = target.ToArray();
            }

            CardinalityEstimatorState state = estimator.GetState();
            var sparseEntries = state.LookupSparse.Count;

            // Expected length: 13 fixed bytes plus 3 bytes per lookupSparse element
            //   4 (major and minor versions)
            // + 4 (Bits in Index)
            // + 1 (IsSparse and IsDirectCount flags)
            // + 4 (number of elements in lookupSparse)
            // + 2+1 for each element in lookupSparse
            Assert.AreEqual(13 + 3 * sparseEntries, serialized.Length);

            // Bits in Index = 14
            byte[] bitsField = serialized.Skip(4).Take(4).ToArray();
            Assert.AreEqual(14, BitConverter.ToInt32(bitsField, 0));

            // IsSparse = true AND IsDirectCount = false
            Assert.AreEqual(2, serialized[8]);

            // Stored element count must match the state's LookupSparse count
            byte[] countField = serialized.Skip(9).Take(4).ToArray();
            Assert.AreEqual(sparseEntries, BitConverter.ToInt32(countField, 0));
        }
        // Serializes an estimator, deserializes the bytes again and verifies that the state of the
        // copy matches the state of the original.
        // cardinality: value forwarded to CreateAndFillCardinalityEstimator.
        private void TestDeserializer(int cardinality)
        {
            CardinalityEstimator source = CreateAndFillCardinalityEstimator(cardinality);
            var serializer = new CardinalityEstimatorSerializer();

            byte[] serialized;
            using (var writeStream = new MemoryStream())
            {
                serializer.Serialize(writeStream, source);
                serialized = writeStream.ToArray();
            }

            CardinalityEstimator restored;
            using (var readStream = new MemoryStream(serialized))
            {
                restored = serializer.Deserialize(readStream);
            }

            CardinalityEstimatorState before = source.GetState();
            CardinalityEstimatorState after = restored.GetState();

            Assert.AreEqual(before.BitsPerIndex, after.BitsPerIndex);
            Assert.AreEqual(before.IsSparse, after.IsSparse);

            // A representation is either populated on both sides or missing on both sides.
            Assert.IsTrue((before.DirectCount == null) == (after.DirectCount == null));
            Assert.IsTrue((before.LookupSparse == null) == (after.LookupSparse == null));
            Assert.IsTrue((before.LookupDense == null) == (after.LookupDense == null));

            if (before.DirectCount != null)
            {
                // Each set is a subset of the other => they contain the same elements
                Assert.IsTrue(before.DirectCount.IsSubsetOf(after.DirectCount)
                              && after.DirectCount.IsSubsetOf(before.DirectCount));
            }

            if (before.LookupSparse != null)
            {
                Assert.IsTrue(before.LookupSparse.DictionaryEqual(after.LookupSparse));
            }

            if (before.LookupDense != null)
            {
                Assert.IsTrue(before.LookupDense.SequenceEqual(after.LookupDense));
            }
        }
Exemple #19
0
        // Extension method: wraps the byte array in a MemoryStream and deserializes an estimator
        // from it using the supplied serializer.
        // estimator:  the serialized bytes.
        // serializer: the serializer that reads the estimator from the stream.
        // Returns the estimator produced by serializer.Deserialize.
        public static CardinalityEstimator DeserializeCardinality(this byte[] estimator, CardinalityEstimatorSerializer serializer)
        {
            // Dispose the stream once deserialization has finished (or thrown).
            using (var stream = new MemoryStream(estimator))
            {
                return serializer.Deserialize(stream);
            }
        }
        // Deserializes the three stored v1.0 payloads from Resources (named direct, sparse and dense)
        // and asserts the Count() reported by each resulting estimator.
        public void SerializerCanDeserializeVersion1Point0()
        {
            var serializer = new CardinalityEstimatorSerializer();

            // Scope the streams so they are disposed even if deserialization or an assertion throws.
            using (var directStream = new MemoryStream(Resources.serializedDirect_v1_0))
            using (var sparseStream = new MemoryStream(Resources.serializedSparse_v1_0))
            using (var denseStream = new MemoryStream(Resources.serializedDense_v1_0))
            {
                CardinalityEstimator hllDirect = serializer.Deserialize(directStream);
                CardinalityEstimator hllSparse = serializer.Deserialize(sparseStream);
                CardinalityEstimator hllDense = serializer.Deserialize(denseStream);

                Assert.AreEqual(50UL, hllDirect.Count());
                Assert.AreEqual(151UL, hllSparse.Count());
                Assert.AreEqual(5005UL, hllDense.Count());
            }
        }