Exemplo n.º 1
0
        private void CompareHLL(CardinalityEstimator hll1, CardinalityEstimator hll2)
        {
            CardinalityEstimatorState data  = hll1.GetState();
            CardinalityEstimatorState data2 = hll2.GetState();

            Assert.AreEqual(data.BitsPerIndex, data2.BitsPerIndex);
            Assert.AreEqual(data.IsSparse, data2.IsSparse);

            Assert.IsTrue((data.DirectCount != null && data2.DirectCount != null) || (data.DirectCount == null && data2.DirectCount == null));
            Assert.IsTrue((data.LookupSparse != null && data2.LookupSparse != null) ||
                          (data.LookupSparse == null && data2.LookupSparse == null));
            Assert.IsTrue((data.LookupDense != null && data2.LookupDense != null) || (data.LookupDense == null && data2.LookupDense == null));

            if (data.DirectCount != null)
            {
                // DirectCount are subsets of each-other => they are the same set
                Assert.IsTrue(data.DirectCount.IsSubsetOf(data2.DirectCount) && data2.DirectCount.IsSubsetOf(data.DirectCount));
            }
            if (data.LookupSparse != null)
            {
                Assert.IsTrue(data.LookupSparse.DictionaryEqual(data2.LookupSparse));
            }
            if (data.LookupDense != null)
            {
                Assert.IsTrue(data.LookupDense.SequenceEqual(data2.LookupDense));
            }
        }
Exemplo n.º 2
0
        public void TestSerializerCardinality1000()
        {
            CardinalityEstimator hll = CreateAndFillCardinalityEstimator(1000);

            var serializer = new CardinalityEstimatorSerializer();

            byte[] results;
            using (var memoryStream = new MemoryStream())
            {
                serializer.Serialize(memoryStream, hll, false);

                results = memoryStream.ToArray();
            }

            CardinalityEstimatorState data = hll.GetState();

            // Expected length is:
            // 4 bytes for the major and minor versions
            // 1 byte for the HashFunctionId
            // 4 bytes for the Bits in Index
            // 1 byte for the IsSparse and IsDirectCount flags
            // 4 bytes for the number of elements in lookupSparse
            // 2+1 bytes for each element (ulong) in lookupSparse
            // 8 bytes for CountAdded
            Assert.AreEqual(22 + 3 * data.LookupSparse.Count, results.Length);

            Assert.AreEqual((byte)HashFunctionId.Murmur3, results.Skip(4).Take(1).First());
            Assert.AreEqual(14, BitConverter.ToInt32(results.Skip(5).Take(4).ToArray(), 0));                                     // Bits in Index = 14
            Assert.AreEqual(2, results[9]);                                                                                      // IsSparse = true AND IsDirectCount = false
            Assert.AreEqual(data.LookupSparse.Count, BitConverter.ToInt32(results.Skip(10).Take(4).ToArray(), 0));
            Assert.AreEqual(1000UL, BitConverter.ToUInt64(results.Skip(14 + 3 * data.LookupSparse.Count).Take(8).ToArray(), 0)); // CountAdditions = 1000
        }
Exemplo n.º 3
0
        public void TestSerializerCardinality1000()
        {
            CardinalityEstimator hll = CreateAndFillCardinalityEstimator(1000);

            var serializer = new CardinalityEstimatorSerializer();

            byte[] results;
            using (var memoryStream = new MemoryStream())
            {
                serializer.Serialize(memoryStream, hll);

                results = memoryStream.ToArray();
            }

            CardinalityEstimatorState data = hll.GetState();

            // Expected length is 2908:
            // 4 bytes for the major and minor versions
            // 4 bytes for the Bits in Index
            // 1 byte for the IsSparse and IsDirectCount flags
            // 4 bytes for the number of elements in lookupSparse
            // 2+1 bytes for each element (ulong) in lookupSparse
            Assert.AreEqual(13 + 3 * data.LookupSparse.Count, results.Length);


            Assert.AreEqual(14, BitConverter.ToInt32(results.Skip(4).Take(4).ToArray(), 0)); // Bits in Index = 14
            Assert.AreEqual(2, results[8]);                                                  // IsSparse = true AND IsDirectCount = false
            Assert.AreEqual(data.LookupSparse.Count, BitConverter.ToInt32(results.Skip(9).Take(4).ToArray(), 0));
        }
        public void StaticMergeHandlesNullElements()
        {
            const int expectedBitsPerIndex = 11;
            var       estimators           = new List <CardinalityEstimator> {
                null, new CardinalityEstimator(expectedBitsPerIndex, HashFunctionId.Fnv1A), null
            };
            CardinalityEstimator result = CardinalityEstimator.Merge(estimators);

            Assert.NotNull(result);
            Assert.Equal(expectedBitsPerIndex, result.GetState().BitsPerIndex);
        }
        public void StaticMergeTest()
        {
            const int expectedBitsPerIndex = 11;
            var       estimators           = new CardinalityEstimator[10];

            for (var i = 0; i < estimators.Length; i++)
            {
                estimators[i] = new CardinalityEstimator(expectedBitsPerIndex);
                estimators[i].Add(Rand.Next());
            }

            CardinalityEstimator merged = CardinalityEstimator.Merge(estimators);

            Assert.Equal(10UL, merged.Count());
            Assert.Equal(expectedBitsPerIndex, merged.GetState().BitsPerIndex);
        }
        private void TestDeserializer(int cardinality)
        {
            CardinalityEstimator hll = CreateAndFillCardinalityEstimator(cardinality);
            CardinalityEstimator hll2;

            var serializer = new CardinalityEstimatorSerializer();

            byte[] results;
            using (var memoryStream = new MemoryStream())
            {
                serializer.Serialize(memoryStream, hll);

                results = memoryStream.ToArray();
            }

            using (var memoryStream = new MemoryStream(results))
            {
                hll2 = serializer.Deserialize(memoryStream);
            }

            CardinalityEstimatorState data  = hll.GetState();
            CardinalityEstimatorState data2 = hll2.GetState();

            Assert.AreEqual(data.BitsPerIndex, data2.BitsPerIndex);
            Assert.AreEqual(data.IsSparse, data2.IsSparse);

            Assert.IsTrue((data.DirectCount != null && data2.DirectCount != null) || (data.DirectCount == null && data2.DirectCount == null));
            Assert.IsTrue((data.LookupSparse != null && data2.LookupSparse != null) ||
                          (data.LookupSparse == null && data2.LookupSparse == null));
            Assert.IsTrue((data.LookupDense != null && data2.LookupDense != null) || (data.LookupDense == null && data2.LookupDense == null));

            if (data.DirectCount != null)
            {
                // DirectCount are subsets of each-other => they are the same set
                Assert.IsTrue(data.DirectCount.IsSubsetOf(data2.DirectCount) && data2.DirectCount.IsSubsetOf(data.DirectCount));
            }
            if (data.LookupSparse != null)
            {
                Assert.IsTrue(data.LookupSparse.DictionaryEqual(data2.LookupSparse));
            }
            if (data.LookupDense != null)
            {
                Assert.IsTrue(data.LookupDense.SequenceEqual(data2.LookupDense));
            }
        }
Exemplo n.º 7
0
        private void TestSerializerCardinality100000Parameterized(bool useBinWriter)
        {
            CardinalityEstimator hll = CreateAndFillCardinalityEstimator(100000);

            var serializer = new CardinalityEstimatorSerializer();

            byte[] results;
            using (var memoryStream = new MemoryStream())
            {
                if (useBinWriter)
                {
                    using (var bw = new BinaryWriter(memoryStream))
                    {
                        serializer.Write(bw, hll);
                    }
                }
                else
                {
                    serializer.Serialize(memoryStream, hll, false);
                }

                results = memoryStream.ToArray();
            }

            CardinalityEstimatorState data = hll.GetState();

            // Expected length is:
            // 4 bytes for the major and minor versions
            // 1 byte for the HashFunctionId
            // 4 bytes for the Bits in Index
            // 1 byte for the IsSparse and IsDirectCount flags
            // 4 bytes for the number of elements in lookupDense
            // 1 bytes for each element (ulong) in lookupDense
            // 8 bytes for CountAdded
            Assert.Equal(22 + data.LookupDense.Length, results.Length);

            Assert.Equal((byte)HashFunctionId.Murmur3, results.Skip(4).Take(1).First());
            Assert.Equal(14, BitConverter.ToInt32(results.Skip(5).Take(4).ToArray(), 0));                                   // Bits in Index = 14
            Assert.Equal(0, results[9]);                                                                                    // IsSparse = false AND IsDirectCount = false
            Assert.Equal(data.LookupDense.Length, BitConverter.ToInt32(results.Skip(10).Take(4).ToArray(), 0));
            Assert.Equal(100000UL, BitConverter.ToUInt64(results.Skip(14 + data.LookupDense.Length).Take(8).ToArray(), 0)); // CountAdditions = 100000
        }
        /// <summary>
        ///     Serialize the given <paramref name="cardinalityEstimator" /> to <paramref name="stream" />
        /// </summary>
        public void Serialize(Stream stream, CardinalityEstimator cardinalityEstimator)
        {
            using (var bw = new BinaryWriter(stream))
            {
                bw.Write(DataFormatMajorVersion);
                bw.Write(DataFormatMinorVersion);

                CardinalityEstimatorState data = cardinalityEstimator.GetState();

                bw.Write((byte)data.HashFunctionId);
                bw.Write(data.BitsPerIndex);
                bw.Write((byte)(((data.IsSparse ? 1 : 0) << 1) + (data.DirectCount != null ? 1 : 0)));
                if (data.DirectCount != null)
                {
                    bw.Write(data.DirectCount.Count);
                    foreach (ulong element in data.DirectCount)
                    {
                        bw.Write(element);
                    }
                }
                else if (data.IsSparse)
                {
                    bw.Write(data.LookupSparse.Count);
                    foreach (KeyValuePair<ushort, byte> element in data.LookupSparse)
                    {
                        bw.Write(element.Key);
                        bw.Write(element.Value);
                    }
                }
                else
                {
                    bw.Write(data.LookupDense.Length);
                    foreach (byte element in data.LookupDense)
                    {
                        bw.Write(element);
                    }
                }

                bw.Write(data.CountAdditions);
                bw.Flush();
            }
        }
        private void RunRecreationFromData(int cardinality = 1000000)
        {
            var hll = new CardinalityEstimator();

            var nextMember = new byte[ElementSizeInBytes];

            for (var i = 0; i < cardinality; i++)
            {
                Rand.NextBytes(nextMember);
                hll.Add(nextMember);
            }

            CardinalityEstimatorState data = hll.GetState();

            var hll2 = new CardinalityEstimator(data);
            CardinalityEstimatorState data2 = hll2.GetState();

            Assert.Equal(data.BitsPerIndex, data2.BitsPerIndex);
            Assert.Equal(data.IsSparse, data2.IsSparse);

            Assert.True((data.DirectCount != null && data2.DirectCount != null) || (data.DirectCount == null && data2.DirectCount == null));
            Assert.True((data.LookupSparse != null && data2.LookupSparse != null) ||
                        (data.LookupSparse == null && data2.LookupSparse == null));
            Assert.True((data.LookupDense != null && data2.LookupDense != null) || (data.LookupDense == null && data2.LookupDense == null));

            if (data.DirectCount != null)
            {
                // DirectCount are subsets of each-other => they are the same set
                Assert.True(data.DirectCount.IsSubsetOf(data2.DirectCount) && data2.DirectCount.IsSubsetOf(data.DirectCount));
            }
            if (data.LookupSparse != null)
            {
                Assert.True(data.LookupSparse.DictionaryEqual(data2.LookupSparse));
            }
            if (data.LookupDense != null)
            {
                Assert.True(data.LookupDense.SequenceEqual(data2.LookupDense));
            }
        }
        private void RunRecreationFromData(int cardinality = 1000000)
        {
            var hll = new CardinalityEstimator();

            var nextMember = new byte[ElementSizeInBytes];
            for (var i = 0; i < cardinality; i++)
            {
                Rand.NextBytes(nextMember);
                hll.Add(nextMember);
            }

            CardinalityEstimatorState data = hll.GetState();

            var hll2 = new CardinalityEstimator(data);
            CardinalityEstimatorState data2 = hll2.GetState();

            Assert.AreEqual(data.BitsPerIndex, data2.BitsPerIndex);
            Assert.AreEqual(data.IsSparse, data2.IsSparse);

            Assert.IsTrue((data.DirectCount != null && data2.DirectCount != null) || (data.DirectCount == null && data2.DirectCount == null));
            Assert.IsTrue((data.LookupSparse != null && data2.LookupSparse != null) ||
                          (data.LookupSparse == null && data2.LookupSparse == null));
            Assert.IsTrue((data.LookupDense != null && data2.LookupDense != null) || (data.LookupDense == null && data2.LookupDense == null));

            if (data.DirectCount != null)
            {
                // DirectCount are subsets of each-other => they are the same set
                Assert.IsTrue(data.DirectCount.IsSubsetOf(data2.DirectCount) && data2.DirectCount.IsSubsetOf(data.DirectCount));
            }
            if (data.LookupSparse != null)
            {
                Assert.IsTrue(data.LookupSparse.DictionaryEqual(data2.LookupSparse));
            }
            if (data.LookupDense != null)
            {
                Assert.IsTrue(data.LookupDense.SequenceEqual(data2.LookupDense));
            }
        }