Represents state of a CardinalityEstimator for serialization, CardinalityEstimatorSerializer
Example #1
0
        /// <summary>
        ///     Creates a CardinalityEstimator with the given <paramref name="state" />
        /// </summary>
        /// <param name="state">
        ///     State to restore from. <c>DirectCount</c> and <c>LookupSparse</c> are copied into new collections
        ///     when they are not null; <c>LookupDense</c> is stored by reference.
        /// </param>
        /// <exception cref="ArgumentNullException"><paramref name="state" /> is null</exception>
        internal CardinalityEstimator(CardinalityEstimatorState state)
        {
            // Fail with a descriptive exception instead of a NullReferenceException on the first field read
            if (state == null)
            {
                throw new ArgumentNullException("state");
            }

            this.bitsPerIndex = state.BitsPerIndex;
            this.bitsForHll   = 64 - this.bitsPerIndex;
            this.m            = (int)Math.Pow(2, this.bitsPerIndex);
            this.alphaM       = GetAlphaM(this.m);
            this.subAlgorithmSelectionThreshold = GetSubAlgorithmSelectionThreshold(this.bitsPerIndex);

            // Init the hash function
            this.hashFunctionId = state.HashFunctionId;
            this.hashFunction   = HashFunctionFactory.GetHashFunction(this.hashFunctionId);

            // Init the direct count (a private copy, or null when the state carries none)
            this.directCount = state.DirectCount != null ? new HashSet<ulong>(state.DirectCount) : null;

            // Init the sparse and dense representations
            this.isSparse       = state.IsSparse;
            this.lookupSparse   = state.LookupSparse != null ? new Dictionary<ushort, byte>(state.LookupSparse) : null;
            this.lookupDense    = state.LookupDense;
            this.CountAdditions = state.CountAdditions;

            // Each element in the sparse representation takes 15 bytes, and there is some constant overhead
            this.sparseMaxElements = Math.Max(0, this.m / 15 - 10);

            // If the sparse representation has no room for any element, switch to the dense representation
            if (this.sparseMaxElements <= 0)
            {
                SwitchToDenseRepresentation();
            }

            // If the direct count is not null, populate the HLL lookup with its elements.
            // This allows serialization to include only directCount.
            if (this.directCount != null)
            {
                foreach (ulong element in this.directCount)
                {
                    AddElementHash(element);
                }
            }
        }
        /// <summary>
        ///     Writes the state of <paramref name="cardinalityEstimator" /> to <paramref name="stream" /> in binary form
        /// </summary>
        /// <remarks>
        ///     Written in order: major and minor data format version, hash function ID (one byte), bits per index,
        ///     a flags byte (bit 1 = sparse, bit 0 = direct count present), exactly one payload preceded by its
        ///     element count (direct count elements, otherwise sparse key/value pairs, otherwise the dense lookup
        ///     bytes), and finally CountAdditions.
        ///     The <see cref="BinaryWriter" /> wrapping <paramref name="stream" /> is disposed before returning.
        /// </remarks>
        /// <param name="stream">Destination of the serialized bytes</param>
        /// <param name="cardinalityEstimator">Estimator whose state is written</param>
        public void Serialize(Stream stream, CardinalityEstimator cardinalityEstimator)
        {
            using (var writer = new BinaryWriter(stream))
            {
                // Header: data format version comes first
                writer.Write(DataFormatMajorVersion);
                writer.Write(DataFormatMinorVersion);

                CardinalityEstimatorState state = cardinalityEstimator.GetState();

                writer.Write((byte)state.HashFunctionId);
                writer.Write(state.BitsPerIndex);

                // Flags byte: bit 1 marks the sparse representation, bit 0 marks a direct count payload
                bool hasDirectCount = state.DirectCount != null;
                int sparseBit       = state.IsSparse ? 2 : 0;
                int directCountBit  = hasDirectCount ? 1 : 0;
                writer.Write((byte)(sparseBit | directCountBit));

                // Payload: the direct count takes precedence over either lookup representation
                if (hasDirectCount)
                {
                    writer.Write(state.DirectCount.Count);
                    foreach (ulong item in state.DirectCount)
                    {
                        writer.Write(item);
                    }
                }
                else if (!state.IsSparse)
                {
                    // Dense lookup: length, then the raw bytes
                    writer.Write(state.LookupDense.Length);
                    writer.Write(state.LookupDense);
                }
                else
                {
                    // Sparse lookup: pair count, then each key followed by its value
                    writer.Write(state.LookupSparse.Count);
                    foreach (var pair in state.LookupSparse)
                    {
                        writer.Write(pair.Key);
                        writer.Write(pair.Value);
                    }
                }

                writer.Write(state.CountAdditions);
                writer.Flush();
            }
        }
        /// <summary>
        ///     Deserialize a <see cref="CardinalityEstimator" /> from the given <paramref name="stream" />
        /// </summary>
        /// <param name="stream">
        ///     Stream to read from. It is wrapped in a <see cref="BinaryReader" /> that is disposed before returning.
        /// </param>
        /// <returns>A new estimator constructed from the state read from <paramref name="stream" /></returns>
        /// <exception cref="EndOfStreamException">
        ///     The stream ends before all of the data it declares has been read
        /// </exception>
        public CardinalityEstimator Deserialize(Stream stream)
        {
            using (var br = new BinaryReader(stream))
            {
                int dataFormatMajorVersion = br.ReadUInt16();
                int dataFormatMinorVersion = br.ReadUInt16();

                AssertDataVersionCanBeRead(dataFormatMajorVersion, dataFormatMinorVersion);

                HashFunctionId hashFunctionId;
                if (dataFormatMajorVersion >= 2)
                {
                    // Starting with version 2.0, the serializer writes the hash function ID
                    hashFunctionId = (HashFunctionId)br.ReadByte();
                }
                else
                {
                    // Versions before 2.0 all used FNV-1a
                    hashFunctionId = HashFunctionId.Fnv1A;
                }

                int  bitsPerIndex  = br.ReadInt32();
                byte flags         = br.ReadByte();
                bool isSparse      = ((flags & 2) == 2);
                bool isDirectCount = ((flags & 1) == 1);

                HashSet <ulong>            directCount  = null;
                IDictionary <ushort, byte> lookupSparse = isSparse ? new Dictionary <ushort, byte>() : null;
                byte[] lookupDense = null;

                // Exactly one payload follows the flags; the direct count takes precedence over the lookups
                if (isDirectCount)
                {
                    int count = br.ReadInt32();
                    directCount = new HashSet <ulong>();

                    for (var i = 0; i < count; i++)
                    {
                        ulong element = br.ReadUInt64();
                        directCount.Add(element);
                    }
                }
                else if (isSparse)
                {
                    int count = br.ReadInt32();

                    for (var i = 0; i < count; i++)
                    {
                        ushort elementKey   = br.ReadUInt16();
                        byte   elementValue = br.ReadByte();
                        lookupSparse.Add(elementKey, elementValue);
                    }
                }
                else
                {
                    int count = br.ReadInt32();
                    lookupDense = br.ReadBytes(count);

                    // ReadBytes returns a shorter array when the stream ends early. Reject that here, as the
                    // element-wise reads above do, instead of accepting a truncated dense lookup.
                    if (lookupDense.Length != count)
                    {
                        throw new EndOfStreamException("Unexpected end of stream while reading the dense lookup");
                    }
                }

                // Starting with version 2.1, the serializer writes CountAdditions.
                // Compare (major, minor) as a pair so that a later major version qualifies whatever its minor is.
                ulong countAdditions = 0UL;
                if (dataFormatMajorVersion > 2 || (dataFormatMajorVersion == 2 && dataFormatMinorVersion >= 1))
                {
                    countAdditions = br.ReadUInt64();
                }

                var data = new CardinalityEstimatorState
                {
                    HashFunctionId = hashFunctionId,
                    BitsPerIndex   = bitsPerIndex,
                    DirectCount    = directCount,
                    IsSparse       = isSparse,
                    LookupDense    = lookupDense,
                    LookupSparse   = lookupSparse,
                    CountAdditions = countAdditions,
                };

                return new CardinalityEstimator(data);
            }
        }
        /// <summary>
        ///     Deserialize a <see cref="CardinalityEstimator" /> from the given <paramref name="stream" />
        /// </summary>
        /// <param name="stream">
        ///     Source of the serialized data. The <see cref="BinaryReader" /> created over it is disposed on return.
        /// </param>
        /// <returns>The estimator built from the deserialized state</returns>
        /// <exception cref="EndOfStreamException">
        ///     The stream is exhausted before the data it declares has been fully read
        /// </exception>
        public CardinalityEstimator Deserialize(Stream stream)
        {
            using (var br = new BinaryReader(stream))
            {
                int dataFormatMajorVersion = br.ReadUInt16();
                int dataFormatMinorVersion = br.ReadUInt16();

                AssertDataVersionCanBeRead(dataFormatMajorVersion, dataFormatMinorVersion);

                // Starting with version 2.0, the serializer writes the hash function ID;
                // versions before 2.0 all used FNV-1a
                HashFunctionId hashFunctionId = dataFormatMajorVersion >= 2
                    ? (HashFunctionId)br.ReadByte()
                    : HashFunctionId.Fnv1A;

                int bitsPerIndex = br.ReadInt32();
                byte flags = br.ReadByte();
                bool isSparse = (flags & 2) == 2;
                bool isDirectCount = (flags & 1) == 1;

                HashSet<ulong> directCount = null;
                IDictionary<ushort, byte> lookupSparse = isSparse ? new Dictionary<ushort, byte>() : null;
                byte[] lookupDense = null;

                // Every payload starts with its element count
                int count = br.ReadInt32();

                if (isDirectCount)
                {
                    directCount = new HashSet<ulong>();
                    for (var i = 0; i < count; i++)
                    {
                        directCount.Add(br.ReadUInt64());
                    }
                }
                else if (isSparse)
                {
                    for (var i = 0; i < count; i++)
                    {
                        // Key is read before value, matching the order in which they appear in the stream
                        ushort elementKey = br.ReadUInt16();
                        byte elementValue = br.ReadByte();
                        lookupSparse.Add(elementKey, elementValue);
                    }
                }
                else
                {
                    lookupDense = br.ReadBytes(count);

                    // A truncated stream makes ReadBytes return fewer bytes than requested without throwing.
                    // Fail explicitly so a partial dense lookup is never handed to the estimator.
                    if (lookupDense.Length != count)
                    {
                        throw new EndOfStreamException("Unexpected end of stream while reading the dense lookup");
                    }
                }

                // Starting with version 2.1, the serializer writes CountAdditions.
                // The version is compared as a (major, minor) pair: any major above 2 also carries the field.
                bool hasCountAdditions = dataFormatMajorVersion > 2
                                         || (dataFormatMajorVersion == 2 && dataFormatMinorVersion >= 1);
                ulong countAdditions = hasCountAdditions ? br.ReadUInt64() : 0UL;

                var data = new CardinalityEstimatorState
                {
                    HashFunctionId = hashFunctionId,
                    BitsPerIndex = bitsPerIndex,
                    DirectCount = directCount,
                    IsSparse = isSparse,
                    LookupDense = lookupDense,
                    LookupSparse = lookupSparse,
                    CountAdditions = countAdditions,
                };

                return new CardinalityEstimator(data);
            }
        }