/// <summary>
/// Deserialize a <see cref="CardinalityEstimator" /> from the given <paramref name="stream" />
/// </summary>
/// <remarks>
/// Fields are read in this order: major version (UInt16), minor version (UInt16), bits per index (Int32),
/// a flags byte (bit 0 set = direct count present, bit 1 set = sparse representation), an Int32 element
/// count, and finally the payload selected by the flags.
/// The <see cref="BinaryReader" /> wrapping <paramref name="stream" /> is disposed before returning,
/// which also closes <paramref name="stream" />.
/// </remarks>
/// <param name="stream">Stream positioned at the first byte of a serialized estimator</param>
/// <returns>A new estimator built from the state read from <paramref name="stream" /></returns>
/// <exception cref="EndOfStreamException">
/// The stream ends before all of the announced data could be read
/// </exception>
public CardinalityEstimator Deserialize(Stream stream)
{
    using (var br = new BinaryReader(stream))
    {
        int dataFormatMajorVersion = br.ReadUInt16();
        int dataFormatMinorVersion = br.ReadUInt16();

        AssertDataVersionCanBeRead(dataFormatMajorVersion, dataFormatMinorVersion);

        int bitsPerIndex = br.ReadInt32();
        byte flags = br.ReadByte();
        bool isSparse = ((flags & 2) == 2);
        bool isDirectCount = ((flags & 1) == 1);

        HashSet<ulong> directCount = null;
        IDictionary<ushort, byte> lookupSparse = isSparse ? new Dictionary<ushort, byte>() : null;
        byte[] lookupDense = null;

        if (isDirectCount)
        {
            // Payload is `count` raw 64-bit values
            int count = br.ReadInt32();
            directCount = new HashSet<ulong>();

            for (var i = 0; i < count; i++)
            {
                ulong element = br.ReadUInt64();
                directCount.Add(element);
            }
        }
        else if (isSparse)
        {
            // Payload is `count` (UInt16 key, byte value) pairs
            int count = br.ReadInt32();

            for (var i = 0; i < count; i++)
            {
                ushort elementKey = br.ReadUInt16();
                byte elementValue = br.ReadByte();
                lookupSparse.Add(elementKey, elementValue);
            }
        }
        else
        {
            // Payload is `count` raw bytes of the dense lookup table
            int count = br.ReadInt32();
            lookupDense = br.ReadBytes(count);

            // Unlike the other Read* calls, ReadBytes does not throw at end of stream: it returns
            // a shorter array. Reject that explicitly so truncated data is not silently accepted.
            if (lookupDense.Length != count)
            {
                throw new EndOfStreamException(
                    "Expected " + count + " bytes of dense lookup data but the stream ended after " +
                    lookupDense.Length + " bytes.");
            }
        }

        var data = new CardinalityEstimatorState
        {
            BitsPerIndex = bitsPerIndex,
            DirectCount = directCount,
            IsSparse = isSparse,
            LookupDense = lookupDense,
            LookupSparse = lookupSparse
        };

        var result = new CardinalityEstimator(data);

        return result;
    }
}
/// <summary>
/// Builds a CardinalityEstimator whose internal fields are restored from the given <paramref name="state" />
/// </summary>
/// <param name="state">State snapshot to restore the estimator from</param>
internal CardinalityEstimator(CardinalityEstimatorState state)
{
    // Parameters derived from the number of index bits
    this.bitsPerIndex = state.BitsPerIndex;
    this.bitsForHll = 64 - this.bitsPerIndex;
    this.m = (int)Math.Pow(2, this.bitsPerIndex);
    this.alphaM = GetAlphaM(this.m);
    this.subAlgorithmSelectionThreshold = GetSubAlgorithmSelectionThreshold(this.bitsPerIndex);

    // Hash function is resolved from the ID carried by the state
    this.hashFunctionId = state.HashFunctionId;
    this.hashFunction = HashFunctionFactory.GetHashFunction(this.hashFunctionId);

    // Direct count: keep a private copy of the set when one is supplied
    var sourceDirectCount = state.DirectCount;
    if (sourceDirectCount == null)
    {
        this.directCount = null;
    }
    else
    {
        this.directCount = new HashSet<ulong>(sourceDirectCount);
    }

    // Sparse representation: likewise copied; the dense array is taken by reference
    this.isSparse = state.IsSparse;
    var sourceLookupSparse = state.LookupSparse;
    if (sourceLookupSparse == null)
    {
        this.lookupSparse = null;
    }
    else
    {
        this.lookupSparse = new Dictionary<ushort, byte>(sourceLookupSparse);
    }

    this.lookupDense = state.LookupDense;
    this.CountAdditions = state.CountAdditions;

    // Sparse capacity budget: 15 bytes per sparse element plus a constant overhead, never negative
    this.sparseMaxElements = Math.Max(0, this.m / 15 - 10);

    // With no room for sparse elements, go dense right away
    if (this.sparseMaxElements <= 0)
    {
        SwitchToDenseRepresentation();
    }

    // Nothing more to do unless a direct count was supplied
    if (this.directCount == null)
    {
        return;
    }

    // A direct count may be the only thing that was serialized, so rebuild the HLL lookup from it:
    // start over from an empty sparse lookup and feed every stored element back in.
    isSparse = true;
    this.lookupSparse = new Dictionary<ushort, byte>();

    foreach (ulong storedElement in this.directCount)
    {
        AddElementHash(storedElement);
    }
}
/// <summary>
/// Serialize the given <paramref name="cardinalityEstimator" /> to <paramref name="stream" />
/// </summary>
/// <remarks>
/// Fields are written in this order: DataFormatMajorVersion, DataFormatMinorVersion, the hash function ID
/// as one byte, BitsPerIndex, a flags byte (bit 0 set = direct count present, bit 1 set = sparse
/// representation), an element count, the payload selected by the flags, and finally CountAdditions.
/// When a direct count is present it is the only payload written, even if the state is also sparse.
/// The <see cref="BinaryWriter" /> wrapping <paramref name="stream" /> is disposed before returning,
/// which also closes <paramref name="stream" />.
/// </remarks>
/// <param name="stream">Destination stream</param>
/// <param name="cardinalityEstimator">Estimator whose state is written</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="cardinalityEstimator" /> is null
/// </exception>
public void Serialize(Stream stream, CardinalityEstimator cardinalityEstimator)
{
    // Validate before touching the stream, so a null estimator cannot leave a half-written header behind
    if (cardinalityEstimator == null)
    {
        throw new ArgumentNullException("cardinalityEstimator");
    }

    using (var bw = new BinaryWriter(stream))
    {
        bw.Write(DataFormatMajorVersion);
        bw.Write(DataFormatMinorVersion);

        CardinalityEstimatorState data = cardinalityEstimator.GetState();

        bw.Write((byte)data.HashFunctionId);
        bw.Write(data.BitsPerIndex);

        // Flags byte: bit 1 = sparse, bit 0 = direct count present
        bw.Write((byte)(((data.IsSparse ? 1 : 0) << 1) + (data.DirectCount != null ? 1 : 0)));

        if (data.DirectCount != null)
        {
            bw.Write(data.DirectCount.Count);

            foreach (ulong element in data.DirectCount)
            {
                bw.Write(element);
            }
        }
        else if (data.IsSparse)
        {
            bw.Write(data.LookupSparse.Count);

            foreach (KeyValuePair<ushort, byte> element in data.LookupSparse)
            {
                bw.Write(element.Key);
                bw.Write(element.Value);
            }
        }
        else
        {
            bw.Write(data.LookupDense.Length);

            // Write(byte[]) emits the raw bytes with no length prefix, the same output as writing
            // them one at a time
            bw.Write(data.LookupDense);
        }

        bw.Write(data.CountAdditions);
        bw.Flush();
    }
}
/// <summary>
/// Deserialize a <see cref="CardinalityEstimator" /> from the given <paramref name="stream" />
/// </summary>
/// <remarks>
/// Fields are read in this order: major version (UInt16), minor version (UInt16), the hash function ID
/// (one byte, only when the major version is 2 or later), bits per index (Int32), a flags byte
/// (bit 0 set = direct count present, bit 1 set = sparse representation), an Int32 element count,
/// the payload selected by the flags, and CountAdditions (UInt64, only for version 2.1 or later).
/// The <see cref="BinaryReader" /> wrapping <paramref name="stream" /> is disposed before returning,
/// which also closes <paramref name="stream" />.
/// </remarks>
/// <param name="stream">Stream positioned at the first byte of a serialized estimator</param>
/// <returns>A new estimator built from the state read from <paramref name="stream" /></returns>
/// <exception cref="EndOfStreamException">
/// The stream ends before all of the announced data could be read
/// </exception>
public CardinalityEstimator Deserialize(Stream stream)
{
    using (var br = new BinaryReader(stream))
    {
        int dataFormatMajorVersion = br.ReadUInt16();
        int dataFormatMinorVersion = br.ReadUInt16();

        AssertDataVersionCanBeRead(dataFormatMajorVersion, dataFormatMinorVersion);

        HashFunctionId hashFunctionId;
        if (dataFormatMajorVersion >= 2)
        {
            // Starting with version 2.0, the serializer writes the hash function ID
            hashFunctionId = (HashFunctionId)br.ReadByte();
        }
        else
        {
            // Versions before 2.0 all used FNV-1a
            hashFunctionId = HashFunctionId.Fnv1A;
        }

        int bitsPerIndex = br.ReadInt32();
        byte flags = br.ReadByte();
        bool isSparse = ((flags & 2) == 2);
        bool isDirectCount = ((flags & 1) == 1);

        HashSet<ulong> directCount = null;
        IDictionary<ushort, byte> lookupSparse = isSparse ? new Dictionary<ushort, byte>() : null;
        byte[] lookupDense = null;

        if (isDirectCount)
        {
            // Payload is `count` raw 64-bit values
            int count = br.ReadInt32();
            directCount = new HashSet<ulong>();

            for (var i = 0; i < count; i++)
            {
                ulong element = br.ReadUInt64();
                directCount.Add(element);
            }
        }
        else if (isSparse)
        {
            // Payload is `count` (UInt16 key, byte value) pairs
            int count = br.ReadInt32();

            for (var i = 0; i < count; i++)
            {
                ushort elementKey = br.ReadUInt16();
                byte elementValue = br.ReadByte();
                lookupSparse.Add(elementKey, elementValue);
            }
        }
        else
        {
            // Payload is `count` raw bytes of the dense lookup table
            int count = br.ReadInt32();
            lookupDense = br.ReadBytes(count);

            // Unlike the other Read* calls, ReadBytes does not throw at end of stream: it returns
            // a shorter array. Reject that explicitly so truncated data is not silently accepted.
            if (lookupDense.Length != count)
            {
                throw new EndOfStreamException(
                    "Expected " + count + " bytes of dense lookup data but the stream ended after " +
                    lookupDense.Length + " bytes.");
            }
        }

        // Starting with version 2.1, the serializer writes CountAdditions.
        // Versions are compared as (major, minor) pairs so that a later major version with a low
        // minor number (e.g. 3.0) still counts as "2.1 or later".
        // NOTE(review): assumes formats after 2.x keep this field - confirm when the format changes.
        bool hasCountAdditions =
            dataFormatMajorVersion > 2 ||
            (dataFormatMajorVersion == 2 && dataFormatMinorVersion >= 1);

        ulong countAdditions = 0UL;
        if (hasCountAdditions)
        {
            countAdditions = br.ReadUInt64();
        }

        var data = new CardinalityEstimatorState
        {
            HashFunctionId = hashFunctionId,
            BitsPerIndex = bitsPerIndex,
            DirectCount = directCount,
            IsSparse = isSparse,
            LookupDense = lookupDense,
            LookupSparse = lookupSparse,
            CountAdditions = countAdditions,
        };

        var result = new CardinalityEstimator(data);

        return result;
    }
}
/// <summary>
/// Deserialize a <see cref="CardinalityEstimator" /> from the given <paramref name="stream" />
/// </summary>
/// <remarks>
/// Fields are read in this order: major version (UInt16), minor version (UInt16), bits per index (Int32),
/// a flags byte (bit 0 set = direct count present, bit 1 set = sparse representation), an Int32 element
/// count, and finally the payload selected by the flags.
/// The <see cref="BinaryReader" /> wrapping <paramref name="stream" /> is disposed before returning,
/// which also closes <paramref name="stream" />.
/// </remarks>
/// <param name="stream">Stream positioned at the first byte of a serialized estimator</param>
/// <returns>A new estimator built from the state read from <paramref name="stream" /></returns>
/// <exception cref="EndOfStreamException">
/// The stream ends before all of the announced data could be read
/// </exception>
public CardinalityEstimator Deserialize(Stream stream)
{
    using (var br = new BinaryReader(stream))
    {
        int dataFormatMajorVersion = br.ReadUInt16();
        int dataFormatMinorVersion = br.ReadUInt16();

        AssertDataVersionCanBeRead(dataFormatMajorVersion, dataFormatMinorVersion);

        int bitsPerIndex = br.ReadInt32();
        byte flags = br.ReadByte();
        bool isSparse = ((flags & 2) == 2);
        bool isDirectCount = ((flags & 1) == 1);

        HashSet<ulong> directCount = null;
        IDictionary<ushort, byte> lookupSparse = isSparse ? new Dictionary<ushort, byte>() : null;
        byte[] lookupDense = null;

        if (isDirectCount)
        {
            // Direct-count payload: `count` raw 64-bit values
            int count = br.ReadInt32();
            directCount = new HashSet<ulong>();

            for (var i = 0; i < count; i++)
            {
                ulong element = br.ReadUInt64();
                directCount.Add(element);
            }
        }
        else if (isSparse)
        {
            // Sparse payload: `count` (UInt16 key, byte value) pairs
            int count = br.ReadInt32();

            for (var i = 0; i < count; i++)
            {
                ushort elementKey = br.ReadUInt16();
                byte elementValue = br.ReadByte();
                lookupSparse.Add(elementKey, elementValue);
            }
        }
        else
        {
            // Dense payload: `count` raw bytes of the lookup table
            int count = br.ReadInt32();
            lookupDense = br.ReadBytes(count);

            // ReadBytes signals end of stream by returning fewer bytes rather than by throwing,
            // so a truncated table has to be detected here.
            if (lookupDense.Length != count)
            {
                throw new EndOfStreamException(
                    "Expected " + count + " bytes of dense lookup data but the stream ended after " +
                    lookupDense.Length + " bytes.");
            }
        }

        var data = new CardinalityEstimatorState
        {
            BitsPerIndex = bitsPerIndex,
            DirectCount = directCount,
            IsSparse = isSparse,
            LookupDense = lookupDense,
            LookupSparse = lookupSparse
        };

        var result = new CardinalityEstimator(data);

        return result;
    }
}