/// <summary>
/// Converts the given <paramref name="packedRow"/> to its byte array representation.
/// Packed rows are the big elephant in the room. These are a direct representation of a row in any of the tables
/// the server has and are composed of 3 parts.
///
/// The first is the header that contains information about the columns that the PackedRow has data for.
/// This header is known as <see cref="DBRowDescriptor" /> which is composed of a tuple with a string (column name)
/// and an integer (column type). The column order doesn't matter on the header.
///
/// The second part is the compressible bit data. It contains part of the actual data in the row. The columns are sorted
/// by bit size in descending order; this way the amount of zeros one after another is kept to a minimum, allowing
/// the zero compression algorithm (<see cref="ZeroCompressionUtils" />) to greatly reduce the size of this data.
/// Strings are not encoded here and are part of the third section of the PackedRow.
/// The 1-bit sized columns (booleans) are encoded a bit differently from the rest of the data. Once the multi-bit
/// sized data is written (long, int, short, byte) the bool values are grouped into 1 byte chunks, storing up to 8 booleans
/// in each byte. The booleans are added to the byte from right to left and the byte blocks are written one after the other.
/// These resources might be useful to better understand this operation:
/// https://stackoverflow.com/questions/36829860/using-binary-to-compress-boolean-array
/// https://en.wikipedia.org/wiki/Bit_array
/// https://sakai.rutgers.edu/wiki/site/e07619c5-a492-4ebe-8771-179dfe450ae4/bit-to-boolean%20conversion.html
///
/// The third and last section contains the byte arrays and strings. These are encoded as normal python types
/// after the second part.
///
/// The following opcodes are supported
/// <seealso cref="Opcode.PackedRow" />
/// </summary>
/// <param name="writer">Where to write the encoded data to</param>
/// <param name="packedRow">The value to write</param>
/// <exception cref="InvalidDataException">If a column has an unknown field type</exception>
private static void ProcessPackedRow(BinaryWriter writer, PyPackedRow packedRow)
{
    writer.WriteOpcode(Opcode.PackedRow);
    Process(writer, packedRow.Header);

    // prepare the zero-compression streams:
    // wholeByteStream holds the fixed-size column data (the boolean bitfield is appended to it),
    // bitPacketStream accumulates the packed boolean bytes,
    // objectStream holds strings/byte arrays, written uncompressed after the compressed section
    MemoryStream wholeByteStream = new MemoryStream();
    MemoryStream bitPacketStream = new MemoryStream();
    MemoryStream objectStream = new MemoryStream();

    BinaryWriter wholeByteWriter = new BinaryWriter(wholeByteStream);
    BinaryWriter bitPacketWriter = new BinaryWriter(bitPacketStream);
    BinaryWriter objectWriter = new BinaryWriter(objectStream);

    // sort the columns by bit size (descending) so same-sized values sit together,
    // maximizing runs of zero bytes for the zero-compression pass
    IOrderedEnumerable<DBRowDescriptor.Column> enumerator =
        packedRow.Header.Columns.OrderByDescending(c => Utils.GetTypeBits(c.Type));

    // bit-packing state for boolean columns: up to 8 booleans per byte, filled from bit 0 upward
    byte bitOffset = 0;
    byte toWrite = 0;

    foreach (DBRowDescriptor.Column column in enumerator)
    {
        PyDataType value = packedRow[column.Name];

        switch (column.Type)
        {
            // null numeric values are encoded as 0 (the `?? 0` fallback)
            case FieldType.I8:
            case FieldType.UI8:
            case FieldType.CY:
            case FieldType.FileTime:
                wholeByteWriter.Write((long)(value as PyInteger ?? 0));
                break;

            case FieldType.I4:
            case FieldType.UI4:
                wholeByteWriter.Write((int)(value as PyInteger ?? 0));
                break;

            case FieldType.I2:
            case FieldType.UI2:
                wholeByteWriter.Write((short)(value as PyInteger ?? 0));
                break;

            case FieldType.I1:
            case FieldType.UI1:
                wholeByteWriter.Write((byte)(value as PyInteger ?? 0));
                break;

            case FieldType.R8:
                wholeByteWriter.Write((double)(value as PyDecimal ?? 0));
                break;

            case FieldType.R4:
                wholeByteWriter.Write((float)(value as PyDecimal ?? 0));
                break;

            // bools, bytes and str are handled differently
            case FieldType.Bool:
                // relies on PyBool's conversion to bool; a null `value as PyBool` is
                // treated as false — same pattern used elsewhere in this file
                if (value as PyBool)
                {
                    // booleans are packed into the buffer byte from right (bit 0) to left
                    toWrite |= (byte)(1 << bitOffset);
                }

                bitOffset++;

                if (bitOffset > 7)
                {
                    // byte is full: flush it to the bit stream and reset the packing state
                    bitPacketWriter.Write(toWrite);
                    toWrite = 0;
                    bitOffset = 0;
                }
                break;

            case FieldType.Bytes:
            case FieldType.Str:
            case FieldType.WStr:
                // variable-sized values go to the object stream, written after the compressed section
                Process(objectWriter, packedRow[column.Name]);
                break;

            default:
                // InvalidDataException keeps this consistent with the unmarshal side,
                // which throws InvalidDataException for unknown column types
                throw new InvalidDataException($"Unknown field type {column.Type}");
        }
    }

    // after the column loop there may be a partially-filled boolean byte left over;
    // it has to be written to the bit stream too
    if (bitOffset > 0)
    {
        bitPacketWriter.Write(toWrite);
    }

    // append the boolean bitfield right after the fixed-size column data
    bitPacketStream.WriteTo(wholeByteStream);

    // rewind and zero-compress the whole fixed-size section into the output
    wholeByteStream.Seek(0, SeekOrigin.Begin);
    BinaryReader reader = new BinaryReader(wholeByteStream);
    ZeroCompressionUtils.ZeroCompress(reader, writer);

    // as the last step write the encoded objects after the packed data
    objectStream.WriteTo(writer.BaseStream);
}
/// <summary>
/// Converts the given <paramref name="packedRow"/> to its byte array representation.
/// Packed rows are the big elephant in the room. These are a direct representation of a row in any of the tables
/// the server has and are composed of 3 parts.
///
/// The first is the header that contains information about the columns that the PackedRow has data for.
/// This header is known as <see cref="DBRowDescriptor" /> which is composed of a tuple with a string (column name)
/// and an integer (column type). The column order doesn't matter on the header.
///
/// The second part is the compressible bit data. It contains part of the actual data in the row. The columns are sorted
/// by bit size in descending order; this way the amount of zeros one after another is kept to a minimum, allowing
/// the zero compression algorithm (<see cref="ZeroCompressionUtils" />) to greatly reduce the size of this data.
/// Strings are not encoded here and are part of the third section of the PackedRow.
/// The 1-bit sized columns (booleans) are encoded a bit differently from the rest of the data. Once the multi-bit
/// sized data is written (long, int, short, byte) the bool values are grouped into 1 byte chunks, storing up to 8 booleans
/// in each byte. The booleans are added to the byte from right to left and the byte blocks are written one after the other.
/// These resources might be useful to better understand this operation:
/// https://stackoverflow.com/questions/36829860/using-binary-to-compress-boolean-array
/// https://en.wikipedia.org/wiki/Bit_array
/// https://sakai.rutgers.edu/wiki/site/e07619c5-a492-4ebe-8771-179dfe450ae4/bit-to-boolean%20conversion.html
///
/// The third and last section contains the byte arrays and strings. These are encoded as normal python types
/// after the second part.
///
/// The following opcodes are supported
/// <seealso cref="Opcode.PackedRow" />
/// </summary>
/// <param name="writer">Where to write the encoded data to</param>
/// <param name="packedRow">The value to write</param>
/// <exception cref="InvalidDataException">If a column has an unknown field type</exception>
private static void ProcessPackedRow(BinaryWriter writer, PyPackedRow packedRow)
{
    writer.WriteOpcode(Opcode.PackedRow);
    Process(writer, packedRow.Header);

    // first pass over the columns: count boolean bits, null-flag bits and the
    // number of whole bytes occupied by the fixed-size (>= 8 bit) columns
    int booleanBits = 0;
    int nullBits = 0;
    int wholeBytes = 0;
    List<DBRowDescriptor.Column> booleanColumns = new List<DBRowDescriptor.Column>();

    foreach (DBRowDescriptor.Column column in packedRow.Header.Columns)
    {
        int bitLength = Utils.GetTypeBits(column.Type);

        if (column.Type == FieldType.Bool)
        {
            booleanColumns.Add(column);
            booleanBits++;
        }

        // every column gets one null-flag bit
        nullBits++;

        if (bitLength >= 8)
        {
            wholeBytes += bitLength >> 3;
        }
    }

    // bitfield layout: boolean bits first [0, booleanBits), then one null flag per
    // column at (columnIndex + booleanBits); the "+ 1" spare byte matches the
    // buffer size the unmarshal side requests from LoadZeroCompressed
    byte[] bitField = new byte[((booleanBits + nullBits) >> 3) + 1];

    // prepare the zero-compression stream (presized) and the object stream for
    // strings/byte arrays, which are written uncompressed after the compressed section
    MemoryStream wholeByteStream = new MemoryStream(wholeBytes + bitField.Length);
    MemoryStream objectStream = new MemoryStream();
    BinaryWriter wholeByteWriter = new BinaryWriter(wholeByteStream);
    BinaryWriter objectWriter = new BinaryWriter(objectStream);

    // sort the columns by bit size (descending) so same-sized values sit together,
    // maximizing runs of zero bytes for the zero-compression pass
    IOrderedEnumerable<DBRowDescriptor.Column> enumerator =
        packedRow.Header.Columns.OrderByDescending(c => Utils.GetTypeBits(c.Type));

    foreach (DBRowDescriptor.Column column in enumerator)
    {
        PyDataType value = packedRow[column.Name];

        switch (column.Type)
        {
            // null numeric values are encoded as 0 (the `?? 0` fallback) and
            // additionally flagged in the null bitfield below
            case FieldType.I8:
            case FieldType.UI8:
            case FieldType.CY:
            case FieldType.FileTime:
                wholeByteWriter.Write((long)(value as PyInteger ?? 0));
                break;

            case FieldType.I4:
            case FieldType.UI4:
                wholeByteWriter.Write((int)(value as PyInteger ?? 0));
                break;

            case FieldType.I2:
            case FieldType.UI2:
                wholeByteWriter.Write((short)(value as PyInteger ?? 0));
                break;

            case FieldType.I1:
            case FieldType.UI1:
                wholeByteWriter.Write((byte)(value as PyInteger ?? 0));
                break;

            case FieldType.R8:
                wholeByteWriter.Write((double)(value as PyDecimal ?? 0));
                break;

            case FieldType.R4:
                wholeByteWriter.Write((float)(value as PyDecimal ?? 0));
                break;

            // bools, bytes and str are handled differently
            case FieldType.Bool:
                if (value as PyBool)
                {
                    // boolean bits are indexed by the column's position in booleanColumns
                    int bit = booleanColumns.IndexOf(column);
                    bitField[bit >> 3] |= (byte)(1 << (bit & 0x7));
                }
                break;

            case FieldType.Bytes:
            case FieldType.Str:
            case FieldType.WStr:
                // write the object to its own stream; `continue` skips the null-flag
                // update below — NOTE(review): nulls for these columns are presumably
                // encoded in the object stream itself, confirm against the unmarshal side
                Process(objectWriter, packedRow[column.Name]);
                continue;

            default:
                // InvalidDataException keeps this consistent with the unmarshal side,
                // which throws InvalidDataException for unknown column types
                throw new InvalidDataException($"Unknown field type {column.Type}");
        }

        if (value is null)
        {
            // flag the column as null; null bits come after all the boolean bits
            int bit = packedRow.Header.Columns.IndexOf(column) + booleanBits;
            bitField[bit >> 3] |= (byte)(1 << (bit & 0x7));
        }
    }

    // the bitfield goes right after the fixed-size column data
    wholeByteWriter.Write(bitField);

    // rewind and zero-compress the whole fixed-size section into the output
    wholeByteStream.Seek(0, SeekOrigin.Begin);
    BinaryReader reader = new BinaryReader(wholeByteStream);
    ZeroCompressionUtils.ZeroCompress(reader, writer);

    // as the last step write the encoded objects after the packed data
    objectStream.WriteTo(writer.BaseStream);
}
/// <summary>
/// Parses a PackedRow off the wire: the inverse of <seealso cref="Marshal.ProcessPackedRow"/>.
/// Reads the <see cref="DBRowDescriptor"/> header, decompresses the zero-compressed
/// fixed-size section, then decodes each column in descending bit-size order
/// (the same order the marshal side wrote them in).
///
/// Opcodes supported:
/// <seealso cref="Opcode.PackedRow"/>
/// </summary>
/// <param name="opcode">Type of object to parse</param>
/// <returns>The decoded python type</returns>
/// <exception cref="InvalidDataException">If any error was found in the data</exception>
protected virtual PyDataType ProcessPackedRow(Opcode opcode)
{
    if (opcode != Opcode.PackedRow)
    {
        throw new InvalidDataException($"Trying to parse a {opcode} as PackedRow");
    }

    DBRowDescriptor descriptor = this.Process(false);
    Dictionary<string, PyDataType> data = new Dictionary<string, PyDataType>();

    // first pass over the columns: count whole bytes of fixed-size data, one
    // null-flag bit per column, and the boolean bits (with the boolean columns
    // collected so their bitfield index can be recovered later)
    int wholeBytes = 0;
    int nullBits = 0;
    int boolBits = 0;
    List<DBRowDescriptor.Column> booleanColumns = new List<DBRowDescriptor.Column>();

    foreach (DBRowDescriptor.Column column in descriptor.Columns)
    {
        int bitLength = Utils.GetTypeBits(column.Type);

        if (column.Type == FieldType.Bool)
        {
            booleanColumns.Add(column);
            boolBits++;
        }

        nullBits++;

        if (bitLength >= 8)
        {
            wholeBytes += bitLength >> 3;
        }
    }

    // sort columns by the bit size, matching the order the marshal side wrote them in
    IOrderedEnumerable<DBRowDescriptor.Column> enumerator = descriptor.Columns.OrderByDescending(c => Utils.GetTypeBits(c.Type));

    // decompress the fixed-size section; the expected size is the whole bytes plus
    // the trailing bitfield (boolean bits + null bits, with one spare byte)
    MemoryStream decompressedStream = ZeroCompressionUtils.LoadZeroCompressed(this.mReader, wholeBytes + ((nullBits + boolBits) >> 3) + 1);
    BinaryReader decompressedReader = new BinaryReader(decompressedStream);

    // raw buffer access is used for the bitfield lookups while the reader walks
    // the fixed-size values sequentially
    byte[] fullBuffer = decompressedStream.GetBuffer();

    foreach (DBRowDescriptor.Column column in enumerator)
    {
        // null-flag bit: bitfield starts after the whole bytes; null bits follow
        // all the boolean bits and are indexed by the column's declaration order
        int bit = (wholeBytes << 3) + descriptor.Columns.IndexOf(column) + boolBits;
        bool isNull = (fullBuffer[bit >> 3] & (1 << (bit & 0x7))) == (1 << (bit & 0x7));

        switch (column.Type)
        {
            case FieldType.I8:
            case FieldType.UI8:
            case FieldType.CY:
            case FieldType.FileTime:
                data[column.Name] = new PyInteger(decompressedReader.ReadInt64());
                break;

            case FieldType.I4:
            case FieldType.UI4:
                data[column.Name] = new PyInteger(decompressedReader.ReadInt32());
                break;

            case FieldType.I2:
            case FieldType.UI2:
                data[column.Name] = new PyInteger(decompressedReader.ReadInt16());
                break;

            case FieldType.I1:
            case FieldType.UI1:
                data[column.Name] = new PyInteger(decompressedReader.ReadByte());
                break;

            case FieldType.R8:
                data[column.Name] = new PyDecimal(decompressedReader.ReadDouble());
                break;

            case FieldType.R4:
                data[column.Name] = new PyDecimal(decompressedReader.ReadSingle());
                break;

            case FieldType.Bool:
            {
                // boolean bits sit at the start of the bitfield, indexed by the
                // column's position among the boolean columns
                int boolBit = (wholeBytes << 3) + booleanColumns.IndexOf(column);
                bool isTrue = (fullBuffer[boolBit >> 3] & (1 << (boolBit & 0x7))) == (1 << (boolBit & 0x7));
                data[column.Name] = new PyBool(isTrue);
            }
                break;

            case FieldType.Bytes:
            case FieldType.WStr:
            case FieldType.Str:
                // variable-sized values follow the compressed section as plain marshaled types
                data[column.Name] = this.Process(false);
                break;

            default:
                throw new InvalidDataException($"Unknown column type {column.Type}");
        }

        // the value bytes were consumed above either way; a set null flag overrides them
        if (isNull == true)
        {
            data[column.Name] = null;
        }
    }

    return (new PyPackedRow(descriptor, data));
}
/// <summary>
/// Parses a PackedRow off the wire: the inverse of <seealso cref="Marshal.ProcessPackedRow"/>.
/// Reads the <see cref="DBRowDescriptor"/> header, decompresses the zero-compressed
/// section, then decodes each column in descending bit-size order (the same order the
/// marshal side wrote them in), unpacking booleans 8-to-a-byte from bit 0 upward.
///
/// Opcodes supported:
/// <seealso cref="Opcode.PackedRow"/>
/// </summary>
/// <param name="opcode">Type of object to parse</param>
/// <returns>The decoded python type</returns>
/// <exception cref="InvalidDataException">If any error was found in the data</exception>
protected virtual PyDataType ProcessPackedRow(Opcode opcode)
{
    if (opcode != Opcode.PackedRow)
    {
        throw new InvalidDataException($"Trying to parse a {opcode} as PackedRow");
    }

    DBRowDescriptor descriptor = this.Process(false);
    Dictionary<string, PyDataType> data = new Dictionary<string, PyDataType>();

    MemoryStream decompressedStream = ZeroCompressionUtils.LoadZeroCompressed(this.mReader);
    BinaryReader decompressedReader = new BinaryReader(decompressedStream);

    // sort columns by the bit size, matching the order the marshal side wrote them in
    IEnumerable<DBRowDescriptor.Column> enumerator = descriptor.Columns.OrderByDescending(c => Utils.GetTypeBits(c.Type));

    // boolean unpacking state: starting at 8 forces a fresh byte read on the first boolean
    int bitOffset = 8;
    byte buffer = 0;

    foreach (DBRowDescriptor.Column column in enumerator)
    {
        switch (column.Type)
        {
            case FieldType.I8:
            case FieldType.UI8:
            case FieldType.CY:
            case FieldType.FileTime:
                data[column.Name] = new PyInteger(decompressedReader.ReadInt64());
                break;

            case FieldType.I4:
            case FieldType.UI4:
                data[column.Name] = new PyInteger(decompressedReader.ReadInt32());
                break;

            case FieldType.I2:
            case FieldType.UI2:
                data[column.Name] = new PyInteger(decompressedReader.ReadInt16());
                break;

            case FieldType.I1:
            case FieldType.UI1:
                data[column.Name] = new PyInteger(decompressedReader.ReadByte());
                break;

            case FieldType.R8:
                data[column.Name] = new PyDecimal(decompressedReader.ReadDouble());
                break;

            case FieldType.R4:
                data[column.Name] = new PyDecimal(decompressedReader.ReadSingle());
                break;

            case FieldType.Bool:
                // read a byte from the stream when the current one is exhausted
                if (bitOffset == 8)
                {
                    buffer = decompressedReader.ReadByte();
                    bitOffset = 0;
                }

                // booleans were packed from bit 0 upward, so unpack in the same order
                data[column.Name] = new PyBool(((buffer >> bitOffset++) & 0x01) == 0x01);
                break;

            case FieldType.Bytes:
            case FieldType.WStr:
            case FieldType.Str:
                // variable-sized values follow the compressed section as plain marshaled types
                data[column.Name] = this.Process(false);
                break;

            default:
                throw new InvalidDataException($"Unknown column type {column.Type}");
        }
    }

    return (new PyPackedRow(descriptor, data));
}