private void WriteWorker(Stream stream, BlockingCollection<Block> toWrite, ColumnCodec[] activeColumns,
    Schema sourceSchema, int rowsPerBlock, IChannelProvider cp, ExceptionMarshaller exMarshaller)
{
    _host.AssertValue(exMarshaller);
    try
    {
        _host.AssertValue(cp);
        cp.AssertValue(stream);
        cp.AssertValue(toWrite);
        cp.AssertValue(activeColumns);
        cp.AssertValue(sourceSchema);
        cp.Assert(rowsPerBlock > 0);

        using (IChannel ch = cp.Start("Write"))
        {
            var blockLookups = new List<BlockLookup>[activeColumns.Length];
            for (int c = 0; c < blockLookups.Length; ++c)
            {
                blockLookups[c] = new List<BlockLookup>();
            }
            var deadLookups = new int[activeColumns.Length];

            // Reserve space for the header at the start. This will be filled
            // in with valid values once writing has completed.
            ch.CheckIO(stream.Position == 0);
            stream.Write(new byte[Header.HeaderSize], 0, Header.HeaderSize);
            ch.CheckIO(stream.Position == Header.HeaderSize);
            long expectedPosition = stream.Position;
            BlockLookup deadLookup = new BlockLookup();
            foreach (Block block in toWrite.GetConsumingEnumerable(exMarshaller.Token))
            {
                ch.CheckIO(stream.Position == expectedPosition);
                MemoryStream compressed = block.BlockData;
                ArraySegment<byte> buffer;
                bool tmp = compressed.TryGetBuffer(out buffer);
                ch.Assert(tmp);
                stream.Write(buffer.Array, buffer.Offset, buffer.Count);
                BlockLookup currLookup = new BlockLookup(expectedPosition, (int)compressed.Length, block.UncompressedLength);
                expectedPosition += compressed.Length;
                _memPool.Return(ref compressed);
                ch.CheckIO(stream.Position == expectedPosition);

                // Record the position. We have this "lookups" list per column. Yet, it may be that sometimes
                // the writer receives things out of order.
                // REVIEW: The format and the rest of the pipeline supposedly support a long number
                // of blocks, but the writing scheme does not yet support that.
                int blockIndex = (int)block.BlockIndex;
                var lookups = blockLookups[block.ColumnIndex];
                if (lookups.Count == block.BlockIndex) // Received in order.
                {
                    lookups.Add(currLookup);
                }
                else if (lookups.Count < block.BlockIndex) // Received a block a little bit early.
                {
                    // Add a bunch of dead filler lookups, until these late blocks come in.
                    int deadToAdd = (int)block.BlockIndex - lookups.Count;
                    for (int i = 0; i < deadToAdd; ++i)
                    {
                        lookups.Add(deadLookup);
                    }
                    deadLookups[block.ColumnIndex] += deadToAdd;
                    ch.Assert(lookups.Count == block.BlockIndex);
                    lookups.Add(currLookup);
                }
                else // Received a block a little bit late.
                {
                    // This should be a dead block unless the compressors are buggy and somehow
                    // yielding duplicate blocks or something.
                    ch.Assert(lookups[blockIndex].BlockOffset == 0);
                    deadLookups[block.ColumnIndex]--;
                    lookups[blockIndex] = currLookup;
                }
            }

            // We have finished writing all blocks. We will now write the block lookup tables (so we can
            // find the blocks), the slot names (for any columns that have them), the column table of
            // contents (so we know how to decode the blocks, and where the lookups and names are),
            // and the header (so we know dataview wide information and where to find the table of
            // contents) in that order.
            long[] lookupOffsets = new long[blockLookups.Length];
            using (BinaryWriter writer = new BinaryWriter(stream, Encoding.UTF8, leaveOpen: true))
            {
                // Write the block lookup directories. These are referenced from the table of contents,
                // so that someone knows where to look for some block data.
                for (int c = 0; c < blockLookups.Length; ++c)
                {
                    ch.Assert(deadLookups[c] == 0);
                    // The block lookup directories are written uncompressed and in fixed length
                    // to enable rapid seeking.
                    lookupOffsets[c] = stream.Position;
                    foreach (BlockLookup lookup in blockLookups[c])
                    {
                        // *** Lookup table entry format ***
                        // long: Offset to the start of a block
                        // int: Byte length of block as written
                        // int: Byte length of block when uncompressed

                        ch.Assert(lookup.BlockOffset > 0);
                        writer.Write(lookup.BlockOffset);
                        writer.Write(lookup.BlockLength);
                        writer.Write(lookup.DecompressedBlockLength);
                    }
                    ch.CheckIO(stream.Position == lookupOffsets[c] + (16 * blockLookups[c].Count),
                        "unexpected offsets after block lookup table write");
                }

                // Write the metadata for each column.
                long[] metadataTocOffsets = new long[activeColumns.Length];
                for (int c = 0; c < activeColumns.Length; ++c)
                {
                    metadataTocOffsets[c] = WriteMetadata(writer, sourceSchema, activeColumns[c].SourceIndex, ch);
                }

                // Write the table of contents.
                long tocOffset = stream.Position;
                {
                    int c = 0;
                    expectedPosition = stream.Position;
                    foreach (var active in activeColumns)
                    {
                        // *** Column TOC entry format ***
                        // string: column name
                        // codec (as interpretable by CodecFactory.TryGetCodec): column block codec
                        // CompressionKind(byte): block compression strategy
                        // LEB128 int: Rows per block
                        // long: Offset to the start of the lookup table
                        // long: Offset to the start of the metadata TOC entries, or 0 if this has no metadata

                        string name = sourceSchema[active.SourceIndex].Name;
                        writer.Write(name);
                        int nameLen = Encoding.UTF8.GetByteCount(name);
                        expectedPosition += Utils.Leb128IntLength((uint)nameLen) + nameLen;
                        ch.CheckIO(stream.Position == expectedPosition, "unexpected offsets after table of contents name");
                        expectedPosition += _factory.WriteCodec(stream, active.Codec);
                        ch.CheckIO(stream.Position == expectedPosition, "unexpected offsets after table of contents type description");
                        writer.Write((byte)_compression);
                        expectedPosition++;
                        // REVIEW: Right now the number of rows per block is fixed, so we
                        // write the same value each time. In some future state, it may be that this
                        // is relaxed, with possibly some tradeoffs (for example, inability to randomly seek).
                        writer.WriteLeb128Int((ulong)rowsPerBlock);
                        expectedPosition += Utils.Leb128IntLength((uint)rowsPerBlock);
                        // Offset of the lookup table.
                        writer.Write(lookupOffsets[c]);
                        expectedPosition += sizeof(long);
                        // Offset of the metadata table of contents.
                        writer.Write(metadataTocOffsets[c]);
                        expectedPosition += sizeof(long);
                        ch.CheckIO(stream.Position == expectedPosition, "unexpected offsets after table of contents");
                        c++;
                    }
                }

                // Write the tail signature.
                long tailOffset = stream.Position;
                writer.Write(Header.TailSignatureValue);

                // Now move back to the beginning of the stream, and write out the now completed header.
                Header header = new Header()
                {
                    Signature = Header.SignatureValue,
                    Version = Header.WriterVersion,
                    CompatibleVersion = Header.CanBeReadByVersion,
                    TableOfContentsOffset = tocOffset,
                    TailOffset = tailOffset,
                    RowCount = _rowCount,
                    ColumnCount = activeColumns.Length
                };
                byte[] headerBytes = new byte[Header.HeaderSize];
                unsafe
                {
                    Marshal.Copy(new IntPtr(&header), headerBytes, 0, Marshal.SizeOf(typeof(Header)));
                }
                writer.Seek(0, SeekOrigin.Begin);
                writer.Write(headerBytes);
            }
        }
    }
    catch (Exception ex)
    {
        exMarshaller.Set("writing", ex);
    }
}
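// A minimal reader-side sketch, not part of the original file, illustrating the 16-byte
// lookup-table entry layout documented above (long block offset, int compressed length,
// int uncompressed length) that the CheckIO on "16 * blockLookups[c].Count" verifies.
// The method name ReadLookupTableSketch is hypothetical, and it assumes BlockLookup's
// three-argument constructor matches the write-side call seen in WriteWorker; it is a
// sketch under those assumptions, not part of the writer's actual read path.
private static BlockLookup[] ReadLookupTableSketch(Stream stream, long lookupOffset, int blockCount)
{
    // Seek to the start of this column's lookup directory, as recorded in the TOC entry.
    stream.Seek(lookupOffset, SeekOrigin.Begin);
    var result = new BlockLookup[blockCount];
    using (var reader = new BinaryReader(stream, Encoding.UTF8, leaveOpen: true))
    {
        for (int b = 0; b < blockCount; ++b)
        {
            long blockOffset = reader.ReadInt64();       // offset to the start of the block
            int blockLength = reader.ReadInt32();        // byte length of the block as written
            int decompressedLength = reader.ReadInt32(); // byte length of the block when uncompressed
            result[b] = new BlockLookup(blockOffset, blockLength, decompressedLength);
        }
    }
    return result;
}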