示例#1
0
        /// <summary>
        /// Drains <paramref name="toWrite"/> and writes every block to <paramref name="stream"/>, then
        /// appends the per-column block lookup tables, the column metadata, the table of contents and
        /// the tail signature, and finally seeks back to the start to fill in the header.
        /// Any exception raised along the way is handed to <paramref name="exMarshaller"/> under the
        /// "writing" component name rather than propagated to the caller.
        /// </summary>
        /// <param name="stream">Destination stream. It must be at position 0 on entry and must support
        /// seeking, since the header is written last at offset 0.</param>
        /// <param name="toWrite">Source of blocks; consumed until it is completed or until the token of
        /// <paramref name="exMarshaller"/> is cancelled.</param>
        /// <param name="activeColumns">The columns being written. A block's column index is an index
        /// into this array.</param>
        /// <param name="sourceSchema">Schema used to look up column names and passed on to the metadata
        /// writer.</param>
        /// <param name="rowsPerBlock">Positive number of rows per block, recorded in every table of
        /// contents entry.</param>
        /// <param name="cp">Provider used to start the "Write" channel.</param>
        /// <param name="exMarshaller">Supplies the cancellation token and receives any exception.</param>
        private void WriteWorker(Stream stream, BlockingCollection<Block> toWrite, ColumnCodec[] activeColumns,
                                 Schema sourceSchema, int rowsPerBlock, IChannelProvider cp, ExceptionMarshaller exMarshaller)
        {
            _host.AssertValue(exMarshaller);
            try
            {
                _host.AssertValue(cp);
                cp.AssertValue(stream);
                cp.AssertValue(toWrite);
                cp.AssertValue(activeColumns);
                cp.AssertValue(sourceSchema);
                cp.Assert(rowsPerBlock > 0);

                using (IChannel ch = cp.Start("Write"))
                {
                    int columnCount = activeColumns.Length;

                    // Per column: the list of block lookups, and the number of filler entries
                    // in that list that are still waiting for their real block to show up.
                    var lookupsByColumn = new List<BlockLookup>[columnCount];
                    int[] pendingFillers = new int[columnCount];
                    for (int col = 0; col < columnCount; ++col)
                    {
                        lookupsByColumn[col] = new List<BlockLookup>();
                    }

                    // Leave room for the header at the very start of the stream. The real
                    // values are only known once everything else has been written.
                    ch.CheckIO(stream.Position == 0);
                    byte[] headerPlaceholder = new byte[Header.HeaderSize];
                    stream.Write(headerPlaceholder, 0, headerPlaceholder.Length);
                    ch.CheckIO(stream.Position == Header.HeaderSize);

                    long expectedPos = stream.Position;
                    BlockLookup filler = new BlockLookup();
                    foreach (Block block in toWrite.GetConsumingEnumerable(exMarshaller.Token))
                    {
                        ch.CheckIO(stream.Position == expectedPos);

                        // Copy the block payload straight out of the memory stream's backing buffer.
                        // The TryGetBuffer call is made outside of the assertion so that it always
                        // runs, whether or not assertions are active.
                        MemoryStream payload = block.BlockData;
                        ArraySegment<byte> segment;
                        bool gotBuffer = payload.TryGetBuffer(out segment);
                        ch.Assert(gotBuffer);
                        stream.Write(segment.Array, segment.Offset, segment.Count);

                        long payloadLength = payload.Length;
                        BlockLookup entry = new BlockLookup(expectedPos, (int)payloadLength, block.UncompressedLength);
                        expectedPos += payloadLength;
                        _memPool.Return(ref payload);
                        ch.CheckIO(stream.Position == expectedPos);

                        // Remember where this block went. Blocks of a column are not guaranteed
                        // to reach the writer in index order, so the per-column list may need
                        // filler entries, or may already hold a filler for this block.
                        // REVIEW: The format and the rest of the pipeline supposedly support a long
                        // number of blocks, but this bookkeeping narrows the index to an int.
                        var column = block.ColumnIndex;
                        int slot = (int)block.BlockIndex;
                        List<BlockLookup> columnLookups = lookupsByColumn[column];

                        if (columnLookups.Count > block.BlockIndex)
                        {
                            // The block is late: its slot already exists. That slot should hold
                            // a filler, unless the compressors somehow yielded a duplicate block.
                            ch.Assert(columnLookups[slot].BlockOffset == 0);
                            pendingFillers[column]--;
                            columnLookups[slot] = entry;
                            continue;
                        }

                        if (columnLookups.Count < block.BlockIndex)
                        {
                            // The block is early: pad with fillers for the blocks not yet seen.
                            int fillerCount = slot - columnLookups.Count;
                            for (int remaining = fillerCount; remaining > 0; --remaining)
                            {
                                columnLookups.Add(filler);
                            }
                            pendingFillers[column] += fillerCount;
                            ch.Assert(columnLookups.Count == block.BlockIndex);
                        }

                        // Either the block arrived in order, or padding has just made room for it.
                        columnLookups.Add(entry);
                    }

                    // All block data is on the stream. What follows, in this order:
                    //   1. the block lookup tables (where each block lives),
                    //   2. the per-column metadata,
                    //   3. the column table of contents (how to decode blocks, and where the
                    //      lookup tables and metadata are),
                    //   4. the tail signature,
                    //   5. the header, written back at the start of the stream.
                    long[] lookupOffsets = new long[columnCount];
                    using (BinaryWriter writer = new BinaryWriter(stream, Encoding.UTF8, leaveOpen: true))
                    {
                        // *** Lookup table entry format ***
                        // long: Offset to the start of a block
                        // int: Byte length of block as written
                        // int: Byte length of block when uncompressed
                        const int lookupEntrySize = sizeof(long) + sizeof(int) + sizeof(int);

                        // The lookup tables are written uncompressed with fixed-length entries
                        // to enable rapid seeking. The table of contents refers to them by offset.
                        for (int col = 0; col < columnCount; ++col)
                        {
                            ch.Assert(pendingFillers[col] == 0);

                            long tableStart = stream.Position;
                            lookupOffsets[col] = tableStart;

                            List<BlockLookup> columnLookups = lookupsByColumn[col];
                            foreach (BlockLookup item in columnLookups)
                            {
                                ch.Assert(item.BlockOffset > 0);
                                writer.Write(item.BlockOffset);
                                writer.Write(item.BlockLength);
                                writer.Write(item.DecompressedBlockLength);
                            }

                            ch.CheckIO(stream.Position == tableStart + (lookupEntrySize * columnLookups.Count),
                                       "unexpected offsets after block lookup table write");
                        }

                        // Per-column metadata; keep the returned offsets for the table of contents.
                        long[] metadataTocOffsets = new long[columnCount];
                        for (int col = 0; col < columnCount; ++col)
                        {
                            metadataTocOffsets[col] = WriteMetadata(writer, sourceSchema, activeColumns[col].SourceIndex, ch);
                        }

                        // Table of contents, one entry per active column.
                        long tocOffset = stream.Position;
                        expectedPos = stream.Position;
                        for (int col = 0; col < columnCount; ++col)
                        {
                            // *** Column TOC entry format ***
                            // string: column name
                            // codec (as interpretable by CodecFactory.TryGetCodec): column block codec
                            // CompressionKind(byte): block compression strategy
                            // LEB128 int: Rows per block
                            // long: Offset to the start of the lookup table
                            // long: Offset to the start of the metadata TOC entries, or 0 if this has no metadata
                            ColumnCodec active = activeColumns[col];

                            // Column name: the expected size is the length prefix plus the UTF-8 bytes.
                            string name = sourceSchema[active.SourceIndex].Name;
                            writer.Write(name);
                            int nameByteCount = Encoding.UTF8.GetByteCount(name);
                            expectedPos += Utils.Leb128IntLength((uint)nameByteCount) + nameByteCount;
                            ch.CheckIO(stream.Position == expectedPos, "unexpected offsets after table of contents name");

                            // Codec description; the factory reports how many bytes it wrote.
                            expectedPos += _factory.WriteCodec(stream, active.Codec);
                            ch.CheckIO(stream.Position == expectedPos, "unexpected offsets after table of contents type description");

                            // Compression kind, a single byte.
                            writer.Write((byte)_compression);
                            expectedPos++;

                            // REVIEW: The number of rows per block is currently fixed, so the same
                            // value is written for every column. If that is ever relaxed there may
                            // be tradeoffs (for example, inability to randomly seek).
                            writer.WriteLeb128Int((ulong)rowsPerBlock);
                            expectedPos += Utils.Leb128IntLength((uint)rowsPerBlock);

                            // Offset of the lookup table, then offset of the metadata table of contents.
                            writer.Write(lookupOffsets[col]);
                            writer.Write(metadataTocOffsets[col]);
                            expectedPos += 2 * sizeof(long);
                            ch.CheckIO(stream.Position == expectedPos, "unexpected offsets after table of contents");
                        }

                        // Tail signature.
                        long tailOffset = stream.Position;
                        writer.Write(Header.TailSignatureValue);

                        // Everything the header needs is now known: build it, copy its raw bytes
                        // into a header-sized buffer, and overwrite the placeholder at offset 0.
                        Header header = new Header
                        {
                            Signature             = Header.SignatureValue,
                            Version               = Header.WriterVersion,
                            CompatibleVersion     = Header.CanBeReadByVersion,
                            TableOfContentsOffset = tocOffset,
                            TailOffset            = tailOffset,
                            RowCount              = _rowCount,
                            ColumnCount           = activeColumns.Length
                        };
                        byte[] headerBytes = new byte[Header.HeaderSize];
                        unsafe
                        {
                            Marshal.Copy(new IntPtr(&header), headerBytes, 0, Marshal.SizeOf(typeof(Header)));
                        }
                        writer.Seek(0, SeekOrigin.Begin);
                        writer.Write(headerBytes);
                    }
                }
            }
            catch (Exception ex)
            {
                // Do not let the failure escape; report it through the marshaller instead.
                exMarshaller.Set("writing", ex);
            }
        }