コード例 #1
0
        /// <summary>
        /// Clears the shared flatbuffer <c>Builder</c>, writes the field-node vector for the
        /// given arrays, and then emits one buffer descriptor struct per collected buffer.
        /// </summary>
        /// <remarks>
        /// The buffers vector is started here but is not ended inside this method; presumably
        /// the caller invokes <c>Builder.EndVector()</c> afterwards (confirm at the call site).
        /// </remarks>
        /// <param name="fields">Fields of the batch; supplies the column count and the total node count.</param>
        /// <param name="arrays">Column arrays, indexed from 0 up to <c>fields.Count - 1</c>.</param>
        /// <returns>
        /// The builder that collected the buffers, paired with the offset of the finished
        /// field-node vector.
        /// </returns>
        private Tuple <ArrowRecordBatchFlatBufferBuilder, VectorOffset> PreparingWritingRecordBatch(IReadOnlyDictionary <string, Field> fields, IReadOnlyList <IArrowArray> arrays)
        {
            Builder.Clear();

            int columnCount = fields.Count;

            // --- Field nodes ---
            Flatbuf.RecordBatch.StartNodesVector(Builder, CountAllNodes(fields));

            // Struct vectors in a flatbuffer are filled back to front, so walk the columns last-to-first.
            int column = columnCount - 1;
            while (column >= 0)
            {
                CreateSelfAndChildrenFieldNodes(arrays[column].Data);
                column--;
            }

            VectorOffset nodesOffset = Builder.EndVector();

            // --- Buffers ---
            var bufferCollector = new ArrowRecordBatchFlatBufferBuilder();

            // Each array reports its buffers to the collector, in column order.
            for (column = 0; column < columnCount; column++)
            {
                arrays[column].Accept(bufferCollector);
            }

            IReadOnlyList <ArrowRecordBatchFlatBufferBuilder.Buffer> collected = bufferCollector.Buffers;

            Flatbuf.RecordBatch.StartBuffersVector(Builder, collected.Count);

            // Same back-to-front rule as for the field nodes.
            int bufferIndex = collected.Count - 1;
            while (bufferIndex >= 0)
            {
                var descriptor = collected[bufferIndex];
                Flatbuf.Buffer.CreateBuffer(Builder, descriptor.Offset, descriptor.DataBuffer.Length);
                bufferIndex--;
            }

            return Tuple.Create(bufferCollector, nodesOffset);
        }
コード例 #2
0
ファイル: ArrowStreamWriter.cs プロジェクト: eric-erki/Arrow
        /// <summary>
        /// Writes a single record batch: the schema first if it has not been written yet, then
        /// the flatbuffer record-batch message, then every non-empty data buffer followed by
        /// padding up to a multiple of 8 bytes.
        /// </summary>
        /// <param name="recordBatch">Batch whose columns are serialized, one per schema field.</param>
        /// <param name="cancellationToken">Forwarded to the schema, message and buffer writes.</param>
        /// <returns>A task that completes once the batch and its trailing padding are written.</returns>
        private protected async Task WriteRecordBatchInternalAsync(RecordBatch recordBatch,
                                                                   CancellationToken cancellationToken = default)
        {
            // TODO: Truncate buffers with extraneous padding / unused capacity

            if (!HasWrittenSchema)
            {
                await WriteSchemaAsync(Schema, cancellationToken).ConfigureAwait(false);

                HasWrittenSchema = true;
            }

            Builder.Clear();

            var columnCount = Schema.Fields.Count;

            // --- Field nodes: one (length, null count) struct per column ---
            Flatbuf.RecordBatch.StartNodesVector(Builder, columnCount);

            // Struct vectors in a flatbuffer are filled back to front, so go last-to-first.
            var column = columnCount - 1;
            while (column >= 0)
            {
                var columnArray = recordBatch.Column(column);
                Flatbuf.FieldNode.CreateFieldNode(Builder, columnArray.Length, columnArray.NullCount);
                column--;
            }

            var nodesOffset = Builder.EndVector();

            // --- Buffers: let every column report its buffers to the collector ---
            var bufferCollector = new ArrowRecordBatchFlatBufferBuilder();

            for (column = 0; column < columnCount; column++)
            {
                recordBatch.Column(column).Accept(bufferCollector);
            }

            var collected = bufferCollector.Buffers;

            Flatbuf.RecordBatch.StartBuffersVector(Builder, collected.Count);

            // Same back-to-front rule as for the field nodes.
            var bufferIndex = collected.Count - 1;
            while (bufferIndex >= 0)
            {
                var descriptor = collected[bufferIndex];
                Flatbuf.Buffer.CreateBuffer(Builder, descriptor.Offset, descriptor.DataBuffer.Length);
                bufferIndex--;
            }

            var buffersOffset = Builder.EndVector();

            // --- Record batch message ---
            StartingWritingRecordBatch();

            var batchOffset = Flatbuf.RecordBatch.CreateRecordBatch(Builder, recordBatch.Length,
                                                                    nodesOffset,
                                                                    buffersOffset);

            long metadataLength = await WriteMessageAsync(Flatbuf.MessageHeader.RecordBatch,
                                                          batchOffset, bufferCollector.TotalLength,
                                                          cancellationToken).ConfigureAwait(false);

            // --- Body: raw buffer bytes, each rounded up to a multiple of 8 ---
            long bodyLength = 0;

            for (bufferIndex = 0; bufferIndex < collected.Count; bufferIndex++)
            {
                ArrowBuffer data = collected[bufferIndex].DataBuffer;
                if (!data.IsEmpty)
                {
                    await WriteBufferAsync(data, cancellationToken).ConfigureAwait(false);

                    int paddedLength = checked ((int)BitUtility.RoundUpToMultipleOf8(data.Length));
                    int gap = paddedLength - data.Length;
                    if (gap > 0)
                    {
                        await WritePaddingAsync(gap).ConfigureAwait(false);
                    }

                    // Empty buffers contribute nothing to the body length.
                    bodyLength += paddedLength;
                }
            }

            // Pad the whole body so its length is a multiple of 8 bytes.
            int bodyPaddingLength = CalculatePadding(bodyLength);

            await WritePaddingAsync(bodyPaddingLength).ConfigureAwait(false);

            long totalBodyLength = bodyLength + bodyPaddingLength;
            FinishedWritingRecordBatch(totalBodyLength, metadataLength);
        }
コード例 #3
0
        /// <summary>
        /// Writes a single record batch to <c>BaseStream</c>: the schema first if it has not
        /// been written yet, then the flatbuffer record-batch message, then every non-empty
        /// data buffer, and finally padding so the body length is a multiple of 8 bytes.
        /// </summary>
        /// <remarks>
        /// Stream positions are sampled through <c>BaseStream.Position</c> to compute the
        /// metadata and body lengths, so the stream must support reading its position.
        /// </remarks>
        /// <param name="recordBatch">Batch whose columns are serialized, one per schema field.</param>
        /// <param name="cancellationToken">Forwarded to the schema, message and buffer writes.</param>
        /// <returns>
        /// A <c>Block</c> holding the offset at which the message starts, the padded body
        /// length, and the metadata length.
        /// </returns>
        protected virtual async Task <Block> WriteRecordBatchInternalAsync(RecordBatch recordBatch,
                                                                           CancellationToken cancellationToken = default)
        {
            // TODO: Truncate buffers with extraneous padding / unused capacity

            if (!HasWrittenSchema)
            {
                await WriteSchemaAsync(Schema, cancellationToken).ConfigureAwait(false);

                HasWrittenSchema = true;
            }

            var recordBatchBuilder = new ArrowRecordBatchFlatBufferBuilder();

            Builder.Clear();

            // Serialize field nodes

            var fieldCount = Schema.Fields.Count;

            Flatbuf.RecordBatch.StartNodesVector(Builder, fieldCount);

            // flatbuffer struct vectors have to be created in reverse order: the builder
            // fills the vector back to front, so the struct created last lands at index 0.
            // Iterating forward here would store the nodes in reversed column order.
            for (var i = fieldCount - 1; i >= 0; i--)
            {
                var fieldArray = recordBatch.Column(i);
                Flatbuf.FieldNode.CreateFieldNode(Builder, fieldArray.Length, fieldArray.NullCount);
            }

            var fieldNodesVectorOffset = Builder.EndVector();

            // Serialize buffers

            for (var i = 0; i < fieldCount; i++)
            {
                var fieldArray = recordBatch.Column(i);
                fieldArray.Accept(recordBatchBuilder);
            }

            var buffers = recordBatchBuilder.Buffers;

            Flatbuf.RecordBatch.StartBuffersVector(Builder, buffers.Count);

            // flatbuffer struct vectors have to be created in reverse order
            for (var i = buffers.Count - 1; i >= 0; i--)
            {
                Flatbuf.Buffer.CreateBuffer(Builder,
                                            buffers[i].Offset, buffers[i].Length);
            }

            var buffersVectorOffset = Builder.EndVector();

            // Serialize record batch

            var recordBatchOffset = Flatbuf.RecordBatch.CreateRecordBatch(Builder, recordBatch.Length,
                                                                          fieldNodesVectorOffset,
                                                                          buffersVectorOffset);

            // Remember where the message starts so its length can be derived afterwards.
            var metadataOffset = BaseStream.Position;

            await WriteMessageAsync(Flatbuf.MessageHeader.RecordBatch,
                                    recordBatchOffset, recordBatchBuilder.TotalLength,
                                    cancellationToken).ConfigureAwait(false);

            var metadataLength = BaseStream.Position - metadataOffset;

            // Write buffer data

            var lengthOffset = BaseStream.Position;

            for (var i = 0; i < buffers.Count; i++)
            {
                // Empty buffers have no bytes to write.
                if (buffers[i].DataBuffer.IsEmpty)
                {
                    continue;
                }

                await WriteBufferAsync(buffers[i].DataBuffer, cancellationToken).ConfigureAwait(false);
            }

            // Write padding so the record batch message body length is a multiple of 8 bytes

            var bodyLength        = Convert.ToInt32(BaseStream.Position - lengthOffset);
            var bodyPaddingLength = CalculatePadding(bodyLength);

            await WritePaddingAsync(bodyPaddingLength).ConfigureAwait(false);

            return new Block(
                       offset: Convert.ToInt32(metadataOffset),
                       length: bodyLength + bodyPaddingLength,
                       metadataLength: Convert.ToInt32(metadataLength));
        }