/// <summary>
/// Builds the field-node flatbuffer vector and the buffer entries for a record batch
/// into <c>Builder</c>, visiting every column with a fresh
/// <see cref="ArrowRecordBatchFlatBufferBuilder"/>.
/// </summary>
/// <param name="fields">Schema fields; only the count (and recursive node count) is read.</param>
/// <param name="arrays">Column arrays, one per top-level field.</param>
/// <returns>
/// The buffer builder together with the offset of the completed field-node vector.
/// NOTE(review): the buffers vector is started and filled here, but its matching
/// <c>EndVector</c> call is left to the caller — verify callers rely on that.
/// </returns>
private Tuple<ArrowRecordBatchFlatBufferBuilder, VectorOffset> PreparingWritingRecordBatch(IReadOnlyDictionary<string, Field> fields, IReadOnlyList<IArrowArray> arrays)
{
    Builder.Clear();

    // Serialize field nodes (one node per array, including nested children).
    int fieldCount = fields.Count;
    Flatbuf.RecordBatch.StartNodesVector(Builder, CountAllNodes(fields));

    // Flatbuffer struct vectors must be written back-to-front.
    for (int index = fieldCount - 1; index >= 0; index--)
    {
        CreateSelfAndChildrenFieldNodes(arrays[index].Data);
    }

    VectorOffset nodesVector = Builder.EndVector();

    // Serialize buffers: visit each column so the builder records every buffer.
    var batchBuilder = new ArrowRecordBatchFlatBufferBuilder();
    for (int index = 0; index < fieldCount; index++)
    {
        arrays[index].Accept(batchBuilder);
    }

    IReadOnlyList<ArrowRecordBatchFlatBufferBuilder.Buffer> bufferList = batchBuilder.Buffers;
    Flatbuf.RecordBatch.StartBuffersVector(Builder, bufferList.Count);

    // Again back-to-front for the struct vector.
    for (int index = bufferList.Count - 1; index >= 0; index--)
    {
        Flatbuf.Buffer.CreateBuffer(Builder, bufferList[index].Offset, bufferList[index].DataBuffer.Length);
    }

    return Tuple.Create(batchBuilder, nodesVector);
}
/// <summary>
/// Serializes <paramref name="recordBatch"/> as an IPC RecordBatch message — the
/// flatbuffer metadata followed by the body buffers, each padded to an 8-byte
/// boundary — writing the schema first if it has not been written yet.
/// </summary>
/// <param name="recordBatch">Batch whose columns are written.</param>
/// <param name="cancellationToken">Propagated to every asynchronous write.</param>
private protected async Task WriteRecordBatchInternalAsync(RecordBatch recordBatch, CancellationToken cancellationToken = default)
{
    // TODO: Truncate buffers with extraneous padding / unused capacity
    if (!HasWrittenSchema)
    {
        await WriteSchemaAsync(Schema, cancellationToken).ConfigureAwait(false);
        HasWrittenSchema = true;
    }

    Builder.Clear();

    // Field nodes: flatbuffer struct vectors are built back-to-front.
    var columnCount = Schema.Fields.Count;
    Flatbuf.RecordBatch.StartNodesVector(Builder, columnCount);
    for (var column = columnCount - 1; column >= 0; column--)
    {
        var columnArray = recordBatch.Column(column);
        Flatbuf.FieldNode.CreateFieldNode(Builder, columnArray.Length, columnArray.NullCount);
    }
    var nodesVector = Builder.EndVector();

    // Collect every buffer by visiting each column.
    var bodyBuilder = new ArrowRecordBatchFlatBufferBuilder();
    for (var column = 0; column < columnCount; column++)
    {
        recordBatch.Column(column).Accept(bodyBuilder);
    }

    var bufferEntries = bodyBuilder.Buffers;
    Flatbuf.RecordBatch.StartBuffersVector(Builder, bufferEntries.Count);
    // Back-to-front again for the buffer struct vector.
    for (var index = bufferEntries.Count - 1; index >= 0; index--)
    {
        Flatbuf.Buffer.CreateBuffer(Builder, bufferEntries[index].Offset, bufferEntries[index].DataBuffer.Length);
    }
    var buffersVector = Builder.EndVector();

    // Record batch header plus the message envelope.
    StartingWritingRecordBatch();
    var headerOffset = Flatbuf.RecordBatch.CreateRecordBatch(Builder, recordBatch.Length, nodesVector, buffersVector);
    long metadataLength = await WriteMessageAsync(Flatbuf.MessageHeader.RecordBatch, headerOffset, bodyBuilder.TotalLength, cancellationToken).ConfigureAwait(false);

    // Body: each non-empty buffer is written, then padded out to a multiple of 8 bytes.
    long bodyLength = 0;
    foreach (var entry in bufferEntries)
    {
        ArrowBuffer dataBuffer = entry.DataBuffer;
        if (dataBuffer.IsEmpty)
        {
            continue;
        }

        await WriteBufferAsync(dataBuffer, cancellationToken).ConfigureAwait(false);

        int alignedLength = checked((int)BitUtility.RoundUpToMultipleOf8(dataBuffer.Length));
        int trailingPadding = alignedLength - dataBuffer.Length;
        if (trailingPadding > 0)
        {
            await WritePaddingAsync(trailingPadding).ConfigureAwait(false);
        }

        bodyLength += alignedLength;
    }

    // Pad the whole body so its total length is a multiple of 8 bytes as well.
    int bodyPadding = CalculatePadding(bodyLength);
    await WritePaddingAsync(bodyPadding).ConfigureAwait(false);

    FinishedWritingRecordBatch(bodyLength + bodyPadding, metadataLength);
}
/// <summary>
/// Serializes <paramref name="recordBatch"/> as an IPC RecordBatch message onto
/// <c>BaseStream</c> (writing the schema first if needed) and returns a
/// <see cref="Block"/> recording where the message landed.
/// </summary>
/// <param name="recordBatch">Batch whose columns are written.</param>
/// <param name="cancellationToken">Propagated to every asynchronous write.</param>
/// <returns>Block with the metadata offset, padded body length, and metadata length.</returns>
protected virtual async Task<Block> WriteRecordBatchInternalAsync(RecordBatch recordBatch, CancellationToken cancellationToken = default)
{
    // TODO: Truncate buffers with extraneous padding / unused capacity
    if (!HasWrittenSchema)
    {
        await WriteSchemaAsync(Schema, cancellationToken).ConfigureAwait(false);
        HasWrittenSchema = true;
    }

    var recordBatchBuilder = new ArrowRecordBatchFlatBufferBuilder();
    Builder.Clear();

    // Serialize field nodes.
    var fieldCount = Schema.Fields.Count;
    Flatbuf.RecordBatch.StartNodesVector(Builder, fieldCount);

    // BUG FIX: flatbuffer struct vectors must be created in reverse order (the
    // sibling writers in this file do exactly that); the previous forward loop
    // emitted the field nodes reversed. The offsets returned by CreateFieldNode
    // are meaningless inside a struct vector, so the unused fieldNodeOffsets
    // array has been removed.
    for (var i = fieldCount - 1; i >= 0; i--)
    {
        var fieldArray = recordBatch.Column(i);
        Flatbuf.FieldNode.CreateFieldNode(Builder, fieldArray.Length, fieldArray.NullCount);
    }

    var fieldNodesVectorOffset = Builder.EndVector();

    // Serialize buffers: visit each column so the builder records every buffer.
    for (var i = 0; i < fieldCount; i++)
    {
        var fieldArray = recordBatch.Column(i);
        fieldArray.Accept(recordBatchBuilder);
    }

    var buffers = recordBatchBuilder.Buffers;
    Flatbuf.RecordBatch.StartBuffersVector(Builder, buffers.Count);

    // Reverse order again; unused bufferOffsets array removed for the same reason.
    for (var i = buffers.Count - 1; i >= 0; i--)
    {
        // DataBuffer.Length, for consistency with the other record-batch writers
        // (the previous buffers[i].Length read a different property).
        Flatbuf.Buffer.CreateBuffer(Builder, buffers[i].Offset, buffers[i].DataBuffer.Length);
    }

    var buffersVectorOffset = Builder.EndVector();

    // Serialize the record batch header and write the message envelope,
    // measuring the metadata span via the stream position.
    var recordBatchOffset = Flatbuf.RecordBatch.CreateRecordBatch(Builder, recordBatch.Length, fieldNodesVectorOffset, buffersVectorOffset);

    var metadataOffset = BaseStream.Position;
    await WriteMessageAsync(Flatbuf.MessageHeader.RecordBatch, recordBatchOffset, recordBatchBuilder.TotalLength, cancellationToken).ConfigureAwait(false);
    var metadataLength = BaseStream.Position - metadataOffset;

    // Write buffer data.
    // NOTE(review): unlike the sibling writer, individual buffers are not padded
    // to 8 bytes here — verify that the offsets recorded by
    // ArrowRecordBatchFlatBufferBuilder match this unpadded layout.
    var lengthOffset = BaseStream.Position;
    for (var i = 0; i < buffers.Count; i++)
    {
        if (buffers[i].DataBuffer.IsEmpty)
        {
            continue;
        }
        await WriteBufferAsync(buffers[i].DataBuffer, cancellationToken).ConfigureAwait(false);
    }

    // Write padding so the record batch message body length is a multiple of 8 bytes.
    var bodyLength = Convert.ToInt32(BaseStream.Position - lengthOffset);
    var bodyPaddingLength = CalculatePadding(bodyLength);
    await WritePaddingAsync(bodyPaddingLength).ConfigureAwait(false);

    return new Block(
        offset: Convert.ToInt32(metadataOffset),
        length: bodyLength + bodyPaddingLength,
        metadataLength: Convert.ToInt32(metadataLength));
}