/// <summary>
/// Lazily enumerates the null bitmap buffers of this column as immutable <see cref="ReadOnlyMemory{Byte}"/> values in the Apache Arrow format.
/// </summary>
/// <remarks>Each returned buffer encodes the null values for its corresponding data buffer.</remarks>
/// <returns>An enumeration of <see cref="ReadOnlyMemory{Byte}"/>, one per null bitmap buffer held by the column container.</returns>
public IEnumerable<ReadOnlyMemory<byte>> GetReadOnlyNullBitMapBuffers()
{
    // Index-based walk: the count is re-read on every step, exactly like a classic for loop.
    int bufferIndex = 0;
    while (bufferIndex < _columnContainer.NullBitMapBuffers.Count)
    {
        ReadOnlyDataFrameBuffer<byte> nullBitMap = _columnContainer.NullBitMapBuffers[bufferIndex];
        yield return nullBitMap.RawReadOnlyMemory;
        bufferIndex++;
    }
}
/// <summary>
/// Lazily enumerates the offsets buffers of this column as immutable <see cref="ReadOnlyMemory{Int32}"/> values.
/// Each offsets buffer indexes into its corresponding data buffer; the Apache Arrow format specifies how the
/// offsets encode the length of each value stored there.
/// </summary>
/// <returns>An enumeration of <see cref="ReadOnlyMemory{Int32}"/> objects, one per offsets buffer.</returns>
public IEnumerable<ReadOnlyMemory<int>> GetReadOnlyOffsetsBuffers()
{
    // Index-based walk: the count is re-read on every step, exactly like a classic for loop.
    int bufferIndex = 0;
    while (bufferIndex < _offsetsBuffers.Count)
    {
        ReadOnlyDataFrameBuffer<int> offsets = _offsetsBuffers[bufferIndex];
        yield return offsets.ReadOnlyMemory;
        bufferIndex++;
    }
}
/// <summary>
/// Lazily enumerates the underlying value buffers of this column as immutable <see cref="ReadOnlyMemory{T}"/> values.
/// </summary>
/// <remarks>Null values are encoded in the buffers returned by <c>GetReadOnlyNullBitMapBuffers</c> in the Apache Arrow format.</remarks>
/// <returns>An enumeration of <see cref="ReadOnlyMemory{T}"/>, one per data buffer held by the column container.</returns>
public IEnumerable<ReadOnlyMemory<T>> GetReadOnlyDataBuffers()
{
    // Index-based walk: the count is re-read on every step, exactly like a classic for loop.
    int bufferIndex = 0;
    while (bufferIndex < _columnContainer.Buffers.Count)
    {
        ReadOnlyDataFrameBuffer<T> values = _columnContainer.Buffers[bufferIndex];
        yield return values.ReadOnlyMemory;
        bufferIndex++;
    }
}
/// <summary>
/// Lazily enumerates the underlying value buffers of this column as immutable buffers in the Apache Arrow format.
/// </summary>
/// <remarks>
/// <see langword="null" /> values are encoded in the buffers returned by <c>GetReadOnlyNullBitMapBuffers</c> in the Apache Arrow format.
/// The offsets buffers returned by <c>GetReadOnlyOffsetsBuffers</c> can be used to delineate each value.
/// </remarks>
/// <returns>An enumeration of <see cref="ReadOnlyMemory{Byte}"/> whose elements are the raw data buffers for the UTF8 string values.</returns>
public IEnumerable<ReadOnlyMemory<byte>> GetReadOnlyDataBuffers()
{
    // Index-based walk: the count is re-read on every step, exactly like a classic for loop.
    int bufferIndex = 0;
    while (bufferIndex < _dataBuffers.Count)
    {
        ReadOnlyDataFrameBuffer<byte> utf8Bytes = _dataBuffers[bufferIndex];
        yield return utf8Bytes.RawReadOnlyMemory;
        bufferIndex++;
    }
}
/// <summary>
/// Initializes the container with a single data buffer and a single null bitmap buffer built from raw bytes.
/// </summary>
/// <param name="buffer">
/// Raw bytes of the values. When empty, a mutable buffer holding <paramref name="length"/> default values is allocated instead.
/// </param>
/// <param name="nullBitMap">
/// Raw bytes of the null bitmap, one bit per value. May be empty only when <paramref name="nullCount"/> is 0.
/// </param>
/// <param name="length">The number of values in the container.</param>
/// <param name="nullCount">The number of null values in the container.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="nullBitMap"/> is empty but <paramref name="nullCount"/> is not 0.
/// </exception>
/// <exception cref="ArgumentException">
/// <paramref name="nullBitMap"/> holds fewer bytes than are needed to cover <paramref name="length"/> values.
/// </exception>
public PrimitiveColumnContainer(ReadOnlyMemory<byte> buffer, ReadOnlyMemory<byte> nullBitMap, int length, int nullCount)
{
    ReadOnlyDataFrameBuffer<T> dataBuffer;
    if (buffer.IsEmpty)
    {
        // No values supplied: allocate a mutable buffer of `length` default values.
        DataFrameBuffer<T> mutableBuffer = new DataFrameBuffer<T>();
        mutableBuffer.EnsureCapacity(length);
        mutableBuffer.Length = length;
        mutableBuffer.RawSpan.Fill(default(T));
        dataBuffer = mutableBuffer;
    }
    else
    {
        dataBuffer = new ReadOnlyDataFrameBuffer<T>(buffer, length);
    }
    Buffers.Add(dataBuffer);

    // One bit per value, rounded up to whole bytes.
    int bitMapBufferLength = (length + 7) / 8;
    ReadOnlyDataFrameBuffer<byte> nullDataFrameBuffer;
    if (nullBitMap.IsEmpty)
    {
        if (nullCount != 0)
        {
            // ArgumentNullException takes (paramName, message), in that order.
            throw new ArgumentNullException(nameof(nullBitMap), Strings.InconsistentNullBitMapAndNullCount);
        }

        if (!buffer.IsEmpty)
        {
            // Create a new bitMap with all the bits up to length set
            var bitMap = new byte[bitMapBufferLength];
            bitMap.AsSpan().Fill(255);

            // A zero-length column has no last byte to mask; indexing it would be out of range.
            if (bitMapBufferLength > 0)
            {
                // Keep only the bits of the last byte that map to real rows
                // (a full last byte yields 1 << 8, i.e. a mask of 255).
                int lastByte = 1 << (length - (bitMapBufferLength - 1) * 8);
                bitMap[bitMapBufferLength - 1] = (byte)(lastByte - 1);
            }

            nullDataFrameBuffer = new DataFrameBuffer<byte>(bitMap, bitMapBufferLength);
        }
        else
        {
            nullDataFrameBuffer = new DataFrameBuffer<byte>();
        }
    }
    else
    {
        if (nullBitMap.Length < bitMapBufferLength)
        {
            throw new ArgumentException(Strings.InconsistentNullBitMapAndLength, nameof(nullBitMap));
        }
        nullDataFrameBuffer = new ReadOnlyDataFrameBuffer<byte>(nullBitMap, bitMapBufferLength);
    }
    NullBitMapBuffers.Add(nullDataFrameBuffer);

    Length = length;
    NullCount = nullCount;
}
/// <summary>
/// Reads the bit for the row at <paramref name="index"/> from the null bitmap buffer that contains it.
/// </summary>
/// <param name="index">Zero-based row index within the whole column.</param>
/// <returns><see langword="true"/> when the row's bit in the null bitmap is 1; otherwise <see langword="false"/>.</returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="index"/> is negative or not less than the column length.</exception>
private bool GetValidityBit(long index)
{
    // The unsigned comparison rejects negative indices as well as index >= Length in a single check.
    if ((ulong)index >= (ulong)Length)
    {
        throw new ArgumentOutOfRangeException(nameof(index));
    }

    // First find the right bitMapBuffer
    int bitMapIndex = GetBufferIndexContainingRowIndex(index, out int indexInBuffer);
    Debug.Assert(_nullBitMapBuffers.Count > bitMapIndex);
    ReadOnlyDataFrameBuffer<byte> bitMapBuffer = _nullBitMapBuffers[bitMapIndex];

    // Both the byte and the bit position must be relative to the selected buffer,
    // not to the column-wide row index.
    int bitMapBufferIndex = (int)((uint)indexInBuffer / 8);
    Debug.Assert(bitMapBuffer.Length > bitMapBufferIndex);
    byte curBitMap = bitMapBuffer[bitMapBufferIndex];
    return ((curBitMap >> (indexInBuffer & 7)) & 1) != 0;
}
/// <summary>
/// Constructs an <see cref="ArrowStringDataFrameColumn"/> with the given <paramref name="name"/>, <paramref name="length"/> and <paramref name="nullCount"/>. The <paramref name="values"/>, <paramref name="offsets"/> and <paramref name="nullBits"/> are the contents of the column in the Arrow format.
/// </summary>
/// <param name="name">The name of the column.</param>
/// <param name="values">The Arrow formatted string values in this column.</param>
/// <param name="offsets">The Arrow formatted offsets in this column.</param>
/// <param name="nullBits">The Arrow formatted null bits in this column.</param>
/// <param name="length">The length of the column.</param>
/// <param name="nullCount">The number of <see langword="null" /> values in this column.</param>
/// <exception cref="ArgumentException">The offsets buffer does not hold exactly <paramref name="length"/> + 1 entries.</exception>
public ArrowStringDataFrameColumn(string name, ReadOnlyMemory<byte> values, ReadOnlyMemory<byte> offsets, ReadOnlyMemory<byte> nullBits, int length, int nullCount) : base(name, length, typeof(string))
{
    ReadOnlyDataFrameBuffer<byte> dataBuffer = new ReadOnlyDataFrameBuffer<byte>(values, values.Length);
    // One more offset than there are values.
    ReadOnlyDataFrameBuffer<int> offsetBuffer = new ReadOnlyDataFrameBuffer<int>(offsets, length + 1);
    ReadOnlyDataFrameBuffer<byte> nullBitMapBuffer = new ReadOnlyDataFrameBuffer<byte>(nullBits, nullBits.Length);

    if (length + 1 != offsetBuffer.Length)
    {
        // Report the offending parameter by name and describe the mismatch in the message.
        throw new ArgumentException($"Expected {length + 1} offsets but the offsets buffer holds {offsetBuffer.Length}.", nameof(offsets));
    }

    _dataBuffers = new List<ReadOnlyDataFrameBuffer<byte>>();
    _offsetsBuffers = new List<ReadOnlyDataFrameBuffer<int>>();
    _nullBitMapBuffers = new List<ReadOnlyDataFrameBuffer<byte>>();
    _dataBuffers.Add(dataBuffer);
    _offsetsBuffers.Add(offsetBuffer);
    _nullBitMapBuffers.Add(nullBitMapBuffer);

    _nullCount = nullCount;
}
private void GetSortIndices(IComparer <T> comparer, out PrimitiveDataFrameColumn <long> columnSortIndices) { List <List <int> > bufferSortIndices = new List <List <int> >(_columnContainer.Buffers.Count); // Sort each buffer first for (int b = 0; b < _columnContainer.Buffers.Count; b++) { ReadOnlyDataFrameBuffer <T> buffer = _columnContainer.Buffers[b]; ReadOnlySpan <byte> nullBitMapSpan = _columnContainer.NullBitMapBuffers[b].ReadOnlySpan; int[] sortIndices = new int[buffer.Length]; for (int i = 0; i < buffer.Length; i++) { sortIndices[i] = i; } IntrospectiveSort(buffer.ReadOnlySpan, buffer.Length, sortIndices, comparer); // Bug fix: QuickSort is not stable. When PrimitiveDataFrameColumn has null values and default values, they move around List <int> nonNullSortIndices = new List <int>(); for (int i = 0; i < sortIndices.Length; i++) { if (_columnContainer.IsValid(nullBitMapSpan, sortIndices[i])) { nonNullSortIndices.Add(sortIndices[i]); } } bufferSortIndices.Add(nonNullSortIndices); } // Simple merge sort to build the full column's sort indices ValueTuple <T, int> GetFirstNonNullValueAndBufferIndexStartingAtIndex(int bufferIndex, int startIndex) { int index = bufferSortIndices[bufferIndex][startIndex]; T value; ReadOnlyMemory <byte> buffer = _columnContainer.Buffers[bufferIndex].ReadOnlyBuffer; ReadOnlyMemory <T> typedBuffer = Unsafe.As <ReadOnlyMemory <byte>, ReadOnlyMemory <T> >(ref buffer); if (!typedBuffer.IsEmpty) { bool isArray = MemoryMarshal.TryGetArray(typedBuffer, out ArraySegment <T> arraySegment); if (isArray) { value = arraySegment.Array[index + arraySegment.Offset]; } else { value = _columnContainer.Buffers[bufferIndex][index]; } } else { value = _columnContainer.Buffers[bufferIndex][index]; } return(value, startIndex); } SortedDictionary <T, List <ValueTuple <int, int> > > heapOfValueAndListOfTupleOfSortAndBufferIndex = new SortedDictionary <T, List <ValueTuple <int, int> > >(comparer); IList <ReadOnlyDataFrameBuffer <T> > buffers = 
_columnContainer.Buffers; for (int i = 0; i < buffers.Count; i++) { ReadOnlyDataFrameBuffer <T> buffer = buffers[i]; if (bufferSortIndices[i].Count == 0) { // All nulls continue; } ValueTuple <T, int> valueAndBufferIndex = GetFirstNonNullValueAndBufferIndexStartingAtIndex(i, 0); if (heapOfValueAndListOfTupleOfSortAndBufferIndex.ContainsKey(valueAndBufferIndex.Item1)) { heapOfValueAndListOfTupleOfSortAndBufferIndex[valueAndBufferIndex.Item1].Add((valueAndBufferIndex.Item2, i)); } else { heapOfValueAndListOfTupleOfSortAndBufferIndex.Add(valueAndBufferIndex.Item1, new List <ValueTuple <int, int> >() { (valueAndBufferIndex.Item2, i) });