/// <summary>
/// Lazily enumerates the offsets buffers of this column as immutable
/// <see cref="ReadOnlyMemory{T}"/> of <see cref="int"/>. Each buffer holds offsets into its
/// corresponding Data buffer; the Apache Arrow format specifies how the offsets encode the
/// length of each value in the Data buffer.
/// </summary>
/// <returns>One <see cref="ReadOnlyMemory{T}"/> of <see cref="int"/> per offsets buffer, in buffer order.</returns>
public IEnumerable<ReadOnlyMemory<int>> GetReadOnlyOffsetsBuffers()
{
    int bufferIndex = 0;
    // Count is re-read on every step, exactly like the condition of an index-based for loop.
    while (bufferIndex < _offsetsBuffers.Count)
    {
        yield return _offsetsBuffers[bufferIndex].ReadOnlyMemory;
        bufferIndex++;
    }
}
/// <summary>
/// Lazily enumerates the null bitmap buffers of this column as immutable
/// <see cref="ReadOnlyMemory{T}"/> of <see cref="byte"/>, representing null values in the
/// Apache Arrow format.
/// </summary>
/// <remarks>
/// Each returned buffer encodes the indices of null values in its corresponding Data buffer.
/// </remarks>
/// <returns>One <see cref="ReadOnlyMemory{T}"/> of <see cref="byte"/> per null bitmap buffer, in buffer order.</returns>
public IEnumerable<ReadOnlyMemory<byte>> GetReadOnlyNullBitMapBuffers()
{
    int bufferIndex = 0;
    // Count is re-read on every step, exactly like the condition of an index-based for loop.
    while (bufferIndex < _nullBitMapBuffers.Count)
    {
        yield return _nullBitMapBuffers[bufferIndex].RawReadOnlyMemory;
        bufferIndex++;
    }
}
/// <summary>
/// Lazily enumerates the immutable memory buffers that hold the underlying values of this column.
/// </summary>
/// <remarks>
/// Null values are encoded in the buffers returned by GetReadOnlyNullBitmapBuffers in the
/// Apache Arrow format.
/// </remarks>
/// <returns>
/// One <see cref="ReadOnlyMemory{T}"/> of <typeparamref name="T"/> per data buffer, in buffer order.
/// </returns>
public IEnumerable<ReadOnlyMemory<T>> GetReadOnlyDataBuffers()
{
    int bufferIndex = 0;
    // The container and its Count are re-read on every step, as in an index-based for loop.
    while (bufferIndex < _columnContainer.Buffers.Count)
    {
        yield return _columnContainer.Buffers[bufferIndex].ReadOnlyMemory;
        bufferIndex++;
    }
}
private void GetSortIndices(IComparer <T> comparer, out PrimitiveColumn <long> columnSortIndices) { List <List <int> > bufferSortIndices = new List <List <int> >(_columnContainer.Buffers.Count); // Sort each buffer first for (int b = 0; b < _columnContainer.Buffers.Count; b++) { ReadOnlyDataFrameBuffer <T> buffer = _columnContainer.Buffers[b]; int[] sortIndices = new int[buffer.Length]; for (int i = 0; i < buffer.Length; i++) { sortIndices[i] = i; } IntrospectiveSort(buffer.ReadOnlySpan, buffer.Length, sortIndices, comparer); // Bug fix: QuickSort is not stable. When PrimitiveColumn has null values and default values, they move around List <int> nonNullSortIndices = new List <int>(); for (int i = 0; i < sortIndices.Length; i++) { if (IsValid(sortIndices[i] + b * ReadOnlyDataFrameBuffer <T> .MaxCapacity)) { nonNullSortIndices.Add(sortIndices[i]); } } bufferSortIndices.Add(nonNullSortIndices); } // Simple merge sort to build the full column's sort indices ValueTuple <T, int> GetFirstNonNullValueAndBufferIndexStartingAtIndex(int bufferIndex, int startIndex) { T value = _columnContainer.Buffers[bufferIndex][bufferSortIndices[bufferIndex][startIndex]]; long rowIndex = bufferSortIndices[bufferIndex][startIndex] + bufferIndex * ReadOnlyDataFrameBuffer <T> .MaxCapacity; return(value, startIndex); } SortedDictionary <T, List <ValueTuple <int, int> > > heapOfValueAndListOfTupleOfSortAndBufferIndex = new SortedDictionary <T, List <ValueTuple <int, int> > >(comparer); IList <ReadOnlyDataFrameBuffer <T> > buffers = _columnContainer.Buffers; for (int i = 0; i < buffers.Count; i++) { ReadOnlyDataFrameBuffer <T> buffer = buffers[i]; if (bufferSortIndices[i].Count == 0) { // All nulls continue; } ValueTuple <T, int> valueAndBufferIndex = GetFirstNonNullValueAndBufferIndexStartingAtIndex(i, 0); if (heapOfValueAndListOfTupleOfSortAndBufferIndex.ContainsKey(valueAndBufferIndex.Item1)) { 
heapOfValueAndListOfTupleOfSortAndBufferIndex[valueAndBufferIndex.Item1].Add((valueAndBufferIndex.Item2, i)); } else { heapOfValueAndListOfTupleOfSortAndBufferIndex.Add(valueAndBufferIndex.Item1, new List <ValueTuple <int, int> >() { (valueAndBufferIndex.Item2, i) });
/// <summary>
/// Reads the validity bit for the row at <paramref name="index"/> from the null bitmap buffers.
/// </summary>
/// <remarks>
/// In the Apache Arrow format a set validity bit marks a non-null value.
/// </remarks>
/// <param name="index">Zero-based row index; must be in the range [0, Length).</param>
/// <returns><c>true</c> if the bit for the row is set; otherwise <c>false</c>.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="index"/> is negative or greater than or equal to Length.
/// </exception>
public bool GetValidityBit(long index)
{
    // The unsigned comparison also rejects negative indices. ">=" (not ">") because
    // index == Length is one past the last valid row.
    if ((ulong)index >= (ulong)Length)
    {
        throw new ArgumentOutOfRangeException(nameof(index));
    }

    // First find the right bitMapBuffer
    int bitMapIndex = GetBufferIndexContainingRowIndex(index, out int indexInBuffer);
    Debug.Assert(_nullBitMapBuffers.Count > bitMapIndex);
    ReadOnlyDataFrameBuffer<byte> bitMapBuffer = _nullBitMapBuffers[bitMapIndex];

    // Both the byte and the bit are located with the buffer-relative index. Using the
    // column-wide index for the byte would read the wrong byte once the column spans more
    // than one buffer.
    // NOTE(review): assumes indexInBuffer is the row position inside the selected buffer - confirm
    // against GetBufferIndexContainingRowIndex.
    int bitMapBufferIndex = indexInBuffer / 8;
    Debug.Assert(bitMapBuffer.Length > bitMapBufferIndex);
    byte curBitMap = bitMapBuffer[bitMapBufferIndex];
    return ((curBitMap >> (indexInBuffer & 7)) & 1) != 0;
}
/// <summary>
/// Creates a string column over existing Apache Arrow formatted buffers. The buffers are
/// wrapped in read-only buffer objects and stored as the first (and only) entry of the
/// column's data, offsets and null bitmap buffer lists.
/// </summary>
/// <param name="name">The name of the column.</param>
/// <param name="values">The bytes of the string values (the Arrow data buffer).</param>
/// <param name="offsets">The offsets buffer; it is wrapped with a length of <paramref name="length"/> + 1 entries.</param>
/// <param name="nullBits">The null bitmap buffer.</param>
/// <param name="length">The number of rows in the column.</param>
/// <param name="nullCount">The number of null values in the column.</param>
/// <exception cref="ArgumentException">
/// The wrapped offsets buffer does not report <paramref name="length"/> + 1 entries.
/// </exception>
public ArrowStringColumn(string name, ReadOnlyMemory<byte> values, ReadOnlyMemory<byte> offsets, ReadOnlyMemory<byte> nullBits, int length, int nullCount) : base(name, length, typeof(string))
{
    ReadOnlyDataFrameBuffer<byte> dataBuffer = new ReadOnlyDataFrameBuffer<byte>(values, values.Length);
    ReadOnlyDataFrameBuffer<int> offsetBuffer = new ReadOnlyDataFrameBuffer<int>(offsets, length + 1);
    ReadOnlyDataFrameBuffer<byte> nullBitMapBuffer = new ReadOnlyDataFrameBuffer<byte>(nullBits, nullBits.Length);

    if (length + 1 != offsetBuffer.Length)
    {
        // Pass a real message and the name of the offending *parameter*; the single-argument
        // ArgumentException constructor would treat the name as the message and leave
        // ParamName unset.
        throw new ArgumentException("The offsets buffer must contain exactly length + 1 entries.", nameof(offsets));
    }

    _dataBuffers = new List<ReadOnlyDataFrameBuffer<byte>> { dataBuffer };
    _offsetsBuffers = new List<ReadOnlyDataFrameBuffer<int>> { offsetBuffer };
    _nullBitMapBuffers = new List<ReadOnlyDataFrameBuffer<byte>> { nullBitMapBuffer };
    _nullCount = nullCount;
}
private void GetSortIndices(IComparer <T> comparer, out PrimitiveColumn <long> columnSortIndices) { List <List <int> > bufferSortIndices = new List <List <int> >(_columnContainer.Buffers.Count); // Sort each buffer first for (int b = 0; b < _columnContainer.Buffers.Count; b++) { ReadOnlyDataFrameBuffer <T> buffer = _columnContainer.Buffers[b]; ReadOnlySpan <byte> nullBitMapSpan = _columnContainer.NullBitMapBuffers[b].ReadOnlySpan; int[] sortIndices = new int[buffer.Length]; for (int i = 0; i < buffer.Length; i++) { sortIndices[i] = i; } IntrospectiveSort(buffer.ReadOnlySpan, buffer.Length, sortIndices, comparer); // Bug fix: QuickSort is not stable. When PrimitiveColumn has null values and default values, they move around List <int> nonNullSortIndices = new List <int>(); for (int i = 0; i < sortIndices.Length; i++) { if (_columnContainer.IsValid(nullBitMapSpan, sortIndices[i])) { nonNullSortIndices.Add(sortIndices[i]); } } bufferSortIndices.Add(nonNullSortIndices); } // Simple merge sort to build the full column's sort indices ValueTuple <T, int> GetFirstNonNullValueAndBufferIndexStartingAtIndex(int bufferIndex, int startIndex) { int index = bufferSortIndices[bufferIndex][startIndex]; T value; ReadOnlyMemory <byte> buffer = _columnContainer.Buffers[bufferIndex].ReadOnlyBuffer; ReadOnlyMemory <T> typedBuffer = Unsafe.As <ReadOnlyMemory <byte>, ReadOnlyMemory <T> >(ref buffer); if (!typedBuffer.IsEmpty) { bool isArray = MemoryMarshal.TryGetArray(typedBuffer, out ArraySegment <T> arraySegment); if (isArray) { value = arraySegment.Array[index + arraySegment.Offset]; } else { value = _columnContainer.Buffers[bufferIndex][index]; } } else { value = _columnContainer.Buffers[bufferIndex][index]; } return(value, startIndex); } SortedDictionary <T, List <ValueTuple <int, int> > > heapOfValueAndListOfTupleOfSortAndBufferIndex = new SortedDictionary <T, List <ValueTuple <int, int> > >(comparer); IList <ReadOnlyDataFrameBuffer <T> > buffers = _columnContainer.Buffers; for (int 
i = 0; i < buffers.Count; i++) { ReadOnlyDataFrameBuffer <T> buffer = buffers[i]; if (bufferSortIndices[i].Count == 0) { // All nulls continue; } ValueTuple <T, int> valueAndBufferIndex = GetFirstNonNullValueAndBufferIndexStartingAtIndex(i, 0); if (heapOfValueAndListOfTupleOfSortAndBufferIndex.ContainsKey(valueAndBufferIndex.Item1)) { heapOfValueAndListOfTupleOfSortAndBufferIndex[valueAndBufferIndex.Item1].Add((valueAndBufferIndex.Item2, i)); } else { heapOfValueAndListOfTupleOfSortAndBufferIndex.Add(valueAndBufferIndex.Item1, new List <ValueTuple <int, int> >() { (valueAndBufferIndex.Item2, i) });