/// <summary>
 /// Lazily enumerates the raw, read-only memory of every null bitmap buffer held by the column container.
 /// </summary>
 /// <remarks>
 /// Each <see cref="ReadOnlyMemory{Byte}"/> encodes, in the Apache Arrow format, the null values of its corresponding data buffer.
 /// </remarks>
 /// <returns>An enumeration of <see cref="ReadOnlyMemory{Byte}"/>, one element per null bitmap buffer, in container order.</returns>
 public IEnumerable<ReadOnlyMemory<byte>> GetReadOnlyNullBitMapBuffers()
 {
     int position = 0;

     // The count is re-read on every pass because enumeration is deferred.
     while (position < _columnContainer.NullBitMapBuffers.Count)
     {
         yield return _columnContainer.NullBitMapBuffers[position].RawReadOnlyMemory;
         position++;
     }
 }
예제 #2
0
 /// <summary>
 /// Lazily enumerates the offsets buffers of this column as immutable <see cref="ReadOnlyMemory{Int32}"/> instances.
 /// Per the Apache Arrow format, an offsets buffer encodes the length of each value in its corresponding data buffer.
 /// </summary>
 /// <returns>An enumeration of <see cref="ReadOnlyMemory{Int32}"/> objects, one per offsets buffer.</returns>
 public IEnumerable<ReadOnlyMemory<int>> GetReadOnlyOffsetsBuffers()
 {
     int bufferIndex = 0;

     // The count is re-read on every pass because enumeration is deferred.
     while (bufferIndex < _offsetsBuffers.Count)
     {
         yield return _offsetsBuffers[bufferIndex].ReadOnlyMemory;
         bufferIndex++;
     }
 }
 /// <summary>
 /// Lazily enumerates the data buffers of the column container as immutable, typed memory blocks.
 /// </summary>
 /// <remarks>
 /// Null values are not marked in these buffers; they are encoded, in the Apache Arrow format,
 /// in the buffers returned by GetReadOnlyNullBitMapBuffers.
 /// </remarks>
 /// <returns>An enumeration of <see cref="ReadOnlyMemory{T}"/>, one element per data buffer, in container order.</returns>
 public IEnumerable<ReadOnlyMemory<T>> GetReadOnlyDataBuffers()
 {
     int cursor = 0;

     // The count is re-read on every pass because enumeration is deferred.
     while (cursor < _columnContainer.Buffers.Count)
     {
         yield return _columnContainer.Buffers[cursor].ReadOnlyMemory;
         cursor++;
     }
 }
예제 #4
0
 /// <summary>
 /// Lazily enumerates the raw, read-only memory of every data buffer of this column, in the Apache Arrow format.
 /// </summary>
 /// <remarks>
 /// <see langword="null" /> values are encoded separately in the column's null bitmap buffers (Apache Arrow format).
 /// The offsets buffers returned by GetReadOnlyOffsetsBuffers can be used to delineate each value.
 /// </remarks>
 /// <returns>An enumeration of <see cref="ReadOnlyMemory{Byte}"/> whose elements are the raw data buffers for the UTF8 string values.</returns>
 public IEnumerable<ReadOnlyMemory<byte>> GetReadOnlyDataBuffers()
 {
     int slot = 0;

     // The count is re-read on every pass because enumeration is deferred.
     while (slot < _dataBuffers.Count)
     {
         yield return _dataBuffers[slot].RawReadOnlyMemory;
         slot++;
     }
 }
        /// <summary>
        /// Initializes the container from a raw data buffer and a raw null (validity) bitmap.
        /// </summary>
        /// <param name="buffer">Raw bytes of the values. When empty, a mutable buffer holding <paramref name="length"/> default values is created instead.</param>
        /// <param name="nullBitMap">Raw bytes of the null bitmap. May be empty only when <paramref name="nullCount"/> is 0.</param>
        /// <param name="length">Value assigned to <c>Length</c>; also determines the bitmap size, (length + 7) / 8 bytes.</param>
        /// <param name="nullCount">Value assigned to <c>NullCount</c>.</param>
        /// <exception cref="ArgumentNullException"><paramref name="nullBitMap"/> is empty while <paramref name="nullCount"/> is non-zero.</exception>
        /// <exception cref="ArgumentException"><paramref name="nullBitMap"/> is non-empty but shorter than (length + 7) / 8 bytes.</exception>
        public PrimitiveColumnContainer(ReadOnlyMemory<byte> buffer, ReadOnlyMemory<byte> nullBitMap, int length, int nullCount)
        {
            ReadOnlyDataFrameBuffer<T> dataBuffer;

            if (buffer.IsEmpty)
            {
                // No backing data supplied: allocate a mutable buffer of default values.
                DataFrameBuffer<T> mutableBuffer = new DataFrameBuffer<T>();
                mutableBuffer.EnsureCapacity(length);
                mutableBuffer.Length = length;
                mutableBuffer.RawSpan.Fill(default(T));
                dataBuffer = mutableBuffer;
            }
            else
            {
                dataBuffer = new ReadOnlyDataFrameBuffer<T>(buffer, length);
            }
            Buffers.Add(dataBuffer);

            // One bit per value, rounded up to whole bytes.
            int bitMapBufferLength = (length + 7) / 8;
            ReadOnlyDataFrameBuffer<byte> nullDataFrameBuffer;

            if (nullBitMap.IsEmpty)
            {
                if (nullCount != 0)
                {
                    // ArgumentNullException(string, string) takes the parameter name first, then the message.
                    throw new ArgumentNullException(nameof(nullBitMap), Strings.InconsistentNullBitMapAndNullCount);
                }
                if (!buffer.IsEmpty)
                {
                    // Create a new bitMap with all the bits up to length set
                    var bitMap = new byte[bitMapBufferLength];
                    bitMap.AsSpan().Fill(255);

                    // Guard against length == 0, where there is no last byte to trim
                    // (indexing bitMap[-1] would throw IndexOutOfRangeException).
                    if (bitMapBufferLength > 0)
                    {
                        // Number of bits used in the last byte is 1..8; a shift by 8 yields 256, so the mask stays 0xFF.
                        int lastByte = 1 << (length - (bitMapBufferLength - 1) * 8);
                        bitMap[bitMapBufferLength - 1] = (byte)(lastByte - 1);
                    }
                    nullDataFrameBuffer = new DataFrameBuffer<byte>(bitMap, bitMapBufferLength);
                }
                else
                {
                    nullDataFrameBuffer = new DataFrameBuffer<byte>();
                }
            }
            else
            {
                if (nullBitMap.Length < bitMapBufferLength)
                {
                    throw new ArgumentException(Strings.InconsistentNullBitMapAndLength, nameof(nullBitMap));
                }
                nullDataFrameBuffer = new ReadOnlyDataFrameBuffer<byte>(nullBitMap, bitMapBufferLength);
            }
            NullBitMapBuffers.Add(nullDataFrameBuffer);
            Length = length;
            NullCount = nullCount;
        }
예제 #6
0
        /// <summary>
        /// Reads the validity bit of the row at <paramref name="index"/> from the null bitmap buffers.
        /// </summary>
        /// <param name="index">Zero-based row index; must be non-negative and less than <c>Length</c>.</param>
        /// <returns><see langword="true"/> when the row's bit is set in the bitmap; otherwise <see langword="false"/>.</returns>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="index"/> is negative or not less than <c>Length</c>.</exception>
        private bool GetValidityBit(long index)
        {
            // The unsigned comparison also rejects negative indices. index == Length is one past
            // the last row, so it must be rejected as well (hence >= rather than >).
            if ((ulong)index >= (ulong)Length)
            {
                throw new ArgumentOutOfRangeException(nameof(index));
            }
            // First find the right bitMapBuffer
            int bitMapIndex = GetBufferIndexContainingRowIndex(index, out int indexInBuffer);

            Debug.Assert(_nullBitMapBuffers.Count > bitMapIndex);
            ReadOnlyDataFrameBuffer<byte> bitMapBuffer = _nullBitMapBuffers[bitMapIndex];

            // Locate the byte relative to the selected buffer, consistent with the bit position below.
            // Using the column-wide index here would point past the buffer for any buffer after the first.
            int bitMapBufferIndex = (int)((uint)indexInBuffer / 8);

            Debug.Assert(bitMapBuffer.Length > bitMapBufferIndex);
            byte curBitMap = bitMapBuffer[bitMapBufferIndex];

            // Low three bits of the in-buffer index select the bit within the byte.
            return ((curBitMap >> (indexInBuffer & 7)) & 1) != 0;
        }
예제 #7
0
        /// <summary>
        /// Constructs an <see cref="ArrowStringDataFrameColumn"/> with the given <paramref name="name"/>, <paramref name="length"/> and <paramref name="nullCount"/>. The <paramref name="values"/>, <paramref name="offsets"/> and <paramref name="nullBits"/> are the contents of the column in the Arrow format.
        /// </summary>
        /// <param name="name">The name of the column.</param>
        /// <param name="values">The Arrow formatted string values in this column.</param>
        /// <param name="offsets">The Arrow formatted offsets in this column.</param>
        /// <param name="nullBits">The Arrow formatted null bits in this column.</param>
        /// <param name="length">The length of the column.</param>
        /// <param name="nullCount">The number of <see langword="null" /> values in this column.</param>
        /// <exception cref="ArgumentException">The offsets buffer does not report <paramref name="length"/> + 1 entries.</exception>
        public ArrowStringDataFrameColumn(string name, ReadOnlyMemory<byte> values, ReadOnlyMemory<byte> offsets, ReadOnlyMemory<byte> nullBits, int length, int nullCount) : base(name, length, typeof(string))
        {
            ReadOnlyDataFrameBuffer<byte> dataBuffer = new ReadOnlyDataFrameBuffer<byte>(values, values.Length);
            // One offset per value plus a trailing end offset, hence length + 1 entries.
            ReadOnlyDataFrameBuffer<int> offsetBuffer = new ReadOnlyDataFrameBuffer<int>(offsets, length + 1);
            ReadOnlyDataFrameBuffer<byte> nullBitMapBuffer = new ReadOnlyDataFrameBuffer<byte>(nullBits, nullBits.Length);

            // NOTE(review): whether this check can ever fail depends on how ReadOnlyDataFrameBuffer
            // derives Length from the constructor arguments - confirm against that type.
            if (length + 1 != offsetBuffer.Length)
            {
                // Report the offending *parameter* (offsets) with a real message; the single-argument
                // ArgumentException constructor treats its argument as the message, not the parameter name.
                throw new ArgumentException("The offsets buffer must contain length + 1 entries.", nameof(offsets));
            }

            _dataBuffers = new List<ReadOnlyDataFrameBuffer<byte>>();
            _offsetsBuffers = new List<ReadOnlyDataFrameBuffer<int>>();
            _nullBitMapBuffers = new List<ReadOnlyDataFrameBuffer<byte>>();

            _dataBuffers.Add(dataBuffer);
            _offsetsBuffers.Add(offsetBuffer);
            _nullBitMapBuffers.Add(nullBitMapBuffer);

            _nullCount = nullCount;
        }
예제 #8
0
        private void GetSortIndices(IComparer <T> comparer, out PrimitiveDataFrameColumn <long> columnSortIndices)
        {
            List <List <int> > bufferSortIndices = new List <List <int> >(_columnContainer.Buffers.Count);

            // Sort each buffer first
            for (int b = 0; b < _columnContainer.Buffers.Count; b++)
            {
                ReadOnlyDataFrameBuffer <T> buffer         = _columnContainer.Buffers[b];
                ReadOnlySpan <byte>         nullBitMapSpan = _columnContainer.NullBitMapBuffers[b].ReadOnlySpan;
                int[] sortIndices = new int[buffer.Length];
                for (int i = 0; i < buffer.Length; i++)
                {
                    sortIndices[i] = i;
                }
                IntrospectiveSort(buffer.ReadOnlySpan, buffer.Length, sortIndices, comparer);
                // Bug fix: QuickSort is not stable. When PrimitiveDataFrameColumn has null values and default values, they move around
                List <int> nonNullSortIndices = new List <int>();
                for (int i = 0; i < sortIndices.Length; i++)
                {
                    if (_columnContainer.IsValid(nullBitMapSpan, sortIndices[i]))
                    {
                        nonNullSortIndices.Add(sortIndices[i]);
                    }
                }
                bufferSortIndices.Add(nonNullSortIndices);
            }
            // Simple merge sort to build the full column's sort indices
            ValueTuple <T, int> GetFirstNonNullValueAndBufferIndexStartingAtIndex(int bufferIndex, int startIndex)
            {
                int index = bufferSortIndices[bufferIndex][startIndex];
                T   value;
                ReadOnlyMemory <byte> buffer      = _columnContainer.Buffers[bufferIndex].ReadOnlyBuffer;
                ReadOnlyMemory <T>    typedBuffer = Unsafe.As <ReadOnlyMemory <byte>, ReadOnlyMemory <T> >(ref buffer);

                if (!typedBuffer.IsEmpty)
                {
                    bool isArray = MemoryMarshal.TryGetArray(typedBuffer, out ArraySegment <T> arraySegment);
                    if (isArray)
                    {
                        value = arraySegment.Array[index + arraySegment.Offset];
                    }
                    else
                    {
                        value = _columnContainer.Buffers[bufferIndex][index];
                    }
                }
                else
                {
                    value = _columnContainer.Buffers[bufferIndex][index];
                }
                return(value, startIndex);
            }

            SortedDictionary <T, List <ValueTuple <int, int> > > heapOfValueAndListOfTupleOfSortAndBufferIndex = new SortedDictionary <T, List <ValueTuple <int, int> > >(comparer);
            IList <ReadOnlyDataFrameBuffer <T> > buffers = _columnContainer.Buffers;

            for (int i = 0; i < buffers.Count; i++)
            {
                ReadOnlyDataFrameBuffer <T> buffer = buffers[i];
                if (bufferSortIndices[i].Count == 0)
                {
                    // All nulls
                    continue;
                }
                ValueTuple <T, int> valueAndBufferIndex = GetFirstNonNullValueAndBufferIndexStartingAtIndex(i, 0);
                if (heapOfValueAndListOfTupleOfSortAndBufferIndex.ContainsKey(valueAndBufferIndex.Item1))
                {
                    heapOfValueAndListOfTupleOfSortAndBufferIndex[valueAndBufferIndex.Item1].Add((valueAndBufferIndex.Item2, i));
                }
                else
                {
                    heapOfValueAndListOfTupleOfSortAndBufferIndex.Add(valueAndBufferIndex.Item1, new List <ValueTuple <int, int> >()
                    {
                        (valueAndBufferIndex.Item2, i)
                    });