/// <summary>
 /// Enumerates the offsets buffers as immutable <see cref="ReadOnlyMemory{T}"/> of <see cref="int"/>.
 /// Each yielded buffer holds offsets into its corresponding Data buffer; the Apache Arrow format
 /// specifies how the offset buffer encodes the length of each value in the Data buffer.
 /// </summary>
 /// <returns>A lazily evaluated sequence with one <see cref="ReadOnlyMemory{T}"/> of <see cref="int"/> per offsets buffer.</returns>
 public IEnumerable<ReadOnlyMemory<int>> GetReadOnlyOffsetsBuffers()
 {
     int position = 0;
     // The count is re-read on every pass, so the bound is always the current size of the list.
     while (position < _offsetsBuffers.Count)
     {
         ReadOnlyDataFrameBuffer<int> offsets = _offsetsBuffers[position];
         yield return offsets.ReadOnlyMemory;
         position++;
     }
 }
 /// <summary>
 /// Enumerates the null bitmap buffers as immutable <see cref="ReadOnlyMemory{T}"/> of <see cref="byte"/>,
 /// representing null values in the Apache Arrow format.
 /// </summary>
 /// <remarks>Each yielded buffer encodes the indices of null values in its corresponding Data buffer.</remarks>
 /// <returns>A lazily evaluated sequence with one <see cref="ReadOnlyMemory{T}"/> of <see cref="byte"/> per null bitmap buffer.</returns>
 public IEnumerable<ReadOnlyMemory<byte>> GetReadOnlyNullBitMapBuffers()
 {
     int position = 0;
     // The count is re-read on every pass, so the bound is always the current size of the list.
     while (position < _nullBitMapBuffers.Count)
     {
         ReadOnlyDataFrameBuffer<byte> bitMap = _nullBitMapBuffers[position];
         yield return bitMap.RawReadOnlyMemory;
         position++;
     }
 }
예제 #3
0
 /// <summary>
 /// Enumerates the underlying value buffers as immutable <see cref="ReadOnlyMemory{T}"/> of <typeparamref name="T"/>.
 /// </summary>
 /// <remarks>
 /// Null values are not marked in these buffers; they are encoded, in the Apache Arrow format,
 /// in the buffers returned by GetReadOnlyNullBitMapBuffers.
 /// </remarks>
 /// <returns>A lazily evaluated sequence with one <see cref="ReadOnlyMemory{T}"/> of <typeparamref name="T"/> per data buffer.</returns>
 public IEnumerable<ReadOnlyMemory<T>> GetReadOnlyDataBuffers()
 {
     int position = 0;
     // The container's buffer list is queried on every pass, exactly as an index-based for loop would.
     while (position < _columnContainer.Buffers.Count)
     {
         ReadOnlyDataFrameBuffer<T> values = _columnContainer.Buffers[position];
         yield return values.ReadOnlyMemory;
         position++;
     }
 }
예제 #4
0
        private void GetSortIndices(IComparer <T> comparer, out PrimitiveColumn <long> columnSortIndices)
        {
            List <List <int> > bufferSortIndices = new List <List <int> >(_columnContainer.Buffers.Count);

            // Sort each buffer first
            for (int b = 0; b < _columnContainer.Buffers.Count; b++)
            {
                ReadOnlyDataFrameBuffer <T> buffer = _columnContainer.Buffers[b];
                int[] sortIndices = new int[buffer.Length];
                for (int i = 0; i < buffer.Length; i++)
                {
                    sortIndices[i] = i;
                }
                IntrospectiveSort(buffer.ReadOnlySpan, buffer.Length, sortIndices, comparer);
                // Bug fix: QuickSort is not stable. When PrimitiveColumn has null values and default values, they move around
                List <int> nonNullSortIndices = new List <int>();
                for (int i = 0; i < sortIndices.Length; i++)
                {
                    if (IsValid(sortIndices[i] + b * ReadOnlyDataFrameBuffer <T> .MaxCapacity))
                    {
                        nonNullSortIndices.Add(sortIndices[i]);
                    }
                }
                bufferSortIndices.Add(nonNullSortIndices);
            }
            // Simple merge sort to build the full column's sort indices
            ValueTuple <T, int> GetFirstNonNullValueAndBufferIndexStartingAtIndex(int bufferIndex, int startIndex)
            {
                T    value    = _columnContainer.Buffers[bufferIndex][bufferSortIndices[bufferIndex][startIndex]];
                long rowIndex = bufferSortIndices[bufferIndex][startIndex] + bufferIndex * ReadOnlyDataFrameBuffer <T> .MaxCapacity;

                return(value, startIndex);
            }

            SortedDictionary <T, List <ValueTuple <int, int> > > heapOfValueAndListOfTupleOfSortAndBufferIndex = new SortedDictionary <T, List <ValueTuple <int, int> > >(comparer);
            IList <ReadOnlyDataFrameBuffer <T> > buffers = _columnContainer.Buffers;

            for (int i = 0; i < buffers.Count; i++)
            {
                ReadOnlyDataFrameBuffer <T> buffer = buffers[i];
                if (bufferSortIndices[i].Count == 0)
                {
                    // All nulls
                    continue;
                }
                ValueTuple <T, int> valueAndBufferIndex = GetFirstNonNullValueAndBufferIndexStartingAtIndex(i, 0);
                if (heapOfValueAndListOfTupleOfSortAndBufferIndex.ContainsKey(valueAndBufferIndex.Item1))
                {
                    heapOfValueAndListOfTupleOfSortAndBufferIndex[valueAndBufferIndex.Item1].Add((valueAndBufferIndex.Item2, i));
                }
                else
                {
                    heapOfValueAndListOfTupleOfSortAndBufferIndex.Add(valueAndBufferIndex.Item1, new List <ValueTuple <int, int> >()
                    {
                        (valueAndBufferIndex.Item2, i)
                    });
        /// <summary>
        /// Reads the validity bit of the row at <paramref name="index"/> from the null bitmap buffers.
        /// </summary>
        /// <param name="index">Zero-based row index; must be non-negative and less than Length.</param>
        /// <returns><c>true</c> when the row's bit in the null bitmap is set; otherwise <c>false</c>.</returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="index"/> is negative, or greater than or equal to Length.
        /// </exception>
        public bool GetValidityBit(long index)
        {
            // The unsigned comparison also rejects negative values. ">=" is required because
            // index == Length is one past the last row.
            if ((ulong)index >= (ulong)Length)
            {
                throw new ArgumentOutOfRangeException(nameof(index));
            }

            // First find the right bitMapBuffer
            int bitMapIndex = GetBufferIndexContainingRowIndex(index, out int indexInBuffer);

            Debug.Assert(_nullBitMapBuffers.Count > bitMapIndex);
            ReadOnlyDataFrameBuffer<byte> bitMapBuffer = _nullBitMapBuffers[bitMapIndex];

            // Both the byte position and the bit position must be relative to the selected buffer.
            // Using the column-wide index for the byte position would read the wrong byte for any
            // buffer other than the first (and would truncate indices above uint.MaxValue).
            int bitMapBufferIndex = indexInBuffer / 8;

            Debug.Assert(bitMapBuffer.Length > bitMapBufferIndex);
            byte curBitMap = bitMapBuffer[bitMapBufferIndex];

            // Bits are addressed least-significant first within each byte.
            return ((curBitMap >> (indexInBuffer & 7)) & 1) != 0;
        }
        /// <summary>
        /// Creates a string column over pre-built data, offsets and null bitmap buffers.
        /// </summary>
        /// <param name="name">Column name, forwarded to the base constructor.</param>
        /// <param name="values">Raw bytes wrapped as the column's single data buffer.</param>
        /// <param name="offsets">Raw bytes wrapped as the column's single offsets buffer, holding <paramref name="length"/> + 1 <see cref="int"/> entries.</param>
        /// <param name="nullBits">Raw bytes wrapped as the column's single null bitmap buffer.</param>
        /// <param name="length">Column length, forwarded to the base constructor.</param>
        /// <param name="nullCount">Value stored as the column's null count.</param>
        /// <exception cref="ArgumentException">
        /// The offsets buffer does not report a length of <paramref name="length"/> + 1.
        /// </exception>
        public ArrowStringColumn(string name, ReadOnlyMemory<byte> values, ReadOnlyMemory<byte> offsets, ReadOnlyMemory<byte> nullBits, int length, int nullCount) : base(name, length, typeof(string))
        {
            ReadOnlyDataFrameBuffer<byte> dataBuffer = new ReadOnlyDataFrameBuffer<byte>(values, values.Length);
            ReadOnlyDataFrameBuffer<int> offsetBuffer = new ReadOnlyDataFrameBuffer<int>(offsets, length + 1);
            ReadOnlyDataFrameBuffer<byte> nullBitMapBuffer = new ReadOnlyDataFrameBuffer<byte>(nullBits, nullBits.Length);

            if (length + 1 != offsetBuffer.Length)
            {
                // Report the caller-visible parameter, with a real message: the single-string
                // ArgumentException constructor treats its argument as the message, not the parameter name.
                throw new ArgumentException("The offsets buffer must contain exactly length + 1 entries.", nameof(offsets));
            }

            _dataBuffers = new List<ReadOnlyDataFrameBuffer<byte>> { dataBuffer };
            _offsetsBuffers = new List<ReadOnlyDataFrameBuffer<int>> { offsetBuffer };
            _nullBitMapBuffers = new List<ReadOnlyDataFrameBuffer<byte>> { nullBitMapBuffer };

            _nullCount = nullCount;
        }
        private void GetSortIndices(IComparer <T> comparer, out PrimitiveColumn <long> columnSortIndices)
        {
            List <List <int> > bufferSortIndices = new List <List <int> >(_columnContainer.Buffers.Count);

            // Sort each buffer first
            for (int b = 0; b < _columnContainer.Buffers.Count; b++)
            {
                ReadOnlyDataFrameBuffer <T> buffer         = _columnContainer.Buffers[b];
                ReadOnlySpan <byte>         nullBitMapSpan = _columnContainer.NullBitMapBuffers[b].ReadOnlySpan;
                int[] sortIndices = new int[buffer.Length];
                for (int i = 0; i < buffer.Length; i++)
                {
                    sortIndices[i] = i;
                }
                IntrospectiveSort(buffer.ReadOnlySpan, buffer.Length, sortIndices, comparer);
                // Bug fix: QuickSort is not stable. When PrimitiveColumn has null values and default values, they move around
                List <int> nonNullSortIndices = new List <int>();
                for (int i = 0; i < sortIndices.Length; i++)
                {
                    if (_columnContainer.IsValid(nullBitMapSpan, sortIndices[i]))
                    {
                        nonNullSortIndices.Add(sortIndices[i]);
                    }
                }
                bufferSortIndices.Add(nonNullSortIndices);
            }
            // Simple merge sort to build the full column's sort indices
            ValueTuple <T, int> GetFirstNonNullValueAndBufferIndexStartingAtIndex(int bufferIndex, int startIndex)
            {
                int index = bufferSortIndices[bufferIndex][startIndex];
                T   value;
                ReadOnlyMemory <byte> buffer      = _columnContainer.Buffers[bufferIndex].ReadOnlyBuffer;
                ReadOnlyMemory <T>    typedBuffer = Unsafe.As <ReadOnlyMemory <byte>, ReadOnlyMemory <T> >(ref buffer);

                if (!typedBuffer.IsEmpty)
                {
                    bool isArray = MemoryMarshal.TryGetArray(typedBuffer, out ArraySegment <T> arraySegment);
                    if (isArray)
                    {
                        value = arraySegment.Array[index + arraySegment.Offset];
                    }
                    else
                    {
                        value = _columnContainer.Buffers[bufferIndex][index];
                    }
                }
                else
                {
                    value = _columnContainer.Buffers[bufferIndex][index];
                }
                return(value, startIndex);
            }

            SortedDictionary <T, List <ValueTuple <int, int> > > heapOfValueAndListOfTupleOfSortAndBufferIndex = new SortedDictionary <T, List <ValueTuple <int, int> > >(comparer);
            IList <ReadOnlyDataFrameBuffer <T> > buffers = _columnContainer.Buffers;

            for (int i = 0; i < buffers.Count; i++)
            {
                ReadOnlyDataFrameBuffer <T> buffer = buffers[i];
                if (bufferSortIndices[i].Count == 0)
                {
                    // All nulls
                    continue;
                }
                ValueTuple <T, int> valueAndBufferIndex = GetFirstNonNullValueAndBufferIndexStartingAtIndex(i, 0);
                if (heapOfValueAndListOfTupleOfSortAndBufferIndex.ContainsKey(valueAndBufferIndex.Item1))
                {
                    heapOfValueAndListOfTupleOfSortAndBufferIndex[valueAndBufferIndex.Item1].Add((valueAndBufferIndex.Item2, i));
                }
                else
                {
                    heapOfValueAndListOfTupleOfSortAndBufferIndex.Add(valueAndBufferIndex.Item1, new List <ValueTuple <int, int> >()
                    {
                        (valueAndBufferIndex.Item2, i)
                    });