Esempio n. 1
0
 /// <summary>
 /// Creates a container with room for <paramref name="length"/> rows, adding data buffers
 /// (and a matching null bitmap buffer for each) until the requested length is covered.
 /// </summary>
 /// <param name="length">Number of rows to allocate. Zero or a negative value allocates nothing.</param>
 public PrimitiveColumnContainer(long length = 0)
 {
     while (length > 0)
     {
         if (Buffers.Count == 0)
         {
             Buffers.Add(new DataFrameBuffer<T>());
             NullBitMapBuffers.Add(new DataFrameBuffer<byte>());
         }
         DataFrameBuffer<T> lastBuffer = Buffers[Buffers.Count - 1];
         if (lastBuffer.Length == lastBuffer.MaxCapacity)
         {
             // The last buffer is full: continue in a fresh data buffer with its own bitmap buffer.
             lastBuffer = new DataFrameBuffer<T>();
             Buffers.Add(lastBuffer);
             NullBitMapBuffers.Add(new DataFrameBuffer<byte>());
         }
         int allocatable = (int)Math.Min(length, lastBuffer.MaxCapacity);
         lastBuffer.EnsureCapacity(allocatable);
         DataFrameBuffer<byte> lastNullBitMapBuffer = NullBitMapBuffers[NullBitMapBuffers.Count - 1];
         // One validity bit per row, rounded up to whole bytes. The unsigned addition keeps
         // the "+ 7" from overflowing when allocatable is close to int.MaxValue.
         int nullBitMapAllocatable = (int)(((uint)allocatable + 7) / 8);
         lastNullBitMapBuffer.EnsureCapacity(nullBitMapAllocatable);
         lastBuffer.Length = allocatable;
         // The bitmap buffer holds bytes, so its length is the byte count that was reserved,
         // not the number of rows.
         lastNullBitMapBuffer.Length = nullBitMapAllocatable;
         length -= allocatable;
         Length += lastBuffer.Length;
     }
 }
Esempio n. 2
0
        /// <summary>
        /// Builds a container from an array: every element is appended in order and its
        /// validity bit is set (marked as non-null).
        /// </summary>
        /// <param name="values">The values to copy into the container.</param>
        /// <exception cref="ArgumentNullException"><paramref name="values"/> is null.</exception>
        public PrimitiveColumnContainer(T[] values)
        {
            if (values == null)
            {
                throw new ArgumentNullException(nameof(values));
            }

            // Make sure there is a buffer to append to, then keep writing into the last one.
            if (Buffers.Count == 0)
            {
                Buffers.Add(new DataFrameBuffer<T>());
                NullBitMapBuffers.Add(new DataFrameBuffer<byte>());
            }
            DataFrameBuffer<T> tail = Buffers[Buffers.Count - 1];

            foreach (T item in values)
            {
                if (tail.Length == tail.MaxCapacity)
                {
                    // The current buffer is full: roll over to a new data buffer and bitmap buffer.
                    tail = new DataFrameBuffer<T>();
                    Buffers.Add(tail);
                    NullBitMapBuffers.Add(new DataFrameBuffer<byte>());
                }
                tail.Append(item);
                SetValidityBit(Length, true);
                Length++;
            }
        }
Esempio n. 3
0
        private void GetSortIndices(IComparer <T> comparer, out PrimitiveColumn <long> columnSortIndices)
        {
            List <int[]> bufferSortIndices = new List <int[]>(_columnContainer.Buffers.Count);

            // Sort each buffer first
            foreach (DataFrameBuffer <T> buffer in _columnContainer.Buffers)
            {
                var sortIndices = new int[buffer.Length];
                for (int i = 0; i < buffer.Length; i++)
                {
                    sortIndices[i] = i;
                }
                IntrospectiveSort(buffer.Span, buffer.Length, sortIndices, comparer);
                bufferSortIndices.Add(sortIndices);
            }
            // Simple merge sort to build the full column's sort indices
            ValueTuple <T, int> GetFirstNonNullValueAndBufferIndexStartingAtIndex(int bufferIndex, int startIndex)
            {
                T    value    = _columnContainer.Buffers[bufferIndex][bufferSortIndices[bufferIndex][startIndex]];
                long rowIndex = bufferSortIndices[bufferIndex][startIndex] + bufferIndex * _columnContainer.Buffers[0].MaxCapacity;

                while (!IsValid(rowIndex) && ++startIndex < bufferSortIndices[bufferIndex].Length)
                {
                    value    = _columnContainer.Buffers[bufferIndex][bufferSortIndices[bufferIndex][startIndex]];
                    rowIndex = startIndex + bufferIndex * _columnContainer.Buffers[0].MaxCapacity;
                }
                return(value, startIndex);
            }

            SortedDictionary <T, List <ValueTuple <int, int> > > heapOfValueAndListOfTupleOfSortAndBufferIndex = new SortedDictionary <T, List <ValueTuple <int, int> > >(comparer);
            IList <DataFrameBuffer <T> > buffers = _columnContainer.Buffers;

            for (int i = 0; i < buffers.Count; i++)
            {
                DataFrameBuffer <T> buffer = buffers[i];
                ValueTuple <T, int> valueAndBufferIndex = GetFirstNonNullValueAndBufferIndexStartingAtIndex(i, 0);
                long columnIndex = valueAndBufferIndex.Item2 + i * bufferSortIndices[0].Length;
                if (columnIndex == Length)
                {
                    // All nulls
                    continue;
                }
                if (heapOfValueAndListOfTupleOfSortAndBufferIndex.ContainsKey(valueAndBufferIndex.Item1))
                {
                    heapOfValueAndListOfTupleOfSortAndBufferIndex[valueAndBufferIndex.Item1].Add((valueAndBufferIndex.Item2, i));
                }
                else
                {
                    heapOfValueAndListOfTupleOfSortAndBufferIndex.Add(valueAndBufferIndex.Item1, new List <ValueTuple <int, int> >()
                    {
                        (valueAndBufferIndex.Item2, i)
                    });
        /// <summary>
        /// Sets or clears the validity bit of the row at <paramref name="index"/>.
        /// A null value has an unset bit
        /// A NON-null value has a set bit
        /// </summary>
        /// <param name="index">Row index across all buffers. It may equal Length, which is treated as a row being appended.</param>
        /// <param name="value">true to mark the row as NON-null, false to mark it as null.</param>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="index"/> is negative or greater than Length.</exception>
        private void SetValidityBit(long index, bool value)
        {
            // Compare as unsigned 64-bit values: negative indices are rejected and
            // indices above uint.MaxValue are no longer truncated before the check.
            if ((ulong)index > (ulong)Length)
            {
                throw new ArgumentOutOfRangeException(nameof(index));
            }
            // First find the right bitMapBuffer
            int bitMapIndex = (int)(index / Buffers[0].MaxCapacity);

            Debug.Assert(NullBitMapBuffers.Count > bitMapIndex);
            DataFrameBuffer<byte> bitMapBuffer = NullBitMapBuffers[bitMapIndex];

            // Locate the bit inside that buffer. The multiplication is done in 64 bits so it
            // cannot overflow for buffers after the first one.
            long indexInBuffer = index - (long)bitMapIndex * Buffers[0].MaxCapacity;
            int bitMapBufferIndex = (int)(indexInBuffer / 8);
            int bitOffset = (int)(indexInBuffer % 8);

            Debug.Assert(bitMapBuffer.Length >= bitMapBufferIndex);
            if (bitMapBuffer.Length == bitMapBufferIndex)
            {
                // The byte holding this bit does not exist yet: start it with all bits unset.
                bitMapBuffer.Append(0);
            }
            byte curBitMap = bitMapBuffer[bitMapBufferIndex];
            bool wasSet = ((curBitMap >> bitOffset) & 1) == 1;
            byte newBitMap;

            // The NullCount bookkeeping below compares the ROW index with Length. Using the
            // in-buffer index would misclassify appends and updates in every buffer but the first.
            if (value)
            {
                newBitMap = (byte)(curBitMap | (byte)(1 << bitOffset));
                if (_modifyNullCountWhileIndexing && !wasSet && index < Length && NullCount > 0)
                {
                    // Old value was null.
                    NullCount--;
                }
            }
            else
            {
                if (_modifyNullCountWhileIndexing && wasSet && index < Length)
                {
                    // old value was NOT null and new value is null
                    NullCount++;
                }
                else if (_modifyNullCountWhileIndexing && index == Length)
                {
                    // New entry from an append
                    NullCount++;
                }
                newBitMap = (byte)(curBitMap & (byte)~(1 << bitOffset));
            }
            bitMapBuffer[bitMapBufferIndex] = newBitMap;
        }
Esempio n. 5
0
        /// <summary>
        /// Creates a bool container that mirrors this container's buffer layout: one bool buffer
        /// per data buffer, each given the same length as its counterpart.
        /// </summary>
        /// <remarks>
        /// Only the data buffers and Length of the result are populated here; this method does
        /// not touch the result's null bitmap buffers.
        /// </remarks>
        /// <returns>The new bool container.</returns>
        internal PrimitiveColumnContainer<bool> CloneAsBoolContainer()
        {
            var boolContainer = new PrimitiveColumnContainer<bool>();

            foreach (DataFrameBuffer<T> source in Buffers)
            {
                var flags = new DataFrameBuffer<bool>();
                boolContainer.Buffers.Add(flags);

                // Reserve room, clear the span, then publish the length (order kept as-is).
                flags.EnsureCapacity(source.Length);
                flags.Span.Fill(false);
                flags.Length = source.Length;

                boolContainer.Length += source.Length;
            }

            return boolContainer;
        }
Esempio n. 6
0
        /// <summary>
        /// Creates a bool container with the same buffer layout as this container (one bool
        /// buffer per data buffer, with matching lengths).
        /// </summary>
        /// <returns>The new bool container; its Length is the sum of the source buffer lengths.</returns>
        internal PrimitiveDataFrameColumnContainer<bool> CreateBoolContainerForCompareOps()
        {
            var result = new PrimitiveDataFrameColumnContainer<bool>();

            foreach (var source in Buffers)
            {
                var rowCount = source.Length;
                var target = new DataFrameBuffer<bool>();
                result.Buffers.Add(target);

                // Reserve room, clear the span, then publish the length (order kept as-is).
                target.EnsureCapacity(rowCount);
                target.Span.Fill(false);
                target.Length = rowCount;

                result.Length += rowCount;
            }

            return result;
        }
Esempio n. 7
0
        /// <summary>
        /// Appends a single value to the end of the container, starting a new buffer when the
        /// last one has reached its maximum capacity.
        /// </summary>
        /// <param name="value">The value to append.</param>
        public void Append(T value)
        {
            if (Buffers.Count == 0)
            {
                Buffers.Add(new DataFrameBuffer<T>());
            }
            DataFrameBuffer<T> lastBuffer = Buffers[Buffers.Count - 1];

            if (lastBuffer.Length == lastBuffer.MaxCapacity)
            {
                lastBuffer = new DataFrameBuffer<T>();
                // Register the new buffer with the container; otherwise the appended value
                // would land in an unreferenced buffer while Length still grows.
                Buffers.Add(lastBuffer);
            }
            lastBuffer.Append(value);
            Length++;
        }
        /// <summary>
        /// Sets or clears the validity bit of the row at <paramref name="index"/> and keeps
        /// the null count in step. A set bit marks a NON-null row, an unset bit marks a null row.
        /// </summary>
        /// <param name="index">Row index. It may equal Length, which is treated as a row being appended.</param>
        /// <param name="value">true to mark the row as NON-null, false to mark it as null.</param>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="index"/> is negative or greater than Length.</exception>
        private void SetValidityBit(long index, bool value)
        {
            if ((ulong)index > (ulong)Length)
            {
                throw new ArgumentOutOfRangeException(nameof(index));
            }
            // First find the right bitMapBuffer
            int bitMapIndex = GetBufferIndexContainingRowIndex(index, out int indexInBuffer);

            Debug.Assert(_nullBitMapBuffers.Count > bitMapIndex);
            DataFrameBuffer<byte> bitMapBuffer = (DataFrameBuffer<byte>)_nullBitMapBuffers[bitMapIndex];

            // Set the bit
            int bitMapBufferIndex = (int)((uint)indexInBuffer / 8);
            int bitOffset = indexInBuffer & 7; //bit hack for index % 8

            Debug.Assert(bitMapBuffer.Length >= bitMapBufferIndex);
            if (bitMapBuffer.Length == bitMapBufferIndex)
            {
                // The byte holding this bit does not exist yet: start it with all bits unset.
                bitMapBuffer.Append(0);
            }
            byte curBitMap = bitMapBuffer[bitMapBufferIndex];
            bool wasSet = ((curBitMap >> bitOffset) & 1) == 1;
            byte newBitMap;

            // Length counts rows, so it is compared with the row index. Comparing it with the
            // in-buffer index would misclassify rows whose buffer-relative index differs from
            // their row index.
            if (value)
            {
                newBitMap = (byte)(curBitMap | (byte)(1 << bitOffset));
                if (!wasSet && index < Length && NullCount > 0)
                {
                    // Old value was null.
                    _nullCount--;
                }
            }
            else
            {
                if (wasSet && index < Length)
                {
                    // old value was NOT null and new value is null
                    _nullCount++;
                }
                else if (index == Length)
                {
                    // New entry from an append
                    _nullCount++;
                }
                newBitMap = (byte)(curBitMap & (byte)~(1 << bitOffset));
            }
            bitMapBuffer[bitMapBufferIndex] = newBitMap;
        }
        /// <summary>
        /// Produces a byte-for-byte copy of every null bitmap buffer in this container.
        /// </summary>
        /// <returns>A new list with one copied buffer per entry in NullBitMapBuffers, in the same order.</returns>
        private List<DataFrameBuffer<byte>> CloneNullBitMapBuffers()
        {
            var copies = new List<DataFrameBuffer<byte>>();

            foreach (DataFrameBuffer<byte> source in NullBitMapBuffers)
            {
                var copy = new DataFrameBuffer<byte>();
                copies.Add(copy);

                Span<byte> bytes = source.Span;
                int position = 0;
                while (position < source.Length)
                {
                    copy.Append(bytes[position]);
                    position++;
                }
            }

            return copies;
        }
Esempio n. 10
0
        /// <summary>
        /// Creates a copy of this container by copying each data buffer element by element.
        /// </summary>
        /// <returns>A new container with copied buffers and the same total Length.</returns>
        public PrimitiveDataFrameColumnContainer<T> Clone()
        {
            var copy = new PrimitiveDataFrameColumnContainer<T>();

            foreach (DataFrameBuffer<T> source in Buffers)
            {
                var target = new DataFrameBuffer<T>();
                copy.Buffers.Add(target);

                var elements = source.Span;
                copy.Length += source.Length;

                int position = 0;
                while (position < source.Length)
                {
                    target.Append(elements[position]);
                    position++;
                }
            }

            return copy;
        }
Esempio n. 11
0
        /// <summary>
        /// Creates a decimal container holding every value of this container converted through
        /// <c>DecimalConverter&lt;T&gt;</c>, keeping the same buffer layout.
        /// </summary>
        /// <remarks>
        /// Only the data buffers and Length of the result are populated here; this method does
        /// not copy null bitmap buffers or a null count into the result.
        /// </remarks>
        /// <returns>The new decimal container.</returns>
        internal PrimitiveColumnContainer<decimal> CloneAsDecimalContainer()
        {
            var decimals = new PrimitiveColumnContainer<decimal>();

            foreach (DataFrameBuffer<T> source in Buffers)
            {
                decimals.Length += source.Length;

                var converted = new DataFrameBuffer<decimal>();
                decimals.Buffers.Add(converted);
                converted.EnsureCapacity(source.Length);

                Span<T> elements = source.Span;
                int position = 0;
                while (position < source.Length)
                {
                    converted.Append(DecimalConverter<T>.Instance.GetDecimal(elements[position]));
                    position++;
                }
            }

            return decimals;
        }
        /// <summary>
        /// Appends a nullable value. A null is stored as <c>default(T)</c> in the data buffer
        /// and recorded as null through its validity bit.
        /// </summary>
        /// <param name="value">The value to append, or null.</param>
        public void Append(T? value)
        {
            bool firstAppend = Buffers.Count == 0;
            if (firstAppend)
            {
                Buffers.Add(new DataFrameBuffer<T>());
                NullBitMapBuffers.Add(new DataFrameBuffer<byte>());
            }

            DataFrameBuffer<T> tail = Buffers[Buffers.Count - 1];
            if (tail.Length == tail.MaxCapacity)
            {
                // The current buffer is full: roll over to a new data buffer and bitmap buffer.
                tail = new DataFrameBuffer<T>();
                Buffers.Add(tail);
                NullBitMapBuffers.Add(new DataFrameBuffer<byte>());
            }

            tail.Append(value.GetValueOrDefault());
            SetValidityBit(Length, value.HasValue);
            Length++;
        }
Esempio n. 13
0
        /// <summary>
        /// Computes the sort order of the whole column: each buffer is sorted on its own, then
        /// the per-buffer results are merged through a sorted dictionary keyed by value.
        /// </summary>
        /// <param name="comparer">Comparer used both for the per-buffer sort and for ordering the merge dictionary.</param>
        /// <param name="columnSortIndices">Receives a new column named "SortIndices" filled by PopulateColumnSortIndicesWithHeap.</param>
        private void GetSortIndices(IComparer<T> comparer, out PrimitiveColumn<long> columnSortIndices)
        {
            IList<DataFrameBuffer<T>> buffers = _columnContainer.Buffers;
            List<int[]> bufferSortIndices = new List<int[]>(buffers.Count);

            // Sort each buffer first
            foreach (DataFrameBuffer<T> buffer in buffers)
            {
                var sortIndices = new int[buffer.Length];
                for (int i = 0; i < sortIndices.Length; i++)
                {
                    sortIndices[i] = i;
                }
                IntrospectiveSort(buffer.Span, buffer.Length, sortIndices, comparer);
                bufferSortIndices.Add(sortIndices);
            }

            // Simple merge sort to build the full column's sort indices.
            // Each dictionary entry maps a value to the (sort index, buffer index) pairs that currently hold it.
            SortedDictionary<T, List<Tuple<int, int>>> heapOfValueAndListOfTupleOfSortAndBufferIndex = new SortedDictionary<T, List<Tuple<int, int>>>(comparer);

            for (int i = 0; i < buffers.Count; i++)
            {
                int[] sortIndices = bufferSortIndices[i];
                if (sortIndices.Length == 0)
                {
                    // An empty buffer has no first element to seed the merge with;
                    // indexing position 0 would throw.
                    continue;
                }

                T value = buffers[i][sortIndices[0]];
                // Single lookup instead of ContainsKey followed by the indexer.
                if (!heapOfValueAndListOfTupleOfSortAndBufferIndex.TryGetValue(value, out List<Tuple<int, int>> entries))
                {
                    entries = new List<Tuple<int, int>>();
                    heapOfValueAndListOfTupleOfSortAndBufferIndex.Add(value, entries);
                }
                entries.Add(new Tuple<int, int>(0, i));
            }

            columnSortIndices = new PrimitiveColumn<long>("SortIndices");
            GetBufferSortIndex getBufferSortIndex = new GetBufferSortIndex((int bufferIndex, int sortIndex) => bufferSortIndices[bufferIndex][sortIndex]);
            GetValueAtBuffer<T> getValueAtBuffer = new GetValueAtBuffer<T>((int bufferIndex, int sortIndex) => _columnContainer.Buffers[bufferIndex][bufferSortIndices[bufferIndex][sortIndex]]);
            GetBufferLengthAtIndex getBufferLengthAtIndex = new GetBufferLengthAtIndex((int bufferIndex) => bufferSortIndices[bufferIndex].Length);

            PopulateColumnSortIndicesWithHeap(heapOfValueAndListOfTupleOfSortAndBufferIndex, columnSortIndices, getBufferSortIndex, getValueAtBuffer, getBufferLengthAtIndex);
        }
        /// <summary>
        /// Creates a double container holding every value of this container converted through
        /// <c>DoubleConverter&lt;T&gt;</c>. The null bitmap buffers are cloned and the null
        /// count is copied to the result.
        /// </summary>
        /// <returns>The new double container.</returns>
        internal PrimitiveColumnContainer<double> CloneAsDoubleContainer()
        {
            var doubles = new PrimitiveColumnContainer<double>();

            foreach (DataFrameBuffer<T> source in Buffers)
            {
                doubles.Length += source.Length;

                var converted = new DataFrameBuffer<double>();
                doubles.Buffers.Add(converted);
                converted.EnsureCapacity(source.Length);

                Span<T> elements = source.Span;
                int position = 0;
                while (position < source.Length)
                {
                    converted.Append(DoubleConverter<T>.Instance.GetDouble(elements[position]));
                    position++;
                }
            }

            // Carry the null information over unchanged.
            doubles.NullBitMapBuffers = CloneNullBitMapBuffers();
            doubles.NullCount = NullCount;
            return doubles;
        }
Esempio n. 15
0
        /// <summary>
        /// Builds a container from a sequence, appending each element in enumeration order and
        /// starting a new buffer whenever the current one reaches its maximum capacity.
        /// </summary>
        /// <param name="values">The values to copy into the container.</param>
        /// <exception cref="ArgumentNullException"><paramref name="values"/> is null.</exception>
        public PrimitiveDataFrameColumnContainer(IEnumerable<T> values)
        {
            if (values == null)
            {
                throw new ArgumentNullException(nameof(values));
            }

            bool noBuffersYet = Buffers.Count == 0;
            if (noBuffersYet)
            {
                Buffers.Add(new DataFrameBuffer<T>());
            }

            var tail = Buffers[Buffers.Count - 1];
            foreach (var item in values)
            {
                if (tail.Length == tail.MaxCapacity)
                {
                    // The current buffer is full: continue in a fresh one.
                    tail = new DataFrameBuffer<T>();
                    Buffers.Add(tail);
                }

                tail.Append(item);
                Length++;
            }
        }
        /// <summary>
        /// Reads the validity bit of the row at <paramref name="index"/>.
        /// </summary>
        /// <param name="index">Row index across all buffers.</param>
        /// <returns>true when the bit is set, false when it is unset.</returns>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="index"/> is negative or not less than Length.</exception>
        private bool GetValidityBit(long index)
        {
            // Compare as unsigned 64-bit values: negative indices are rejected and
            // indices above uint.MaxValue are no longer truncated before the check.
            if ((ulong)index >= (ulong)Length)
            {
                throw new ArgumentOutOfRangeException(nameof(index));
            }
            // First find the right bitMapBuffer
            int bitMapIndex = (int)(index / Buffers[0].MaxCapacity);

            Debug.Assert(NullBitMapBuffers.Count > bitMapIndex);
            DataFrameBuffer<byte> bitMapBuffer = NullBitMapBuffers[bitMapIndex];

            // Get the bit. The multiplication is done in 64 bits so it cannot overflow
            // for buffers after the first one.
            long indexInBuffer = index - (long)bitMapIndex * Buffers[0].MaxCapacity;
            int bitMapBufferIndex = (int)(indexInBuffer / 8);

            Debug.Assert(bitMapBuffer.Length > bitMapBufferIndex);
            byte curBitMap = bitMapBuffer[bitMapBufferIndex];

            return ((curBitMap >> (int)(indexInBuffer % 8)) & 1) != 0;
        }
Esempio n. 17
0
        /// <summary>
        /// Appends <paramref name="count"/> copies of <paramref name="value"/>, spilling into new
        /// buffers as needed. A null value is stored as <c>default(T)</c> with its validity bit
        /// unset and is added to NullCount.
        /// </summary>
        /// <param name="value">The value to repeat, or null.</param>
        /// <param name="count">How many rows to append. Zero or a negative value appends nothing.</param>
        public void AppendMany(T? value, long count)
        {
            bool hasValue = value.HasValue;

            // NullCount is adjusted once up front; a non-positive count must not change it.
            if (!hasValue && count > 0)
            {
                NullCount += count;
            }

            while (count > 0)
            {
                if (Buffers.Count == 0)
                {
                    Buffers.Add(new DataFrameBuffer<T>());
                    NullBitMapBuffers.Add(new DataFrameBuffer<byte>());
                }
                DataFrameBuffer<T> lastBuffer = Buffers[Buffers.Count - 1];
                if (lastBuffer.Length == lastBuffer.MaxCapacity)
                {
                    lastBuffer = new DataFrameBuffer<T>();
                    Buffers.Add(lastBuffer);
                    NullBitMapBuffers.Add(new DataFrameBuffer<byte>());
                }

                // Only the space still free in the last buffer can be filled in this pass;
                // whatever does not fit is handled by the next loop iteration.
                int allocatable = (int)Math.Min(count, lastBuffer.MaxCapacity - lastBuffer.Length);
                lastBuffer.EnsureCapacity(allocatable);
                lastBuffer.Span.Slice(lastBuffer.Length, allocatable).Fill(value ?? default);
                lastBuffer.Length += allocatable;
                Length            += allocatable;

                DataFrameBuffer<byte> lastNullBitMapBuffer = NullBitMapBuffers[NullBitMapBuffers.Count - 1];
                int nullBitMapAllocatable = (int)(((uint)allocatable + 7) / 8);
                lastNullBitMapBuffer.EnsureCapacity(nullBitMapAllocatable);

                // NullCount was already adjusted above, so bookkeeping is switched off while the
                // bits are written. The finally block restores it even if a bit update throws.
                _modifyNullCountWhileIndexing = false;
                try
                {
                    // Only the rows added in this pass: the last 'allocatable' rows of the container.
                    for (long i = Length - allocatable; i < Length; i++)
                    {
                        SetValidityBit(i, hasValue);
                    }
                }
                finally
                {
                    _modifyNullCountWhileIndexing = true;
                }
                // NOTE(review): SetValidityBit can itself grow the bitmap buffer via Append;
                // confirm this adjustment does not count those bytes twice.
                lastNullBitMapBuffer.Length += nullBitMapAllocatable;
                count -= allocatable;
            }
        }
Esempio n. 18
0
        // This is an immutable column, however this method exists to support Clone(). Keep this method private
        /// <summary>
        /// Appends a string as UTF-8 bytes. A null string adds an offset entry equal to the
        /// previous one (no data bytes) and leaves the row's validity bit unset.
        /// </summary>
        /// <param name="value">The string to append, or null.</param>
        private void Append(string value)
        {
            if (_dataBuffers.Count == 0)
            {
                _dataBuffers.Add(new DataFrameBuffer<byte>());
                _nullBitMapBuffers.Add(new DataFrameBuffer<byte>());
                _offsetsBuffers.Add(new DataFrameBuffer<int>());
            }
            // Direct cast: a wrong buffer type fails here instead of as a null dereference later.
            DataFrameBuffer<int> mutableOffsetsBuffer = (DataFrameBuffer<int>)_offsetsBuffers[_offsetsBuffers.Count - 1];

            if (mutableOffsetsBuffer.Length == 0)
            {
                mutableOffsetsBuffer.Append(0);
            }
            Length++;
            if (value == null)
            {
                // Null row: repeat the previous offset so the entry spans zero bytes.
                mutableOffsetsBuffer.Append(mutableOffsetsBuffer[mutableOffsetsBuffer.Length - 1]);
            }
            else
            {
                byte[] bytes = Encoding.UTF8.GetBytes(value);
                DataFrameBuffer<byte> mutableDataBuffer = (DataFrameBuffer<byte>)_dataBuffers[_dataBuffers.Count - 1];
                if (mutableDataBuffer.Length == ReadOnlyDataFrameBuffer<byte>.MaxCapacity)
                {
                    mutableDataBuffer = new DataFrameBuffer<byte>();
                    _dataBuffers.Add(mutableDataBuffer);
                    _nullBitMapBuffers.Add(new DataFrameBuffer<byte>());
                    // Switch to the new offsets buffer as well, so the offset written below
                    // goes with the new data buffer rather than the full one.
                    mutableOffsetsBuffer = new DataFrameBuffer<int>();
                    _offsetsBuffers.Add(mutableOffsetsBuffer);
                    mutableOffsetsBuffer.Append(0);
                }
                mutableDataBuffer.EnsureCapacity(bytes.Length);
                bytes.AsMemory().CopyTo(mutableDataBuffer.Memory.Slice(mutableDataBuffer.Length));
                mutableDataBuffer.Length += bytes.Length;
                mutableOffsetsBuffer.Append(mutableOffsetsBuffer[mutableOffsetsBuffer.Length - 1] + bytes.Length);
            }
            // A set validity bit means NON-null, so the bit is set only when a value was supplied.
            SetValidityBit(Length - 1, value != null);
        }
        // This is an immutable column, however this method exists to support Clone(). Keep this method private
        // Appending a default span is equivalent to appending null: no data bytes are written and the
        // row's validity bit is left unset.
        // Appending an empty (non-default) span is valid. It is NOT treated as null. It instead adds a new offset entry
        // NOTE(review): Length is incremented before SetValidityBit runs, so the "new entry from an append"
        // path is not the one taken here; confirm NullCount is maintained as intended for appended nulls.
        private void Append(ReadOnlySpan<byte> value)
        {
            if (_dataBuffers.Count == 0)
            {
                _dataBuffers.Add(new DataFrameBuffer<byte>());
                _nullBitMapBuffers.Add(new DataFrameBuffer<byte>());
                _offsetsBuffers.Add(new DataFrameBuffer<int>());
            }
            DataFrameBuffer<int> mutableOffsetsBuffer = (DataFrameBuffer<int>)_offsetsBuffers[_offsetsBuffers.Count - 1];

            if (mutableOffsetsBuffer.Length == 0)
            {
                mutableOffsetsBuffer.Append(0);
            }
            Length++;

            // Evaluated once and used for both the offsets handling and the validity bit.
            bool isNull = value == default;
            if (isNull)
            {
                // Null row: repeat the previous offset so the entry spans zero bytes.
                mutableOffsetsBuffer.Append(mutableOffsetsBuffer[mutableOffsetsBuffer.Length - 1]);
            }
            else
            {
                DataFrameBuffer<byte> mutableDataBuffer = (DataFrameBuffer<byte>)_dataBuffers[_dataBuffers.Count - 1];
                if (mutableDataBuffer.Length == ReadOnlyDataFrameBuffer<byte>.MaxCapacity)
                {
                    mutableDataBuffer = new DataFrameBuffer<byte>();
                    _dataBuffers.Add(mutableDataBuffer);
                    _nullBitMapBuffers.Add(new DataFrameBuffer<byte>());
                    mutableOffsetsBuffer = new DataFrameBuffer<int>();
                    _offsetsBuffers.Add(mutableOffsetsBuffer);
                    mutableOffsetsBuffer.Append(0);
                }
                mutableDataBuffer.EnsureCapacity(value.Length);
                value.CopyTo(mutableDataBuffer.RawSpan.Slice(mutableDataBuffer.Length));
                mutableDataBuffer.Length += value.Length;
                mutableOffsetsBuffer.Append(mutableOffsetsBuffer[mutableOffsetsBuffer.Length - 1] + value.Length);
            }
            // A set validity bit means NON-null, so the bit is set only when a value was supplied.
            SetValidityBit(Length - 1, !isNull);
        }
Esempio n. 20
0
        /// <summary>
        /// Builds a container from a sequence: every element is appended in enumeration order
        /// and its validity bit is set (marked as non-null).
        /// </summary>
        /// <param name="values">The values to copy into the container.</param>
        /// <exception cref="ArgumentNullException"><paramref name="values"/> is null.</exception>
        public PrimitiveColumnContainer(IEnumerable<T> values)
        {
            if (values == null)
            {
                throw new ArgumentNullException(nameof(values));
            }

            bool noBuffersYet = Buffers.Count == 0;
            if (noBuffersYet)
            {
                Buffers.Add(new DataFrameBuffer<T>());
                NullBitMapBuffers.Add(new DataFrameBuffer<byte>());
            }

            var tail = (DataFrameBuffer<T>)Buffers[Buffers.Count - 1];
            foreach (T item in values)
            {
                if (tail.Length == ReadOnlyDataFrameBuffer<T>.MaxCapacity)
                {
                    // The current buffer is full: roll over to a new data buffer and bitmap buffer.
                    tail = new DataFrameBuffer<T>();
                    Buffers.Add(tail);
                    NullBitMapBuffers.Add(new DataFrameBuffer<byte>());
                }

                tail.Append(item);
                SetValidityBit(Length, true);
                Length++;
            }
        }