/// <summary>
/// Creates a container pre-sized to hold <paramref name="length"/> values.
/// </summary>
/// <remarks>
/// Storage is reserved one buffer at a time. Every value buffer is paired with a
/// null bitmap buffer that holds one bit per value.
/// </remarks>
/// <param name="length">Number of values to reserve. Zero or a negative number reserves nothing.</param>
public PrimitiveColumnContainer(long length = 0)
{
    while (length > 0)
    {
        if (Buffers.Count == 0)
        {
            Buffers.Add(new DataFrameBuffer<T>());
            NullBitMapBuffers.Add(new DataFrameBuffer<byte>());
        }

        DataFrameBuffer<T> lastBuffer = Buffers[Buffers.Count - 1];
        if (lastBuffer.Length == lastBuffer.MaxCapacity)
        {
            // The current buffer is full: start a new value buffer and its bitmap.
            lastBuffer = new DataFrameBuffer<T>();
            Buffers.Add(lastBuffer);
            NullBitMapBuffers.Add(new DataFrameBuffer<byte>());
        }

        int allocatable = (int)Math.Min(length, lastBuffer.MaxCapacity);
        lastBuffer.EnsureCapacity(allocatable);

        // One bit per value, so the bitmap needs ceil(allocatable / 8) bytes.
        int bitMapByteCount = (int)(((long)allocatable + 7) / 8);
        DataFrameBuffer<byte> lastNullBitMapBuffer = NullBitMapBuffers[NullBitMapBuffers.Count - 1];
        lastNullBitMapBuffer.EnsureCapacity(bitMapByteCount);

        lastBuffer.Length = allocatable;
        // The bitmap length is measured in bytes, not in values. Using the value count
        // here would claim up to eight times more bytes than were reserved above.
        lastNullBitMapBuffer.Length = bitMapByteCount;

        length -= allocatable;
        Length += lastBuffer.Length;
    }
}
/// <summary>
/// Creates a container holding every element of <paramref name="values"/>, in order,
/// with each row's validity bit set.
/// </summary>
/// <param name="values">Source array to copy from.</param>
/// <exception cref="ArgumentNullException"><paramref name="values"/> is null.</exception>
public PrimitiveColumnContainer(T[] values)
{
    if (values == null)
    {
        throw new ArgumentNullException(nameof(values));
    }

    // Pick up the last existing buffer, or create the first value/bitmap pair.
    DataFrameBuffer<T> target;
    if (Buffers.Count != 0)
    {
        target = Buffers[Buffers.Count - 1];
    }
    else
    {
        target = new DataFrameBuffer<T>();
        Buffers.Add(target);
        NullBitMapBuffers.Add(new DataFrameBuffer<byte>());
    }

    long total = values.LongLength;
    long position = 0;
    while (position < total)
    {
        if (target.Length == target.MaxCapacity)
        {
            // Roll over to a fresh value buffer together with its bitmap buffer.
            target = new DataFrameBuffer<T>();
            Buffers.Add(target);
            NullBitMapBuffers.Add(new DataFrameBuffer<byte>());
        }

        target.Append(values[position]);
        SetValidityBit(Length, true);
        Length++;
        position++;
    }
}
private void GetSortIndices(IComparer <T> comparer, out PrimitiveColumn <long> columnSortIndices) { List <int[]> bufferSortIndices = new List <int[]>(_columnContainer.Buffers.Count); // Sort each buffer first foreach (DataFrameBuffer <T> buffer in _columnContainer.Buffers) { var sortIndices = new int[buffer.Length]; for (int i = 0; i < buffer.Length; i++) { sortIndices[i] = i; } IntrospectiveSort(buffer.Span, buffer.Length, sortIndices, comparer); bufferSortIndices.Add(sortIndices); } // Simple merge sort to build the full column's sort indices ValueTuple <T, int> GetFirstNonNullValueAndBufferIndexStartingAtIndex(int bufferIndex, int startIndex) { T value = _columnContainer.Buffers[bufferIndex][bufferSortIndices[bufferIndex][startIndex]]; long rowIndex = bufferSortIndices[bufferIndex][startIndex] + bufferIndex * _columnContainer.Buffers[0].MaxCapacity; while (!IsValid(rowIndex) && ++startIndex < bufferSortIndices[bufferIndex].Length) { value = _columnContainer.Buffers[bufferIndex][bufferSortIndices[bufferIndex][startIndex]]; rowIndex = startIndex + bufferIndex * _columnContainer.Buffers[0].MaxCapacity; } return(value, startIndex); } SortedDictionary <T, List <ValueTuple <int, int> > > heapOfValueAndListOfTupleOfSortAndBufferIndex = new SortedDictionary <T, List <ValueTuple <int, int> > >(comparer); IList <DataFrameBuffer <T> > buffers = _columnContainer.Buffers; for (int i = 0; i < buffers.Count; i++) { DataFrameBuffer <T> buffer = buffers[i]; ValueTuple <T, int> valueAndBufferIndex = GetFirstNonNullValueAndBufferIndexStartingAtIndex(i, 0); long columnIndex = valueAndBufferIndex.Item2 + i * bufferSortIndices[0].Length; if (columnIndex == Length) { // All nulls continue; } if (heapOfValueAndListOfTupleOfSortAndBufferIndex.ContainsKey(valueAndBufferIndex.Item1)) { heapOfValueAndListOfTupleOfSortAndBufferIndex[valueAndBufferIndex.Item1].Add((valueAndBufferIndex.Item2, i)); } else { heapOfValueAndListOfTupleOfSortAndBufferIndex.Add(valueAndBufferIndex.Item1, new List 
<ValueTuple <int, int> >() { (valueAndBufferIndex.Item2, i) });
/// <summary>
/// Writes the validity bit of the row at <paramref name="index"/>.
/// A null value has an unset bit.
/// A NON-null value has a set bit.
/// </summary>
/// <param name="index">
/// Row index within the whole column. It may equal <see cref="Length"/>, which is treated
/// as the row currently being appended.
/// </param>
/// <param name="value">true marks the row as non-null; false marks it as null.</param>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="index"/> is negative or greater than <see cref="Length"/>.
/// </exception>
private void SetValidityBit(long index, bool value)
{
    // Compare as ulong: a (uint) cast would truncate indices >= 2^32 and let them pass.
    if ((ulong)index > (ulong)Length)
    {
        throw new ArgumentOutOfRangeException(nameof(index));
    }

    // First find the right bitMapBuffer
    int bitMapIndex = (int)(index / Buffers[0].MaxCapacity);
    Debug.Assert(NullBitMapBuffers.Count > bitMapIndex);
    DataFrameBuffer<byte> bitMapBuffer = NullBitMapBuffers[bitMapIndex];

    // Position of the row inside that buffer. `index` itself is left untouched because the
    // NullCount bookkeeping below must compare the column-wide index against Length.
    // The multiplication is done in 64 bits to avoid int overflow.
    long indexInBuffer = index - (long)bitMapIndex * Buffers[0].MaxCapacity;
    int bitMapBufferIndex = (int)(indexInBuffer / 8);
    int bitOffset = (int)(indexInBuffer % 8);

    Debug.Assert(bitMapBuffer.Length >= bitMapBufferIndex);
    if (bitMapBuffer.Length == bitMapBufferIndex)
    {
        // First bit of a new byte: grow the bitmap by one zeroed byte.
        bitMapBuffer.Append(0);
    }

    byte curBitMap = bitMapBuffer[bitMapBufferIndex];
    bool wasSet = ((curBitMap >> bitOffset) & 1) == 1;
    byte newBitMap;
    if (value)
    {
        newBitMap = (byte)(curBitMap | (byte)(1 << bitOffset));
        if (_modifyNullCountWhileIndexing && !wasSet && index < Length && NullCount > 0)
        {
            // Old value was null.
            NullCount--;
        }
    }
    else
    {
        if (_modifyNullCountWhileIndexing && wasSet && index < Length)
        {
            // old value was NOT null and new value is null
            NullCount++;
        }
        else if (_modifyNullCountWhileIndexing && index == Length)
        {
            // New entry from an append
            NullCount++;
        }
        newBitMap = (byte)(curBitMap & (byte)~(1 << bitOffset));
    }

    bitMapBuffer[bitMapBufferIndex] = newBitMap;
}
/// <summary>
/// Creates a bool container with one buffer per buffer of this container.
/// Each new buffer gets the source buffer's length and has its span filled with false.
/// </summary>
/// <returns>The new bool container; its Length is the sum of the source buffer lengths.</returns>
internal PrimitiveColumnContainer<bool> CloneAsBoolContainer()
{
    // NOTE(review): unlike CloneAsDoubleContainer, the null bitmap buffers and NullCount
    // are not copied into the result here — confirm that this is intended.
    var result = new PrimitiveColumnContainer<bool>();
    foreach (DataFrameBuffer<T> source in Buffers)
    {
        var boolBuffer = new DataFrameBuffer<bool>();
        result.Buffers.Add(boolBuffer);

        var valueCount = source.Length;
        boolBuffer.EnsureCapacity(valueCount);
        boolBuffer.Span.Fill(false);
        boolBuffer.Length = valueCount;

        result.Length += valueCount;
    }

    return result;
}
/// <summary>
/// Creates a bool container with one buffer per buffer of this container.
/// Each new buffer gets the source buffer's length and has its span filled with false.
/// </summary>
/// <returns>The new bool container; its Length is the sum of the source buffer lengths.</returns>
internal PrimitiveDataFrameColumnContainer<bool> CreateBoolContainerForCompareOps()
{
    var result = new PrimitiveDataFrameColumnContainer<bool>();
    foreach (var source in Buffers)
    {
        var boolBuffer = new DataFrameBuffer<bool>();
        result.Buffers.Add(boolBuffer);

        var valueCount = source.Length;
        boolBuffer.EnsureCapacity(valueCount);
        boolBuffer.Span.Fill(false);
        boolBuffer.Length = valueCount;

        result.Length += valueCount;
    }

    return result;
}
/// <summary>
/// Appends <paramref name="value"/> to the end of the container and increments Length.
/// </summary>
/// <param name="value">The value to append.</param>
public void Append(T value)
{
    if (Buffers.Count == 0)
    {
        Buffers.Add(new DataFrameBuffer<T>());
    }

    DataFrameBuffer<T> lastBuffer = Buffers[Buffers.Count - 1];
    if (lastBuffer.Length == lastBuffer.MaxCapacity)
    {
        // The last buffer is full. The replacement must be registered in Buffers;
        // otherwise the appended value lands in an orphaned buffer and is lost.
        lastBuffer = new DataFrameBuffer<T>();
        Buffers.Add(lastBuffer);
    }

    lastBuffer.Append(value);
    Length++;
}
/// <summary>
/// Writes the validity bit of the row at <paramref name="index"/>: a set bit marks a
/// non-null row, an unset bit marks a null row. The null count is adjusted to match.
/// </summary>
/// <param name="index">Row index within the whole column; must not exceed Length.</param>
/// <param name="value">true marks the row as non-null; false marks it as null.</param>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="index"/> is negative or greater than Length.
/// </exception>
private void SetValidityBit(long index, bool value)
{
    if ((ulong)index > (ulong)Length)
    {
        throw new ArgumentOutOfRangeException(nameof(index));
    }

    // First find the right bitMapBuffer
    int bitMapIndex = GetBufferIndexContainingRowIndex(index, out int indexInBuffer);
    Debug.Assert(_nullBitMapBuffers.Count > bitMapIndex);
    DataFrameBuffer<byte> bitMapBuffer = (DataFrameBuffer<byte>)_nullBitMapBuffers[bitMapIndex];

    // Set the bit
    int bitMapBufferIndex = (int)((uint)indexInBuffer / 8);
    int bitOffset = indexInBuffer & 7; // bit hack for indexInBuffer % 8
    Debug.Assert(bitMapBuffer.Length >= bitMapBufferIndex);
    if (bitMapBuffer.Length == bitMapBufferIndex)
    {
        // First bit of a new byte: grow the bitmap by one zeroed byte.
        bitMapBuffer.Append(0);
    }

    byte curBitMap = bitMapBuffer[bitMapBufferIndex];
    bool wasSet = ((curBitMap >> bitOffset) & 1) == 1;
    byte newBitMap;

    // Length counts rows of the whole column, so it is compared against the column-wide
    // `index`. `indexInBuffer` is relative to one buffer and only equals `index` while
    // the column has a single buffer.
    if (value)
    {
        newBitMap = (byte)(curBitMap | (byte)(1 << bitOffset));
        if (!wasSet && index < Length && NullCount > 0)
        {
            // Old value was null.
            _nullCount--;
        }
    }
    else
    {
        if (wasSet && index < Length)
        {
            // old value was NOT null and new value is null
            _nullCount++;
        }
        else if (index == Length)
        {
            // New entry from an append
            _nullCount++;
        }
        newBitMap = (byte)(curBitMap & (byte)~(1 << bitOffset));
    }

    bitMapBuffer[bitMapBufferIndex] = newBitMap;
}
/// <summary>
/// Copies every null bitmap buffer of this container, byte by byte, into new buffers.
/// </summary>
/// <returns>A new list with one copied buffer per entry of NullBitMapBuffers, in the same order.</returns>
private List<DataFrameBuffer<byte>> CloneNullBitMapBuffers()
{
    var clones = new List<DataFrameBuffer<byte>>();
    foreach (DataFrameBuffer<byte> source in NullBitMapBuffers)
    {
        var copy = new DataFrameBuffer<byte>();
        clones.Add(copy);

        Span<byte> bytes = source.Span;
        int position = 0;
        while (position < source.Length)
        {
            copy.Append(bytes[position]);
            position++;
        }
    }

    return clones;
}
/// <summary>
/// Creates a copy of this container by appending every value of every buffer to new buffers.
/// </summary>
/// <returns>A new container with the same number of buffers and the same Length.</returns>
public PrimitiveDataFrameColumnContainer<T> Clone()
{
    var copy = new PrimitiveDataFrameColumnContainer<T>();
    foreach (DataFrameBuffer<T> source in Buffers)
    {
        var target = new DataFrameBuffer<T>();
        copy.Buffers.Add(target);

        var values = source.Span;
        int position = 0;
        while (position < source.Length)
        {
            target.Append(values[position]);
            position++;
        }

        copy.Length += source.Length;
    }

    return copy;
}
/// <summary>
/// Creates a decimal container whose values are this container's values converted
/// through <c>DecimalConverter&lt;T&gt;.Instance.GetDecimal</c>, buffer by buffer.
/// </summary>
/// <returns>The new decimal container; its Length is the sum of the source buffer lengths.</returns>
internal PrimitiveColumnContainer<decimal> CloneAsDecimalContainer()
{
    // NOTE(review): unlike CloneAsDoubleContainer, the null bitmap buffers and NullCount
    // are not copied into the result here — confirm that this is intended.
    var result = new PrimitiveColumnContainer<decimal>();
    foreach (DataFrameBuffer<T> source in Buffers)
    {
        var converted = new DataFrameBuffer<decimal>();
        result.Buffers.Add(converted);
        converted.EnsureCapacity(source.Length);

        Span<T> values = source.Span;
        int position = 0;
        while (position < source.Length)
        {
            decimal asDecimal = DecimalConverter<T>.Instance.GetDecimal(values[position]);
            converted.Append(asDecimal);
            position++;
        }

        result.Length += source.Length;
    }

    return result;
}
/// <summary>
/// Appends one entry. A null <paramref name="value"/> stores default(T) in the value
/// buffer and records the row as null through its validity bit.
/// </summary>
/// <param name="value">The value to append, or null for a null entry.</param>
public void Append(T? value)
{
    bool hasNoBuffers = Buffers.Count == 0;
    if (hasNoBuffers)
    {
        Buffers.Add(new DataFrameBuffer<T>());
        NullBitMapBuffers.Add(new DataFrameBuffer<byte>());
    }

    DataFrameBuffer<T> tail = Buffers[Buffers.Count - 1];
    if (tail.Length == tail.MaxCapacity)
    {
        // The tail buffer is full: start a new value buffer together with its bitmap buffer.
        tail = new DataFrameBuffer<T>();
        Buffers.Add(tail);
        NullBitMapBuffers.Add(new DataFrameBuffer<byte>());
    }

    tail.Append(value.GetValueOrDefault());
    // Length still holds the index of the row being appended at this point.
    SetValidityBit(Length, value.HasValue);
    Length++;
}
/// <summary>
/// Computes the sort order of the whole column: each buffer is sorted independently,
/// then the per-buffer orders are merged through a sorted dictionary used as a heap.
/// </summary>
/// <param name="comparer">Comparer that defines the ordering of values.</param>
/// <param name="columnSortIndices">
/// Receives a new column named "SortIndices", populated by PopulateColumnSortIndicesWithHeap.
/// </param>
private void GetSortIndices(IComparer<T> comparer, out PrimitiveColumn<long> columnSortIndices)
{
    List<int[]> bufferSortIndices = new List<int[]>(_columnContainer.Buffers.Count);

    // Sort each buffer first
    foreach (DataFrameBuffer<T> buffer in _columnContainer.Buffers)
    {
        var sortIndices = new int[buffer.Length];
        for (int i = 0; i < buffer.Length; i++)
        {
            sortIndices[i] = i;
        }
        IntrospectiveSort(buffer.Span, buffer.Length, sortIndices, comparer);
        bufferSortIndices.Add(sortIndices);
    }

    // Simple merge sort to build the full column's sort indices.
    // Key: a value. Entry: list of (sort index, buffer index) pairs currently holding that value.
    var heapOfValueAndListOfTupleOfSortAndBufferIndex = new SortedDictionary<T, List<Tuple<int, int>>>(comparer);
    IList<DataFrameBuffer<T>> buffers = _columnContainer.Buffers;
    for (int i = 0; i < buffers.Count; i++)
    {
        if (bufferSortIndices[i].Length == 0)
        {
            // An empty buffer has no smallest element to seed the heap with;
            // indexing [0] here would throw IndexOutOfRangeException.
            continue;
        }

        T value = buffers[i][bufferSortIndices[i][0]];

        // Single lookup instead of ContainsKey followed by the indexer.
        if (!heapOfValueAndListOfTupleOfSortAndBufferIndex.TryGetValue(value, out List<Tuple<int, int>> entries))
        {
            entries = new List<Tuple<int, int>>();
            heapOfValueAndListOfTupleOfSortAndBufferIndex.Add(value, entries);
        }
        entries.Add(new Tuple<int, int>(0, i));
    }

    columnSortIndices = new PrimitiveColumn<long>("SortIndices");
    GetBufferSortIndex getBufferSortIndex = new GetBufferSortIndex((int bufferIndex, int sortIndex) => bufferSortIndices[bufferIndex][sortIndex]);
    GetValueAtBuffer<T> getValueAtBuffer = new GetValueAtBuffer<T>((int bufferIndex, int sortIndex) => _columnContainer.Buffers[bufferIndex][bufferSortIndices[bufferIndex][sortIndex]]);
    GetBufferLengthAtIndex getBufferLengthAtIndex = new GetBufferLengthAtIndex((int bufferIndex) => bufferSortIndices[bufferIndex].Length);
    PopulateColumnSortIndicesWithHeap(heapOfValueAndListOfTupleOfSortAndBufferIndex, columnSortIndices, getBufferSortIndex, getValueAtBuffer, getBufferLengthAtIndex);
}
/// <summary>
/// Creates a double container whose values are this container's values converted
/// through <c>DoubleConverter&lt;T&gt;.Instance.GetDouble</c>, buffer by buffer.
/// The null bitmap buffers and the null count are carried over.
/// </summary>
/// <returns>The new double container.</returns>
internal PrimitiveColumnContainer<double> CloneAsDoubleContainer()
{
    var result = new PrimitiveColumnContainer<double>();
    foreach (DataFrameBuffer<T> source in Buffers)
    {
        var converted = new DataFrameBuffer<double>();
        result.Buffers.Add(converted);
        converted.EnsureCapacity(source.Length);

        Span<T> values = source.Span;
        int position = 0;
        while (position < source.Length)
        {
            double asDouble = DoubleConverter<T>.Instance.GetDouble(values[position]);
            converted.Append(asDouble);
            position++;
        }

        result.Length += source.Length;
    }

    // Validity information does not depend on the value type, so it is copied as-is.
    result.NullBitMapBuffers = CloneNullBitMapBuffers();
    result.NullCount = NullCount;
    return result;
}
/// <summary>
/// Creates a container holding every element produced by <paramref name="values"/>, in order.
/// </summary>
/// <param name="values">Sequence of values to copy.</param>
/// <exception cref="ArgumentNullException"><paramref name="values"/> is null.</exception>
public PrimitiveDataFrameColumnContainer(IEnumerable<T> values)
{
    if (values == null)
    {
        throw new ArgumentNullException(nameof(values));
    }

    if (Buffers.Count == 0)
    {
        Buffers.Add(new DataFrameBuffer<T>());
    }

    var target = Buffers[Buffers.Count - 1];
    using (IEnumerator<T> items = values.GetEnumerator())
    {
        while (items.MoveNext())
        {
            if (target.Length == target.MaxCapacity)
            {
                // The current buffer is full: continue in a fresh one.
                target = new DataFrameBuffer<T>();
                Buffers.Add(target);
            }

            target.Append(items.Current);
            Length++;
        }
    }
}
/// <summary>
/// Reads the validity bit of the row at <paramref name="index"/>.
/// </summary>
/// <param name="index">Row index within the whole column.</param>
/// <returns>true when the row's bit is set, false when it is unset.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="index"/> is negative, or greater than or equal to Length.
/// </exception>
private bool GetValidityBit(long index)
{
    // Compare as ulong: a (uint) cast would truncate indices >= 2^32 and let them pass.
    if ((ulong)index >= (ulong)Length)
    {
        throw new ArgumentOutOfRangeException(nameof(index));
    }

    // First find the right bitMapBuffer
    int bitMapIndex = (int)(index / Buffers[0].MaxCapacity);
    Debug.Assert(NullBitMapBuffers.Count > bitMapIndex);
    DataFrameBuffer<byte> bitMapBuffer = NullBitMapBuffers[bitMapIndex];

    // Get the bit. The multiplication is done in 64 bits to avoid int overflow.
    long indexInBuffer = index - (long)bitMapIndex * Buffers[0].MaxCapacity;
    int bitMapBufferIndex = (int)(indexInBuffer / 8);
    Debug.Assert(bitMapBuffer.Length > bitMapBufferIndex);

    byte curBitMap = bitMapBuffer[bitMapBufferIndex];
    return ((curBitMap >> (int)(indexInBuffer % 8)) & 1) != 0;
}
/// <summary>
/// Appends <paramref name="count"/> copies of <paramref name="value"/>.
/// A null value stores default(T) and leaves the validity bit of each new row unset.
/// </summary>
/// <param name="value">The value to repeat, or null to append null entries.</param>
/// <param name="count">How many entries to append. Zero or a negative number appends no rows.</param>
public void AppendMany(T? value, long count)
{
    if (!value.HasValue)
    {
        NullCount += count;
    }

    while (count > 0)
    {
        if (Buffers.Count == 0)
        {
            Buffers.Add(new DataFrameBuffer<T>());
            NullBitMapBuffers.Add(new DataFrameBuffer<byte>());
        }

        DataFrameBuffer<T> lastBuffer = Buffers[Buffers.Count - 1];
        if (lastBuffer.Length == lastBuffer.MaxCapacity)
        {
            lastBuffer = new DataFrameBuffer<T>();
            Buffers.Add(lastBuffer);
            NullBitMapBuffers.Add(new DataFrameBuffer<byte>());
        }

        // Only the free space of the last buffer can be filled in this round. Ignoring the
        // values it already holds would push the buffer past MaxCapacity.
        int allocatable = (int)Math.Min(count, (long)lastBuffer.MaxCapacity - lastBuffer.Length);
        lastBuffer.EnsureCapacity(allocatable);
        lastBuffer.Span.Slice(lastBuffer.Length, allocatable).Fill(value ?? default);
        lastBuffer.Length += allocatable;
        Length += allocatable;

        // Reserve bitmap space up front. SetValidityBit appends the bitmap bytes itself,
        // so the bitmap length must NOT be bumped again afterwards.
        DataFrameBuffer<byte> lastNullBitMapBuffer = NullBitMapBuffers[NullBitMapBuffers.Count - 1];
        int nullBitMapAllocatable = (int)(((uint)allocatable + 7) / 8);
        lastNullBitMapBuffer.EnsureCapacity(nullBitMapAllocatable);

        // NullCount was already adjusted above; suppress per-bit bookkeeping while the bits are written.
        _modifyNullCountWhileIndexing = false;
        try
        {
            // Exactly the rows added in this round: [Length - allocatable, Length).
            for (long i = Length - allocatable; i < Length; i++)
            {
                SetValidityBit(i, value.HasValue);
            }
        }
        finally
        {
            _modifyNullCountWhileIndexing = true;
        }

        count -= allocatable;
    }
}
// This is an immutable column, however this method exists to support Clone(). Keep this method private
/// <summary>
/// Appends one string entry. A null <paramref name="value"/> adds an offset entry that
/// repeats the previous offset (zero data bytes) and leaves the row's validity bit unset.
/// A non-null value is stored as UTF-8 bytes and its validity bit is set.
/// </summary>
/// <param name="value">The string to append, or null for a null entry.</param>
private void Append(string value)
{
    if (_dataBuffers.Count == 0)
    {
        _dataBuffers.Add(new DataFrameBuffer<byte>());
        _nullBitMapBuffers.Add(new DataFrameBuffer<byte>());
        _offsetsBuffers.Add(new DataFrameBuffer<int>());
    }

    // Direct casts: a non-mutable buffer fails here with a clear InvalidCastException
    // instead of a NullReferenceException on first use.
    DataFrameBuffer<int> mutableOffsetsBuffer = (DataFrameBuffer<int>)_offsetsBuffers[_offsetsBuffers.Count - 1];
    if (mutableOffsetsBuffer.Length == 0)
    {
        // Each offsets buffer starts with a leading 0 so that entry i spans offsets[i]..offsets[i + 1].
        mutableOffsetsBuffer.Append(0);
    }

    Length++;
    if (value == null)
    {
        mutableOffsetsBuffer.Append(mutableOffsetsBuffer[mutableOffsetsBuffer.Length - 1]);
    }
    else
    {
        byte[] bytes = Encoding.UTF8.GetBytes(value);
        DataFrameBuffer<byte> mutableDataBuffer = (DataFrameBuffer<byte>)_dataBuffers[_dataBuffers.Count - 1];

        // NOTE(review): rollover only happens when the buffer is exactly full; a value larger
        // than the remaining space is left to EnsureCapacity — confirm that is acceptable.
        if (mutableDataBuffer.Length == ReadOnlyDataFrameBuffer<byte>.MaxCapacity)
        {
            mutableDataBuffer = new DataFrameBuffer<byte>();
            _dataBuffers.Add(mutableDataBuffer);
            _nullBitMapBuffers.Add(new DataFrameBuffer<byte>());

            // The offset of this value belongs to the NEW offsets buffer, so the local must
            // be re-pointed at it. Otherwise the offset is appended to the previous buffer.
            mutableOffsetsBuffer = new DataFrameBuffer<int>();
            _offsetsBuffers.Add(mutableOffsetsBuffer);
            mutableOffsetsBuffer.Append(0);
        }

        mutableDataBuffer.EnsureCapacity(bytes.Length);
        bytes.AsMemory().CopyTo(mutableDataBuffer.Memory.Slice(mutableDataBuffer.Length));
        mutableDataBuffer.Length += bytes.Length;
        mutableOffsetsBuffer.Append(mutableOffsetsBuffer[mutableOffsetsBuffer.Length - 1] + bytes.Length);
    }

    // A set validity bit means "not null", so a null value must pass false.
    SetValidityBit(Length - 1, value != null);
}
// This is an immutable column, however this method exists to support Clone(). Keep this method private
// Appending a default span is equivalent to appending null: the entry is recorded as null in the null bitmap.
// Appending an empty (non-default) span is valid. It is NOT treated as null; it only adds a new offset entry.
/// <summary>
/// Appends one entry given as raw bytes.
/// </summary>
/// <param name="value">
/// Bytes of the entry. A default span adds a null entry: the previous offset is repeated
/// and the row's validity bit is left unset.
/// </param>
private void Append(ReadOnlySpan<byte> value)
{
    if (_dataBuffers.Count == 0)
    {
        _dataBuffers.Add(new DataFrameBuffer<byte>());
        _nullBitMapBuffers.Add(new DataFrameBuffer<byte>());
        _offsetsBuffers.Add(new DataFrameBuffer<int>());
    }

    DataFrameBuffer<int> mutableOffsetsBuffer = (DataFrameBuffer<int>)_offsetsBuffers[_offsetsBuffers.Count - 1];
    if (mutableOffsetsBuffer.Length == 0)
    {
        // Each offsets buffer starts with a leading 0 so that entry i spans offsets[i]..offsets[i + 1].
        mutableOffsetsBuffer.Append(0);
    }

    Length++;

    // ReadOnlySpan equality compares reference and length, so this is true only for a
    // default span and not for an empty span over real memory.
    bool isNull = value == default;
    if (isNull)
    {
        mutableOffsetsBuffer.Append(mutableOffsetsBuffer[mutableOffsetsBuffer.Length - 1]);
    }
    else
    {
        DataFrameBuffer<byte> mutableDataBuffer = (DataFrameBuffer<byte>)_dataBuffers[_dataBuffers.Count - 1];
        if (mutableDataBuffer.Length == ReadOnlyDataFrameBuffer<byte>.MaxCapacity)
        {
            // The data buffer is full: start a new data / bitmap / offsets triple.
            mutableDataBuffer = new DataFrameBuffer<byte>();
            _dataBuffers.Add(mutableDataBuffer);
            _nullBitMapBuffers.Add(new DataFrameBuffer<byte>());
            mutableOffsetsBuffer = new DataFrameBuffer<int>();
            _offsetsBuffers.Add(mutableOffsetsBuffer);
            mutableOffsetsBuffer.Append(0);
        }

        mutableDataBuffer.EnsureCapacity(value.Length);
        value.CopyTo(mutableDataBuffer.RawSpan.Slice(mutableDataBuffer.Length));
        mutableDataBuffer.Length += value.Length;
        mutableOffsetsBuffer.Append(mutableOffsetsBuffer[mutableOffsetsBuffer.Length - 1] + value.Length);
    }

    // A set validity bit means "not null", so a null entry must pass false.
    SetValidityBit(Length - 1, !isNull);
}
/// <summary>
/// Creates a container holding every element produced by <paramref name="values"/>, in order,
/// with each row's validity bit set.
/// </summary>
/// <param name="values">Sequence of values to copy.</param>
/// <exception cref="ArgumentNullException"><paramref name="values"/> is null.</exception>
public PrimitiveColumnContainer(IEnumerable<T> values)
{
    if (values == null)
    {
        throw new ArgumentNullException(nameof(values));
    }

    bool hasNoBuffers = Buffers.Count == 0;
    if (hasNoBuffers)
    {
        Buffers.Add(new DataFrameBuffer<T>());
        NullBitMapBuffers.Add(new DataFrameBuffer<byte>());
    }

    DataFrameBuffer<T> target = (DataFrameBuffer<T>)Buffers[Buffers.Count - 1];
    foreach (T item in values)
    {
        bool targetIsFull = target.Length == ReadOnlyDataFrameBuffer<T>.MaxCapacity;
        if (targetIsFull)
        {
            // Roll over to a fresh value buffer together with its bitmap buffer.
            target = new DataFrameBuffer<T>();
            Buffers.Add(target);
            NullBitMapBuffers.Add(new DataFrameBuffer<byte>());
        }

        target.Append(item);
        // Length still holds the index of the row being appended at this point.
        SetValidityBit(Length, true);
        Length++;
    }
}