/// <summary>
/// Replaces the column stored at <paramref name="columnIndex"/> with <paramref name="column"/>
/// and updates the name list and the name-to-index dictionary accordingly.
/// </summary>
/// <param name="columnIndex">Zero-based position of the column to replace.</param>
/// <param name="column">The replacement column.</param>
/// <exception cref="ArgumentNullException"><paramref name="column"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="columnIndex"/> is negative or not less than ColumnCount.</exception>
/// <exception cref="ArgumentException">
/// The table has rows and the column length differs from RowCount, or a column in a
/// different slot already uses the same name.
/// </exception>
public void SetColumn(int columnIndex, BaseColumn column)
{
    column = column ?? throw new ArgumentNullException(nameof(column));

    // The unsigned cast rejects negative indices with the same comparison.
    if ((uint)columnIndex >= ColumnCount)
    {
        throw new ArgumentOutOfRangeException(nameof(columnIndex));
    }

    if (RowCount > 0 && column.Length != RowCount)
    {
        throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(column));
    }

    // A name clash is only an error when the name belongs to another slot. Replacing a
    // column with one of the same name is legitimate, because the old entry for this
    // slot is removed below before the new one is registered.
    bool keepsSameName = string.Equals(_columnNames[columnIndex], column.Name, StringComparison.Ordinal);
    if (!keepsSameName && _columnNameToIndexDictionary.ContainsKey(column.Name))
    {
        throw new ArgumentException($"Table already contains a column called {column.Name}");
    }

    _columnNameToIndexDictionary.Remove(_columnNames[columnIndex]);
    _columnNames[columnIndex] = column.Name;
    _columnNameToIndexDictionary[column.Name] = columnIndex;
    _columns[columnIndex] = column;
}
/// <summary>
/// Inserts <paramref name="column"/> at <paramref name="columnIndex"/>, shifting later
/// columns one position to the right.
/// </summary>
/// <param name="columnIndex">Zero-based insertion position; may equal the current column count to append.</param>
/// <param name="column">The column to insert.</param>
/// <param name="parent">Not read by this method.</param>
/// <exception cref="ArgumentNullException"><paramref name="column"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="columnIndex"/> is negative or greater than the column count.</exception>
/// <exception cref="ArgumentException">
/// The table has rows and the column length differs from RowCount, or a column with
/// the same name already exists.
/// </exception>
public void InsertColumn(int columnIndex, BaseColumn column, DataFrame parent)
{
    column = column ?? throw new ArgumentNullException(nameof(column));

    // The unsigned cast rejects negative indices with the same comparison.
    if ((uint)columnIndex > _columns.Count)
    {
        throw new ArgumentOutOfRangeException(nameof(columnIndex));
    }

    if (RowCount > 0 && column.Length != RowCount)
    {
        throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(column));
    }

    if (_columnNameToIndexDictionary.ContainsKey(column.Name))
    {
        throw new ArgumentException($"Table already contains a column called {column.Name}");
    }

    RowCount = column.Length;
    _columnNames.Insert(columnIndex, column.Name);
    _columns.Insert(columnIndex, column);

    // Every column at or after the insertion point has moved, so its dictionary entry
    // must be refreshed. Registering only the new name would leave the shifted columns
    // pointing at their old positions.
    for (int i = columnIndex; i < _columns.Count; i++)
    {
        _columnNameToIndexDictionary[_columnNames[i]] = i;
    }

    ColumnCount++;
}
// Clones this column, optionally driven by a column of map indices.
// Visible behaviour when mapIndices is supplied:
//   - its DataType must be long or bool, otherwise ArgumentException is thrown;
//   - it must not be longer than this column, otherwise ArgumentException is thrown;
//   - a long map is forwarded to the typed Clone overload together with invertMapIndices,
//     while a bool map is forwarded to the bool overload (invertMapIndices is not passed there).
// NOTE(review): the rest of the method (the mapIndices == null path, the use of
// numberOfNullsToAppend and the return statement) is not visible in this excerpt.
public override BaseColumn Clone(BaseColumn mapIndices = null, bool invertMapIndices = false, long numberOfNullsToAppend = 0) { PrimitiveColumn <T> clone; if (!(mapIndices is null)) { if (mapIndices.DataType != typeof(long) && mapIndices.DataType != typeof(bool)) { throw new ArgumentException(String.Format(Strings.MultipleMismatchedValueType, typeof(long), typeof(bool)), nameof(mapIndices)); } if (mapIndices.Length > Length) { throw new ArgumentException(Strings.MapIndicesExceedsColumnLenth, nameof(mapIndices)); } if (mapIndices.DataType == typeof(long)) { clone = Clone(mapIndices as PrimitiveColumn <long>, invertMapIndices); } else { clone = Clone(mapIndices as PrimitiveColumn <bool>); } }
/// <summary>
/// Lazily converts this DataFrame into a sequence of Arrow record batches that share
/// one schema built from the columns' fields.
/// </summary>
/// <returns>
/// At least one <see cref="RecordBatch"/>; a DataFrame with no rows yields a single
/// batch with the schema and zero rows.
/// </returns>
public IEnumerable<RecordBatch> AsArrowRecordBatches()
{
    Apache.Arrow.Schema.Builder schemaBuilder = new Apache.Arrow.Schema.Builder();

    int columnCount = ColumnCount;
    for (int i = 0; i < columnCount; i++)
    {
        BaseColumn column = Column(i);
        Field field = column.Field();
        schemaBuilder.Field(field);
    }

    Schema schema = schemaBuilder.Build();

    int recordBatchLength = Int32.MaxValue;
    int numberOfRowsInThisRecordBatch = (int)Math.Min(recordBatchLength, RowCount);
    long numberOfRowsProcessed = 0;

    // Sometimes .NET for Spark passes in DataFrames with no rows. In those cases, we just
    // return a RecordBatch with the right Schema and no rows, hence do/while.
    do
    {
        // The batch can be no longer than the shortest run any column can provide
        // starting at the current offset.
        for (int i = 0; i < columnCount; i++)
        {
            BaseColumn column = Column(i);
            numberOfRowsInThisRecordBatch = (int)Math.Min(numberOfRowsInThisRecordBatch, column.MaxRecordBatchLength(numberOfRowsProcessed));
        }

        // A fresh list per batch: reusing one list across iterations would hand every
        // batch after the first a list that still holds the previous batches' arrays,
        // so the array count would no longer match the schema.
        List<Apache.Arrow.Array> arrays = new List<Apache.Arrow.Array>(columnCount);
        for (int i = 0; i < columnCount; i++)
        {
            BaseColumn column = Column(i);
            arrays.Add(column.AsArrowArray(numberOfRowsProcessed, numberOfRowsInThisRecordBatch));
        }

        numberOfRowsProcessed += numberOfRowsInThisRecordBatch;
        yield return new RecordBatch(schema, arrays, numberOfRowsInThisRecordBatch);
    } while (numberOfRowsProcessed < RowCount);
}
/// <summary>
/// Creates a table from a single column by delegating to the list-based constructor
/// with a one-element list that holds <paramref name="column"/>.
/// </summary>
/// <param name="column">The only column of the new table.</param>
public DataFrameTable(BaseColumn column)
    : this(new List<BaseColumn> { column })
{
}
/// <summary>
/// Builds a DataFrame from an Arrow <see cref="RecordBatch"/>, creating one column per
/// Arrow array from the array's value and null-bitmap buffers.
/// </summary>
/// <param name="recordBatch">The record batch to wrap.</param>
/// <exception cref="ArgumentNullException"><paramref name="recordBatch"/> is null.</exception>
/// <exception cref="NotImplementedException">
/// A field has an Arrow type with no column mapping here; the message carries the
/// Arrow type's name.
/// </exception>
public DataFrame(RecordBatch recordBatch)
{
    if (recordBatch is null)
    {
        throw new ArgumentNullException(nameof(recordBatch));
    }

    _table = new DataFrameTable();
    Apache.Arrow.Schema arrowSchema = recordBatch.Schema;
    int fieldIndex = 0;
    IEnumerable<IArrowArray> arrowArrays = recordBatch.Arrays;
    foreach (IArrowArray arrowArray in arrowArrays)
    {
        // Arrays are enumerated in schema order, so the running index selects the matching field.
        Field field = arrowSchema.GetFieldByIndex(fieldIndex);
        IArrowType fieldType = field.DataType;
        BaseColumn dataFrameColumn = null;
        switch (fieldType.TypeId)
        {
            case ArrowTypeId.Boolean:
                BooleanArray arrowBooleanArray = (BooleanArray)arrowArray;
                ReadOnlyMemory<byte> valueBuffer = arrowBooleanArray.ValueBuffer.Memory;
                ReadOnlyMemory<byte> nullBitMapBuffer = arrowBooleanArray.NullBitmapBuffer.Memory;
                dataFrameColumn = new PrimitiveColumn<bool>(field.Name, valueBuffer, nullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
                break;
            case ArrowTypeId.Double:
                PrimitiveArray<double> arrowDoubleArray = (PrimitiveArray<double>)arrowArray;
                ReadOnlyMemory<byte> doubleValueBuffer = arrowDoubleArray.ValueBuffer.Memory;
                ReadOnlyMemory<byte> doubleNullBitMapBuffer = arrowDoubleArray.NullBitmapBuffer.Memory;
                dataFrameColumn = new PrimitiveColumn<double>(field.Name, doubleValueBuffer, doubleNullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
                break;
            case ArrowTypeId.Float:
                PrimitiveArray<float> arrowFloatArray = (PrimitiveArray<float>)arrowArray;
                ReadOnlyMemory<byte> floatValueBuffer = arrowFloatArray.ValueBuffer.Memory;
                ReadOnlyMemory<byte> floatNullBitMapBuffer = arrowFloatArray.NullBitmapBuffer.Memory;
                dataFrameColumn = new PrimitiveColumn<float>(field.Name, floatValueBuffer, floatNullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
                break;
            case ArrowTypeId.Int8:
                PrimitiveArray<sbyte> arrowsbyteArray = (PrimitiveArray<sbyte>)arrowArray;
                ReadOnlyMemory<byte> sbyteValueBuffer = arrowsbyteArray.ValueBuffer.Memory;
                ReadOnlyMemory<byte> sbyteNullBitMapBuffer = arrowsbyteArray.NullBitmapBuffer.Memory;
                dataFrameColumn = new PrimitiveColumn<sbyte>(field.Name, sbyteValueBuffer, sbyteNullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
                break;
            case ArrowTypeId.Int16:
                PrimitiveArray<short> arrowshortArray = (PrimitiveArray<short>)arrowArray;
                ReadOnlyMemory<byte> shortValueBuffer = arrowshortArray.ValueBuffer.Memory;
                ReadOnlyMemory<byte> shortNullBitMapBuffer = arrowshortArray.NullBitmapBuffer.Memory;
                dataFrameColumn = new PrimitiveColumn<short>(field.Name, shortValueBuffer, shortNullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
                break;
            case ArrowTypeId.Int32:
                PrimitiveArray<int> arrowIntArray = (PrimitiveArray<int>)arrowArray;
                ReadOnlyMemory<byte> intValueBuffer = arrowIntArray.ValueBuffer.Memory;
                ReadOnlyMemory<byte> intNullBitMapBuffer = arrowIntArray.NullBitmapBuffer.Memory;
                dataFrameColumn = new PrimitiveColumn<int>(field.Name, intValueBuffer, intNullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
                break;
            case ArrowTypeId.Int64:
                PrimitiveArray<long> arrowLongArray = (PrimitiveArray<long>)arrowArray;
                ReadOnlyMemory<byte> longValueBuffer = arrowLongArray.ValueBuffer.Memory;
                ReadOnlyMemory<byte> longNullBitMapBuffer = arrowLongArray.NullBitmapBuffer.Memory;
                dataFrameColumn = new PrimitiveColumn<long>(field.Name, longValueBuffer, longNullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
                break;
            case ArrowTypeId.String:
                // Strings carry an additional offsets buffer alongside the data and null bitmap.
                StringArray stringArray = (StringArray)arrowArray;
                ReadOnlyMemory<byte> dataMemory = stringArray.ValueBuffer.Memory;
                ReadOnlyMemory<byte> offsetsMemory = stringArray.ValueOffsetsBuffer.Memory;
                ReadOnlyMemory<byte> nullMemory = stringArray.NullBitmapBuffer.Memory;
                dataFrameColumn = new ArrowStringColumn(field.Name, dataMemory, offsetsMemory, nullMemory, stringArray.Length, stringArray.NullCount);
                break;
            case ArrowTypeId.UInt8:
                PrimitiveArray<byte> arrowbyteArray = (PrimitiveArray<byte>)arrowArray;
                ReadOnlyMemory<byte> byteValueBuffer = arrowbyteArray.ValueBuffer.Memory;
                ReadOnlyMemory<byte> byteNullBitMapBuffer = arrowbyteArray.NullBitmapBuffer.Memory;
                dataFrameColumn = new PrimitiveColumn<byte>(field.Name, byteValueBuffer, byteNullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
                break;
            case ArrowTypeId.UInt16:
                PrimitiveArray<ushort> arrowUshortArray = (PrimitiveArray<ushort>)arrowArray;
                ReadOnlyMemory<byte> ushortValueBuffer = arrowUshortArray.ValueBuffer.Memory;
                ReadOnlyMemory<byte> ushortNullBitMapBuffer = arrowUshortArray.NullBitmapBuffer.Memory;
                dataFrameColumn = new PrimitiveColumn<ushort>(field.Name, ushortValueBuffer, ushortNullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
                break;
            case ArrowTypeId.UInt32:
                PrimitiveArray<uint> arrowUintArray = (PrimitiveArray<uint>)arrowArray;
                ReadOnlyMemory<byte> uintValueBuffer = arrowUintArray.ValueBuffer.Memory;
                ReadOnlyMemory<byte> uintNullBitMapBuffer = arrowUintArray.NullBitmapBuffer.Memory;
                dataFrameColumn = new PrimitiveColumn<uint>(field.Name, uintValueBuffer, uintNullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
                break;
            case ArrowTypeId.UInt64:
                PrimitiveArray<ulong> arrowUlongArray = (PrimitiveArray<ulong>)arrowArray;
                ReadOnlyMemory<byte> ulongValueBuffer = arrowUlongArray.ValueBuffer.Memory;
                ReadOnlyMemory<byte> ulongNullBitMapBuffer = arrowUlongArray.NullBitmapBuffer.Memory;
                dataFrameColumn = new PrimitiveColumn<ulong>(field.Name, ulongValueBuffer, ulongNullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
                break;
            // The Arrow types below have no column mapping yet; they are listed explicitly
            // so the gap is visible, and they share the default handling.
            case ArrowTypeId.Decimal:
            case ArrowTypeId.Binary:
            case ArrowTypeId.Date32:
            case ArrowTypeId.Date64:
            case ArrowTypeId.Dictionary:
            case ArrowTypeId.FixedSizedBinary:
            case ArrowTypeId.HalfFloat:
            case ArrowTypeId.Interval:
            case ArrowTypeId.List:
            case ArrowTypeId.Map:
            case ArrowTypeId.Null:
            case ArrowTypeId.Struct:
            case ArrowTypeId.Time32:
            case ArrowTypeId.Time64:
            default:
                // Report the actual type name; nameof(fieldType.Name) would always
                // produce the literal string "Name".
                throw new NotImplementedException(fieldType.Name);
        }

        _table.InsertColumn(ColumnCount, dataFrameColumn);
        fieldIndex++;
    }
}
/// <summary>
/// Overridable "less than" comparison against <paramref name="column"/>.
/// This base implementation has no behaviour of its own and always throws.
/// </summary>
/// <param name="column">The column to compare with.</param>
/// <exception cref="NotImplementedException">Always thrown by this implementation.</exception>
public virtual PrimitiveColumn<bool> LessThan(BaseColumn column) => throw new NotImplementedException();
/// <summary>
/// Inserts <paramref name="column"/> into the underlying table at
/// <paramref name="columnIndex"/>, passing this DataFrame as the parent, and then
/// raises the columns-changed notification.
/// </summary>
/// <param name="columnIndex">Zero-based insertion position.</param>
/// <param name="column">The column to insert.</param>
public void InsertColumn(int columnIndex, BaseColumn column)
{
    // The table performs all validation; notification happens only if it did not throw.
    _table.InsertColumn(columnIndex, column, this);

    OnColumnsChanged();
}
// TODO: Merge API with an "On" parameter that merges on a column common to 2 dataframes
/// <summary>
/// Merge DataFrames with a database style join, implemented as a simple hash join.
/// Row indices of the matching rows are collected for each side first, and the result
/// columns are then produced by cloning the source columns through those index maps.
/// </summary>
/// <typeparam name="TKey">Type of the values in the join columns.</typeparam>
/// <param name="other">The right-hand DataFrame.</param>
/// <param name="leftJoinColumn">Name of the join column in this DataFrame.</param>
/// <param name="rightJoinColumn">Name of the join column in <paramref name="other"/>.</param>
/// <param name="leftSuffix">Suffix forwarded to SetSuffixForDuplicatedColumnNames.</param>
/// <param name="rightSuffix">Suffix forwarded to SetSuffixForDuplicatedColumnNames.</param>
/// <param name="joinAlgorithm">The kind of join to perform.</param>
/// <returns>A new DataFrame holding the joined rows.</returns>
/// <exception cref="ArgumentNullException"><paramref name="other"/> is null.</exception>
/// <exception cref="NotImplementedException"><paramref name="joinAlgorithm"/> is not one of the handled values.</exception>
/// <remarks>
/// Null keys only match null keys. For an inner join the frame with fewer rows is
/// hashed and its columns come first in the result, so the column order depends on
/// the relative row counts.
/// </remarks>
public DataFrame Merge<TKey>(DataFrame other, string leftJoinColumn, string rightJoinColumn, string leftSuffix = "_left", string rightSuffix = "_right", JoinAlgorithm joinAlgorithm = JoinAlgorithm.Left)
{
    if (other is null)
    {
        throw new ArgumentNullException(nameof(other));
    }

    DataFrame ret = new DataFrame();
    DataFrame leftDataFrame = this;
    DataFrame rightDataFrame = other;

    // The final table size is not known until runtime, so matches are gathered as index maps.
    PrimitiveColumn<long> leftRowIndices = new PrimitiveColumn<long>("LeftIndices");
    PrimitiveColumn<long> rightRowIndices = new PrimitiveColumn<long>("RightIndices");

    if (joinAlgorithm == JoinAlgorithm.Left)
    {
        // Hash the right side, probe with the left side, keep unmatched left rows.
        BaseColumn otherColumn = other[rightJoinColumn];
        Dictionary<TKey, ICollection<long>> multimap = otherColumn.GroupColumnValues<TKey>();
        BaseColumn thisColumn = this[leftJoinColumn];
        ProbeForMerge(thisColumn, otherColumn, multimap, leftRowIndices, rightRowIndices, keepUnmatchedProbeRows: true, matchedHashedRows: null);
    }
    else if (joinAlgorithm == JoinAlgorithm.Right)
    {
        // Hash the left side, probe with the right side, keep unmatched right rows.
        BaseColumn thisColumn = this[leftJoinColumn];
        Dictionary<TKey, ICollection<long>> multimap = thisColumn.GroupColumnValues<TKey>();
        BaseColumn otherColumn = other[rightJoinColumn];
        ProbeForMerge(otherColumn, thisColumn, multimap, rightRowIndices, leftRowIndices, keepUnmatchedProbeRows: true, matchedHashedRows: null);
    }
    else if (joinAlgorithm == JoinAlgorithm.Inner)
    {
        // Hash the column with the smaller RowCount and probe with the other one.
        bool hashLeft = RowCount < other.RowCount;
        DataFrame shorterDataFrame = hashLeft ? this : other;
        DataFrame longerDataFrame = hashLeft ? other : this;
        BaseColumn hashColumn = hashLeft ? this[leftJoinColumn] : other[rightJoinColumn];
        BaseColumn probeColumn = hashLeft ? other[rightJoinColumn] : this[leftJoinColumn];
        Dictionary<TKey, ICollection<long>> multimap = hashColumn.GroupColumnValues<TKey>();

        // The hashed (shorter) frame is emitted as the left side of the result.
        ProbeForMerge(probeColumn, hashColumn, multimap, rightRowIndices, leftRowIndices, keepUnmatchedProbeRows: false, matchedHashedRows: null);
        leftDataFrame = shorterDataFrame;
        rightDataFrame = longerDataFrame;
    }
    else if (joinAlgorithm == JoinAlgorithm.FullOuter)
    {
        BaseColumn otherColumn = other[rightJoinColumn];
        Dictionary<TKey, ICollection<long>> multimap = otherColumn.GroupColumnValues<TKey>();
        BaseColumn thisColumn = this[leftJoinColumn];

        // Matches are tracked per right-hand row rather than per key. A per-key set
        // cannot tell a null key from default(TKey), which can make unmatched right
        // rows disappear from the result.
        HashSet<long> matchedRightRows = new HashSet<long>();
        ProbeForMerge(thisColumn, otherColumn, multimap, leftRowIndices, rightRowIndices, keepUnmatchedProbeRows: true, matchedHashedRows: matchedRightRows);

        // Append every right-hand row that no left-hand row paired with.
        for (long i = 0; i < otherColumn.Length; i++)
        {
            if (!matchedRightRows.Contains(i))
            {
                leftRowIndices.Append(null);
                rightRowIndices.Append(i);
            }
        }
    }
    else
    {
        throw new NotImplementedException(nameof(joinAlgorithm));
    }

    for (int i = 0; i < leftDataFrame.ColumnCount; i++)
    {
        ret.InsertColumn(i, leftDataFrame.Column(i).Clone(leftRowIndices));
    }

    for (int i = 0; i < rightDataFrame.ColumnCount; i++)
    {
        BaseColumn column = rightDataFrame.Column(i).Clone(rightRowIndices);
        SetSuffixForDuplicatedColumnNames(ret, column, leftSuffix, rightSuffix);
        ret.InsertColumn(ret.ColumnCount, column);
    }

    return ret;
}

// Probes multimap (built from hashedColumn) with every row of probeColumn and records the
// matching row pairs. Null probe values are looked up under default(TKey) and only pair
// with null entries of hashedColumn; non-null values only pair with non-null entries.
// When keepUnmatchedProbeRows is set, a probe row that ends up with no partner is recorded
// with a null partner. This also covers the case where the key lookup succeeds but the
// null filter rejects every candidate.
private static void ProbeForMerge<TKey>(BaseColumn probeColumn, BaseColumn hashedColumn, Dictionary<TKey, ICollection<long>> multimap, PrimitiveColumn<long> probeRowIndices, PrimitiveColumn<long> hashedRowIndices, bool keepUnmatchedProbeRows, HashSet<long> matchedHashedRows)
{
    for (long i = 0; i < probeColumn.Length; i++)
    {
        var probeValue = probeColumn[i];
        bool probeIsNull = probeValue == null;
        TKey key = (TKey)(probeIsNull ? default(TKey) : probeValue);

        bool matched = false;
        if (multimap.TryGetValue(key, out ICollection<long> candidateRows))
        {
            foreach (long row in candidateRows)
            {
                // Nulls pair only with nulls, values only with values.
                if ((hashedColumn[row] == null) != probeIsNull)
                {
                    continue;
                }

                probeRowIndices.Append(i);
                hashedRowIndices.Append(row);
                matchedHashedRows?.Add(row);
                matched = true;
            }
        }

        if (!matched && keepUnmatchedProbeRows)
        {
            probeRowIndices.Append(i);
            hashedRowIndices.Append(null);
        }
    }
}
/// <summary>
/// Overridable XOR operation with <paramref name="column"/>.
/// This base implementation has no behaviour of its own and always throws.
/// </summary>
/// <param name="column">The other operand.</param>
/// <exception cref="NotImplementedException">Always thrown by this implementation.</exception>
public virtual BaseColumn Xor(BaseColumn column) => throw new NotImplementedException();
/// <summary>
/// Overridable "greater than" comparison against <paramref name="column"/>.
/// This base implementation has no behaviour of its own and always throws.
/// </summary>
/// <param name="column">The column to compare with.</param>
/// <exception cref="NotImplementedException">Always thrown by this implementation.</exception>
public virtual BaseColumn GreaterThan(BaseColumn column) => throw new NotImplementedException();
/// <summary>
/// Overridable "less than or equal" comparison against <paramref name="column"/>.
/// This base implementation has no behaviour of its own and always throws.
/// </summary>
/// <param name="column">The column to compare with.</param>
/// <exception cref="NotImplementedException">Always thrown by this implementation.</exception>
public virtual BaseColumn LessThanOrEqual(BaseColumn column) => throw new NotImplementedException();
/// <summary>
/// Returns the getter delegate that the column at index <paramref name="col"/> of the
/// wrapped DataFrame produces for this instance.
/// </summary>
/// <param name="col">Zero-based column index in the wrapped DataFrame.</param>
private Delegate CreateGetterDelegate(int col) => _dataFrame.Column(col).GetDataViewGetter(this);
// TODO: Merge API with an "On" parameter that merges on a column common to 2 dataframes
/// <summary>
/// Merge DataFrames with a database style join, implemented as a simple hash join.
/// The result is first populated with empty clones of all columns of both frames and
/// then filled row by row through AppendForMerge.
/// </summary>
/// <typeparam name="TKey">Type of the values in the join columns.</typeparam>
/// <param name="other">The right-hand DataFrame.</param>
/// <param name="leftJoinColumn">Name of the join column in this DataFrame.</param>
/// <param name="rightJoinColumn">Name of the join column in <paramref name="other"/>.</param>
/// <param name="leftSuffix">Suffix forwarded to SetSuffixForDuplicatedColumnNames.</param>
/// <param name="rightSuffix">Suffix forwarded to SetSuffixForDuplicatedColumnNames.</param>
/// <param name="joinAlgorithm">The kind of join to perform.</param>
/// <returns>
/// A new DataFrame holding the joined rows. For an unrecognised
/// <paramref name="joinAlgorithm"/> the result contains only the empty cloned columns.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="other"/> is null.</exception>
/// <remarks>
/// Null keys are looked up under default(TKey) and only pair with null entries on the
/// other side. A row index of -1 passed to AppendForMerge marks the side without a partner.
/// </remarks>
public DataFrame Merge<TKey>(DataFrame other, string leftJoinColumn, string rightJoinColumn, string leftSuffix = "_left", string rightSuffix = "_right", JoinAlgorithm joinAlgorithm = JoinAlgorithm.Left)
{
    if (other is null)
    {
        throw new ArgumentNullException(nameof(other));
    }

    // A simple hash join
    DataFrame ret = new DataFrame();
    PrimitiveColumn<long> emptyMap = new PrimitiveColumn<long>("Empty");
    for (int i = 0; i < ColumnCount; i++)
    {
        // Create empty columns
        BaseColumn column = Column(i).Clone(emptyMap);
        ret.InsertColumn(ret.ColumnCount, column);
    }

    for (int i = 0; i < other.ColumnCount; i++)
    {
        // Create empty columns
        BaseColumn column = other.Column(i).Clone(emptyMap);
        SetSuffixForDuplicatedColumnNames(ret, column, leftSuffix, rightSuffix);
        ret.InsertColumn(ret.ColumnCount, column);
    }

    // Resizes every result column to the same length.
    void ResizeResultColumns(long length)
    {
        for (int c = 0; c < ret.ColumnCount; c++)
        {
            ret.Column(c).Resize(length);
        }
    }

    // The final table size is not known until runtime
    long rowNumber = 0;

    // NOTE(review): capacity is only checked once per probe row, while one probe row may
    // append several result rows in the inner loops below. Whether that is safe depends
    // on AppendForMerge, which is not visible here. The resize sequence is left as it was.
    if (joinAlgorithm == JoinAlgorithm.Left)
    {
        // First hash other dataframe on the rightJoinColumn
        BaseColumn otherColumn = other[rightJoinColumn];
        Dictionary<TKey, ICollection<long>> multimap = otherColumn.GroupColumnValues<TKey>();

        // Go over the records in this dataframe and match with the dictionary
        BaseColumn thisColumn = this[leftJoinColumn];
        ResizeResultColumns(thisColumn.Length);

        for (long i = 0; i < thisColumn.Length; i++)
        {
            if (rowNumber >= thisColumn.Length)
            {
                ResizeResultColumns(rowNumber + 1);
            }

            bool leftIsNull = thisColumn[i] == null;
            TKey value = (TKey)(thisColumn[i] ?? default(TKey));
            bool matched = false;
            if (multimap.TryGetValue(value, out ICollection<long> rowNumbers))
            {
                foreach (long row in rowNumbers)
                {
                    // Nulls pair only with nulls, values only with values.
                    if ((otherColumn[row] == null) == leftIsNull)
                    {
                        AppendForMerge(ret, rowNumber++, this, other, i, row);
                        matched = true;
                    }
                }
            }

            // Keep the left row even when the key lookup hit but the null filter rejected
            // every candidate; a left join must not drop left rows.
            if (!matched)
            {
                AppendForMerge(ret, rowNumber++, this, other, i, -1);
            }
        }

        ret._table.RowCount = rowNumber;
    }
    else if (joinAlgorithm == JoinAlgorithm.Right)
    {
        BaseColumn thisColumn = this[leftJoinColumn];
        Dictionary<TKey, ICollection<long>> multimap = thisColumn.GroupColumnValues<TKey>();
        BaseColumn otherColumn = other[rightJoinColumn];
        ResizeResultColumns(otherColumn.Length);

        for (long i = 0; i < otherColumn.Length; i++)
        {
            if (rowNumber >= otherColumn.Length)
            {
                ResizeResultColumns(rowNumber + 1);
            }

            bool rightIsNull = otherColumn[i] == null;
            TKey value = (TKey)(otherColumn[i] ?? default(TKey));
            bool matched = false;
            if (multimap.TryGetValue(value, out ICollection<long> rowNumbers))
            {
                foreach (long row in rowNumbers)
                {
                    if ((thisColumn[row] == null) == rightIsNull)
                    {
                        AppendForMerge(ret, rowNumber++, this, other, row, i);
                        matched = true;
                    }
                }
            }

            // Same reasoning as for the left join, mirrored: right rows are never dropped.
            if (!matched)
            {
                AppendForMerge(ret, rowNumber++, this, other, -1, i);
            }
        }

        ret._table.RowCount = rowNumber;
    }
    else if (joinAlgorithm == JoinAlgorithm.Inner)
    {
        // Hash the column with the smaller RowCount
        long leftRowCount = RowCount;
        long rightRowCount = other.RowCount;
        DataFrame longerDataFrame = leftRowCount < rightRowCount ? other : this;
        DataFrame shorterDataFrame = ReferenceEquals(longerDataFrame, this) ? other : this;
        BaseColumn hashColumn = (leftRowCount < rightRowCount) ? this[leftJoinColumn] : other[rightJoinColumn];
        BaseColumn otherColumn = ReferenceEquals(hashColumn, this[leftJoinColumn]) ? other[rightJoinColumn] : this[leftJoinColumn];
        Dictionary<TKey, ICollection<long>> multimap = hashColumn.GroupColumnValues<TKey>();
        ResizeResultColumns(1);

        for (long i = 0; i < otherColumn.Length; i++)
        {
            if (rowNumber >= ret.Column(0).Length)
            {
                ResizeResultColumns(rowNumber + 1);
            }

            bool probeIsNull = otherColumn[i] == null;
            TKey value = (TKey)(otherColumn[i] ?? default(TKey));
            if (multimap.TryGetValue(value, out ICollection<long> rowNumbers))
            {
                foreach (long row in rowNumbers)
                {
                    if ((hashColumn[row] == null) == probeIsNull)
                    {
                        // "row" indexes the hashed (shorter) frame and "i" the probed one;
                        // map them back to left/right order.
                        AppendForMerge(ret, rowNumber++, this, other, ReferenceEquals(this, shorterDataFrame) ? row : i, ReferenceEquals(this, shorterDataFrame) ? i : row);
                    }
                }
            }
        }

        ret._table.RowCount = rowNumber;
    }
    else if (joinAlgorithm == JoinAlgorithm.FullOuter)
    {
        BaseColumn otherColumn = other[rightJoinColumn];
        Dictionary<TKey, ICollection<long>> multimap = otherColumn.GroupColumnValues<TKey>();

        // Matches are tracked per right-hand row rather than per key. A per-key set
        // cannot tell a null key from default(TKey), which can make unmatched right
        // rows disappear from the result.
        HashSet<long> matchedRightRows = new HashSet<long>();

        // Go over the records in this dataframe and match with the dictionary
        BaseColumn thisColumn = this[leftJoinColumn];
        ResizeResultColumns(thisColumn.Length + 1);

        for (long i = 0; i < thisColumn.Length; i++)
        {
            if (rowNumber >= thisColumn.Length)
            {
                ResizeResultColumns(rowNumber + 1);
            }

            bool leftIsNull = thisColumn[i] == null;
            TKey value = (TKey)(thisColumn[i] ?? default(TKey));
            bool matched = false;
            if (multimap.TryGetValue(value, out ICollection<long> rowNumbers))
            {
                foreach (long row in rowNumbers)
                {
                    if ((otherColumn[row] == null) == leftIsNull)
                    {
                        AppendForMerge(ret, rowNumber++, this, other, i, row);
                        matchedRightRows.Add(row);
                        matched = true;
                    }
                }
            }

            if (!matched)
            {
                AppendForMerge(ret, rowNumber++, this, other, i, -1);
            }
        }

        // Append every right-hand row that no left-hand row paired with.
        for (long i = 0; i < otherColumn.Length; i++)
        {
            if (rowNumber >= ret.Column(0).Length)
            {
                ResizeResultColumns(rowNumber + 1);
            }

            if (!matchedRightRows.Contains(i))
            {
                if (rowNumber >= otherColumn.Length)
                {
                    ResizeResultColumns(rowNumber + 1);
                }

                AppendForMerge(ret, rowNumber++, this, other, -1, i);
            }
        }

        ret._table.RowCount = rowNumber;
    }

    return ret;
}
/// <summary>
/// Overridable XOR operation with <paramref name="column"/>.
/// This base implementation has no behaviour of its own and always throws.
/// </summary>
/// <param name="column">The other operand.</param>
/// <param name="inPlace">Not read by this implementation.</param>
/// <exception cref="NotImplementedException">Always thrown by this implementation.</exception>
public virtual BaseColumn Xor(BaseColumn column, bool inPlace = false) => throw new NotImplementedException();
/// <summary>
/// Inserts <paramref name="column"/> at <paramref name="columnIndex"/> by forwarding
/// the call unchanged to the underlying table.
/// </summary>
/// <param name="columnIndex">Zero-based insertion position.</param>
/// <param name="column">The column to insert.</param>
public void InsertColumn(int columnIndex, BaseColumn column)
{
    _table.InsertColumn(columnIndex, column);
}
/// <summary>
/// Replaces the column at <paramref name="columnIndex"/> with <paramref name="column"/>
/// by forwarding the call unchanged to the underlying table.
/// </summary>
/// <param name="columnIndex">Zero-based position of the column to replace.</param>
/// <param name="column">The replacement column.</param>
public void SetColumn(int columnIndex, BaseColumn column)
{
    _table.SetColumn(columnIndex, column);
}
/// <summary>
/// Joins two DataFrames positionally (row i with row i) and returns a new DataFrame
/// containing this frame's columns followed by the columns of <paramref name="other"/>.
/// </summary>
/// <param name="other">The right-hand DataFrame.</param>
/// <param name="leftSuffix">Suffix forwarded to SetSuffixForDuplicatedColumnNames.</param>
/// <param name="rightSuffix">Suffix forwarded to SetSuffixForDuplicatedColumnNames.</param>
/// <param name="joinAlgorithm">
/// Left keeps this frame's row count, Right keeps the other frame's row count,
/// FullOuter pads the shorter side with nulls, Inner truncates both to the shorter one.
/// </param>
/// <returns>
/// The joined DataFrame; it has no columns when <paramref name="joinAlgorithm"/> is
/// none of the four handled values.
/// </returns>
public DataFrame Join(DataFrame other, string leftSuffix = "_left", string rightSuffix = "_right", JoinAlgorithm joinAlgorithm = JoinAlgorithm.Left)
{
    DataFrame joined = new DataFrame();

    // Builds the map 0, 1, ..., length - 1 used to clone the first "length" rows of a column.
    PrimitiveColumn<long> BuildIdentityMap(long length)
    {
        PrimitiveColumn<long> identity = new PrimitiveColumn<long>("mapIndices", length);
        for (long row = 0; row < length; row++)
        {
            identity[row] = row;
        }

        return identity;
    }

    // Appends a column that originates from this frame.
    void AddLeft(BaseColumn leftColumn)
    {
        joined.InsertColumn(joined.ColumnCount, leftColumn);
    }

    // Appends a column that originates from the other frame, resolving name clashes first.
    void AddRight(BaseColumn rightColumn)
    {
        SetSuffixForDuplicatedColumnNames(joined, rightColumn, leftSuffix, rightSuffix);
        joined.InsertColumn(joined.ColumnCount, rightColumn);
    }

    switch (joinAlgorithm)
    {
        case JoinAlgorithm.Left:
        {
            for (int i = 0; i < ColumnCount; i++)
            {
                AddLeft(Column(i).Clone());
            }

            PrimitiveColumn<long> firstRows = BuildIdentityMap(Math.Min(RowCount, other.RowCount));
            for (int i = 0; i < other.ColumnCount; i++)
            {
                BaseColumn source = other.Column(i);
                BaseColumn fitted;
                if (other.RowCount < RowCount)
                {
                    // The right side is shorter: pad it up to this frame's row count.
                    fitted = source.Clone(numberOfNullsToAppend: RowCount - other.RowCount);
                }
                else
                {
                    // The right side is at least as long: keep only the leading rows.
                    fitted = source.Clone(firstRows);
                }

                AddRight(fitted);
            }

            break;
        }

        case JoinAlgorithm.Right:
        {
            PrimitiveColumn<long> firstRows = BuildIdentityMap(Math.Min(RowCount, other.RowCount));
            for (int i = 0; i < ColumnCount; i++)
            {
                BaseColumn source = Column(i);
                BaseColumn fitted;
                if (RowCount < other.RowCount)
                {
                    fitted = source.Clone(numberOfNullsToAppend: other.RowCount - RowCount);
                }
                else
                {
                    fitted = source.Clone(firstRows);
                }

                AddLeft(fitted);
            }

            for (int i = 0; i < other.ColumnCount; i++)
            {
                AddRight(other.Column(i).Clone());
            }

            break;
        }

        case JoinAlgorithm.FullOuter:
        {
            long totalRows = Math.Max(RowCount, other.RowCount);

            long leftPadding = totalRows - RowCount;
            for (int i = 0; i < ColumnCount; i++)
            {
                AddLeft(Column(i).Clone(numberOfNullsToAppend: leftPadding));
            }

            long rightPadding = totalRows - other.RowCount;
            for (int i = 0; i < other.ColumnCount; i++)
            {
                AddRight(other.Column(i).Clone(numberOfNullsToAppend: rightPadding));
            }

            break;
        }

        case JoinAlgorithm.Inner:
        {
            PrimitiveColumn<long> firstRows = BuildIdentityMap(Math.Min(RowCount, other.RowCount));
            for (int i = 0; i < ColumnCount; i++)
            {
                AddLeft(Column(i).Clone(firstRows));
            }

            for (int i = 0; i < other.ColumnCount; i++)
            {
                AddRight(other.Column(i).Clone(firstRows));
            }

            break;
        }

        default:
            // Unrecognised algorithms fall through and yield the empty frame.
            break;
    }

    return joined;
}
/// <summary>
/// Replaces the column at <paramref name="columnIndex"/> in the underlying table with
/// <paramref name="column"/> and then raises the columns-changed notification.
/// </summary>
/// <param name="columnIndex">Zero-based position of the column to replace.</param>
/// <param name="column">The replacement column.</param>
public void SetColumn(int columnIndex, BaseColumn column)
{
    // The table performs all validation; notification happens only if it did not throw.
    _table.SetColumn(columnIndex, column);

    OnColumnsChanged();
}
/// <summary>
/// Overridable "greater than or equal" comparison against <paramref name="column"/>.
/// This base implementation has no behaviour of its own and always throws.
/// </summary>
/// <param name="column">The column to compare with.</param>
/// <exception cref="NotImplementedException">Always thrown by this implementation.</exception>
public virtual PrimitiveColumn<bool> GreaterThanOrEqual(BaseColumn column) => throw new NotImplementedException();