/// <summary>
/// Calls <c>ElementwiseGreaterThanOrEqual</c> on every column, pairing the column at index i with <c>values[i]</c>
/// </summary>
/// <param name="values">One value per column, in column order</param>
/// <returns>A new <see cref="DataFrame"/> holding the per-column results</returns>
/// <exception cref="ArgumentException">The number of values differs from the number of columns</exception>
public DataFrame ElementwiseGreaterThanOrEqual<T>(IReadOnlyList<T> values)
    where T : unmanaged
{
    if (values.Count != Columns.Count)
    {
        throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(values));
    }

    DataFrame comparisonResult = new DataFrame();
    int columnIndex = 0;
    while (columnIndex < Columns.Count)
    {
        DataFrameColumn source = _columnCollection[columnIndex];
        DataFrameColumn compared = source.ElementwiseGreaterThanOrEqual(values[columnIndex]);
        comparisonResult.Columns.Insert(columnIndex, compared);
        columnIndex++;
    }

    return comparisonResult;
}
/// <summary>
/// Performs a reversed element-wise boolean Xor on each column
/// </summary>
/// <param name="value">The boolean operand handed to each column's <c>ReverseXor</c></param>
/// <param name="inPlace">When true, the columns of this DataFrame are replaced and this instance is returned; otherwise a new DataFrame is filled</param>
/// <returns>This DataFrame when <paramref name="inPlace"/> is true, a new one otherwise</returns>
public DataFrame ReverseXor(bool value, bool inPlace = false)
{
    DataFrame target;
    if (inPlace)
    {
        target = this;
    }
    else
    {
        target = new DataFrame();
    }

    for (int columnIndex = 0; columnIndex < Columns.Count; columnIndex++)
    {
        DataFrameColumn source = _columnCollection[columnIndex];
        DataFrameColumn updated = source.ReverseXor(value, inPlace);

        if (!inPlace)
        {
            target.Columns.Insert(columnIndex, updated);
            continue;
        }

        // In-place: overwrite the slot that already exists
        target.Columns[columnIndex] = updated;
    }

    return target;
}
/// <summary>
/// Performs an element-wise subtraction on each column
/// </summary>
/// <param name="value">The operand handed to each column's <c>Subtract</c></param>
/// <param name="inPlace">When true, the columns of this DataFrame are replaced and this instance is returned; otherwise a new DataFrame is filled</param>
/// <returns>This DataFrame when <paramref name="inPlace"/> is true, a new one otherwise</returns>
public DataFrame Subtract<T>(T value, bool inPlace = false)
    where T : unmanaged
{
    DataFrame target;
    if (inPlace)
    {
        target = this;
    }
    else
    {
        target = new DataFrame();
    }

    for (int columnIndex = 0; columnIndex < Columns.Count; columnIndex++)
    {
        DataFrameColumn source = _columnCollection[columnIndex];
        DataFrameColumn updated = source.Subtract(value, inPlace);

        if (!inPlace)
        {
            target.Columns.Insert(columnIndex, updated);
            continue;
        }

        // In-place: overwrite the slot that already exists
        target.Columns[columnIndex] = updated;
    }

    return target;
}
/// <summary>
/// Calls <c>Xor</c> on every column, pairing the column at index i with <c>values[i]</c>
/// </summary>
/// <param name="values">One boolean per column, in column order</param>
/// <param name="inPlace">When true, the columns of this DataFrame are replaced and this instance is returned; otherwise a new DataFrame is filled</param>
/// <returns>This DataFrame when <paramref name="inPlace"/> is true, a new one otherwise</returns>
/// <exception cref="ArgumentException">The number of values differs from the number of columns</exception>
public DataFrame Xor(IReadOnlyList<bool> values, bool inPlace = false)
{
    if (values.Count != Columns.Count)
    {
        throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(values));
    }

    DataFrame target;
    if (inPlace)
    {
        target = this;
    }
    else
    {
        target = new DataFrame();
    }

    for (int columnIndex = 0; columnIndex < Columns.Count; columnIndex++)
    {
        DataFrameColumn source = _columnCollection[columnIndex];
        DataFrameColumn updated = source.Xor(values[columnIndex], inPlace);

        if (!inPlace)
        {
            target.Columns.Insert(columnIndex, updated);
            continue;
        }

        // In-place: overwrite the slot that already exists
        target.Columns[columnIndex] = updated;
    }

    return target;
}
/// <summary>
/// Returns an <see cref="IEnumerable{RecordBatch}"/> without copying data
/// </summary>
/// <remarks>
/// The schema is built once from every column's Arrow field. Rows are then emitted in batches whose length is
/// limited by the smallest <c>GetMaxRecordBatchLength</c> reported by any column. At least one batch is always
/// yielded, even when the DataFrame has no rows.
/// </remarks>
/// <returns>A lazily evaluated sequence of record batches sharing one schema</returns>
public IEnumerable<RecordBatch> ToArrowRecordBatches()
{
    Apache.Arrow.Schema.Builder schemaBuilder = new Apache.Arrow.Schema.Builder();

    int columnCount = Columns.Count;
    for (int i = 0; i < columnCount; i++)
    {
        DataFrameColumn column = Columns[i];
        Field field = column.GetArrowField();
        schemaBuilder.Field(field);
    }

    Schema schema = schemaBuilder.Build();

    int recordBatchLength = Int32.MaxValue;
    int numberOfRowsInThisRecordBatch = (int)Math.Min(recordBatchLength, RowCount);
    long numberOfRowsProcessed = 0;

    // Sometimes .NET for Spark passes in DataFrames with no rows. In those cases, we just return a RecordBatch with the right Schema and no rows
    do
    {
        for (int i = 0; i < columnCount; i++)
        {
            DataFrameColumn column = Columns[i];
            numberOfRowsInThisRecordBatch = (int)Math.Min(numberOfRowsInThisRecordBatch, column.GetMaxRecordBatchLength(numberOfRowsProcessed));
        }

        // A fresh list per batch: reusing one list across iterations would make every batch after the
        // first also contain the arrays of all previous batches, so it would no longer line up with the schema.
        List<Apache.Arrow.Array> arrays = new List<Apache.Arrow.Array>(columnCount);
        for (int i = 0; i < columnCount; i++)
        {
            DataFrameColumn column = Columns[i];
            arrays.Add(column.ToArrowArray(numberOfRowsProcessed, numberOfRowsInThisRecordBatch));
        }

        numberOfRowsProcessed += numberOfRowsInThisRecordBatch;
        yield return new RecordBatch(schema, arrays, numberOfRowsInThisRecordBatch);
    } while (numberOfRowsProcessed < RowCount);
}
/// <summary>
/// Collects, per row of the retained DataFrame, the supplementary rows reported by <c>GetGroupedOccurrences</c> for every join column
/// </summary>
/// <remarks>
/// Join columns are processed one at a time. After the first column, only retained rows that still have
/// occurrences are examined, and each new result is narrowed against the previous one through
/// <c>GetShrinkedOccurences</c>, because a join requires ALL column pairs to match.
/// </remarks>
/// <param name="retainedDataFrame">DataFrame whose row indices become the keys of the result</param>
/// <param name="supplementaryDataFrame">DataFrame whose row indices become the values of the result</param>
/// <param name="retainedJoinColumnNames">Join column names in the retained DataFrame</param>
/// <param name="supplemetaryJoinColumnNames">Join column names in the supplementary DataFrame, positionally paired with <paramref name="retainedJoinColumnNames"/></param>
/// <param name="supplementaryJoinColumnsNullIndices">Union of the null-index sets reported for each supplementary join column</param>
/// <returns>The occurrence map, or <c>null</c> when <paramref name="retainedJoinColumnNames"/> is empty</returns>
private static Dictionary<long, ICollection<long>> GetOccurences(DataFrame retainedDataFrame, DataFrame supplementaryDataFrame, string[] retainedJoinColumnNames, string[] supplemetaryJoinColumnNames, out HashSet<long> supplementaryJoinColumnsNullIndices)
{
    supplementaryJoinColumnsNullIndices = new HashSet<long>();

    Dictionary<long, ICollection<long>> matchesSoFar = null;
    Dictionary<long, long> localToOriginalIndex = null;

    for (int joinColumn = 0; joinColumn < retainedJoinColumnNames.Length; joinColumn++)
    {
        DataFrameColumn retainedColumn = retainedDataFrame.Columns[retainedJoinColumnNames[joinColumn]];
        bool hasPreviousStep = matchesSoFar != null;

        if (hasPreviousStep)
        {
            // Only rows that matched on the previous join columns are worth checking again
            long[] survivingRows = matchesSoFar.Keys.ToArray();

            // Position in the shrunk column -> row index in the original retained DataFrame
            localToOriginalIndex = new Dictionary<long, long>(survivingRows.Length);
            for (int position = 0; position < survivingRows.Length; position++)
            {
                localToOriginalIndex.Add(position, survivingRows[position]);
            }

            retainedColumn = retainedColumn.Clone(new Int64DataFrameColumn("Indices", survivingRows));
        }

        DataFrameColumn supplementaryColumn = supplementaryDataFrame.Columns[supplemetaryJoinColumnNames[joinColumn]];

        // Occurrences for the current join column only
        var currentMatches = retainedColumn.GetGroupedOccurrences(supplementaryColumn, out HashSet<long> nullRowsInThisColumn);

        if (localToOriginalIndex != null)
        {
            // Translate keys from shrunk-column positions back to original row indices
            Dictionary<long, long> mapping = localToOriginalIndex;
            currentMatches = currentMatches.ToDictionary(entry => mapping[entry.Key], entry => entry.Value);
        }

        supplementaryJoinColumnsNullIndices.UnionWith(nullRowsInThisColumn);

        if (hasPreviousStep)
        {
            // Drop occurrences that did not also match on the earlier join columns
            currentMatches = GetShrinkedOccurences(matchesSoFar, currentMatches);
        }

        matchesSoFar = currentMatches;
    }

    return matchesSoFar;
}
/// <summary>
/// Element-wise equality against <paramref name="column"/>; forwards to <c>ElementwiseEqualsImplementation</c> with this column as the left operand
/// </summary>
/// <param name="column">The right-hand column</param>
/// <returns>The boolean column produced by <c>ElementwiseEqualsImplementation</c></returns>
public override PrimitiveDataFrameColumn<bool> ElementwiseEquals(DataFrameColumn column)
    => ElementwiseEqualsImplementation(this, column);
/// <summary>
/// Element-wise "less than" against <paramref name="column"/>. This virtual implementation always throws.
/// </summary>
/// <param name="column">The right-hand column</param>
/// <exception cref="NotImplementedException">Always thrown by this implementation</exception>
public virtual PrimitiveDataFrameColumn<bool> ElementwiseLessThan(DataFrameColumn column)
    => throw new NotImplementedException();
/// <summary>
/// Builds a <see cref="DataFrameColumn"/> for one Arrow field/array pair and appends it to <paramref name="ret"/>
/// </summary>
/// <remarks>
/// For primitive, boolean and string arrays the Arrow value / null-bitmap (and offsets) buffers are handed to the
/// matching column constructor. Struct arrays are flattened: each child field is appended as its own column through
/// a recursive call, with the struct's name plus "_" as the name prefix.
/// </remarks>
/// <param name="field">Arrow field describing <paramref name="arrowArray"/></param>
/// <param name="arrowArray">Arrow array to convert</param>
/// <param name="ret">DataFrame that receives the new column(s)</param>
/// <param name="fieldNamePrefix">Text prepended to the field name to form the column name</param>
/// <exception cref="NotImplementedException">The Arrow type is not handled; the message carries the Arrow type name</exception>
private static void AppendDataFrameColumnFromArrowArray(Field field, IArrowArray arrowArray, DataFrame ret, string fieldNamePrefix = "")
{
    IArrowType fieldType = field.DataType;
    DataFrameColumn dataFrameColumn = null;
    string fieldName = fieldNamePrefix + field.Name;

    switch (fieldType.TypeId)
    {
        case ArrowTypeId.Boolean:
            BooleanArray arrowBooleanArray = (BooleanArray)arrowArray;
            ReadOnlyMemory<byte> valueBuffer = arrowBooleanArray.ValueBuffer.Memory;
            ReadOnlyMemory<byte> nullBitMapBuffer = arrowBooleanArray.NullBitmapBuffer.Memory;
            dataFrameColumn = new BooleanDataFrameColumn(fieldName, valueBuffer, nullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
            break;
        case ArrowTypeId.Double:
            PrimitiveArray<double> arrowDoubleArray = (PrimitiveArray<double>)arrowArray;
            ReadOnlyMemory<byte> doubleValueBuffer = arrowDoubleArray.ValueBuffer.Memory;
            ReadOnlyMemory<byte> doubleNullBitMapBuffer = arrowDoubleArray.NullBitmapBuffer.Memory;
            dataFrameColumn = new DoubleDataFrameColumn(fieldName, doubleValueBuffer, doubleNullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
            break;
        case ArrowTypeId.Float:
            PrimitiveArray<float> arrowFloatArray = (PrimitiveArray<float>)arrowArray;
            ReadOnlyMemory<byte> floatValueBuffer = arrowFloatArray.ValueBuffer.Memory;
            ReadOnlyMemory<byte> floatNullBitMapBuffer = arrowFloatArray.NullBitmapBuffer.Memory;
            dataFrameColumn = new SingleDataFrameColumn(fieldName, floatValueBuffer, floatNullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
            break;
        case ArrowTypeId.Int8:
            PrimitiveArray<sbyte> arrowsbyteArray = (PrimitiveArray<sbyte>)arrowArray;
            ReadOnlyMemory<byte> sbyteValueBuffer = arrowsbyteArray.ValueBuffer.Memory;
            ReadOnlyMemory<byte> sbyteNullBitMapBuffer = arrowsbyteArray.NullBitmapBuffer.Memory;
            dataFrameColumn = new SByteDataFrameColumn(fieldName, sbyteValueBuffer, sbyteNullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
            break;
        case ArrowTypeId.Int16:
            PrimitiveArray<short> arrowshortArray = (PrimitiveArray<short>)arrowArray;
            ReadOnlyMemory<byte> shortValueBuffer = arrowshortArray.ValueBuffer.Memory;
            ReadOnlyMemory<byte> shortNullBitMapBuffer = arrowshortArray.NullBitmapBuffer.Memory;
            dataFrameColumn = new Int16DataFrameColumn(fieldName, shortValueBuffer, shortNullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
            break;
        case ArrowTypeId.Int32:
            PrimitiveArray<int> arrowIntArray = (PrimitiveArray<int>)arrowArray;
            ReadOnlyMemory<byte> intValueBuffer = arrowIntArray.ValueBuffer.Memory;
            ReadOnlyMemory<byte> intNullBitMapBuffer = arrowIntArray.NullBitmapBuffer.Memory;
            dataFrameColumn = new Int32DataFrameColumn(fieldName, intValueBuffer, intNullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
            break;
        case ArrowTypeId.Int64:
            PrimitiveArray<long> arrowLongArray = (PrimitiveArray<long>)arrowArray;
            ReadOnlyMemory<byte> longValueBuffer = arrowLongArray.ValueBuffer.Memory;
            ReadOnlyMemory<byte> longNullBitMapBuffer = arrowLongArray.NullBitmapBuffer.Memory;
            dataFrameColumn = new Int64DataFrameColumn(fieldName, longValueBuffer, longNullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
            break;
        case ArrowTypeId.String:
            StringArray stringArray = (StringArray)arrowArray;
            ReadOnlyMemory<byte> dataMemory = stringArray.ValueBuffer.Memory;
            ReadOnlyMemory<byte> offsetsMemory = stringArray.ValueOffsetsBuffer.Memory;
            ReadOnlyMemory<byte> nullMemory = stringArray.NullBitmapBuffer.Memory;
            dataFrameColumn = new ArrowStringDataFrameColumn(fieldName, dataMemory, offsetsMemory, nullMemory, stringArray.Length, stringArray.NullCount);
            break;
        case ArrowTypeId.UInt8:
            PrimitiveArray<byte> arrowbyteArray = (PrimitiveArray<byte>)arrowArray;
            ReadOnlyMemory<byte> byteValueBuffer = arrowbyteArray.ValueBuffer.Memory;
            ReadOnlyMemory<byte> byteNullBitMapBuffer = arrowbyteArray.NullBitmapBuffer.Memory;
            dataFrameColumn = new ByteDataFrameColumn(fieldName, byteValueBuffer, byteNullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
            break;
        case ArrowTypeId.UInt16:
            PrimitiveArray<ushort> arrowUshortArray = (PrimitiveArray<ushort>)arrowArray;
            ReadOnlyMemory<byte> ushortValueBuffer = arrowUshortArray.ValueBuffer.Memory;
            ReadOnlyMemory<byte> ushortNullBitMapBuffer = arrowUshortArray.NullBitmapBuffer.Memory;
            dataFrameColumn = new UInt16DataFrameColumn(fieldName, ushortValueBuffer, ushortNullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
            break;
        case ArrowTypeId.UInt32:
            PrimitiveArray<uint> arrowUintArray = (PrimitiveArray<uint>)arrowArray;
            ReadOnlyMemory<byte> uintValueBuffer = arrowUintArray.ValueBuffer.Memory;
            ReadOnlyMemory<byte> uintNullBitMapBuffer = arrowUintArray.NullBitmapBuffer.Memory;
            dataFrameColumn = new UInt32DataFrameColumn(fieldName, uintValueBuffer, uintNullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
            break;
        case ArrowTypeId.UInt64:
            PrimitiveArray<ulong> arrowUlongArray = (PrimitiveArray<ulong>)arrowArray;
            ReadOnlyMemory<byte> ulongValueBuffer = arrowUlongArray.ValueBuffer.Memory;
            ReadOnlyMemory<byte> ulongNullBitMapBuffer = arrowUlongArray.NullBitmapBuffer.Memory;
            dataFrameColumn = new UInt64DataFrameColumn(fieldName, ulongValueBuffer, ulongNullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
            break;
        case ArrowTypeId.Struct:
            StructArray structArray = (StructArray)arrowArray;
            StructType structType = (StructType)field.DataType;
            // Walk child fields and child arrays in lock-step; stop at the shorter of the two sequences
            using (IEnumerator<Field> fieldsEnumerator = structType.Fields.GetEnumerator())
            using (IEnumerator<IArrowArray> structArrayEnumerator = structArray.Fields.GetEnumerator())
            {
                while (fieldsEnumerator.MoveNext() && structArrayEnumerator.MoveNext())
                {
                    // NOTE(review): the prefix is built from field.Name only, so for a struct nested inside another
                    // struct the outer fieldNamePrefix is not carried down. Kept as-is to preserve column names; confirm intent.
                    AppendDataFrameColumnFromArrowArray(fieldsEnumerator.Current, structArrayEnumerator.Current, ret, field.Name + "_");
                }
            }
            break;
        case ArrowTypeId.Decimal:
        case ArrowTypeId.Binary:
        case ArrowTypeId.Date32:
        case ArrowTypeId.Date64:
        case ArrowTypeId.Dictionary:
        case ArrowTypeId.FixedSizedBinary:
        case ArrowTypeId.HalfFloat:
        case ArrowTypeId.Interval:
        case ArrowTypeId.List:
        case ArrowTypeId.Map:
        case ArrowTypeId.Null:
        case ArrowTypeId.Time32:
        case ArrowTypeId.Time64:
        default:
            // Report the actual Arrow type name; nameof(fieldType.Name) would always produce the literal "Name"
            throw new NotImplementedException(fieldType.Name);
    }

    // Struct fields append their children recursively and leave dataFrameColumn null
    if (dataFrameColumn != null)
    {
        ret.Columns.Insert(ret.Columns.Count, dataFrameColumn);
    }
}
/// <summary>
/// Compares two columns row by row: the left value cast to <see cref="string"/> against the right value's <c>ToString()</c> (or null when the right value is null)
/// </summary>
/// <param name="left">Column whose values are cast to string; also supplies the name and length of the result</param>
/// <param name="right">Column whose values are converted with <c>ToString()</c></param>
/// <returns>A boolean column named after <paramref name="left"/> with one entry per row</returns>
/// <exception cref="ArgumentException">The two columns have different lengths</exception>
internal static PrimitiveDataFrameColumn<bool> ElementwiseEqualsImplementation(DataFrameColumn left, DataFrameColumn right)
{
    if (left.Length != right.Length)
    {
        throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(right));
    }

    PrimitiveDataFrameColumn<bool> equality = new PrimitiveDataFrameColumn<bool>(left.Name, left.Length);

    long row = 0;
    while (row < left.Length)
    {
        string leftText = (string)left[row];
        string rightText = right[row]?.ToString();
        equality[row] = leftText == rightText;
        row++;
    }

    return equality;
}
/// <summary>
/// Merge DataFrames with a database style join on one or more column pairs
/// </summary>
/// <remarks>
/// One DataFrame is treated as "retained" (its rows drive the join) and the other as "supplementary".
/// Left keeps this DataFrame, Right keeps <paramref name="other"/>, Inner retains whichever has more rows,
/// and FullOuter performs a left join and then appends the supplementary rows that were not part of it.
/// </remarks>
/// <param name="other">The right-hand DataFrame</param>
/// <param name="leftJoinColumns">Names of the join columns in this DataFrame</param>
/// <param name="rightJoinColumns">Names of the join columns in <paramref name="other"/>, positionally paired with <paramref name="leftJoinColumns"/></param>
/// <param name="leftSuffix">Forwarded to <c>SetSuffixForDuplicatedColumnNames</c> when the right-hand columns are added</param>
/// <param name="rightSuffix">Forwarded to <c>SetSuffixForDuplicatedColumnNames</c> when the right-hand columns are added</param>
/// <param name="joinAlgorithm">Kind of join to perform</param>
/// <returns>A new DataFrame with the columns of this DataFrame followed by the columns of <paramref name="other"/></returns>
/// <exception cref="ArgumentNullException"><paramref name="other"/> is null</exception>
/// <exception cref="NotImplementedException">The join algorithm is not one of Left, Right, Inner or FullOuter</exception>
public DataFrame Merge(DataFrame other, string[] leftJoinColumns, string[] rightJoinColumns, string leftSuffix = "_left", string rightSuffix = "_right", JoinAlgorithm joinAlgorithm = JoinAlgorithm.Left)
{
    if (other == null)
    {
        throw new ArgumentNullException(nameof(other));
    }

    //In Outer join the joined dataframe retains each row — even if no other matching row exists in supplementary dataframe.
    //Outer joins subdivide further into left outer joins (left dataframe is retained), right outer joins (right dataframe is retained), in full outer both are retained
    PrimitiveDataFrameColumn<long> retainedRowIndices;
    PrimitiveDataFrameColumn<long> supplementaryRowIndices;
    DataFrame supplementaryDataFrame;
    DataFrame retainedDataFrame;
    bool isLeftDataFrameRetained;

    if (joinAlgorithm == JoinAlgorithm.Left || joinAlgorithm == JoinAlgorithm.Right)
    {
        isLeftDataFrameRetained = (joinAlgorithm == JoinAlgorithm.Left);

        supplementaryDataFrame = isLeftDataFrameRetained ? other : this;
        var supplementaryJoinColumns = isLeftDataFrameRetained ? rightJoinColumns : leftJoinColumns;

        retainedDataFrame = isLeftDataFrameRetained ? this : other;
        var retainedJoinColumns = isLeftDataFrameRetained ? leftJoinColumns : rightJoinColumns;

        Merge(retainedDataFrame, supplementaryDataFrame, retainedJoinColumns, supplementaryJoinColumns, out retainedRowIndices, out supplementaryRowIndices);
    }
    else if (joinAlgorithm == JoinAlgorithm.Inner)
    {
        // use as supplementary (for Hashing) the dataframe with the smaller RowCount
        isLeftDataFrameRetained = (Rows.Count > other.Rows.Count);

        supplementaryDataFrame = isLeftDataFrameRetained ? other : this;
        var supplementaryJoinColumns = isLeftDataFrameRetained ? rightJoinColumns : leftJoinColumns;

        retainedDataFrame = isLeftDataFrameRetained ? this : other;
        var retainedJoinColumns = isLeftDataFrameRetained ? leftJoinColumns : rightJoinColumns;

        Merge(retainedDataFrame, supplementaryDataFrame, retainedJoinColumns, supplementaryJoinColumns, out retainedRowIndices, out supplementaryRowIndices, true);
    }
    else if (joinAlgorithm == JoinAlgorithm.FullOuter)
    {
        //In full outer join we would like to retain data from both sides, so we do it in 2 steps: first a LEFT JOIN, then add the lost data from the RIGHT side

        //Step 1
        //Do LEFT JOIN
        isLeftDataFrameRetained = true;

        supplementaryDataFrame = other;
        var supplementaryJoinColumns = rightJoinColumns;

        retainedDataFrame = this;
        var retainedJoinColumns = leftJoinColumns;

        var intersection = Merge(retainedDataFrame, supplementaryDataFrame, retainedJoinColumns, supplementaryJoinColumns, out retainedRowIndices, out supplementaryRowIndices, calculateIntersection: true);

        //Step 2
        //Do RIGHT JOIN to retain all data from supplementary DataFrame too (take into account data intersection from the first step to avoid duplicates)

        // The join-column array does not depend on the row, so resolve it once instead of once per row
        var supplementaryColumns = supplementaryJoinColumns.Select(name => supplementaryDataFrame.Columns[name]).ToArray();

        for (long i = 0; i < supplementaryDataFrame.Columns.RowCount; i++)
        {
            if (!IsAnyNullValueInColumns(supplementaryColumns, i) && !intersection.Contains(i))
            {
                retainedRowIndices.Append(null);
                supplementaryRowIndices.Append(i);
            }
        }
    }
    else
    {
        throw new NotImplementedException(nameof(joinAlgorithm));
    }

    DataFrame ret = new DataFrame();

    //insert columns from left dataframe (this)
    for (int i = 0; i < this.Columns.Count; i++)
    {
        ret.Columns.Insert(i, this.Columns[i].Clone(isLeftDataFrameRetained ? retainedRowIndices : supplementaryRowIndices));
    }

    //insert columns from right dataframe (other)
    for (int i = 0; i < other.Columns.Count; i++)
    {
        DataFrameColumn column = other.Columns[i].Clone(isLeftDataFrameRetained ? supplementaryRowIndices : retainedRowIndices);
        SetSuffixForDuplicatedColumnNames(ret, column, leftSuffix, rightSuffix);
        ret.Columns.Insert(ret.Columns.Count, column);
    }

    return ret;
}
/// <summary>
/// Computes the pairs of (retained row index, supplementary row index) that make up a join of two DataFrames
/// </summary>
/// <remarks>
/// Occurrences are gathered per join column and narrowed column by column, because a join requires ALL
/// column pairs to match. Retained rows with a null in any join column are paired with every supplementary
/// row index collected in the null-index set.
/// </remarks>
/// <param name="retainedDataFrame">DataFrame whose rows drive the join</param>
/// <param name="supplementaryDataFrame">DataFrame searched for matching rows</param>
/// <param name="retainedJoinColumnNames">Join column names in the retained DataFrame</param>
/// <param name="supplemetaryJoinColumnNames">Join column names in the supplementary DataFrame</param>
/// <param name="retainedRowIndices">Receives the retained row index of every output row</param>
/// <param name="supplementaryRowIndices">Receives the supplementary row index of every output row (null when there is no match)</param>
/// <param name="isInner">When true, retained rows without a match are skipped instead of being paired with null</param>
/// <param name="calculateIntersection">When true, the matched supplementary row indices are collected and returned</param>
/// <returns>The set of matched supplementary row indices, or <c>null</c> when <paramref name="calculateIntersection"/> is false</returns>
/// <exception cref="ArgumentNullException">Either array of join column names is null</exception>
/// <exception cref="ArgumentException">The two arrays of join column names differ in length</exception>
private static HashSet<long> Merge(DataFrame retainedDataFrame, DataFrame supplementaryDataFrame, string[] retainedJoinColumnNames, string[] supplemetaryJoinColumnNames, out PrimitiveDataFrameColumn<long> retainedRowIndices, out PrimitiveDataFrameColumn<long> supplementaryRowIndices, bool isInner = false, bool calculateIntersection = false)
{
    if (retainedJoinColumnNames == null)
    {
        throw new ArgumentNullException(nameof(retainedJoinColumnNames));
    }

    if (supplemetaryJoinColumnNames == null)
    {
        throw new ArgumentNullException(nameof(supplemetaryJoinColumnNames));
    }

    if (retainedJoinColumnNames.Length != supplemetaryJoinColumnNames.Length)
    {
        throw new ArgumentException(Strings.MismatchedArrayLengths, nameof(retainedJoinColumnNames));
    }

    HashSet<long> matchedSupplementaryRows = null;
    if (calculateIntersection)
    {
        matchedSupplementaryRows = new HashSet<long>();
    }

    // Occurrences of join-column values, keyed by row index in the retained dataframe
    Dictionary<long, ICollection<long>> matchesSoFar = null;
    Dictionary<long, long> localToOriginalIndex = null;
    HashSet<long> supplementaryNullRows = new HashSet<long>();

    for (int joinColumn = 0; joinColumn < retainedJoinColumnNames.Length; joinColumn++)
    {
        DataFrameColumn retainedColumn = retainedDataFrame.Columns[retainedJoinColumnNames[joinColumn]];
        bool hasPreviousStep = matchesSoFar != null;

        if (hasPreviousStep)
        {
            // Only rows that matched on the previous join columns need further processing
            long[] survivingRows = matchesSoFar.Keys.ToArray();

            // Position in the shrunk column -> row index in the original retained dataframe
            localToOriginalIndex = new Dictionary<long, long>(survivingRows.Length);
            for (int position = 0; position < survivingRows.Length; position++)
            {
                localToOriginalIndex.Add(position, survivingRows[position]);
            }

            retainedColumn = retainedColumn.Clone(new Int64DataFrameColumn("Indices", survivingRows));
        }

        DataFrameColumn supplementaryColumn = supplementaryDataFrame.Columns[supplemetaryJoinColumnNames[joinColumn]];

        // Occurrences for the current join column only
        var currentMatches = retainedColumn.GetGroupedOccurrences(supplementaryColumn, out HashSet<long> nullRowsInThisColumn);

        if (localToOriginalIndex != null)
        {
            // Translate keys from shrunk-column positions back to original row indices
            Dictionary<long, long> mapping = localToOriginalIndex;
            currentMatches = currentMatches.ToDictionary(entry => mapping[entry.Key], entry => entry.Value);
        }

        supplementaryNullRows.UnionWith(nullRowsInThisColumn);

        if (hasPreviousStep)
        {
            // Keep only the supplementary rows that also matched on the earlier join columns
            Dictionary<long, ICollection<long>> previousMatches = matchesSoFar;
            var narrowed = new Dictionary<long, ICollection<long>>();
            foreach (var entry in currentMatches)
            {
                long[] stillMatching = entry.Value.Where(candidate => previousMatches[entry.Key].Contains(candidate)).ToArray();
                if (stillMatching.Length > 0)
                {
                    narrowed.Add(entry.Key, stillMatching);
                }
            }

            currentMatches = narrowed;
        }

        matchesSoFar = currentMatches;
    }

    retainedRowIndices = new Int64DataFrameColumn("RetainedIndices");
    supplementaryRowIndices = new Int64DataFrameColumn("SupplementaryIndices");

    // Perform the merge proper
    var joinColumns = retainedJoinColumnNames.Select(name => retainedDataFrame.Columns[name]).ToArray();

    for (long retainedRow = 0; retainedRow < retainedDataFrame.Columns.RowCount; retainedRow++)
    {
        if (IsAnyNullValueInColumns(joinColumns, retainedRow))
        {
            // A retained row with a null join value is paired with every collected supplementary null row
            foreach (long nullRow in supplementaryNullRows)
            {
                retainedRowIndices.Append(retainedRow);
                supplementaryRowIndices.Append(nullRow);
            }

            continue;
        }

        if (!matchesSoFar.TryGetValue(retainedRow, out ICollection<long> matchingRows))
        {
            // No match: inner joins drop the row, the others keep it with a null partner
            if (!isInner)
            {
                retainedRowIndices.Append(retainedRow);
                supplementaryRowIndices.Append(null);
            }

            continue;
        }

        // Emit one output row per supplementary row that satisfies the join condition
        foreach (long supplementaryRow in matchingRows)
        {
            retainedRowIndices.Append(retainedRow);
            supplementaryRowIndices.Append(supplementaryRow);

            if (calculateIntersection)
            {
                matchedSupplementaryRows.Add(supplementaryRow);
            }
        }
    }

    return matchedSupplementaryRows;
}
/// <summary>
/// Asks the column at index <paramref name="col"/> of the wrapped DataFrame for its data view getter, passing this instance
/// </summary>
/// <param name="col">Index of the column in <c>_dataFrame.Columns</c></param>
/// <returns>The delegate returned by the column's <c>GetDataViewGetter</c></returns>
private Delegate CreateGetterDelegate(int col)
    => _dataFrame.Columns[col].GetDataViewGetter(this);
/// <summary>
/// Wraps a <see cref="DataFrame"/> around an Arrow <see cref="RecordBatch"/> without copying data
/// </summary>
/// <remarks>
/// Arrays are paired with schema fields by position. Boolean, numeric primitive and string arrays are supported;
/// every other Arrow type raises <see cref="NotImplementedException"/>.
/// </remarks>
/// <param name="recordBatch">The record batch whose arrays become the columns</param>
/// <returns><see cref="DataFrame"/></returns>
/// <exception cref="ArgumentNullException"><paramref name="recordBatch"/> is null</exception>
/// <exception cref="NotImplementedException">A field has an Arrow type that is not handled; the message carries the Arrow type name</exception>
public static DataFrame FromArrowRecordBatch(RecordBatch recordBatch)
{
    if (recordBatch == null)
    {
        throw new ArgumentNullException(nameof(recordBatch));
    }

    DataFrame ret = new DataFrame();
    Apache.Arrow.Schema arrowSchema = recordBatch.Schema;
    int fieldIndex = 0;
    IEnumerable<IArrowArray> arrowArrays = recordBatch.Arrays;
    foreach (IArrowArray arrowArray in arrowArrays)
    {
        Field field = arrowSchema.GetFieldByIndex(fieldIndex);
        IArrowType fieldType = field.DataType;
        DataFrameColumn dataFrameColumn = null;
        switch (fieldType.TypeId)
        {
            case ArrowTypeId.Boolean:
                BooleanArray arrowBooleanArray = (BooleanArray)arrowArray;
                ReadOnlyMemory<byte> valueBuffer = arrowBooleanArray.ValueBuffer.Memory;
                ReadOnlyMemory<byte> nullBitMapBuffer = arrowBooleanArray.NullBitmapBuffer.Memory;
                dataFrameColumn = new PrimitiveDataFrameColumn<bool>(field.Name, valueBuffer, nullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
                break;
            case ArrowTypeId.Double:
                PrimitiveArray<double> arrowDoubleArray = (PrimitiveArray<double>)arrowArray;
                ReadOnlyMemory<byte> doubleValueBuffer = arrowDoubleArray.ValueBuffer.Memory;
                ReadOnlyMemory<byte> doubleNullBitMapBuffer = arrowDoubleArray.NullBitmapBuffer.Memory;
                dataFrameColumn = new PrimitiveDataFrameColumn<double>(field.Name, doubleValueBuffer, doubleNullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
                break;
            case ArrowTypeId.Float:
                PrimitiveArray<float> arrowFloatArray = (PrimitiveArray<float>)arrowArray;
                ReadOnlyMemory<byte> floatValueBuffer = arrowFloatArray.ValueBuffer.Memory;
                ReadOnlyMemory<byte> floatNullBitMapBuffer = arrowFloatArray.NullBitmapBuffer.Memory;
                dataFrameColumn = new PrimitiveDataFrameColumn<float>(field.Name, floatValueBuffer, floatNullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
                break;
            case ArrowTypeId.Int8:
                PrimitiveArray<sbyte> arrowsbyteArray = (PrimitiveArray<sbyte>)arrowArray;
                ReadOnlyMemory<byte> sbyteValueBuffer = arrowsbyteArray.ValueBuffer.Memory;
                ReadOnlyMemory<byte> sbyteNullBitMapBuffer = arrowsbyteArray.NullBitmapBuffer.Memory;
                dataFrameColumn = new PrimitiveDataFrameColumn<sbyte>(field.Name, sbyteValueBuffer, sbyteNullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
                break;
            case ArrowTypeId.Int16:
                PrimitiveArray<short> arrowshortArray = (PrimitiveArray<short>)arrowArray;
                ReadOnlyMemory<byte> shortValueBuffer = arrowshortArray.ValueBuffer.Memory;
                ReadOnlyMemory<byte> shortNullBitMapBuffer = arrowshortArray.NullBitmapBuffer.Memory;
                dataFrameColumn = new PrimitiveDataFrameColumn<short>(field.Name, shortValueBuffer, shortNullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
                break;
            case ArrowTypeId.Int32:
                PrimitiveArray<int> arrowIntArray = (PrimitiveArray<int>)arrowArray;
                ReadOnlyMemory<byte> intValueBuffer = arrowIntArray.ValueBuffer.Memory;
                ReadOnlyMemory<byte> intNullBitMapBuffer = arrowIntArray.NullBitmapBuffer.Memory;
                dataFrameColumn = new PrimitiveDataFrameColumn<int>(field.Name, intValueBuffer, intNullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
                break;
            case ArrowTypeId.Int64:
                PrimitiveArray<long> arrowLongArray = (PrimitiveArray<long>)arrowArray;
                ReadOnlyMemory<byte> longValueBuffer = arrowLongArray.ValueBuffer.Memory;
                ReadOnlyMemory<byte> longNullBitMapBuffer = arrowLongArray.NullBitmapBuffer.Memory;
                dataFrameColumn = new PrimitiveDataFrameColumn<long>(field.Name, longValueBuffer, longNullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
                break;
            case ArrowTypeId.String:
                StringArray stringArray = (StringArray)arrowArray;
                ReadOnlyMemory<byte> dataMemory = stringArray.ValueBuffer.Memory;
                ReadOnlyMemory<byte> offsetsMemory = stringArray.ValueOffsetsBuffer.Memory;
                ReadOnlyMemory<byte> nullMemory = stringArray.NullBitmapBuffer.Memory;
                dataFrameColumn = new ArrowStringDataFrameColumn(field.Name, dataMemory, offsetsMemory, nullMemory, stringArray.Length, stringArray.NullCount);
                break;
            case ArrowTypeId.UInt8:
                PrimitiveArray<byte> arrowbyteArray = (PrimitiveArray<byte>)arrowArray;
                ReadOnlyMemory<byte> byteValueBuffer = arrowbyteArray.ValueBuffer.Memory;
                ReadOnlyMemory<byte> byteNullBitMapBuffer = arrowbyteArray.NullBitmapBuffer.Memory;
                dataFrameColumn = new PrimitiveDataFrameColumn<byte>(field.Name, byteValueBuffer, byteNullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
                break;
            case ArrowTypeId.UInt16:
                PrimitiveArray<ushort> arrowUshortArray = (PrimitiveArray<ushort>)arrowArray;
                ReadOnlyMemory<byte> ushortValueBuffer = arrowUshortArray.ValueBuffer.Memory;
                ReadOnlyMemory<byte> ushortNullBitMapBuffer = arrowUshortArray.NullBitmapBuffer.Memory;
                dataFrameColumn = new PrimitiveDataFrameColumn<ushort>(field.Name, ushortValueBuffer, ushortNullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
                break;
            case ArrowTypeId.UInt32:
                PrimitiveArray<uint> arrowUintArray = (PrimitiveArray<uint>)arrowArray;
                ReadOnlyMemory<byte> uintValueBuffer = arrowUintArray.ValueBuffer.Memory;
                ReadOnlyMemory<byte> uintNullBitMapBuffer = arrowUintArray.NullBitmapBuffer.Memory;
                dataFrameColumn = new PrimitiveDataFrameColumn<uint>(field.Name, uintValueBuffer, uintNullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
                break;
            case ArrowTypeId.UInt64:
                PrimitiveArray<ulong> arrowUlongArray = (PrimitiveArray<ulong>)arrowArray;
                ReadOnlyMemory<byte> ulongValueBuffer = arrowUlongArray.ValueBuffer.Memory;
                ReadOnlyMemory<byte> ulongNullBitMapBuffer = arrowUlongArray.NullBitmapBuffer.Memory;
                dataFrameColumn = new PrimitiveDataFrameColumn<ulong>(field.Name, ulongValueBuffer, ulongNullBitMapBuffer, arrowArray.Length, arrowArray.NullCount);
                break;
            case ArrowTypeId.Decimal:
            case ArrowTypeId.Binary:
            case ArrowTypeId.Date32:
            case ArrowTypeId.Date64:
            case ArrowTypeId.Dictionary:
            case ArrowTypeId.FixedSizedBinary:
            case ArrowTypeId.HalfFloat:
            case ArrowTypeId.Interval:
            case ArrowTypeId.List:
            case ArrowTypeId.Map:
            case ArrowTypeId.Null:
            case ArrowTypeId.Struct:
            case ArrowTypeId.Time32:
            case ArrowTypeId.Time64:
            default:
                // Report the actual Arrow type name; nameof(fieldType.Name) would always produce the literal "Name"
                throw new NotImplementedException(fieldType.Name);
        }

        ret.Columns.Insert(ret.Columns.Count, dataFrameColumn);
        fieldIndex++;
    }

    return ret;
}
// TODO: Merge API with an "On" parameter that merges on a column common to 2 dataframes

/// <summary>
/// Merge DataFrames with a database style join
/// </summary>
/// <remarks>
/// Implemented as a hash join: one join column is grouped by value and the other is probed row by row.
/// A null value only pairs with a null value, and a non-null value only with a non-null value.
/// For Left, Right and FullOuter joins, a retained row with no compatible partner is kept and paired with null.
/// For an Inner join the DataFrame with fewer rows (this DataFrame on a tie) supplies the leading columns of the result.
/// </remarks>
/// <typeparam name="TKey">Type the join column values are cast to for hashing</typeparam>
/// <param name="other">The right-hand DataFrame</param>
/// <param name="leftJoinColumn">Name of the join column in this DataFrame</param>
/// <param name="rightJoinColumn">Name of the join column in <paramref name="other"/></param>
/// <param name="leftSuffix">Forwarded to <c>SetSuffixForDuplicatedColumnNames</c> when the second group of columns is added</param>
/// <param name="rightSuffix">Forwarded to <c>SetSuffixForDuplicatedColumnNames</c> when the second group of columns is added</param>
/// <param name="joinAlgorithm">Kind of join to perform</param>
/// <returns>A new DataFrame holding the joined rows</returns>
/// <exception cref="ArgumentNullException"><paramref name="other"/> is null</exception>
/// <exception cref="NotImplementedException">The join algorithm is not one of Left, Right, Inner or FullOuter</exception>
public DataFrame Merge<TKey>(DataFrame other, string leftJoinColumn, string rightJoinColumn, string leftSuffix = "_left", string rightSuffix = "_right", JoinAlgorithm joinAlgorithm = JoinAlgorithm.Left)
{
    if (other == null)
    {
        throw new ArgumentNullException(nameof(other));
    }

    // A simple hash join
    DataFrame ret = new DataFrame();
    DataFrame leftDataFrame = this;
    DataFrame rightDataFrame = other;

    // The final table size is not known until runtime
    PrimitiveDataFrameColumn<long> leftRowIndices = new PrimitiveDataFrameColumn<long>("LeftIndices");
    PrimitiveDataFrameColumn<long> rightRowIndices = new PrimitiveDataFrameColumn<long>("RightIndices");

    if (joinAlgorithm == JoinAlgorithm.Left)
    {
        // First hash other dataframe on the rightJoinColumn
        DataFrameColumn otherColumn = other[rightJoinColumn];
        Dictionary<TKey, ICollection<long>> multimap = otherColumn.GroupColumnValues<TKey>();

        // Go over the records in this dataframe and match with the dictionary
        DataFrameColumn thisColumn = this[leftJoinColumn];

        for (long i = 0; i < thisColumn.Length; i++)
        {
            var thisColumnValue = thisColumn[i];
            TKey thisColumnValueOrDefault = (TKey)(thisColumnValue == null ? default(TKey) : thisColumnValue);
            bool matched = false;

            if (multimap.TryGetValue(thisColumnValueOrDefault, out ICollection<long> rowNumbers))
            {
                foreach (long row in rowNumbers)
                {
                    // Nulls match only nulls; non-nulls match only non-nulls
                    if ((thisColumnValue == null) == (otherColumn[row] == null))
                    {
                        leftRowIndices.Append(i);
                        rightRowIndices.Append(row);
                        matched = true;
                    }
                }
            }

            // A left join keeps every left row. This also covers a bucket that exists for the key but
            // only holds rows of the opposite nullness (null vs. a non-null default(TKey)).
            if (!matched)
            {
                leftRowIndices.Append(i);
                rightRowIndices.Append(null);
            }
        }
    }
    else if (joinAlgorithm == JoinAlgorithm.Right)
    {
        DataFrameColumn thisColumn = this[leftJoinColumn];
        Dictionary<TKey, ICollection<long>> multimap = thisColumn.GroupColumnValues<TKey>();

        DataFrameColumn otherColumn = other[rightJoinColumn];

        for (long i = 0; i < otherColumn.Length; i++)
        {
            var otherColumnValue = otherColumn[i];
            TKey otherColumnValueOrDefault = (TKey)(otherColumnValue == null ? default(TKey) : otherColumnValue);
            bool matched = false;

            if (multimap.TryGetValue(otherColumnValueOrDefault, out ICollection<long> rowNumbers))
            {
                foreach (long row in rowNumbers)
                {
                    // Nulls match only nulls; non-nulls match only non-nulls
                    if ((otherColumnValue == null) == (thisColumn[row] == null))
                    {
                        leftRowIndices.Append(row);
                        rightRowIndices.Append(i);
                        matched = true;
                    }
                }
            }

            // A right join keeps every right row, including when the bucket held no compatible row
            if (!matched)
            {
                leftRowIndices.Append(null);
                rightRowIndices.Append(i);
            }
        }
    }
    else if (joinAlgorithm == JoinAlgorithm.Inner)
    {
        // Hash the column with the smaller RowCount
        long leftRowCount = Rows.Count;
        long rightRowCount = other.Rows.Count;
        DataFrame longerDataFrame = leftRowCount <= rightRowCount ? other : this;
        DataFrame shorterDataFrame = ReferenceEquals(longerDataFrame, this) ? other : this;
        DataFrameColumn hashColumn = (leftRowCount <= rightRowCount) ? this[leftJoinColumn] : other[rightJoinColumn];
        DataFrameColumn otherColumn = ReferenceEquals(hashColumn, this[leftJoinColumn]) ? other[rightJoinColumn] : this[leftJoinColumn];
        Dictionary<TKey, ICollection<long>> multimap = hashColumn.GroupColumnValues<TKey>();

        for (long i = 0; i < otherColumn.Length; i++)
        {
            var otherColumnValue = otherColumn[i];
            TKey otherColumnValueOrDefault = (TKey)(otherColumnValue == null ? default(TKey) : otherColumnValue);

            if (multimap.TryGetValue(otherColumnValueOrDefault, out ICollection<long> rowNumbers))
            {
                foreach (long row in rowNumbers)
                {
                    // Nulls match only nulls; non-nulls match only non-nulls
                    if ((otherColumnValue == null) == (hashColumn[row] == null))
                    {
                        leftRowIndices.Append(row);
                        rightRowIndices.Append(i);
                    }
                }
            }
        }

        // The hashed (shorter) frame supplies the "left" indices, so it also supplies the leading columns
        leftDataFrame = shorterDataFrame;
        rightDataFrame = longerDataFrame;
    }
    else if (joinAlgorithm == JoinAlgorithm.FullOuter)
    {
        DataFrameColumn otherColumn = other[rightJoinColumn];
        Dictionary<TKey, ICollection<long>> multimap = otherColumn.GroupColumnValues<TKey>();

        // Right rows already emitted by the left pass. Tracked by row index rather than by key so that a
        // null right row and a non-null default(TKey) right row cannot mask each other.
        HashSet<long> matchedRightRows = new HashSet<long>();

        // Go over the records in this dataframe and match with the dictionary
        DataFrameColumn thisColumn = this[leftJoinColumn];

        for (long i = 0; i < thisColumn.Length; i++)
        {
            var thisColumnValue = thisColumn[i];
            TKey thisColumnValueOrDefault = (TKey)(thisColumnValue == null ? default(TKey) : thisColumnValue);
            bool matched = false;

            if (multimap.TryGetValue(thisColumnValueOrDefault, out ICollection<long> rowNumbers))
            {
                foreach (long row in rowNumbers)
                {
                    // Nulls match only nulls; non-nulls match only non-nulls
                    if ((thisColumnValue == null) == (otherColumn[row] == null))
                    {
                        leftRowIndices.Append(i);
                        rightRowIndices.Append(row);
                        matchedRightRows.Add(row);
                        matched = true;
                    }
                }
            }

            // Keep every left row, including when the bucket held no compatible row
            if (!matched)
            {
                leftRowIndices.Append(i);
                rightRowIndices.Append(null);
            }
        }

        // Add the right rows that the left pass never paired with anything
        for (long i = 0; i < otherColumn.Length; i++)
        {
            if (!matchedRightRows.Contains(i))
            {
                leftRowIndices.Append(null);
                rightRowIndices.Append(i);
            }
        }
    }
    else
    {
        throw new NotImplementedException(nameof(joinAlgorithm));
    }

    for (int i = 0; i < leftDataFrame.Columns.Count; i++)
    {
        ret.Columns.Insert(i, leftDataFrame.Columns[i].Clone(leftRowIndices));
    }

    for (int i = 0; i < rightDataFrame.Columns.Count; i++)
    {
        DataFrameColumn column = rightDataFrame.Columns[i].Clone(rightRowIndices);
        SetSuffixForDuplicatedColumnNames(ret, column, leftSuffix, rightSuffix);
        ret.Columns.Insert(ret.Columns.Count, column);
    }

    return ret;
}
/// <summary>
/// Element-wise "greater than or equal" comparison of this column against <paramref name="column"/>.
/// </summary>
/// <remarks>
/// This base implementation does no work and always throws. The member is virtual so that
/// overriding types can provide the actual comparison.
/// </remarks>
/// <param name="column">The column to compare against.</param>
/// <returns>Never returns from this implementation; the declared result type is a boolean column.</returns>
/// <exception cref="NotImplementedException">Always thrown by this base implementation.</exception>
public virtual PrimitiveDataFrameColumn<bool> ElementwiseGreaterThanOrEqual(DataFrameColumn column)
    => throw new NotImplementedException();
/// <summary>
/// Joins the columns of <paramref name="other"/> onto the columns of this DataFrame by row position
/// (no key column is consulted) and returns the result as a new DataFrame.
/// </summary>
/// <remarks>
/// The row count of the result depends on <paramref name="joinAlgorithm"/>:
/// <list type="bullet">
/// <item><description>Left: this DataFrame's columns are cloned as-is; the other side is padded with nulls when it is shorter, otherwise cloned through an index map covering this DataFrame's row count.</description></item>
/// <item><description>Right: the mirror image of Left.</description></item>
/// <item><description>FullOuter: both sides are padded with nulls up to the larger row count.</description></item>
/// <item><description>Inner: both sides are cloned through an index map covering the smaller row count.</description></item>
/// </list>
/// </remarks>
/// <param name="other">The DataFrame whose columns are placed after this DataFrame's columns.</param>
/// <param name="leftSuffix">Forwarded to SetSuffixForDuplicatedColumnNames together with each column taken from <paramref name="other"/>.</param>
/// <param name="rightSuffix">Forwarded to SetSuffixForDuplicatedColumnNames together with each column taken from <paramref name="other"/>.</param>
/// <param name="joinAlgorithm">Which of the strategies above to apply.</param>
/// <returns>A new DataFrame holding clones of the columns of both inputs.</returns>
/// <exception cref="ArgumentNullException"><paramref name="other"/> is null.</exception>
/// <exception cref="NotImplementedException"><paramref name="joinAlgorithm"/> is not one of Left, Right, FullOuter or Inner.</exception>
public DataFrame Join(DataFrame other, string leftSuffix = "_left", string rightSuffix = "_right", JoinAlgorithm joinAlgorithm = JoinAlgorithm.Left)
{
    if (other == null)
    {
        throw new ArgumentNullException(nameof(other));
    }

    // Builds the index column [0, 1, ..., length - 1] that is handed to Clone(mapIndices).
    PrimitiveDataFrameColumn<long> CreateLeadingRowMap(long length)
    {
        PrimitiveDataFrameColumn<long> map = new PrimitiveDataFrameColumn<long>("mapIndices", length);
        for (long row = 0; row < length; row++)
        {
            map[row] = row;
        }
        return map;
    }

    // Neither input is modified below, so the row counts can be read once.
    long thisRowCount = Rows.Count;
    long otherRowCount = other.Rows.Count;
    DataFrame ret = new DataFrame();

    switch (joinAlgorithm)
    {
        case JoinAlgorithm.Left:
        {
            for (int i = 0; i < Columns.Count; i++)
            {
                DataFrameColumn newColumn = Columns[i].Clone();
                ret.Columns.Insert(ret.Columns.Count, newColumn);
            }

            PrimitiveDataFrameColumn<long> mapIndices = CreateLeadingRowMap(Math.Min(thisRowCount, otherRowCount));
            for (int i = 0; i < other.Columns.Count; i++)
            {
                // A shorter right side is padded with nulls; otherwise it is cut down via the index map.
                DataFrameColumn newColumn = otherRowCount < thisRowCount
                    ? other.Columns[i].Clone(numberOfNullsToAppend: thisRowCount - otherRowCount)
                    : other.Columns[i].Clone(mapIndices);
                SetSuffixForDuplicatedColumnNames(ret, newColumn, leftSuffix, rightSuffix);
                ret.Columns.Insert(ret.Columns.Count, newColumn);
            }
            break;
        }

        case JoinAlgorithm.Right:
        {
            PrimitiveDataFrameColumn<long> mapIndices = CreateLeadingRowMap(Math.Min(thisRowCount, otherRowCount));
            for (int i = 0; i < Columns.Count; i++)
            {
                // A shorter left side is padded with nulls; otherwise it is cut down via the index map.
                DataFrameColumn newColumn = thisRowCount < otherRowCount
                    ? Columns[i].Clone(numberOfNullsToAppend: otherRowCount - thisRowCount)
                    : Columns[i].Clone(mapIndices);
                ret.Columns.Insert(ret.Columns.Count, newColumn);
            }

            for (int i = 0; i < other.Columns.Count; i++)
            {
                DataFrameColumn newColumn = other.Columns[i].Clone();
                SetSuffixForDuplicatedColumnNames(ret, newColumn, leftSuffix, rightSuffix);
                ret.Columns.Insert(ret.Columns.Count, newColumn);
            }
            break;
        }

        case JoinAlgorithm.FullOuter:
        {
            long newRowCount = Math.Max(thisRowCount, otherRowCount);

            long numberOfNulls = newRowCount - thisRowCount;
            for (int i = 0; i < Columns.Count; i++)
            {
                DataFrameColumn newColumn = Columns[i].Clone(numberOfNullsToAppend: numberOfNulls);
                ret.Columns.Insert(ret.Columns.Count, newColumn);
            }

            numberOfNulls = newRowCount - otherRowCount;
            for (int i = 0; i < other.Columns.Count; i++)
            {
                DataFrameColumn newColumn = other.Columns[i].Clone(numberOfNullsToAppend: numberOfNulls);
                SetSuffixForDuplicatedColumnNames(ret, newColumn, leftSuffix, rightSuffix);
                ret.Columns.Insert(ret.Columns.Count, newColumn);
            }
            break;
        }

        case JoinAlgorithm.Inner:
        {
            PrimitiveDataFrameColumn<long> mapIndices = CreateLeadingRowMap(Math.Min(thisRowCount, otherRowCount));
            for (int i = 0; i < Columns.Count; i++)
            {
                DataFrameColumn newColumn = Columns[i].Clone(mapIndices);
                ret.Columns.Insert(ret.Columns.Count, newColumn);
            }

            for (int i = 0; i < other.Columns.Count; i++)
            {
                DataFrameColumn newColumn = other.Columns[i].Clone(mapIndices);
                SetSuffixForDuplicatedColumnNames(ret, newColumn, leftSuffix, rightSuffix);
                ret.Columns.Insert(ret.Columns.Count, newColumn);
            }
            break;
        }

        default:
            // Previously an unknown value fell through and produced an empty DataFrame;
            // fail loudly instead, the same way Merge does.
            throw new NotImplementedException(nameof(joinAlgorithm));
    }

    return ret;
}
/// <summary>
/// Element-wise modulo of this column by <paramref name="column"/>.
/// </summary>
/// <remarks>
/// This base implementation does no work and always throws. The member is virtual so that
/// overriding types can provide the actual operation.
/// </remarks>
/// <param name="column">The column supplying the right-hand operands.</param>
/// <param name="inPlace">In-place flag; it defaults to false and is not read by this base implementation.</param>
/// <returns>Never returns from this implementation.</returns>
/// <exception cref="NotImplementedException">Always thrown by this base implementation.</exception>
public virtual DataFrameColumn Modulo(DataFrameColumn column, bool inPlace = false)
    => throw new NotImplementedException();
// TODO: Merge API with an "On" parameter that merges on a column common to 2 dataframes

/// <summary>
/// Merge DataFrames with a database style join
/// </summary>
/// <remarks>
/// Implemented as a hash join: one join column is grouped by value, the other is scanned, and the
/// matching row numbers are collected as (left, right) pairs. A null on one side of a pair means
/// "no row from that DataFrame". Null keys only match null keys on the other side.
/// For Left, Right and FullOuter joins, a row of the retained side whose key is null and has no null
/// counterpart is still emitted, paired with a null index.
/// </remarks>
/// <typeparam name="TKey">The type the non-null join-column values are cast to for lookup.</typeparam>
/// <param name="other">The DataFrame on the right-hand side of the join.</param>
/// <param name="leftJoinColumn">Name of the join column in this DataFrame.</param>
/// <param name="rightJoinColumn">Name of the join column in <paramref name="other"/>.</param>
/// <param name="leftSuffix">Forwarded to SetSuffixForDuplicatedColumnNames together with each column taken from <paramref name="other"/>.</param>
/// <param name="rightSuffix">Forwarded to SetSuffixForDuplicatedColumnNames together with each column taken from <paramref name="other"/>.</param>
/// <param name="joinAlgorithm">The kind of join to perform: Left, Right, Inner or FullOuter.</param>
/// <returns>A new DataFrame with this DataFrame's columns followed by the columns of <paramref name="other"/>, each cloned through the collected row indices.</returns>
/// <exception cref="ArgumentNullException"><paramref name="other"/> is null.</exception>
/// <exception cref="NotImplementedException"><paramref name="joinAlgorithm"/> is not one of the four handled values.</exception>
public DataFrame Merge<TKey>(DataFrame other, string leftJoinColumn, string rightJoinColumn, string leftSuffix = "_left", string rightSuffix = "_right", JoinAlgorithm joinAlgorithm = JoinAlgorithm.Left)
{
    if (other == null)
    {
        throw new ArgumentNullException(nameof(other));
    }

    // A simple hash join
    DataFrame ret = new DataFrame();

    // The final table size is not known until runtime, so the row pairs are accumulated by appending.
    PrimitiveDataFrameColumn<long> leftRowIndices = new PrimitiveDataFrameColumn<long>("LeftIndices");
    PrimitiveDataFrameColumn<long> rightRowIndices = new PrimitiveDataFrameColumn<long>("RightIndices");

    if (joinAlgorithm == JoinAlgorithm.Left)
    {
        // First hash other dataframe on the rightJoinColumn
        DataFrameColumn otherColumn = other.Columns[rightJoinColumn];
        Dictionary<TKey, ICollection<long>> multimap = otherColumn.GroupColumnValues<TKey>(out HashSet<long> otherColumnNullIndices);

        // Go over the records in this dataframe and match with the dictionary
        DataFrameColumn thisColumn = Columns[leftJoinColumn];
        for (long i = 0; i < thisColumn.Length; i++)
        {
            var thisColumnValue = thisColumn[i];
            if (thisColumnValue != null)
            {
                if (multimap.TryGetValue((TKey)thisColumnValue, out ICollection<long> rowNumbers))
                {
                    foreach (long row in rowNumbers)
                    {
                        leftRowIndices.Append(i);
                        rightRowIndices.Append(row);
                    }
                }
                else
                {
                    leftRowIndices.Append(i);
                    rightRowIndices.Append(null);
                }
            }
            else if (otherColumnNullIndices.Count == 0)
            {
                // A left join must keep every left row: a null key with no null partner
                // on the right is emitted unmatched rather than being dropped.
                leftRowIndices.Append(i);
                rightRowIndices.Append(null);
            }
            else
            {
                foreach (long row in otherColumnNullIndices)
                {
                    leftRowIndices.Append(i);
                    rightRowIndices.Append(row);
                }
            }
        }
    }
    else if (joinAlgorithm == JoinAlgorithm.Right)
    {
        DataFrameColumn thisColumn = Columns[leftJoinColumn];
        Dictionary<TKey, ICollection<long>> multimap = thisColumn.GroupColumnValues<TKey>(out HashSet<long> thisColumnNullIndices);

        DataFrameColumn otherColumn = other.Columns[rightJoinColumn];
        for (long i = 0; i < otherColumn.Length; i++)
        {
            var otherColumnValue = otherColumn[i];
            if (otherColumnValue != null)
            {
                if (multimap.TryGetValue((TKey)otherColumnValue, out ICollection<long> rowNumbers))
                {
                    foreach (long row in rowNumbers)
                    {
                        leftRowIndices.Append(row);
                        rightRowIndices.Append(i);
                    }
                }
                else
                {
                    leftRowIndices.Append(null);
                    rightRowIndices.Append(i);
                }
            }
            else if (thisColumnNullIndices.Count == 0)
            {
                // Mirror of the left-join case: keep the right row even though nothing matches its null key.
                leftRowIndices.Append(null);
                rightRowIndices.Append(i);
            }
            else
            {
                foreach (long thisColumnNullIndex in thisColumnNullIndices)
                {
                    leftRowIndices.Append(thisColumnNullIndex);
                    rightRowIndices.Append(i);
                }
            }
        }
    }
    else if (joinAlgorithm == JoinAlgorithm.Inner)
    {
        // Hash the column with the smaller RowCount
        long leftRowCount = Rows.Count;
        long rightRowCount = other.Rows.Count;
        bool leftColumnIsSmaller = leftRowCount <= rightRowCount;

        DataFrameColumn hashColumn = leftColumnIsSmaller ? Columns[leftJoinColumn] : other.Columns[rightJoinColumn];
        DataFrameColumn otherColumn = leftColumnIsSmaller ? other.Columns[rightJoinColumn] : Columns[leftJoinColumn];
        Dictionary<TKey, ICollection<long>> multimap = hashColumn.GroupColumnValues<TKey>(out HashSet<long> smallerDataFrameColumnNullIndices);

        for (long i = 0; i < otherColumn.Length; i++)
        {
            var otherColumnValue = otherColumn[i];
            if (otherColumnValue != null)
            {
                if (multimap.TryGetValue((TKey)otherColumnValue, out ICollection<long> rowNumbers))
                {
                    foreach (long row in rowNumbers)
                    {
                        // `row` indexes the hashed column, `i` the scanned one; route each to its own side.
                        leftRowIndices.Append(leftColumnIsSmaller ? row : i);
                        rightRowIndices.Append(leftColumnIsSmaller ? i : row);
                    }
                }
            }
            else
            {
                foreach (long nullIndex in smallerDataFrameColumnNullIndices)
                {
                    leftRowIndices.Append(leftColumnIsSmaller ? nullIndex : i);
                    rightRowIndices.Append(leftColumnIsSmaller ? i : nullIndex);
                }
            }
        }
    }
    else if (joinAlgorithm == JoinAlgorithm.FullOuter)
    {
        DataFrameColumn otherColumn = other.Columns[rightJoinColumn];
        Dictionary<TKey, ICollection<long>> multimap = otherColumn.GroupColumnValues<TKey>(out HashSet<long> otherColumnNullIndices);

        // Keys that produced at least one matched pair; used below to find unmatched right rows.
        HashSet<TKey> intersection = new HashSet<TKey>();

        // Go over the records in this dataframe and match with the dictionary
        DataFrameColumn thisColumn = Columns[leftJoinColumn];
        Int64DataFrameColumn thisColumnNullIndices = new Int64DataFrameColumn("ThisColumnNullIndices");

        for (long i = 0; i < thisColumn.Length; i++)
        {
            var thisColumnValue = thisColumn[i];
            if (thisColumnValue != null)
            {
                TKey key = (TKey)thisColumnValue;
                if (multimap.TryGetValue(key, out ICollection<long> rowNumbers))
                {
                    foreach (long row in rowNumbers)
                    {
                        leftRowIndices.Append(i);
                        rightRowIndices.Append(row);
                        intersection.Add(key);
                    }
                }
                else
                {
                    leftRowIndices.Append(i);
                    rightRowIndices.Append(null);
                }
            }
            else
            {
                // Null keys are paired up after the non-null rows.
                thisColumnNullIndices.Append(i);
            }
        }

        // Right rows whose (non-null) key never matched anything on the left.
        for (long i = 0; i < otherColumn.Length; i++)
        {
            var value = otherColumn[i];
            if (value != null && !intersection.Contains((TKey)value))
            {
                leftRowIndices.Append(null);
                rightRowIndices.Append(i);
            }
        }

        // Now handle the null rows
        foreach (long? thisColumnNullIndex in thisColumnNullIndices)
        {
            foreach (long otherColumnNullIndex in otherColumnNullIndices)
            {
                leftRowIndices.Append(thisColumnNullIndex.Value);
                rightRowIndices.Append(otherColumnNullIndex);
            }
            if (otherColumnNullIndices.Count == 0)
            {
                leftRowIndices.Append(thisColumnNullIndex.Value);
                rightRowIndices.Append(null);
            }
        }
        if (thisColumnNullIndices.Length == 0)
        {
            foreach (long otherColumnNullIndex in otherColumnNullIndices)
            {
                leftRowIndices.Append(null);
                rightRowIndices.Append(otherColumnNullIndex);
            }
        }
    }
    else
    {
        throw new NotImplementedException(nameof(joinAlgorithm));
    }

    // Materialise the result: this DataFrame's columns first, then the other DataFrame's columns.
    for (int i = 0; i < Columns.Count; i++)
    {
        ret.Columns.Insert(i, Columns[i].Clone(leftRowIndices));
    }
    for (int i = 0; i < other.Columns.Count; i++)
    {
        DataFrameColumn column = other.Columns[i].Clone(rightRowIndices);
        SetSuffixForDuplicatedColumnNames(ret, column, leftSuffix, rightSuffix);
        ret.Columns.Insert(ret.Columns.Count, column);
    }
    return ret;
}