// Verifies that the DataViewSchema obtained through the IDataView view of a DataFrame
// tracks column removal, insertion and replacement performed on the DataFrame itself.
public void TestIDataViewSchemaInvalidate()
{
    DataFrame frame = MakeDataFrameWithAllMutableColumnTypes(10, withNulls: false);
    IDataView view = frame;

    // Baseline: the test expects the freshly built frame to expose 14 columns.
    Assert.Equal(14, view.Schema.Count);

    // Removing a column must be visible the next time the schema is read.
    frame.RemoveColumn("Bool");
    Assert.Equal(13, view.Schema.Count);

    // Put a new "Bool" column back, this time at position 0.
    int rowCount = (int)frame.RowCount;
    BaseColumn replacementBool = new PrimitiveColumn<bool>("Bool", Enumerable.Range(0, rowCount).Select(n => n % 2 == 1));
    frame.InsertColumn(0, replacementBool);
    DataViewSchema schemaAfterInsert = view.Schema;
    Assert.Equal(14, schemaAfterInsert.Count);
    Assert.Equal("Bool", schemaAfterInsert[0].Name);

    // Replacing the column at position 1 must update the name reported at that position.
    BaseColumn renamedClone = replacementBool.Clone();
    renamedClone.Name = "BoolClone";
    frame.SetColumn(1, renamedClone);
    Assert.Equal("BoolClone", view.Schema[1].Name);
}
// Builds a DataFrame from two empty int columns, converts it to Arrow record batches and
// round-trips each batch through an ArrowStreamWriter/ArrowStreamReader pair, comparing
// every batch read back with the batch that was written. The test also requires that
// at least one record batch is produced.
public void TestEmptyDataFrameRecordBatch()
{
    PrimitiveColumn<int> ageColumn = new PrimitiveColumn<int>("Age");
    PrimitiveColumn<int> lengthColumn = new PrimitiveColumn<int>("CharCount");
    DataFrame df = new DataFrame(new List<BaseColumn>() { ageColumn, lengthColumn });

    IEnumerable<RecordBatch> recordBatches = df.AsArrowRecordBatches();
    bool foundARecordBatch = false;
    foreach (RecordBatch recordBatch in recordBatches)
    {
        foundARecordBatch = true;

        // The stream is only needed for this iteration, so dispose it deterministically
        // instead of leaving one undisposed stream behind per batch.
        using (MemoryStream stream = new MemoryStream())
        {
            ArrowStreamWriter writer = new ArrowStreamWriter(stream, recordBatch.Schema);
            // The test method is synchronous, so the asynchronous write is awaited by blocking.
            writer.WriteRecordBatchAsync(recordBatch).GetAwaiter().GetResult();

            // Rewind so the reader starts at the beginning of what was just written.
            stream.Position = 0;
            ArrowStreamReader reader = new ArrowStreamReader(stream);
            RecordBatch readRecordBatch = reader.ReadNextRecordBatch();
            while (readRecordBatch != null)
            {
                RecordBatchComparer.CompareBatches(recordBatch, readRecordBatch);
                readRecordBatch = reader.ReadNextRecordBatch();
            }
        }
    }

    Assert.True(foundARecordBatch);
}
// Checks the result column type when a scalar of a different numeric type is added to a
// frame of int columns, and that unsupported operand combinations throw
// NotSupportedException.
public void TestBinaryOperationsWithConversions()
{
    DataFrame df = DataFrameTests.MakeDataFrameWithTwoColumns(10);

    // Add a float literal (5.0f) to the int columns; the test expects a double result column.
    DataFrame dfd = df.Add(5.0f);
    var dtype = dfd.Column(0).DataType;
    // Assert.Equal reports the actual type on failure, unlike Assert.True(a == b).
    Assert.Equal(typeof(double), dtype);

    // Add a decimal to the int columns; the test expects a decimal result column.
    DataFrame dfm = df.Add(5.0m);
    dtype = dfm.Column(0).DataType;
    Assert.Equal(typeof(decimal), dtype);

    // int + bool should throw
    Assert.Throws<NotSupportedException>(() => df.Add(true));

    var dataFrameColumn1 = new PrimitiveColumn<double>("Double1", Enumerable.Range(0, 10).Select(x => (double)x));
    df.SetColumn(0, dataFrameColumn1);

    // And(bool) on a frame that now contains a double column should throw
    Assert.Throws<NotSupportedException>(() => df.And(true));
}
/// <summary>
/// Builds a DataFrame with twelve primitive columns (byte, char, decimal, double, float,
/// int, long, sbyte, short, uint, ulong, ushort). Each column holds the values
/// 0..length-1 cast to its element type; the "Char" column holds (char)(x + 65).
/// </summary>
/// <param name="length">Number of rows generated for every column.</param>
/// <param name="withNulls">
/// When true, the entry at index <c>length / 2</c> of every column is set to null.
/// </param>
/// <returns>The newly created DataFrame.</returns>
public static DataFrame MakeDataFrameWithNumericColumns(int length, bool withNulls = true)
{
    // Every column is generated from the same 0..length-1 index sequence.
    IEnumerable<int> Indices() => Enumerable.Range(0, length);

    var columns = new List<BaseColumn>
    {
        new PrimitiveColumn<byte>("Byte", Indices().Select(i => (byte)i)),
        new PrimitiveColumn<char>("Char", Indices().Select(i => (char)(i + 65))),
        new PrimitiveColumn<decimal>("Decimal", Indices().Select(i => (decimal)i)),
        new PrimitiveColumn<double>("Double", Indices().Select(i => (double)i)),
        new PrimitiveColumn<float>("Float", Indices().Select(i => (float)i)),
        new PrimitiveColumn<int>("Int", Indices().Select(i => i)),
        new PrimitiveColumn<long>("Long", Indices().Select(i => (long)i)),
        new PrimitiveColumn<sbyte>("Sbyte", Indices().Select(i => (sbyte)i)),
        new PrimitiveColumn<short>("Short", Indices().Select(i => (short)i)),
        new PrimitiveColumn<uint>("Uint", Indices().Select(i => (uint)i)),
        new PrimitiveColumn<ulong>("Ulong", Indices().Select(i => (ulong)i)),
        new PrimitiveColumn<ushort>("Ushort", Indices().Select(i => (ushort)i)),
    };

    DataFrame frame = new DataFrame(columns);
    if (!withNulls)
    {
        return frame;
    }

    // Punch a single null into the middle row of every column.
    int middleRow = length / 2;
    for (int columnIndex = 0; columnIndex < frame.ColumnCount; columnIndex++)
    {
        frame.Column(columnIndex)[middleRow] = null;
    }

    return frame;
}
// Checks PrimitiveColumn<int>.Sort on a column that first contains only nulls and then
// nulls followed by the values 0..4: the null count must be preserved, and for both
// ascending and descending sorts the values come first and a null occupies the last slot.
//
// numberOfNulls: how many nulls are appended before the values; must be at least 1 because
// the all-null sort is checked by reading index 0.
public void TestPrimitiveColumnSort(int numberOfNulls)
{
    // Number of non-null values (0..valueCount-1) appended after the nulls.
    const int valueCount = 5;

    // Primitive Column Sort
    PrimitiveColumn<int> intColumn = new PrimitiveColumn<int>("Int", 0);
    Assert.Equal(0, intColumn.NullCount);
    intColumn.AppendMany(null, numberOfNulls);
    Assert.Equal(numberOfNulls, intColumn.NullCount);

    // Should handle all nulls. A direct cast is used so that an unexpected result type
    // fails with InvalidCastException rather than a later NullReferenceException.
    PrimitiveColumn<int> sortedIntColumn = (PrimitiveColumn<int>)intColumn.Sort();
    Assert.Equal(numberOfNulls, sortedIntColumn.NullCount);
    Assert.Null(sortedIntColumn[0]);

    for (int i = 0; i < valueCount; i++)
    {
        intColumn.Append(i);
    }
    Assert.Equal(numberOfNulls, intColumn.NullCount);

    // Ascending sort: smallest value first, null in the last position.
    sortedIntColumn = (PrimitiveColumn<int>)intColumn.Sort();
    long lastIndex = sortedIntColumn.Length - 1;
    Assert.Equal(0, sortedIntColumn[0]);
    Assert.Null(sortedIntColumn[lastIndex]);

    // Descending sort: largest value first, null still in the last position.
    sortedIntColumn = (PrimitiveColumn<int>)intColumn.Sort(false);
    lastIndex = sortedIntColumn.Length - 1;
    Assert.Equal(valueCount - 1, sortedIntColumn[0]);
    Assert.Null(sortedIntColumn[lastIndex]);
}
// Exercises PrimitiveColumn<int>.AppendMany with null and non-null values and checks that
// NullCount, Length and IsValid stay consistent, including after individual entries are
// overwritten through the indexer.
public void TestAppendMany()
{
    var column = new PrimitiveColumn<int>("Int1");

    // Five nulls: every row is invalid.
    column.AppendMany(null, 5);
    Assert.Equal(5, column.NullCount);
    Assert.Equal(5, column.Length);
    for (int row = 0; row < column.Length; row++)
    {
        Assert.False(column.IsValid(row));
    }

    // Five more rows holding the value 5: the null count is unchanged, new rows are valid.
    column.AppendMany(5, 5);
    Assert.Equal(5, column.NullCount);
    Assert.Equal(10, column.Length);
    for (int row = 5; row < column.Length; row++)
    {
        Assert.True(column.IsValid(row));
    }

    // Overwriting a null with a value decrements the null count ...
    column[2] = 10;
    Assert.Equal(4, column.NullCount);
    Assert.True(column.IsValid(2));

    // ... and overwriting a value with null increments it again.
    column[7] = null;
    Assert.Equal(5, column.NullCount);
    Assert.False(column.IsValid(7));
}
/// <summary>
/// Creates the frame returned by <c>MakeDataFrameWithNumericAndStringColumns(length)</c>
/// and appends a "Bool" column whose value is true on even row indices.
/// </summary>
/// <param name="length">Number of rows in the generated frame.</param>
/// <returns>The frame with the additional "Bool" column as its last column.</returns>
public static DataFrame MakeDataFrameWithAllColumnTypes(int length)
{
    DataFrame frame = MakeDataFrameWithNumericAndStringColumns(length);

    IEnumerable<bool> evenRowFlags = Enumerable.Range(0, length).Select(row => row % 2 == 0);
    BaseColumn boolColumn = new PrimitiveColumn<bool>("Bool", evenRowFlags);

    // Inserting at ColumnCount places the column after all existing ones.
    int appendPosition = frame.ColumnCount;
    frame.InsertColumn(appendPosition, boolColumn);
    return frame;
}
// Checks that NullCount is maintained correctly by the constructors, Append and the
// indexer setter of both PrimitiveColumn<int> and StringColumn.
public void TestNullCounts()
{
    // A column built from ten values gains exactly one null when a null is appended.
    var tenValues = new PrimitiveColumn<int>("Int1", Enumerable.Range(0, 10).Select(x => x));
    tenValues.Append(null);
    Assert.Equal(1, tenValues.NullCount);

    // Freshly constructed columns report no nulls.
    var intColumn = new PrimitiveColumn<int>("Int2");
    Assert.Equal(0, intColumn.NullCount);

    var presizedIntColumn = new PrimitiveColumn<int>("Int3", 10);
    Assert.Equal(0, presizedIntColumn.NullCount);

    // Test null counts with assignments on Primitive Columns
    intColumn.Append(null);
    intColumn.Append(1);
    Assert.Equal(1, intColumn.NullCount);

    intColumn[1] = 10;      // value -> value: unchanged
    Assert.Equal(1, intColumn.NullCount);

    intColumn[1] = null;    // value -> null: one more
    Assert.Equal(2, intColumn.NullCount);

    intColumn[1] = 5;       // null -> value: one fewer
    Assert.Equal(1, intColumn.NullCount);

    intColumn[0] = null;    // null -> null: unchanged
    Assert.Equal(1, intColumn.NullCount);

    // Test null counts with assignments on String Columns
    var stringColumn = new StringColumn("String", 0);
    Assert.Equal(0, stringColumn.NullCount);

    var presizedStringColumn = new StringColumn("String1", 5);
    Assert.Equal(0, presizedStringColumn.NullCount);

    var nonNullStrings = new StringColumn("String", Enumerable.Range(0, 10).Select(x => x.ToString()));
    Assert.Equal(0, nonNullStrings.NullCount);

    var allNullStrings = new StringColumn("String", Enumerable.Range(0, 10).Select(x => (string)null));
    Assert.Equal(10, allNullStrings.NullCount);

    stringColumn.Append(null);
    Assert.Equal(1, stringColumn.NullCount);

    stringColumn.Append("foo");
    Assert.Equal(1, stringColumn.NullCount);

    stringColumn[1] = "bar";    // value -> value: unchanged
    Assert.Equal(1, stringColumn.NullCount);

    stringColumn[1] = null;     // value -> null: one more
    Assert.Equal(2, stringColumn.NullCount);

    stringColumn[1] = "foo";    // null -> value: one fewer
    Assert.Equal(1, stringColumn.NullCount);

    stringColumn[0] = null;     // null -> null: unchanged
    Assert.Equal(1, stringColumn.NullCount);
}
/// <summary>
/// Builds a DataFrame with two int columns: "Int1" holding 0..length-1 and "Int2"
/// holding 10..length+9.
/// </summary>
/// <param name="length">Number of rows in each column.</param>
/// <returns>The frame with "Int1" at position 0 and "Int2" at position 1.</returns>
public static DataFrame MakeDataFrameWithTwoColumns(int length)
{
    BaseColumn firstColumn = new PrimitiveColumn<int>("Int1", Enumerable.Range(0, length).Select(value => value));
    BaseColumn secondColumn = new PrimitiveColumn<int>("Int2", Enumerable.Range(10, length).Select(value => value));

    // Start from an empty frame and add the columns one at a time.
    DataFrame frame = new DataFrame();
    frame.InsertColumn(0, firstColumn);
    frame.InsertColumn(1, secondColumn);
    return frame;
}
// Checks IsValid on a PrimitiveColumn<int> holding ten values followed by one appended
// null: only the appended row (index 10) may be reported as invalid.
public void TestValidity()
{
    var column = new PrimitiveColumn<int>("Int1", Enumerable.Range(0, 10).Select(x => x));
    column.Append(null);

    // The appended null is the last row.
    Assert.False(column.IsValid(10));

    // Every row before the appended null must be valid.
    long rowsBeforeNull = column.Length - 1;
    for (long row = 0; row < rowsBeforeNull; row++)
    {
        Assert.True(column.IsValid(row));
    }
}
/// <summary>
/// Creates the frame returned by
/// <c>MakeDataFrameWithNumericAndStringColumns(length, withNulls)</c> and appends a "Bool"
/// column whose value is true on even row indices.
/// </summary>
/// <param name="length">Number of rows in the generated frame.</param>
/// <param name="withNulls">
/// Forwarded to the underlying factory; when true, the "Bool" column additionally gets a
/// null at index <c>length / 2</c>.
/// </param>
/// <returns>The frame with the "Bool" column as its last column.</returns>
public static DataFrame MakeDataFrameWithAllMutableColumnTypes(int length, bool withNulls = true)
{
    DataFrame frame = MakeDataFrameWithNumericAndStringColumns(length, withNulls);

    BaseColumn boolColumn = new PrimitiveColumn<bool>("Bool", Enumerable.Range(0, length).Select(row => row % 2 == 0));
    frame.InsertColumn(frame.ColumnCount, boolColumn);

    if (!withNulls)
    {
        return frame;
    }

    // The null is written through the column reference after it was inserted into the frame.
    boolColumn[length / 2] = null;
    return frame;
}
// Checks that name-based column lookup still returns the same column instances after a
// column has been appended to the frame and the first column has been removed.
public void InsertAndRemoveColumnTests()
{
    DataFrame dataFrame = MakeDataFrameWithAllMutableColumnTypes(10);
    BaseColumn intColumn = new PrimitiveColumn<int>("IntColumn", Enumerable.Range(0, 10).Select(x => x));
    BaseColumn charColumn = dataFrame["Char"];

    // Append the new column after all existing ones, then drop the first column so that
    // every remaining column shifts by one position.
    int insertedIndex = dataFrame.ColumnCount;
    dataFrame.InsertColumn(insertedIndex, intColumn);
    dataFrame.RemoveColumn(0);

    // Assert.Same performs a reference comparison and reports both operands on failure.
    Assert.Same(intColumn, dataFrame["IntColumn"]);
    Assert.Same(charColumn, dataFrame["Char"]);
}
// Builds a DataFrame from an Arrow record batch with one Int32 column holding 0..9 and
// checks that the read-only data buffers exposed by the column match the values returned
// by its indexer, and that every bit in the exposed null bitmaps is set.
public void TestPrimitiveColumnGetReadOnlyBuffers()
{
    RecordBatch recordBatch = new RecordBatch.Builder()
        .Append("Column1", false, col => col.Int32(array => array.AppendRange(Enumerable.Range(0, 10)))).Build();
    DataFrame df = new DataFrame(recordBatch);

    // Direct cast: an unexpected column type fails here instead of as a null dereference below.
    PrimitiveColumn<int> column = (PrimitiveColumn<int>)df["Column1"];

    IEnumerable<ReadOnlyMemory<int>> buffers = column.GetReadOnlyDataBuffers();
    IEnumerable<ReadOnlyMemory<byte>> nullBitMaps = column.GetReadOnlyNullBitMapBuffers();

    // Returns whether bit (index mod 8) of the given bitmap byte is set.
    bool GetBit(byte curBitMap, int index)
    {
        return ((curBitMap >> (index & 7)) & 1) != 0;
    }

    // i counts the buffer pairs consumed so far.
    long i = 0;

    // The enumerators are obtained manually to walk both sequences in lock step, so they
    // must also be disposed manually (foreach would otherwise do this).
    using (IEnumerator<ReadOnlyMemory<int>> bufferEnumerator = buffers.GetEnumerator())
    using (IEnumerator<ReadOnlyMemory<byte>> nullBitMapsEnumerator = nullBitMaps.GetEnumerator())
    {
        while (bufferEnumerator.MoveNext() && nullBitMapsEnumerator.MoveNext())
        {
            ReadOnlyMemory<int> dataBuffer = bufferEnumerator.Current;
            ReadOnlyMemory<byte> nullBitMap = nullBitMapsEnumerator.Current;

            ReadOnlySpan<int> span = dataBuffer.Span;
            for (int j = 0; j < span.Length; j++)
            {
                // Each buffer has a max length of int.MaxValue
                Assert.Equal(span[j], column[j + i * int.MaxValue]);
            }

            ReadOnlySpan<byte> bitMapSpan = nullBitMap.Span;
            // No nulls in this column, so each bit must be set
            for (int j = 0; j < bitMapSpan.Length; j++)
            {
                for (int k = 0; k < 8; k++)
                {
                    Assert.True(GetBit(bitMapSpan[j], k));
                }
            }

            i++;
        }
    }
}
/// <summary>
/// Builds a two-column DataFrame ("Column1" of <typeparamref name="T1"/>, "Column2" of
/// <typeparamref name="T2"/>) whose rows alternate between 0 (even row index) and
/// 1 (odd row index), converted to the element type with <c>Convert.ChangeType</c>.
/// </summary>
/// <typeparam name="T1">Element type of the first column.</typeparam>
/// <typeparam name="T2">Element type of the second column.</typeparam>
/// <param name="length">Number of rows in each column.</param>
/// <param name="withNulls">
/// When true, the entry at index <c>length / 2</c> of both columns is set to null.
/// </param>
/// <returns>The newly created DataFrame.</returns>
public static DataFrame MakeDataFrame<T1, T2>(int length, bool withNulls = true)
    where T1 : unmanaged
    where T2 : unmanaged
{
    // Produces the alternating 0/1 sequence converted to the requested element type.
    IEnumerable<T> AlternatingValues<T>()
    {
        return Enumerable.Range(0, length).Select(row => (T)Convert.ChangeType(row % 2 == 0 ? 0 : 1, typeof(T)));
    }

    BaseColumn firstColumn = new PrimitiveColumn<T1>("Column1", AlternatingValues<T1>());
    BaseColumn secondColumn = new PrimitiveColumn<T2>("Column2", AlternatingValues<T2>());
    DataFrame frame = new DataFrame(new List<BaseColumn> { firstColumn, secondColumn });

    if (!withNulls)
    {
        return frame;
    }

    // Punch a single null into the middle row of every column.
    int middleRow = length / 2;
    for (int columnIndex = 0; columnIndex < frame.ColumnCount; columnIndex++)
    {
        frame.Column(columnIndex)[middleRow] = null;
    }

    return frame;
}
// Checks which binary operations are accepted on a frame of two all-true bool columns:
// arithmetic and shift operations must throw NotSupportedException, while Equals, And,
// Or and Xor must work with both a scalar bool and a list of bools.
public void TestBinaryOperationsOnBoolColumn()
{
    var boolFrame = new DataFrame();
    var firstBoolColumn = new PrimitiveColumn<bool>("Bool1", Enumerable.Range(0, 10).Select(x => true));
    var secondBoolColumn = new PrimitiveColumn<bool>("Bool2", Enumerable.Range(0, 10).Select(x => true));
    boolFrame.InsertColumn(0, firstBoolColumn);
    boolFrame.InsertColumn(1, secondBoolColumn);

    // bool + int should throw
    Assert.Throws<NotSupportedException>(() => boolFrame.Add(5));
    // Left shift should throw
    Assert.Throws<NotSupportedException>(() => boolFrame.LeftShift(5));

    // Two operands for a two-column frame; the expectations below read column 1, which
    // is presumably paired with the second entry (false) - confirm against DataFrame.
    IReadOnlyList<bool> perColumnOperands = new List<bool>() { true, false };

    // bool equals and And should work
    var scalarResult = boolFrame.Equals(true);
    Assert.Equal(true, scalarResult[4, 0]);
    var listResult = boolFrame.Equals(perColumnOperands);
    Assert.Equal(false, listResult[4, 1]);

    scalarResult = boolFrame.And(true);
    Assert.Equal(true, scalarResult[4, 0]);
    listResult = boolFrame.And(perColumnOperands);
    Assert.Equal(false, listResult[4, 1]);

    scalarResult = boolFrame.Or(true);
    Assert.Equal(true, scalarResult[4, 0]);
    listResult = boolFrame.Or(perColumnOperands);
    Assert.Equal(true, listResult[4, 1]);

    scalarResult = boolFrame.Xor(true);
    Assert.Equal(false, scalarResult[4, 0]);
    listResult = boolFrame.Xor(perColumnOperands);
    Assert.Equal(true, listResult[4, 1]);
}
// Covers basic DataFrame construction: inserting columns, the reported row/column counts
// and names, the argument validation performed by InsertColumn and SetColumn, and that
// SetColumn/RemoveColumn leave the expected column instances in place.
public void ColumnAndTableCreationTest()
{
    BaseColumn intColumn = new PrimitiveColumn<int>("IntColumn", Enumerable.Range(0, 10).Select(x => x));
    BaseColumn floatColumn = new PrimitiveColumn<float>("FloatColumn", Enumerable.Range(0, 10).Select(x => (float)x));

    DataFrame frame = new DataFrame();
    frame.InsertColumn(0, intColumn);
    frame.InsertColumn(1, floatColumn);

    // Shape and naming after the two inserts.
    Assert.Equal(10, frame.RowCount);
    Assert.Equal(2, frame.ColumnCount);
    Assert.Equal(10, frame.Column(0).Length);
    Assert.Equal("IntColumn", frame.Column(0).Name);
    Assert.Equal(10, frame.Column(1).Length);
    Assert.Equal("FloatColumn", frame.Column(1).Name);

    // Rejected inserts: a column with 11 rows, a column reusing an existing name, and an
    // insert position of 10 in a two-column frame.
    BaseColumn tooLongColumn = new PrimitiveColumn<float>("BigColumn", Enumerable.Range(0, 11).Select(x => (float)x));
    BaseColumn duplicateNameColumn = new PrimitiveColumn<float>("FloatColumn", Enumerable.Range(0, 10).Select(x => (float)x));
    Assert.Throws<ArgumentException>(() => frame.InsertColumn(2, tooLongColumn));
    Assert.Throws<ArgumentException>(() => frame.InsertColumn(2, duplicateNameColumn));
    Assert.Throws<ArgumentOutOfRangeException>(() => frame.InsertColumn(10, duplicateNameColumn));
    // None of the failed inserts may have changed the frame.
    Assert.Equal(2, frame.ColumnCount);

    // SetColumn must reject a column whose name is already used at another position ...
    BaseColumn sameNameIntColumn = new PrimitiveColumn<int>("IntColumn", Enumerable.Range(0, 10).Select(x => x));
    Assert.Throws<ArgumentException>(() => frame.SetColumn(1, sameNameIntColumn));

    // ... and accept one with a fresh name, storing the very same instance.
    BaseColumn freshNameIntColumn = new PrimitiveColumn<int>("IntColumn1", Enumerable.Range(0, 10).Select(x => x));
    frame.SetColumn(1, freshNameIntColumn);
    Assert.True(ReferenceEquals(freshNameIntColumn, frame.Column(1)));

    // Removing the second column leaves the original int column at position 0.
    frame.RemoveColumn(1);
    Assert.Equal(1, frame.ColumnCount);
    Assert.True(ReferenceEquals(intColumn, frame.Column(0)));
}
// Enumerates every column of a ten-row frame and checks the total number of values
// yielded, then spot-checks the typed enumeration of the "String", "ArrowString", "Float"
// and "Int" columns by concatenating their values (nulls rendered as "<null>").
public void TestIEnumerable()
{
    DataFrame frame = MakeDataFrameWithAllColumnTypes(10);

    // Untyped enumeration: each column must yield one value per row.
    int enumeratedValues = 0;
    for (int columnIndex = 0; columnIndex < frame.ColumnCount; columnIndex++)
    {
        BaseColumn untypedColumn = frame.Column(columnIndex);
        foreach (object value in untypedColumn)
        {
            enumeratedValues++;
        }
    }
    Assert.Equal(10 * frame.ColumnCount, enumeratedValues);

    // spot check a few column types:
    StringBuilder rendered = new StringBuilder();

    StringColumn stringColumn = (StringColumn)frame["String"];
    foreach (string value in stringColumn)
    {
        rendered.Append(value ?? "<null>");
    }
    Assert.Equal("01234<null>6789", rendered.ToString());

    ArrowStringColumn arrowStringColumn = (ArrowStringColumn)frame["ArrowString"];
    rendered.Clear();
    foreach (string value in arrowStringColumn)
    {
        rendered.Append(value ?? "<null>");
    }
    Assert.Equal("foofoofoofoofoofoofoofoofoofoo", rendered.ToString());

    PrimitiveColumn<float> floatColumn = (PrimitiveColumn<float>)frame["Float"];
    rendered.Clear();
    foreach (float? value in floatColumn)
    {
        rendered.Append(value.HasValue ? value.Value.ToString() : "<null>");
    }
    Assert.Equal("01234<null>6789", rendered.ToString());

    PrimitiveColumn<int> intColumn = (PrimitiveColumn<int>)frame["Int"];
    rendered.Clear();
    foreach (int? value in intColumn)
    {
        rendered.Append(value.HasValue ? value.Value.ToString() : "<null>");
    }
    Assert.Equal("01234<null>6789", rendered.ToString());
}
// Asserts that every column of `join` is element-wise equal to the column it is expected
// to originate from. The first left.ColumnCount columns are compared against `left`, the
// remaining ones against `right`. How the source column is adapted before the comparison
// depends on `joinAlgorithm`:
//   Left  - left columns are compared as-is; right columns are padded with nulls up to
//           join.RowCount when they are not longer than the join, otherwise cloned
//           through the identity row map.
//   Right - mirror image of Left.
//   Inner - both sides are cloned through the identity row map.
//   any other value - both sides are padded with nulls up to join.RowCount.
private void VerifyJoin(DataFrame join, DataFrame left, DataFrame right, JoinAlgorithm joinAlgorithm)
{
    // Identity row map 0..join.RowCount-1.
    PrimitiveColumn<long> mapIndices = new PrimitiveColumn<long>("map", join.RowCount);
    for (long row = 0; row < join.RowCount; row++)
    {
        mapIndices[row] = row;
    }

    // Clone of `source` extended with nulls so that it has join.RowCount rows.
    BaseColumn PadWithNulls(BaseColumn source)
    {
        return source.Clone(numberOfNullsToAppend: join.RowCount - source.Length);
    }

    // Clone of `source` taken through the identity row map.
    BaseColumn CloneMapped(BaseColumn source)
    {
        return source.Clone(mapIndices);
    }

    // Pads short (or equal-length) columns, maps longer ones.
    BaseColumn PadOrCloneMapped(BaseColumn source)
    {
        return source.Length <= join.RowCount ? PadWithNulls(source) : CloneMapped(source);
    }

    for (int i = 0; i < join.ColumnCount; i++)
    {
        BaseColumn joinColumn = join.Column(i);

        // Columns from `left` come first in the join, followed by those from `right`.
        bool comesFromLeft = i < left.ColumnCount;
        BaseColumn sourceColumn = comesFromLeft ? left.Column(i) : right.Column(i - left.ColumnCount);

        BaseColumn expectedColumn;
        if (joinAlgorithm == JoinAlgorithm.Left)
        {
            expectedColumn = comesFromLeft ? sourceColumn : PadOrCloneMapped(sourceColumn);
        }
        else if (joinAlgorithm == JoinAlgorithm.Right)
        {
            expectedColumn = comesFromLeft ? PadOrCloneMapped(sourceColumn) : sourceColumn;
        }
        else if (joinAlgorithm == JoinAlgorithm.Inner)
        {
            expectedColumn = CloneMapped(sourceColumn);
        }
        else
        {
            expectedColumn = PadWithNulls(sourceColumn);
        }

        // The == operator on columns yields a column of per-row comparison results.
        BaseColumn isEqual = joinColumn == expectedColumn;
        for (int j = 0; j < join.RowCount; j++)
        {
            Assert.Equal(true, isEqual[j]);
        }
    }
}