// Verifies that the schema read through IDataView reflects column removal,
// insertion and replacement performed on the underlying DataFrame.
public void TestIDataViewSchemaInvalidate()
{
    DataFrame frame = DataFrameTests.MakeDataFrameWithAllMutableColumnTypes(10, withNulls: false);
    IDataView view = frame;

    // Baseline: the freshly built frame is expected to expose 14 columns.
    DataViewSchema initialSchema = view.Schema;
    Assert.Equal(14, initialSchema.Count);

    // Dropping a column must be visible the next time the schema is read.
    frame.Columns.Remove("Bool");
    DataViewSchema schemaAfterRemove = view.Schema;
    Assert.Equal(13, schemaAfterRemove.Count);

    // Put a "Bool" column back at position 0; odd row indices hold true.
    var oddRowFlags = Enumerable.Range(0, (int)frame.Rows.Count).Select(row => row % 2 == 1);
    DataFrameColumn boolColumn = new PrimitiveDataFrameColumn<bool>("Bool", oddRowFlags);
    frame.Columns.Insert(0, boolColumn);
    DataViewSchema schemaAfterInsert = view.Schema;
    Assert.Equal(14, schemaAfterInsert.Count);
    Assert.Equal("Bool", schemaAfterInsert[0].Name);

    // Overwrite the column at index 1 with a renamed clone and check the name.
    DataFrameColumn renamedClone = boolColumn.Clone();
    renamedClone.SetName("BoolClone");
    frame.Columns[1] = renamedClone;
    DataViewSchema schemaAfterReplace = view.Schema;
    Assert.Equal("BoolClone", schemaAfterReplace[1].Name);
}
// Round-trips a DataFrame through WriteCsv/LoadCsv using ';' as the separator
// and checks the shape plus the values of row 1 in columns 0 through 10.
public void TestWriteCsvWithSemicolonSeparator()
{
    const char semicolon = ';';

    using MemoryStream buffer = new MemoryStream();
    DataFrame source = DataFrameTests.MakeDataFrameWithAllColumnTypes(10, true);

    DataFrame.WriteCsv(source, buffer, separator: semicolon);
    buffer.Seek(0, SeekOrigin.Begin);
    DataFrame roundTripped = DataFrame.LoadCsv(buffer, separator: semicolon);

    Assert.Equal(source.Rows.Count, roundTripped.Rows.Count);
    Assert.Equal(source.Columns.Count, roundTripped.Columns.Count);

    // Every inspected cell of row 1 is expected to read back as 1F.
    for (int column = 0; column <= 10; column++)
    {
        Assert.Equal(1F, roundTripped[1, column]);
    }
}
// Writes a numeric DataFrame as CSV with the en-US culture, reads it back with
// the default LoadCsv settings, and checks that row 1 keeps its fractional values.
public void TestWriteCsvWithCultureInfo()
{
    using MemoryStream buffer = new MemoryStream();
    DataFrame source = DataFrameTests.MakeDataFrameWithNumericColumns(10, true);

    // Seed three cells of row 1 with non-integral values of different types.
    source[1, 1] = 1.1M;
    source[1, 2] = 1.2D;
    source[1, 3] = 1.3F;

    CultureInfo usEnglish = new CultureInfo("en-US");
    DataFrame.WriteCsv(source, buffer, cultureInfo: usEnglish);
    buffer.Seek(0, SeekOrigin.Begin);
    DataFrame roundTripped = DataFrame.LoadCsv(buffer);

    Assert.Equal(source.Rows.Count, roundTripped.Rows.Count);
    Assert.Equal(source.Columns.Count, roundTripped.Columns.Count);

    // Expected contents of row 1, indexed by column.
    float[] expectedRow = { 1F, 1.1F, 1.2F, 1.3F, 1F, 1F, 1F, 1F, 1F, 1F, 1F };
    for (int column = 0; column < expectedRow.Length; column++)
    {
        Assert.Equal(expectedRow[column], roundTripped[1, column]);
    }
}
// Writes a numeric DataFrame as CSV with the ro-RO culture and ';' as the
// separator, then reads it back and checks what LoadCsv makes of row 1.
public void TestWriteCsvWithCultureInfoRomanianAndSemiColon()
{
    const char semicolon = ';';

    DataFrame source = DataFrameTests.MakeDataFrameWithNumericColumns(10, true);
    source[1, 1] = 1.1M;
    source[1, 2] = 1.2D;
    source[1, 3] = 1.3F;

    using MemoryStream buffer = new MemoryStream();
    CultureInfo romanian = new CultureInfo("ro-RO");
    DataFrame.WriteCsv(source, buffer, separator: semicolon, cultureInfo: romanian);
    buffer.Seek(0, SeekOrigin.Begin);
    DataFrame roundTripped = DataFrame.LoadCsv(buffer, separator: semicolon);

    Assert.Equal(source.Rows.Count, roundTripped.Rows.Count);
    Assert.Equal(source.Columns.Count, roundTripped.Columns.Count);

    // LoadCsv does not support culture info, so the decimal comma (,) is seen as a
    // thousands separator and ignored on read; that is why columns 1-3 come back
    // as 11, 12 and 129999992 rather than 1.1, 1.2 and 1.3.
    float[] expectedRow = { 1F, 11F, 12F, 129999992F, 1F, 1F, 1F, 1F, 1F, 1F, 1F };
    for (int column = 0; column < expectedRow.Length; column++)
    {
        Assert.Equal(expectedRow[column], roundTripped[1, column]);
    }
}
// Verifies that WriteCsv throws ArgumentException when the field separator is
// the first character of the chosen culture's decimal separator.
public void TestWriteCsvWithCultureInfoRomanianAndComma()
{
    using MemoryStream buffer = new MemoryStream();
    DataFrame source = DataFrameTests.MakeDataFrameWithNumericColumns(10, true);

    CultureInfo romanian = new CultureInfo("ro-RO");
    char clashingSeparator = romanian.NumberFormat.NumberDecimalSeparator.First();

    Action write = () => DataFrame.WriteCsv(source, buffer, separator: clashingSeparator, cultureInfo: romanian);

    Assert.Throws<ArgumentException>(write);
}
// Builds the numeric test DataFrame (without nulls) and appends two extra columns:
// "Parity" ("even"/"odd" by row index) and "Tens" (row index integer-divided by 10).
private DataFrame MakeTestDataFrameWithParityAndTensColumns(int length)
{
    DataFrame frame = DataFrameTests.MakeDataFrameWithNumericColumns(length, false);

    var parityLabels = Enumerable.Range(0, length).Select(row => row % 2 == 0 ? "even" : "odd");
    var tensValues = Enumerable.Range(0, length).Select(row => row / 10);

    DataFrameColumn parity = new StringDataFrameColumn("Parity", parityLabels);
    DataFrameColumn tens = new Int32DataFrameColumn("Tens", tensValues);

    // Inserting at the current count places each column at the end.
    frame.Columns.Insert(frame.Columns.Count, parity);
    frame.Columns.Insert(frame.Columns.Count, tens);

    return frame;
}
// Converts an IDataView back to a DataFrame while selecting only the "Int" and
// "Double" columns, and checks the row count, column count and column contents.
public void TestDataFrameFromIDataView_SelectColumns()
{
    DataFrame original = DataFrameTests.MakeDataFrameWithAllColumnTypes(10, withNulls: false);
    IDataView view = original;

    DataFrame selected = view.ToDataFrame("Int", "Double");

    Assert.Equal(view.GetRowCount(), selected.Rows.Count);
    Assert.Equal(2, selected.Columns.Count);

    // Each selected column must match its source column element by element.
    foreach (string columnName in new[] { "Int", "Double" })
    {
        Assert.True(original.Columns[columnName].ElementwiseEquals(selected.Columns[columnName]).All());
    }
}
// Converts a DataFrame to IDataView and back with no selection, then checks the
// shape and that every column matches the source element by element.
public void TestDataFrameFromIDataView()
{
    DataFrame original = DataFrameTests.MakeDataFrameWithAllColumnTypes(10, withNulls: false);

    // Chars are returned as uint16 by IDataView, so keeping "Char" would compare a
    // CharDataFrameColumn to a UInt16DataFrameColumn and fail the asserts.
    original.Columns.Remove("Char");

    IDataView view = original;
    DataFrame roundTripped = view.ToDataFrame();

    Assert.Equal(view.GetRowCount(), roundTripped.Rows.Count);
    Assert.Equal(view.Schema.Count, roundTripped.Columns.Count);

    int columnCount = original.Columns.Count;
    for (int column = 0; column < columnCount; column++)
    {
        DataFrameColumn expected = original.Columns[column];
        DataFrameColumn actual = roundTripped.Columns[column];
        Assert.True(expected.ElementwiseEquals(actual).All());
    }
}
// Builds an Arrow RecordBatch with one column per tested Arrow array type, loads
// it into a DataFrame, verifies the column types, and then compares every batch
// produced by converting the DataFrame back to Arrow against the original batch.
public void TestArrowIntegration()
{
    const int rowCount = 10;

    // "Column2": nullable ints with validity bitmap 0xfd, 0xff and one declared null.
    Int32Array intsWithOneNull = new Int32Array(
        valueBuffer: new ArrowBuffer.Builder<int>().AppendRange(Enumerable.Range(0, rowCount)).Build(),
        nullBitmapBuffer: new ArrowBuffer.Builder<byte>().Append(0xfd).Append(0xff).Build(),
        length: rowCount,
        nullCount: 1,
        offset: 0);

    // "Column3": nullable ints with an all-zero validity bitmap (every value null).
    Int32Array intsAllNull = new Int32Array(
        valueBuffer: new ArrowBuffer.Builder<int>().AppendRange(Enumerable.Range(0, rowCount)).Build(),
        nullBitmapBuffer: new ArrowBuffer.Builder<byte>().Append(0x00).Append(0x00).Build(),
        length: rowCount,
        nullCount: rowCount,
        offset: 0);

    // "NullableBooleanColumn": bit-packed values with two declared nulls.
    BooleanArray nullableBooleans = new BooleanArray(
        valueBuffer: new ArrowBuffer.Builder<byte>().Append(0xfd).Append(0xff).Build(),
        nullBitmapBuffer: new ArrowBuffer.Builder<byte>().Append(0xed).Append(0xff).Build(),
        length: rowCount,
        nullCount: 2,
        offset: 0);

    StringArray strings = new StringArray.Builder()
        .AppendRange(Enumerable.Range(0, rowCount).Select(value => value.ToString()))
        .Build();

    RecordBatch originalBatch = new RecordBatch.Builder()
        .Append("Column1", false, col => col.Int32(array => array.AppendRange(Enumerable.Range(0, rowCount))))
        .Append("Column2", true, intsWithOneNull)
        .Append("Column3", true, intsAllNull)
        .Append("NullableBooleanColumn", true, nullableBooleans)
        .Append("StringDataFrameColumn", false, strings)
        .Append("DoubleColumn", false, new DoubleArray.Builder().AppendRange(Enumerable.Repeat(1.0, rowCount)).Build())
        .Append("FloatColumn", false, new FloatArray.Builder().AppendRange(Enumerable.Repeat(1.0f, rowCount)).Build())
        .Append("ShortColumn", false, new Int16Array.Builder().AppendRange(Enumerable.Repeat((short)1, rowCount)).Build())
        .Append("LongColumn", false, new Int64Array.Builder().AppendRange(Enumerable.Repeat((long)1, rowCount)).Build())
        .Append("UIntColumn", false, new UInt32Array.Builder().AppendRange(Enumerable.Repeat((uint)1, rowCount)).Build())
        .Append("UShortColumn", false, new UInt16Array.Builder().AppendRange(Enumerable.Repeat((ushort)1, rowCount)).Build())
        .Append("ULongColumn", false, new UInt64Array.Builder().AppendRange(Enumerable.Repeat((ulong)1, rowCount)).Build())
        .Append("ByteColumn", false, new Int8Array.Builder().AppendRange(Enumerable.Repeat((sbyte)1, rowCount)).Build())
        .Append("UByteColumn", false, new UInt8Array.Builder().AppendRange(Enumerable.Repeat((byte)1, rowCount)).Build())
        .Build();

    DataFrame frame = DataFrame.FromArrowRecordBatch(originalBatch);
    DataFrameTests.VerifyColumnTypes(frame, testArrowStringColumn: true);

    // Every batch emitted on the way back must compare equal to the original.
    foreach (RecordBatch roundTripped in frame.ToArrowRecordBatches())
    {
        RecordBatchComparer.CompareBatches(originalBatch, roundTripped);
    }
}
// Converts an IDataView to a DataFrame limited to 5 rows and to the "Int" and
// "Double" columns, then checks the shape and the copied values.
public void TestDataFrameFromIDataView_SelectColumnsAndRows()
{
    const int rowLimit = 5;

    DataFrame original = DataFrameTests.MakeDataFrameWithAllColumnTypes(10, withNulls: false);
    IDataView view = original;

    DataFrame subset = view.ToDataFrame(rowLimit, "Int", "Double");

    Assert.Equal(rowLimit, subset.Rows.Count);
    for (int column = 0; column < subset.Columns.Count; column++)
    {
        Assert.Equal(rowLimit, subset.Columns[column].Length);
    }

    Assert.Equal(2, subset.Columns.Count);

    // The retained rows must carry the same values as the source frame.
    for (int row = 0; row < rowLimit; row++)
    {
        Assert.Equal(original.Columns["Int"][row], subset.Columns["Int"][row]);
        Assert.Equal(original.Columns["Double"][row], subset.Columns["Double"][row]);
    }
}
// Converts an IDataView to a DataFrame with a row limit and checks the shape,
// column names and values. A rowSize of 100 exercises the parameterless
// ToDataFrame() call; a rowSize of -1 is expected to return every source row.
public void TestDataFrameFromIDataView_SelectRows(int dataFrameSize, int rowSize)
{
    DataFrame original = DataFrameTests.MakeDataFrameWithAllColumnTypes(dataFrameSize, withNulls: false);

    // Chars are returned as uint16 by DataViewSchema, so "Char" would end up comparing
    // CharDataFrameColumn to UInt16DataFrameColumn and fail the asserts.
    original.Columns.Remove("Char");
    // Decimal is returned as double by DataViewSchema, so "Decimal" would end up comparing
    // DecimalDataFrameColumn to DoubleDataFrameColumn and fail the asserts.
    original.Columns.Remove("Decimal");

    IDataView view = original;

    // Test the default overload when rowSize is 100; otherwise pass the limit explicitly.
    DataFrame converted = rowSize == 100
        ? view.ToDataFrame()
        : view.ToDataFrame(rowSize);

    int expectedRows = rowSize == -1 ? dataFrameSize : rowSize;

    Assert.Equal(expectedRows, converted.Rows.Count);
    Assert.Equal(original.Columns.Count, converted.Columns.Count);

    for (int column = 0; column < converted.Columns.Count; column++)
    {
        Assert.Equal(expectedRows, converted.Columns[column].Length);
        Assert.Equal(original.Columns[column].Name, converted.Columns[column].Name);
    }

    Assert.Equal(view.Schema.Count, converted.Columns.Count);

    // Cell-by-cell comparison of the rows that were carried over.
    for (int column = 0; column < original.Columns.Count; column++)
    {
        for (int row = 0; row < expectedRows; row++)
        {
            Assert.Equal(original.Columns[column][row], converted.Columns[column][row]);
        }
    }
}
// Previews a null-free DataFrame through IDataView and checks, for each of the
// 15 columns, the column name and the values of rows 0 and 1.
public void TestIDataView()
{
    IDataView dataView = DataFrameTests.MakeDataFrameWithAllColumnTypes(10, withNulls: false);
    DataDebuggerPreview preview = dataView.Preview();

    // Checks a column's name and compares its first two values as boxed objects.
    void AssertColumn(int index, string expectedName, object first, object second)
    {
        var column = preview.ColumnView[index];
        Assert.Equal(expectedName, column.Column.Name);
        Assert.Equal(first, column.Values[0]);
        Assert.Equal(second, column.Values[1]);
    }

    // Same as AssertColumn, but compares the values through their ToString() text.
    void AssertTextColumn(int index, string expectedName, string first, string second)
    {
        var column = preview.ColumnView[index];
        Assert.Equal(expectedName, column.Column.Name);
        Assert.Equal(first, column.Values[0].ToString());
        Assert.Equal(second, column.Values[1].ToString());
    }

    Assert.Equal(10, preview.RowView.Length);
    Assert.Equal(15, preview.ColumnView.Length);

    AssertColumn(0, "Byte", (byte)0, (byte)1);
    // The asserts expect the Decimal column to surface as double values.
    AssertColumn(1, "Decimal", 0d, 1d);
    AssertColumn(2, "Double", 0d, 1d);
    AssertColumn(3, "Float", 0f, 1f);
    AssertColumn(4, "Int", 0, 1);
    AssertColumn(5, "Long", 0L, 1L);
    AssertColumn(6, "Sbyte", (sbyte)0, (sbyte)1);
    AssertColumn(7, "Short", (short)0, (short)1);
    AssertColumn(8, "Uint", 0u, 1u);
    AssertColumn(9, "Ulong", 0UL, 1UL);
    AssertColumn(10, "Ushort", (ushort)0, (ushort)1);
    AssertTextColumn(11, "String", "0", "1");
    // The asserts expect the Char column to surface as ushort values (65, 66).
    AssertColumn(12, "Char", (ushort)65, (ushort)66);
    AssertColumn(13, "Bool", true, false);
    AssertTextColumn(14, "ArrowString", "foo", "foo");
}
// Previews a DataFrame built with nulls through IDataView and checks, for each of
// the 15 columns, the column name and the values of rows 0, 1, 4, 5 and 6.
// Row 5 is the null row: the asserts expect 0 for integer-like columns, NaN for
// double/float columns, an empty string for text columns and false for Bool.
public void TestIDataViewWithNulls()
{
    int length = 10;
    IDataView dataView = DataFrameTests.MakeDataFrameWithAllColumnTypes(length, withNulls: true);
    DataDebuggerPreview preview = dataView.Preview();

    // Checks a column's name and compares the sampled rows as boxed objects.
    void AssertColumn(int index, string expectedName, object row0, object row1, object row4, object nullRow5, object row6)
    {
        var column = preview.ColumnView[index];
        Assert.Equal(expectedName, column.Column.Name);
        Assert.Equal(row0, column.Values[0]);
        Assert.Equal(row1, column.Values[1]);
        Assert.Equal(row4, column.Values[4]);
        Assert.Equal(nullRow5, column.Values[5]); // null row
        Assert.Equal(row6, column.Values[6]);
    }

    // Same as AssertColumn, but compares the values through their ToString() text.
    void AssertTextColumn(int index, string expectedName, string row0, string row1, string row4, string nullRow5, string row6)
    {
        var column = preview.ColumnView[index];
        Assert.Equal(expectedName, column.Column.Name);
        Assert.Equal(row0, column.Values[0].ToString());
        Assert.Equal(row1, column.Values[1].ToString());
        Assert.Equal(row4, column.Values[4].ToString());
        Assert.Equal(nullRow5, column.Values[5].ToString()); // null row
        Assert.Equal(row6, column.Values[6].ToString());
    }

    Assert.Equal(length, preview.RowView.Length);
    Assert.Equal(15, preview.ColumnView.Length);

    AssertColumn(0, "Byte", (byte)0, (byte)1, (byte)4, (byte)0, (byte)6);
    AssertColumn(1, "Decimal", 0d, 1d, 4d, double.NaN, 6d);
    AssertColumn(2, "Double", 0d, 1d, 4d, double.NaN, 6d);
    AssertColumn(3, "Float", 0f, 1f, 4f, float.NaN, 6f);
    AssertColumn(4, "Int", 0, 1, 4, 0, 6);
    AssertColumn(5, "Long", 0L, 1L, 4L, 0L, 6L);
    AssertColumn(6, "Sbyte", (sbyte)0, (sbyte)1, (sbyte)4, (sbyte)0, (sbyte)6);
    AssertColumn(7, "Short", (short)0, (short)1, (short)4, (short)0, (short)6);
    AssertColumn(8, "Uint", 0u, 1u, 4u, 0u, 6u);
    AssertColumn(9, "Ulong", 0UL, 1UL, 4UL, 0UL, 6UL);
    AssertColumn(10, "Ushort", (ushort)0, (ushort)1, (ushort)4, (ushort)0, (ushort)6);
    AssertTextColumn(11, "String", "0", "1", "4", "", "6");
    AssertColumn(12, "Char", (ushort)65, (ushort)66, (ushort)69, (ushort)0, (ushort)71);
    AssertColumn(13, "Bool", true, false, true, false, true);
    AssertTextColumn(14, "ArrowString", "foo", "foo", "foo", "", "foo");
}