/// <summary>
/// Builds a DataFrame from three columns that were created without any values, converts it to
/// Arrow record batches, and round-trips every batch through an Arrow stream writer/reader pair,
/// comparing each batch read back with the one that was written.
/// </summary>
public void TestEmptyDataFrameRecordBatch()
{
    PrimitiveColumn<int> ageColumn = new PrimitiveColumn<int>("Age");
    PrimitiveColumn<int> lengthColumn = new PrimitiveColumn<int>("CharCount");
    ArrowStringColumn stringColumn = new ArrowStringColumn("Empty");

    DataFrame df = new DataFrame(new List<BaseColumn>() { ageColumn, lengthColumn, stringColumn });

    IEnumerable<RecordBatch> recordBatches = df.AsArrowRecordBatches();
    bool foundARecordBatch = false;
    foreach (RecordBatch recordBatch in recordBatches)
    {
        foundARecordBatch = true;

        // The stream, writer and reader are all disposable; scope them so each
        // iteration releases what it allocated instead of leaking it.
        using (MemoryStream stream = new MemoryStream())
        using (ArrowStreamWriter writer = new ArrowStreamWriter(stream, recordBatch.Schema))
        {
            // The test method is synchronous, so block on the asynchronous write.
            writer.WriteRecordBatchAsync(recordBatch).GetAwaiter().GetResult();

            // Rewind so the reader starts at the beginning of what was just written.
            stream.Position = 0;
            using (ArrowStreamReader reader = new ArrowStreamReader(stream))
            {
                RecordBatch readRecordBatch = reader.ReadNextRecordBatch();
                while (readRecordBatch != null)
                {
                    RecordBatchComparer.CompareBatches(recordBatch, readRecordBatch);
                    readRecordBatch = reader.ReadNextRecordBatch();
                }
            }
        }
    }

    // Conversion must yield at least one batch even though the columns hold no values.
    Assert.True(foundARecordBatch);
}
/// <summary>
/// Constructs an ArrowStringColumn directly from raw data, offset and null-bitmap buffers
/// and verifies its length and the two values returned by the indexer.
/// </summary>
public void TestBasicArrowStringColumn()
{
    // Reference Arrow array; only its Length and NullCount are consumed below.
    StringArray reference = new StringArray.Builder().Append("foo").Append("bar").Build();

    // Read as little-endian 32-bit integers these bytes are 0, 3, 6.
    Memory<byte> offsetBytes = new byte[] { 0, 0, 0, 0, 3, 0, 0, 0, 6, 0, 0, 0 };

    // NOTE(review): every bitmap byte is zero while the null count passed in is 0, yet both
    // values are asserted to be non-null below - confirm how the column treats the bitmap here.
    Memory<byte> bitmapBytes = new byte[] { 0, 0, 0, 0 };

    // ASCII codes for "foobar".
    Memory<byte> valueBytes = new byte[] { 102, 111, 111, 98, 97, 114 };

    ArrowStringColumn column = new ArrowStringColumn(
        "String",
        valueBytes,
        offsetBytes,
        bitmapBytes,
        reference.Length,
        reference.NullCount);

    Assert.Equal(2, column.Length);
    Assert.Equal("foo", column[0]);
    Assert.Equal("bar", column[1]);
}
/// <summary>
/// Clones a two-element ArrowStringColumn while asking for five extra nulls to be appended,
/// then checks the clone's length and that its first two elements equal the source's.
/// </summary>
public void TestArrowStringColumnClone()
{
    // Reference Arrow array; only its Length and NullCount are consumed below.
    StringArray reference = new StringArray.Builder().Append("foo").Append("bar").Build();

    // Read as little-endian 32-bit integers these bytes are 0, 3, 6.
    Memory<byte> offsetBytes = new byte[] { 0, 0, 0, 0, 3, 0, 0, 0, 6, 0, 0, 0 };
    Memory<byte> bitmapBytes = new byte[] { 0, 0, 0, 0 };
    // ASCII codes for "foobar".
    Memory<byte> valueBytes = new byte[] { 102, 111, 111, 98, 97, 114 };

    ArrowStringColumn source = new ArrowStringColumn(
        "String",
        valueBytes,
        offsetBytes,
        bitmapBytes,
        reference.Length,
        reference.NullCount);

    BaseColumn copy = source.Clone(numberOfNullsToAppend: 5);

    // Two original elements plus the five appended nulls.
    Assert.Equal(7, copy.Length);

    // Only the elements that came from the source column are compared.
    for (int i = 0; i < 2; i++)
    {
        Assert.Equal(source[i], copy[i]);
    }
}
/// <summary>
/// Walks the data, null-bitmap and offsets buffers exposed by an ArrowStringColumn in lock step
/// and checks that every string delimited by consecutive offsets matches, byte for byte, the
/// memory the column was constructed from.
/// </summary>
public void TestArrowStringColumnGetReadOnlyBuffers()
{
    // Test ArrowStringColumn.
    StringArray strArray = new StringArray.Builder().Append("foo").Append("bar").Build();
    Memory<byte> dataMemory = new byte[] { 102, 111, 111, 98, 97, 114 };
    Memory<byte> nullMemory = new byte[] { 1 };
    Memory<byte> offsetMemory = new byte[] { 0, 0, 0, 0, 3, 0, 0, 0, 6, 0, 0, 0 };

    ArrowStringColumn column = new ArrowStringColumn("String", dataMemory, offsetMemory, nullMemory, strArray.Length, strArray.NullCount);

    IEnumerable<ReadOnlyMemory<byte>> dataBuffers = column.GetReadOnlyDataBuffers();
    IEnumerable<ReadOnlyMemory<byte>> nullBitMaps = column.GetReadOnlyNullBitMapBuffers();
    IEnumerable<ReadOnlyMemory<int>> offsetsBuffers = column.GetReadOnlyOffsetsBuffers();

    using (IEnumerator<ReadOnlyMemory<byte>> bufferEnumerator = dataBuffers.GetEnumerator())
    using (IEnumerator<ReadOnlyMemory<int>> offsetsEnumerator = offsetsBuffers.GetEnumerator())
    using (IEnumerator<ReadOnlyMemory<byte>> nullBitMapsEnumerator = nullBitMaps.GetEnumerator())
    {
        // The null-bitmap enumerator is advanced together with the other two, but its
        // contents are not inspected by this test.
        while (bufferEnumerator.MoveNext() && nullBitMapsEnumerator.MoveNext() && offsetsEnumerator.MoveNext())
        {
            ReadOnlySpan<byte> dataSpan = bufferEnumerator.Current.Span;
            ReadOnlySpan<int> offsetsSpan = offsetsEnumerator.Current.Span;

            for (int j = 1; j < offsetsSpan.Length; j++)
            {
                // Consecutive offsets delimit one string. Take the start position from the
                // offsets buffer itself so the slice stays correct when the strings differ
                // in length (assigning the previous length to the start only works when
                // every string happens to be equally long).
                int start = offsetsSpan[j - 1];
                int length = offsetsSpan[j] - start;

                ReadOnlySpan<byte> actual = dataSpan.Slice(start, length);
                ReadOnlySpan<byte> expected = dataMemory.Span.Slice(start, length);

                Assert.Equal(expected.Length, actual.Length);
                for (int s = 0; s < expected.Length; s++)
                {
                    Assert.Equal(expected[s], actual[s]);
                }
            }
        }
    }
}
/// <summary>
/// Builds a four-element ArrowStringColumn containing one null and one empty string, then
/// verifies the values through both the single-index indexer and the two-argument range indexer.
/// </summary>
public void TestArrowStringColumnWithNulls()
{
    // "joe" and "mark" stored back to back as UTF-8.
    Memory<byte> valueBytes = new Memory<byte>(Encoding.UTF8.GetBytes("joemark"));

    // Read as little-endian 32-bit integers these bytes are 0, 3, 3, 7, 7.
    Memory<byte> offsetBytes = new byte[] { 0, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0, 7, 0, 0, 0 };

    // Of the low four bits only bit 1 is cleared, matching the null expected at index 1.
    Memory<byte> bitmapBytes = new byte[] { 0b1101 };

    ArrowStringColumn column = new ArrowStringColumn("String", valueBytes, offsetBytes, bitmapBytes, 4, 1);
    Assert.Equal(4, column.Length);

    // Element-by-element access.
    Assert.Equal("joe", column[0]);
    Assert.Null(column[1]);
    Assert.Equal("mark", column[2]);
    Assert.Equal("", column[3]);

    // The same four values fetched in one call through the two-argument indexer.
    List<string> range = column[0, 4];
    Assert.Equal("joe", range[0]);
    Assert.Null(range[1]);
    Assert.Equal("mark", range[2]);
    Assert.Equal("", range[3]);
}
/// <summary>
/// Enumerates every column of a ten-row DataFrame and checks the total number of values
/// yielded, then spot checks the enumerated contents of the "String", "ArrowString",
/// "Float" and "Int" columns by concatenating them into a single string.
/// </summary>
public void TestIEnumerable()
{
    const int rowCount = 10;
    DataFrame df = MakeDataFrameWithAllColumnTypes(rowCount);

    // Every column is expected to yield one item per row.
    int enumerated = 0;
    for (int columnIndex = 0; columnIndex < df.ColumnCount; columnIndex++)
    {
        foreach (object item in df.Column(columnIndex))
        {
            enumerated++;
        }
    }
    Assert.Equal(rowCount * df.ColumnCount, enumerated);

    // spot check a few column types:

    StringColumn stringColumn = (StringColumn)df["String"];
    StringBuilder stringText = new StringBuilder();
    foreach (string item in stringColumn)
    {
        stringText.Append(item ?? "<null>");
    }
    Assert.Equal("01234<null>6789", stringText.ToString());

    ArrowStringColumn arrowStringColumn = (ArrowStringColumn)df["ArrowString"];
    StringBuilder arrowText = new StringBuilder();
    foreach (string item in arrowStringColumn)
    {
        arrowText.Append(item ?? "<null>");
    }
    Assert.Equal("foofoofoofoofoofoofoofoofoofoo", arrowText.ToString());

    PrimitiveColumn<float> floatColumn = (PrimitiveColumn<float>)df["Float"];
    StringBuilder floatText = new StringBuilder();
    foreach (float? item in floatColumn)
    {
        if (item != null)
        {
            floatText.Append(item);
        }
        else
        {
            floatText.Append("<null>");
        }
    }
    Assert.Equal("01234<null>6789", floatText.ToString());

    PrimitiveColumn<int> intColumn = (PrimitiveColumn<int>)df["Int"];
    StringBuilder intText = new StringBuilder();
    foreach (int? item in intColumn)
    {
        if (item != null)
        {
            intText.Append(item);
        }
        else
        {
            intText.Append("<null>");
        }
    }
    Assert.Equal("01234<null>6789", intText.ToString());
}