private void CreateParquetFile(ResizableBuffer buffer)
{
    // Write the benchmark data (dates, object ids, values) as three columns of a single row group
    // into an in-memory parquet file backed by the given buffer.
    using var output = new BufferOutputStream(buffer);
    using var fileWriter = new ParquetFileWriter(output, CreateFloatColumns(), keyValueMetadata: _keyValueProperties);
    using var rowGroupWriter = fileWriter.AppendRowGroup();

    // Column 0: each date repeated once per object id.
    using (var dateTimeWriter = rowGroupWriter.NextColumn().LogicalWriter<DateTime>())
    {
        for (var row = 0; row != _dates.Length; ++row)
        {
            dateTimeWriter.WriteBatch(Enumerable.Repeat(_dates[row], _objectIds.Length).ToArray());
        }
    }

    // Column 1: the full set of object ids, written once per date.
    using (var objectIdWriter = rowGroupWriter.NextColumn().LogicalWriter<int>())
    {
        for (var row = 0; row != _dates.Length; ++row)
        {
            objectIdWriter.WriteBatch(_objectIds);
        }
    }

    // Column 2: one batch of values per date.
    using (var valueWriter = rowGroupWriter.NextColumn().LogicalWriter<float>())
    {
        for (var row = 0; row != _dates.Length; ++row)
        {
            valueWriter.WriteBatch(_values[row]);
        }
    }

    fileWriter.Close();
}
public static void TestBufferOutputStreamFinish()
{
    var sourceData = Enumerable.Range(0, 100).ToArray();

    using var outStream = new BufferOutputStream();

    // Write out a single column
    using (var writer = new ParquetFileWriter(outStream, new Column[] {new Column<int>("int_field")}))
    {
        using (var rowGroupWriter = writer.AppendRowGroup())
        using (var colWriter = rowGroupWriter.NextColumn().LogicalWriter<int>())
        {
            colWriter.WriteBatch(sourceData);
        }

        writer.Close();
    }

    // Read it back
    using var finishedBuffer = outStream.Finish();
    using var inStream = new BufferReader(finishedBuffer);
    using var reader = new ParquetFileReader(inStream);
    using var rowGroup = reader.RowGroup(0);
    using var colReader = rowGroup.Column(0).LogicalReader<int>();

    var roundTripped = colReader.ReadAll((int) rowGroup.MetaData.NumRows);
    Assert.AreEqual(sourceData, roundTripped);
}
public static void TestFileHandleHasBeenReleased()
{
    var exception = Assert.Throws<InvalidCastException>(() =>
    {
        try
        {
            // Write a small int column to disk.
            using (var fileWriter = new ParquetFileWriter("file.parquet", new Column[] {new Column<int>("ids")}))
            {
                using var rowGroupWriter = fileWriter.AppendRowGroup();
                using var colWriter = rowGroupWriter.NextColumn().LogicalWriter<int>();
                colWriter.WriteBatch(new[] {1, 2, 3});
                fileWriter.Close();
            }

            // Open with the wrong logical reader type on purpose.
            using var fileReader = new ParquetFileReader("file.parquet");
            using var rowGroupReader = fileReader.RowGroup(0);
            using var colReader = rowGroupReader.Column(0).LogicalReader<float>();
            Assert.AreEqual(new[] {1, 2, 3}, colReader.ReadAll(3));
        }
        finally
        {
            // This will throw on Windows if the file handle has not been released.
            File.Delete("file.parquet");
        }
    });

    StringAssert.StartsWith("Unable to cast object of type", exception?.Message);
}
public void DecryptParquetFileCorrectly()
{
    // Use Path.Combine rather than hard-coded '\\' separators so the test also runs
    // on non-Windows platforms (on Windows the resulting path is unchanged).
    using Stream inputFile = File.OpenRead(Path.Combine("ResourceFiles", "ciphertext.parquet"));
    using Stream outputFile = File.OpenWrite(Path.Combine("ResourceFiles", $"{nameof(DecryptParquetFileCorrectly)}_out.parquet"));
    using ParquetFileReader reader = new ParquetFileReader(inputFile);

    // Make the key store provider available so the data encryption keys can be unwrapped.
    reader.RegisterKeyStoreProviders(
        new Dictionary<string, EncryptionKeyStoreProvider>
        {
            [azureKeyProvider.ProviderName] = azureKeyProvider
        }
    );

    // Start from a copy of the source file's per-column encryption settings...
    var writerSettings = reader.FileEncryptionSettings
        .Select(s => (FileEncryptionSettings)s.Clone())
        .ToList();

    var targetColumnTypes = reader.FileEncryptionSettings
        .Select(s => s.GetSerializer().GetGenericType())
        .ToList();

    // ...then decrypt columns 0, 3 and 10 by switching them to plaintext.
    writerSettings[0] = Create(targetColumnTypes[0], dataEncryptionKey, EncryptionType.Plaintext, GetSerializer(targetColumnTypes[0]));
    writerSettings[3] = Create(targetColumnTypes[3], dataEncryptionKey, EncryptionType.Plaintext, GetSerializer(targetColumnTypes[3]));
    writerSettings[10] = Create(targetColumnTypes[10], dataEncryptionKey, EncryptionType.Plaintext, GetSerializer(targetColumnTypes[10]));

    using ParquetFileWriter writer = new ParquetFileWriter(outputFile, writerSettings);

    // Stream every column from reader to writer, applying the new settings.
    ColumnarCryptographer cryptographer = new ColumnarCryptographer(reader, writer);
    cryptographer.Transform();
}
public static void TestHasNext()
{
    const int numRows = 5;
    var schemaColumns = new Column[] {new Column<int>("int32_field")};
    var values = Enumerable.Range(0, numRows).ToArray();

    using var buffer = new ResizableBuffer();

    // Write a single int column of numRows values.
    using (var outStream = new BufferOutputStream(buffer))
    {
        using var writer = new ParquetFileWriter(outStream, schemaColumns);
        using var rowGroupWriter = writer.AppendRowGroup();
        using var colWriter = (ColumnWriter<int>) rowGroupWriter.NextColumn();
        colWriter.WriteBatch(values);
        writer.Close();
    }

    // Read back the columns and make sure they match.
    using var inStream = new BufferReader(buffer);
    using var fileReader = new ParquetFileReader(inStream);
    using var rowGroupReader = fileReader.RowGroup(0);
    using var column = (ColumnReader<int>) rowGroupReader.Column(0);

    var read = new int[1024];
    column.ReadBatch(1024, read, out var numValues);

    // Fix: NUnit's Assert.AreEqual takes the expected value first, then the actual value;
    // the arguments were previously reversed, which produces a misleading failure message.
    Assert.AreEqual(numRows, numValues);
    Assert.AreEqual(values, read.AsSpan(0, numRows).ToArray());

    // All rows consumed, so the reader must report no further values.
    Assert.IsFalse(column.HasNext);
}
public DecimalRead()
{
    Console.WriteLine("Writing data...");
    var stopwatch = Stopwatch.StartNew();

    // Deterministic pseudo-random decimals (fixed seed) with random sign and large magnitude.
    var random = new Random(123);
    _values = Enumerable.Range(0, 1_000_000).Select(i =>
    {
        var magnitude = random.Next();
        var sign = random.NextDouble() < 0.5 ? -1M : +1M;
        return sign * ((decimal) magnitude * magnitude * magnitude) / 1000M;
    }).ToArray();

    // Write all values as a single decimal(29, 3) column in one row group.
    using (var writer = new ParquetFileWriter(Filename, new Column[] {new Column<decimal>("Value", LogicalType.Decimal(precision: 29, scale: 3))}))
    {
        using var rowGroupWriter = writer.AppendRowGroup();
        using var valueWriter = rowGroupWriter.NextColumn().LogicalWriter<decimal>();
        valueWriter.WriteBatch(_values);
        writer.Close();
    }

    Console.WriteLine("Wrote {0:N0} rows in {1:N2} sec", _values.Length, stopwatch.Elapsed.TotalSeconds);
    Console.WriteLine();
}
// Wraps a ParquetFileWriter for row-oriented writing: stores the writer and the
// per-row write action, and immediately appends the row group that rows will be
// written into. Private — presumably construction goes through a public factory
// member elsewhere in this class (not visible here).
private ParquetRowWriter(ParquetFileWriter parquetFileWriter, WriteAction writeAction)
{
    _parquetFileWriter = parquetFileWriter;
    _rowGroupWriter = _parquetFileWriter.AppendRowGroup();
    _writeAction = writeAction;
    _rows = new TTuple[1024]; // row buffer; capacity 1024 before a flush is needed (see usage elsewhere)
}
public static void TestWriteLongString()
{
    const int numStrings = 100;

    // Generate lots of digits of 0.1234567891011121131415...
    var longStrings = Enumerable.Range(0, numStrings)
        .Select(i => "0." + string.Join("", Enumerable.Range(1, 3500).Select(j => j.ToString())) + "...")
        .ToArray();

    using var buffer = new ResizableBuffer();

    using (var outStream = new BufferOutputStream(buffer))
    {
        using var writer = new ParquetFileWriter(outStream, new Column[] {new Column<string>("Name")});
        using var rowGroupWriter = writer.AppendRowGroup();
        using var colWriter = rowGroupWriter.NextColumn().LogicalWriter<string>();

        // Strings to byte arrays memory pooling is done by the ByteBuffer class.
        // If something is fishy there (e.g. bad memory ownership wrt the GC),
        // we expect to see consequences here if we write enough strings.
        // It's not bullet proof, but it has found a few issues.
        colWriter.WriteBatch(longStrings);
        writer.Close();
    }

    using var inStream = new BufferReader(buffer);
    using var reader = new ParquetFileReader(inStream);
    using var rowGroupReader = reader.RowGroup(0);
    using var colReader = rowGroupReader.Column(0).LogicalReader<string>();

    Assert.AreEqual(longStrings, colReader.ReadAll(numStrings));
}
// NOTE(review): method name has a typo ("Exeption"); kept as-is since renaming a
// public test method would change how it is discovered/referenced.
public static void TestReadExeption()
{
    var sourceData = Enumerable.Range(0, 1024 * 1024).ToArray();

    var exception = Assert.Throws<ParquetException>(() =>
    {
        using var stream = new ErroneousReaderStream();

        // Writing through the managed output stream succeeds; only reads are faulty.
        using (var output = new ManagedOutputStream(stream, leaveOpen: true))
        {
            using var writer = new ParquetFileWriter(output, new Column[] {new Column<int>("ids")});
            using var rowGroupWriter = writer.AppendRowGroup();
            using var colWriter = rowGroupWriter.NextColumn().LogicalWriter<int>();
            colWriter.WriteBatch(sourceData);
            writer.Close();
        }

        stream.Seek(0, SeekOrigin.Begin);

        // Opening the reader forces a read, which the erroneous stream fails.
        using var input = new ManagedRandomAccessFile(stream);
        using (new ParquetFileReader(input))
        {
        }
    });

    Assert.That(
        exception.Message,
        Contains.Substring("this is an erroneous reader"));
}
private static void TestWriteNoColumnNorWriterOverride<TValue, TCustom>(TValue[] expected, TCustom[] written)
{
    using var buffer = new ResizableBuffer();

    // Write float values using a custom user-type:
    // - Provide explicit schema definition that knows nothing about VolumeInDollars, and states that it's a float column.
    // - Provide a type factory such that Column("values") is known to be of VolumeInDollars,
    //   as we do not explicitly state the expected type when accessing the LogicalColumnWriter.
    // - Provide a converter factory such that VolumeInDollars values can be written as floats.
    // - Do not explicitly override the expected type when accessing the LogicalColumnWriter.
    using (var outStream = new BufferOutputStream(buffer))
    {
        using var schemaNode = Column.CreateSchemaNode(new Column[] {new Column<TValue>("values")});
        using var properties = CreateWriterProperties();
        using var writer = new ParquetFileWriter(outStream, schemaNode, properties)
        {
            LogicalTypeFactory = new WriteTypeFactoryNoOverride(),
            LogicalWriteConverterFactory = new WriteConverterFactory()
        };
        using var rowGroupWriter = writer.AppendRowGroup();
        using var colWriter = rowGroupWriter.NextColumn().LogicalWriter<TCustom>();

        colWriter.WriteBatch(written);
        writer.Close();
    }

    CheckWrittenValues(buffer, expected);
}
public static void TestWriteBatchWithNullOptionalField()
{
    using var buffer = new ResizableBuffer();

    // Write a nullable int32 column; the middle entry has definition level 0, i.e. null.
    using (var outStream = new BufferOutputStream(buffer))
    {
        using var writer = new ParquetFileWriter(outStream, new Column[] {new Column<int?>("int32?")});
        using var rowGroupWriter = writer.AppendRowGroup();
        using var colWriter = (ColumnWriter<int>) rowGroupWriter.NextColumn();

        var defLevels = new short[] {1, 0, 1};
        var values = new[] {1, 2};
        colWriter.WriteBatch(defLevels.Length, defLevels, null, values);
    }

    // Read back: the null must reappear between the two values.
    using (var inStream = new BufferReader(buffer))
    using (var reader = new ParquetFileReader(inStream))
    using (var rowGroupReader = reader.RowGroup(0))
    using (var colReader = rowGroupReader.Column(0).LogicalReader<int?>())
    {
        var results = new int?[3];
        colReader.ReadBatch(results, 0, 3);

        Assert.AreEqual(new int?[] {1, null, 2}, results);
    }
}
public static void TestFileHandleHasBeenReleased()
{
    var exception = Assert.Throws<InvalidCastException>(() =>
    {
        try
        {
            // Write a small int column to disk.
            using (var writer = new ParquetFileWriter("file.parquet", new Column[] {new Column<int>("ids")}))
            {
                using var rowGroupWriter = writer.AppendRowGroup();
                using var colWriter = rowGroupWriter.NextColumn().LogicalWriter<int>();
                colWriter.WriteBatch(new[] {1, 2, 3});
            }

            // Open with the wrong logical reader type on purpose.
            using var reader = new ParquetFileReader("file.parquet");
            using var rowGroupReader = reader.RowGroup(0);
            using var colReader = rowGroupReader.Column(0).LogicalReader<float>();
            Assert.AreEqual(new[] {1, 2, 3}, colReader.ReadAll(3));
        }
        finally
        {
            // This will throw on Windows if the file handle has not been released.
            File.Delete("file.parquet");
        }
    });

    Assert.AreEqual(
        "Unable to cast object of type " +
        "'ParquetSharp.LogicalColumnReader`3[System.Int32,System.Int32,System.Int32]'" +
        " to type 'ParquetSharp.LogicalColumnReader`1[System.Single]'.",
        exception.Message);
}
private static void TestRoundTrip(ExpectedColumn[] expectedColumns, bool useDictionaryEncoding)
{
    var schema = CreateSchema(expectedColumns);
    var writerProperties = CreateWriterProperties(expectedColumns, useDictionaryEncoding);
    var keyValueMetadata = new Dictionary<string, string> {{"case", "Test"}, {"Awesome", "true"}};

    using var buffer = new ResizableBuffer();

    // Write our expected columns to the parquet in-memory file.
    using (var output = new BufferOutputStream(buffer))
    {
        using var writer = new ParquetFileWriter(output, schema, writerProperties, keyValueMetadata);
        using var rowGroupWriter = writer.AppendRowGroup();

        foreach (var expectedColumn in expectedColumns)
        {
            Console.WriteLine("Writing '{0}'", expectedColumn.Name);

            // Each column writer is disposed before moving on to the next column.
            using var colWriter = rowGroupWriter.NextColumn();
            colWriter.Apply(new ValueSetter(expectedColumn.Values));
        }

        writer.Close();
    }

    // Read back the columns and make sure they match.
    AssertReadRoundtrip(buffer, expectedColumns, useDictionaryEncoding);
}
public static void TestArrayOfEmptyStringArraysRoundtrip()
{
    var expected = new[] {new string[] { }, new string[] { }, new string[] { }, new string[] { }};

    using var buffer = new ResizableBuffer();

    // Write four rows, each holding an empty string array.
    using (var outStream = new BufferOutputStream(buffer))
    {
        using var writer = new ParquetFileWriter(outStream, new Column[] {new Column<string[]>("a")});
        using (var rowGroupWriter = writer.AppendRowGroup())
        {
            using var colWriter = rowGroupWriter.NextColumn().LogicalWriter<string[]>();
            colWriter.WriteBatch(expected);
        }
        writer.Close();
    }

    // Read back and verify both the row count and the (empty) values.
    using var inStream = new BufferReader(buffer);
    using var reader = new ParquetFileReader(inStream);
    using var rowGroup = reader.RowGroup(0);
    using var colReader = rowGroup.Column(0).LogicalReader<string[]>();

    Assert.AreEqual(4, rowGroup.MetaData.NumRows);

    var roundTripped = colReader.ReadAll(4);
    Assert.AreEqual(expected, roundTripped);
}
public static void Demo()
{
    // Open input and output file streams.
    // Fix: the streams were never disposed, leaking the file handles; `using`
    // declarations release them when the method exits (even on exception).
    using Stream inputFile = File.OpenRead(".\\ResourceFiles\\userdata1.parquet");
    using Stream outputFile = File.OpenWrite(".\\ResourceFiles\\out1.parquet");

    // Create reader
    using ParquetFileReader reader = new ParquetFileReader(inputFile);

    // Copy source settings as target settings
    List<FileEncryptionSettings> writerSettings = reader.FileEncryptionSettings
        .Select(s => Copy(s))
        .ToList();

    // Modify a few column settings
    writerSettings[0] = new FileEncryptionSettings<DateTimeOffset?>(encryptionKey, SqlSerializerFactory.Default.GetDefaultSerializer<DateTimeOffset?>());
    writerSettings[3] = new FileEncryptionSettings<string>(encryptionKey, EncryptionType.Deterministic, new SqlVarcharSerializer(size: 255));
    writerSettings[10] = new FileEncryptionSettings<double?>(encryptionKey, StandardSerializerFactory.Default.GetDefaultSerializer<double?>());

    // Create and pass the target settings to the writer
    using ParquetFileWriter writer = new ParquetFileWriter(outputFile, writerSettings);

    // Process the file
    ColumnarCryptographer cryptographer = new ColumnarCryptographer(reader, writer);
    cryptographer.Transform();

    Console.Clear();
}
public static void TestInMemoryRoundTrip()
{
    var sourceData = Enumerable.Range(0, 1024 * 1024).ToArray();

    using var memory = new MemoryStream();

    // Write test data.
    using (var output = new ManagedOutputStream(memory, leaveOpen: true))
    {
        using var writer = new ParquetFileWriter(output, new Column[] {new Column<int>("ids")});
        using var rowGroupWriter = writer.AppendRowGroup();
        using var colWriter = rowGroupWriter.NextColumn().LogicalWriter<int>();
        colWriter.WriteBatch(sourceData);
        writer.Close();
    }

    // Seek back to start.
    memory.Seek(0, SeekOrigin.Begin);

    // Read test data.
    using var input = new ManagedRandomAccessFile(memory, leaveOpen: true);
    using var reader = new ParquetFileReader(input);
    using var rowGroupReader = reader.RowGroup(0);
    using var colReader = rowGroupReader.Column(0).LogicalReader<int>();

    Assert.AreEqual(sourceData, colReader.ReadAll(sourceData.Length));
}
public static void TestFileStreamRoundTrip()
{
    try
    {
        // Write ids through a managed output stream wrapping a FileStream.
        using (var output = new ManagedOutputStream(File.OpenWrite("file.parquet")))
        {
            using var writer = new ParquetFileWriter(output, new Column[] {new Column<int>("ids")});
            using var rowGroupWriter = writer.AppendRowGroup();
            using var colWriter = rowGroupWriter.NextColumn().LogicalWriter<int>();
            colWriter.WriteBatch(new[] {1, 2, 3});
            writer.Close();
        }

        // Read them back through a managed random access file.
        using var input = new ManagedRandomAccessFile(File.OpenRead("file.parquet"));
        using var reader = new ParquetFileReader(input);
        using var rowGroupReader = reader.RowGroup(0);
        using var colReader = rowGroupReader.Column(0).LogicalReader<int>();

        Assert.AreEqual(new[] {1, 2, 3}, colReader.ReadAll(3));
    }
    finally
    {
        File.Delete("file.parquet");
    }
}
// Round-trips an int column through a raw pointer-backed buffer: write to a
// ResizableBuffer, copy to a managed byte[], then read back via an IO.Buffer
// constructed from a pinned pointer into that array.
public static unsafe void TestParquetReadFromBuffer()
{
    var expected = Enumerable.Range(0, 100).ToArray();

    // Write out a single column
    byte[] parquetFileBytes;
    using (var outBuffer = new ResizableBuffer())
    {
        using (var outStream = new BufferOutputStream(outBuffer))
        using (var fileWriter = new ParquetFileWriter(outStream, new Column[] { new Column <int>("int_field") }))
        using (var rowGroupWriter = fileWriter.AppendRowGroup())
        using (var colWriter = rowGroupWriter.NextColumn().LogicalWriter <int>())
        {
            colWriter.WriteBatch(expected);
        }

        // Snapshot the written file into a plain managed byte array.
        parquetFileBytes = outBuffer.ToArray();
    }

    // Read it back
    // The array must stay pinned for as long as the IO.Buffer references its address.
    fixed(byte *fixedBytes = parquetFileBytes)
    using (var buffer = new IO.Buffer(new IntPtr(fixedBytes), parquetFileBytes.Length))
    using (var inStream = new BufferReader(buffer))
    using (var fileReader = new ParquetFileReader(inStream))
    using (var rowGroup = fileReader.RowGroup(0))
    using (var columnReader = rowGroup.Column(0).LogicalReader <int>())
    {
        var allData = columnReader.ReadAll((int)rowGroup.MetaData.NumRows);
        Assert.AreEqual(expected, allData);
    }
}
public static void TestAgainstThirdParty()
{
    var columns = new Column[] {new Column<decimal>("Decimal", LogicalType.Decimal(precision: 29, scale: 3))};

    // Include the extreme representable values (scaled by the column's 10^3 factor).
    var values = Enumerable.Range(0, 10_000)
        .Select(i => ((decimal) i * i * i) / 1000 - 10)
        .Concat(new[] {decimal.MinValue / 1000, decimal.MaxValue / 1000})
        .ToArray();

    using var buffer = new ResizableBuffer();

    // Write using ParquetSharp
    using (var outStream = new BufferOutputStream(buffer))
    {
        using var writer = new ParquetFileWriter(outStream, columns, Compression.Snappy);
        using var rowGroupWriter = writer.AppendRowGroup();
        using var colWriter = rowGroupWriter.NextColumn().LogicalWriter<decimal>();
        colWriter.WriteBatch(values);
        writer.Close();
    }

    // Read using Parquet.NET
    using var memoryStream = new MemoryStream(buffer.ToArray());
    using var thirdPartyReader = new ParquetReader(memoryStream);
    using var thirdPartyRowGroup = thirdPartyReader.OpenRowGroupReader(0);

    var roundTripped = (decimal[]) thirdPartyRowGroup.ReadColumn(thirdPartyReader.Schema.GetDataFields()[0]).Data;
    Assert.AreEqual(values, roundTripped);
}
private void ParquetImpl(ParquetFileWriter fileWriter)
{
    // Write the benchmark columns (dates, object ids, values) into one row group,
    // one batch per date, then close the file.
    using var rowGroupWriter = fileWriter.AppendRowGroup();

    // Column 0: each date repeated once per object id.
    using (var dateTimeWriter = rowGroupWriter.NextColumn().LogicalWriter<DateTime>())
    {
        for (var row = 0; row != _dates.Length; ++row)
        {
            dateTimeWriter.WriteBatch(Enumerable.Repeat(_dates[row], _objectIds.Length).ToArray());
        }
    }

    // Column 1: the full set of object ids, written once per date.
    using (var objectIdWriter = rowGroupWriter.NextColumn().LogicalWriter<int>())
    {
        for (var row = 0; row != _dates.Length; ++row)
        {
            objectIdWriter.WriteBatch(_objectIds);
        }
    }

    // Column 2: one batch of values per date.
    using (var valueWriter = rowGroupWriter.NextColumn().LogicalWriter<float>())
    {
        for (var row = 0; row != _dates.Length; ++row)
        {
            valueWriter.WriteBatch(_values[row]);
        }
    }

    fileWriter.Close();
}
public static void TestReadWriteParquetMultipleTasks()
{
    // Each task writes (and then deletes) its own file, named after its task id,
    // so many writers can run concurrently without clashing.
    void WriteFile()
    {
        var schema = new Column[]
        {
            new Column<DateTime>("Col1"),
            new Column<int>("Col2"),
            new Column<float>("Col3")
        };

        const int numRowGroups = 7;
        const int rowsPerRowGroup = 21;
        var data = Enumerable.Range(0, rowsPerRowGroup).ToArray();

        using (var writer = new ParquetFileWriter(Task.CurrentId + ".parquet", schema))
        {
            for (var rowGroup = 0; rowGroup < numRowGroups; rowGroup++)
            {
                using var rowGroupWriter = writer.AppendRowGroup();

                using (var dateWriter = rowGroupWriter.NextColumn().LogicalWriter<DateTime>())
                {
                    dateWriter.WriteBatch(data.Select(n => new DateTime(2012, 1, 1).AddDays(n)).ToArray());
                }

                using (var idWriter = rowGroupWriter.NextColumn().LogicalWriter<int>())
                {
                    idWriter.WriteBatch(data);
                }

                using (var valueWriter = rowGroupWriter.NextColumn().LogicalWriter<float>())
                {
                    valueWriter.WriteBatch(data.Select(n => n + 0.1f).ToArray());
                }
            }

            writer.Close();
        }

        File.Delete(Task.CurrentId + ".parquet");
        Console.WriteLine(Task.CurrentId + " completed.");
    }

    const int numThreads = 14;
    const int numRuns = 30000;
    var running = new Task[numRuns];

    ThreadPool.SetMaxThreads(numThreads, numThreads);

    foreach (var i in Enumerable.Range(0, numRuns))
    {
        running[i] = Task.Factory.StartNew(WriteFile, CancellationToken.None);
    }

    Task.WaitAll(running);
}
public long Parquet()
{
    // Write using the plain file-path ParquetFileWriter API and report the resulting file size.
    using (var fileWriter = new ParquetFileWriter("float_timeseries.parquet", CreateFloatColumns()))
    {
        ParquetImpl(fileWriter);
    }

    return new FileInfo("float_timeseries.parquet").Length;
}
public long ParquetStream()
{
    // Write via a .NET FileStream wrapped in a ManagedOutputStream, then report the file size.
    using (var stream = new FileStream("float_timeseries.parquet.stream", FileMode.Create))
    {
        using var output = new IO.ManagedOutputStream(stream);
        using var fileWriter = new ParquetFileWriter(output, CreateFloatColumns());
        ParquetImpl(fileWriter);
    }

    return new FileInfo("float_timeseries.parquet.stream").Length;
}
public static void TestWriteNoTypeFactory()
{
    // Test that we cannot create a writer using a custom type without providing a factory.
    using var buffer = new ResizableBuffer();
    using var outStream = new BufferOutputStream(buffer);

    var exception = Assert.Throws<ArgumentException>(() =>
    {
        using var writer = new ParquetFileWriter(outStream, new Column[] {new Column<VolumeInDollars>("value")});
    });

    StringAssert.StartsWith("unsupported logical type", exception?.Message);
}
public static void TestDisposeExceptionSafety_ParquetFileWriter()
{
    // Throwing while the writer and its buffers are still alive must not crash
    // during the implicit disposal that happens as the stack unwinds.
    var exception = Assert.Throws<Exception>(() =>
    {
        using var resizableBuffer = new ResizableBuffer();
        using var output = new BufferOutputStream(resizableBuffer);
        using var writer = new ParquetFileWriter(output, new Column[] {new Column<int>("Index"), new Column<float>("Value")});

        throw new Exception("this is the expected message");
    });

    Assert.That(exception.Message, Contains.Substring("this is the expected message"));
}
public static void TestSkip()
{
    const int numRows = 11;
    var schemaColumns = new Column[] {new Column<int>("int32_field")};
    var values = Enumerable.Range(0, numRows).ToArray();

    using var buffer = new ResizableBuffer();

    using (var outStream = new BufferOutputStream(buffer))
    {
        using var writer = new ParquetFileWriter(outStream, schemaColumns);
        using (var rowGroupWriter = writer.AppendRowGroup())
        {
            // Fix: dispose the column writer rather than leaking its handle.
            using var colWriter = (ColumnWriter<int>) rowGroupWriter.NextColumn();
            colWriter.WriteBatch(numRows, values);
        }
        writer.Close();
    }

    using var inStream = new BufferReader(buffer);
    using var fileReader = new ParquetFileReader(inStream);
    using var rowGroupReader = fileReader.RowGroup(0);

    // Read back the columns after skipping numToSkip rows and make sure the values are what we expect.
    using (var column = rowGroupReader.Column(0))
    {
        const int numToSkip = 5;

        var skipped = column.Skip(numToSkip);
        Assert.AreEqual(numToSkip, skipped);

        var read = new int[1024];
        ((ColumnReader<int>) column).ReadBatch(1024, read, out var numValues);

        // Fix: NUnit's Assert.AreEqual takes the expected value first, then the actual value;
        // the first assertion previously had them reversed.
        Assert.AreEqual(numRows - numToSkip, numValues);
        Assert.AreEqual(values.AsSpan(numToSkip).ToArray(), read.AsSpan(0, numRows - numToSkip).ToArray());
    }

    // Check skipped is bound to the maximum number of rows.
    using (var column = rowGroupReader.Column(0))
    {
        var skipped = column.Skip(1024);
        Assert.AreEqual(numRows, skipped);
        Assert.IsFalse(column.HasNext);
    }
}
public static void TestByteStreamSplitEncoding()
{
    const int numRows = 10230;

    var ids = Enumerable.Range(0, numRows).ToArray();
    var values = ids.Select(i => i / 3.14f).ToArray();

    using var buffer = new ResizableBuffer();

    // Write the "value" column with byte-stream-split encoding (dictionary disabled for it).
    using (var outStream = new BufferOutputStream(buffer))
    {
        var columns = new Column[] {new Column<int>("id"), new Column<float>("value")};
        var properties = new WriterPropertiesBuilder()
            .Compression(Compression.Lz4)
            .DisableDictionary("value")
            .Encoding("value", Encoding.ByteStreamSplit)
            .Build();

        using var writer = new ParquetFileWriter(outStream, columns, properties);
        using var rowGroupWriter = writer.AppendRowGroup();

        using var idWriter = rowGroupWriter.NextColumn().LogicalWriter<int>();
        idWriter.WriteBatch(ids);

        using var valueWriter = rowGroupWriter.NextColumn().LogicalWriter<float>();
        valueWriter.WriteBatch(values);

        writer.Close();
    }

    using var inStream = new BufferReader(buffer);
    using var reader = new ParquetFileReader(inStream);
    using var rowGroupReader = reader.RowGroup(0);

    // Verify the encodings recorded in the column chunk metadata.
    using var idMetadata = rowGroupReader.MetaData.GetColumnChunkMetaData(0);
    using var valueMetadata = rowGroupReader.MetaData.GetColumnChunkMetaData(1);

    Assert.AreEqual(new[] {Encoding.PlainDictionary, Encoding.Plain, Encoding.Rle}, idMetadata.Encodings);
    Assert.AreEqual(new[] {Encoding.ByteStreamSplit, Encoding.Rle}, valueMetadata.Encodings);

    // Verify the data itself round-trips.
    using var idReader = rowGroupReader.Column(0).LogicalReader<int>();
    using var valueReader = rowGroupReader.Column(1).LogicalReader<float>();

    Assert.AreEqual(ids, idReader.ReadAll(numRows));
    Assert.AreEqual(values, valueReader.ReadAll(numRows));
}
// Verifies that using a ParquetFileWriter after it has been disposed fails with
// the expected "null native handle" error rather than crashing.
public static void TestDisposedAccess()
{
    using var buffer = new ResizableBuffer();

    // Write our expected columns to the parquet in-memory file.
    using var outStream = new BufferOutputStream(buffer);
    using var fileWriter = new ParquetFileWriter(outStream, new Column[] { new Column <int>("Index") });

    // Dispose explicitly; the later implicit dispose from the `using var` will run
    // on the already-disposed writer, which this test also implicitly exercises.
    fileWriter.Dispose();

    var exception = Assert.Throws <NullReferenceException>(() => fileWriter.AppendRowGroup());

    Assert.AreEqual("null native handle", exception.Message);
}
public long ParquetSharp()
{
    // Write the decimal series as a single decimal(29, 3) column, then report the file size.
    using (var writer = new ParquetFileWriter("decimal_timeseries.parquet", new Column[] {new Column<decimal>("Value", LogicalType.Decimal(precision: 29, scale: 3))}))
    {
        using var rowGroupWriter = writer.AppendRowGroup();
        using var valueWriter = rowGroupWriter.NextColumn().LogicalWriter<decimal>();
        valueWriter.WriteBatch(_values);
        writer.Close();
    }

    return new FileInfo("decimal_timeseries.parquet").Length;
}
public static void TestWriteNoConverterFactory()
{
    // Test that we cannot write values using a custom type without providing a converter factory.
    using var buffer = new ResizableBuffer();
    using var outStream = new BufferOutputStream(buffer);
    using var schemaNode = Column.CreateSchemaNode(new Column[] {new Column<float>("values")});
    using var properties = CreateWriterProperties();
    using var writer = new ParquetFileWriter(outStream, schemaNode, properties);
    using var rowGroupWriter = writer.AppendRowGroup();

    var exception = Assert.Throws<NotSupportedException>(() => rowGroupWriter.NextColumn().LogicalWriterOverride<VolumeInDollars>());

    StringAssert.StartsWith("unsupported logical system type", exception?.Message);
}