public void FileMetadata_sets_num_rows_on_file_and_row_group_multiple_row_groups()
{
    var ms = new MemoryStream();
    var id = new DataField<int>("id");

    //write
    using (var writer = new ParquetWriter(new Schema(id), ms))
    {
        using (ParquetRowGroupWriter rg = writer.CreateRowGroup())
        {
            rg.WriteColumn(new DataColumn(id, new[] { 1, 2, 3, 4 }));
        }

        using (ParquetRowGroupWriter rg = writer.CreateRowGroup())
        {
            rg.WriteColumn(new DataColumn(id, new[] { 5, 6 }));
        }
    }

    //read back
    using (var reader = new ParquetReader(ms))
    {
        Assert.Equal(6, reader.ThriftMetadata.Num_rows);

        using (ParquetRowGroupReader rg = reader.OpenRowGroupReader(0))
        {
            Assert.Equal(4, rg.RowCount);
        }

        using (ParquetRowGroupReader rg = reader.OpenRowGroupReader(1))
        {
            Assert.Equal(2, rg.RowCount);
        }
    }
}
public static Stream GetParquetFileWithThreeRowGroups()
{
    var stream = new MemoryStream();
    var schema = SchemaReflector.Reflect<TwoColumn>();

    using (var parquetWriter = new ParquetWriter(schema, stream))
    {
        using (var rowGroup = parquetWriter.CreateRowGroup())
        {
            rowGroup.WriteColumn(new Parquet.Data.DataColumn((DataField)schema.Fields[0], new[] { 1, 2, 3, 4 }));
            rowGroup.WriteColumn(new Parquet.Data.DataColumn((DataField)schema.Fields[1], new[] { "one", "two", "three", "four" }));
        }

        using (var rowGroup = parquetWriter.CreateRowGroup())
        {
            rowGroup.WriteColumn(new Parquet.Data.DataColumn((DataField)schema.Fields[0], new[] { 5, 6, 7, 8 }));
            rowGroup.WriteColumn(new Parquet.Data.DataColumn((DataField)schema.Fields[1], new[] { "five", "six", "seven", "eight" }));
        }

        using (var rowGroup = parquetWriter.CreateRowGroup())
        {
            rowGroup.WriteColumn(new Parquet.Data.DataColumn((DataField)schema.Fields[0], new[] { 9, 10, 11, 12 }));
            rowGroup.WriteColumn(new Parquet.Data.DataColumn((DataField)schema.Fields[1], new[] { "nine", "ten", "eleven", "twelve" }));
        }
    }

    stream.Position = 0;
    return stream;
}
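// Note: TwoColumn is not defined in this snippet. A minimal model consistent with the
// int and string columns written above might look like the following -- the property
// names here are assumptions, not taken from the original source.
class TwoColumn
{
    public int Id { get; set; }

    public string Name { get; set; }
}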
public void Write_in_small_row_groups()
{
    //write a single file having 3 row groups
    var id = new DataField<int>("id");
    var ms = new MemoryStream();

    using (var writer = new ParquetWriter(new Schema(id), ms))
    {
        using (ParquetRowGroupWriter rg = writer.CreateRowGroup())
        {
            rg.WriteColumn(new DataColumn(id, new int[] { 1 }));
        }

        using (ParquetRowGroupWriter rg = writer.CreateRowGroup())
        {
            rg.WriteColumn(new DataColumn(id, new int[] { 2 }));
        }

        using (ParquetRowGroupWriter rg = writer.CreateRowGroup())
        {
            rg.WriteColumn(new DataColumn(id, new int[] { 3 }));
        }
    }

    //read the file back and validate
    ms.Position = 0;
    using (var reader = new ParquetReader(ms))
    {
        Assert.Equal(3, reader.RowGroupCount);

        using (ParquetRowGroupReader rg = reader.OpenRowGroupReader(0))
        {
            Assert.Equal(1, rg.RowCount);
            DataColumn dc = rg.ReadColumn(id);
            Assert.Equal(new int[] { 1 }, dc.Data);
        }

        using (ParquetRowGroupReader rg = reader.OpenRowGroupReader(1))
        {
            Assert.Equal(1, rg.RowCount);
            DataColumn dc = rg.ReadColumn(id);
            Assert.Equal(new int[] { 2 }, dc.Data);
        }

        using (ParquetRowGroupReader rg = reader.OpenRowGroupReader(2))
        {
            Assert.Equal(1, rg.RowCount);
            DataColumn dc = rg.ReadColumn(id);
            Assert.Equal(new int[] { 3 }, dc.Data);
        }
    }
}
public long ParquetDotNet()
{
    {
        var valueField = new DecimalDataField("Value", precision: 29, scale: 3, hasNulls: false);
        var schema = new Parquet.Data.Schema(valueField);

        using var stream = File.Create("decimal_timeseries.parquet.net");
        using var parquetWriter = new ParquetWriter(schema, stream);
        using var groupWriter = parquetWriter.CreateRowGroup();

        groupWriter.WriteColumn(new DataColumn(valueField, _values));
    }

    if (Check.Enabled)
    {
        // Read content from ParquetSharp and Parquet.NET
        var baseline = ReadFile("decimal_timeseries.parquet");
        var results = ReadFile("decimal_timeseries.parquet.net");

        // Prove that the content is the same
        Check.ArraysAreEqual(_values, baseline);
        Check.ArraysAreEqual(baseline, results);
    }

    return new FileInfo("decimal_timeseries.parquet.net").Length;
}
static void ConvertCsvToParquet(string inputFile, string outputFile)
{
    var data = new Dictionary<string, ArrayList>();

    using (var reader = new StreamReader(inputFile, true))
    {
        var header = reader.ReadLine();
        var columns = header.Split(",");
        for (int i = 0; i < columns.Length; i++)
        {
            columns[i] = columns[i].Trim();
        }

        while (!reader.EndOfStream)
        {
            var line = reader.ReadLine();
            if (String.IsNullOrEmpty(line))
            {
                continue;
            }

            var parts = line.Split(",");
            for (int i = 0; i < parts.Length && i < columns.Length; i++)
            {
                var column = columns[i];
                if (parquet_types.ContainsKey(column))
                {
                    if (!data.ContainsKey(column))
                    {
                        data.Add(column, new ArrayList());
                    }
                    data[column].Add(ParseValue(parquet_types[column], parts[i]));
                }
            }
        }
    }

    var datacolumns = parquet_types.Select(
        x => new DataColumn(CreateParquetField(x.Key, x.Value), data[x.Key].ToArray(ConvertParquetType(x.Value)))
        ).ToArray();

    var schema = new Schema(datacolumns.Select(x => (Field)x.Field).ToArray());

    using (Stream fileStream = System.IO.File.OpenWrite(outputFile))
    {
        using (var parquetWriter = new ParquetWriter(schema, fileStream))
        {
            // create a new row group in the file
            using (ParquetRowGroupWriter groupWriter = parquetWriter.CreateRowGroup())
            {
                foreach (var column in datacolumns)
                {
                    groupWriter.WriteColumn(column);
                }
            }
        }
    }
}
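// ParseValue, CreateParquetField and ConvertParquetType are referenced above but not shown.
// A hypothetical sketch of ParseValue for two of the supported types, only to illustrate
// the expected contract (raw CSV string in, boxed CLR value out):
static object ParseValue(DataType type, string raw)
{
    switch (type)
    {
        case DataType.Int32:
            return int.TryParse(raw, out var i) ? (object)i : null;
        case DataType.Double:
            return double.TryParse(raw, out var d) ? (object)d : null;
        default:
            return raw; // strings (and anything unhandled) pass through as-is
    }
}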
public void WriteIntro()
{
    //create data columns with schema metadata and the data you need
    var idColumn = new DataColumn(
        new DataField<int>("id"),
        new int[] { 1, 2 });

    var cityColumn = new DataColumn(
        new DataField<string>("city"),
        new string[] { "London", "Derby" });

    // create file schema
    var schema = new Schema(idColumn.Field, cityColumn.Field);

    using (Stream fileStream = System.IO.File.OpenWrite("c:\\test.parquet"))
    {
        using (var parquetWriter = new ParquetWriter(schema, fileStream))
        {
            // create a new row group in the file
            using (ParquetRowGroupWriter groupWriter = parquetWriter.CreateRowGroup(2))
            {
                groupWriter.WriteColumn(idColumn);
                groupWriter.WriteColumn(cityColumn);
            }
        }
    }
}
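// For completeness, a minimal read-back sketch for the file written above, using the same
// API surface shown in the other snippets in this section (ParquetReader,
// OpenRowGroupReader, ReadColumn):
public void ReadIntro()
{
    using (Stream fileStream = System.IO.File.OpenRead("c:\\test.parquet"))
    {
        using (var parquetReader = new ParquetReader(fileStream))
        {
            // read the single row group written above, column by column
            using (ParquetRowGroupReader groupReader = parquetReader.OpenRowGroupReader(0))
            {
                DataColumn idColumn = groupReader.ReadColumn(new DataField<int>("id"));
                DataColumn cityColumn = groupReader.ReadColumn(new DataField<string>("city"));
            }
        }
    }
}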
public void Write_read_nullable_column(Array input)
{
    var id = new DataField<int?>("id");
    var ms = new MemoryStream();

    using (var writer = new ParquetWriter(new Schema(id), ms))
    {
        using (ParquetRowGroupWriter rg = writer.CreateRowGroup())
        {
            rg.WriteColumn(new DataColumn(id, input));
        }
    }

    ms.Position = 0;
    using (var reader = new ParquetReader(ms))
    {
        Assert.Equal(1, reader.RowGroupCount);

        using (ParquetRowGroupReader rg = reader.OpenRowGroupReader(0))
        {
            Assert.Equal(input.Length, rg.RowCount);
            Assert.Equal(input, rg.ReadColumn(id).Data);
        }
    }
}
public void CustomMetadata_can_write_and_read()
{
    var ms = new MemoryStream();
    var id = new DataField<int>("id");

    //write
    using (var writer = new ParquetWriter(new Schema(id), ms))
    {
        writer.CustomMetadata = new Dictionary<string, string>
        {
            ["key1"] = "value1",
            ["key2"] = "value2"
        };

        using (ParquetRowGroupWriter rg = writer.CreateRowGroup())
        {
            rg.WriteColumn(new DataColumn(id, new[] { 1, 2, 3, 4 }));
        }
    }

    //read back
    using (var reader = new ParquetReader(ms))
    {
        Assert.Equal("value1", reader.CustomMetadata["key1"]);
        Assert.Equal("value2", reader.CustomMetadata["key2"]);
    }
}
public static void WriteAthenaRowsAsParquet(this Stream stream, ResultSetMetadata tableSchema, List<FieldMapping> mappings, IEnumerable<Row> rows)
{
    List<DataColumn> columns = new List<DataColumn>();

    int index = 0;
    foreach (var column in tableSchema.ColumnInfo)
    {
        columns.Add(column.ToParquetColumn(mappings, index, rows));
        index++;
    }

    Schema schema = new Schema(new ReadOnlyCollection<Field>(columns.Select(column => column.Field).ToArray()));

    using (ParquetWriter writer = new ParquetWriter(schema, stream))
    {
        writer.CompressionMethod = CompressionMethod.Snappy;

        using (ParquetRowGroupWriter rowGroupWriter = writer.CreateRowGroup())
        {
            foreach (var column in columns)
            {
                rowGroupWriter.WriteColumn(column);
            }
        }
    }
}
/// <inheritdoc/>
public void Write(IList<IColumn> columns)
{
    List<DataColumn> parquetColumns = CreateParquetColumns(columns);
    List<DataField> parquetFields = parquetColumns.Select(p => p.Field).ToList();
    Schema schema = new Schema(parquetFields);

    using (var parquetWriter = new ParquetWriter(schema, FileStream))
    {
        // TODO - Write is called many times; one for each rowgroup in the file. We do not need to compile
        // and write metadata many times. Refactor to write metadata only once.
        CryptoMetadata metadata = CompileMetadata(columns, FileEncryptionSettings);
        if (!metadata.IsEmpty())
        {
            parquetWriter.CustomMetadata = new Dictionary<string, string>
            {
                [nameof(CryptoMetadata)] = JsonConvert.SerializeObject(
                    value: metadata,
                    settings: new JsonSerializerSettings()
                    {
                        NullValueHandling = NullValueHandling.Ignore,
                        Converters = { new StringEnumConverter() },
                        Formatting = Formatting.Indented
                    })
            };
        }

        // create a new row group in the file
        using (ParquetRowGroupWriter groupWriter = parquetWriter.CreateRowGroup())
        {
            parquetColumns.ForEach(groupWriter.WriteColumn);
        }
    }
}
public void Write_multiple_row_groups_to_forward_only_stream()
{
    var ms = new MemoryStream();
    var forwardOnly = new WriteableNonSeekableStream(ms);

    var schema = new Schema(
        new DataField<int>("id"),
        new DataField<string>("nonsense"));

    using (var writer = new ParquetWriter(schema, forwardOnly))
    {
        using (ParquetRowGroupWriter rgw = writer.CreateRowGroup(1))
        {
            rgw.WriteColumn(new DataColumn((DataField)schema[0], new[] { 1 }));
            rgw.WriteColumn(new DataColumn((DataField)schema[1], new[] { "1" }));
        }

        using (ParquetRowGroupWriter rgw = writer.CreateRowGroup(1))
        {
            rgw.WriteColumn(new DataColumn((DataField)schema[0], new[] { 2 }));
            rgw.WriteColumn(new DataColumn((DataField)schema[1], new[] { "2" }));
        }
    }

    ms.Position = 0;
    using (var reader = new ParquetReader(ms))
    {
        Assert.Equal(2, reader.RowGroupCount);

        using (ParquetRowGroupReader rgr = reader.OpenRowGroupReader(0))
        {
            Assert.Equal(1, rgr.RowCount);
            DataColumn column = rgr.ReadColumn((DataField)schema[0]);
            Assert.Equal(1, column.Data.GetValue(0));
        }

        using (ParquetRowGroupReader rgr = reader.OpenRowGroupReader(1))
        {
            Assert.Equal(1, rgr.RowCount);
            DataColumn column = rgr.ReadColumn((DataField)schema[0]);
            Assert.Equal(2, column.Data.GetValue(0));
        }
    }
}
//TODO: Implement this class from scratch by leveraging grouping queries in the DB engine
//TODO: Unit & Integration Test
//https://stackoverflow.com/questions/50933429/how-to-view-apache-parquet-file-in-windows
//https://github.com/elastacloud/parquet-dotnet
public void WriteData(IEnumerable<PriceForecast> data, string basePath)
{
    var forecastsByCountry = data.GroupBy(f => f.CountryCode);
    foreach (var countryGroup in forecastsByCountry)
    {
        var country = countryGroup.Key;
        var forecastsByCategory = countryGroup.ToList().GroupBy(f => f.Category);
        foreach (var categoryGroup in forecastsByCategory)
        {
            var category = categoryGroup.Key;
            var forecastsByYear = categoryGroup.ToList().GroupBy(f => f.ForecastedDate.Year);
            foreach (var yearGroup in forecastsByYear)
            {
                var year = yearGroup.Key;
                var forecastsByMonth = yearGroup.ToList().GroupBy(f => f.ForecastedDate.Month);
                foreach (var monthGroup in forecastsByMonth)
                {
                    var month = monthGroup.Key;
                    var forecasts = monthGroup.ToList();
                    var dirPath = $"{basePath}/Country={country}/Category={category}/Year={year}/Month={month}/";
                    var filePath = dirPath + "forecast.parquet";

                    //TODO: automating schema generation using reflection and attributes
                    var columns = new DataColumn[]
                    {
                        new DataColumn(ParquetSchemaHelper.ForecastDateField, forecasts.Select(f => f.ForecastDateTime.ToString()).ToArray()),
                        new DataColumn(ParquetSchemaHelper.ForecastModelField, forecasts.Select(f => f.ForecastModel).ToArray()),
                        new DataColumn(ParquetSchemaHelper.MarketField, forecasts.Select(f => f.Market).ToArray()),
                        new DataColumn(ParquetSchemaHelper.ProductField, forecasts.Select(f => f.Product).ToArray()),
                        new DataColumn(ParquetSchemaHelper.CountryField, forecasts.Select(f => f.CountryCode).ToArray()),
                        new DataColumn(ParquetSchemaHelper.ForecastedDateField, forecasts.Select(f => f.ForecastedDate.ToString()).ToArray()),
                        new DataColumn(ParquetSchemaHelper.CategoryField, forecasts.Select(f => f.Category).ToArray()),
                        new DataColumn(ParquetSchemaHelper.PriceField, forecasts.Select(f => f.Price).ToArray())
                    };

                    var schema = new Schema(columns.Select(c => c.Field).ToArray());

                    DirectoryInfo di = Directory.CreateDirectory(dirPath); //safe
                    using Stream fileStream = System.IO.File.OpenWrite(filePath);
                    using var parquetWriter = new ParquetWriter(schema, fileStream);
                    using (ParquetRowGroupWriter groupWriter = parquetWriter.CreateRowGroup())
                    {
                        foreach (var col in columns)
                        {
                            groupWriter.WriteColumn(col);
                        }
                    }
                }
            }
        }
    }
}
private string CreateTestArchive(Guid sessionId)
{
    var tmpName = Path.GetTempFileName();

    var timeData = new[] { 1L, 2L, 3L };
    var timeColumn = new DataColumn(
        new DataField<long>("time"),
        timeData);

    var numbersData = new[] { 42d, 1337d, 6.022e23 };
    var numbersColumn = new DataColumn(
        new DataField<double>("cool_numbers"),
        numbersData);

    var schema = new Schema(timeColumn.Field, numbersColumn.Field);
    var json = new JObject { ["meta"] = new JObject(), ["user"] = new JObject() };

    using (var ms = new MemoryStream())
    {
        using (var parquetWriter = new ParquetWriter(schema, ms))
        using (var groupWriter = parquetWriter.CreateRowGroup())
        {
            groupWriter.WriteColumn(timeColumn);
            groupWriter.WriteColumn(numbersColumn);
        }

        ms.Position = 0;
        using (var parquetReader = new ParquetReader(ms))
        {
            var tableInformation = new ArchiveTableInformation()
            {
                Columns = new List<DataField>(parquetReader.Schema.GetDataFields()),
                Time = timeColumn.Field
            };
            var table = new ArchiveTable(json, parquetReader, tableInformation, "testData");

            var archive = Archive.Create(tmpName);
            var session = ArchiveSession.Create(archive, "testName");
            var folder = ArchiveFolder.Create(archive, sessionId, "testFolder");

            folder.AddChild(table);
            session.AddChild(folder);
            archive.AddSession(session);

            archive.WriteFile().Wait();
            archive.Close();
        }
    }

    return tmpName;
}
/// <summary>
/// Writes a row group to the file
/// </summary>
private void FlushRowGroup(ParquetWriter writer, Table table)
{
    if (table.Count > 0)
    {
        using (ParquetRowGroupWriter rowGroupWriter = writer.CreateRowGroup())
        {
            rowGroupWriter.Write(table);
        }
    }
}
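// The Table overload used above comes from Parquet.NET's row-based API. A minimal sketch
// of buffering rows into such a table, assuming the Table and Row types from
// Parquet.Data.Rows (an illustration, not code from the original source):
private static Table BuildSampleTable()
{
    var schema = new Schema(new DataField<int>("id"), new DataField<string>("city"));
    var table = new Table(schema);
    table.Add(new Row(1, "London"));
    table.Add(new Row(2, "Derby"));
    return table; // pass to FlushRowGroup(writer, table) to write as one row group
}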
private static MemoryStream CreateStream(Schema schema, Dictionary<int, List<object>> data)
{
    var memoryStream = new MemoryStream();
    var fields = schema.GetDataFields();

    using (var parquet = new ParquetWriter(schema, memoryStream) { CompressionMethod = CompressionMethod.Snappy })
    using (var rgw = parquet.CreateRowGroup())
    {
        for (var i = 0; i < fields.Length; i++)
        {
            var columnData = data[i];
            var dataField = fields[i];

            switch (fields[i].DataType)
            {
                case DataType.Int32:
                    rgw.WriteColumn(new DataColumn(dataField, columnData.Cast<int?>().ToArray()));
                    break;
                case DataType.Int64:
                    rgw.WriteColumn(new DataColumn(dataField, columnData.Cast<long?>().ToArray()));
                    break;
                case DataType.Float:
                    rgw.WriteColumn(new DataColumn(dataField, columnData.Cast<float?>().ToArray()));
                    break;
                case DataType.Decimal:
                    rgw.WriteColumn(new DataColumn(dataField, columnData.Cast<decimal?>().ToArray()));
                    break;
                case DataType.Double:
                    rgw.WriteColumn(new DataColumn(dataField, columnData.Cast<double?>().ToArray()));
                    break;
                case DataType.String:
                    rgw.WriteColumn(new DataColumn(dataField, columnData.Cast<string>().ToArray()));
                    break;
                case DataType.DateTimeOffset:
                    rgw.WriteColumn(new DataColumn(dataField, columnData.Cast<DateTimeOffset?>().ToArray()));
                    break;
            }
        }
    }

    return memoryStream;
}
public static void TestDecimalSeries([Values(0, 1)] int warmup)
{
    var timer = Stopwatch.StartNew();
    var rand = new Random(123);

    Console.WriteLine("Generating data...");

    var values = Enumerable.Range(0, 10_000_000).Select(i =>
    {
        var n = rand.Next();
        var sign = rand.NextDouble() < 0.5 ? -1M : +1M;
        return sign * ((decimal)n * n * n) / 1000M;
    }).ToArray();

    Console.WriteLine("Generated {0:N0} rows in {1:N2} sec", values.Length, timer.Elapsed.TotalSeconds);
    Console.WriteLine();

    Console.WriteLine("Saving to Parquet");
    timer.Restart();

    using (var fileWriter = new ParquetFileWriter("decimal_timeseries.parquet", new Column[] { new Column<decimal>("Value", LogicalType.Decimal(precision: 29, scale: 3)) }))
    {
        using (var rowGroupWriter = fileWriter.AppendRowGroup())
        {
            using var valueWriter = rowGroupWriter.NextColumn().LogicalWriter<decimal>();
            valueWriter.WriteBatch(values);
        }

        fileWriter.Close();
    }

    Console.WriteLine("Saved to Parquet ({0:N0} bytes) in {1:N2} sec", new FileInfo("decimal_timeseries.parquet").Length, timer.Elapsed.TotalSeconds);
    Console.WriteLine();

    Console.WriteLine("Saving to Parquet.NET");
    timer.Restart();

    {
        var valueField = new DecimalDataField("Value", precision: 29, scale: 3);
        var schema = new Parquet.Data.Schema(valueField);

        using var stream = File.Create("decimal_timeseries.parquet.net");
        using var parquetWriter = new ParquetWriter(schema, stream);
        using var groupWriter = parquetWriter.CreateRowGroup();

        groupWriter.WriteColumn(new DataColumn(valueField, values));
    }

    Console.WriteLine("Saved to Parquet.NET ({0:N0} bytes) in {1:N2} sec", new FileInfo("decimal_timeseries.parquet.net").Length, timer.Elapsed.TotalSeconds);
}
private void OnWrite<TModel>(MapperConfig<TModel> mapConfig, Stream fileStream, bool isAppend, params TModel[] models)
{
    var dataColumns = mapConfig.ToDataColumns(models);
    var columnDataFields = dataColumns.Select(s => s.Field).ToArray();

    var schema = new Schema(columnDataFields);

    using var parquetWriter = new ParquetWriter(schema, fileStream, append: isAppend);
    using var groupWriter = parquetWriter.CreateRowGroup();

    for (int i = 0; i < dataColumns.Length; i++)
    {
        groupWriter.WriteColumn(dataColumns[i]);
    }
}
public void Cannot_write_columns_in_wrong_order()
{
    var schema = new Schema(new DataField<int>("id"), new DataField<int>("id2"));

    using (var writer = new ParquetWriter(schema, new MemoryStream()))
    {
        using (ParquetRowGroupWriter gw = writer.CreateRowGroup())
        {
            Assert.Throws<ArgumentException>(() =>
            {
                gw.WriteColumn(new DataColumn((DataField)schema[1], new int[] { 1 }));
            });
        }
    }
}
public async Task<bool> WriteData(JObject root, ISessionWriter writer)
{
    var pathArr = Meta["attachments"].ToObject<string[]>() ?? throw new ArgumentException("Table is missing 'attachments'");

    //TODO: Implement?
    if (false && IsSaved)
    {
        var stream = await _archive.OpenFile(_zipEntry);
        writer.StoreFileId(stream, pathArr[0]);
    }
    else
    {
        // This stream will be disposed by the sessionWriter
        var ms = new MemoryStream();

        // Make a copy of the Remembering reader that can be discarded later.
        // This avoids reading all tables into memory at the same time.
        var fullReader = new RememberingParquetReader(_reader);
        fullReader.LoadAll();

        using (var tableWriter = new ParquetWriter(fullReader.Schema, ms))
        {
            // The using construct ensures correct storage of the final rowGroup details in the parquet file
            using (var rowGroup = tableWriter.CreateRowGroup())
            {
                foreach (var field in fullReader.Schema.GetDataFields())
                {
                    var column = new DataColumn(field, fullReader.GetColumn(field));
                    rowGroup.WriteColumn(column);
                }
            }
        }

        ms.Position = 0;
        writer.StoreFileId(ms, pathArr[0]);
    }

    // TODO AUTOACTIVE-58 - Generalize copy of previous metadata for save
    // Copy previous
    root["meta"] = Meta;
    root["user"] = User;

    // Overwrite potentially changed
    // TODO root["meta"]["is_world_clock"] = ;
    // TODO root["meta"]["synced_to"] = ;

    return true;
}
public static void WriteParquetColumns(this Stream stream, List<DataColumn> columns)
{
    Schema schema = new Schema(new ReadOnlyCollection<Field>(columns.Select(column => column.Field).ToArray()));

    using (ParquetWriter writer = new ParquetWriter(schema, stream))
    {
        writer.CompressionMethod = CompressionMethod.Snappy;

        using (ParquetRowGroupWriter rowGroupWriter = writer.CreateRowGroup())
        {
            foreach (var column in columns)
            {
                rowGroupWriter.WriteColumn(column);
            }
        }
    }
}
public static void WriteParquet<T>(this Stream stream, IEnumerable<T> items) where T : class
{
    Type classType = typeof(T);
    var properties = classType.GetProperties();

    List<DataColumn> columns = new List<DataColumn>();
    foreach (var prop in properties)
    {
        if (prop.PropertyType == DateTimeType)
        {
            columns.Add(new DataColumn(
                new DateTimeDataField(prop.Name, DateTimeFormat.DateAndTime),
                items.Select(item => new DateTimeOffset((DateTime)prop.GetValue(item))).ToArray()
            ));
        }
        else
        {
            // Build DataField<TProperty> and materialize the column data via reflection
            var genericArguments = new Type[] { prop.PropertyType };
            var genericType = DataFieldGenericType.MakeGenericType(genericArguments);
            var genericConstructor = genericType.GetConstructor(DataFieldConstructorGenericArguments);
            DataField field = genericConstructor.Invoke(new object[] { prop.Name }) as DataField;

            var dataSource = items.Select(item => prop.GetValue(item));
            var castMethod = CastMethodGeneric.MakeGenericMethod(genericArguments);
            var toArrayMethod = ToArrayMethodGeneric.MakeGenericMethod(genericArguments);
            var data = toArrayMethod.Invoke(null, new object[] { castMethod.Invoke(null, new object[] { dataSource }) }) as Array;

            var column = new DataColumn(field, data);
            columns.Add(column);
        }
    }

    Schema schema = new Schema(new ReadOnlyCollection<Field>(columns.Select(column => column.Field).ToArray()));

    using (ParquetWriter writer = new ParquetWriter(schema, stream))
    {
        writer.CompressionMethod = CompressionMethod.Snappy;

        using (ParquetRowGroupWriter rowGroupWriter = writer.CreateRowGroup())
        {
            foreach (var column in columns)
            {
                rowGroupWriter.WriteColumn(column);
            }
        }
    }
}
public static void BuildParquetFile(DataColumn license, DataColumn sensor, DataColumn time, string outPath)
{
    var schema = new Schema(license.Field, sensor.Field, time.Field);

    using (Stream fileStream = File.Create(outPath))
    {
        using (var parquetWriter = new ParquetWriter(schema, fileStream))
        {
            parquetWriter.CompressionMethod = CompressionMethod.Gzip;

            using (ParquetRowGroupWriter groupWriter = parquetWriter.CreateRowGroup())
            {
                groupWriter.WriteColumn(license);
                groupWriter.WriteColumn(sensor);
                groupWriter.WriteColumn(time);
            }
        }
    }
}
private static void WriteDataInFile(string path, List<DataColumn> schemaColumns)
{
    var schema = new Schema(schemaColumns.ConvertAll(col => col.Field));

    using (Stream fileStream = File.Create(path))
    {
        using (var parquetWriter = new ParquetWriter(schema, fileStream))
        {
            using (ParquetRowGroupWriter groupWriter = parquetWriter.CreateRowGroup())
            {
                foreach (var column in schemaColumns)
                {
                    groupWriter.WriteColumn(column);
                }
            }
        }
    }
}
public void Append_to_file_reads_all_data()
{
    //write a file with a single row group
    var id = new DataField<int>("id");
    var ms = new MemoryStream();

    using (var writer = new ParquetWriter(new Schema(id), ms))
    {
        using (ParquetRowGroupWriter rg = writer.CreateRowGroup())
        {
            rg.WriteColumn(new DataColumn(id, new int[] { 1, 2 }));
        }
    }

    //append to this file. Note that you cannot append to an existing row group, therefore create a new one
    ms.Position = 0;
    using (var writer = new ParquetWriter(new Schema(id), ms, append: true))
    {
        using (ParquetRowGroupWriter rg = writer.CreateRowGroup())
        {
            rg.WriteColumn(new DataColumn(id, new int[] { 3, 4 }));
        }
    }

    //check that this file now contains two row groups and all the data is valid
    ms.Position = 0;
    using (var reader = new ParquetReader(ms))
    {
        Assert.Equal(2, reader.RowGroupCount);

        using (ParquetRowGroupReader rg = reader.OpenRowGroupReader(0))
        {
            Assert.Equal(2, rg.RowCount);
            Assert.Equal(new int[] { 1, 2 }, rg.ReadColumn(id).Data);
        }

        using (ParquetRowGroupReader rg = reader.OpenRowGroupReader(1))
        {
            Assert.Equal(2, rg.RowCount);
            Assert.Equal(new int[] { 3, 4 }, rg.ReadColumn(id).Data);
        }
    }
}
protected object WriteReadSingle(DataField field, object value, CompressionMethod compressionMethod = CompressionMethod.None, int compressionLevel = -1)
{
    //for sanity, use disconnected streams
    byte[] data;

    using (var ms = new MemoryStream())
    {
        // write single value
        using (var writer = new ParquetWriter(new Schema(field), ms))
        {
            writer.CompressionMethod = compressionMethod;
            writer.CompressionLevel = compressionLevel;

            using (ParquetRowGroupWriter rg = writer.CreateRowGroup())
            {
                Array dataArray = Array.CreateInstance(field.ClrNullableIfHasNullsType, 1);
                dataArray.SetValue(value, 0);
                var column = new DataColumn(field, dataArray);

                rg.WriteColumn(column);
            }
        }

        data = ms.ToArray();
    }

    using (var ms = new MemoryStream(data))
    {
        // read back single value
        ms.Position = 0;

        using (var reader = new ParquetReader(ms))
        {
            using (ParquetRowGroupReader rowGroupReader = reader.OpenRowGroupReader(0))
            {
                DataColumn column = rowGroupReader.ReadColumn(field);
                return column.Data.GetValue(0);
            }
        }
    }
}
public static void Main(string[] args)
{
    var dt = GenerateTestData();
    var fields = GenerateSchema(dt);

    // Open the output file for writing
    using (var stream = File.Open(OutputFilePath, FileMode.Create, FileAccess.Write))
    {
        using (var writer = new ParquetWriter(new Schema(fields), stream))
        {
            var startRow = 0;

            // Keep on creating row groups until we run out of data
            while (startRow < dt.Rows.Count)
            {
                using (var rgw = writer.CreateRowGroup(RowGroupSize))
                {
                    // Data is written to the row group column by column
                    for (var i = 0; i < dt.Columns.Count; i++)
                    {
                        var columnIndex = i;

                        // Determine the target data type for the column
                        var targetType = dt.Columns[columnIndex].DataType;
                        if (targetType == typeof(DateTime))
                        {
                            targetType = typeof(DateTimeOffset);
                        }

                        // Generate the value type; this is to ensure it can handle null values
                        var valueType = targetType.IsClass
                            ? targetType
                            : typeof(Nullable<>).MakeGenericType(targetType);

                        // Create a list to hold values of the required type for the column
                        var list = (IList)typeof(List<>)
                            .MakeGenericType(valueType)
                            .GetConstructor(Type.EmptyTypes)
                            .Invoke(null);

                        // Get the data to be written to the parquet stream
                        foreach (var row in dt.AsEnumerable().Skip(startRow).Take(RowGroupSize))
                        {
                            // Check if value is null; if so then add a null value
                            if (row[columnIndex] == null || row[columnIndex] == DBNull.Value)
                            {
                                list.Add(null);
                            }
                            else
                            {
                                // Add the value to the list, but if it's a DateTime then create it as a DateTimeOffset first
                                list.Add(dt.Columns[columnIndex].DataType == typeof(DateTime)
                                    ? new DateTimeOffset((DateTime)row[columnIndex])
                                    : row[columnIndex]);
                            }
                        }

                        // Copy the list values to an array of the same type, as the WriteColumn method expects an Array
                        var valuesArray = Array.CreateInstance(valueType, list.Count);
                        list.CopyTo(valuesArray, 0);

                        // Write the column
                        rgw.WriteColumn(new Parquet.Data.DataColumn(fields[i], valuesArray));
                    }
                }

                startRow += RowGroupSize;
            }
        }
    }
}
public void SaveSingleDataArchive()
{
    Guid sessionId = Guid.NewGuid(); // Todo fake sessionId
    var tmpName = Path.GetTempFileName();

    var timeData = new[] { 1L, 2L, 3L };
    var timeColumn = new DataColumn(
        new DataField<long>("time"),
        timeData);

    var numbersData = new[] { 42d, 1337d, 6.022e23 };
    var numbersColumn = new DataColumn(
        new DataField<double>("cool_numbers"),
        numbersData);

    var schema = new Schema(timeColumn.Field, numbersColumn.Field);
    var json = new JObject { ["meta"] = new JObject(), ["user"] = new JObject() };

    using (var ms = new MemoryStream())
    {
        using (var parquetWriter = new ParquetWriter(schema, ms))
        using (var groupWriter = parquetWriter.CreateRowGroup())
        {
            groupWriter.WriteColumn(timeColumn);
            groupWriter.WriteColumn(numbersColumn);
        }

        ms.Position = 0;
        using (var parquetReader = new ParquetReader(ms))
        {
            var tableInformation = new ArchiveTableInformation()
            {
                Columns = new List<DataField>(parquetReader.Schema.GetDataFields()),
                Time = timeColumn.Field
            };
            var table = new ArchiveTable(json, parquetReader, tableInformation, "testData");

            var archive = Archive.Create(tmpName);
            var session = ArchiveSession.Create(archive, "testName");
            var folder = ArchiveFolder.Create(archive, sessionId, "testFolder");

            folder.AddChild(table);
            session.AddChild(folder);
            archive.AddSession(session);

            try
            {
                archive.WriteFile().Wait();
            }
            catch (Exception)
            {
                Assert.True(false);
            }

            archive.Close();

            using (var fr = new FileReader(tmpName))
            {
                var openTask = Archive.Open(fr);
                openTask.Wait();
                var newArchive = openTask.Result;

                AssertArchivesEqual(archive, newArchive);
                Assert.Equal("testName", session.Name);
                Assert.Single(newArchive.Sessions.First().Children);

                var readFolder = newArchive.Sessions.First().Children.First();
                Assert.Equal("testFolder", readFolder.Name);
                Assert.Single(readFolder.Children);

                var child = readFolder.Children.First();
                Assert.Single(child.DataPoints);
                Assert.IsAssignableFrom<ArchiveTable>(child);

                var tableChild = (ArchiveTable)child;
                var dataPoint = tableChild.DataPoints.First();

                var context = new TimeSynchronizedContext();
                context.AvailableTimeRangeChanged += (sender, from, to) => context.SetSelectedTimeRange(from, to);

                var viewer = context.GetDataViewerFor(dataPoint);
                viewer.Wait();
                var dataViewer = viewer.Result;
                Assert.IsAssignableFrom<ITimeSeriesViewer>(dataViewer);

                var timeViewer = (ITimeSeriesViewer)dataViewer;
                var data = timeViewer.GetCurrentData<double>();

                Assert.Equal("cool_numbers", dataViewer.DataPoint.Name);
                Assert.Equal(timeData, data.X.ToArray());
                Assert.Equal(numbersData, data.Y.ToArray());

                newArchive.Close();
            }
        }
    }

    File.Delete(tmpName);
}
public static void TestWriteFloatTimeSeries([Values(0, 1)] int warmup)
{
    var timer = Stopwatch.StartNew();

    Console.WriteLine("Generating data...");
    var (dates, objectIds, values, numRows) = CreateFloatDataFrame();

    Console.WriteLine("Generated {0:N0} rows in {1:N2} sec", numRows, timer.Elapsed.TotalSeconds);
    Console.WriteLine();

    Console.WriteLine("Saving to CSV");
    timer.Restart();

    using (var csv = new StreamWriter("float_timeseries.csv"))
    {
        for (int i = 0; i != dates.Length; ++i)
        {
            for (int j = 0; j != objectIds.Length; ++j)
            {
                csv.WriteLine("{0:yyyy-MM-dd HH:mm:ss},{1},{2}", dates[i], objectIds[j], values[i][j]);
            }
        }
    }

    Console.WriteLine("Saved to CSV ({0:N0} bytes) in {1:N2} sec", new FileInfo("float_timeseries.csv").Length, timer.Elapsed.TotalSeconds);
    Console.WriteLine();

    Console.WriteLine("Saving to CSV.GZ");
    timer.Restart();

    using (var stream = new FileStream("float_timeseries.csv.gz", FileMode.Create))
    {
        using var zip = new GZipStream(stream, CompressionLevel.Optimal);
        using var csv = new StreamWriter(zip);

        for (int i = 0; i != dates.Length; ++i)
        {
            for (int j = 0; j != objectIds.Length; ++j)
            {
                csv.WriteLine("{0:yyyy-MM-dd HH:mm:ss},{1},{2}", dates[i], objectIds[j], values[i][j]);
            }
        }
    }

    Console.WriteLine("Saved to CSV ({0:N0} bytes) in {1:N2} sec", new FileInfo("float_timeseries.csv.gz").Length, timer.Elapsed.TotalSeconds);
    Console.WriteLine();

    Console.WriteLine("Saving to Parquet");
    timer.Restart();

    using (var fileWriter = new ParquetFileWriter("float_timeseries.parquet", CreateFloatColumns()))
    {
        using var rowGroupWriter = fileWriter.AppendRowGroup();

        using (var dateTimeWriter = rowGroupWriter.NextColumn().LogicalWriter<DateTime>())
        {
            for (int i = 0; i != dates.Length; ++i)
            {
                dateTimeWriter.WriteBatch(Enumerable.Repeat(dates[i], objectIds.Length).ToArray());
            }
        }

        using (var objectIdWriter = rowGroupWriter.NextColumn().LogicalWriter<int>())
        {
            for (int i = 0; i != dates.Length; ++i)
            {
                objectIdWriter.WriteBatch(objectIds);
            }
        }

        using (var valueWriter = rowGroupWriter.NextColumn().LogicalWriter<float>())
        {
            for (int i = 0; i != dates.Length; ++i)
            {
                valueWriter.WriteBatch(values[i]);
            }
        }

        fileWriter.Close();
    }

    Console.WriteLine("Saved to Parquet ({0:N0} bytes) in {1:N2} sec", new FileInfo("float_timeseries.parquet").Length, timer.Elapsed.TotalSeconds);
    Console.WriteLine();

    Console.WriteLine("Saving to Parquet.Chunked (by date)");
    timer.Restart();

    using (var fileWriter = new ParquetFileWriter("float_timeseries.parquet.chunked", CreateFloatColumns()))
    {
        for (int i = 0; i != dates.Length; ++i)
        {
            using var rowGroupWriter = fileWriter.AppendRowGroup();

            using (var dateTimeWriter = rowGroupWriter.NextColumn().LogicalWriter<DateTime>())
            {
                dateTimeWriter.WriteBatch(Enumerable.Repeat(dates[i], objectIds.Length).ToArray());
            }

            using (var objectIdWriter = rowGroupWriter.NextColumn().LogicalWriter<int>())
            {
                objectIdWriter.WriteBatch(objectIds);
            }

            using (var valueWriter = rowGroupWriter.NextColumn().LogicalWriter<float>())
            {
                valueWriter.WriteBatch(values[i]);
            }
        }

        fileWriter.Close();
    }

    Console.WriteLine("Saved to Parquet.Chunked ({0:N0} bytes) in {1:N2} sec", new FileInfo("float_timeseries.parquet.chunked").Length, timer.Elapsed.TotalSeconds);
    Console.WriteLine();

    Console.WriteLine("Saving to Parquet.RowOriented");
    timer.Restart();

    using (var rowWriter = ParquetFile.CreateRowWriter<(DateTime, int, float)>("float_timeseries.parquet.roworiented", new[] { "DateTime", "ObjectId", "Value" }))
    {
        for (int i = 0; i != dates.Length; ++i)
        {
            for (int j = 0; j != objectIds.Length; ++j)
            {
                rowWriter.WriteRow((dates[i], objectIds[j], values[i][j]));
            }
        }
    }

    Console.WriteLine("Saved to Parquet.RowOriented ({0:N0} bytes) in {1:N2} sec", new FileInfo("float_timeseries.parquet.roworiented").Length, timer.Elapsed.TotalSeconds);
    Console.WriteLine();

    Console.WriteLine("Saving to Parquet.Stream");
    timer.Restart();

    using (var stream = new FileStream("float_timeseries.parquet.stream", FileMode.Create))
    {
        using var writer = new IO.ManagedOutputStream(stream);
        using var fileWriter = new ParquetFileWriter(writer, CreateFloatColumns());
        using var rowGroupWriter = fileWriter.AppendRowGroup();

        using (var dateTimeWriter = rowGroupWriter.NextColumn().LogicalWriter<DateTime>())
        {
            for (int i = 0; i != dates.Length; ++i)
            {
                dateTimeWriter.WriteBatch(Enumerable.Repeat(dates[i], objectIds.Length).ToArray());
            }
        }

        using (var objectIdWriter = rowGroupWriter.NextColumn().LogicalWriter<int>())
        {
            for (int i = 0; i != dates.Length; ++i)
            {
                objectIdWriter.WriteBatch(objectIds);
            }
        }

        using (var valueWriter = rowGroupWriter.NextColumn().LogicalWriter<float>())
        {
            for (int i = 0; i != dates.Length; ++i)
            {
                valueWriter.WriteBatch(values[i]);
            }
        }

        fileWriter.Close();
    }

    Console.WriteLine("Saved to Parquet.Stream ({0:N0} bytes) in {1:N2} sec", new FileInfo("float_timeseries.parquet.stream").Length, timer.Elapsed.TotalSeconds);
    Console.WriteLine();

    Console.WriteLine("Saving to Parquet.Chunked.Stream (by date)");
    timer.Restart();

    using (var stream = new FileStream("float_timeseries.parquet.chunked.stream", FileMode.Create))
    {
        using var writer = new IO.ManagedOutputStream(stream);
        using var fileWriter = new ParquetFileWriter(writer, CreateFloatColumns());

        for (int i = 0; i != dates.Length; ++i)
        {
            using var rowGroupWriter = fileWriter.AppendRowGroup();

            using (var dateTimeWriter = rowGroupWriter.NextColumn().LogicalWriter<DateTime>())
            {
                dateTimeWriter.WriteBatch(Enumerable.Repeat(dates[i], objectIds.Length).ToArray());
            }

            using (var objectIdWriter = rowGroupWriter.NextColumn().LogicalWriter<int>())
            {
                objectIdWriter.WriteBatch(objectIds);
            }

            using (var valueWriter = rowGroupWriter.NextColumn().LogicalWriter<float>())
            {
                valueWriter.WriteBatch(values[i]);
            }
        }

        fileWriter.Close();
    }

    Console.WriteLine("Saved to Parquet.Chunked.Stream ({0:N0} bytes) in {1:N2} sec", new FileInfo("float_timeseries.parquet.chunked.stream").Length, timer.Elapsed.TotalSeconds);
    Console.WriteLine();

    Console.WriteLine("Saving to Parquet.RowOriented.Stream");
    timer.Restart();

    using (var stream = new FileStream("float_timeseries.parquet.roworiented.stream", FileMode.Create))
    {
        using var writer = new IO.ManagedOutputStream(stream);
        using var rowWriter = ParquetFile.CreateRowWriter<(DateTime, int, float)>(writer, new[] { "DateTime", "ObjectId", "Value" });

        for (int i = 0; i != dates.Length; ++i)
        {
            for (int j = 0; j != objectIds.Length; ++j)
            {
                rowWriter.WriteRow((dates[i], objectIds[j], values[i][j]));
            }
        }

        rowWriter.Close();
    }

    Console.WriteLine("Saved to Parquet.RowOriented.Stream ({0:N0} bytes) in {1:N2} sec", new FileInfo("float_timeseries.parquet.roworiented.stream").Length, timer.Elapsed.TotalSeconds);
    Console.WriteLine();

    Console.WriteLine("Saving to Parquet.NET");
    timer.Restart();

    {
        var dateTimeField = new DateTimeDataField("DateTime", DateTimeFormat.DateAndTime);
        var objectIdField = new DataField<int>("ObjectId");
        var valueField = new DataField<float>("Value");
        var schema = new Parquet.Data.Schema(dateTimeField, objectIdField, valueField);

        using (var stream = File.Create("float_timeseries.parquet.net"))
        using (var parquetWriter = new ParquetWriter(schema, stream))
        using (var groupWriter = parquetWriter.CreateRowGroup())
        {
            var dateTimeColumn = new DataColumn(dateTimeField, dates.SelectMany(d => Enumerable.Repeat(new DateTimeOffset(d), objectIds.Length)).ToArray());
            var objectIdColumn = new DataColumn(objectIdField, dates.SelectMany(d => objectIds).ToArray());
            var valueColumn = new DataColumn(valueField, dates.SelectMany((d, i) => values[i]).ToArray());

            groupWriter.WriteColumn(dateTimeColumn);
            groupWriter.WriteColumn(objectIdColumn);
            groupWriter.WriteColumn(valueColumn);
        }
    }

    Console.WriteLine("Saved to Parquet.NET ({0:N0} bytes) in {1:N2} sec", new FileInfo("float_timeseries.parquet.net").Length, timer.Elapsed.TotalSeconds);
}
/// <summary>
/// Writes dataLen rows and typed columns to the file.
/// </summary>
/// <param name="csvColumns">Processed CSV data</param>
/// <param name="dataLen">Row count</param>
/// <param name="writer">ParquetWriter</param>
/// <param name="fields">Field structure</param>
/// <param name="config">Config structure</param>
public static void WriteGroup(List<Object> csvColumns, long dataLen, ParquetWriter writer, List<DataField> fields, Config config)
{
    using (ParquetRowGroupWriter rg = writer.CreateRowGroup())
    {
        for (int i = 0; i < fields.Count; i++)
        {
            if (fields[i].HasNulls)
            {
                switch (fields[i].DataType)
                {
                    case DataType.Boolean:
                        rg.WriteColumn(new DataColumn(fields[i], (bool?[])csvColumns[i]));
                        break;
                    case DataType.DateTimeOffset:
                        rg.WriteColumn(new DataColumn(fields[i], (DateTimeOffset?[])csvColumns[i]));
                        break;
                    case DataType.Decimal:
                        rg.WriteColumn(new DataColumn(fields[i], (decimal?[])csvColumns[i]));
                        break;
                    case DataType.Double:
                        rg.WriteColumn(new DataColumn(fields[i], (double?[])csvColumns[i]));
                        break;
                    case DataType.Float:
                        rg.WriteColumn(new DataColumn(fields[i], (float?[])csvColumns[i]));
                        break;
                    case DataType.Int16:
                        rg.WriteColumn(new DataColumn(fields[i], (Int16?[])csvColumns[i]));
                        break;
                    case DataType.Int32:
                        rg.WriteColumn(new DataColumn(fields[i], (Int32?[])csvColumns[i]));
                        break;
                    case DataType.Int64:
                        rg.WriteColumn(new DataColumn(fields[i], (Int64?[])csvColumns[i]));
                        break;
                    case DataType.String:
                        rg.WriteColumn(new DataColumn(fields[i], (string[])csvColumns[i]));
                        break;
                    default:
                        throw new ArgumentOutOfRangeException(fields[i].DataType.ToString());
                }
            }
            else
            {
                switch (fields[i].DataType)
                {
                    case DataType.Boolean:
                        rg.WriteColumn(new DataColumn(fields[i], (bool[])csvColumns[i]));
                        break;
                    case DataType.DateTimeOffset:
                        rg.WriteColumn(new DataColumn(fields[i], (DateTimeOffset[])csvColumns[i]));
                        break;
                    case DataType.Decimal:
                        rg.WriteColumn(new DataColumn(fields[i], (decimal[])csvColumns[i]));
                        break;
                    case DataType.Double:
                        rg.WriteColumn(new DataColumn(fields[i], (double[])csvColumns[i]));
                        break;
                    case DataType.Float:
                        rg.WriteColumn(new DataColumn(fields[i], (float[])csvColumns[i]));
                        break;
                    case DataType.Int16:
                        rg.WriteColumn(new DataColumn(fields[i], (Int16[])csvColumns[i]));
                        break;
                    case DataType.Int32:
                        rg.WriteColumn(new DataColumn(fields[i], (Int32[])csvColumns[i]));
                        break;
                    case DataType.Int64:
                        rg.WriteColumn(new DataColumn(fields[i], (Int64[])csvColumns[i]));
                        break;
                    case DataType.String:
                        rg.WriteColumn(new DataColumn(fields[i], (string[])csvColumns[i]));
                        break;
                    default:
                        throw new ArgumentOutOfRangeException(fields[i].DataType.ToString());
                }
            }
        }
    }
}
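// Because DataColumn's constructor accepts any System.Array, the two switch blocks above
// could collapse to a single cast -- a sketch, assuming each csvColumns entry already holds
// a correctly typed array (e.g. int?[] or string[]) matching its field:
public static void WriteGroupCompact(List<Object> csvColumns, ParquetWriter writer, List<DataField> fields)
{
    using (ParquetRowGroupWriter rg = writer.CreateRowGroup())
    {
        for (int i = 0; i < fields.Count; i++)
        {
            rg.WriteColumn(new DataColumn(fields[i], (Array)csvColumns[i]));
        }
    }
}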
public static void TestFloatTimeSeries()
{
    var timer = Stopwatch.StartNew();
    var rand = new Random(123);

    Console.WriteLine("Generating data...");

    var dates = Enumerable.Range(0, 360) //*24*12)
        .Select(i => new DateTime(2001, 01, 01) + TimeSpan.FromHours(i))
        .Where(d => d.DayOfWeek != DayOfWeek.Saturday && d.DayOfWeek != DayOfWeek.Sunday)
        .ToArray();

    var objectIds = Enumerable.Range(0, 10000)
        .Select(i => rand.Next())
        .Distinct()
        .OrderBy(i => i)
        .ToArray();

    var values = dates.Select(d => objectIds.Select(o => (float)rand.NextDouble()).ToArray()).ToArray();

    Console.WriteLine("Generated {0:N0} rows in {1:N2} sec", values.Select(v => v.Length).Aggregate(0, (sum, l) => sum + l), timer.Elapsed.TotalSeconds);
    Console.WriteLine();

    Console.WriteLine("Saving to CSV");
    timer.Restart();

    using (var csv = new StreamWriter("float_timeseries.csv"))
    {
        for (int i = 0; i != dates.Length; ++i)
        {
            for (int j = 0; j != objectIds.Length; ++j)
            {
                csv.WriteLine("{0:yyyy-MM-dd HH:mm:ss},{1},{2}", dates[i], objectIds[j], values[i][j]);
            }
        }
    }

    Console.WriteLine("Saved to CSV ({0:N0} bytes) in {1:N2} sec", new FileInfo("float_timeseries.csv").Length, timer.Elapsed.TotalSeconds);
    Console.WriteLine();

    Console.WriteLine("Saving to CSV.GZ");
    timer.Restart();

    using (var stream = new FileStream("float_timeseries.csv.gz", FileMode.Create))
    using (var zip = new GZipStream(stream, CompressionLevel.Optimal))
    using (var csv = new StreamWriter(zip))
    {
        for (int i = 0; i != dates.Length; ++i)
        {
            for (int j = 0; j != objectIds.Length; ++j)
            {
                csv.WriteLine("{0:yyyy-MM-dd HH:mm:ss},{1},{2}", dates[i], objectIds[j], values[i][j]);
            }
        }
    }

    Console.WriteLine("Saved to CSV ({0:N0} bytes) in {1:N2} sec", new FileInfo("float_timeseries.csv.gz").Length, timer.Elapsed.TotalSeconds);
    Console.WriteLine();

    Console.WriteLine("Saving to Parquet");
    timer.Restart();

    using (var fileWriter = new ParquetFileWriter("float_timeseries.parquet", CreateFloatColumns()))
    using (var rowGroupWriter = fileWriter.AppendRowGroup())
    {
        using (var dateTimeWriter = rowGroupWriter.NextColumn().LogicalWriter<DateTime>())
        {
            for (int i = 0; i != dates.Length; ++i)
            {
                dateTimeWriter.WriteBatch(Enumerable.Repeat(dates[i], objectIds.Length).ToArray());
            }
        }

        using (var objectIdWriter = rowGroupWriter.NextColumn().LogicalWriter<int>())
        {
            for (int i = 0; i != dates.Length; ++i)
            {
                objectIdWriter.WriteBatch(objectIds);
            }
        }

        using (var valueWriter = rowGroupWriter.NextColumn().LogicalWriter<float>())
        {
            for (int i = 0; i != dates.Length; ++i)
            {
                valueWriter.WriteBatch(values[i]);
            }
        }
    }

    Console.WriteLine("Saved to Parquet ({0:N0} bytes) in {1:N2} sec", new FileInfo("float_timeseries.parquet").Length, timer.Elapsed.TotalSeconds);
    Console.WriteLine();

    Console.WriteLine("Saving to Parquet.Chunked (by date)");
    timer.Restart();

    using (var fileWriter = new ParquetFileWriter("float_timeseries.parquet.chunked", CreateFloatColumns()))
    {
        for (int i = 0; i != dates.Length; ++i)
        {
            using (var rowGroupWriter = fileWriter.AppendRowGroup())
            {
                using (var dateTimeWriter = rowGroupWriter.NextColumn().LogicalWriter<DateTime>())
                {
                    dateTimeWriter.WriteBatch(Enumerable.Repeat(dates[i], objectIds.Length).ToArray());
                }

                using (var objectIdWriter = rowGroupWriter.NextColumn().LogicalWriter<int>())
                {
                    objectIdWriter.WriteBatch(objectIds);
                }

                using (var valueWriter = rowGroupWriter.NextColumn().LogicalWriter<float>())
                {
                    valueWriter.WriteBatch(values[i]);
                }
            }
        }
    }

    Console.WriteLine("Saved to Parquet.Chunked ({0:N0} bytes) in {1:N2} sec", new FileInfo("float_timeseries.parquet.chunked").Length, timer.Elapsed.TotalSeconds);
    Console.WriteLine();

    Console.WriteLine("Saving to Parquet.RowOriented");
    timer.Restart();

    using (var rowWriter = ParquetFile.CreateRowWriter<(DateTime, int, float)>("float_timeseries.parquet.roworiented", new[] { "DateTime", "ObjectId", "Value" }))
    {
        for (int i = 0; i != dates.Length; ++i)
        {
            for (int j = 0; j != objectIds.Length; ++j)
            {
                rowWriter.WriteRow((dates[i], objectIds[j], values[i][j]));
            }
        }
    }

    Console.WriteLine("Saved to Parquet.RowOriented ({0:N0} bytes) in {1:N2} sec", new FileInfo("float_timeseries.parquet.roworiented").Length, timer.Elapsed.TotalSeconds);
    Console.WriteLine();

    Console.WriteLine("Saving to Parquet.NET");
    timer.Restart();

    {
        var dateTimeField = new DateTimeDataField("DateTime", DateTimeFormat.DateAndTime);
        var objectIdField = new DataField<int>("ObjectId");
        var valueField = new DataField<float>("Value");
        var schema = new Parquet.Data.Schema(dateTimeField, objectIdField, valueField);

        using (var stream = File.Create("float_timeseries.parquet.net"))
        using (var parquetWriter = new ParquetWriter(schema, stream))
        using (var groupWriter = parquetWriter.CreateRowGroup())
        {
            var dateTimeColumn = new DataColumn(dateTimeField, dates.SelectMany(d => Enumerable.Repeat(new DateTimeOffset(d), objectIds.Length)).ToArray());
            var objectIdColumn = new DataColumn(objectIdField, dates.SelectMany(d => objectIds).ToArray());
            var valueColumn = new DataColumn(valueField, dates.SelectMany((d, i) => values[i]).ToArray());

            groupWriter.WriteColumn(dateTimeColumn);
            groupWriter.WriteColumn(objectIdColumn);
            groupWriter.WriteColumn(valueColumn);
        }
    }

    Console.WriteLine("Saved to Parquet.NET ({0:N0} bytes) in {1:N2} sec", new FileInfo("float_timeseries.parquet.net").Length, timer.Elapsed.TotalSeconds);
}