public long ParquetDotNet()
{
    // Write the series with Parquet.NET into its own output file.
    {
        var field = new DecimalDataField("Value", precision: 29, scale: 3, hasNulls: false);
        var fileSchema = new Parquet.Data.Schema(field);

        using var outputStream = File.Create("decimal_timeseries.parquet.net");
        using var writer = new ParquetWriter(fileSchema, outputStream);
        using var rowGroup = writer.CreateRowGroup();

        rowGroup.WriteColumn(new DataColumn(field, _values));
    }

    if (Check.Enabled)
    {
        // Read content from ParquetSharp and Parquet.NET
        var fromParquetSharp = ReadFile("decimal_timeseries.parquet");
        var fromParquetNet = ReadFile("decimal_timeseries.parquet.net");

        // Prove that the content is the same
        Check.ArraysAreEqual(_values, fromParquetSharp);
        Check.ArraysAreEqual(fromParquetSharp, fromParquetNet);
    }

    // Benchmarks report the size of the file produced by Parquet.NET.
    return new FileInfo("decimal_timeseries.parquet.net").Length;
}
public static void TestDecimalSeries([Values(0, 1)] int warmup)
{
    var stopwatch = Stopwatch.StartNew();
    var random = new Random(123);

    Console.WriteLine("Generating data...");

    // Build 10M pseudo-random decimals with 3 fractional digits.
    // NOTE: the rand.Next()/rand.NextDouble() call order is significant for the
    // deterministic seed (123) and must not be reordered.
    var data = Enumerable.Range(0, 10_000_000).Select(i =>
    {
        var n = random.Next();
        var sign = random.NextDouble() < 0.5 ? -1M : +1M;
        return sign * ((decimal)n * n * n) / 1000M;
    }).ToArray();

    Console.WriteLine("Generated {0:N0} rows in {1:N2} sec", data.Length, stopwatch.Elapsed.TotalSeconds);
    Console.WriteLine();

    // --- ParquetSharp ---
    Console.WriteLine("Saving to Parquet");
    stopwatch.Restart();

    using (var fileWriter = new ParquetFileWriter("decimal_timeseries.parquet", new Column[]
    {
        new Column<decimal>("Value", LogicalType.Decimal(precision: 29, scale: 3))
    }))
    {
        using (var rowGroup = fileWriter.AppendRowGroup())
        {
            using var columnWriter = rowGroup.NextColumn().LogicalWriter<decimal>();
            columnWriter.WriteBatch(data);
        }

        fileWriter.Close();
    }

    Console.WriteLine("Saved to Parquet ({0:N0} bytes) in {1:N2} sec",
        new FileInfo("decimal_timeseries.parquet").Length, stopwatch.Elapsed.TotalSeconds);
    Console.WriteLine();

    // --- Parquet.NET ---
    Console.WriteLine("Saving to Parquet.NET");
    stopwatch.Restart();

    {
        var field = new DecimalDataField("Value", precision: 29, scale: 3);
        var fileSchema = new Parquet.Data.Schema(field);

        using var outputStream = File.Create("decimal_timeseries.parquet.net");
        using var writer = new ParquetWriter(fileSchema, outputStream);
        using var groupWriter = writer.CreateRowGroup();

        groupWriter.WriteColumn(new DataColumn(field, data));
    }

    Console.WriteLine("Saved to Parquet.NET ({0:N0} bytes) in {1:N2} sec",
        new FileInfo("decimal_timeseries.parquet.net").Length, stopwatch.Elapsed.TotalSeconds);
}
private Field GetField(PropertyInfo property)
{
    // Unwrap Nullable<T> and array element types so the handler lookup
    // sees the raw element CLR type.
    Type pt = property.PropertyType;
    if (pt.IsNullable())
    {
        pt = pt.GetNonNullable();
    }
    if (pt.IsArray)
    {
        pt = pt.GetElementType();
    }

    IDataTypeHandler handler = DataTypeFactory.Match(pt);
    if (handler == null)
    {
        // No Parquet mapping for this CLR type - caller skips the property.
        return null;
    }

    ParquetColumnAttribute columnAttr = property.GetCustomAttribute<ParquetColumnAttribute>();

    // Attribute-supplied column name wins over the property name.
    string name = columnAttr?.Name ?? property.Name;

    var r = new DataField(name,
                          property.PropertyType //use CLR type here as DF constructor will figure out nullability and other parameters
                          );

    if (columnAttr != null)
    {
        // Specialise the field when the attribute carries extra type hints.
        // The checks are mutually exclusive (a handler has one ClrType), so
        // at most one replacement happens.
        if (handler.ClrType == typeof(TimeSpan))
        {
            r = new TimeSpanDataField(r.Name, columnAttr.TimeSpanFormat, r.HasNulls, r.IsArray);
        }
        if (handler.ClrType == typeof(DateTime) || handler.ClrType == typeof(DateTimeOffset))
        {
            r = new DateTimeDataField(r.Name, columnAttr.DateTimeFormat, r.HasNulls, r.IsArray);
        }
        if (handler.ClrType == typeof(decimal))
        {
            r = new DecimalDataField(r.Name,
                                     columnAttr.DecimalPrecision, columnAttr.DecimalScale,
                                     columnAttr.DecimalForceByteArrayEncoding,
                                     r.HasNulls, r.IsArray);
        }
    }

    // Remember the originating CLR property so rows can be mapped back.
    r.ClrPropName = property.Name;

    return r;
}