Exemple #1
0
        /// <summary>
        /// Writes <c>_values</c> as a single non-nullable decimal(29, 3) column named "Value"
        /// to <c>decimal_timeseries.parquet.net</c> and reports the size of the resulting file.
        /// </summary>
        /// <returns>Length in bytes of <c>decimal_timeseries.parquet.net</c> after writing.</returns>
        public long ParquetDotNet()
        {
            const string outputPath = "decimal_timeseries.parquet.net";

            var column    = new DecimalDataField("Value", precision: 29, scale: 3, hasNulls: false);
            var schemaDef = new Parquet.Data.Schema(column);

            // Everything is disposed (innermost first) before the file is read back or measured below.
            using (var output = File.Create(outputPath))
            using (var writer = new ParquetWriter(schemaDef, output))
            using (var rowGroup = writer.CreateRowGroup())
            {
                rowGroup.WriteColumn(new DataColumn(column, _values));
            }

            if (Check.Enabled)
            {
                // Load the reference file and the file written above.
                // NOTE(review): "decimal_timeseries.parquet" is not written by this method;
                // presumably another benchmark produces it first - confirm.
                var expected = ReadFile("decimal_timeseries.parquet");
                var actual   = ReadFile(outputPath);

                // The reference must match the in-memory values, and this file must match the reference.
                Check.ArraysAreEqual(_values, expected);
                Check.ArraysAreEqual(expected, actual);
            }

            return new FileInfo(outputPath).Length;
        }
        /// <summary>
        /// Generates 10,000,000 pseudo-random decimals from a fixed seed (123) and writes them as a
        /// single decimal(29, 3) column named "Value", first to <c>decimal_timeseries.parquet</c> via
        /// <c>ParquetFileWriter</c> and then to <c>decimal_timeseries.parquet.net</c> via
        /// <c>ParquetWriter</c>. Row count, elapsed time and file sizes are printed to the console.
        /// </summary>
        /// <param name="warmup">
        /// Not read by the method body. NOTE(review): presumably exists only so the test
        /// framework runs the method once per listed value - confirm.
        /// </param>
        public static void TestDecimalSeries([Values(0, 1)] int warmup)
        {
            const int    rowCount       = 10_000_000;
            const string parquetPath    = "decimal_timeseries.parquet";
            const string parquetNetPath = "decimal_timeseries.parquet.net";

            var stopwatch = Stopwatch.StartNew();
            var random    = new Random(123);

            Console.WriteLine("Generating data...");

            var values = new decimal[rowCount];
            for (var i = 0; i < values.Length; ++i)
            {
                // Draw order is significant for reproducibility: magnitude first, then sign.
                var n    = random.Next();
                var sign = random.NextDouble() < 0.5 ? -1M : +1M;

                values[i] = sign * ((decimal)n * n * n) / 1000M;
            }

            Console.WriteLine("Generated {0:N0} rows in {1:N2} sec", values.Length, stopwatch.Elapsed.TotalSeconds);
            Console.WriteLine();
            Console.WriteLine("Saving to Parquet");

            stopwatch.Restart();

            var columns = new Column[] { new Column <decimal>("Value", LogicalType.Decimal(precision: 29, scale: 3)) };

            using (var fileWriter = new ParquetFileWriter(parquetPath, columns))
            {
                using (var rowGroupWriter = fileWriter.AppendRowGroup())
                using (var valueWriter = rowGroupWriter.NextColumn().LogicalWriter <decimal>())
                {
                    valueWriter.WriteBatch(values);
                }

                // Explicit close once the row group has been disposed, before the writer itself is disposed.
                fileWriter.Close();
            }

            Console.WriteLine("Saved to Parquet ({0:N0} bytes) in {1:N2} sec", new FileInfo(parquetPath).Length, stopwatch.Elapsed.TotalSeconds);
            Console.WriteLine();
            Console.WriteLine("Saving to Parquet.NET");

            stopwatch.Restart();

            var valueField = new DecimalDataField("Value", precision: 29, scale: 3);
            var schema     = new Parquet.Data.Schema(valueField);

            // Dispose everything (innermost first) before measuring the file size below.
            using (var stream = File.Create(parquetNetPath))
            using (var parquetWriter = new ParquetWriter(schema, stream))
            using (var groupWriter = parquetWriter.CreateRowGroup())
            {
                groupWriter.WriteColumn(new DataColumn(valueField, values));
            }

            Console.WriteLine("Saved to Parquet.NET ({0:N0} bytes) in {1:N2} sec", new FileInfo(parquetNetPath).Length, stopwatch.Elapsed.TotalSeconds);
        }
Exemple #3
0
        /// <summary>
        /// Builds the schema field that describes <paramref name="property"/>.
        /// </summary>
        /// <param name="property">The property to map to a field.</param>
        /// <returns>
        /// A <c>DataField</c> (or a TimeSpan/DateTime/Decimal specialisation of it when the property
        /// carries a <c>ParquetColumnAttribute</c>), or <c>null</c> when <c>DataTypeFactory.Match</c>
        /// finds no handler for the property's unwrapped type.
        /// </returns>
        private Field GetField(PropertyInfo property)
        {
            // The handler lookup works on the underlying type: strip nullability first, then the array wrapper.
            Type pt = property.PropertyType;

            if (pt.IsNullable())
            {
                pt = pt.GetNonNullable();
            }
            if (pt.IsArray)
            {
                pt = pt.GetElementType();
            }

            IDataTypeHandler handler = DataTypeFactory.Match(pt);

            if (handler == null)
            {
                return(null);
            }

            ParquetColumnAttribute columnAttr = property.GetCustomAttribute <ParquetColumnAttribute>();

            // An explicit attribute name wins over the CLR property name.
            string name = columnAttr?.Name ?? property.Name;

            var r = new DataField(name,
                                  property.PropertyType //use CLR type here as DF constructor will figure out nullability and other parameters
                                  );

            if (columnAttr != null)
            {
                // The handler has exactly one CLR type, so at most one branch applies.
                // Each specialised field reuses the name, nullability and array-ness of the plain field.
                Type clrType = handler.ClrType;

                if (clrType == typeof(TimeSpan))
                {
                    r = new TimeSpanDataField(r.Name, columnAttr.TimeSpanFormat, r.HasNulls, r.IsArray);
                }
                else if (clrType == typeof(DateTime) || clrType == typeof(DateTimeOffset))
                {
                    r = new DateTimeDataField(r.Name, columnAttr.DateTimeFormat, r.HasNulls, r.IsArray);
                }
                else if (clrType == typeof(decimal))
                {
                    r = new DecimalDataField(r.Name, columnAttr.DecimalPrecision, columnAttr.DecimalScale, columnAttr.DecimalForceByteArrayEncoding, r.HasNulls, r.IsArray);
                }
            }

            // Record the originating property name alongside the (possibly overridden) field name.
            r.ClrPropName = property.Name;

            return(r);
        }