public void WriteIntro()
{
    // Create data columns with schema metadata and the data to write.
    var idColumn = new DataColumn(
        new DataField<int>("id"),
        new int[] { 1, 2 });

    var cityColumn = new DataColumn(
        new DataField<string>("city"),
        new string[] { "London", "Derby" });

    // Create the file schema from the column fields.
    var schema = new Schema(idColumn.Field, cityColumn.Field);

    // File.Create truncates any existing file. File.OpenWrite does not, so a
    // previously-larger file would keep stale trailing bytes and the parquet
    // footer (read from the end of the file) would be corrupt.
    using (Stream fileStream = System.IO.File.Create("c:\\test.parquet"))
    {
        using (var parquetWriter = new ParquetWriter(schema, fileStream))
        {
            // Create a new row group in the file and write both columns into it.
            using (ParquetRowGroupWriter groupWriter = parquetWriter.CreateRowGroup(2))
            {
                groupWriter.WriteColumn(idColumn);
                groupWriter.WriteColumn(cityColumn);
            }
        }
    }
}
public void Write_read_nullable_column(Array input)
{
    // Round-trip a nullable int column through an in-memory parquet file.
    var idField = new DataField<int?>("id");
    var ms = new MemoryStream();

    using (var writer = new ParquetWriter(new Schema(idField), ms))
    {
        using (ParquetRowGroupWriter groupWriter = writer.CreateRowGroup())
        {
            groupWriter.WriteColumn(new DataColumn(idField, input));
        }
    }

    // Rewind before reading the file back.
    ms.Position = 0;

    using (var reader = new ParquetReader(ms))
    {
        Assert.Equal(1, reader.RowGroupCount);

        using (ParquetRowGroupReader groupReader = reader.OpenRowGroupReader(0))
        {
            // Row count and data must survive the round trip, nulls included.
            Assert.Equal(input.Length, groupReader.RowCount);
            Assert.Equal(input, groupReader.ReadColumn(idField).Data);
        }
    }
}
public void FileMetadata_sets_num_rows_on_file_and_row_group_multiple_row_groups()
{
    var ms = new MemoryStream();
    var idField = new DataField<int>("id");

    // Write two row groups: four rows, then two rows.
    using (var writer = new ParquetWriter(new Schema(idField), ms))
    {
        using (ParquetRowGroupWriter groupWriter = writer.CreateRowGroup())
        {
            groupWriter.WriteColumn(new DataColumn(idField, new[] { 1, 2, 3, 4 }));
        }

        using (ParquetRowGroupWriter groupWriter = writer.CreateRowGroup())
        {
            groupWriter.WriteColumn(new DataColumn(idField, new[] { 5, 6 }));
        }
    }

    // Read back: the file-level row count is the sum, each group keeps its own.
    using (var reader = new ParquetReader(ms))
    {
        Assert.Equal(6, reader.ThriftMetadata.Num_rows);

        using (ParquetRowGroupReader groupReader = reader.OpenRowGroupReader(0))
        {
            Assert.Equal(4, groupReader.RowCount);
        }

        using (ParquetRowGroupReader groupReader = reader.OpenRowGroupReader(1))
        {
            Assert.Equal(2, groupReader.RowCount);
        }
    }
}
public void CustomMetadata_can_write_and_read()
{
    var ms = new MemoryStream();
    var idField = new DataField<int>("id");

    // Write a file carrying two custom key/value metadata entries.
    using (var writer = new ParquetWriter(new Schema(idField), ms))
    {
        writer.CustomMetadata = new Dictionary<string, string>
        {
            ["key1"] = "value1",
            ["key2"] = "value2"
        };

        using (ParquetRowGroupWriter groupWriter = writer.CreateRowGroup())
        {
            groupWriter.WriteColumn(new DataColumn(idField, new[] { 1, 2, 3, 4 }));
        }
    }

    // Read back and confirm both metadata entries survived the round trip.
    using (var reader = new ParquetReader(ms))
    {
        Assert.Equal("value1", reader.CustomMetadata["key1"]);
        Assert.Equal("value2", reader.CustomMetadata["key2"]);
    }
}
public static void WriteAthenaRowsAsParquet(this Stream stream, ResultSetMetadata tableSchema, List <FieldMapping> mappings, IEnumerable <Row> rows)
{
    // Convert every Athena column into a parquet data column, preserving
    // column order; the index is forwarded so mappings resolve positionally.
    var columns = tableSchema.ColumnInfo
        .Select((column, index) => column.ToParquetColumn(mappings, index, rows))
        .ToList();

    // Build the schema from the column fields, in the same order.
    Schema schema = new Schema(new ReadOnlyCollection<Field>(columns.Select(c => c.Field).ToArray()));

    using (ParquetWriter writer = new ParquetWriter(schema, stream))
    {
        writer.CompressionMethod = CompressionMethod.Snappy;

        // All columns go into a single row group.
        using (ParquetRowGroupWriter rowGroupWriter = writer.CreateRowGroup())
        {
            foreach (DataColumn column in columns)
            {
                rowGroupWriter.WriteColumn(column);
            }
        }
    }
}
static void ConvertCsvToParquet(string inputFile, string outputFile)
{
    // Accumulates parsed values per column name, in row order.
    var data = new Dictionary<string, ArrayList>();

    using (var reader = new StreamReader(inputFile, true))
    {
        var header = reader.ReadLine();
        if (header == null)
        {
            // Original code would NRE on header.Split for an empty file.
            throw new InvalidDataException($"'{inputFile}' is empty: no CSV header row found.");
        }

        var columns = header.Split(",");
        for (int i = 0; i < columns.Length; i++)
        {
            columns[i] = columns[i].Trim();
        }

        while (!reader.EndOfStream)
        {
            var line = reader.ReadLine();
            if (String.IsNullOrEmpty(line))
            {
                continue;
            }

            var parts = line.Split(",");
            for (int i = 0; i < parts.Length && i < columns.Length; i++)
            {
                var column = columns[i];

                // Only columns with a known parquet type mapping are kept.
                if (!parquet_types.ContainsKey(column))
                {
                    continue;
                }

                // TryGetValue avoids the ContainsKey + indexer double lookup.
                if (!data.TryGetValue(column, out ArrayList values))
                {
                    values = new ArrayList();
                    data.Add(column, values);
                }

                values.Add(ParseValue(parquet_types[column], parts[i]));
            }
        }
    }

    // Build one typed data column per known parquet type. A column that never
    // appeared in the CSV (or collected no values) gets an empty array instead
    // of throwing KeyNotFoundException as the original did.
    var datacolumns = parquet_types.Select(x => new DataColumn(
        CreateParquetField(x.Key, x.Value),
        (data.TryGetValue(x.Key, out ArrayList list) ? list : new ArrayList())
            .ToArray(ConvertParquetType(x.Value)))).ToArray();

    var schema = new Schema(datacolumns.Select(x => (Field)x.Field).ToArray());

    // File.Create truncates an existing output file; File.OpenWrite would leave
    // stale trailing bytes behind, corrupting the parquet footer on overwrite.
    using (Stream fileStream = System.IO.File.Create(outputFile))
    {
        using (var parquetWriter = new ParquetWriter(schema, fileStream))
        {
            // Write all columns into a single row group.
            using (ParquetRowGroupWriter groupWriter = parquetWriter.CreateRowGroup())
            {
                foreach (var column in datacolumns)
                {
                    groupWriter.WriteColumn(column);
                }
            }
        }
    }
}
public void Write_multiple_row_groups_to_forward_only_stream()
{
    var ms = new MemoryStream();
    var forwardOnly = new WriteableNonSeekableStream(ms);
    var schema = new Schema(
        new DataField<int>("id"),
        new DataField<string>("nonsense"));
    var idField = (DataField)schema[0];
    var nonsenseField = (DataField)schema[1];

    // Write two single-row groups through the non-seekable wrapper.
    using (var writer = new ParquetWriter(schema, forwardOnly))
    {
        using (ParquetRowGroupWriter groupWriter = writer.CreateRowGroup(1))
        {
            groupWriter.WriteColumn(new DataColumn(idField, new[] { 1 }));
            groupWriter.WriteColumn(new DataColumn(nonsenseField, new[] { "1" }));
        }

        using (ParquetRowGroupWriter groupWriter = writer.CreateRowGroup(1))
        {
            groupWriter.WriteColumn(new DataColumn(idField, new[] { 2 }));
            groupWriter.WriteColumn(new DataColumn(nonsenseField, new[] { "2" }));
        }
    }

    // Read back through the seekable backing stream and verify both groups.
    ms.Position = 0;
    using (var reader = new ParquetReader(ms))
    {
        Assert.Equal(2, reader.RowGroupCount);

        for (int group = 0; group < 2; group++)
        {
            using (ParquetRowGroupReader groupReader = reader.OpenRowGroupReader(group))
            {
                Assert.Equal(1, groupReader.RowCount);
                DataColumn column = groupReader.ReadColumn(idField);
                Assert.Equal(group + 1, column.Data.GetValue(0));
            }
        }
    }
}
public static void BuildParquetFile(DataColumn license, DataColumn sensor, DataColumn time, string outPath)
{
    // The schema mirrors the three supplied columns, in write order.
    var schema = new Schema(license.Field, sensor.Field, time.Field);

    using (Stream fileStream = File.Create(outPath))
    using (var parquetWriter = new ParquetWriter(schema, fileStream))
    {
        parquetWriter.CompressionMethod = CompressionMethod.Gzip;

        // One row group containing all three columns.
        using (ParquetRowGroupWriter groupWriter = parquetWriter.CreateRowGroup())
        {
            groupWriter.WriteColumn(license);
            groupWriter.WriteColumn(sensor);
            groupWriter.WriteColumn(time);
        }
    }
}
//TODO: Implement this class from scratch by leveraging grouping queries in the DB engine
//TODO: Unit & Integration Test
//https://stackoverflow.com/questions/50933429/how-to-view-apache-parquet-file-in-windows
//https://github.com/elastacloud/parquet-dotnet
public void WriteData(IEnumerable <PriceForecast> data, string basePath)
{
    // Partition forecasts into Country/Category/Year/Month directories
    // (hive-style partitioning) and write one parquet file per leaf partition.
    foreach (var countryGroup in data.GroupBy(f => f.CountryCode))
    {
        var country = countryGroup.Key;

        // GroupBy enumerates the IGrouping directly; the intermediate
        // .ToList() calls in the original were redundant allocations.
        foreach (var categoryGroup in countryGroup.GroupBy(f => f.Category))
        {
            var category = categoryGroup.Key;

            foreach (var yearGroup in categoryGroup.GroupBy(f => f.ForecastedDate.Year))
            {
                var year = yearGroup.Key;

                foreach (var monthGroup in yearGroup.GroupBy(f => f.ForecastedDate.Month))
                {
                    var month = monthGroup.Key;
                    var forecasts = monthGroup.ToList();
                    var dirPath = $"{basePath}/Country={country}/Category={category}/Year={year}/Month={month}/";
                    var filePath = dirPath + "forecast.parquet";

                    //TODO: automating schema generation using reflection and attributes
                    var columns = new DataColumn[]
                    {
                        new DataColumn(ParquetSchemaHelper.ForecastDateField, forecasts.Select(f => f.ForecastDateTime.ToString()).ToArray()),
                        new DataColumn(ParquetSchemaHelper.ForecastModelField, forecasts.Select(f => f.ForecastModel).ToArray()),
                        new DataColumn(ParquetSchemaHelper.MarketField, forecasts.Select(f => f.Market).ToArray()),
                        new DataColumn(ParquetSchemaHelper.ProductField, forecasts.Select(f => f.Product).ToArray()),
                        new DataColumn(ParquetSchemaHelper.CountryField, forecasts.Select(f => f.CountryCode).ToArray()),
                        new DataColumn(ParquetSchemaHelper.ForecastedDateField, forecasts.Select(f => f.ForecastedDate.ToString()).ToArray()),
                        new DataColumn(ParquetSchemaHelper.CategoryField, forecasts.Select(f => f.Category).ToArray()),
                        new DataColumn(ParquetSchemaHelper.PriceField, forecasts.Select(f => f.Price).ToArray())
                    };

                    var schema = new Schema(columns.Select(c => c.Field).ToArray());

                    Directory.CreateDirectory(dirPath); // safe if it already exists

                    // File.Create truncates an existing file; File.OpenWrite would
                    // leave stale trailing bytes and corrupt the parquet footer
                    // when a partition is rewritten with fewer rows.
                    using Stream fileStream = System.IO.File.Create(filePath);
                    using var parquetWriter = new ParquetWriter(schema, fileStream);
                    using (ParquetRowGroupWriter groupWriter = parquetWriter.CreateRowGroup())
                    {
                        foreach (var col in columns)
                        {
                            groupWriter.WriteColumn(col);
                        }
                    }
                }
            }
        }
    }
}
public void Write_in_small_row_groups()
{
    // Write a single file containing three row groups of one row each.
    var idField = new DataField<int>("id");
    var ms = new MemoryStream();

    using (var writer = new ParquetWriter(new Schema(idField), ms))
    {
        for (int value = 1; value <= 3; value++)
        {
            using (ParquetRowGroupWriter groupWriter = writer.CreateRowGroup())
            {
                groupWriter.WriteColumn(new DataColumn(idField, new int[] { value }));
            }
        }
    }

    // Read the file back and validate each group independently.
    ms.Position = 0;
    using (var reader = new ParquetReader(ms))
    {
        Assert.Equal(3, reader.RowGroupCount);

        for (int group = 0; group < 3; group++)
        {
            using (ParquetRowGroupReader groupReader = reader.OpenRowGroupReader(group))
            {
                Assert.Equal(1, groupReader.RowCount);
                DataColumn dc = groupReader.ReadColumn(idField);
                Assert.Equal(new int[] { group + 1 }, dc.Data);
            }
        }
    }
}
public void Cannot_write_columns_in_wrong_order()
{
    // Writing the second schema column before the first must be rejected.
    var schema = new Schema(new DataField<int>("id"), new DataField<int>("id2"));

    using (var writer = new ParquetWriter(schema, new MemoryStream()))
    using (ParquetRowGroupWriter groupWriter = writer.CreateRowGroup())
    {
        Assert.Throws<ArgumentException>(
            () => groupWriter.WriteColumn(new DataColumn((DataField)schema[1], new int[] { 1 })));
    }
}
public static void WriteParquetColumns(this Stream stream, List <DataColumn> columns)
{
    // Derive the schema directly from the supplied columns, preserving order.
    Field[] fields = columns.Select(column => column.Field).ToArray();
    Schema schema = new Schema(new ReadOnlyCollection<Field>(fields));

    using (ParquetWriter writer = new ParquetWriter(schema, stream))
    {
        writer.CompressionMethod = CompressionMethod.Snappy;

        // All columns are written into a single row group.
        using (ParquetRowGroupWriter rowGroupWriter = writer.CreateRowGroup())
        {
            foreach (DataColumn column in columns)
            {
                rowGroupWriter.WriteColumn(column);
            }
        }
    }
}
public static void WriteParquet <T>(this Stream stream, IEnumerable <T> items) where T : class
{
    // Materialize once: the original re-enumerated 'items' from scratch for
    // every property, which multiplies work and breaks one-shot sequences.
    IList<T> itemList = items as IList<T> ?? items.ToList();

    Type classType = typeof(T);
    var properties = classType.GetProperties();
    List<DataColumn> columns = new List<DataColumn>();

    foreach (var prop in properties)
    {
        if (prop.PropertyType == DateTimeType)
        {
            // DateTime is special-cased: it is stored as DateTimeOffset.
            columns.Add(new DataColumn(
                new DateTimeDataField(prop.Name, DateTimeFormat.DateAndTime),
                itemList.Select(item => new DateTimeOffset(((DateTime)prop.GetValue(item)))).ToArray()));
        }
        else
        {
            // Build DataField<TProp> and a typed TProp[] via reflection, since
            // the property type is only known at runtime:
            // Cast<TProp>(values).ToArray() produces the typed array.
            var genericArguments = new Type[] { prop.PropertyType };
            var genericType = DataFieldGenericType.MakeGenericType(genericArguments);
            var genericConstructor = genericType.GetConstructor(DataFieldConstructorGenericArguments);
            DataField field = genericConstructor.Invoke(new object[] { prop.Name }) as DataField;

            var dataSource = itemList.Select(item => prop.GetValue(item));
            var castMethod = CastMethodGeneric.MakeGenericMethod(genericArguments);
            var toArrayMethod = ToArrayMethodGeneric.MakeGenericMethod(genericArguments);
            var data = toArrayMethod.Invoke(null, new object[] { castMethod.Invoke(null, new object[] { dataSource }) }) as Array;

            columns.Add(new DataColumn(field, data));
        }
    }

    Schema schema = new Schema(new ReadOnlyCollection<Field>(columns.Select(column => column.Field).ToArray()));

    using (ParquetWriter writer = new ParquetWriter(schema, stream))
    {
        writer.CompressionMethod = CompressionMethod.Snappy;

        // All rows go into a single row group.
        using (ParquetRowGroupWriter rowGroupWriter = writer.CreateRowGroup())
        {
            foreach (var column in columns)
            {
                rowGroupWriter.WriteColumn(column);
            }
        }
    }
}
private static void WriteDataInFile(string path, List <DataColumn> schemaColumns)
{
    // Build the schema from the column fields, preserving column order.
    var schema = new Schema(schemaColumns.ConvertAll(col => col.Field));

    using (Stream fileStream = File.Create(path))
    using (var parquetWriter = new ParquetWriter(schema, fileStream))
    using (ParquetRowGroupWriter groupWriter = parquetWriter.CreateRowGroup())
    {
        // Write every column into the single row group.
        foreach (DataColumn column in schemaColumns)
        {
            groupWriter.WriteColumn(column);
        }
    }
}
public void Append_to_file_reads_all_data()
{
    // Write a file containing a single row group.
    var idField = new DataField<int>("id");
    var ms = new MemoryStream();

    using (var writer = new ParquetWriter(new Schema(idField), ms))
    using (ParquetRowGroupWriter groupWriter = writer.CreateRowGroup())
    {
        groupWriter.WriteColumn(new DataColumn(idField, new int[] { 1, 2 }));
    }

    // Append to the file. An existing row group cannot be appended to,
    // so a new one is created.
    ms.Position = 0;
    using (var writer = new ParquetWriter(new Schema(idField), ms, append: true))
    using (ParquetRowGroupWriter groupWriter = writer.CreateRowGroup())
    {
        groupWriter.WriteColumn(new DataColumn(idField, new int[] { 3, 4 }));
    }

    // The file should now contain two row groups with all data intact.
    ms.Position = 0;
    using (var reader = new ParquetReader(ms))
    {
        Assert.Equal(2, reader.RowGroupCount);

        using (ParquetRowGroupReader groupReader = reader.OpenRowGroupReader(0))
        {
            Assert.Equal(2, groupReader.RowCount);
            Assert.Equal(new int[] { 1, 2 }, groupReader.ReadColumn(idField).Data);
        }

        using (ParquetRowGroupReader groupReader = reader.OpenRowGroupReader(1))
        {
            Assert.Equal(2, groupReader.RowCount);
            Assert.Equal(new int[] { 3, 4 }, groupReader.ReadColumn(idField).Data);
        }
    }
}
protected object WriteReadSingle(DataField field, object value, CompressionMethod compressionMethod = CompressionMethod.None, int compressionLevel = -1)
{
    // For sanity, use disconnected streams for the write and read phases.
    byte[] fileBytes;

    using (var writeStream = new MemoryStream())
    {
        // Write the single value as a one-element column.
        using (var writer = new ParquetWriter(new Schema(field), writeStream))
        {
            writer.CompressionMethod = compressionMethod;
            writer.CompressionLevel = compressionLevel;

            using (ParquetRowGroupWriter groupWriter = writer.CreateRowGroup())
            {
                // The element type honours the field's nullability.
                Array dataArray = Array.CreateInstance(field.ClrNullableIfHasNullsType, 1);
                dataArray.SetValue(value, 0);
                groupWriter.WriteColumn(new DataColumn(field, dataArray));
            }
        }

        fileBytes = writeStream.ToArray();
    }

    using (var readStream = new MemoryStream(fileBytes))
    {
        // Read the single value back from the first row group.
        readStream.Position = 0;

        using (var reader = new ParquetReader(readStream))
        using (ParquetRowGroupReader rowGroupReader = reader.OpenRowGroupReader(0))
        {
            DataColumn column = rowGroupReader.ReadColumn(field);
            return column.Data.GetValue(0);
        }
    }
}
private static void ReadLargeFile(out TimeSpan readTime, out TimeSpan uncompressedWriteTime, out TimeSpan gzipWriteTime)
{
    Schema schema;
    DataColumn[] columns;

    // Time reading every column of the first row group from the sample file.
    using (var time = new TimeMeasure())
    {
        using (var reader = ParquetReader.OpenFromFile(
            @"C:\dev\parquet-dotnet\src\Parquet.Test\data\customer.impala.parquet",
            new ParquetOptions { TreatByteArrayAsString = true }))
        {
            schema = reader.Schema;
            var columnList = new List<DataColumn>();

            using (ParquetRowGroupReader rowGroupReader = reader.OpenRowGroupReader(0))
            {
                foreach (DataField field in reader.Schema.GetDataFields())
                {
                    columnList.Add(rowGroupReader.ReadColumn(field));
                }
            }

            columns = columnList.ToArray();
        }

        readTime = time.Elapsed;
    }

    // Time writing the same data back without compression.
    using (FileStream dest = F.OpenWrite("perf.uncompressed.parquet"))
    {
        using (var time = new TimeMeasure())
        {
            using (var writer = new ParquetWriter(schema, dest))
            {
                writer.CompressionMethod = CompressionMethod.None;

                using (ParquetRowGroupWriter rowGroup = writer.CreateRowGroup())
                {
                    foreach (DataColumn column in columns)
                    {
                        rowGroup.WriteColumn(column);
                    }
                }
            }

            uncompressedWriteTime = time.Elapsed;
        }
    }

    // Time writing the same data with gzip compression.
    using (FileStream dest = F.OpenWrite("perf.gzip.parquet"))
    {
        using (var time = new TimeMeasure())
        {
            using (var writer = new ParquetWriter(schema, dest))
            {
                writer.CompressionMethod = CompressionMethod.Gzip;

                using (ParquetRowGroupWriter rowGroup = writer.CreateRowGroup())
                {
                    foreach (DataColumn column in columns)
                    {
                        rowGroup.WriteColumn(column);
                    }
                }
            }

            gzipWriteTime = time.Elapsed;
        }
    }
}
public void SaveParquet(IEnumerable <TrainingWindTurbineRecord> records, string outputFile)
{
    // Materialize once: the original re-enumerated 'records' for every one of
    // the 22 columns, which multiplies work and breaks one-shot sequences.
    IList<TrainingWindTurbineRecord> recordList =
        records as IList<TrainingWindTurbineRecord> ?? records.ToList();

    // One entry per column, in the exact order used by both the schema and
    // the row-group writes (order must match for parquet writing to succeed).
    var columns = new DataColumn[]
    {
        new DataColumn(new DataField<int>("TurbineId"), recordList.Select(x => x.TurbineId).ToArray()),
        new DataColumn(new DataField<double>("GearboxOilLevel"), recordList.Select(x => x.GearboxOilLevel).ToArray()),
        new DataColumn(new DataField<double>("GearboxOilTemp"), recordList.Select(x => x.GearboxOilTemp).ToArray()),
        new DataColumn(new DataField<double>("GeneratorActivePower"), recordList.Select(x => x.GeneratorActivePower).ToArray()),
        new DataColumn(new DataField<double>("GeneratorSpeed"), recordList.Select(x => x.GeneratorSpeed).ToArray()),
        new DataColumn(new DataField<double>("GeneratorTemp"), recordList.Select(x => x.GeneratorTemp).ToArray()),
        new DataColumn(new DataField<double>("GeneratorTorque"), recordList.Select(x => x.GeneratorTorque).ToArray()),
        new DataColumn(new DataField<double>("GridFrequency"), recordList.Select(x => x.GridFrequency).ToArray()),
        new DataColumn(new DataField<double>("GridVoltage"), recordList.Select(x => x.GridVoltage).ToArray()),
        new DataColumn(new DataField<double>("HydraulicOilPressure"), recordList.Select(x => x.HydraulicOilPressure).ToArray()),
        new DataColumn(new DataField<double>("NacelleAngle"), recordList.Select(x => x.NacelleAngle).ToArray()),
        new DataColumn(new DataField<double>("OverallWindDirection"), recordList.Select(x => x.OverallWindDirection).ToArray()),
        new DataColumn(new DataField<double>("WindSpeedStdDev"), recordList.Select(x => x.WindSpeedStdDev).ToArray()),
        new DataColumn(new DataField<bool>("Precipitation"), recordList.Select(x => x.Precipitation).ToArray()),
        new DataColumn(new DataField<double>("TurbineWindDirection"), recordList.Select(x => x.TurbineWindDirection).ToArray()),
        new DataColumn(new DataField<double>("TurbineSpeedStdDev"), recordList.Select(x => x.TurbineSpeedStdDev).ToArray()),
        new DataColumn(new DataField<double>("WindSpeedAverage"), recordList.Select(x => x.WindSpeedAverage).ToArray()),
        new DataColumn(new DataField<double>("WindTempAverage"), recordList.Select(x => x.WindTempAverage).ToArray()),
        new DataColumn(new DataField<double>("PitchAngle"), recordList.Select(x => x.PitchAngle).ToArray()),
        new DataColumn(new DataField<double>("Vibration"), recordList.Select(x => x.Vibration).ToArray()),
        new DataColumn(new DataField<double>("TurbineSpeedAverage"), recordList.Select(x => x.TurbineSpeedAverage).ToArray()),
        new DataColumn(new DataField<bool>("AlterBlades"), recordList.Select(x => x.AlterBlades).ToArray()),
    };

    // Create the file schema from the column fields (same order as above).
    var schema = new Schema(columns.Select(c => c.Field).ToArray());

    var outputPath = Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().Location);

    // File.Create truncates an existing file; File.OpenWrite would leave stale
    // trailing bytes behind and corrupt the parquet footer on overwrite.
    using (Stream fileStream = File.Create(Path.Combine(outputPath, $"{outputFile}.parquet")))
    {
        using (var parquetWriter = new ParquetWriter(schema, fileStream))
        {
            // Create a new row group and write every column into it, in order.
            using (ParquetRowGroupWriter groupWriter = parquetWriter.CreateRowGroup())
            {
                foreach (DataColumn column in columns)
                {
                    groupWriter.WriteColumn(column);
                }
            }
        }
    }
}
private void WriteGroup(ParquetWriter parquetWriter)
{
    AddMandatoryFields();

    using (ParquetRowGroupWriter groupWriter = parquetWriter.CreateRowGroup())
    {
        foreach (var entry in _group.Data)
        {
            // Skip properties that have no corresponding parquet field.
            if (!Fields.TryGetValue(entry.Key, out var field))
            {
                continue;
            }

            var data = entry.Value;
            Array buffer;

            // Reuse one cached buffer per data type (lazily allocated on first
            // use, sized to the first column's row count) instead of allocating
            // a fresh array for every column.
            switch (field.DataType)
            {
                case DataType.Boolean: buffer = _boolArr ??= new bool[data.Count]; break;
                case DataType.Byte: buffer = _byteArr ??= new byte[data.Count]; break;
                case DataType.SignedByte: buffer = _sbyteArr ??= new sbyte[data.Count]; break;
                case DataType.Short: buffer = _shortArr ??= new short[data.Count]; break;
                case DataType.Int32: buffer = _intArr ??= new int[data.Count]; break;
                case DataType.Int64: buffer = _longArr ??= new long[data.Count]; break;
                case DataType.UnsignedInt16: buffer = _ushortArr ??= new ushort[data.Count]; break;
                case DataType.UnsignedInt32: buffer = _uintArr ??= new uint[data.Count]; break;
                case DataType.UnsignedInt64: buffer = _ulongArr ??= new ulong[data.Count]; break;
                case DataType.Float: buffer = _floatArr ??= new float[data.Count]; break;
                case DataType.Double: buffer = _doubleArr ??= new double[data.Count]; break;
                case DataType.Decimal: buffer = _decimalArr ??= new decimal[data.Count]; break;
                case DataType.String: buffer = _strArr ??= new string[data.Count]; break;
                case DataType.DateTimeOffset: buffer = _dtoArr ??= new DateTimeOffset[data.Count]; break;
                case DataType.TimeSpan: buffer = _tsArr ??= new TimeSpan[data.Count]; break;
                default:
                    ThrowUnsupportedDataType(field.DataType);
                    return;
            }

            // Every column in the group is expected to have the same row count
            // as the column that first sized the cached buffer.
            Debug.Assert(buffer.Length == data.Count, $"Invalid field data on property '{entry.Key}'");

            data.CopyTo(buffer, 0);
            groupWriter.WriteColumn(new DataColumn(field, buffer));
        }
    }

    // Drop the cached buffers so the next group re-sizes them to its own row count.
    _boolArr = null; _strArr = null; _dtoArr = null; _tsArr = null;
    _byteArr = null; _sbyteArr = null; _shortArr = null; _intArr = null;
    _longArr = null; _ushortArr = null; _uintArr = null; _ulongArr = null;
    _floatArr = null; _doubleArr = null; _decimalArr = null;
}
/// <summary>
/// Writes dataLen rows and typed columns to the file.
/// </summary>
/// <param name="csvColumns">Processed CSV data</param>
/// <param name="dataLen">Row count</param>
/// <param name="writer">ParquetWriter</param>
/// <param name="fields">Field structure</param>
/// <param name="config">Config structure</param>
public static void WriteGroup(List <Object> csvColumns, long dataLen, ParquetWriter writer, List <DataField> fields, Config config)
{
    using (ParquetRowGroupWriter rowGroup = writer.CreateRowGroup())
    {
        for (int i = 0; i < fields.Count; i++)
        {
            DataField field = fields[i];
            object columnData = csvColumns[i];

            if (field.HasNulls)
            {
                // Nullable fields: the cast enforces that the parsed array's
                // element type matches the declared field type at runtime.
                switch (field.DataType)
                {
                    case DataType.Boolean:
                        rowGroup.WriteColumn(new DataColumn(field, (bool?[])columnData));
                        break;
                    case DataType.DateTimeOffset:
                        rowGroup.WriteColumn(new DataColumn(field, (DateTimeOffset?[])columnData));
                        break;
                    case DataType.Decimal:
                        rowGroup.WriteColumn(new DataColumn(field, (decimal?[])columnData));
                        break;
                    case DataType.Double:
                        rowGroup.WriteColumn(new DataColumn(field, (double?[])columnData));
                        break;
                    case DataType.Float:
                        rowGroup.WriteColumn(new DataColumn(field, (float?[])columnData));
                        break;
                    case DataType.Int16:
                        rowGroup.WriteColumn(new DataColumn(field, (Int16?[])columnData));
                        break;
                    case DataType.Int32:
                        rowGroup.WriteColumn(new DataColumn(field, (Int32?[])columnData));
                        break;
                    case DataType.Int64:
                        rowGroup.WriteColumn(new DataColumn(field, (Int64?[])columnData));
                        break;
                    case DataType.String:
                        // Strings are reference types, so the array is string[]
                        // regardless of nullability.
                        rowGroup.WriteColumn(new DataColumn(field, (string[])columnData));
                        break;
                    default:
                        throw new ArgumentOutOfRangeException(field.DataType.ToString());
                }
            }
            else
            {
                // Non-nullable fields use the plain value-type arrays.
                switch (field.DataType)
                {
                    case DataType.Boolean:
                        rowGroup.WriteColumn(new DataColumn(field, (bool[])columnData));
                        break;
                    case DataType.DateTimeOffset:
                        rowGroup.WriteColumn(new DataColumn(field, (DateTimeOffset[])columnData));
                        break;
                    case DataType.Decimal:
                        rowGroup.WriteColumn(new DataColumn(field, (decimal[])columnData));
                        break;
                    case DataType.Double:
                        rowGroup.WriteColumn(new DataColumn(field, (double[])columnData));
                        break;
                    case DataType.Float:
                        rowGroup.WriteColumn(new DataColumn(field, (float[])columnData));
                        break;
                    case DataType.Int16:
                        rowGroup.WriteColumn(new DataColumn(field, (Int16[])columnData));
                        break;
                    case DataType.Int32:
                        rowGroup.WriteColumn(new DataColumn(field, (Int32[])columnData));
                        break;
                    case DataType.Int64:
                        rowGroup.WriteColumn(new DataColumn(field, (Int64[])columnData));
                        break;
                    case DataType.String:
                        rowGroup.WriteColumn(new DataColumn(field, (string[])columnData));
                        break;
                    default:
                        throw new ArgumentOutOfRangeException(field.DataType.ToString());
                }
            }
        }
    }
}