/// <summary>
/// Builds a <see cref="Frame"/> from a parquet data set by creating one
/// <see cref="Series"/> per schema element.
/// </summary>
/// <param name="pds">Data set whose schema elements and columns are read.</param>
/// <returns>A new <see cref="Frame"/> constructed from the per-element series.</returns>
public static Frame ConvertFromParquet(PDS pds)
{
    // The position of an element within the schema is also the index used to fetch its column.
    IEnumerable<Series> columns = pds.Schema.Elements.Select(
        (element, position) => Series.FromList(
            pds.GetColumn(position),
            element.ElementType,
            element.Name));

    return new Frame(columns);
}
/// <summary>
/// Converts a parquet data set into a <see cref="DataTable"/>.
/// </summary>
/// <remarks>
/// One column is created per data field of the schema, and one row per row of the data set.
/// Fields of type <c>DateTimeOffset</c> are exposed as <see cref="DateTime"/> columns.
/// The rows of <paramref name="dataset"/> are left untouched: when a conversion is needed,
/// it is applied to a copy of the row's values.
/// </remarks>
/// <param name="dataset">The data set to convert. May be <c>null</c>.</param>
/// <returns>
/// The populated table, or an empty table without columns when <paramref name="dataset"/> is <c>null</c>.
/// </returns>
/// <exception cref="ArgumentException">
/// The number of data fields in the schema differs from the data set's field count.
/// </exception>
public static DataTable ParquetDataSetToDataTable(Parquet.Data.DataSet dataset)
{
    DataTable datatable = new DataTable();
    if (dataset == null)
    {
        return datatable;
    }

    // Fetch the data fields once; they drive both the validation and the column creation.
    var dataFields = dataset.Schema.GetDataFields();
    if (dataFields.Count != dataset.FieldCount)
    {
        throw new ArgumentException("The provided dataset has some unsupported data types such as Lists, Maps or Structs");
    }

    // Positions of the columns whose values must be converted from DateTimeOffset to DateTime.
    List<int> datetimeOffsetFieldIndexes = new List<int>();
    int index = 0;
    foreach (Parquet.Data.DataField field in dataFields)
    {
        if (field.DataType == Parquet.Data.DataType.DateTimeOffset)
        {
            datetimeOffsetFieldIndexes.Add(index);
        }

        DataColumn newColumn = datatable.Columns.Add(field.Name, GetDataTableColumnType(field.DataType));
        newColumn.AllowDBNull = field.HasNulls;
        index++;
    }

    foreach (Parquet.Data.Row row in dataset)
    {
        object[] rawValues = row.RawValues;

        if (datetimeOffsetFieldIndexes.Count > 0)
        {
            // Work on a copy so the caller's data set is not modified; converting in place
            // would also make a second conversion of the same data set fail on the cast below.
            rawValues = (object[])rawValues.Clone();

            foreach (int datetimeOffsetIndex in datetimeOffsetFieldIndexes)
            {
                // Null values and values that are not DateTimeOffset are passed through unchanged.
                if (rawValues[datetimeOffsetIndex] is DateTimeOffset)
                {
                    // The DateTime property ignores the Offset value.
                    // NOTE(review): confirm whether a parquet file can carry a non-zero offset.
                    rawValues[datetimeOffsetIndex] = ((DateTimeOffset)rawValues[datetimeOffsetIndex]).DateTime;
                }
            }
        }

        DataRow dataRow = datatable.NewRow();
        dataRow.ItemArray = rawValues;
        datatable.Rows.Add(dataRow);
    }

    return datatable;
}

/// <summary>
/// Maps a parquet data type to the CLR type used for the matching <see cref="DataTable"/> column.
/// </summary>
/// <param name="dataType">The parquet data type of the field.</param>
/// <returns>The column type; <see cref="string"/> for <c>String</c> and for any type not listed.</returns>
private static Type GetDataTableColumnType(Parquet.Data.DataType dataType)
{
    switch (dataType)
    {
        case Parquet.Data.DataType.Boolean:
            return typeof(bool);

        // NOTE(review): Byte maps to sbyte while UnsignedByte maps to byte; confirm against the parquet library's type mapping.
        case Parquet.Data.DataType.Byte:
            return typeof(sbyte);

        case Parquet.Data.DataType.ByteArray:
            return typeof(sbyte[]);

        case Parquet.Data.DataType.DateTimeOffset:
            // Let's treat DateTimeOffsets as DateTime
            return typeof(DateTime);

        case Parquet.Data.DataType.Decimal:
            return typeof(decimal);

        case Parquet.Data.DataType.Double:
            return typeof(double);

        case Parquet.Data.DataType.Float:
            return typeof(float);

        case Parquet.Data.DataType.Short:
        case Parquet.Data.DataType.Int16:
        case Parquet.Data.DataType.Int32:
        case Parquet.Data.DataType.UnsignedInt16:
            return typeof(int);

        case Parquet.Data.DataType.Int64:
            return typeof(long);

        case Parquet.Data.DataType.UnsignedByte:
            return typeof(byte);

        case Parquet.Data.DataType.String:
        default:
            return typeof(string);
    }
}