Пример #1
0
        /// <summary>
        /// Builds a <c>Frame</c> from a Parquet dataset by creating one <c>Series</c>
        /// per schema element.
        /// </summary>
        /// <param name="pds">Dataset whose schema elements and columns are read.</param>
        /// <returns>A new <c>Frame</c> constructed from the projected series.</returns>
        public static Frame ConvertFromParquet(PDS pds)
        {
            // The element's position in the schema doubles as the column index
            // handed to GetColumn. The projection is lazy: it is only evaluated
            // when the Frame constructor enumerates it.
            IEnumerable<Series> columnSeries = pds.Schema.Elements.Select(
                (element, position) => Series.FromList(
                    pds.GetColumn(position),
                    element.ElementType,
                    element.Name));

            return new Frame(columnSeries);
        }
Пример #2
0
        /// <summary>
        /// Converts a flat Parquet dataset into a <see cref="DataTable"/>: one column
        /// per data field in the schema and one row per dataset row.
        /// </summary>
        /// <param name="dataset">
        /// The dataset to convert. When <c>null</c>, an empty table (no columns, no rows)
        /// is returned.
        /// </param>
        /// <returns>A new <see cref="DataTable"/> populated from <paramref name="dataset"/>.</returns>
        /// <exception cref="ArgumentException">
        /// Thrown when the number of data fields in the schema differs from the
        /// dataset's field count, which this method treats as the presence of
        /// unsupported types (Lists, Maps or Structs).
        /// </exception>
        public static DataTable ParquetDataSetToDataTable(Parquet.Data.DataSet dataset)
        {
            DataTable datatable = new DataTable();

            if (dataset == null)
            {
                return datatable;
            }

            // Fetch the data fields once; they are needed both for the validity
            // check and for building the columns.
            var dataFields = dataset.Schema.GetDataFields();
            if (dataFields.Count != dataset.FieldCount)
            {
                throw new ArgumentException("The provided dataset has some unsupported data types such as Lists, Maps or Structs");
            }

            // Positions of the DateTimeOffset fields, whose values must be converted
            // to DateTime before they are stored in the table.
            List<int> datetimeOffsetFieldIndexes = new List<int>();
            int index = 0;
            foreach (Parquet.Data.DataField field in dataFields)
            {
                if (field.DataType == Parquet.Data.DataType.DateTimeOffset)
                {
                    datetimeOffsetFieldIndexes.Add(index);
                }

                DataColumn newColumn = datatable.Columns.Add(field.Name, GetDataTableColumnType(field.DataType));
                newColumn.AllowDBNull = field.HasNulls;
                index++;
            }

            foreach (Parquet.Data.Row row in dataset)
            {
                // Convert a copy of the values rather than the array returned by
                // RawValues. If that array is the row's own backing storage, an
                // in-place conversion would alter the caller's dataset and make a
                // second conversion of the same dataset fail on the cast below.
                object[] values = (object[])row.RawValues.Clone();

                foreach (int datetimeOffsetIndex in datetimeOffsetFieldIndexes)
                {
                    if (values[datetimeOffsetIndex] != null)
                    {
                        // DateTimeOffset.DateTime drops the offset component.
                        // NOTE(review): confirm whether Parquet files can carry a
                        // non-zero offset; if so this conversion loses information.
                        values[datetimeOffsetIndex] = ((DateTimeOffset)values[datetimeOffsetIndex]).DateTime;
                    }
                }

                DataRow dataRow = datatable.NewRow();
                dataRow.ItemArray = values;
                datatable.Rows.Add(dataRow);
            }

            return datatable;
        }

        /// <summary>
        /// Maps a Parquet data type to the CLR type used for the matching
        /// <see cref="DataColumn"/>. Types without an explicit mapping fall back to
        /// <see cref="string"/>.
        /// </summary>
        /// <param name="dataType">The Parquet data type of the field.</param>
        /// <returns>The column type to use for that field.</returns>
        private static Type GetDataTableColumnType(Parquet.Data.DataType dataType)
        {
            switch (dataType)
            {
                case Parquet.Data.DataType.Boolean:
                    return typeof(bool);

                // NOTE(review): Byte and ByteArray are mapped to the signed types
                // sbyte / sbyte[]; verify this matches the CLR types the Parquet
                // library actually produces for these fields.
                case Parquet.Data.DataType.Byte:
                    return typeof(sbyte);

                case Parquet.Data.DataType.ByteArray:
                    return typeof(sbyte[]);

                case Parquet.Data.DataType.DateTimeOffset:
                    // DateTimeOffset fields are exposed as DateTime columns.
                    return typeof(DateTime);

                case Parquet.Data.DataType.Decimal:
                    return typeof(decimal);

                case Parquet.Data.DataType.Double:
                    return typeof(double);

                case Parquet.Data.DataType.Float:
                    return typeof(float);

                // All of the narrower integer types share a single int column type.
                case Parquet.Data.DataType.Short:
                case Parquet.Data.DataType.Int16:
                case Parquet.Data.DataType.Int32:
                case Parquet.Data.DataType.UnsignedInt16:
                    return typeof(int);

                case Parquet.Data.DataType.Int64:
                    return typeof(long);

                case Parquet.Data.DataType.UnsignedByte:
                    return typeof(byte);

                case Parquet.Data.DataType.String:
                default:
                    return typeof(string);
            }
        }