Exemple #1
0
        /// <summary>
        /// Lazily deserialises a Parquet stream into arrays of <typeparamref name="T"/>,
        /// producing one array per row group.
        /// </summary>
        /// <typeparam name="T">Element type to create for every row; must have a public parameterless constructor.</typeparam>
        /// <param name="input">Stream the <c>ParquetReader</c> is opened on.</param>
        /// <returns>
        /// A sequence with one array per row group, in row group order. Each array has as many
        /// elements as the row group reports rows.
        /// </returns>
        /// <remarks>
        /// This is an iterator: nothing is read until the result is enumerated, and the reader
        /// stays open until enumeration finishes or the enumerator is disposed.
        /// </remarks>
        public static IEnumerable <T[]> DeserializeGroups <T>(Stream input) where T : new()
        {
            var clrBridge = new ClrBridge(typeof(T));

            using (var parquetReader = new ParquetReader(input))
            {
                // The data fields are taken from the file schema once and reused for every row group.
                DataField[] fields = parquetReader.Schema.GetDataFields();

                int groupIndex = 0;
                while (groupIndex < parquetReader.RowGroupCount)
                {
                    using (ParquetRowGroupReader rowGroup = parquetReader.OpenRowGroupReader(groupIndex))
                    {
                        // Read every column of this row group before any element is created.
                        DataColumn[] columns = fields
                            .Select(field => rowGroup.ReadColumn(field))
                            .ToArray();

                        // One default-constructed element per row.
                        var rows = new T[rowGroup.RowCount];
                        for (int slot = 0; slot < rows.Length; slot++)
                        {
                            rows[slot] = new T();
                        }

                        // Hand each column to the bridge together with the element array
                        // (presumably it copies the column values into the elements - see ClrBridge.AssignColumn).
                        foreach (DataColumn column in columns)
                        {
                            clrBridge.AssignColumn(column, rows);
                        }

                        yield return rows;
                    }

                    groupIndex++;
                }
            }
        }
        /// <summary>
        /// Reads all row groups of the file and combines their rows into a single table.
        /// </summary>
        /// <param name="reader">Open reader</param>
        /// <returns>
        /// A table built from the first row group with the rows of every following row group
        /// appended in order, or <c>null</c> when the file contains no row groups.
        /// </returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="reader"/> is <c>null</c>.</exception>
        public static Table ReadAsTable(this ParquetReader reader)
        {
            if (reader == null)
            {
                throw new System.ArgumentNullException(nameof(reader));
            }

            // The schema and its data fields do not change between row groups, so resolve them once.
            var schema = reader.Schema;
            var dataFields = schema.GetDataFields();

            Table result = null;

            for (int i = 0; i < reader.RowGroupCount; i++)
            {
                using (ParquetRowGroupReader rowGroupReader = reader.OpenRowGroupReader(i))
                {
                    DataColumn[] allData = dataFields.Select(df => rowGroupReader.ReadColumn(df)).ToArray();

                    var groupTable = new Table(schema, allData, rowGroupReader.RowCount);

                    if (result == null)
                    {
                        // The first row group becomes the result table itself.
                        result = groupTable;
                    }
                    else
                    {
                        // Later row groups are appended row by row.
                        foreach (Row row in groupTable)
                        {
                            result.Add(row);
                        }
                    }
                }
            }

            return result;
        }
        /// <summary>
        /// Opens a Parquet reader on the stream and returns the file schema together with
        /// every data column of the first row group (index 0).
        /// </summary>
        /// <param name="stream">Stream the <c>ParquetReader</c> is opened on.</param>
        /// <param name="schema">Receives the schema reported by the reader.</param>
        /// <param name="columns">Receives one column per data field of the schema, in data field order.</param>
        public static void ReadSingleRowGroupParquetFile(this Stream stream, out Schema schema, out DataColumn[] columns)
        {
            using (var parquetReader = new ParquetReader(stream))
            {
                schema = parquetReader.Schema;

                // Only row group 0 is opened; any further row groups are not read.
                using (ParquetRowGroupReader firstGroup = parquetReader.OpenRowGroupReader(0))
                {
                    DataField[] fields = schema.GetDataFields();
                    columns = new DataColumn[fields.Length];

                    int position = 0;
                    foreach (DataField field in fields)
                    {
                        columns[position] = firstGroup.ReadColumn(field);
                        position++;
                    }
                }
            }
        }
Exemple #4
0
        /// <summary>
        /// Reads a single row group and deserialises it into an array of <typeparamref name="T"/>.
        /// </summary>
        /// <typeparam name="T">Element type to create for every row; must have a public parameterless constructor.</typeparam>
        /// <param name="rowGroupIndex">Index of the row group to open on <paramref name="reader"/>.</param>
        /// <param name="reader">Reader the row group is opened from.</param>
        /// <param name="dataFields">Data fields whose columns are read from the row group.</param>
        /// <returns>An array with one element per row reported by the row group.</returns>
        private static T[] ReadAndDeserializeByRowGroup <T>(int rowGroupIndex, ParquetReader reader, DataField[] dataFields) where T : new()
        {
            var clrBridge = new ClrBridge(typeof(T));

            using (ParquetRowGroupReader rowGroup = reader.OpenRowGroupReader(rowGroupIndex))
            {
                // Read every requested column before any element is created.
                DataColumn[] columns = dataFields
                    .Select(field => rowGroup.ReadColumn(field))
                    .ToArray();

                // One default-constructed element per row.
                var rows = new T[rowGroup.RowCount];
                int slot = 0;
                while (slot < rows.Length)
                {
                    rows[slot] = new T();
                    slot++;
                }

                // Hand each column to the bridge together with the element array
                // (presumably it copies the column values into the elements - see ClrBridge.AssignColumn).
                foreach (DataColumn column in columns)
                {
                    clrBridge.AssignColumn(column, rows);
                }

                return rows;
            }
        }