Ejemplo n.º 1
0
        /// <summary>
        /// Lazily deserializes a Parquet stream into CLR objects, producing one array per row group.
        /// </summary>
        /// <typeparam name="T">Element type created for every row; requires a public parameterless constructor.</typeparam>
        /// <param name="input">Stream handed to <see cref="ParquetReader"/>.</param>
        /// <returns>
        /// A deferred sequence with one <typeparamref name="T"/> array per row group, in row group order.
        /// Because this is an iterator, nothing is read until the sequence is enumerated, and the
        /// reader is disposed when enumeration finishes or the enumerator is disposed.
        /// </returns>
        public static IEnumerable<T[]> DeserializeGroups<T>(Stream input) where T : new()
        {
            var clrBridge = new ClrBridge(typeof(T));

            using (var parquetReader = new ParquetReader(input))
            {
                DataField[] fields = parquetReader.Schema.GetDataFields();

                for (int groupIndex = 0; groupIndex < parquetReader.RowGroupCount; groupIndex++)
                {
                    using (ParquetRowGroupReader rowGroup = parquetReader.OpenRowGroupReader(groupIndex))
                    {
                        // Read every data column of this group before any instance is created.
                        var columns = new DataColumn[fields.Length];
                        for (int c = 0; c < fields.Length; c++)
                        {
                            columns[c] = rowGroup.ReadColumn(fields[c]);
                        }

                        // One default-constructed instance per row in the group.
                        T[] rows = new T[rowGroup.RowCount];
                        for (int r = 0; r < rows.Length; r++)
                        {
                            rows[r] = new T();
                        }

                        // Let the bridge copy each column's values onto the instances.
                        foreach (DataColumn column in columns)
                        {
                            clrBridge.AssignColumn(column, rows);
                        }

                        yield return rows;
                    }
                }
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Reads every row group of the file and merges them into a single table.
        /// </summary>
        /// <param name="reader">Open reader</param>
        /// <returns>
        /// A table containing the rows of all row groups in file order,
        /// or <c>null</c> when the file contains no row groups.
        /// </returns>
        public static Table ReadAsTable(this ParquetReader reader)
        {
            // The schema and its data fields do not depend on the row group, so resolve them once.
            Schema schema = reader.Schema;
            var dataFields = schema.GetDataFields();

            Table result = null;

            for (int i = 0; i < reader.RowGroupCount; i++)
            {
                using (ParquetRowGroupReader rowGroupReader = reader.OpenRowGroupReader(i))
                {
                    DataColumn[] allData = dataFields
                        .Select(df => rowGroupReader.ReadColumn(df))
                        .ToArray();

                    var groupTable = new Table(schema, allData, rowGroupReader.RowCount);

                    if (result == null)
                    {
                        // The first group's table becomes the accumulator.
                        result = groupTable;
                        continue;
                    }

                    // Later groups are appended row by row.
                    foreach (Row row in groupTable)
                    {
                        result.Add(row);
                    }
                }
            }

            return result;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Deserializes all row groups of a Parquet stream into one collection of CLR objects.
        /// </summary>
        /// <typeparam name="T">Type of the objects produced by the column mapper; requires a public parameterless constructor.</typeparam>
        /// <param name="input">Stream handed to <see cref="ParquetReader3"/>.</param>
        /// <returns>
        /// The objects created for each row group, concatenated in row group order.
        /// The input is read completely before this method returns.
        /// </returns>
        public static IEnumerable<T> Deserialize<T>(Stream input) where T : new()
        {
            var allObjects = new List<T>();

            IColumnClrMapper clrMapper = new SlowReflectionColumnClrMapper(typeof(T));

            using (var parquetReader = new ParquetReader3(input))
            {
                List<DataField> fields = parquetReader.Schema.GetDataFields();

                for (int groupIndex = 0; groupIndex < parquetReader.RowGroupCount; groupIndex++)
                {
                    using (ParquetRowGroupReader rowGroup = parquetReader.OpenRowGroupReader(groupIndex))
                    {
                        // Read this group's data columns in schema order.
                        var columns = new List<DataColumn>(fields.Count);
                        foreach (DataField field in fields)
                        {
                            columns.Add(rowGroup.ReadColumn(field));
                        }

                        // The mapper turns the columns into instances, which are appended to the result.
                        allObjects.AddRange(clrMapper.CreateClassInstances<T>(columns));
                    }
                }
            }

            return allObjects;
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Reads all data columns of a single row group in one call.
        /// </summary>
        /// <param name="rowGroupIndex">Index of the row group to read. Defaults to 0, the first row group.</param>
        /// <returns>One <see cref="DataColumn"/> per data field of the schema, in the order returned by the schema.</returns>
        public DataColumn[] ReadEntireRowGroup(int rowGroupIndex = 0)
        {
            DataField[] fields = Schema.GetDataFields();
            var columns = new DataColumn[fields.Length];

            using (ParquetRowGroupReader rowGroup = OpenRowGroupReader(rowGroupIndex))
            {
                int slot = 0;
                foreach (DataField field in fields)
                {
                    columns[slot++] = rowGroup.ReadColumn(field);
                }

                return columns;
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Reads the schema and the data columns of the first row group (index 0) from a Parquet stream.
        /// </summary>
        /// <param name="stream">Stream handed to <see cref="ParquetReader"/>.</param>
        /// <param name="schema">Receives the schema reported by the reader.</param>
        /// <param name="columns">Receives one column per data field of the schema, read from row group 0.</param>
        public static void ReadSingleRowGroupParquetFile(this Stream stream, out Schema schema, out DataColumn[] columns)
        {
            using (var parquetReader = new ParquetReader(stream))
            {
                schema = parquetReader.Schema;

                using (ParquetRowGroupReader firstGroup = parquetReader.OpenRowGroupReader(0))
                {
                    DataField[] fields = schema.GetDataFields();
                    columns = new DataColumn[fields.Length];

                    int slot = 0;
                    foreach (DataField field in fields)
                    {
                        columns[slot++] = firstGroup.ReadColumn(field);
                    }
                }
            }
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Reads an open row group as a Table
        /// </summary>
        /// <param name="rowGroupReader">Open row group reader</param>
        /// <param name="schema">File schema; its data fields determine which columns are read</param>
        /// <param name="rowCount">
        /// Row count passed to the table; when NULL the row group's own
        /// <c>RowCount</c> is used.
        /// </param>
        /// <returns>A new table built from every data column of the row group</returns>
        public static Table ReadAsTable(this ParquetRowGroupReader rowGroupReader, Schema schema, int? rowCount)
        {
            DataColumn[] allData = schema.GetDataFields()
                .Select(df => rowGroupReader.ReadColumn(df))
                .ToArray();

            // A single row group yields exactly one table, so there is nothing to merge.
            return new Table(schema, allData, rowCount ?? rowGroupReader.RowCount);
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Reads one row group and deserializes it into an array of CLR objects.
        /// </summary>
        /// <typeparam name="T">Element type created for every row; requires a public parameterless constructor.</typeparam>
        /// <param name="rowGroupIndex">Index of the row group to open on <paramref name="reader"/>.</param>
        /// <param name="reader">Reader used to open the row group.</param>
        /// <param name="dataFields">Data fields whose columns are read and assigned to the created objects.</param>
        /// <returns>An array with one <typeparamref name="T"/> instance per row of the row group.</returns>
        private static T[] ReadAndDeserializeByRowGroup<T>(int rowGroupIndex, ParquetReader reader, DataField[] dataFields) where T : new()
        {
            var clrBridge = new ClrBridge(typeof(T));

            using (ParquetRowGroupReader rowGroup = reader.OpenRowGroupReader(rowGroupIndex))
            {
                // Read every requested column before any instance is created.
                var columns = new DataColumn[dataFields.Length];
                for (int c = 0; c < dataFields.Length; c++)
                {
                    columns[c] = rowGroup.ReadColumn(dataFields[c]);
                }

                // One default-constructed instance per row in the group.
                T[] rows = new T[rowGroup.RowCount];
                for (int r = 0; r < rows.Length; r++)
                {
                    rows[r] = new T();
                }

                // Let the bridge copy each column's values onto the instances.
                foreach (DataColumn column in columns)
                {
                    clrBridge.AssignColumn(column, rows);
                }

                return rows;
            }
        }