Пример #1
0
        /// <summary>
        /// Reads every row group of a parquet stream into a single array of models.
        /// </summary>
        /// <remarks>
        /// Only data fields whose name is reported by <c>mapConfig.Contains</c> are read.
        /// The result array is created by <c>CreateArray</c> from the total row count
        /// stored in the file metadata, and each row group is handed to <c>ReadColumns</c>
        /// together with the running row offset (presumably the index in the result array
        /// at which that group's rows start - confirm against <c>ReadColumns</c>).
        /// </remarks>
        /// <typeparam name="TModel">Type of model</typeparam>
        /// <param name="mapConfig">Mapping configuration</param>
        /// <param name="fileStream">Parquet stream</param>
        /// <returns>Parsed data</returns>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="mapConfig"/> or <paramref name="fileStream"/> is <c>null</c>.
        /// </exception>
        public TModel[] Read<TModel>(MapperConfig<TModel> mapConfig, Stream fileStream)
            where TModel : new()
        {
            // Fail fast with a descriptive exception instead of a NullReferenceException
            // thrown later from inside the column-filter lambda.
            if (mapConfig == null)
            {
                throw new System.ArgumentNullException(nameof(mapConfig));
            }

            if (fileStream == null)
            {
                throw new System.ArgumentNullException(nameof(fileStream));
            }

            using var parquetReader = new ParquetReader(fileStream);

            // The set of mapped fields does not depend on the row group,
            // so filter the schema once instead of once per group.
            var mappedFields = parquetReader.Schema.GetDataFields()
                .Where(field => mapConfig.Contains(field.Name))
                .ToArray();

            var resArr = CreateArray<TModel>(parquetReader.ThriftMetadata.Num_rows);
            long modelOffset = 0;

            for (int i = 0; i < parquetReader.RowGroupCount; i++)
            {
                using var groupReader = parquetReader.OpenRowGroupReader(i);
                var columns = mappedFields.Select(groupReader.ReadColumn).ToArray();

                ReadColumns(mapConfig, resArr, columns, modelOffset);

                // advance the offset by the rows just consumed so the next row group continues after them
                modelOffset += groupReader.RowCount;
            }

            return resArr;
        }