/// <summary>
/// Initializes a reader for one column chunk: stores the supplied collaborators,
/// wraps the input stream in a <see cref="ThriftStream"/>, and asks the footer for
/// the chunk's maximum repetition/definition levels and its schema element, from
/// which the data type handler is matched.
/// </summary>
/// <param name="dataField">Field this reader is created for.</param>
/// <param name="inputStream">Stream the column is read from; also wrapped in a new <see cref="ThriftStream"/>.</param>
/// <param name="thriftColumnChunk">Thrift column chunk used to look up levels and the schema element.</param>
/// <param name="footer">Footer queried for levels and the schema element.</param>
/// <param name="parquetOptions">Options passed to <c>DataTypeFactory.Match</c>.</param>
/// <exception cref="ArgumentNullException">Thrown when any argument is <c>null</c>.</exception>
public DataColumnReader(
    DataField dataField,
    Stream inputStream,
    Thrift.ColumnChunk thriftColumnChunk,
    ThriftFooter footer,
    ParquetOptions parquetOptions)
{
    // Guards run in parameter order so the first null argument is the one reported.
    if (dataField is null)
    {
        throw new ArgumentNullException(nameof(dataField));
    }

    if (inputStream is null)
    {
        throw new ArgumentNullException(nameof(inputStream));
    }

    if (thriftColumnChunk is null)
    {
        throw new ArgumentNullException(nameof(thriftColumnChunk));
    }

    if (footer is null)
    {
        throw new ArgumentNullException(nameof(footer));
    }

    if (parquetOptions is null)
    {
        throw new ArgumentNullException(nameof(parquetOptions));
    }

    _dataField = dataField;
    _inputStream = inputStream;
    _thriftColumnChunk = thriftColumnChunk;
    _footer = footer;
    _parquetOptions = parquetOptions;

    _thriftStream = new ThriftStream(inputStream);

    // Levels and schema element both come from the footer, keyed by the column chunk.
    _footer.GetLevels(_thriftColumnChunk, out int maxRepetition, out int maxDefinition);
    _maxRepetitionLevel = maxRepetition;
    _maxDefinitionLevel = maxDefinition;

    _thriftSchemaElement = _footer.GetSchemaElement(_thriftColumnChunk);
    _dataTypeHandler = DataTypeFactory.Match(_thriftSchemaElement, _parquetOptions);
}
/// <summary>
/// Initializes a column writer: stores the supplied collaborators, matches a data
/// type handler for the schema element, creates the column chunk and data page
/// header through the footer, and reads the chunk's maximum repetition/definition
/// levels from the footer.
/// </summary>
/// <param name="output">Output stream; stored and passed to <c>CreateColumnChunk</c>.</param>
/// <param name="thriftStream">Thrift stream stored for later use.</param>
/// <param name="footer">Footer used to create the column chunk and data page and to resolve levels.</param>
/// <param name="tse">Thrift schema element of the column; its <c>Type</c> is used for the column chunk.</param>
/// <param name="path">Path passed to <c>CreateColumnChunk</c>.</param>
/// <param name="compressionMethod">Compression method passed to <c>CreateColumnChunk</c>.</param>
/// <param name="formatOptions">Options passed to <c>DataTypeFactory.Match</c>.</param>
/// <param name="writerOptions">Writer options stored for later use.</param>
/// <exception cref="System.ArgumentNullException">
/// Thrown when <paramref name="footer"/> or <paramref name="tse"/> is <c>null</c>.
/// </exception>
public ColumnarWriter(
    Stream output,
    ThriftStream thriftStream,
    ThriftFooter footer,
    Thrift.SchemaElement tse,
    List<string> path,
    CompressionMethod compressionMethod,
    ParquetOptions formatOptions,
    WriterOptions writerOptions)
{
    // footer and tse are dereferenced unconditionally below; fail fast with a named
    // argument instead of an anonymous NullReferenceException part-way through setup.
    if (footer is null)
    {
        throw new System.ArgumentNullException(nameof(footer));
    }

    if (tse is null)
    {
        throw new System.ArgumentNullException(nameof(tse));
    }

    _output = output;
    _thriftStream = thriftStream;
    _footer = footer;
    _tse = tse;
    _compressionMethod = compressionMethod;
    _formatOptions = formatOptions;
    _writerOptions = writerOptions;

    _dataTypeHandler = DataTypeFactory.Match(tse, _formatOptions);

    _chunk = _footer.CreateColumnChunk(_compressionMethod, _output, _tse.Type, path, 0);
    _ph = _footer.CreateDataPage(0);

    _footer.GetLevels(_chunk, out int maxRepetitionLevel, out int maxDefinitionLevel);
    _maxRepetitionLevel = maxRepetitionLevel;
    _maxDefinitionLevel = maxDefinitionLevel;
}
/// <summary>
/// Initializes a row group writer: stores the supplied collaborators, adds a new
/// row group to the footer with the given row count, records the stream position
/// at that moment, gives the row group an empty column list, and snapshots the
/// footer's writeable schema into a list.
/// </summary>
/// <param name="schema">Schema stored for later use.</param>
/// <param name="stream">Stream whose current <c>Position</c> is recorded.</param>
/// <param name="thriftStream">Thrift stream stored for later use.</param>
/// <param name="footer">Footer the new row group is added to.</param>
/// <param name="compressionMethod">Compression method stored for later use.</param>
/// <param name="formatOptions">Format options stored for later use.</param>
/// <param name="rowCount">Value assigned to the new row group's <c>Num_rows</c>.</param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="schema"/>, <paramref name="stream"/>,
/// <paramref name="thriftStream"/> or <paramref name="footer"/> is <c>null</c>.
/// </exception>
internal ParquetRowGroupWriter(
    Schema schema,
    Stream stream,
    ThriftStream thriftStream,
    ThriftFooter footer,
    CompressionMethod compressionMethod,
    ParquetOptions formatOptions,
    int rowCount)
{
    // Guards run in parameter order so the first null argument is the one reported.
    if (schema is null)
    {
        throw new ArgumentNullException(nameof(schema));
    }

    if (stream is null)
    {
        throw new ArgumentNullException(nameof(stream));
    }

    if (thriftStream is null)
    {
        throw new ArgumentNullException(nameof(thriftStream));
    }

    if (footer is null)
    {
        throw new ArgumentNullException(nameof(footer));
    }

    _schema = schema;
    _stream = stream;
    _thriftStream = thriftStream;
    _footer = footer;
    _compressionMethod = compressionMethod;
    _formatOptions = formatOptions;
    _rowCount = rowCount;

    // The footer is mutated first, then the stream position is read; keep this order.
    _thriftRowGroup = _footer.AddRowGroup();
    _thriftRowGroup.Num_rows = _rowCount;
    _rgStartPos = _stream.Position;
    _thriftRowGroup.Columns = new List<Thrift.ColumnChunk>();

    _thschema = _footer.GetWriteableSchema().ToList();
}
/// <summary>
/// Writes the file metadata held by this instance to the given Thrift stream.
/// </summary>
/// <param name="thriftStream">Stream the metadata is written to.</param>
/// <returns>The value returned by <c>thriftStream.Write</c> for the metadata.</returns>
public long Write(ThriftStream thriftStream) => thriftStream.Write(_fileMeta);
/// <summary>
/// Initializes a column from its Thrift chunk and schema element, storing the
/// streams and options and creating a <see cref="PlainValuesReader"/> from the options.
/// </summary>
/// <param name="thriftChunk">Thrift column chunk stored for later use.</param>
/// <param name="schema">Schema element stored for later use.</param>
/// <param name="inputStream">Input stream stored for later use.</param>
/// <param name="thriftStream">Thrift stream stored for later use.</param>
/// <param name="options">Options stored and passed to the plain values reader.</param>
public PColumn(
    Thrift.ColumnChunk thriftChunk,
    SchemaElement schema,
    Stream inputStream,
    ThriftStream thriftStream,
    ParquetOptions options)
{
    // Streams
    _inputStream = inputStream;
    _thrift = thriftStream;

    // Column metadata
    _thriftChunk = thriftChunk;
    _schema = schema;

    // Options and the reader built from them
    _options = options;
    _plainReader = new PlainValuesReader(options);
}
/// <summary>
/// Initializes a column from its Thrift chunk and the schema, looking up the
/// chunk's schema element through the schema indexer. Only chunks whose
/// path in the schema has exactly one entry are accepted.
/// </summary>
/// <param name="thriftChunk">Thrift column chunk; its <c>Meta_data.Path_in_schema</c> must contain exactly one entry.</param>
/// <param name="schema">Schema indexed by the chunk to obtain the schema element.</param>
/// <param name="inputStream">Input stream stored for later use.</param>
/// <param name="thriftStream">Thrift stream stored for later use.</param>
/// <param name="options">Options stored and passed to the plain values reader.</param>
/// <exception cref="NotImplementedException">
/// Thrown when the chunk's path in the schema does not have exactly one entry.
/// </exception>
public PColumn(
    Thrift.ColumnChunk thriftChunk,
    Schema schema,
    Stream inputStream,
    ThriftStream thriftStream,
    ParquetOptions options)
{
    // Reject anything but a single-entry path before touching any state.
    bool hasFlatPath = thriftChunk.Meta_data.Path_in_schema.Count == 1;
    if (!hasFlatPath)
    {
        throw new NotImplementedException("path in scheme is not flat");
    }

    _inputStream = inputStream;
    _thrift = thriftStream;
    _thriftChunk = thriftChunk;
    _schema = schema;

    // Resolved via the stored fields, exactly as the lookup was written originally.
    _schemaElement = _schema[_thriftChunk];

    _options = options;
    _plainReader = new PlainValuesReader(options);
}