/// <summary>
/// Creates a reader bound to a single column chunk.
/// Every reference argument is required; the constructor then wraps the input
/// stream in a <see cref="ThriftStream"/>, asks the footer for the chunk's
/// repetition/definition levels and schema element, and resolves the matching
/// data type handler.
/// </summary>
/// <param name="dataField">Field this reader is created for.</param>
/// <param name="inputStream">Stream that is wrapped in a <see cref="ThriftStream"/>.</param>
/// <param name="thriftColumnChunk">Column chunk used for the level and schema element lookups.</param>
/// <param name="footer">Footer queried for levels and the schema element.</param>
/// <param name="parquetOptions">Options passed to the data type handler lookup.</param>
/// <exception cref="ArgumentNullException">Any of the arguments is null.</exception>
public DataColumnReader(
            DataField dataField,
            Stream inputStream,
            Thrift.ColumnChunk thriftColumnChunk,
            ThriftFooter footer,
            ParquetOptions parquetOptions)
        {
            // Guard clauses, checked in declaration order.
            if (dataField == null)
            {
                throw new ArgumentNullException(nameof(dataField));
            }
            if (inputStream == null)
            {
                throw new ArgumentNullException(nameof(inputStream));
            }
            if (thriftColumnChunk == null)
            {
                throw new ArgumentNullException(nameof(thriftColumnChunk));
            }
            if (footer == null)
            {
                throw new ArgumentNullException(nameof(footer));
            }
            if (parquetOptions == null)
            {
                throw new ArgumentNullException(nameof(parquetOptions));
            }

            _dataField = dataField;
            _inputStream = inputStream;
            _thriftColumnChunk = thriftColumnChunk;
            _footer = footer;
            _parquetOptions = parquetOptions;

            _thriftStream = new ThriftStream(inputStream);

            // Levels and schema element are looked up in the footer for this chunk.
            footer.GetLevels(thriftColumnChunk, out int repetitionLevel, out int definitionLevel);
            _maxRepetitionLevel = repetitionLevel;
            _maxDefinitionLevel = definitionLevel;

            _thriftSchemaElement = footer.GetSchemaElement(thriftColumnChunk);
            _dataTypeHandler = DataTypeFactory.Match(_thriftSchemaElement, parquetOptions);
        }
        /// <summary>
        /// Creates a writer for a single column: captures its collaborators, resolves the
        /// data type handler for the schema element, and asks the footer to create the
        /// column chunk and data page and to report the chunk's repetition/definition levels.
        /// </summary>
        /// <param name="output">Stream handed to the footer when the column chunk is created.</param>
        /// <param name="thriftStream">Thrift stream stored for later use by this writer.</param>
        /// <param name="footer">Footer used to create the column chunk and data page and to look up levels.</param>
        /// <param name="tse">Schema element of the column; its type is used for the column chunk.</param>
        /// <param name="path">Path handed to the footer when the column chunk is created.</param>
        /// <param name="compressionMethod">Compression method recorded for the column chunk.</param>
        /// <param name="formatOptions">Options passed to the data type handler lookup.</param>
        /// <param name="writerOptions">Writer options stored for later use by this writer.</param>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="footer"/> or <paramref name="tse"/> is null.
        /// </exception>
        public ColumnarWriter(Stream output, ThriftStream thriftStream,
                              ThriftFooter footer,
                              Thrift.SchemaElement tse, List<string> path,
                              CompressionMethod compressionMethod,
                              ParquetOptions formatOptions,
                              WriterOptions writerOptions)
        {
            // footer and tse are dereferenced below, so fail fast with a descriptive
            // exception rather than letting a NullReferenceException surface mid-construction.
            _footer            = footer ?? throw new System.ArgumentNullException(nameof(footer));
            _tse               = tse ?? throw new System.ArgumentNullException(nameof(tse));

            _output            = output;
            _thriftStream      = thriftStream;
            _compressionMethod = compressionMethod;
            _formatOptions     = formatOptions;
            _writerOptions     = writerOptions;
            _dataTypeHandler   = DataTypeFactory.Match(tse, _formatOptions);

            _chunk = _footer.CreateColumnChunk(_compressionMethod, _output, _tse.Type, path, 0);
            _ph    = _footer.CreateDataPage(0);
            _footer.GetLevels(_chunk, out int maxRepetitionLevel, out int maxDefinitionLevel);
            _maxRepetitionLevel = maxRepetitionLevel;
            _maxDefinitionLevel = maxDefinitionLevel;
        }
예제 #3
0
        /// <summary>
        /// Creates a row group writer: validates the required collaborators, registers a new
        /// row group in the footer with the given row count, remembers the current stream
        /// position, and snapshots the footer's writeable schema as a list.
        /// </summary>
        /// <param name="schema">Schema of the data being written.</param>
        /// <param name="stream">Stream whose current position is recorded at construction.</param>
        /// <param name="thriftStream">Thrift stream stored for later use by this writer.</param>
        /// <param name="footer">Footer that receives the new row group.</param>
        /// <param name="compressionMethod">Compression method stored for later use.</param>
        /// <param name="formatOptions">Format options stored for later use.</param>
        /// <param name="rowCount">Row count assigned to the new row group.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="schema"/>, <paramref name="stream"/>, <paramref name="thriftStream"/>
        /// or <paramref name="footer"/> is null.
        /// </exception>
        internal ParquetRowGroupWriter(Schema schema,
                                       Stream stream,
                                       ThriftStream thriftStream,
                                       ThriftFooter footer,
                                       CompressionMethod compressionMethod,
                                       ParquetOptions formatOptions,
                                       int rowCount)
        {
            // Guard clauses, checked in declaration order.
            if (schema == null)
            {
                throw new ArgumentNullException(nameof(schema));
            }
            if (stream == null)
            {
                throw new ArgumentNullException(nameof(stream));
            }
            if (thriftStream == null)
            {
                throw new ArgumentNullException(nameof(thriftStream));
            }
            if (footer == null)
            {
                throw new ArgumentNullException(nameof(footer));
            }

            _schema = schema;
            _stream = stream;
            _thriftStream = thriftStream;
            _footer = footer;
            _compressionMethod = compressionMethod;
            _formatOptions = formatOptions;
            _rowCount = rowCount;

            // Register the row group with the footer and fill in what is known so far.
            _thriftRowGroup = footer.AddRowGroup();
            _thriftRowGroup.Num_rows = rowCount;
            _rgStartPos = stream.Position;
            _thriftRowGroup.Columns = new List<Thrift.ColumnChunk>();

            _thschema = footer.GetWriteableSchema().ToList();
        }
예제 #4
0
 /// <summary>
 /// Writes the file metadata held by this instance to the supplied thrift stream.
 /// </summary>
 /// <param name="thriftStream">Stream the metadata is written to.</param>
 /// <returns>The value returned by the thrift stream's write call.</returns>
 public long Write(ThriftStream thriftStream)
 {
     long result = thriftStream.Write(_fileMeta);
     return result;
 }
예제 #5
0
        /// <summary>
        /// Creates a column bound to the given chunk, schema element and streams, and
        /// prepares a plain values reader configured with the supplied options.
        /// </summary>
        /// <param name="thriftChunk">Column chunk this column is bound to.</param>
        /// <param name="schema">Schema element stored for this column.</param>
        /// <param name="inputStream">Input stream stored for later use.</param>
        /// <param name="thriftStream">Thrift stream stored for later use.</param>
        /// <param name="options">Options stored and passed to the plain values reader.</param>
        public PColumn(Thrift.ColumnChunk thriftChunk, SchemaElement schema, Stream inputStream, ThriftStream thriftStream, ParquetOptions options)
        {
            // Column description.
            _thriftChunk = thriftChunk;
            _schema = schema;

            // Streams.
            _inputStream = inputStream;
            _thrift = thriftStream;

            // Options and the reader configured from them.
            _options = options;
            _plainReader = new PlainValuesReader(options);
        }
예제 #6
0
        /// <summary>
        /// Creates a column bound to the given chunk. Only chunks whose schema path has
        /// exactly one segment are accepted. The column's schema element is looked up in
        /// <paramref name="schema"/> by the chunk, and a plain values reader is prepared
        /// with the supplied options.
        /// </summary>
        /// <param name="thriftChunk">Column chunk this column is bound to.</param>
        /// <param name="schema">Schema used to look up the chunk's schema element.</param>
        /// <param name="inputStream">Input stream stored for later use.</param>
        /// <param name="thriftStream">Thrift stream stored for later use.</param>
        /// <param name="options">Options stored and passed to the plain values reader.</param>
        /// <exception cref="NotImplementedException">
        /// The chunk's path in schema does not contain exactly one element.
        /// </exception>
        public PColumn(Thrift.ColumnChunk thriftChunk, Schema schema, Stream inputStream, ThriftStream thriftStream, ParquetOptions options)
        {
            bool isFlatPath = thriftChunk.Meta_data.Path_in_schema.Count == 1;
            if (!isFlatPath)
            {
                throw new NotImplementedException("path in scheme is not flat");
            }

            _thriftChunk = thriftChunk;
            _thrift = thriftStream;
            _schema = schema;
            _inputStream = inputStream;

            // Resolve the schema element for this chunk through the schema indexer.
            _schemaElement = schema[thriftChunk];

            _options = options;
            _plainReader = new PlainValuesReader(options);
        }