Exemplo n.º 1
0
        /// <summary>
        /// Creates a reader bound to the given input stream, column chunk, footer and options,
        /// and wraps the input stream in a <see cref="ThriftStream"/>.
        /// </summary>
        /// <param name="inputStream">Stream the column data is read from.</param>
        /// <param name="thriftColumnChunk">Thrift column chunk this reader is bound to.</param>
        /// <param name="footer">Footer associated with the file being read.</param>
        /// <param name="parquetOptions">Options stored for later use by the reader.</param>
        /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
        public ColumnarReader(Stream inputStream, Thrift.ColumnChunk thriftColumnChunk, ThriftFooter footer, ParquetOptions parquetOptions)
        {
            // Validate in declaration order so the first null argument is the one reported.
            if (inputStream is null)
            {
                throw new ArgumentNullException(nameof(inputStream));
            }

            if (thriftColumnChunk is null)
            {
                throw new ArgumentNullException(nameof(thriftColumnChunk));
            }

            if (footer is null)
            {
                throw new ArgumentNullException(nameof(footer));
            }

            if (parquetOptions is null)
            {
                throw new ArgumentNullException(nameof(parquetOptions));
            }

            _inputStream       = inputStream;
            _thriftColumnChunk = thriftColumnChunk;
            _footer            = footer;
            _parquetOptions    = parquetOptions;
            _thriftStream      = new ThriftStream(inputStream);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Creates a reader bound to the given input stream, column chunk, footer and options.
        /// Besides storing the arguments, it wraps the stream in a <see cref="ThriftStream"/>,
        /// asks the footer for the chunk's maximum repetition/definition levels and schema
        /// element, and resolves the data type handler for that schema element.
        /// </summary>
        /// <param name="inputStream">Stream the column data is read from.</param>
        /// <param name="thriftColumnChunk">Thrift column chunk this reader is bound to.</param>
        /// <param name="footer">Footer queried for levels and the schema element.</param>
        /// <param name="parquetOptions">Options passed to the data type handler lookup.</param>
        /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
        public ColumnarReader(Stream inputStream, Thrift.ColumnChunk thriftColumnChunk, ThriftFooter footer, ParquetOptions parquetOptions)
        {
            // Validate in declaration order so the first null argument is the one reported.
            if (inputStream is null)
            {
                throw new ArgumentNullException(nameof(inputStream));
            }

            if (thriftColumnChunk is null)
            {
                throw new ArgumentNullException(nameof(thriftColumnChunk));
            }

            if (footer is null)
            {
                throw new ArgumentNullException(nameof(footer));
            }

            if (parquetOptions is null)
            {
                throw new ArgumentNullException(nameof(parquetOptions));
            }

            _inputStream       = inputStream;
            _thriftColumnChunk = thriftColumnChunk;
            _footer            = footer;
            _parquetOptions    = parquetOptions;
            _thriftStream      = new ThriftStream(inputStream);

            // Levels are looked up first, then the schema element, then the handler.
            int maxRepetition;
            int maxDefinition;
            footer.GetLevels(thriftColumnChunk, out maxRepetition, out maxDefinition);
            _maxRepetitionLevel = maxRepetition;
            _maxDefinitionLevel = maxDefinition;

            _thriftSchemaElement = footer.GetSchemaElement(thriftColumnChunk);
            _dataTypeHandler     = DataTypeFactory.Match(_thriftSchemaElement, parquetOptions);
        }
Exemplo n.º 3
0
 /// <summary>
 /// Creates a column writer that stores the supplied stream, Thrift stream, footer,
 /// schema element, compression method and row count for later use.
 /// </summary>
 /// <param name="stream">Output stream stored by the writer.</param>
 /// <param name="thriftStream">Thrift stream stored by the writer.</param>
 /// <param name="footer">Footer stored by the writer.</param>
 /// <param name="schemaElement">Thrift schema element stored by the writer.</param>
 /// <param name="compressionMethod">Compression method stored by the writer.</param>
 /// <param name="rowCount">Row count stored by the writer.</param>
 /// <exception cref="ArgumentNullException">
 /// <paramref name="stream"/>, <paramref name="thriftStream"/>, <paramref name="footer"/>
 /// or <paramref name="schemaElement"/> is <c>null</c>.
 /// </exception>
 public DataColumnWriter(
     Stream stream,
     ThriftStream thriftStream,
     ThriftFooter footer,
     Thrift.SchemaElement schemaElement,
     CompressionMethod compressionMethod,
     int rowCount)
 {
     // Fail fast on null references instead of surfacing a NullReferenceException
     // later, far away from the call site that supplied the bad argument.
     _stream            = stream ?? throw new ArgumentNullException(nameof(stream));
     _thriftStream      = thriftStream ?? throw new ArgumentNullException(nameof(thriftStream));
     _footer            = footer ?? throw new ArgumentNullException(nameof(footer));
     _schemaElement     = schemaElement ?? throw new ArgumentNullException(nameof(schemaElement));
     _compressionMethod = compressionMethod;
     _rowCount          = rowCount;
 }
Exemplo n.º 4
0
        /// <summary>
        /// Creates a row group reader and indexes every column chunk of the row group by the
        /// path returned from <c>GetPath()</c>.
        /// </summary>
        /// <param name="rowGroup">Thrift row group whose column chunks are indexed.</param>
        /// <param name="footer">Footer stored by the reader.</param>
        /// <param name="stream">Stream stored by the reader.</param>
        /// <param name="thriftStream">Thrift stream stored by the reader.</param>
        /// <param name="parquetOptions">Options stored by the reader.</param>
        /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
        internal ParquetRowGroupReader(
            Thrift.RowGroup rowGroup,
            ThriftFooter footer,
            Stream stream, ThriftStream thriftStream,
            ParquetOptions parquetOptions)
        {
            // Validate in the same order as the fields are assigned below.
            if (rowGroup is null)
            {
                throw new ArgumentNullException(nameof(rowGroup));
            }

            if (footer is null)
            {
                throw new ArgumentNullException(nameof(footer));
            }

            if (stream is null)
            {
                throw new ArgumentNullException(nameof(stream));
            }

            if (thriftStream is null)
            {
                throw new ArgumentNullException(nameof(thriftStream));
            }

            if (parquetOptions is null)
            {
                throw new ArgumentNullException(nameof(parquetOptions));
            }

            _rowGroup       = rowGroup;
            _footer         = footer;
            _stream         = stream;
            _thriftStream   = thriftStream;
            _parquetOptions = parquetOptions;

            // Build the path -> chunk lookup; a later chunk with the same path overwrites an earlier one.
            foreach (Thrift.ColumnChunk columnChunk in rowGroup.Columns)
            {
                string chunkPath = columnChunk.GetPath();
                _pathToChunk[chunkPath] = columnChunk;
            }
        }
        /// <summary>
        /// Creates a column writer. Besides storing the arguments, it resolves the data type
        /// handler for <paramref name="tse"/>, asks the footer to create the column chunk and
        /// a data page header, and reads the chunk's maximum repetition/definition levels.
        /// </summary>
        /// <param name="output">Output stream; also passed to the footer when creating the column chunk.</param>
        /// <param name="thriftStream">Thrift stream stored by the writer.</param>
        /// <param name="footer">Footer used to create the column chunk and data page and to look up levels.</param>
        /// <param name="tse">Thrift schema element of the column; its <c>Type</c> is used for the column chunk.</param>
        /// <param name="path">Column path passed to the footer when creating the column chunk.</param>
        /// <param name="compressionMethod">Compression method passed to the footer when creating the column chunk.</param>
        /// <param name="formatOptions">Options passed to the data type handler lookup.</param>
        /// <param name="writerOptions">Writer options stored by the writer.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="output"/>, <paramref name="thriftStream"/>, <paramref name="footer"/>
        /// or <paramref name="tse"/> is <c>null</c>.
        /// </exception>
        public ColumnarWriter(Stream output, ThriftStream thriftStream,
                              ThriftFooter footer,
                              Thrift.SchemaElement tse, List <string> path,
                              CompressionMethod compressionMethod,
                              ParquetOptions formatOptions,
                              WriterOptions writerOptions)
        {
            // footer and tse are dereferenced below, so a null would otherwise surface as a
            // NullReferenceException; output and thriftStream are validated for the same
            // fail-fast reason.
            _output            = output ?? throw new ArgumentNullException(nameof(output));
            _thriftStream      = thriftStream ?? throw new ArgumentNullException(nameof(thriftStream));
            _footer            = footer ?? throw new ArgumentNullException(nameof(footer));
            _tse               = tse ?? throw new ArgumentNullException(nameof(tse));
            _compressionMethod = compressionMethod;
            _formatOptions     = formatOptions;
            _writerOptions     = writerOptions;
            _dataTypeHandler   = DataTypeFactory.Match(tse, _formatOptions);

            _chunk = _footer.CreateColumnChunk(_compressionMethod, _output, _tse.Type, path, 0);
            _ph    = _footer.CreateDataPage(0);
            _footer.GetLevels(_chunk, out int maxRepetitionLevel, out int maxDefinitionLevel);
            _maxRepetitionLevel = maxRepetitionLevel;
            _maxDefinitionLevel = maxDefinitionLevel;
        }
Exemplo n.º 6
0
        /// <summary>
        /// Creates a row group writer. It registers a new row group with the footer, records
        /// the row count on it, remembers the stream position at which the row group starts,
        /// gives the row group an empty column chunk list and snapshots the footer's
        /// writeable schema into a list.
        /// </summary>
        /// <param name="schema">Schema stored by the writer.</param>
        /// <param name="stream">Output stream; its current position is captured as the row group start.</param>
        /// <param name="thriftStream">Thrift stream stored by the writer.</param>
        /// <param name="footer">Footer the new row group is added to.</param>
        /// <param name="compressionMethod">Compression method stored by the writer.</param>
        /// <param name="formatOptions">Format options stored by the writer (not validated here).</param>
        /// <param name="rowCount">Row count recorded on the new row group.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="schema"/>, <paramref name="stream"/>, <paramref name="thriftStream"/>
        /// or <paramref name="footer"/> is <c>null</c>.
        /// </exception>
        internal ParquetRowGroupWriter(Schema schema,
                                       Stream stream,
                                       ThriftStream thriftStream,
                                       ThriftFooter footer,
                                       CompressionMethod compressionMethod,
                                       ParquetOptions formatOptions,
                                       int rowCount)
        {
            // Validate in declaration order so the first null argument is the one reported.
            if (schema is null)
            {
                throw new ArgumentNullException(nameof(schema));
            }

            if (stream is null)
            {
                throw new ArgumentNullException(nameof(stream));
            }

            if (thriftStream is null)
            {
                throw new ArgumentNullException(nameof(thriftStream));
            }

            if (footer is null)
            {
                throw new ArgumentNullException(nameof(footer));
            }

            _schema            = schema;
            _stream            = stream;
            _thriftStream      = thriftStream;
            _footer            = footer;
            _compressionMethod = compressionMethod;
            _formatOptions     = formatOptions;
            _rowCount          = rowCount;

            // Register the row group first, then fill it in; the call order is kept as-is.
            _thriftRowGroup          = footer.AddRowGroup();
            _thriftRowGroup.Num_rows = _rowCount;

            // Position of the stream at the moment the row group is opened.
            _rgStartPos = stream.Position;

            _thriftRowGroup.Columns = new List <Thrift.ColumnChunk>();
            _thschema               = footer.GetWriteableSchema().ToList();
        }