        internal ParquetRowGroupWriter(Schema schema,
                                       Stream stream,
                                       ThriftStream thriftStream,
                                       ThriftFooter footer,
                                       CompressionMethod compressionMethod,
                                       ParquetOptions formatOptions)
        {
            _schema            = schema ?? throw new ArgumentNullException(nameof(schema));
            _stream            = stream ?? throw new ArgumentNullException(nameof(stream));
            _thriftStream      = thriftStream ?? throw new ArgumentNullException(nameof(thriftStream));
            _footer            = footer ?? throw new ArgumentNullException(nameof(footer));
            _compressionMethod = compressionMethod;
            _formatOptions     = formatOptions;

            // register a new row group in the footer and remember where it starts
            // in the stream so column chunk offsets can be computed later
            _thriftRowGroup         = _footer.AddRowGroup();
            _rgStartPos             = _stream.Position;
            _thriftRowGroup.Columns = new List<Thrift.ColumnChunk>();
            _thschema               = _footer.GetWriteableSchema();
        }
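This constructor is internal, so callers never invoke it directly; the parent writer creates it when a new row group is opened. A minimal usage sketch, assuming a Parquet.Net 3.x-style API where ParquetWriter.CreateRowGroup() hands out the row group writer (the identifiers below are illustrative and version-dependent):

// Sketch only, assuming a Parquet.Net 3.x-style API; identifiers are illustrative.
using System.IO;
using Parquet;       // assumed namespace for ParquetWriter / CompressionMethod
using Parquet.Data;  // assumed namespace for Schema / DataField / DataColumn

var idField = new DataField<int>("id");
var schema  = new Schema(idField);

using (Stream fs = File.Create("test.parquet"))
using (var writer = new ParquetWriter(schema, fs))
using (ParquetRowGroupWriter rg = writer.CreateRowGroup()) // invokes the internal ctor above
{
    // one column chunk per schema field, written sequentially into the same stream
    rg.WriteColumn(new DataColumn(idField, new[] { 1, 2, 3 }));
}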
Example #2
        /// <summary>
        /// Writes the dataset to the output stream
        /// </summary>
        /// <param name="dataSet">Dataset to write</param>
        /// <param name="compression">Compression method</param>
        /// <param name="append">When true, appends to the file, otherwise creates a new file.</param>
        public void Write(DataSet dataSet, CompressionMethod compression = CompressionMethod.Gzip, bool append = false)
        {
            PrepareFile(dataSet, append);
            _footer.CustomMetadata = dataSet.Metadata.Custom;

            int offset = 0;
            int count;
            List<Thrift.SchemaElement> writeableSchema = _footer.GetWriteableSchema().ToList();

            do
            {
                count = Math.Min(_writerOptions.RowGroupsSize, dataSet.Count - offset);
                Thrift.RowGroup rg         = _footer.AddRowGroup();
                long            rgStartPos = Stream.Position; // where this row group starts in the output stream

                rg.Columns = new List<Thrift.ColumnChunk>();

                foreach (Thrift.SchemaElement tse in writeableSchema)
                {
                    List<string>  path     = _footer.GetPath(tse);
                    string        flatPath = string.Join(Schema.PathSeparator, path);
                    var           cw       = new ColumnarWriter(Stream, ThriftStream, _footer, tse, path, compression, _formatOptions, _writerOptions);

                    IList values             = dataSet.GetColumn(flatPath, offset, count);
                    Thrift.ColumnChunk chunk = cw.Write(offset, count, values);
                    rg.Columns.Add(chunk);
                }

                //per the Parquet spec, a row group's total_byte_size is the sum of the
                //_uncompressed_ sizes of all columns in it, including the page headers;
                //luckily ColumnChunk already carries page+header sizes in its metadata
                rg.Total_byte_size = rg.Columns.Sum(c => c.Meta_data.Total_uncompressed_size);
                rg.Num_rows        = count;

                offset += _writerOptions.RowGroupsSize;
            } while (offset < dataSet.Count);

            _dataWritten = true;
        }
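For completeness, a minimal usage sketch for the DataSet overload above, assuming the legacy DataSet-based Parquet.Net API; the field-declaration type (DataField<T> here, SchemaElement<T> in earlier versions) and the ParquetWriter(Stream) constructor vary between library versions, so treat the identifiers as illustrative:

// Sketch only: build a small DataSet and write it with Snappy compression.
using System.IO;
using Parquet;       // assumed namespace for ParquetWriter / CompressionMethod
using Parquet.Data;  // assumed namespace for DataSet / DataField

var ds = new DataSet(
    new DataField<int>("id"),
    new DataField<string>("city"));
ds.Add(1, "London");
ds.Add(2, "Derby");

using (Stream fs = File.Create("cities.parquet"))
using (var writer = new ParquetWriter(fs))
{
    // rows are split into row groups of at most RowGroupsSize rows each
    writer.Write(ds, CompressionMethod.Snappy);
}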