/// <summary>
/// Writes the next data column to the parquet stream. Note that columns must be written in the
/// order they are declared in the file schema.
/// </summary>
/// <param name="column">Column to write.</param>
public void WriteColumn(DataColumn column)
{
   if (column == null)
   {
      throw new ArgumentNullException(nameof(column));
   }

   if (RowCount == null)
   {
      // the first column that carries data (or has no repetitions) fixes the row count for this row group
      if (column.Data.Length > 0 || column.Field.MaxRepetitionLevel == 0)
      {
         RowCount = column.CalculateRowCount();
      }
   }

   // columns must arrive in schema order, so validate against the next expected schema element
   Thrift.SchemaElement tse = _thschema[_colIdx];
   if (!column.Field.Equals(tse))
   {
      throw new ArgumentException(
         $"cannot write this column, expected '{tse.Name}', passed: '{column.Field.Name}'",
         nameof(column));
   }

   IDataTypeHandler dataTypeHandler = DataTypeFactory.Match(tse, _formatOptions);
   _colIdx += 1;

   List<string> path = _footer.GetPath(tse);

   var writer = new DataColumnWriter(_stream, _thriftStream, _footer, tse,
      _compressionMethod, _compressionLevel, (int)(RowCount ?? 0));

   Thrift.ColumnChunk chunk = writer.Write(path, column, dataTypeHandler);
   _thriftRowGroup.Columns.Add(chunk);
}
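// Illustrative usage sketch, not part of this class: through the library's public API the caller
// typically creates a row group writer and feeds it one DataColumn per schema field, in schema
// order. The ParquetWriter/CreateRowGroup surface below is assumed from the library's documented
// v3-style API and may differ slightly between versions:
//
//    var id = new DataField<int>("id");
//
//    using (var parquetWriter = new ParquetWriter(new Schema(id), fileStream))
//    using (ParquetRowGroupWriter groupWriter = parquetWriter.CreateRowGroup())
//    {
//       groupWriter.WriteColumn(new DataColumn(id, new[] { 1, 2, 3 }));
//    }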
/// <summary>
/// Writes out the dataset to the output stream.
/// </summary>
/// <param name="dataSet">Dataset to write.</param>
/// <param name="compression">Compression method.</param>
/// <param name="append">When true, appends to the file, otherwise creates a new file.</param>
public void Write(DataSet dataSet, CompressionMethod compression = CompressionMethod.Gzip, bool append = false)
{
   PrepareFile(dataSet, append);
   _footer.CustomMetadata = dataSet.Metadata.Custom;

   int offset = 0;
   int count;
   List<Thrift.SchemaElement> writeableSchema = _footer.GetWriteableSchema().ToList();

   do
   {
      // write the dataset in row-group-sized batches
      count = Math.Min(_writerOptions.RowGroupsSize, dataSet.Count - offset);
      Thrift.RowGroup rg = _footer.AddRowGroup();
      long rgStartPos = Stream.Position;

      rg.Columns = new List<Thrift.ColumnChunk>();
      foreach (Thrift.SchemaElement tse in writeableSchema)
      {
         List<string> path = _footer.GetPath(tse);
         string flatPath = string.Join(Schema.PathSeparator, path);
         var cw = new ColumnarWriter(Stream, ThriftStream, _footer, tse, path,
            compression, _formatOptions, _writerOptions);

         IList values = dataSet.GetColumn(flatPath, offset, count);
         Thrift.ColumnChunk chunk = cw.Write(offset, count, values);
         rg.Columns.Add(chunk);
      }

      // row group's size is a sum of _uncompressed_ sizes of all columns in it, including the headers;
      // luckily ColumnChunk already contains the size of page + header in its metadata
      rg.Total_byte_size = rg.Columns.Sum(c => c.Meta_data.Total_compressed_size);
      rg.Num_rows = count;

      offset += _writerOptions.RowGroupsSize;
   }
   while (offset < dataSet.Count);

   _dataWritten = true;
}
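// Illustrative usage sketch, not part of this class: with the DataSet-based API the caller builds
// the rows up front and hands the whole set to Write, which splits it into row groups of
// _writerOptions.RowGroupsSize rows. The DataSet/ParquetWriter construction below is assumed from
// the library's older documented API and may not match this exact version:
//
//    var ds = new DataSet(schema);   // schema declared elsewhere
//    ds.Add(1, "London");
//    ds.Add(2, "Derby");
//
//    using (var writer = new ParquetWriter(outputStream))
//    {
//       writer.Write(ds, CompressionMethod.Snappy, append: false);
//    }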