Code example #1
0
 /// <summary>
 /// Writes the entire table to the file as a single row group.
 /// </summary>
 /// <param name="writer">Destination parquet writer.</param>
 /// <param name="table">Table whose columns will be written.</param>
 public static void Write(this ParquetWriter writer, Table table)
 {
     // Open one row group, delegate the per-column work to the
     // row-group-level extension, and dispose to flush it.
     using (var rowGroup = writer.CreateRowGroup())
     {
         rowGroup.Write(table);
     }
 }
Code example #2
0
 /// <summary>
 /// Writes all of the table's data columns into this row group.
 /// </summary>
 /// <param name="writer">Row group writer receiving the columns.</param>
 /// <param name="table">Table to extract the data columns from.</param>
 public static void Write(this ParquetRowGroupWriter writer, Table table)
 {
     // Flatten the table into physical data columns, then emit each one.
     var columns = table.ExtractDataColumns();
     foreach (var column in columns)
     {
         writer.WriteColumn(column);
     }
 }
Code example #3
0
        /// <summary>
        /// Creates a new row group and returns a writer for it.
        /// </summary>
        public ParquetRowGroupWriter CreateRowGroup()
        {
            // Record that this writer has produced content before handing
            // out a row group writer.
            _dataWritten = true;

            var rowGroupWriter = new ParquetRowGroupWriter(
                _schema, Stream, ThriftStream, _footer, CompressionMethod, _formatOptions);

            // Keep track of every opened row group writer so it can be
            // finalised together with this writer.
            _openedWriters.Add(rowGroupWriter);

            return rowGroupWriter;
        }
Code example #4
0
        /// <summary>
        /// Serialises a collection of classes into a Parquet stream
        /// </summary>
        /// <typeparam name="T">Class type</typeparam>
        /// <param name="objectInstances">Collection of classes</param>
        /// <param name="destination">Destination stream</param>
        /// <param name="schema">Optional schema to use. When not specified the class schema will be discovered and everything possible will be
        /// written to the stream. If you want to write only a subset of class properties please specify the schema yourself.
        /// </param>
        /// <param name="compressionMethod"><see cref="CompressionMethod"/></param>
        /// <param name="rowGroupSize">Maximum number of rows per row group.</param>
        /// <param name="append">When true, appends to an existing Parquet stream instead of overwriting it.</param>
        /// <returns>The schema that was used for writing (reflected when none was supplied).</returns>
        /// <exception cref="ArgumentNullException">When <paramref name="objectInstances"/> or <paramref name="destination"/> is null.</exception>
        /// <exception cref="ArgumentException">When <paramref name="destination"/> is not writeable.</exception>
        public static Schema Serialize <T>(IEnumerable <T> objectInstances, Stream destination,
                                           Schema schema = null,
                                           CompressionMethod compressionMethod = CompressionMethod.Snappy,
                                           int rowGroupSize = 5000,
                                           bool append      = false)
            where T : new()
        {
            if (objectInstances == null)
            {
                throw new ArgumentNullException(nameof(objectInstances));
            }
            if (destination == null)
            {
                throw new ArgumentNullException(nameof(destination));
            }
            if (!destination.CanWrite)
            {
                throw new ArgumentException("stream must be writeable", nameof(destination));
            }

            //if schema is not passed reflect it
            if (schema == null)
            {
                schema = SchemaReflector.Reflect <T>();
            }

            // The bridge depends only on typeof(T), so build it once instead of
            // once per batch.
            var bridge = new ClrBridge(typeof(T));

            using (var writer = new ParquetWriter(schema, destination, append: append))
            {
                writer.CompressionMethod = compressionMethod;

                DataField[] dataFields = schema.GetDataFields();

                foreach (IEnumerable <T> batch in objectInstances.Batch(rowGroupSize))
                {
                    // Materialise the batch once; it is consumed per data field below.
                    T[] batchArray = batch.ToArray();

                    DataColumn[] columns = dataFields
                                           .Select(df => bridge.BuildColumn(df, batchArray, batchArray.Length))
                                           .ToArray();

                    // One row group per batch, flushed on dispose.
                    using (ParquetRowGroupWriter groupWriter = writer.CreateRowGroup())
                    {
                        foreach (DataColumn dataColumn in columns)
                        {
                            groupWriter.WriteColumn(dataColumn);
                        }
                    }
                }
            }

            return schema;
        }
Code example #5
0
        /// <summary>
        /// Serialises a collection of classes into a Parquet stream.
        /// </summary>
        /// <typeparam name="T">Class type</typeparam>
        /// <param name="objectInstances">Collection of classes</param>
        /// <param name="destination">Destination stream</param>
        /// <param name="schema">Optional schema to use; reflected from <typeparamref name="T"/> when null.</param>
        /// <param name="writerOptions">Optional writer options; defaults are used when null.</param>
        /// <param name="compressionMethod"><see cref="CompressionMethod"/></param>
        /// <returns>The schema that was used for writing.</returns>
        /// <exception cref="ArgumentNullException">When <paramref name="objectInstances"/> or <paramref name="destination"/> is null.</exception>
        /// <exception cref="ArgumentException">When <paramref name="destination"/> is not writeable.</exception>
        public static Schema Serialize <T>(IEnumerable <T> objectInstances, Stream destination,
                                           Schema schema = null,
                                           WriterOptions writerOptions         = null,
                                           CompressionMethod compressionMethod = CompressionMethod.Snappy)
            where T : new()
        {
            if (objectInstances == null)
            {
                throw new ArgumentNullException(nameof(objectInstances));
            }
            if (destination == null)
            {
                throw new ArgumentNullException(nameof(destination));
            }
            if (!destination.CanWrite)
            {
                throw new ArgumentException("stream must be writeable", nameof(destination));
            }

            //if schema is not passed reflect it
            if (schema == null)
            {
                schema = SchemaReflector.Reflect <T>();
            }

            if (writerOptions == null)
            {
                writerOptions = new WriterOptions();
            }

            var extractor = new ColumnExtractor();

            using (var writer = new ParquetWriter3(schema, destination, writerOptions: writerOptions))
            {
                writer.CompressionMethod = compressionMethod;

                foreach (IEnumerable <T> batch in objectInstances.Batch(writerOptions.RowGroupsSize))
                {
                    // Materialise the batch exactly once: it was previously
                    // enumerated twice (by ExtractColumns and by Count()),
                    // which re-runs lazy sequences.
                    T[] batchArray = batch.ToArray();

                    IReadOnlyCollection <DataColumn> columns = extractor.ExtractColumns(batchArray, schema);

                    using (ParquetRowGroupWriter groupWriter = writer.CreateRowGroup(batchArray.Length))
                    {
                        foreach (DataColumn dataColumn in columns)
                        {
                            groupWriter.Write(dataColumn);
                        }
                    }
                }
            }

            return schema;
        }
Code example #6
0
 /// <summary>
 /// Writes an uncompressed parquet file containing exactly one row group.
 /// </summary>
 /// <param name="stream">Destination stream for the parquet data.</param>
 /// <param name="schema">Schema describing the columns.</param>
 /// <param name="columns">Data columns to write into the single row group.</param>
 public static void WriteSingleRowGroupParquetFile(this Stream stream, Schema schema, params DataColumn[] columns)
 {
     using (var parquetWriter = new ParquetWriter(schema, stream))
     {
         // Compression is disabled so the output is easy to inspect.
         parquetWriter.CompressionMethod = CompressionMethod.None;

         using (ParquetRowGroupWriter groupWriter = parquetWriter.CreateRowGroup())
         {
             for (int i = 0; i < columns.Length; i++)
             {
                 groupWriter.WriteColumn(columns[i]);
             }
         }
     }
 }