コード例 #1
0
 /// <summary>
 /// Writes every data column extracted from a table into this row group.
 /// </summary>
 /// <param name="writer">Row group writer that receives the columns.</param>
 /// <param name="table">Table whose columns are obtained via <c>ExtractDataColumns()</c>.</param>
 public static void Write(this ParquetRowGroupWriter writer, Table table)
 {
     // Pull the columns out of the table first, then push them to the writer
     // one at a time in the order they were produced.
     var extractedColumns = table.ExtractDataColumns();

     foreach (DataColumn column in extractedColumns)
     {
         writer.WriteColumn(column);
     }
 }
コード例 #2
0
        /// <summary>
        /// Serialises a collection of class instances into a Parquet stream, writing one row group
        /// for each batch yielded by <c>objectInstances.Batch(rowGroupSize)</c>.
        /// </summary>
        /// <typeparam name="T">Class type; must expose a public parameterless constructor.</typeparam>
        /// <param name="objectInstances">Collection of class instances to serialise.</param>
        /// <param name="destination">Destination stream; must be writeable.</param>
        /// <param name="schema">Optional schema to use. When not specified the class schema will be discovered and everything possible will be
        /// written to the stream. If you want to write only a subset of class properties please specify the schema yourself.
        /// </param>
        /// <param name="compressionMethod"><see cref="CompressionMethod"/> assigned to the writer. Defaults to Snappy.</param>
        /// <param name="rowGroupSize">Batch size used to split <paramref name="objectInstances"/> into row groups.
        /// Must be greater than zero. Defaults to 5000.</param>
        /// <param name="append">Forwarded to the <c>append</c> argument of the <c>ParquetWriter</c> constructor. Defaults to false.</param>
        /// <returns>The schema used for writing: the supplied <paramref name="schema"/>, or the reflected one when none was supplied.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="objectInstances"/> or <paramref name="destination"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="destination"/> is not writeable.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="rowGroupSize"/> is zero or negative.</exception>
        public static Schema Serialize<T>(IEnumerable<T> objectInstances, Stream destination,
                                          Schema schema = null,
                                          CompressionMethod compressionMethod = CompressionMethod.Snappy,
                                          int rowGroupSize = 5000,
                                          bool append = false)
            where T : new()
        {
            if (objectInstances == null)
            {
                throw new ArgumentNullException(nameof(objectInstances));
            }
            if (destination == null)
            {
                throw new ArgumentNullException(nameof(destination));
            }
            if (!destination.CanWrite)
            {
                throw new ArgumentException("stream must be writeable", nameof(destination));
            }
            // Reject a meaningless batch size up front, before a writer is created over the
            // destination stream, so the caller gets a clear argument error.
            if (rowGroupSize <= 0)
            {
                throw new ArgumentOutOfRangeException(nameof(rowGroupSize), rowGroupSize,
                                                      "row group size must be greater than zero");
            }

            // If a schema is not passed, reflect it from T.
            if (schema == null)
            {
                schema = SchemaReflector.Reflect<T>();
            }

            using (var writer = new ParquetWriter(schema, destination, append: append))
            {
                writer.CompressionMethod = compressionMethod;

                // The data fields depend only on the schema, so resolve them once for all batches.
                DataField[] dataFields = schema.GetDataFields();

                foreach (IEnumerable<T> batch in objectInstances.Batch(rowGroupSize))
                {
                    var bridge = new ClrBridge(typeof(T));

                    // Materialise the batch once; every data field reads from the same array.
                    T[] batchArray = batch.ToArray();

                    // Build all columns before opening the row group.
                    DataColumn[] columns = dataFields
                                           .Select(df => bridge.BuildColumn(df, batchArray, batchArray.Length))
                                           .ToArray();

                    using (ParquetRowGroupWriter groupWriter = writer.CreateRowGroup())
                    {
                        foreach (DataColumn dataColumn in columns)
                        {
                            groupWriter.WriteColumn(dataColumn);
                        }
                    }
                }
            }

            return schema;
        }
コード例 #3
0
 /// <summary>
 /// Creates a Parquet writer over the stream and writes the supplied columns
 /// into one row group, with the compression method set to <c>CompressionMethod.None</c>.
 /// </summary>
 /// <param name="stream">Stream the writer is created over.</param>
 /// <param name="schema">Schema handed to the writer.</param>
 /// <param name="columns">Columns written to the row group, in the order given.</param>
 public static void WriteSingleRowGroupParquetFile(this Stream stream, Schema schema, params DataColumn[] columns)
 {
     using (ParquetWriter parquetWriter = new ParquetWriter(schema, stream))
     {
         parquetWriter.CompressionMethod = CompressionMethod.None;

         // Exactly one row group is opened; all columns go into it.
         using (ParquetRowGroupWriter rowGroup = parquetWriter.CreateRowGroup())
         {
             for (int index = 0; index < columns.Length; index++)
             {
                 rowGroup.WriteColumn(columns[index]);
             }
         }
     }
 }