Exemple #1
0
        /// <summary>
        /// Writes <paramref name="objectInstances"/> to <paramref name="destination"/> through a
        /// <c>ParquetWriter3</c>, emitting one row group per batch of <c>writerOptions.RowGroupsSize</c> instances.
        /// </summary>
        /// <typeparam name="T">Type of the objects being serialized.</typeparam>
        /// <param name="objectInstances">Objects to serialize. Must not be null.</param>
        /// <param name="destination">Writable stream that receives the output. Must not be null.</param>
        /// <param name="schema">Schema to write with; when null it is obtained from <c>SchemaReflector.Reflect&lt;T&gt;()</c>.</param>
        /// <param name="writerOptions">Writer options; when null a default <c>WriterOptions</c> instance is created.</param>
        /// <param name="compressionMethod">Compression method assigned to the writer.</param>
        /// <returns>The schema that was used for writing (either the one passed in or the reflected one).</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="objectInstances"/> or <paramref name="destination"/> is null.
        /// </exception>
        /// <exception cref="ArgumentException"><paramref name="destination"/> is not writable.</exception>
        public static Schema Serialize<T>(IEnumerable<T> objectInstances, Stream destination,
                                          Schema schema = null,
                                          WriterOptions writerOptions = null,
                                          CompressionMethod compressionMethod = CompressionMethod.Snappy)
            where T : new()
        {
            if (objectInstances == null)
            {
                throw new ArgumentNullException(nameof(objectInstances));
            }
            if (destination == null)
            {
                throw new ArgumentNullException(nameof(destination));
            }
            if (!destination.CanWrite)
            {
                throw new ArgumentException("stream must be writeable", nameof(destination));
            }

            // If no schema is passed, reflect it from T.
            if (schema == null)
            {
                schema = SchemaReflector.Reflect<T>();
            }

            if (writerOptions == null)
            {
                writerOptions = new WriterOptions();
            }

            var extractor = new ColumnExtractor();

            using (var writer = new ParquetWriter3(schema, destination, writerOptions: writerOptions))
            {
                writer.CompressionMethod = compressionMethod;

                foreach (IEnumerable<T> batch in objectInstances.Batch(writerOptions.RowGroupsSize))
                {
                    // Materialize the batch exactly once. It is only known to be an IEnumerable<T>, so
                    // enumerating it for column extraction and again for counting could repeat work or,
                    // if the batch is produced lazily, yield an inconsistent row count.
                    T[] batchItems = batch.ToArray();

                    IReadOnlyCollection<DataColumn> columns = extractor.ExtractColumns(batchItems, schema);

                    using (ParquetRowGroupWriter groupWriter = writer.CreateRowGroup(batchItems.Length))
                    {
                        foreach (DataColumn dataColumn in columns)
                        {
                            groupWriter.Write(dataColumn);
                        }
                    }
                }
            }

            return schema;
        }
Exemple #2
0
 /// <summary>
 /// Writes the given columns to <paramref name="stream"/> as a single row group, using a
 /// <c>ParquetWriter3</c> whose compression method is set to <c>CompressionMethod.None</c>.
 /// </summary>
 /// <param name="stream">Stream handed to the writer.</param>
 /// <param name="schema">Schema handed to the writer.</param>
 /// <param name="rowCount">Row count passed to <c>CreateRowGroup</c>.</param>
 /// <param name="columns">Columns written to the row group, in the order given.</param>
 public static void WriteSingleRowGroup(this Stream stream, Schema schema, int rowCount, params DataColumn[] columns)
 {
     using (var parquetWriter = new ParquetWriter3(schema, stream))
     {
         parquetWriter.CompressionMethod = CompressionMethod.None;

         using (ParquetRowGroupWriter rowGroup = parquetWriter.CreateRowGroup(rowCount))
         {
             // Columns are written in the order supplied by the caller.
             for (int i = 0; i < columns.Length; i++)
             {
                 rowGroup.Write(columns[i]);
             }
         }
     }
 }