示例#1
0
 /// <summary>
 /// Writes <see cref="DataSet"/> to a target file.
 /// </summary>
 /// <param name="dataSet"><see cref="DataSet"/> to write</param>
 /// <param name="fileName">Path to a file to write to.</param>
 /// <param name="compression">Compression method</param>
 /// <param name="formatOptions">Parquet options, optional.</param>
 /// <param name="writerOptions">Writer options, optional.</param>
 /// <param name="append">When true, the existing file at <paramref name="fileName"/> is opened for reading and writing
 /// (it must already exist) and the flag is forwarded to the writer; otherwise the file is created, or truncated
 /// if it already exists, and a new Parquet file is written.</param>
 public static void WriteFile(
     DataSet dataSet, string fileName, CompressionMethod compression = CompressionMethod.Gzip,
     ParquetOptions formatOptions = null, WriterOptions writerOptions = null, bool append = false)
 {
     // File.Create truncates the target, which would destroy the very data an append is meant to extend,
     // so when appending the existing file is opened read/write instead.
     using (Stream fs = append
         ? System.IO.File.Open(fileName, System.IO.FileMode.Open, System.IO.FileAccess.ReadWrite)
         : System.IO.File.Create(fileName))
     {
         using (var writer = new ParquetWriter(fs, formatOptions, writerOptions))
         {
             // Forward the flag; previously it was accepted by this method but silently dropped.
             writer.Write(dataSet, compression, append);
         }
     }
 }
示例#2
0
        /// <summary>
        /// Writes a sequence of class instances to a stream in Parquet format, one row group per batch
        /// of <c>writerOptions.RowGroupsSize</c> instances.
        /// </summary>
        /// <typeparam name="T">Type of the instances to serialise.</typeparam>
        /// <param name="objectInstances">Instances to write. Must not be null.</param>
        /// <param name="destination">Stream to write to. Must not be null and must be writeable.</param>
        /// <param name="schema">Schema to write with; when null it is reflected from <typeparamref name="T"/>.</param>
        /// <param name="writerOptions">Writer options; when null a default <see cref="WriterOptions"/> instance is used.</param>
        /// <param name="compressionMethod">Compression method assigned to the writer.</param>
        /// <returns>The schema that was used for writing (either the one passed in or the reflected one).</returns>
        /// <exception cref="ArgumentNullException"><paramref name="objectInstances"/> or <paramref name="destination"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="destination"/> is not writeable.</exception>
        public static Schema Serialize<T>(IEnumerable<T> objectInstances, Stream destination,
                                          Schema schema = null,
                                          WriterOptions writerOptions = null,
                                          CompressionMethod compressionMethod = CompressionMethod.Snappy)
            where T : new()
        {
            if (objectInstances == null)
            {
                throw new ArgumentNullException(nameof(objectInstances));
            }
            if (destination == null)
            {
                throw new ArgumentNullException(nameof(destination));
            }
            if (!destination.CanWrite)
            {
                throw new ArgumentException("stream must be writeable", nameof(destination));
            }

            // if schema is not passed reflect it
            if (schema == null)
            {
                schema = SchemaReflector.Reflect<T>();
            }

            if (writerOptions == null)
            {
                writerOptions = new WriterOptions();
            }

            var extractor = new ColumnExtractor();

            using (var writer = new ParquetWriter3(schema, destination, writerOptions: writerOptions))
            {
                writer.CompressionMethod = compressionMethod;

                foreach (IEnumerable<T> batch in objectInstances.Batch(writerOptions.RowGroupsSize))
                {
                    // Materialise the batch once: it is needed both for column extraction and for the
                    // row count, and enumerating a lazy sequence twice may repeat work or yield
                    // inconsistent results.
                    T[] batchItems = batch.ToArray();

                    IReadOnlyCollection<DataColumn> columns = extractor.ExtractColumns(batchItems, schema);

                    using (ParquetRowGroupWriter groupWriter = writer.CreateRowGroup(batchItems.Length))
                    {
                        foreach (DataColumn dataColumn in columns)
                        {
                            groupWriter.Write(dataColumn);
                        }
                    }
                }
            }

            return schema;
        }
示例#3
0
        /// <summary>
        /// Creates an instance of parquet writer on top of a stream
        /// </summary>
        /// <param name="output">Writeable, seekable stream</param>
        /// <param name="formatOptions">Additional options; a default <see cref="ParquetOptions"/> is used when null.</param>
        /// <param name="writerOptions">The writer options; a default <see cref="WriterOptions"/> is used when null.</param>
        /// <exception cref="ArgumentNullException">Output is null.</exception>
        /// <exception cref="ArgumentException">Output stream is not writeable</exception>
        public ParquetWriter(Stream output, ParquetOptions formatOptions = null, WriterOptions writerOptions = null)
            : base(new GapStream(ValidateOutput(output)))
        {
            _formatOptions = formatOptions ?? new ParquetOptions();
            _writerOptions = writerOptions ?? new WriterOptions();
        }

        /// <summary>
        /// Validates the output stream before it is wrapped and handed to the base constructor, so that
        /// an invalid argument is reported with the documented exception instead of reaching the wrapper first.
        /// </summary>
        /// <param name="output">Stream passed to the constructor.</param>
        /// <returns>The same stream instance when it is valid.</returns>
        private static Stream ValidateOutput(Stream output)
        {
            if (output == null)
            {
                throw new ArgumentNullException(nameof(output));
            }

            if (!output.CanWrite)
            {
                throw new ArgumentException("stream is not writeable", nameof(output));
            }

            return output;
        }
示例#4
0
        /// <summary>
        /// Creates an instance of parquet writer on top of a stream
        /// </summary>
        /// <param name="output">Writeable, seekable stream</param>
        /// <param name="formatOptions">Additional options; a default <see cref="ParquetOptions"/> is used when null.</param>
        /// <param name="writerOptions">The writer options; a default <see cref="WriterOptions"/> is used when null.</param>
        /// <exception cref="ArgumentNullException">Output is null.</exception>
        /// <exception cref="ArgumentException">Output stream is not writeable</exception>
        public ParquetWriter(Stream output, ParquetOptions formatOptions = null, WriterOptions writerOptions = null)
            : base(ValidateOutput(output))
        {
            _output        = output;
            _formatOptions = formatOptions ?? new ParquetOptions();
            _writerOptions = writerOptions ?? new WriterOptions();
            _meta          = new FileMetadataBuilder(_writerOptions);

            // The dictionary writer is built on top of the RLE writer, so the RLE writer must exist first.
            _plainWriter = new PlainValuesWriter(_formatOptions);
            _rleWriter   = new RunLengthBitPackingHybridValuesWriter();
            _dicWriter   = new PlainDictionaryValuesWriter(_rleWriter);
        }

        /// <summary>
        /// Validates the output stream before it is handed to the base constructor, so that an invalid
        /// argument is reported with the documented exception instead of reaching the base class first.
        /// </summary>
        /// <param name="output">Stream passed to the constructor.</param>
        /// <returns>The same stream instance when it is valid.</returns>
        private static Stream ValidateOutput(Stream output)
        {
            if (output == null)
            {
                throw new ArgumentNullException(nameof(output));
            }

            if (!output.CanWrite)
            {
                throw new ArgumentException("stream is not writeable", nameof(output));
            }

            return output;
        }
示例#5
0
        /// <summary>
        /// Creates an instance of parquet writer on top of a stream
        /// </summary>
        /// <param name="output">Writeable, seekable stream</param>
        /// <param name="formatOptions">Additional options; a default <see cref="ParquetOptions"/> is used when null.</param>
        /// <param name="writerOptions">The writer options; a default <see cref="WriterOptions"/> is used when null.</param>
        /// <exception cref="ArgumentNullException">Output is null.</exception>
        /// <exception cref="ArgumentException">Output stream is not writeable</exception>
        public ParquetWriter(Stream output, ParquetOptions formatOptions = null, WriterOptions writerOptions = null)
            : base(new PositionTrackingStream(ValidateOutput(output)))
        {
            _formatOptions = formatOptions ?? new ParquetOptions();
            _writerOptions = writerOptions ?? new WriterOptions();
            _meta          = new FileMetadataBuilder();
        }

        /// <summary>
        /// Validates the output stream before it is wrapped and handed to the base constructor, so that
        /// an invalid argument is reported with the documented exception instead of reaching the wrapper first.
        /// </summary>
        /// <param name="output">Stream passed to the constructor.</param>
        /// <returns>The same stream instance when it is valid.</returns>
        private static Stream ValidateOutput(Stream output)
        {
            if (output == null)
            {
                throw new ArgumentNullException(nameof(output));
            }

            if (!output.CanWrite)
            {
                throw new ArgumentException("stream is not writeable", nameof(output));
            }

            return output;
        }
示例#6
0
        /// <summary>
        /// Creates an instance of parquet writer on top of a stream
        /// </summary>
        /// <param name="schema">Schema the writer is created for. Must not be null.</param>
        /// <param name="output">Writeable, seekable stream</param>
        /// <param name="formatOptions">Additional options; a default <see cref="ParquetOptions"/> is used when null.</param>
        /// <param name="writerOptions">The writer options; a default <see cref="WriterOptions"/> is used when null.</param>
        /// <param name="append">Forwarded to file preparation. NOTE(review): presumably true means
        /// <paramref name="output"/> already holds a Parquet file to be extended - confirm against PrepareFile.</param>
        /// <exception cref="ArgumentNullException">Output is null, or schema is null.</exception>
        /// <exception cref="ArgumentException">Output stream is not writeable</exception>
        public ParquetWriter3(Schema schema, Stream output, ParquetOptions formatOptions = null, WriterOptions writerOptions = null, bool append = false)
            : base(new GapStream(ValidateOutput(output)))
        {
            _schema        = schema ?? throw new ArgumentNullException(nameof(schema));
            _formatOptions = formatOptions ?? new ParquetOptions();
            _writerOptions = writerOptions ?? new WriterOptions();

            // Runs last: all fields above are assigned before file preparation starts.
            PrepareFile(append);
        }

        /// <summary>
        /// Validates the output stream before it is wrapped and handed to the base constructor, so that
        /// an invalid argument is reported with the documented exception instead of reaching the wrapper first.
        /// </summary>
        /// <param name="output">Stream passed to the constructor.</param>
        /// <returns>The same stream instance when it is valid.</returns>
        private static Stream ValidateOutput(Stream output)
        {
            if (output == null)
            {
                throw new ArgumentNullException(nameof(output));
            }

            if (!output.CanWrite)
            {
                throw new ArgumentException("stream is not writeable", nameof(output));
            }

            return output;
        }
示例#7
0
 /// <summary>
 /// Writes a <see cref="DataSet"/> to the given stream through a short-lived <see cref="ParquetWriter"/>
 /// that is disposed before this method returns.
 /// </summary>
 /// <param name="dataSet">The <see cref="DataSet"/> to be written.</param>
 /// <param name="destination">Stream receiving the output.</param>
 /// <param name="compression">Compression method handed to the writer.</param>
 /// <param name="formatOptions">Optional Parquet options passed to the writer's constructor.</param>
 /// <param name="writerOptions">Optional writer options passed to the writer's constructor.</param>
 /// <param name="append">When true, assumes that this stream contains existing file and appends data to it, otherwise writes a new Parquet file.</param>
 public static void Write(
     DataSet dataSet,
     Stream destination,
     CompressionMethod compression = CompressionMethod.Gzip,
     ParquetOptions formatOptions = null,
     WriterOptions writerOptions = null,
     bool append = false)
 {
     using (ParquetWriter parquetWriter = new ParquetWriter(destination, formatOptions, writerOptions))
     {
         parquetWriter.Write(dataSet, compression, append);
     }
 }