Example #1
        public Task<bool> WriteTable(string fileId, ISessionWriter writer)
        {
            // This stream will be disposed by the session writer, so it is not disposed here
            var ms = new MemoryStream();

            var dataColAndSchema = MakeDataColumnAndSchema();

            using (var tableWriter = new Parquet.ParquetWriter(dataColAndSchema.Schema, ms))
            {
                // Uncomment to enable gzip compression for the whole file:
                //tableWriter.CompressionMethod = Parquet.CompressionMethod.Gzip;

                using (var rowGroup = tableWriter.CreateRowGroup())  // The using block ensures the row group's final details are correctly stored in the Parquet file
                {
                    foreach (var dataCol in dataColAndSchema.DataColumns)
                    {
                        rowGroup.WriteColumn(dataCol);
                    }
                }
            }

            ms.Position = 0;
            writer.StoreFileId(ms, fileId);

            return Task.FromResult(true);
        }
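
The MakeDataColumnAndSchema helper is not part of the snippet above. A minimal sketch of what it might return, assuming the Parquet.Net 3.x column API that this example writes with; the DataColAndSchema type, the field names, and the sample values are hypothetical:

        // Assumes: using System.Collections.Generic; using Parquet.Data;

        // Hypothetical container pairing a schema with its populated columns
        public class DataColAndSchema
        {
            public Schema Schema { get; set; }
            public List<DataColumn> DataColumns { get; set; }
        }

        private DataColAndSchema MakeDataColumnAndSchema()
        {
            // Example fields; real code would build these from the table being exported
            var idField   = new DataField<int>("Id");
            var nameField = new DataField<string>("Name");

            return new DataColAndSchema
            {
                Schema      = new Schema(idField, nameField),
                DataColumns = new List<DataColumn>
                {
                    // Each DataColumn pairs a field with the full array of values for
                    // that column, which is what rowGroup.WriteColumn expects
                    new DataColumn(idField,   new[] { 1, 2, 3 }),
                    new DataColumn(nameField, new[] { "a", "b", "c" })
                }
            };
        }

Because the writer works column by column rather than row by row, all values for a field are handed over as one array per row group.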
Example #2
        public static void UseParquetPushToS3Bucket(this SampleContext context)
        {
            context.PushToS3Bucket = (int numOfFiles, int numOfRecords) =>
            {
                Schema schema = new Schema(
                    new DataField<DateTime>("Timestamp"),
                    new DataField<int>("Priority"),
                    new DataField<string>("Source"),
                    new DataField<string>("Message"),
                    new DataField<IEnumerable<string>>("Tags"),
                    new StructField("InnerData",
                                    new DataField<string>("IpAddress"),
                                    new DataField<string>("Message")
                                    )
                    );

                // Get the compression method from the context
                Parquet.CompressionMethod compressionMethod = Parquet.CompressionMethod.None;

                if (!String.IsNullOrEmpty(context.CompressionMethod))
                {
                    if (context.CompressionMethod.ToLower() == "snappy")
                    {
                        compressionMethod = Parquet.CompressionMethod.Snappy;
                    }
                    else if (context.CompressionMethod.ToLower() == "gzip")
                    {
                        compressionMethod = Parquet.CompressionMethod.Gzip;
                    }
                }

                using (var client = new SampleS3Client("parquet", context.S3BucketName, context.S3BucketPath,
                                                       Amazon.RegionEndpoint.USEast1))
                {
                    for (int i = 0; i < numOfFiles; i++)
                    {
                        DateTime randDateTime = SampleData.RandDate();
                        DataSet  ds           = new DataSet(schema);

                        foreach (LogEntry entry in SampleData.GetBunchOfData(numOfRecords, randDateTime))
                        {
                            ds.Add(new Row(entry.Timestamp, entry.Priority, entry.Source, entry.Message, entry.Tags, new Row(entry.InnerData.IpAddress, entry.InnerData.Message)));
                        }

                        using (MemoryStream buffer = new MemoryStream())
                        {
                            using (var writer = new Parquet.ParquetWriter(buffer))
                            {
                                writer.Write(ds, compressionMethod);
                            }

                            // Rewind so the upload starts at the beginning of the buffer
                            buffer.Position = 0;

                            // Objects are pushed synchronously to preserve ordering.
                            client.PutObject(buffer, randDateTime);
                        }

                        SampleContext.ClearConsoleLine();
                        Console.Write($"\r{(i + 1f) / (float)numOfFiles,6:P2}");
                    }
                }
            };
        }
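
To verify a buffer before it is uploaded, the file can be read back with the matching reader from the same parquet-dotnet 2.x generation this example uses. A minimal sketch, assuming the DataSet-based ParquetReader.Read(Stream) method; DumpBufferInfo is a hypothetical helper name:

        // Assumes the parquet-dotnet 2.x DataSet API used in Example #2
        public static void DumpBufferInfo(MemoryStream buffer)
        {
            buffer.Position = 0;   // rewind before reading

            // Read the whole file back into a DataSet
            DataSet ds = Parquet.ParquetReader.Read(buffer);

            Console.WriteLine($"Rows: {ds.RowCount}, Schema: {ds.Schema}");
        }

Calling this just before client.PutObject is a quick way to confirm the row count matches numOfRecords.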