public static void UseAvroPushToS3Bucket(this SampleContext context)
        {
            context.PushToS3Bucket = (int numOfFiles, int numOfRecords) =>
            {
                // Resolve the Avro compression codec from the context (defaults to no compression).
                var codec = Codec.Null;

                if (!String.IsNullOrEmpty(context.CompressionMethod))
                {
                    if (context.CompressionMethod.ToLower() == "deflate")
                    {
                        codec = Codec.Deflate;
                    }
                }

                using (var client = new SampleS3Client(context.Format, context.S3BucketName,
                                                       context.S3BucketPath, Amazon.RegionEndpoint.USEast1))
                {
                    for (int i = 0; i < numOfFiles; i++)
                    {
                        DateTime randDateTime = SampleData.RandDate();
                        var      testData     = new List<LogEntry>();

                        testData.AddRange(SampleData.GetBunchOfData(numOfRecords, randDateTime));

                        using (MemoryStream buffer = new MemoryStream())
                        {
                            using (var w = AvroContainer.CreateWriter<LogEntry>(buffer, codec))
                            {
                                // The second argument is the number of records written between Avro sync markers.
                                using (var writer = new SequentialWriter<LogEntry>(w, 24))
                                {
                                    testData.ForEach(writer.Write);
                                }
                            }

                            // Objects are uploaded synchronously so they keep their original order.
                            client.PutObject(buffer, randDateTime);
                        }

                        SampleContext.ClearConsoleLine();
                        Console.Write($"\r{(i + 1f) / (float)numOfFiles,6:P2}");
                    }
                }
            };
        }
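
A minimal usage sketch for the Avro variant (illustrative only; it assumes a SampleContext instance whose Format, CompressionMethod, S3BucketName and S3BucketPath members are writable, which this listing does not show):

        // Hypothetical call site: wire up the Avro delegate, then push 10 files of 1,000 records each.
        var context = new SampleContext
        {
            Format            = "avro",
            CompressionMethod = "deflate",
            S3BucketName      = "my-sample-bucket",   // assumed bucket name
            S3BucketPath      = "logs/"               // assumed key prefix
        };

        context.UseAvroPushToS3Bucket();
        context.PushToS3Bucket(10, 1000);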
Example #2
        public static void UseParquetPushToS3Bucket(this SampleContext context)
        {
            context.PushToS3Bucket = (int numOfFiles, int numOfRecords) =>
            {
                Schema schema = new Schema(
                    new DataField<DateTime>("Timestamp"),
                    new DataField<int>("Priority"),
                    new DataField<string>("Source"),
                    new DataField<string>("Message"),
                    new DataField<IEnumerable<string>>("Tags"),
                    new StructField("InnerData",
                                    new DataField<string>("IpAddress"),
                                    new DataField<string>("Message")
                                    )
                    );

                // Resolve the Parquet compression method from the context (defaults to no compression).
                Parquet.CompressionMethod compressionMethod = Parquet.CompressionMethod.None;

                if (!String.IsNullOrEmpty(context.CompressionMethod))
                {
                    if (context.CompressionMethod.ToLower() == "snappy")
                    {
                        compressionMethod = Parquet.CompressionMethod.Snappy;
                    }
                    else if (context.CompressionMethod.ToLower() == "gzip")
                    {
                        compressionMethod = Parquet.CompressionMethod.Gzip;
                    }
                }

                using (var client = new SampleS3Client("parquet", context.S3BucketName, context.S3BucketPath,
                                                       Amazon.RegionEndpoint.USEast1))
                {
                    for (int i = 0; i < numOfFiles; i++)
                    {
                        DateTime randDateTime = SampleData.RandDate();
                        DataSet  ds           = new DataSet(schema);

                        // Each LogEntry becomes a Row; the nested Row fills the "InnerData" struct field.
                        foreach (LogEntry entry in SampleData.GetBunchOfData(numOfRecords, randDateTime))
                        {
                            ds.Add(new Row(entry.Timestamp, entry.Priority, entry.Source, entry.Message, entry.Tags,
                                           new Row(entry.InnerData.IpAddress, entry.InnerData.Message)));
                        }

                        using (MemoryStream buffer = new MemoryStream())
                        {
                            using (var writer = new Parquet.ParquetWriter(buffer))
                            {
                                writer.Write(ds, compressionMethod);
                            }

                            // Objects are uploaded synchronously so they keep their original order.
                            client.PutObject(buffer, randDateTime);
                        }

                        SampleContext.ClearConsoleLine();
                        Console.Write($"\r{(i + 1f) / (float)numOfFiles,6:P2}");
                    }
                }
            };
        }
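
The Parquet variant would be wired the same way; a hedged sketch of that call site (again assuming a populated, writable SampleContext as above):

        // Hypothetical call site: same delegate, Parquet output with gzip compression this time.
        context.CompressionMethod = "gzip";   // assumes the property is settable
        context.UseParquetPushToS3Bucket();
        context.PushToS3Bucket(10, 1000);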