/// <summary>
/// Uploads the UTF-8 text "123" under a random object name, downloads it again and
/// asserts the content is unchanged. The object is deleted in a finally block.
/// </summary>
public async Task download_objects()
{
    string objectName = Guid.NewGuid().ToString();

    using var client = new RavenGoogleCloudClient(GoogleCloudFactAttribute.GoogleCloudSettings);
    try
    {
        byte[] payload = Encoding.UTF8.GetBytes("123");
        await client.UploadObjectAsync(objectName, new MemoryStream(payload));

        // Disposing the reader also disposes the downloaded stream it wraps.
        using var reader = new StreamReader(client.DownloadObject(objectName));
        Assert.Equal("123", reader.ReadToEnd());
    }
    finally
    {
        // Runs whether or not the upload/assertion succeeded.
        await client.DeleteObjectAsync(objectName);
    }
}
/// <summary>
/// Returns the stream obtained from the client's <c>DownloadObject</c> call for
/// <paramref name="path"/>, wrapped in an already-completed task.
/// </summary>
/// <param name="path">Object path handed to the client unchanged.</param>
protected override Task<Stream> GetStream(string path)
{
    Stream downloaded = _client.DownloadObject(path);
    return Task.FromResult(downloaded);
}
/// <summary>
/// Stores 100 orders, runs an OLAP ETL script that loads them with <c>noPartition()</c>,
/// then downloads the single resulting parquet object and checks its schema and every
/// value of the id, Company, ShipVia and OrderDate columns. Uploaded objects are removed
/// in a finally block.
/// </summary>
public async Task SimpleTransformation_NoPartition()
{
    var settings = GetGoogleCloudSettings();

    try
    {
        using (var store = GetDocumentStore())
        {
            var baseline = new DateTime(2020, 1, 1).ToUniversalTime();

            using (var session = store.OpenAsyncSession())
            {
                for (int i = 0; i < 100; i++)
                {
                    await session.StoreAsync(new Order
                    {
                        Id = $"orders/{i}",
                        OrderedAt = baseline.AddDays(i),
                        ShipVia = $"shippers/{i}",
                        Company = $"companies/{i}"
                    });
                }

                await session.SaveChangesAsync();
            }

            var etlDone = WaitForEtl(store, (n, statistics) => statistics.LoadSuccesses != 0);

            // Every property of the object literal must be comma-separated; without the
            // comma after 'this.OrderedAt' the script is not valid JavaScript.
            var script =
                " loadToOrders(noPartition(), {" +
                " OrderDate : this.OrderedAt," +
                " Company : this.Company," +
                " ShipVia : this.ShipVia" +
                " }); ";

            SetupGoogleCloudOlapEtl(store, script, settings);

            // Fail here, with a clear message, instead of on a later unrelated assertion.
            Assert.True(etlDone.Wait(TimeSpan.FromMinutes(1)), "ETL did not report a successful load within one minute");

            using (var client = new RavenGoogleCloudClient(settings, DefaultBackupConfiguration))
            {
                var prefix = $"{settings.RemoteFolderName}/{CollectionName}";
                var cloudObjects = await client.ListObjectsAsync(prefix);

                Assert.Equal(1, cloudObjects.Count);

                // Both streams are released when this block exits, after the parquet reader.
                using var stream = client.DownloadObject(cloudObjects[0].Name);
                using var ms = new MemoryStream();
                await stream.CopyToAsync(ms);

                using (var parquetReader = new ParquetReader(ms))
                {
                    Assert.Equal(1, parquetReader.RowGroupCount);

                    var expectedFields = new[]
                    {
                        "OrderDate",
                        "ShipVia",
                        "Company",
                        ParquetTransformedItems.DefaultIdColumn,
                        ParquetTransformedItems.LastModifiedColumn
                    };

                    Assert.Equal(expectedFields.Length, parquetReader.Schema.Fields.Count);

                    using var rowGroupReader = parquetReader.OpenRowGroupReader(0);
                    foreach (var field in parquetReader.Schema.Fields)
                    {
                        Assert.True(field.Name.In(expectedFields));

                        var data = rowGroupReader.ReadColumn((DataField)field).Data;
                        Assert.Equal(100, data.Length);

                        // Only the length of the last-modified column is checked, not its values.
                        if (field.Name == ParquetTransformedItems.LastModifiedColumn)
                        {
                            continue;
                        }

                        // Values are expected in the same order the orders were stored.
                        var count = 0;
                        foreach (var val in data)
                        {
                            if (field.Name == "OrderDate")
                            {
                                var expectedDto = new DateTimeOffset(DateTime.SpecifyKind(baseline.AddDays(count), DateTimeKind.Utc));
                                Assert.Equal(expectedDto, val);
                            }
                            else
                            {
                                var expected = field.Name switch
                                {
                                    ParquetTransformedItems.DefaultIdColumn => $"orders/{count}",
                                    "Company" => $"companies/{count}",
                                    "ShipVia" => $"shippers/{count}",
                                    _ => null
                                };

                                Assert.Equal(expected, val);
                            }

                            count++;
                        }
                    }
                }
            }
        }
    }
    finally
    {
        await DeleteObjects(settings);
    }
}
/// <summary>
/// Stores 31 orders dated from 2020-01-01 and 28 orders dated from 2020-02-01 (five lines
/// each), runs an OLAP ETL script that loads one row per order into the orders table and
/// one row per order line into the "Sales" table, both partitioned by month, and then
/// verifies the schema and row count of one parquet object per table. Uploaded objects are
/// removed in a finally block.
/// </summary>
public async Task CanLoadToMultipleTables()
{
    const string salesTableName = "Sales";
    var settings = GetGoogleCloudSettings();

    try
    {
        using (var store = GetDocumentStore())
        {
            var baseline = new DateTime(2020, 1, 1);

            using (var session = store.OpenAsyncSession())
            {
                // Stores one order per day starting at monthStart; ids continue from idOffset.
                async Task StoreMonthAsync(DateTime monthStart, int dayCount, int idOffset)
                {
                    for (int day = 0; day < dayCount; day++)
                    {
                        var orderedAt = monthStart.AddDays(day);
                        var lines = new List<OrderLine>();

                        for (int unit = 1; unit <= 5; unit++)
                        {
                            lines.Add(new OrderLine
                            {
                                Quantity = unit * 10,
                                PricePerUnit = day + unit,
                                Product = $"Products/{unit}"
                            });
                        }

                        var order = new Order
                        {
                            Id = $"orders/{day + idOffset}",
                            OrderedAt = orderedAt,
                            RequireAt = orderedAt.AddDays(7),
                            Company = $"companies/{day}",
                            Lines = lines
                        };

                        await session.StoreAsync(order);
                    }
                }

                await StoreMonthAsync(baseline, 31, 0);
                await StoreMonthAsync(baseline.AddMonths(1), 28, 31);

                await session.SaveChangesAsync();
            }

            var etlDone = WaitForEtl(store, (n, statistics) => statistics.LoadSuccesses != 0);

            // The script must stay multi-line: its '//' comments run to the end of the line,
            // so on a single line they would swallow the loadToSales/loadToOrders calls.
            var script = @"
var orderData = {
    Company : this.Company,
    RequireAt : new Date(this.RequireAt),
    ItemsCount: this.Lines.length,
    TotalCost: 0
};

var orderDate = new Date(this.OrderedAt);
var year = orderDate.getFullYear();
var month = orderDate.getMonth();
var key = new Date(year, month);

for (var i = 0; i < this.Lines.length; i++) {
    var line = this.Lines[i];
    orderData.TotalCost += (line.PricePerUnit * line.Quantity);

    // load to 'sales' table
    loadToSales(partitionBy(key), {
        Qty: line.Quantity,
        Product: line.Product,
        Cost: line.PricePerUnit
    });
}

// load to 'orders' table
loadToOrders(partitionBy(key), orderData);
";

            SetupGoogleCloudOlapEtl(store, script, settings);

            // Fail here, with a clear message, instead of on a later unrelated assertion.
            Assert.True(etlDone.Wait(TimeSpan.FromMinutes(1)), "ETL did not report a successful load within one minute");

            // Orders table: the first (January) object holds one row per January order.
            await AssertTableAsync(
                $"{settings.RemoteFolderName}/{CollectionName}",
                0,
                new[]
                {
                    "Company",
                    "RequireAt",
                    "ItemsCount",
                    "TotalCost",
                    ParquetTransformedItems.DefaultIdColumn,
                    ParquetTransformedItems.LastModifiedColumn
                },
                31);

            // Sales table: the second (February) object holds one row per order line.
            await AssertTableAsync(
                $"{settings.RemoteFolderName}/{salesTableName}",
                1,
                new[]
                {
                    "Qty",
                    "Product",
                    "Cost",
                    ParquetTransformedItems.DefaultIdColumn,
                    ParquetTransformedItems.LastModifiedColumn
                },
                28 * 5);
        }
    }
    finally
    {
        await DeleteObjects(settings);
    }

    // Lists the objects under prefix, expects the January and February partitions, then
    // downloads the object at objectIndex and checks its parquet schema and row count.
    async Task AssertTableAsync(string prefix, int objectIndex, string[] expectedFields, int expectedRowCount)
    {
        using (var client = new RavenGoogleCloudClient(settings, DefaultBackupConfiguration))
        {
            var cloudObjects = await client.ListObjectsAsync(prefix);

            Assert.Equal(2, cloudObjects.Count);
            Assert.Contains("2020-01-01", cloudObjects[0].Name);
            Assert.Contains("2020-02-01", cloudObjects[1].Name);

            // Both streams are released when this block exits, after the parquet reader.
            using var stream = client.DownloadObject(cloudObjects[objectIndex].Name);
            using var ms = new MemoryStream();
            await stream.CopyToAsync(ms);

            using (var parquetReader = new ParquetReader(ms))
            {
                Assert.Equal(1, parquetReader.RowGroupCount);
                Assert.Equal(expectedFields.Length, parquetReader.Schema.Fields.Count);

                using var rowGroupReader = parquetReader.OpenRowGroupReader(0);
                foreach (var field in parquetReader.Schema.Fields)
                {
                    Assert.True(field.Name.In(expectedFields));

                    var data = rowGroupReader.ReadColumn((DataField)field).Data;
                    Assert.Equal(expectedRowCount, data.Length);
                }
            }
        }
    }
}
/// <summary>
/// Stores ten orders (orders/1 .. orders/10), runs an OLAP ETL script that loads them
/// partitioned by month, then downloads the single resulting parquet object and checks
/// its schema and the values of the id and Company columns. Uploaded objects are removed
/// in a finally block.
/// </summary>
public async Task SimpleTransformation()
{
    var settings = GetGoogleCloudSettings();

    try
    {
        using (var store = GetDocumentStore())
        {
            var baseline = new DateTime(2020, 1, 1);

            using (var session = store.OpenAsyncSession())
            {
                for (int i = 1; i <= 10; i++)
                {
                    var o = new Order
                    {
                        Id = $"orders/{i}",
                        OrderedAt = baseline.AddDays(i),
                        Company = $"companies/{i}"
                    };

                    await session.StoreAsync(o);
                }

                await session.SaveChangesAsync();
            }

            var etlDone = WaitForEtl(store, (n, statistics) => statistics.LoadSuccesses != 0);

            // The script also maps ShipVia, which is never set on the stored orders and is
            // not among the expected columns below.
            // NOTE(review): presumably the ETL omits columns that have no values - confirm.
            var script =
                " var orderDate = new Date(this.OrderedAt);" +
                " var year = orderDate.getFullYear();" +
                " var month = orderDate.getMonth();" +
                " var key = new Date(year, month);" +
                " loadToOrders(partitionBy(key), { Company : this.Company, ShipVia : this.ShipVia }) ";

            SetupGoogleCloudOlapEtl(store, script, settings);

            // Fail here, with a clear message, instead of on a later unrelated assertion.
            Assert.True(etlDone.Wait(TimeSpan.FromMinutes(1)), "ETL did not report a successful load within one minute");

            using (var client = new RavenGoogleCloudClient(settings, DefaultBackupConfiguration))
            {
                var prefix = $"{settings.RemoteFolderName}/{CollectionName}";
                var cloudObjects = await client.ListObjectsAsync(prefix);

                Assert.Equal(1, cloudObjects.Count);

                var fullPath = cloudObjects[0].Name;

                // Both streams are released when this block exits, after the parquet reader.
                using var stream = client.DownloadObject(fullPath);
                using var ms = new MemoryStream();
                await stream.CopyToAsync(ms);

                using (var parquetReader = new ParquetReader(ms))
                {
                    Assert.Equal(1, parquetReader.RowGroupCount);

                    var expectedFields = new[]
                    {
                        "Company",
                        ParquetTransformedItems.DefaultIdColumn,
                        ParquetTransformedItems.LastModifiedColumn
                    };

                    Assert.Equal(expectedFields.Length, parquetReader.Schema.Fields.Count);

                    using var rowGroupReader = parquetReader.OpenRowGroupReader(0);
                    foreach (var field in parquetReader.Schema.Fields)
                    {
                        Assert.True(field.Name.In(expectedFields));

                        var data = rowGroupReader.ReadColumn((DataField)field).Data;
                        Assert.Equal(10, data.Length);

                        // Only the length of the last-modified column is checked, not its values.
                        if (field.Name == ParquetTransformedItems.LastModifiedColumn)
                        {
                            continue;
                        }

                        // Ids start at 1, matching the loop that stored the orders.
                        var count = 1;
                        foreach (var val in data)
                        {
                            switch (field.Name)
                            {
                                case ParquetTransformedItems.DefaultIdColumn:
                                    Assert.Equal($"orders/{count}", val);
                                    break;
                                case "Company":
                                    Assert.Equal($"companies/{count}", val);
                                    break;
                            }

                            count++;
                        }
                    }
                }
            }
        }
    }
    finally
    {
        await DeleteObjects(settings);
    }
}
/// <summary>
/// Downloads the object at <paramref name="filePath"/> through the client and opens it as a
/// read-only <see cref="ZipArchive"/>, returned in an already-completed task.
/// </summary>
/// <param name="filePath">Object path handed to the client unchanged.</param>
/// <returns>A completed task holding the archive; the archive is constructed over the downloaded stream.</returns>
protected override Task<ZipArchive> GetZipArchive(string filePath)
{
    var stream = _client.DownloadObject(filePath);

    try
    {
        return Task.FromResult(new ZipArchive(stream, ZipArchiveMode.Read));
    }
    catch
    {
        // The archive never took ownership of the stream (e.g. the content is not a
        // valid zip), so release it here before propagating the original exception.
        stream?.Dispose();
        throw;
    }
}