Пример #1
0
        public async Task download_objects()
        {
            var fileName = Guid.NewGuid().ToString();

            using (var client = new RavenGoogleCloudClient(GoogleCloudFactAttribute.GoogleCloudSettings))
            {
                try
                {
                    await client.UploadObjectAsync(
                        fileName,
                        new MemoryStream(Encoding.UTF8.GetBytes("123"))
                        );

                    var stream = client.DownloadObject(fileName);
                    using (var sr = new StreamReader(stream))
                    {
                        Assert.Equal("123", sr.ReadToEnd());
                    }
                }

                finally
                {
                    await client.DeleteObjectAsync(fileName);
                }
            }
        }
Пример #2
0
 protected override Task <Stream> GetStream(string path)
 {
     return(Task.FromResult(_client.DownloadObject(path)));
 }
Пример #3
0
        public async Task SimpleTransformation_NoPartition()
        {
            var settings = GetGoogleCloudSettings();

            try
            {
                using (var store = GetDocumentStore())
                {
                    var baseline = new DateTime(2020, 1, 1).ToUniversalTime();

                    using (var session = store.OpenAsyncSession())
                    {
                        for (int i = 0; i < 100; i++)
                        {
                            await session.StoreAsync(new Order
                            {
                                Id        = $"orders/{i}",
                                OrderedAt = baseline.AddDays(i),
                                ShipVia   = $"shippers/{i}",
                                Company   = $"companies/{i}"
                            });
                        }

                        await session.SaveChangesAsync();
                    }

                    var etlDone = WaitForEtl(store, (n, statistics) => statistics.LoadSuccesses != 0);

                    var script = @"
loadToOrders(noPartition(),
    {
        OrderDate : this.OrderedAt
        Company : this.Company,
        ShipVia : this.ShipVia
    });
";
                    SetupGoogleCloudOlapEtl(store, script, settings);

                    etlDone.Wait(TimeSpan.FromMinutes(1));

                    using (var client = new RavenGoogleCloudClient(settings, DefaultBackupConfiguration))
                    {
                        var prefix = $"{settings.RemoteFolderName}/{CollectionName}";

                        var cloudObjects = await client.ListObjectsAsync(prefix);

                        Assert.Equal(1, cloudObjects.Count);

                        var stream = client.DownloadObject(cloudObjects[0].Name);
                        var ms     = new MemoryStream();
                        stream.CopyTo(ms);

                        using (var parquetReader = new ParquetReader(ms))
                        {
                            Assert.Equal(1, parquetReader.RowGroupCount);

                            var expectedFields = new[] { "OrderDate", "ShipVia", "Company", ParquetTransformedItems.DefaultIdColumn, ParquetTransformedItems.LastModifiedColumn };

                            Assert.Equal(expectedFields.Length, parquetReader.Schema.Fields.Count);

                            using var rowGroupReader = parquetReader.OpenRowGroupReader(0);
                            foreach (var field in parquetReader.Schema.Fields)
                            {
                                Assert.True(field.Name.In(expectedFields));

                                var data = rowGroupReader.ReadColumn((DataField)field).Data;
                                Assert.True(data.Length == 100);

                                if (field.Name == ParquetTransformedItems.LastModifiedColumn)
                                {
                                    continue;
                                }

                                var count = 0;
                                foreach (var val in data)
                                {
                                    if (field.Name == "OrderDate")
                                    {
                                        var expectedDto = new DateTimeOffset(DateTime.SpecifyKind(baseline.AddDays(count), DateTimeKind.Utc));
                                        Assert.Equal(expectedDto, val);
                                    }

                                    else
                                    {
                                        var expected = field.Name switch
                                        {
                                            ParquetTransformedItems.DefaultIdColumn => $"orders/{count}",
                                            "Company" => $"companies/{count}",
                                            "ShipVia" => $"shippers/{count}",
                                            _ => null
                                        };

                                        Assert.Equal(expected, val);
                                    }

                                    count++;
                                }
                            }
                        }
                    }
                }
            }
            finally
            {
                await DeleteObjects(settings);
            }
        }
Пример #4
0
        public async Task CanLoadToMultipleTables()
        {
            const string salesTableName = "Sales";
            var          settings       = GetGoogleCloudSettings();

            try
            {
                using (var store = GetDocumentStore())
                {
                    var baseline = new DateTime(2020, 1, 1);

                    using (var session = store.OpenAsyncSession())
                    {
                        for (int i = 0; i < 31; i++)
                        {
                            var orderedAt = baseline.AddDays(i);
                            var lines     = new List <OrderLine>();

                            for (int j = 1; j <= 5; j++)
                            {
                                lines.Add(new OrderLine
                                {
                                    Quantity     = j * 10,
                                    PricePerUnit = i + j,
                                    Product      = $"Products/{j}"
                                });
                            }

                            var o = new Order
                            {
                                Id        = $"orders/{i}",
                                OrderedAt = orderedAt,
                                RequireAt = orderedAt.AddDays(7),
                                Company   = $"companies/{i}",
                                Lines     = lines
                            };

                            await session.StoreAsync(o);
                        }

                        baseline = baseline.AddMonths(1);

                        for (int i = 0; i < 28; i++)
                        {
                            var orderedAt = baseline.AddDays(i);
                            var lines     = new List <OrderLine>();

                            for (int j = 1; j <= 5; j++)
                            {
                                lines.Add(new OrderLine
                                {
                                    Quantity     = j * 10,
                                    PricePerUnit = i + j,
                                    Product      = $"Products/{j}"
                                });
                            }

                            var o = new Order
                            {
                                Id        = $"orders/{i + 31}",
                                OrderedAt = orderedAt,
                                RequireAt = orderedAt.AddDays(7),
                                Company   = $"companies/{i}",
                                Lines     = lines
                            };

                            await session.StoreAsync(o);
                        }

                        await session.SaveChangesAsync();
                    }

                    var etlDone = WaitForEtl(store, (n, statistics) => statistics.LoadSuccesses != 0);

                    var script = @"
var orderData = {
    Company : this.Company,
    RequireAt : new Date(this.RequireAt),
    ItemsCount: this.Lines.length,
    TotalCost: 0
};

var orderDate = new Date(this.OrderedAt);
var year = orderDate.getFullYear();
var month = orderDate.getMonth();
var key = new Date(year, month);

for (var i = 0; i < this.Lines.length; i++) {
    var line = this.Lines[i];
    orderData.TotalCost += (line.PricePerUnit * line.Quantity);
    
    // load to 'sales' table

    loadToSales(partitionBy(key), {
        Qty: line.Quantity,
        Product: line.Product,
        Cost: line.PricePerUnit
    });
}

// load to 'orders' table
loadToOrders(partitionBy(key), orderData);
";

                    SetupGoogleCloudOlapEtl(store, script, settings);
                    etlDone.Wait(TimeSpan.FromMinutes(1));

                    using (var client = new RavenGoogleCloudClient(settings, DefaultBackupConfiguration))
                    {
                        var prefix       = $"{settings.RemoteFolderName}/{CollectionName}";
                        var cloudObjects = await client.ListObjectsAsync(prefix);

                        Assert.Equal(2, cloudObjects.Count);
                        Assert.Contains("2020-01-01", cloudObjects[0].Name);
                        Assert.Contains("2020-02-01", cloudObjects[1].Name);

                        var fullPath = cloudObjects[0].Name;
                        var stream   = client.DownloadObject(fullPath);
                        var ms       = new MemoryStream();
                        await stream.CopyToAsync(ms);

                        using (var parquetReader = new ParquetReader(ms))
                        {
                            Assert.Equal(1, parquetReader.RowGroupCount);

                            var expectedFields = new[] { "Company", "RequireAt", "ItemsCount", "TotalCost", ParquetTransformedItems.DefaultIdColumn, ParquetTransformedItems.LastModifiedColumn };
                            Assert.Equal(expectedFields.Length, parquetReader.Schema.Fields.Count);

                            using var rowGroupReader = parquetReader.OpenRowGroupReader(0);
                            foreach (var field in parquetReader.Schema.Fields)
                            {
                                Assert.True(field.Name.In(expectedFields));

                                var data = rowGroupReader.ReadColumn((DataField)field).Data;
                                Assert.True(data.Length == 31);
                            }
                        }
                    }

                    using (var client = new RavenGoogleCloudClient(settings, DefaultBackupConfiguration))
                    {
                        var prefix       = $"{settings.RemoteFolderName}/{salesTableName}";
                        var cloudObjects = await client.ListObjectsAsync(prefix);

                        Assert.Equal(2, cloudObjects.Count);
                        Assert.Contains("2020-01-01", cloudObjects[0].Name);
                        Assert.Contains("2020-02-01", cloudObjects[1].Name);

                        var fullPath = cloudObjects[1].Name;
                        var stream   = client.DownloadObject(fullPath);
                        var ms       = new MemoryStream();
                        await stream.CopyToAsync(ms);

                        using (var parquetReader = new ParquetReader(ms))
                        {
                            Assert.Equal(1, parquetReader.RowGroupCount);

                            var expectedFields = new[] { "Qty", "Product", "Cost", ParquetTransformedItems.DefaultIdColumn, ParquetTransformedItems.LastModifiedColumn };
                            Assert.Equal(expectedFields.Length, parquetReader.Schema.Fields.Count);

                            using var rowGroupReader = parquetReader.OpenRowGroupReader(0);
                            foreach (var field in parquetReader.Schema.Fields)
                            {
                                Assert.True(field.Name.In(expectedFields));

                                var data = rowGroupReader.ReadColumn((DataField)field).Data;
                                Assert.True(data.Length == 28 * 5);
                            }
                        }
                    }
                }
            }
            finally
            {
                await DeleteObjects(settings);
            }
        }
Пример #5
0
        public async Task SimpleTransformation()
        {
            var settings = GetGoogleCloudSettings();

            try
            {
                using (var store = GetDocumentStore())
                {
                    var baseline = new DateTime(2020, 1, 1);

                    using (var session = store.OpenAsyncSession())
                    {
                        for (int i = 1; i <= 10; i++)
                        {
                            var o = new Order
                            {
                                Id        = $"orders/{i}",
                                OrderedAt = baseline.AddDays(i),
                                Company   = $"companies/{i}"
                            };

                            await session.StoreAsync(o);
                        }

                        await session.SaveChangesAsync();
                    }

                    var etlDone = WaitForEtl(store, (n, statistics) => statistics.LoadSuccesses != 0);

                    var script = @"
var orderDate = new Date(this.OrderedAt);
var year = orderDate.getFullYear();
var month = orderDate.getMonth();
var key = new Date(year, month);

loadToOrders(partitionBy(key),
    {
        Company : this.Company,
        ShipVia : this.ShipVia
    })
";
                    SetupGoogleCloudOlapEtl(store, script, settings);

                    etlDone.Wait(TimeSpan.FromMinutes(1));

                    using (var client = new RavenGoogleCloudClient(settings, DefaultBackupConfiguration))
                    {
                        var prefix = $"{settings.RemoteFolderName}/{CollectionName}";

                        var cloudObjects = await client.ListObjectsAsync(prefix);

                        Assert.Equal(1, cloudObjects.Count);

                        var fullPath = cloudObjects[0].Name;
                        var stream   = client.DownloadObject(fullPath);
                        var ms       = new MemoryStream();
                        stream.CopyTo(ms);

                        using (var parquetReader = new ParquetReader(ms))
                        {
                            Assert.Equal(1, parquetReader.RowGroupCount);

                            var expectedFields = new[] { "Company", ParquetTransformedItems.DefaultIdColumn, ParquetTransformedItems.LastModifiedColumn };

                            Assert.Equal(expectedFields.Length, parquetReader.Schema.Fields.Count);

                            using var rowGroupReader = parquetReader.OpenRowGroupReader(0);
                            foreach (var field in parquetReader.Schema.Fields)
                            {
                                Assert.True(field.Name.In(expectedFields));

                                var data = rowGroupReader.ReadColumn((DataField)field).Data;
                                Assert.True(data.Length == 10);

                                if (field.Name == ParquetTransformedItems.LastModifiedColumn)
                                {
                                    continue;
                                }

                                var count = 1;
                                foreach (var val in data)
                                {
                                    switch (field.Name)
                                    {
                                    case ParquetTransformedItems.DefaultIdColumn:
                                        Assert.Equal($"orders/{count}", val);
                                        break;

                                    case "Company":
                                        Assert.Equal($"companies/{count}", val);
                                        break;
                                    }

                                    count++;
                                }
                            }
                        }
                    }
                }
            }

            finally
            {
                await DeleteObjects(settings);
            }
        }
Пример #6
0
 protected override Task <ZipArchive> GetZipArchive(string filePath)
 {
     return(Task.FromResult(new ZipArchive(_client.DownloadObject(filePath), ZipArchiveMode.Read)));
 }