/// <summary>
/// Builds an OLAP ETL configuration containing a single "MonthlyOrders" transformation over the
/// "Orders" collection and registers it, together with a connection string that targets a local folder.
/// </summary>
/// <param name="store">Store whose database name is used to derive the connection string name and that receives the ETL.</param>
/// <param name="script">Script assigned to the transformation.</param>
/// <param name="path">Folder path placed into the connection string's <c>LocalSettings</c>.</param>
private void SetupLocalOlapEtl(DocumentStore store, string script, string path)
{
    var localConnectionName = $"{store.Database} to local";

    var monthlyOrders = new Transformation
    {
        Name = "MonthlyOrders",
        Collections = new List<string> { "Orders" },
        Script = script
    };

    // No task name is assigned here; only the connection string name and the run frequency are set.
    var etlConfiguration = new OlapEtlConfiguration
    {
        ConnectionStringName = localConnectionName,
        RunFrequency = "* * * * *"
    };
    etlConfiguration.Transforms.Add(monthlyOrders);

    var localDestination = new OlapConnectionString
    {
        Name = localConnectionName,
        LocalSettings = new LocalSettings { FolderPath = path }
    };

    AddEtl(store, etlConfiguration, localDestination);
}
/// <summary>
/// Builds the "olap-azure-test" OLAP ETL configuration with a single "MonthlyOrders" transformation
/// over the "Orders" collection and registers it with a connection string carrying the given Azure settings.
/// </summary>
/// <param name="store">Store whose database name is used to derive the connection string name and that receives the ETL.</param>
/// <param name="script">Script assigned to the transformation.</param>
/// <param name="settings">Azure settings placed into the connection string.</param>
/// <param name="customPartition">Optional value copied to <c>CustomPartitionValue</c>; stays <c>null</c> when omitted.</param>
private void SetupAzureEtl(DocumentStore store, string script, AzureSettings settings, string customPartition = null)
{
    var azureConnectionName = $"{store.Database} to Azure";

    var monthlyOrders = new Transformation
    {
        Name = "MonthlyOrders",
        Collections = new List<string> { "Orders" },
        Script = script
    };

    var etlConfiguration = new OlapEtlConfiguration
    {
        Name = "olap-azure-test",
        ConnectionStringName = azureConnectionName,
        RunFrequency = LocalTests.DefaultFrequency,
        CustomPartitionValue = customPartition
    };
    etlConfiguration.Transforms.Add(monthlyOrders);

    var azureDestination = new OlapConnectionString
    {
        Name = azureConnectionName,
        AzureSettings = settings
    };

    AddEtl(store, etlConfiguration, azureDestination);
}
/// <summary>
/// Registers a caller-supplied OLAP ETL configuration together with an Azure connection string
/// named "{database} to Azure".
/// </summary>
/// <param name="store">Store whose database name is used to derive the connection string name and that receives the ETL.</param>
/// <param name="settings">Azure settings placed into the connection string.</param>
/// <param name="configuration">
/// Configuration passed through unchanged. This method does not set its <c>ConnectionStringName</c>;
/// presumably the caller must make it match the name generated here (verify against callers).
/// </param>
private void SetupAzureEtl(DocumentStore store, AzureSettings settings, OlapEtlConfiguration configuration)
{
    var azureDestination = new OlapConnectionString
    {
        Name = $"{store.Database} to Azure",
        AzureSettings = settings
    };

    AddEtl(store, configuration, azureDestination);
}
/// <summary>
/// Sends the connection string to the server, asserts that the put result carries a Raft command index,
/// and then sends the ETL configuration.
/// </summary>
/// <param name="src">Store whose maintenance executor sends both operations.</param>
/// <param name="configuration">OLAP ETL configuration to add.</param>
/// <param name="connectionString">Connection string to store before the ETL is added.</param>
/// <returns>The result returned by the add-ETL operation.</returns>
private static AddEtlOperationResult AddEtl(IDocumentStore src, OlapEtlConfiguration configuration, OlapConnectionString connectionString)
{
    // The connection string is stored first, before the ETL task that names it is added.
    var putConnectionString = new PutConnectionStringOperation<OlapConnectionString>(connectionString);
    var connectionStringResult = src.Maintenance.Send(putConnectionString);
    Assert.NotNull(connectionStringResult.RaftCommandIndex);

    var addEtl = new AddEtlOperation<OlapConnectionString>(configuration);
    return src.Maintenance.Send(addEtl);
}
/// <summary>
/// Registers a caller-supplied OLAP ETL configuration together with a connection string that targets a local folder.
/// </summary>
/// <param name="store">Store that receives the connection string and the ETL.</param>
/// <param name="configuration">Configuration passed through unchanged.</param>
/// <param name="path">Folder path placed into the connection string's <c>LocalSettings</c>.</param>
/// <param name="connectionStringName">Name given to the created connection string.</param>
private void SetupLocalOlapEtl(DocumentStore store, OlapEtlConfiguration configuration, string path, string connectionStringName)
{
    var localFolder = new LocalSettings { FolderPath = path };

    var localDestination = new OlapConnectionString
    {
        Name = connectionStringName,
        LocalSettings = localFolder
    };

    AddEtl(store, configuration, localDestination);
}
/// <summary>
/// Creates the transformed-items container for one table/partition key, using the Parquet file format
/// (the base constructor receives <c>OlapEtlFileFormat.Parquet</c>).
/// </summary>
/// <param name="name">Stored as the table name and also passed to <c>GetSafeFolderName</c>.</param>
/// <param name="key">Stored as the key of this item set.</param>
/// <param name="tmpPath">Stored as the temporary file path.</param>
/// <param name="fileNameSuffix">Stored as the file name suffix.</param>
/// <param name="partitions">Passed to <c>GetSafeFolderName</c> together with <paramref name="name"/>.</param>
/// <param name="configuration">OLAP ETL configuration kept for later use.</param>
/// <remarks>
/// All fields, including an empty data-type dictionary and a new <c>RowGroup</c>, are assigned before
/// <c>SetIdColumn()</c> and <c>GetSafeFolderName(name, partitions)</c> are invoked, in that order.
/// NOTE(review): both helpers are defined outside this view and presumably rely on the fields assigned
/// above; any value returned by <c>GetSafeFolderName</c> is not used here. Confirm before reordering.
/// </remarks>
public ParquetTransformedItems(string name, string key, string tmpPath, string fileNameSuffix, List <string> partitions, OlapEtlConfiguration configuration) : base(OlapEtlFileFormat.Parquet) { _tableName = name; _key = key; _configuration = configuration; _fileNameSuffix = fileNameSuffix; _tmpFilePath = tmpPath; _dataTypes = new Dictionary <string, DataType>(); _group = new RowGroup(); SetIdColumn(); GetSafeFolderName(name, partitions); }
/// <summary>
/// Creates a transformer for one OLAP ETL transformation. The transformation script is wrapped in a
/// <c>PatchRequest</c> of type <c>PatchRequestType.OlapEtl</c> and handed to the base constructor.
/// </summary>
/// <param name="transformation">Transformation whose script is executed and whose name is part of the file name suffix.</param>
/// <param name="database">Database supplying the name, configuration paths and script context.</param>
/// <param name="context">Documents operation context passed to the base constructor.</param>
/// <param name="config">OLAP ETL configuration; its connection's local settings and its name are read here.</param>
public OlapDocumentTransformer(Transformation transformation, DocumentDatabase database, DocumentsOperationContext context, OlapEtlConfiguration config)
    : base(database, context, new PatchRequest(transformation.Script, PatchRequestType.OlapEtl), null)
{
    _config = config;
    _tables = new Dictionary<string, OlapTransformedItems>();

    var resolvedLocalSettings = BackupTask.GetBackupConfigurationFromScript(
        _config.Connection.LocalSettings,
        x => JsonDeserializationServer.LocalSettings(x),
        database,
        updateServerWideSettingsFunc: null,
        serverWide: false);

    // Prefer the folder from the resolved local settings; otherwise fall back to the storage temp path,
    // and to the core data directory when no temp path is configured.
    var configuredFolder = resolvedLocalSettings?.FolderPath;
    if (configuredFolder != null)
    {
        _localFilePath = configuredFolder;
    }
    else
    {
        var fallbackDirectory = database.Configuration.Storage.TempPath ?? database.Configuration.Core.DataDirectory;
        _localFilePath = fallbackDirectory.FullPath;
    }

    var rawSuffix = $"{database.Name}-{_config.Name}-{transformation.Name}";
    _fileNameSuffix = ParquetTransformedItems.GetSafeNameForRemoteDestination(rawSuffix);

    LoadToDestinations = transformation.GetCollectionsFromScript();
}
/// <summary>
/// Builds an OLAP ETL configuration with a single transformation over the "Orders" collection and
/// registers it through the overload that takes a ready-made configuration and a local folder path.
/// </summary>
/// <param name="store">Store whose database name is used to derive the connection string name and that receives the ETL.</param>
/// <param name="script">Script assigned to the transformation.</param>
/// <param name="path">Local folder path forwarded to the connection string.</param>
/// <param name="name">Name of the ETL configuration; defaults to "olap-test".</param>
/// <param name="frequency">Run frequency; <c>DefaultFrequency</c> is used when <c>null</c>.</param>
/// <param name="transformationName">Transformation name; "MonthlyOrders" is used when <c>null</c>.</param>
private void SetupLocalOlapEtl(DocumentStore store, string script, string path, string name = "olap-test", string frequency = null, string transformationName = null)
{
    var localConnectionName = $"{store.Database} to local";
    var effectiveFrequency = frequency ?? DefaultFrequency;
    var effectiveTransformationName = transformationName ?? "MonthlyOrders";

    var ordersTransformation = new Transformation
    {
        Name = effectiveTransformationName,
        Collections = new List<string> { "Orders" },
        Script = script
    };

    var etlConfiguration = new OlapEtlConfiguration
    {
        Name = name,
        ConnectionStringName = localConnectionName,
        RunFrequency = effectiveFrequency
    };
    etlConfiguration.Transforms.Add(ordersTransformation);

    SetupLocalOlapEtl(store, etlConfiguration, path, localConnectionName);
}
// Verifies that a partition column name supplied in the script ('order_date') appears in the names
// of the blobs uploaded to Azure: one blob is expected per month of stored orders.
// Remote objects are removed in the finally block whether or not the test passes.
public async Task CanModifyPartitionColumnName()
{
    var settings = GetAzureSettings();

    try
    {
        using (var store = GetDocumentStore())
        {
            // Must match the column name hard-coded inside the script literal below.
            const string partitionColumn = "order_date";
            var baseline = new DateTime(2020, 1, 1);

            using (var session = store.OpenAsyncSession())
            {
                // 31 orders, one per day of January 2020.
                for (int i = 0; i < 31; i++)
                {
                    await session.StoreAsync(new Order
                    {
                        Id = $"orders/{i}",
                        OrderedAt = baseline.AddDays(i),
                        ShipVia = $"shippers/{i}",
                        Company = $"companies/{i}"
                    });
                }

                // 28 orders, one per day starting February 1st 2020.
                for (int i = 0; i < 28; i++)
                {
                    await session.StoreAsync(new Order
                    {
                        Id = $"orders/{i + 31}",
                        OrderedAt = baseline.AddMonths(1).AddDays(i),
                        ShipVia = $"shippers/{i + 31}",
                        Company = $"companies/{i + 31}"
                    });
                }

                await session.SaveChangesAsync();
            }

            var etlDone = WaitForEtl(store, (n, statistics) => statistics.LoadSuccesses != 0);

            var script = @" var orderDate = new Date(this.OrderedAt); var year = orderDate.getFullYear(); var month = orderDate.getMonth(); var key = new Date(year, month); loadToOrders(partitionBy(['order_date', key]), { Company : this.Company, ShipVia : this.ShipVia }) ";

            var connectionStringName = $"{store.Database} to Azure";
            var configuration = new OlapEtlConfiguration
            {
                Name = "olap-azure-test",
                ConnectionStringName = connectionStringName,
                RunFrequency = LocalTests.DefaultFrequency,
                Transforms =
                {
                    new Transformation
                    {
                        Name = "MonthlyOrders",
                        Collections = new List<string> { "Orders" },
                        Script = script
                    }
                }
            };

            SetupAzureEtl(store, settings, configuration);

            // Fail right here when the ETL did not report a successful load within the timeout,
            // instead of continuing into the blob assertions with a misleading failure.
            Assert.True(etlDone.Wait(TimeSpan.FromMinutes(1)));

            using (var client = RavenAzureClient.Create(settings, DefaultBackupConfiguration))
            {
                var prefix = $"{settings.RemoteFolderName}/{CollectionName}";
                var cloudObjects = await client.ListBlobsAsync(prefix, string.Empty, false);
                var list = cloudObjects.List.ToList();

                // The index-based checks rely on the listing returning the January blob before the February one.
                Assert.Equal(2, list.Count);
                Assert.Contains($"{partitionColumn}=2020-01-01", list[0].Name);
                Assert.Contains($"{partitionColumn}=2020-02-01", list[1].Name);
            }
        }
    }
    finally
    {
        await DeleteObjects(settings);
    }
}
/// <summary>
/// Registers a caller-supplied OLAP ETL configuration together with a Google Cloud connection string
/// named "{database} to GCP".
/// </summary>
/// <param name="store">Store whose database name is used to derive the connection string name and that receives the ETL.</param>
/// <param name="settings">Google Cloud settings placed into the connection string.</param>
/// <param name="configuration">
/// Configuration passed through unchanged. This method does not set its <c>ConnectionStringName</c>;
/// presumably the caller must make it match the name generated here (verify against callers).
/// </param>
private void SetupGoogleCloudOlapEtl(DocumentStore store, GoogleCloudSettings settings, OlapEtlConfiguration configuration)
{
    var gcpDestination = new OlapConnectionString
    {
        Name = $"{store.Database} to GCP",
        GoogleCloudSettings = settings
    };

    AddEtl(store, configuration, gcpDestination);
}
// Runs the OLAP ETL test-script facility twice against the same configuration object:
// first with CustomPartitionValue = "USA", then with it reset to null. The second run must
// produce "location=undefined" in the partition key rather than the earlier "USA" value.
public async Task ShouldNotReuseCustomPartitionFromPreviousTestRun()
{
    using (var store = GetDocumentStore())
    {
        var firstOfJanuary = new DateTime(2020, 1, 1);

        using (var session = store.OpenAsyncSession())
        {
            // 31 orders, one per day of January 2020.
            for (var day = 0; day < 31; day++)
            {
                var januaryOrder = new Order
                {
                    Id = $"orders/{day}",
                    OrderedAt = firstOfJanuary.AddDays(day),
                    ShipVia = $"shippers/{day}",
                    Company = $"companies/{day}"
                };
                await session.StoreAsync(januaryOrder);
            }

            // 28 orders, one per day starting February 1st 2020; ids continue from 31.
            for (var day = 0; day < 28; day++)
            {
                var februaryOrder = new Order
                {
                    Id = $"orders/{day + 31}",
                    OrderedAt = firstOfJanuary.AddMonths(1).AddDays(day),
                    ShipVia = $"shippers/{day + 31}",
                    Company = $"companies/{day + 31}"
                };
                await session.StoreAsync(februaryOrder);
            }

            await session.SaveChangesAsync();
        }

        var database = await GetDatabase(store.Database);

        var configuration = new OlapEtlConfiguration
        {
            Name = "simulate",
            Transforms =
            {
                new Transformation
                {
                    Collections = { "Orders" },
                    Name = "MonthlyOrders",
                    Script = @" var orderDate = new Date(this.OrderedAt); var year = orderDate.getFullYear(); var month = orderDate.getMonth(); var key = new Date(year, month); loadToOrders(partitionBy(['order_date', key], ['location', $customPartitionValue]), { Company : this.Company, ShipVia : this.ShipVia }); "
                }
            },
            CustomPartitionValue = "USA"
        };

        // Executes the test script for "orders/1" with the current state of `configuration`
        // and checks that exactly one partition with four columns and the given key is produced.
        void AssertSinglePartitionWithKey(string expectedKey)
        {
            using (database.DocumentsStorage.ContextPool.AllocateOperationContext(out DocumentsOperationContext context))
            using (OlapEtl.TestScript(new TestOlapEtlScript { DocumentId = "orders/1", Configuration = configuration }, database, database.ServerStore, context, out var testResult))
            {
                var olapResult = (OlapEtlTestScriptResult)testResult;

                Assert.Equal(1, olapResult.ItemsByPartition.Count);
                Assert.Equal(4, olapResult.ItemsByPartition[0].Columns.Count);
                Assert.Equal(expectedKey, olapResult.ItemsByPartition[0].Key);
            }
        }

        AssertSinglePartitionWithKey("Orders/order_date=2020-01-01-00-00/location=USA");

        // Clear the custom partition value on the same configuration instance and run again.
        configuration.CustomPartitionValue = null;

        AssertSinglePartitionWithKey("Orders/order_date=2020-01-01-00-00/location=undefined");
    }
}
// Verifies that an OLAP ETL task keeps running after its mentor node is removed from a 3-node cluster:
// the mentor produces the first output file, is then removed, and a newly responsible node must
// produce a second file for orders stored afterwards.
// The per-node document stores created here are disposed in the finally block.
public async Task OlapTaskShouldBeHighlyAvailable()
{
    var cluster = await CreateRaftCluster(3);
    var leader = cluster.Leader;
    var dbName = GetDatabaseName();
    var db = await CreateDatabaseInCluster(dbName, 3, leader.WebUrl);

    // One store per server, each pinned to its own node (topology updates disabled).
    var stores = db.Servers.Select(s => new DocumentStore
        {
            Database = dbName,
            Urls = new[] { s.WebUrl },
            Conventions = new DocumentConventions
            {
                DisableTopologyUpdates = true
            }
        }.Initialize())
        .ToList();

    try
    {
        // Pick a non-leader node as the mentor so that removing it does not remove the leader.
        var mentorNode = db.Servers.First(s => s != leader);
        var mentorTag = mentorNode.ServerStore.NodeTag;
        var store = stores.Single(s => s.Urls[0] == mentorNode.WebUrl);

        Assert.Equal(store.Database, dbName);

        var baseline = new DateTime(2020, 1, 1);

        using (var session = store.OpenAsyncSession())
        {
            // 31 orders, one per day of January 2020.
            for (int i = 0; i < 31; i++)
            {
                await session.StoreAsync(new Order
                {
                    Id = $"orders/{i}",
                    OrderedAt = baseline.AddDays(i),
                    ShipVia = $"shippers/{i}",
                    Company = $"companies/{i}"
                });
            }

            await session.SaveChangesAsync();
        }

        var etlDone = WaitForEtl(mentorNode, dbName, (n, statistics) => statistics.LoadSuccesses != 0);

        var script = @" var orderDate = new Date(this.OrderedAt); var year = orderDate.getFullYear(); var month = orderDate.getMonth(); var key = new Date(year, month); loadToOrders(partitionBy(key), { OrderId: id(this), Company : this.Company, ShipVia : this.ShipVia }); ";

        // The connection string is named "... to S3" but it targets a local folder (LocalSettings below).
        var connectionStringName = $"{store.Database} to S3";
        var configName = "olap-s3";
        var transformationName = "MonthlyOrders";
        var path = NewDataPath(forceCreateDir: true);

        var configuration = new OlapEtlConfiguration
        {
            Name = configName,
            ConnectionStringName = connectionStringName,
            RunFrequency = LocalTests.DefaultFrequency,
            Transforms =
            {
                new Transformation
                {
                    Name = transformationName,
                    Collections = new List<string> { "Orders" },
                    Script = script
                }
            },
            MentorNode = mentorTag
        };

        var task = AddEtl(store, configuration, new OlapConnectionString
        {
            Name = connectionStringName,
            LocalSettings = new LocalSettings
            {
                FolderPath = path
            }
        });

        Assert.True(etlDone.Wait(TimeSpan.FromMinutes(1)));

        var files = Directory.GetFiles(path, "*.*", SearchOption.AllDirectories);
        Assert.Equal(1, files.Length);

        // Remove the mentor from the cluster and wait for it to become passive.
        await ActionWithLeader(l => l.ServerStore.RemoveFromClusterAsync(mentorTag));
        Assert.True(await mentorNode.ServerStore.WaitForState(RachisState.Passive, CancellationToken.None).WaitWithoutExceptionAsync(TimeSpan.FromSeconds(30)),
            $"Removed node {mentorTag} wasn't move to passive state ({mentorNode.ServerStore.Engine.CurrentState})");

        // Continue through a store that is bound to one of the other nodes.
        var store2 = stores.First(s => s != store);

        var newResponsible = WaitForNewResponsibleNode(store2, task.TaskId, OngoingTaskType.OlapEtl, mentorTag);
        var newResponsibleNode = cluster.Nodes.Single(s => s.ServerStore.NodeTag == newResponsible);

        etlDone = WaitForEtl(newResponsibleNode, dbName, (n, statistics) => statistics.LoadSuccesses != 0);

        using (var session = store2.OpenAsyncSession())
        {
            // 28 orders, one per day starting February 1st 2020; ids continue from 31.
            for (int i = 0; i < 28; i++)
            {
                await session.StoreAsync(new Order
                {
                    Id = $"orders/{i + 31}",
                    OrderedAt = baseline.AddMonths(1).AddDays(i),
                    ShipVia = $"shippers/{i + 31}",
                    Company = $"companies/{i + 31}"
                });
            }

            await session.SaveChangesAsync();
        }

        Assert.True(etlDone.Wait(TimeSpan.FromMinutes(1)));

        files = Directory.GetFiles(path, "*.*", SearchOption.AllDirectories);
        Assert.True(files.Length == 2, $"Expected 2 output files but got {files.Length}. " +
                                       $"files : '{string.Join(", ", files)}'. " +
                                       $"Mentor node : '{mentorTag}', new Responsible node : '{newResponsibleNode.ServerStore.NodeTag}'.");
    }
    finally
    {
        // The stores were created manually (not via GetDocumentStore), so nothing else in this
        // method releases them; dispose them here even when an assertion fails.
        foreach (var documentStore in stores)
        {
            documentStore.Dispose();
        }
    }
}