/// <summary>
/// Runs the Delta Lake tutorial sequence (write, overwrite, load, update, delete, merge)
/// against a uniquely named table directory located next to the executing assembly, and
/// returns a textual trace of the first-column values observed after each step.
/// </summary>
/// <param name="awsSettings">Injected AWS settings; not read by this method.</param>
/// <returns>
/// The accumulated trace when every step succeeds; otherwise the message of the exception
/// that interrupted the run.
/// </returns>
public string DeltaTest([FromServices] IAWSSettings awsSettings)
{
    var trace = new System.Text.StringBuilder();
    SparkSession spark = null;

    try
    {
        try
        {
            spark = SparkSession
                .Builder()
                .AppName("DeltaTest")
                .GetOrCreate();

            string tempDirectory = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location);

            // "HH" (24-hour clock) keeps a morning run and an afternoon run at the same
            // clock time from mapping to the same directory, which would make the first
            // (non-overwrite) Save fail. Invariant culture keeps the digits stable.
            string dt = DateTime.Now.ToString(
                "MMddHHmmss",
                System.Globalization.CultureInfo.InvariantCulture);
            string path = Path.Combine(tempDirectory, $"delta-table{dt}");

            // Write data to a Delta table.
            DataFrame data = spark.Range(0, 5);
            AppendDeltaTestSection(
                trace, "Write data to a Delta table >> spark.Range(0, 5)", " ", data);
            data.Write().Format("delta").Save(path);

            // Create a second iteration of the table.
            data = spark.Range(5, 10);
            AppendDeltaTestSection(
                trace, "Create a second iteration of the table >> spark.Range(5, 10)", " ", data);
            data.Write().Format("delta").Mode("overwrite").Save(path);

            // Load the data into a DeltaTable object.
            DeltaTable deltaTable = DeltaTable.ForPath(path);
            AppendDeltaTestSection(
                trace,
                "Load the data into a DeltaTable object >> DeltaTable.ForPath",
                " ",
                deltaTable.ToDF());

            // Update every even value by adding 100 to it.
            deltaTable.Update(
                condition: Functions.Expr("id % 2 == 0"),
                set: new Dictionary<string, Column>()
                {
                    { "id", Functions.Expr("id + 100") }
                });
            AppendDeltaTestSection(
                trace, "Update every even value by adding 100 to it.", " ", deltaTable.ToDF());

            // Delete every even value.
            deltaTable.Delete(condition: Functions.Expr("id % 2 == 0"));
            AppendDeltaTestSection(
                trace, "Delete every even value id % 2 == 0", " ", deltaTable.ToDF());

            // Upsert (merge) new data.
            DataFrame newData = spark.Range(0, 20).As("newData").ToDF();
            AppendDeltaTestSection(
                trace, "Upsert (merge) new data", Environment.NewLine, newData);

            deltaTable.As("oldData")
                .Merge(newData, "oldData.id = newData.id")
                .WhenMatched()
                .Update(
                    new Dictionary<string, Column>() { { "id", Functions.Col("newData.id") } })
                .WhenNotMatched()
                .InsertExpr(new Dictionary<string, string>() { { "id", "newData.id" } })
                .Execute();
        }
        finally
        {
            // Stop the session on the failure path as well as on success. This sits
            // inside the outer try so that a failing Stop() is still reported as a message.
            spark?.Stop();
        }
    }
    catch (Exception ex)
    {
        // Deliberate best effort: the caller receives the failure text instead of an exception.
        return ex.Message;
    }

    return trace.ToString();
}

/// <summary>
/// Appends one trace section: the heading, the heading terminator, the first column of
/// every collected row (each followed by " | "), and a closing single space.
/// </summary>
/// <param name="trace">Builder that receives the section text.</param>
/// <param name="heading">Human-readable description of the step.</param>
/// <param name="headingTerminator">Text written immediately after the heading.</param>
/// <param name="frame">Data whose rows are collected and listed.</param>
private static void AppendDeltaTestSection(
    System.Text.StringBuilder trace,
    string heading,
    string headingTerminator,
    DataFrame frame)
{
    trace.Append(heading).Append(headingTerminator);

    foreach (var row in frame.Collect())
    {
        trace.Append(row.Values[0]).Append(" | ");
    }

    trace.Append(" ");
}
/// <summary>
/// Calls each DeltaTable, merge-builder, streaming-read and ConvertToDelta overload used
/// below once and asserts the runtime type of whatever it returns. The merge builders are
/// only constructed here; none of them is executed.
/// </summary>
public void TestSignatures()
{
    using var tempDirectory = new TemporaryDirectory();
    string path = Path.Combine(tempDirectory.Path, "delta-table");

    DataFrame rangeRate = _spark.Range(15);
    rangeRate.Write().Format("delta").Save(path);

    DeltaTable table = Assert.IsType<DeltaTable>(DeltaTable.ForPath(path));
    table = Assert.IsType<DeltaTable>(DeltaTable.ForPath(_spark, path));

    Assert.IsType<bool>(DeltaTable.IsDeltaTable(_spark, path));
    Assert.IsType<bool>(DeltaTable.IsDeltaTable(path));

    Assert.IsType<DeltaTable>(table.As("oldTable"));
    Assert.IsType<DeltaTable>(table.Alias("oldTable"));
    Assert.IsType<DataFrame>(table.History());
    Assert.IsType<DataFrame>(table.History(200));
    Assert.IsType<DataFrame>(table.ToDF());

    DataFrame newTable = _spark.Range(10, 15).As("newTable");

    // Expr parses the string as a SQL condition. Exp would instead build the
    // exponential of a column that happens to carry this text as its name.
    Assert.IsType<DeltaMergeBuilder>(
        table.Merge(newTable, Functions.Expr("oldTable.id == newTable.id")));
    DeltaMergeBuilder mergeBuilder = Assert.IsType<DeltaMergeBuilder>(
        table.Merge(newTable, "oldTable.id == newTable.id"));

    // Validate the MergeBuilder matched signatures.
    Assert.IsType<DeltaMergeMatchedActionBuilder>(mergeBuilder.WhenMatched());
    Assert.IsType<DeltaMergeMatchedActionBuilder>(mergeBuilder.WhenMatched("id = 5"));
    DeltaMergeMatchedActionBuilder matchedActionBuilder =
        Assert.IsType<DeltaMergeMatchedActionBuilder>(
            mergeBuilder.WhenMatched(Functions.Expr("id = 5")));

    Assert.IsType<DeltaMergeBuilder>(
        matchedActionBuilder.Update(new Dictionary<string, Column>()));
    Assert.IsType<DeltaMergeBuilder>(
        matchedActionBuilder.UpdateExpr(new Dictionary<string, string>()));
    Assert.IsType<DeltaMergeBuilder>(matchedActionBuilder.UpdateAll());
    Assert.IsType<DeltaMergeBuilder>(matchedActionBuilder.Delete());

    // Validate the MergeBuilder not-matched signatures.
    Assert.IsType<DeltaMergeNotMatchedActionBuilder>(mergeBuilder.WhenNotMatched());
    Assert.IsType<DeltaMergeNotMatchedActionBuilder>(
        mergeBuilder.WhenNotMatched("id = 5"));
    DeltaMergeNotMatchedActionBuilder notMatchedActionBuilder =
        Assert.IsType<DeltaMergeNotMatchedActionBuilder>(
            mergeBuilder.WhenNotMatched(Functions.Expr("id = 5")));

    Assert.IsType<DeltaMergeBuilder>(
        notMatchedActionBuilder.Insert(new Dictionary<string, Column>()));
    Assert.IsType<DeltaMergeBuilder>(
        notMatchedActionBuilder.InsertExpr(new Dictionary<string, string>()));
    Assert.IsType<DeltaMergeBuilder>(notMatchedActionBuilder.InsertAll());

    // Update and UpdateExpr should return void.
    table.Update(new Dictionary<string, Column>() { });
    table.Update(Functions.Expr("id % 2 == 0"), new Dictionary<string, Column>() { });
    table.UpdateExpr(new Dictionary<string, string>() { });
    table.UpdateExpr("id % 2 == 1", new Dictionary<string, string>() { });

    Assert.IsType<DataFrame>(table.Vacuum());
    Assert.IsType<DataFrame>(table.Vacuum(168));

    // Generate should return void.
    table.Generate("symlink_format_manifest");

    // Delete should return void.
    table.Delete("id > 10");
    table.Delete(Functions.Expr("id > 5"));
    table.Delete();

    // Load the table as a streaming source.
    Assert.IsType<DataFrame>(_spark
        .ReadStream()
        .Format("delta")
        .Option("path", path)
        .Load());
    Assert.IsType<DataFrame>(_spark.ReadStream().Format("delta").Load(path));

    // Create Parquet data and convert it to DeltaTables.
    string parquetIdentifier = $"parquet.`{path}`";

    // Unpartitioned Parquet data.
    rangeRate.Write().Mode(SaveMode.Overwrite).Parquet(path);
    Assert.IsType<DeltaTable>(DeltaTable.ConvertToDelta(_spark, parquetIdentifier));

    // Parquet data partitioned by "id"; the partition schema is supplied once as a
    // DDL string and once as a StructType.
    rangeRate
        .Select(Functions.Col("id"), Functions.Expr("(`id` + 1) AS `id_plus_one`"))
        .Write()
        .PartitionBy("id")
        .Mode(SaveMode.Overwrite)
        .Parquet(path);
    Assert.IsType<DeltaTable>(DeltaTable.ConvertToDelta(
        _spark,
        parquetIdentifier,
        "id bigint"));
    Assert.IsType<DeltaTable>(DeltaTable.ConvertToDelta(
        _spark,
        parquetIdentifier,
        new StructType(new[]
        {
            new StructField("id", new IntegerType())
        })));
}
/// <summary>
/// Replays the Delta Lake tutorial flow (write, overwrite, load, conditional update,
/// conditional delete, merge) and checks the table contents after each stage with
/// ValidateRangeDataFrame.
/// </summary>
public void TestTutorialScenario()
{
    using var scratch = new TemporaryDirectory();
    string tablePath = Path.Combine(scratch.Path, "delta-table");

    // Stage 1: the first table version holds 0 through 4.
    DataFrame firstBatch = _spark.Range(0, 5);
    firstBatch.Write().Format("delta").Save(tablePath);
    ValidateRangeDataFrame(Enumerable.Range(0, 5), firstBatch);

    // Stage 2: overwrite the table with 5 through 9.
    DataFrame secondBatch = _spark.Range(5, 10);
    secondBatch.Write().Format("delta").Mode("overwrite").Save(tablePath);

    // Stage 3: reopen the path as a DeltaTable; it should expose the overwritten data.
    DeltaTable table = DeltaTable.ForPath(tablePath);
    ValidateRangeDataFrame(Enumerable.Range(5, 5), table.ToDF());

    // Stage 4: add 100 to each even id, so 6 and 8 become 106 and 108.
    Column evenForUpdate = Functions.Expr("id % 2 == 0");
    var plusHundred = new Dictionary<string, Column>()
    {
        { "id", Functions.Expr("id + 100") }
    };
    table.Update(condition: evenForUpdate, set: plusHundred);

    var expectedAfterUpdate = new List<int>() { 5, 7, 9, 106, 108 };
    ValidateRangeDataFrame(expectedAfterUpdate, table.ToDF());

    // Stage 5: drop the even ids, leaving only 5, 7 and 9.
    Column evenForDelete = Functions.Expr("id % 2 == 0");
    table.Delete(condition: evenForDelete);

    var expectedAfterDelete = new List<int>() { 5, 7, 9 };
    ValidateRangeDataFrame(expectedAfterDelete, table.ToDF());

    // Stage 6: upsert 0 through 19. Matching ids are rewritten, missing ids inserted.
    DataFrame incoming = _spark.Range(0, 20).As("newData").ToDF();

    var pending = table.As("oldData").Merge(incoming, "oldData.id = newData.id");
    pending = pending
        .WhenMatched()
        .Update(new Dictionary<string, Column>() { { "id", Functions.Col("newData.id") } });
    pending = pending
        .WhenNotMatched()
        .InsertExpr(new Dictionary<string, string>() { { "id", "newData.id" } });
    pending.Execute();

    // The merged table should now contain every value from 0 through 19.
    ValidateRangeDataFrame(Enumerable.Range(0, 20), table.ToDF());
}
/// <summary>
/// Calls each DeltaTable and merge-builder overload used below once and asserts the
/// runtime type of whatever it returns. The merge builders are only constructed here;
/// none of them is executed.
/// </summary>
public void TestSignatures()
{
    using (var tempDirectory = new TemporaryDirectory())
    {
        string path = Path.Combine(tempDirectory.Path, "delta-table");

        DataFrame rangeRate = _spark.Range(15);
        rangeRate.Write().Format("delta").Save(path);

        DeltaTable table = Assert.IsType<DeltaTable>(DeltaTable.ForPath(path));
        table = Assert.IsType<DeltaTable>(DeltaTable.ForPath(_spark, path));

        Assert.IsType<DeltaTable>(table.As("oldTable"));
        Assert.IsType<DataFrame>(table.History());
        Assert.IsType<DataFrame>(table.History(200));
        Assert.IsType<DataFrame>(table.ToDF());

        DataFrame newTable = _spark.Range(10, 15).As("newTable");

        // Expr parses the string as a SQL condition. Exp would instead build the
        // exponential of a column that happens to carry this text as its name.
        Assert.IsType<DeltaMergeBuilder>(
            table.Merge(newTable, Functions.Expr("oldTable.id == newTable.id")));
        DeltaMergeBuilder mergeBuilder = Assert.IsType<DeltaMergeBuilder>(
            table.Merge(newTable, "oldTable.id == newTable.id"));

        // Validate the MergeBuilder matched signatures.
        Assert.IsType<DeltaMergeMatchedActionBuilder>(mergeBuilder.WhenMatched());
        Assert.IsType<DeltaMergeMatchedActionBuilder>(mergeBuilder.WhenMatched("id = 5"));
        DeltaMergeMatchedActionBuilder matchedActionBuilder =
            Assert.IsType<DeltaMergeMatchedActionBuilder>(
                mergeBuilder.WhenMatched(Functions.Expr("id = 5")));

        Assert.IsType<DeltaMergeBuilder>(
            matchedActionBuilder.Update(new Dictionary<string, Column>()));
        Assert.IsType<DeltaMergeBuilder>(
            matchedActionBuilder.UpdateExpr(new Dictionary<string, string>()));
        Assert.IsType<DeltaMergeBuilder>(matchedActionBuilder.UpdateAll());
        Assert.IsType<DeltaMergeBuilder>(matchedActionBuilder.Delete());

        // Validate the MergeBuilder not-matched signatures.
        Assert.IsType<DeltaMergeNotMatchedActionBuilder>(mergeBuilder.WhenNotMatched());
        Assert.IsType<DeltaMergeNotMatchedActionBuilder>(
            mergeBuilder.WhenNotMatched("id = 5"));
        DeltaMergeNotMatchedActionBuilder notMatchedActionBuilder =
            Assert.IsType<DeltaMergeNotMatchedActionBuilder>(
                mergeBuilder.WhenNotMatched(Functions.Expr("id = 5")));

        Assert.IsType<DeltaMergeBuilder>(
            notMatchedActionBuilder.Insert(new Dictionary<string, Column>()));
        Assert.IsType<DeltaMergeBuilder>(
            notMatchedActionBuilder.InsertExpr(new Dictionary<string, string>()));
        Assert.IsType<DeltaMergeBuilder>(notMatchedActionBuilder.InsertAll());

        // Update and UpdateExpr should return void.
        table.Update(new Dictionary<string, Column>() { });
        table.Update(Functions.Expr("id % 2 == 0"), new Dictionary<string, Column>() { });
        table.UpdateExpr(new Dictionary<string, string>() { });
        table.UpdateExpr("id % 2 == 1", new Dictionary<string, string>() { });

        Assert.IsType<DataFrame>(table.Vacuum());
        Assert.IsType<DataFrame>(table.Vacuum(168));

        // Delete should return void.
        table.Delete("id > 10");
        table.Delete(Functions.Expr("id > 5"));
        table.Delete();
    }
}