Пример #1
0
        /// <summary>
        /// Determines which input rows count as deletions for the current merge mode, removes
        /// them from the destination via <c>SqlDeleteIds</c> (skipped when the truncate method
        /// is used), flags them as deleted and appends them to <c>DeltaTable</c>.
        /// Does nothing for the <c>NoDeletions</c> and <c>OnlyUpdates</c> merge modes.
        /// </summary>
        private void IdentifyAndDeleteMissingEntries()
        {
            bool deletionsDisabled = MergeMode == MergeMode.NoDeletions
                                     || MergeMode == MergeMode.OnlyUpdates;
            if (deletionsDisabled)
            {
                return;
            }

            // Delta mode: rows explicitly flagged as Delete.
            // Any other mode: rows that carry no change action at all.
            bool deltaMode = MergeMode == MergeMode.Delta;
            IEnumerable<TInput> deletions = InputData
                .Where(row => deltaMode
                    ? GetChangeAction(row) == ChangeAction.Delete
                    : GetChangeAction(row) == null)
                .ToList();

            if (!UseTruncateMethod)
            {
                SqlDeleteIds(deletions);
            }

            // Stamp the rows only after the delete ran, then report them in the delta output.
            foreach (var deletedRow in deletions)
            {
                SetChangeAction(deletedRow, ChangeAction.Delete);
                SetChangeDate(deletedRow, DateTime.Now);
            }

            DeltaTable.AddRange(deletions);
        }
Пример #2
0
        /// <summary>
        /// Collects the rows to delete for the current merge mode, deletes them from the
        /// destination in batches of <c>DbDestination.DEFAULT_BATCH_SIZE</c> (skipped when the
        /// truncate method is used), flags them as deleted and appends them to <c>DeltaTable</c>.
        /// Does nothing for the <c>InsertsAndUpdatesOnly</c> and <c>UpdatesOnly</c> merge modes.
        /// </summary>
        private void IdentifyAndDeleteMissingEntries()
        {
            bool deletionsDisabled = MergeMode == MergeMode.InsertsAndUpdatesOnly
                                     || MergeMode == MergeMode.UpdatesOnly;
            if (deletionsDisabled)
            {
                return;
            }

            // Delta mode: rows explicitly flagged as Delete.
            // Any other mode: rows that carry no change action at all.
            bool deltaMode = MergeMode == MergeMode.Delta;
            IEnumerable<TInput> deletions = InputData
                .Where(row => deltaMode
                    ? GetChangeAction(row) == ChangeAction.Delete
                    : GetChangeAction(row) == null)
                .ToList();

            if (!UseTruncateMethod)
            {
                // Issue the deletes chunk by chunk instead of in one statement.
                foreach (var chunk in deletions.Batch(DbDestination.DEFAULT_BATCH_SIZE))
                {
                    SqlDeleteIds(chunk);
                }
            }

            foreach (var deletedRow in deletions)
            {
                SetChangeAction(deletedRow, ChangeAction.Delete);
                SetChangeDate(deletedRow, DateTime.Now);
            }

            DeltaTable.AddRange(deletions);
        }
Пример #3
0
        /// <summary>
        /// Wires the internal data flow: creates the lookup that annotates rows through
        /// <c>UpdateRowWithDeltaInfo</c>, installs the <c>BeforeBatchWrite</c> hook on the
        /// destination table and links the lookup to that destination.
        /// </summary>
        private void InitInternalFlow()
        {
            Lookup = new Lookup<TInput, TInput, TInput>(
                row => UpdateRowWithDeltaInfo(row),
                DestinationTableAsSource,
                InputData);

            DestinationTable.BeforeBatchWrite = batch =>
            {
                DeleteMissingEntriesOnce();
                DeltaTable.AddRange(batch);

                if (UseTruncateMethod)
                {
                    // Truncate mode: rows flagged "I", "U" or "E" are all written.
                    return batch
                        .Where(row => row.ChangeAction == "I" || row.ChangeAction == "U" || row.ChangeAction == "E")
                        .ToArray();
                }

                // Delete mode: pass every row that is neither "I" nor "E" to SqlDeleteIds,
                // then write only the rows flagged "I" or "U".
                SqlDeleteIds(batch.Where(row => !(row.ChangeAction == "I" || row.ChangeAction == "E")));
                return batch
                    .Where(row => row.ChangeAction == "I" || row.ChangeAction == "U")
                    .ToArray();
            };

            Lookup.LinkTo(DestinationTable);
        }
Пример #4
0
        /// <summary>
        /// Handles rows without a change action exactly once: the first call either truncates
        /// the table (truncate method) or deletes those rows via <c>SqlDeleteIds</c>, marks them
        /// with change action "D" and appends them to <c>DeltaTable</c>. Subsequent calls, and
        /// calls made while <c>DisableDeletion</c> is set, do nothing.
        /// </summary>
        void DeleteMissingEntriesOnce()
        {
            if (WasDeletionExecuted == true)
            {
                return;
            }
            WasDeletionExecuted = true;
            if (DisableDeletion == true)
            {
                return;
            }

            // Materialize once: the query is consumed several times below and its filter reads
            // ChangeAction, which this method overwrites.
            var deletions = InputData.Where(row => String.IsNullOrEmpty(row.ChangeAction)).ToList();

            if (UseTruncateMethod)
            {
                TruncateTableTask.Truncate(this.ConnectionManager, TableName);
            }
            else
            {
                SqlDeleteIds(deletions);
            }

            // Stamp only the rows that were identified as deletions; rows already present in
            // DeltaTable must keep their own change action.
            foreach (var row in deletions)
            {
                row.ChangeAction = "D";
                row.ChangeDate   = DateTime.Now;
            }
            DeltaTable.AddRange(deletions);
        }
Пример #5
0
 /// <summary>
 /// Configures <c>OutputSource</c> to serve the rows of <c>DeltaTable</c> by position:
 /// the read function returns the element at the given progress count, and reading is
 /// reported as complete once the progress count reaches the number of rows.
 /// </summary>
 private void SetOutputReadFunc()
 {
     OutputSource.ReadFunc = position => DeltaTable.ElementAt(position);
     OutputSource.ReadingCompleted = position =>
     {
         return(position >= DeltaTable.Count);
     };
 }
Пример #6
0
        /// <summary>
        /// Checks that both <c>DeltaTable.ForName</c> overloads (with and without an explicit
        /// session) return a <c>DeltaTable</c> for a table saved by name.
        /// </summary>
        public void TestSignaturesV3_0_X()
        {
            const string savedTable = "my_new_table";

            // Save a small range under a table name so that it can be looked up by name.
            _spark.Range(15)
                .Write()
                .Format("delta")
                .SaveAsTable(savedTable);

            Assert.IsType<DeltaTable>(DeltaTable.ForName(savedTable));
            Assert.IsType<DeltaTable>(DeltaTable.ForName(_spark, savedTable));
        }
Пример #7
0
        /// <summary>
        /// Creates the output source that replays the rows of <c>DeltaTable</c> one by one and
        /// arranges for it to execute when the destination table completes.
        /// </summary>
        private void InitOutputFlow()
        {
            // Shared cursor captured by both callbacks: advanced on every read, compared
            // against the row count to signal the end.
            int cursor = 0;

            OutputSource = new CustomSource<TInput>(
                () => DeltaTable.ElementAt(cursor++),
                () => cursor >= DeltaTable.Count);

            DestinationTable.OnCompletion = () =>
            {
                OutputSource.Execute();
            };
        }
        /// <summary>
        /// Demo entry point that runs a sequence of Delta Lake operations against the table
        /// at /tmp/delta-demo: conversion from Parquet, append, update, delete, history,
        /// versioned reads, merge and vacuum.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        private static void Main(string[] args)
        {
            // Session configured with the Delta SQL extension.
            var spark = SparkSession.Builder()
                        .Config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
                        .GetOrCreate();

            // When the check reports "not a Delta table", (re)write 1000 rows as Parquet and
            // convert that directory to Delta.
            // NOTE(review): the check receives a "parquet.`path`" identifier while ForPath below
            // uses the bare path - confirm IsDeltaTable accepts this form, otherwise the branch
            // runs on every start.
            if (!DeltaTable.IsDeltaTable("parquet.`/tmp/delta-demo`"))
            {
                spark.Range(1000).WithColumn("name", Lit("Sammy")).Write().Mode("overwrite")
                .Parquet("/tmp/delta-demo");
                DeltaTable.ConvertToDelta(spark, "parquet.`/tmp/delta-demo`");
            }

            var delta = DeltaTable.ForPath("/tmp/delta-demo");

            delta.ToDF().OrderBy(Desc("Id")).Show();

            // Append ids 5..499 with a different name value.
            spark.Range(5, 500).WithColumn("name", Lit("Lucy")).Write().Mode("append").Format("delta")
            .Save("/tmp/delta-demo");

            // Set id to 999 for every row whose id is above 500 ...
            delta.Update(Expr("id > 500"), new Dictionary <string, Column>
            {
                { "id", Lit(999) }
            });

            // ... and then delete exactly those rows.
            delta.Delete(Column("id").EqualTo(999));

            // This append writes only the id column (no "name" column is added here).
            spark.Range(100000, 100100).Write().Format("delta").Mode("append").Save("/tmp/delta-demo");

            delta.History().Show(1000, 10000);

            // Read the table as of version 0.
            spark.Read().Format("delta").Option("versionAsOf", 0).Load("/tmp/delta-demo").OrderBy(Desc("Id"))
            .Show();

            // NOTE(review): hard-coded timestamp; presumably only meaningful for the table
            // history of the run it was written for - confirm before reusing.
            spark.Read().Format("delta").Option("timestampAsOf", "2021-10-22 22:03:36")
            .Load("/tmp/delta-demo").OrderBy(Desc("Id")).Show();

            var newData = spark.Range(10).WithColumn("name", Lit("Ed"));

            // Merge on id: matched rows with an even source id get the source name, matched rows
            // with an odd source id are deleted, unmatched source rows are inserted.
            delta.Alias("target")
            .Merge(newData.Alias("source"), "target.id = source.id")
            .WhenMatched(newData["id"].Mod(2).EqualTo(0)).Update(new Dictionary <string, Column>
            {
                { "name", newData["name"] }
            })
            .WhenMatched(newData["id"].Mod(2).EqualTo(1)).Delete()
            .WhenNotMatched().InsertAll()
            .Execute();

            delta.ToDF().OrderBy("id").Show(1000, 10000);

            // NOTE(review): the argument is presumably the retention period in hours - confirm
            // against the Delta API, including whether such a short retention needs extra
            // configuration.
            delta.Vacuum(1F);
        }
Пример #9
0
        /// <summary>
        /// Exercises the three <c>DeltaTable.ConvertToDelta</c> overloads used here (no partition
        /// schema, partition schema as a DDL string, partition schema as a <c>StructType</c>):
        /// each run writes Parquet data, converts it and validates the converted content.
        /// </summary>
        public void TestConvertToDelta()
        {
            string partitionColumnName = "id_plus_one";
            DataFrame source = _spark.Range(0, 5).Select(
                Functions.Col("id"),
                Functions.Expr($"(`id` + 1) AS `{partitionColumnName}`"));

            // Shared scenario: write Parquet (optionally partitioned), verify that it is not a
            // Delta table yet, convert it, then verify both the content and the Delta status.
            void VerifyConversion(
                DataFrame input,
                Func<string, DeltaTable> convert,
                string partitionBy = null)
            {
                using var scratch = new TemporaryDirectory();
                string parquetPath = Path.Combine(scratch.Path, "parquet-data");

                DataFrameWriter writer = input.Write();
                if (!string.IsNullOrEmpty(partitionBy))
                {
                    writer = writer.PartitionBy(partitionBy);
                }
                writer.Parquet(parquetPath);

                Assert.False(DeltaTable.IsDeltaTable(parquetPath));

                DeltaTable converted = convert($"parquet.`{parquetPath}`");

                ValidateRangeDataFrame(Enumerable.Range(0, 5), converted.ToDF());
                Assert.True(DeltaTable.IsDeltaTable(parquetPath));
            }

            // Overload 1: unpartitioned data, no partition schema.
            VerifyConversion(source, identifier => DeltaTable.ConvertToDelta(_spark, identifier));

            // Overload 2: partition schema given as a DDL string.
            VerifyConversion(
                source.Repartition(Functions.Col(partitionColumnName)),
                identifier => DeltaTable.ConvertToDelta(
                    _spark,
                    identifier,
                    $"{partitionColumnName} bigint"),
                partitionColumnName);

            // Overload 3: partition schema given as a StructType.
            VerifyConversion(
                source.Repartition(Functions.Col(partitionColumnName)),
                identifier => DeltaTable.ConvertToDelta(
                    _spark,
                    identifier,
                    new StructType(new[]
                    {
                        new StructField(partitionColumnName, new IntegerType())
                    })),
                partitionColumnName);
        }
Пример #10
0
        /// <summary>
        /// Writes the same data once as plain Parquet and once as a Delta table, then checks
        /// that both <c>DeltaTable.IsDeltaTable</c> overloads report false for the former and
        /// true for the latter.
        /// </summary>
        public void TestIsDeltaTable()
        {
            using var scratch = new TemporaryDirectory();
            DataFrame sample = _spark.Range(0, 5);

            // Plain Parquet copy.
            string parquetPath = Path.Combine(scratch.Path, "parquet-data");
            sample.Write().Parquet(parquetPath);

            // Delta copy of the same rows.
            string deltaTablePath = Path.Combine(scratch.Path, "delta-table");
            sample.Write().Format("delta").Save(deltaTablePath);

            Assert.False(DeltaTable.IsDeltaTable(parquetPath));
            Assert.False(DeltaTable.IsDeltaTable(_spark, parquetPath));

            Assert.True(DeltaTable.IsDeltaTable(deltaTablePath));
            Assert.True(DeltaTable.IsDeltaTable(_spark, deltaTablePath));
        }
Пример #11
0
        /// <summary>
        /// Handles rows whose change action is still unset (0) exactly once: the first call
        /// either truncates the table (truncate method) or deletes those rows via
        /// <c>SqlDeleteIds</c>, marks them with change action 'D' and appends them to
        /// <c>DeltaTable</c>. Later calls, and calls made while <c>DisableDeletion</c> is set,
        /// do nothing, so rows that were added to <c>DeltaTable</c> in between keep their own
        /// change action.
        /// </summary>
        void DeleteMissingEntriesOnce()
        {
            if (WasDeletionExecuted == true)
            {
                return;
            }
            WasDeletionExecuted = true;
            if (DisableDeletion == true)
            {
                // Nothing is deleted, so nothing may be reported as deleted either.
                return;
            }

            // Materialize once: the filter reads ChangeAction, which is overwritten below.
            var deletions = InputData.Where(row => row.ChangeAction == 0).ToList();

            if (UseTruncateMethod)
            {
                TruncateTableTask.Truncate(TableName);
            }
            else
            {
                SqlDeleteIds(deletions);
            }

            // Stamp only the deleted rows instead of every row currently in DeltaTable.
            foreach (var row in deletions)
            {
                row.ChangeAction = 'D';
                row.ChangeDate   = DateTime.Now;
            }
            DeltaTable.AddRange(deletions);
        }
Пример #12
0
        /// <summary>
        /// Streams from one Delta table into another using one-time triggers and validates
        /// the sink content after the initial batch and again after more source data is
        /// appended.
        /// </summary>
        public void TestStreamingScenario()
        {
            using var scratch = new TemporaryDirectory();
            string sourcePath     = Path.Combine(scratch.Path, "source-delta-table");
            string sinkPath       = Path.Combine(scratch.Path, "sink-delta-table");
            string checkpointPath = Path.Combine(scratch.Path, "checkpoints");

            // Seed the source table with [0, 1, 2, 3, 4].
            _spark.Range(0, 5).Write().Format("delta").Save(sourcePath);

            // Source-to-sink stream definition. Each run below uses a one-time trigger so the
            // test stays synchronous and deterministic.
            DataStreamWriter streamWriter = _spark
                .ReadStream()
                .Format("delta")
                .Load(sourcePath)
                .WriteStream()
                .Format("delta")
                .OutputMode("append")
                .Option("checkpointLocation", checkpointPath);

            void RunSingleBatch()
            {
                streamWriter.Trigger(Trigger.Once()).Start(sinkPath).AwaitTermination();
            }

            // First batch: the sink should now hold the seeded rows.
            RunSingleBatch();
            DeltaTable sinkTable = DeltaTable.ForPath(sinkPath);
            ValidateRangeDataFrame(Enumerable.Range(0, 5), sinkTable.ToDF());

            // Append [5, 6, 7, 8, 9] to the source and run one more batch.
            _spark.Range(5, 10).Write().Format("delta").Mode("append").Save(sourcePath);
            RunSingleBatch();

            // The sink should now hold everything from 0 through 9.
            ValidateRangeDataFrame(Enumerable.Range(0, 10), sinkTable.ToDF());
        }
        /// <summary>
        /// Publishes the Parquet data found at <paramref name="rootPath"/>: the distinct
        /// suppliers and expense types go to their own Delta tables (only values not yet
        /// present are appended), and the fact rows, with both columns replaced by their
        /// hashes, are written to or merged into the Delta table at
        /// <paramref name="publishPath"/>.
        /// </summary>
        /// <param name="spark">Session used for all reads.</param>
        /// <param name="rootPath">Location of the Parquet input.</param>
        /// <param name="publishPath">Target Delta table path; also the prefix of the two dimension table paths.</param>
        public static void WriteToPublish(SparkSession spark, string rootPath, string publishPath)
        {
            var data = spark.Read().Parquet(rootPath);

            PublishDistinctColumnValues(spark, data, "Supplier", "supplier_hash", $"{publishPath}-suppliers");
            PublishDistinctColumnValues(spark, data, "Expense_Type", "expense_type_hash", $"{publishPath}-expense-type");

            // From here on the fact rows carry the hashes instead of the raw values.
            data = data.WithColumn("Expense_Type", Hash(Col("Expense_Type")))
                   .WithColumn("Supplier", Hash(Col("Supplier")));

            if (!Directory.Exists(publishPath))
            {
                // First publication: plain write.
                data.Write().Format("delta").Save(publishPath);
                return;
            }

            // Later publications: update the amount of matching rows whose amount changed and
            // insert the rows that have no match.
            var target = DeltaTable.ForPath(publishPath).Alias("target");
            var amountUpdate = new Dictionary<string, Column>
            {
                { "Amount", data["Amount"] }
            };

            target
            .Merge(
                data.Alias("source"),
                "source.Date = target.Date AND source.Expense_Type = target.Expense_Type AND source.Expense_Area = target.Expense_Area AND source.Supplier = target.supplier AND source.Reference = target.Reference")
            .WhenMatched("source.Amount != target.Amount")
            .Update(amountUpdate)
            .WhenNotMatched()
            .InsertAll()
            .Execute();
        }

        /// <summary>
        /// Writes the distinct values of one column, together with a hash column, to the Delta
        /// table at <paramref name="targetPath"/>; when the directory already exists only the
        /// values missing from it are appended.
        /// </summary>
        private static void PublishDistinctColumnValues(
            SparkSession spark,
            DataFrame data,
            string columnName,
            string hashColumnName,
            string targetPath)
        {
            var distinctValues = data.Select(Col(columnName)).Distinct()
                                 .WithColumn(hashColumnName, Hash(Col(columnName)));

            if (!Directory.Exists(targetPath))
            {
                distinctValues.Write().Format("delta").Save(targetPath);
                return;
            }

            // A left-anti join keeps only the values that are not published yet.
            var published = spark.Read().Format("delta").Load(targetPath);
            var unpublished = distinctValues.Join(
                published,
                published[columnName] == distinctValues[columnName],
                "left_anti");

            unpublished.Write().Mode(SaveMode.Append).Format("delta")
            .Save(targetPath);
        }
Пример #14
0
        /// <summary>
        /// Copies the connection, table name, batch size and buffer size to the destination
        /// table and installs its two hooks: <c>BeforeBatchWrite</c> records each batch in
        /// <c>DeltaTable</c> and decides which rows are deleted and (re)written, and
        /// <c>OnCompletion</c> handles missing entries, re-inserts truncated records where
        /// needed and runs the output source when successors are linked.
        /// </summary>
        /// <exception cref="ETLBoxNotSupportedException">
        /// Thrown from the batch hook when the truncate method is combined with the delta merge mode.
        /// </exception>
        private void SetDestinationTableProperties()
        {
            DestinationTable.ConnectionManager = ConnectionManager;
            DestinationTable.TableName         = TableName;
            DestinationTable.BatchSize         = BatchSize;
            DestinationTable.MaxBufferSize     = this.MaxBufferSize;

            DestinationTable.BeforeBatchWrite = batch =>
            {
                // Step 1: record the batch in the delta output. Which rows are recorded
                // depends on the merge mode.
                if (MergeMode == MergeMode.Delta)
                {
                    // Rows flagged Delete are left out here.
                    DeltaTable.AddRange(batch.Where(row => GetChangeAction(row) != ChangeAction.Delete));
                }
                else if (MergeMode == MergeMode.OnlyUpdates)
                {
                    DeltaTable.AddRange(batch.Where(row => GetChangeAction(row) == ChangeAction.Exists ||
                                                    GetChangeAction(row) == ChangeAction.Update));
                }
                else
                {
                    DeltaTable.AddRange(batch);
                }

                // Step 2: decide which rows are written to the destination.
                if (!UseTruncateMethod)
                {
                    if (MergeMode == MergeMode.OnlyUpdates)
                    {
                        // Updated rows are deleted first and then written again.
                        SqlDeleteIds(batch.Where(row => GetChangeAction(row) == ChangeAction.Update));
                        return(batch.Where(row => GetChangeAction(row) == ChangeAction.Update).ToArray());
                    }
                    else
                    {
                        // Everything that is neither Insert nor Exists is deleted; Insert and
                        // Update rows are written.
                        SqlDeleteIds(batch.Where(row => GetChangeAction(row) != ChangeAction.Insert && GetChangeAction(row) != ChangeAction.Exists));
                        return(batch.Where(row => GetChangeAction(row) == ChangeAction.Insert ||
                                           GetChangeAction(row) == ChangeAction.Update)
                               .ToArray());
                    }
                }
                else
                {
                    if (MergeMode == MergeMode.Delta)
                    {
                        throw new ETLBoxNotSupportedException("If you provide a delta load, you must define at least one compare column. " +
                                                              "Using the truncate method is not allowed.");
                    }
                    TruncateDestinationOnce();
                    if (MergeMode == MergeMode.OnlyUpdates)
                    {
                        // Everything except Delete and Insert rows is written back.
                        return(batch.Where(row => GetChangeAction(row) != ChangeAction.Delete &&
                                           GetChangeAction(row) != ChangeAction.Insert).ToArray());
                    }
                    else
                    {
                        // After a truncate, unchanged (Exists) rows have to be written again too.
                        return(batch.Where(row => GetChangeAction(row) == ChangeAction.Insert ||
                                           GetChangeAction(row) == ChangeAction.Update ||
                                           GetChangeAction(row) == ChangeAction.Exists)
                               .ToArray());
                    }
                }
            };

            DestinationTable.OnCompletion = () =>
            {
                IdentifyAndDeleteMissingEntries();
                if (UseTruncateMethod && (MergeMode == MergeMode.OnlyUpdates || MergeMode == MergeMode.NoDeletions))
                {
                    ReinsertTruncatedRecords();
                }
                // The output source is only run when something consumes it.
                if (Successors.Count > 0)
                {
                    OutputSource.ExecuteAsync();
                    OutputSource.Completion.Wait();
                    //Careful: A TPL buffer never completes if it has no consumer linked to it!!!
                    OutputSource.BufferCompletion.Wait();
                }
            };
        }
Пример #15
0
        /// <summary>
        /// Runs a Delta Lake walkthrough (write, overwrite, load, update, delete, merge)
        /// against a freshly named table next to the executing assembly and returns a textual
        /// report of the ids seen after each step.
        /// </summary>
        /// <param name="awsSettings">Injected settings; not read by this method.</param>
        /// <returns>
        /// The report text, or the exception message when any step fails.
        /// </returns>
        public string DeltaTest([FromServices] IAWSSettings awsSettings)
        {
            const string spacer = "              ";
            var report = new System.Text.StringBuilder();

            // Appends the first column of every row, each followed by " | ", then the spacer.
            void AppendIds(DataFrame frame)
            {
                foreach (var row in frame.Collect())
                {
                    report.Append(row.Values[0]).Append(" | ");
                }
                report.Append(spacer);
            }

            try
            {
                SparkSession spark = SparkSession
                                     .Builder()
                                     .AppName("DeltaTest")
                                     .GetOrCreate();

                string tempDirectory = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location);

                // 24-hour "HH": the 12-hour "hh" gives the same name for a morning and an
                // afternoon run, and the first write below targets a path that must not exist.
                string dt   = DateTime.Now.ToString("MMddHHmmss");
                string path = Path.Combine(tempDirectory, $"delta-table{dt}");

                // Write data to a Delta table.
                DataFrame data = spark.Range(0, 5);
                report.Append("Write data to a Delta table >> spark.Range(0, 5)").Append(spacer);
                AppendIds(data);
                data.Write().Format("delta").Save(path);

                // Create a second iteration of the table.
                data = spark.Range(5, 10);
                report.Append("Create a second iteration of the table >> spark.Range(5, 10)").Append(spacer);
                AppendIds(data);
                data.Write().Format("delta").Mode("overwrite").Save(path);

                // Load the data into a DeltaTable object.
                DeltaTable deltaTable = DeltaTable.ForPath(path);
                report.Append("Load the data into a DeltaTable object >> DeltaTable.ForPath").Append(spacer);
                AppendIds(deltaTable.ToDF());

                // Update every even value by adding 100 to it.
                deltaTable.Update(
                    condition: Functions.Expr("id % 2 == 0"),
                    set: new Dictionary<string, Column>()
                    {
                        { "id", Functions.Expr("id + 100") }
                    });
                report.Append("Update every even value by adding 100 to it.").Append(spacer);
                AppendIds(deltaTable.ToDF());

                // Delete every even value.
                deltaTable.Delete(condition: Functions.Expr("id % 2 == 0"));
                report.Append("Delete every even value  id % 2 == 0").Append(spacer);
                AppendIds(deltaTable.ToDF());

                // Upsert (merge) new data. The report lists the incoming rows, not the merge result.
                DataFrame newData = spark.Range(0, 20).As("newData").ToDF();
                report.Append("Upsert (merge) new data").Append(Environment.NewLine);
                AppendIds(newData);

                deltaTable.As("oldData")
                .Merge(newData, "oldData.id = newData.id")
                .WhenMatched()
                .Update(
                    new Dictionary<string, Column>()
                    {
                        { "id", Functions.Col("newData.id") }
                    })
                .WhenNotMatched()
                .InsertExpr(new Dictionary<string, string>()
                {
                    { "id", "newData.id" }
                })
                .Execute();

                spark.Stop();
            }
            catch (Exception ex)
            {
                // Best effort: a failure replaces the whole report with the error message.
                return(ex.Message);
            }
            return(report.ToString());
        }
Пример #16
0
        /// <summary>
        /// Walks through the basic Delta table workflow (write, overwrite, load, update,
        /// delete, merge) and validates the table content after every step.
        /// </summary>
        public void TestTutorialScenario()
        {
            using var scratch = new TemporaryDirectory();
            string tablePath = Path.Combine(scratch.Path, "delta-table");

            // Step 1: create the table from the ids 0..4 and check the written data frame.
            DataFrame firstVersion = _spark.Range(0, 5);
            firstVersion.Write().Format("delta").Save(tablePath);
            ValidateRangeDataFrame(Enumerable.Range(0, 5), firstVersion);

            // Step 2: overwrite the table with the ids 5..9.
            DataFrame secondVersion = _spark.Range(5, 10);
            secondVersion.Write().Format("delta").Mode("overwrite").Save(tablePath);

            // Step 3: load it as a DeltaTable; it must now hold 5..9.
            DeltaTable table = DeltaTable.ForPath(tablePath);
            ValidateRangeDataFrame(Enumerable.Range(5, 5), table.ToDF());

            // Step 4: add 100 to every even id.
            Column evenIdsForUpdate = Functions.Expr("id % 2 == 0");
            var addHundred = new Dictionary<string, Column>()
            {
                { "id", Functions.Expr("id + 100") }
            };
            table.Update(condition: evenIds(evenIdsForUpdate), set: addHundred);

            // Expected content: the untouched odd ids plus the shifted even ones,
            // i.e. 5, 7, 9, 106 and 108.
            var afterUpdate = new List<int>()
            {
                5, 7, 9, 106, 108
            };
            ValidateRangeDataFrame(afterUpdate, table.ToDF());

            // Step 5: delete every even id, which leaves 5, 7 and 9.
            table.Delete(condition: Functions.Expr("id % 2 == 0"));
            var afterDelete = new List<int>()
            {
                5, 7, 9
            };
            ValidateRangeDataFrame(afterDelete, table.ToDF());

            // Step 6: upsert the ids 0..19. Matching rows are updated, missing ones inserted.
            DataFrame incoming = _spark.Range(0, 20).As("newData").ToDF();
            var updateMatched = new Dictionary<string, Column>()
            {
                { "id", Functions.Col("newData.id") }
            };
            var insertMissing = new Dictionary<string, string>()
            {
                { "id", "newData.id" }
            };

            table.As("oldData")
            .Merge(incoming, "oldData.id = newData.id")
            .WhenMatched()
            .Update(updateMatched)
            .WhenNotMatched()
            .InsertExpr(insertMissing)
            .Execute();

            // The table must now hold the full sequence 0..19.
            ValidateRangeDataFrame(Enumerable.Range(0, 20), table.ToDF());

            // Identity helper that only exists to name the role of the condition at the call site.
            static Column evenIds(Column condition) => condition;
        }
Пример #17
0
        /// <summary>
        /// Signature test for the Delta Lake bindings: calls every <c>DeltaTable</c>,
        /// <c>DeltaMergeBuilder</c> and merge action builder overload used here once and
        /// checks the returned type. The merge builders are only constructed, never executed.
        /// </summary>
        public void TestSignatures()
        {
            using var tempDirectory = new TemporaryDirectory();
            string path = Path.Combine(tempDirectory.Path, "delta-table");

            DataFrame rangeRate = _spark.Range(15);

            rangeRate.Write().Format("delta").Save(path);

            DeltaTable table = Assert.IsType <DeltaTable>(DeltaTable.ForPath(path));

            table = Assert.IsType <DeltaTable>(DeltaTable.ForPath(_spark, path));

            Assert.IsType <bool>(DeltaTable.IsDeltaTable(_spark, path));
            Assert.IsType <bool>(DeltaTable.IsDeltaTable(path));

            Assert.IsType <DeltaTable>(table.As("oldTable"));
            Assert.IsType <DeltaTable>(table.Alias("oldTable"));
            Assert.IsType <DataFrame>(table.History());
            Assert.IsType <DataFrame>(table.History(200));
            Assert.IsType <DataFrame>(table.ToDF());

            DataFrame newTable = _spark.Range(10, 15).As("newTable");

            // Column overload of Merge: the condition must be built with Expr (SQL expression),
            // not Exp (exponential of a column with that name).
            Assert.IsType <DeltaMergeBuilder>(
                table.Merge(newTable, Functions.Expr("oldTable.id == newTable.id")));
            DeltaMergeBuilder mergeBuilder = Assert.IsType <DeltaMergeBuilder>(
                table.Merge(newTable, "oldTable.id == newTable.id"));

            // Validate the MergeBuilder matched signatures.
            Assert.IsType <DeltaMergeMatchedActionBuilder>(mergeBuilder.WhenMatched());
            Assert.IsType <DeltaMergeMatchedActionBuilder>(mergeBuilder.WhenMatched("id = 5"));
            DeltaMergeMatchedActionBuilder matchedActionBuilder =
                Assert.IsType <DeltaMergeMatchedActionBuilder>(
                    mergeBuilder.WhenMatched(Functions.Expr("id = 5")));

            Assert.IsType <DeltaMergeBuilder>(
                matchedActionBuilder.Update(new Dictionary <string, Column>()));
            Assert.IsType <DeltaMergeBuilder>(
                matchedActionBuilder.UpdateExpr(new Dictionary <string, string>()));
            Assert.IsType <DeltaMergeBuilder>(matchedActionBuilder.UpdateAll());
            Assert.IsType <DeltaMergeBuilder>(matchedActionBuilder.Delete());

            // Validate the MergeBuilder not-matched signatures.
            Assert.IsType <DeltaMergeNotMatchedActionBuilder>(mergeBuilder.WhenNotMatched());
            Assert.IsType <DeltaMergeNotMatchedActionBuilder>(
                mergeBuilder.WhenNotMatched("id = 5"));
            DeltaMergeNotMatchedActionBuilder notMatchedActionBuilder =
                Assert.IsType <DeltaMergeNotMatchedActionBuilder>(
                    mergeBuilder.WhenNotMatched(Functions.Expr("id = 5")));

            Assert.IsType <DeltaMergeBuilder>(
                notMatchedActionBuilder.Insert(new Dictionary <string, Column>()));
            Assert.IsType <DeltaMergeBuilder>(
                notMatchedActionBuilder.InsertExpr(new Dictionary <string, string>()));
            Assert.IsType <DeltaMergeBuilder>(notMatchedActionBuilder.InsertAll());

            // Update and UpdateExpr should return void.
            table.Update(new Dictionary <string, Column>()
            {
            });
            table.Update(Functions.Expr("id % 2 == 0"), new Dictionary <string, Column>()
            {
            });
            table.UpdateExpr(new Dictionary <string, string>()
            {
            });
            table.UpdateExpr("id % 2 == 1", new Dictionary <string, string>()
            {
            });

            Assert.IsType <DataFrame>(table.Vacuum());
            Assert.IsType <DataFrame>(table.Vacuum(168));

            // Generate should return void.
            table.Generate("symlink_format_manifest");

            // Delete should return void.
            table.Delete("id > 10");
            table.Delete(Functions.Expr("id > 5"));
            table.Delete();

            // Load the table as a streaming source.
            Assert.IsType <DataFrame>(_spark
                                      .ReadStream()
                                      .Format("delta")
                                      .Option("path", path)
                                      .Load());
            Assert.IsType <DataFrame>(_spark.ReadStream().Format("delta").Load(path));

            // Create Parquet data and convert it to DeltaTables.
            string parquetIdentifier = $"parquet.`{path}`";

            rangeRate.Write().Mode(SaveMode.Overwrite).Parquet(path);
            Assert.IsType <DeltaTable>(DeltaTable.ConvertToDelta(_spark, parquetIdentifier));

            // Rewrite the data partitioned by "id" to exercise the overloads that take a
            // partition schema (DDL string and StructType).
            rangeRate
            .Select(Functions.Col("id"), Functions.Expr($"(`id` + 1) AS `id_plus_one`"))
            .Write()
            .PartitionBy("id")
            .Mode(SaveMode.Overwrite)
            .Parquet(path);
            Assert.IsType <DeltaTable>(DeltaTable.ConvertToDelta(
                                           _spark,
                                           parquetIdentifier,
                                           "id bigint"));
            Assert.IsType <DeltaTable>(DeltaTable.ConvertToDelta(
                                           _spark,
                                           parquetIdentifier,
                                           new StructType(new[]
            {
                new StructField("id", new IntegerType())
            })));
        }
Пример #18
0
        /// <summary>
        /// Signature test for the Delta Lake bindings: calls the <c>DeltaTable</c>,
        /// <c>DeltaMergeBuilder</c> and merge action builder overloads used here once and
        /// checks the returned type. The merge builders are only constructed, never executed.
        /// </summary>
        public void TestSignatures()
        {
            using (var tempDirectory = new TemporaryDirectory())
            {
                string path = Path.Combine(tempDirectory.Path, "delta-table");

                DataFrame rangeRate = _spark.Range(15);
                rangeRate.Write().Format("delta").Save(path);

                DeltaTable table = Assert.IsType <DeltaTable>(DeltaTable.ForPath(path));
                table = Assert.IsType <DeltaTable>(DeltaTable.ForPath(_spark, path));

                Assert.IsType <DeltaTable>(table.As("oldTable"));
                Assert.IsType <DataFrame>(table.History());
                Assert.IsType <DataFrame>(table.History(200));
                Assert.IsType <DataFrame>(table.ToDF());

                DataFrame newTable = _spark.Range(10, 15).As("newTable");

                // Column overload of Merge: the condition must be built with Expr (SQL
                // expression), not Exp (exponential of a column with that name).
                Assert.IsType <DeltaMergeBuilder>(
                    table.Merge(newTable, Functions.Expr("oldTable.id == newTable.id")));
                DeltaMergeBuilder mergeBuilder = Assert.IsType <DeltaMergeBuilder>(
                    table.Merge(newTable, "oldTable.id == newTable.id"));

                // Validate the MergeBuilder matched signatures.
                Assert.IsType <DeltaMergeMatchedActionBuilder>(mergeBuilder.WhenMatched());
                Assert.IsType <DeltaMergeMatchedActionBuilder>(mergeBuilder.WhenMatched("id = 5"));
                DeltaMergeMatchedActionBuilder matchedActionBuilder =
                    Assert.IsType <DeltaMergeMatchedActionBuilder>(
                        mergeBuilder.WhenMatched(Functions.Expr("id = 5")));

                Assert.IsType <DeltaMergeBuilder>(
                    matchedActionBuilder.Update(new Dictionary <string, Column>()));
                Assert.IsType <DeltaMergeBuilder>(
                    matchedActionBuilder.UpdateExpr(new Dictionary <string, string>()));
                Assert.IsType <DeltaMergeBuilder>(matchedActionBuilder.UpdateAll());
                Assert.IsType <DeltaMergeBuilder>(matchedActionBuilder.Delete());

                // Validate the MergeBuilder not-matched signatures.
                Assert.IsType <DeltaMergeNotMatchedActionBuilder>(mergeBuilder.WhenNotMatched());
                Assert.IsType <DeltaMergeNotMatchedActionBuilder>(
                    mergeBuilder.WhenNotMatched("id = 5"));
                DeltaMergeNotMatchedActionBuilder notMatchedActionBuilder =
                    Assert.IsType <DeltaMergeNotMatchedActionBuilder>(
                        mergeBuilder.WhenNotMatched(Functions.Expr("id = 5")));

                Assert.IsType <DeltaMergeBuilder>(
                    notMatchedActionBuilder.Insert(new Dictionary <string, Column>()));
                Assert.IsType <DeltaMergeBuilder>(
                    notMatchedActionBuilder.InsertExpr(new Dictionary <string, string>()));
                Assert.IsType <DeltaMergeBuilder>(notMatchedActionBuilder.InsertAll());

                // Update and UpdateExpr should return void.
                table.Update(new Dictionary <string, Column>()
                {
                });
                table.Update(Functions.Expr("id % 2 == 0"), new Dictionary <string, Column>()
                {
                });
                table.UpdateExpr(new Dictionary <string, string>()
                {
                });
                table.UpdateExpr("id % 2 == 1", new Dictionary <string, string>()
                {
                });

                Assert.IsType <DataFrame>(table.Vacuum());
                Assert.IsType <DataFrame>(table.Vacuum(168));

                // Delete should return void.
                table.Delete("id > 10");
                table.Delete(Functions.Expr("id > 5"));
                table.Delete();
            }
        }