Exemple #1
0
        /// <summary>
        /// Checks that <c>DataFrameWriter.Options</c> hands the proxy a dictionary holding
        /// exactly the two supplied key/value pairs, and does so exactly once.
        /// </summary>
        public void TestOptions()
        {
            // Arrange
            const string firstKey = "key1";
            const string firstValue = "value1";
            const string secondKey = "key2";
            const string secondValue = "value2";

            mockDataFrameWriterProxy.Setup(
                proxy => proxy.Options(It.IsAny<Dictionary<string, string>>()));
            DataFrameWriter writer = new DataFrameWriter(mockDataFrameWriterProxy.Object);

            var options = new Dictionary<string, string>
            {
                [firstKey] = firstValue,
                [secondKey] = secondValue
            };

            // Act
            writer.Options(options);

            // Assert: both entries are present and nothing else was added.
            mockDataFrameWriterProxy.Verify(
                proxy => proxy.Options(
                    It.Is<Dictionary<string, string>>(
                        received =>
                            received[firstKey] == firstValue &&
                            received[secondKey] == secondValue &&
                            received.Count == 2)),
                Times.Once());
        }
Exemple #2
0
        /// <summary>
        /// Checks that the parameterless <c>DataFrameWriter.Save</c> calls
        /// <c>Save</c> on the proxy exactly once.
        /// </summary>
        public void TestSave()
        {
            // Arrange
            mockDataFrameWriterProxy.Setup(proxy => proxy.Save());
            DataFrameWriter writer = new DataFrameWriter(mockDataFrameWriterProxy.Object);

            // Act
            writer.Save();

            // Assert
            mockDataFrameWriterProxy.Verify(proxy => proxy.Save(), Times.Once());
        }
Exemple #3
0
        /// <summary>
        /// Writes a five-row range DataFrame as Parquet and converts it in place with each
        /// overload of <c>DeltaTable.ConvertToDelta</c>, asserting that the location is
        /// reported as a Delta table only after the conversion has run.
        /// </summary>
        public void TestConvertToDelta()
        {
            string partitionColumnName = "id_plus_one";
            DataFrame source = _spark
                .Range(0, 5)
                .Select(
                    Functions.Col("id"),
                    Functions.Expr($"(`id` + 1) AS `{partitionColumnName}`"));

            // Shared scenario; `convert` selects which ConvertToDelta() overload is exercised.
            void RunScenario(
                DataFrame input,
                Func<string, DeltaTable> convert,
                string partitionedBy = null)
            {
                using var scratch = new TemporaryDirectory();
                string parquetPath = Path.Combine(scratch.Path, "parquet-data");

                // Partition the output only when a partition column was requested.
                DataFrameWriter writer = string.IsNullOrEmpty(partitionedBy)
                    ? input.Write()
                    : input.Write().PartitionBy(partitionedBy);
                writer.Parquet(parquetPath);

                // Plain Parquet output must not be recognised as a Delta table yet.
                Assert.False(DeltaTable.IsDeltaTable(parquetPath));

                DeltaTable converted = convert($"parquet.`{parquetPath}`");

                ValidateRangeDataFrame(Enumerable.Range(0, 5), converted.ToDF());
                Assert.True(DeltaTable.IsDeltaTable(parquetPath));
            }

            // Overload 1: unpartitioned table.
            RunScenario(source, id => DeltaTable.ConvertToDelta(_spark, id));

            // Overload 2: partition schema given as a DDL string.
            RunScenario(
                source.Repartition(Functions.Col(partitionColumnName)),
                id => DeltaTable.ConvertToDelta(_spark, id, $"{partitionColumnName} bigint"),
                partitionColumnName);

            // Overload 3: partition schema given as a StructType.
            RunScenario(
                source.Repartition(Functions.Col(partitionColumnName)),
                id => DeltaTable.ConvertToDelta(
                    _spark,
                    id,
                    new StructType(new[]
                    {
                        new StructField(partitionColumnName, new IntegerType())
                    })),
                partitionColumnName);
        }
Exemple #4
0
        /// <summary>
        /// Checks that <c>DataFrameWriter.Format</c> passes each format name through to
        /// the proxy's <c>Format</c> method.
        /// </summary>
        public void TestFormat()
        {
            // Arrange
            mockDataFrameWriterProxy.Setup(proxy => proxy.Format(It.IsAny<string>()));
            DataFrameWriter writer = new DataFrameWriter(mockDataFrameWriterProxy.Object);
            string[] formats = { "parquet", "json" };

            for (int i = 0; i < formats.Length; i++)
            {
                string format = formats[i];

                // Act
                writer.Format(format);

                // Assert, then reset the mock before the next format is checked.
                mockDataFrameWriterProxy.Verify(proxy => proxy.Format(format));
                mockDataFrameWriterProxy.Reset();
            }
        }
Exemple #5
0
        /// <summary>
        /// Checks that the string overload of <c>DataFrameWriter.Mode</c> passes each
        /// save-mode name through to the proxy's <c>Mode</c> method.
        /// </summary>
        public void TestStringMode()
        {
            // Arrange
            mockDataFrameWriterProxy.Setup(proxy => proxy.Mode(It.IsAny<string>()));
            DataFrameWriter writer = new DataFrameWriter(mockDataFrameWriterProxy.Object);
            string[] saveModes = { "append", "ignore", "overwrite", "error", "default" };

            for (int i = 0; i < saveModes.Length; i++)
            {
                string saveMode = saveModes[i];

                // Act
                writer.Mode(saveMode);

                // Assert, then reset the mock before the next mode is checked.
                mockDataFrameWriterProxy.Verify(proxy => proxy.Mode(saveMode));
                mockDataFrameWriterProxy.Reset();
            }
        }
Exemple #6
0
        /// <summary>
        /// Checks that <c>DataFrameWriter.SaveAsTable</c> calls the proxy's
        /// <c>SaveAsTable</c> exactly once with the same table name.
        /// </summary>
        public void TestSaveAsTable()
        {
            // Arrange
            const string tableName = "table";
            mockDataFrameWriterProxy.Setup(proxy => proxy.SaveAsTable(It.IsAny<string>()));
            DataFrameWriter writer = new DataFrameWriter(mockDataFrameWriterProxy.Object);

            // Act
            writer.SaveAsTable(tableName);

            // Assert
            mockDataFrameWriterProxy.Verify(
                proxy => proxy.SaveAsTable(tableName),
                Times.Once());
        }
Exemple #7
0
        /// <summary>
        /// Checks that <c>DataFrameWriter.PartitionBy</c> passes the column-name array
        /// through to the proxy's <c>PartitionBy</c> method.
        /// </summary>
        public void TestPartitionBy()
        {
            // Arrange
            string[] partitionColumns = { "col1", "col2", "col3" };
            mockDataFrameWriterProxy.Setup(proxy => proxy.PartitionBy(It.IsAny<string[]>()));
            DataFrameWriter writer = new DataFrameWriter(mockDataFrameWriterProxy.Object);

            // Act
            writer.PartitionBy(partitionColumns);

            // Assert
            mockDataFrameWriterProxy.Verify(proxy => proxy.PartitionBy(partitionColumns));
        }
Exemple #8
0
        /// <summary>
        /// Checks that a single <c>DataFrameWriter.Option(key, value)</c> call reaches the
        /// proxy as one <c>Options</c> call carrying a one-entry dictionary.
        /// </summary>
        public void TestOption()
        {
            // Arrange
            const string optionKey = "path";
            const string optionValue = "path_value";
            mockDataFrameWriterProxy.Setup(
                proxy => proxy.Options(It.IsAny<Dictionary<string, string>>()));
            DataFrameWriter writer = new DataFrameWriter(mockDataFrameWriterProxy.Object);

            // Act
            writer.Option(optionKey, optionValue);

            // Assert: the dictionary holds the supplied pair and nothing else.
            mockDataFrameWriterProxy.Verify(
                proxy => proxy.Options(
                    It.Is<Dictionary<string, string>>(
                        received =>
                            received[optionKey] == optionValue &&
                            received.Count == 1)),
                Times.Once());
        }
Exemple #9
0
        /// <summary>
        /// Checks that <c>DataFrameWriter.Save(path)</c> results in one proxy <c>Save</c>
        /// call and one proxy <c>Options</c> call whose dictionary contains only the
        /// <c>"path"</c> entry set to the supplied path.
        /// </summary>
        public void TestSaveWithPath()
        {
            // Arrange
            const string targetPath = "/path/to/save";
            mockDataFrameWriterProxy.Setup(proxy => proxy.Save());
            mockDataFrameWriterProxy.Setup(
                proxy => proxy.Options(It.IsAny<Dictionary<string, string>>()));
            DataFrameWriter writer = new DataFrameWriter(mockDataFrameWriterProxy.Object);

            // Act
            writer.Save(targetPath);

            // Assert
            mockDataFrameWriterProxy.Verify(proxy => proxy.Save(), Times.Once());
            mockDataFrameWriterProxy.Verify(
                proxy => proxy.Options(
                    It.Is<Dictionary<string, string>>(
                        received =>
                            received["path"] == targetPath &&
                            received.Count == 1)),
                Times.Once());
        }
Exemple #10
0
        /// <summary>
        /// Checks that <c>DataFrameWriter.Jdbc</c> calls the proxy's <c>Jdbc</c> exactly
        /// once with the same URL, table name and properties dictionary.
        /// </summary>
        public void TestJdbc()
        {
            // Arrange
            const string jdbcUrl = "jdbc:subprotocol:subname";
            const string tableName = "table";
            var connectionProperties = new Dictionary<string, string>
            {
                ["autocommit"] = "false"
            };

            mockDataFrameWriterProxy.Setup(
                proxy => proxy.Jdbc(
                    It.IsAny<string>(),
                    It.IsAny<string>(),
                    It.IsAny<Dictionary<string, string>>()));
            DataFrameWriter writer = new DataFrameWriter(mockDataFrameWriterProxy.Object);

            // Act
            writer.Jdbc(jdbcUrl, tableName, connectionProperties);

            // Assert
            mockDataFrameWriterProxy.Verify(
                proxy => proxy.Jdbc(jdbcUrl, tableName, connectionProperties),
                Times.Once());
        }
Exemple #11
0
        /// <summary>
        /// Checks how the <c>SaveMode</c> overload of <c>DataFrameWriter.Mode</c> maps onto
        /// the proxy's string-based <c>Mode</c>: <c>Append</c>, <c>Ignore</c> and
        /// <c>Overwrite</c> must arrive as the enum member's <c>ToString()</c> value, while
        /// <c>ErrorIfExists</c> may arrive as either <c>"error"</c> or <c>"default"</c>.
        /// </summary>
        public void TestMode()
        {
            // Arrange
            mockDataFrameWriterProxy.Setup(proxy => proxy.Mode(It.IsAny<string>()));
            DataFrameWriter writer = new DataFrameWriter(mockDataFrameWriterProxy.Object);

            // Modes whose expected proxy argument is simply the enum member's name.
            SaveMode[] nameMappedModes = { SaveMode.Append, SaveMode.Ignore, SaveMode.Overwrite };
            foreach (SaveMode saveMode in nameMappedModes)
            {
                string expectedArgument = saveMode.ToString();

                writer.Mode(saveMode);
                mockDataFrameWriterProxy.Verify(proxy => proxy.Mode(expectedArgument));
                mockDataFrameWriterProxy.Reset();
            }

            // ErrorIfExists is the one mode that accepts two spellings.
            writer.Mode(SaveMode.ErrorIfExists);
            mockDataFrameWriterProxy.Verify(proxy => proxy.Mode(It.IsIn("error", "default")));
            mockDataFrameWriterProxy.Reset();
        }
Exemple #12
0
        /// <summary>
        /// Exercises the <c>DataFrameWriter</c> method signatures against the live
        /// <c>_spark</c> session in two phases: first the configuration methods, which are
        /// each asserted to return a <c>DataFrameWriter</c>; then the output methods, which
        /// are simply invoked against a temporary directory with no result assertions.
        /// </summary>
        /// <remarks>
        /// The "V2_3_X" suffix presumably refers to the Spark 2.3.x API surface - confirm
        /// against the rest of the test suite.
        /// </remarks>
        public void TestSignaturesV2_3_X()
        {
            // Phase 1: configuration methods. Only the returned type is checked.
            {
                DataFrameWriter dfw = _spark
                                      .Read()
                                      .Schema("age INT, name STRING")
                                      .Json($"{TestEnvironment.ResourceDirectory}people.json")
                                      .Write();

                // Mode: enum overload, then string overload.
                Assert.IsType <DataFrameWriter>(dfw.Mode(SaveMode.Ignore));

                Assert.IsType <DataFrameWriter>(dfw.Mode("overwrite"));

                Assert.IsType <DataFrameWriter>(dfw.Format("json"));

                // Option: one call per value type (string, bool, long, double).
                Assert.IsType <DataFrameWriter>(dfw.Option("stringOption", "value"));
                Assert.IsType <DataFrameWriter>(dfw.Option("boolOption", true));
                Assert.IsType <DataFrameWriter>(dfw.Option("longOption", 1L));
                Assert.IsType <DataFrameWriter>(dfw.Option("doubleOption", 3D));

                Assert.IsType <DataFrameWriter>(
                    dfw.Options(
                        new Dictionary <string, string>
                {
                    { "option1", "value1" },
                    { "option2", "value2" }
                }));

                // PartitionBy / BucketBy: single-column and multi-column calls.
                Assert.IsType <DataFrameWriter>(dfw.PartitionBy("age"));
                Assert.IsType <DataFrameWriter>(dfw.PartitionBy("age", "name"));

                Assert.IsType <DataFrameWriter>(dfw.BucketBy(3, "age"));
                Assert.IsType <DataFrameWriter>(dfw.BucketBy(3, "age", "name"));

                Assert.IsType <DataFrameWriter>(dfw.SortBy("name"));
            }

            // Phase 2: output methods, run against a temporary directory that is disposed
            // when the using block ends.
            using (var tempDir = new TemporaryDirectory())
            {
                DataFrameWriter dfw = _spark
                                      .Read()
                                      .Csv($"{TestEnvironment.ResourceDirectory}people.csv")
                                      .Write();

                // TODO: Test dfw.Jdbc without running a local db.

                // NOTE(review): the `path` option is set on this writer before the
                // path-taking calls below (Save(path), Json(path), ...). Confirm that this
                // combination is supported on every Spark version this test targets.
                dfw.Option("path", tempDir.Path).SaveAsTable("TestTable");

                dfw.InsertInto("TestTable");

                // Paths are built by plain concatenation, so this assumes tempDir.Path
                // already ends with a directory separator - TODO confirm.
                dfw.Option("path", $"{tempDir.Path}TestSavePath1").Save();
                dfw.Save($"{tempDir.Path}TestSavePath2");

                dfw.Json($"{tempDir.Path}TestJsonPath");

                dfw.Parquet($"{tempDir.Path}TestParquetPath");

                dfw.Orc($"{tempDir.Path}TestOrcPath");

                dfw.Text($"{tempDir.Path}TestTextPath");

                dfw.Csv($"{tempDir.Path}TestCsvPath");
            }
        }
        /// <summary>
        /// Exercises the <c>DataFrameWriter</c> method signatures against the live
        /// <c>_spark</c> session in two phases: first the configuration methods, which are
        /// each asserted to return a <c>DataFrameWriter</c>; then the output methods, which
        /// are simply invoked against a temporary directory with no result assertions.
        /// </summary>
        /// <remarks>
        /// In the second phase every path-taking call is made before the <c>path</c>
        /// option is first set on the writer; the in-body comment explains why that order
        /// matters on Spark 3.1.1+. The "V2_3_X" suffix presumably refers to the Spark
        /// 2.3.x API surface - confirm against the rest of the test suite.
        /// </remarks>
        public void TestSignaturesV2_3_X()
        {
            // Phase 1: configuration methods. Only the returned type is checked.
            {
                DataFrameWriter dfw = _spark
                                      .Read()
                                      .Schema("age INT, name STRING")
                                      .Json($"{TestEnvironment.ResourceDirectory}people.json")
                                      .Write();

                // Mode: enum overload, then string overload.
                Assert.IsType <DataFrameWriter>(dfw.Mode(SaveMode.Ignore));

                Assert.IsType <DataFrameWriter>(dfw.Mode("overwrite"));

                Assert.IsType <DataFrameWriter>(dfw.Format("json"));

                // Option: one call per value type (string, bool, long, double).
                Assert.IsType <DataFrameWriter>(dfw.Option("stringOption", "value"));
                Assert.IsType <DataFrameWriter>(dfw.Option("boolOption", true));
                Assert.IsType <DataFrameWriter>(dfw.Option("longOption", 1L));
                Assert.IsType <DataFrameWriter>(dfw.Option("doubleOption", 3D));

                Assert.IsType <DataFrameWriter>(
                    dfw.Options(
                        new Dictionary <string, string>
                {
                    { "option1", "value1" },
                    { "option2", "value2" }
                }));

                // PartitionBy / BucketBy: single-column and multi-column calls.
                Assert.IsType <DataFrameWriter>(dfw.PartitionBy("age"));
                Assert.IsType <DataFrameWriter>(dfw.PartitionBy("age", "name"));

                Assert.IsType <DataFrameWriter>(dfw.BucketBy(3, "age"));
                Assert.IsType <DataFrameWriter>(dfw.BucketBy(3, "age", "name"));

                Assert.IsType <DataFrameWriter>(dfw.SortBy("name"));
            }

            // Phase 2: output methods, run against a temporary directory that is disposed
            // when the using block ends.
            using (var tempDir = new TemporaryDirectory())
            {
                DataFrameWriter dfw = _spark
                                      .Read()
                                      .Csv($"{TestEnvironment.ResourceDirectory}people.csv")
                                      .Write();

                // TODO: Test dfw.Jdbc without running a local db.

                // Path-taking calls come first, while no `path` option has been set on
                // this writer. Paths are built by plain concatenation, so this assumes
                // tempDir.Path already ends with a directory separator - TODO confirm.
                dfw.Save($"{tempDir.Path}TestSavePath1");

                dfw.Json($"{tempDir.Path}TestJsonPath");

                dfw.Parquet($"{tempDir.Path}TestParquetPath");

                dfw.Orc($"{tempDir.Path}TestOrcPath");

                dfw.Text($"{tempDir.Path}TestTextPath");

                dfw.Csv($"{tempDir.Path}TestCsvPath");

                // From here on the `path` option is in play, so no further path-taking
                // calls are made.
                dfw.Option("path", tempDir.Path).SaveAsTable("TestTable");

                dfw.InsertInto("TestTable");

                // In Spark 3.1.1+ setting the `path` Option and then calling .Save(path) is not
                // supported unless `spark.sql.legacy.pathOptionBehavior.enabled` conf is set.
                // .Json(path), .Parquet(path), etc follow the same code path so the conf
                // needs to be set in these scenarios as well.
                dfw.Option("path", $"{tempDir.Path}TestSavePath2").Save();
            }
        }