Beispiel #1
0
        public void TestOption()
        {
            // Arrange: the single option to set, and a writer constructed over the mocked proxy.
            const string optionName = "path";
            const string optionValue = "path_value";
            mockDataFrameWriterProxy.Setup(proxy => proxy.Options(It.IsAny<Dictionary<string, string>>()));
            var writer = new DataFrameWriter(mockDataFrameWriterProxy.Object);

            // Act
            writer.Option(optionName, optionValue);

            // Assert: the proxy saw exactly one Options call whose dictionary maps
            // optionName to optionValue and contains no other entries.
            mockDataFrameWriterProxy.Verify(
                proxy => proxy.Options(
                    It.Is<Dictionary<string, string>>(
                        options => options[optionName] == optionValue && options.Count == 1)),
                Times.Once);
        }
Beispiel #2
0
        public void TestSignaturesV2_3_X()
        {
            // Signature smoke test for DataFrameWriter.
            //   1. Every configuration call (Mode, Format, Option, Options, PartitionBy,
            //      BucketBy, SortBy) is checked to return a DataFrameWriter.
            //   2. Every output call is invoked once against a temporary directory.
            {
                DataFrameWriter dfw = _spark
                                      .Read()
                                      .Schema("age INT, name STRING")
                                      .Json($"{TestEnvironment.ResourceDirectory}people.json")
                                      .Write();

                Assert.IsType<DataFrameWriter>(dfw.Mode(SaveMode.Ignore));

                Assert.IsType<DataFrameWriter>(dfw.Mode("overwrite"));

                Assert.IsType<DataFrameWriter>(dfw.Format("json"));

                Assert.IsType<DataFrameWriter>(dfw.Option("stringOption", "value"));
                Assert.IsType<DataFrameWriter>(dfw.Option("boolOption", true));
                Assert.IsType<DataFrameWriter>(dfw.Option("longOption", 1L));
                Assert.IsType<DataFrameWriter>(dfw.Option("doubleOption", 3D));

                Assert.IsType<DataFrameWriter>(
                    dfw.Options(
                        new Dictionary<string, string>
                        {
                            { "option1", "value1" },
                            { "option2", "value2" }
                        }));

                Assert.IsType<DataFrameWriter>(dfw.PartitionBy("age"));
                Assert.IsType<DataFrameWriter>(dfw.PartitionBy("age", "name"));

                Assert.IsType<DataFrameWriter>(dfw.BucketBy(3, "age"));
                Assert.IsType<DataFrameWriter>(dfw.BucketBy(3, "age", "name"));

                Assert.IsType<DataFrameWriter>(dfw.SortBy("name"));
            }

            using (var tempDir = new TemporaryDirectory())
            {
                DataFrameWriter dfw = _spark
                                      .Read()
                                      .Csv($"{TestEnvironment.ResourceDirectory}people.csv")
                                      .Write();

                // TODO: Test dfw.Jdbc without running a local db.

                // Ordering matters: the same writer is reused for every call below.
                // In Spark 3.1.1+ setting the `path` option and then calling .Save(path)
                // is not supported unless `spark.sql.legacy.pathOptionBehavior.enabled`
                // is set, and .Json(path), .Parquet(path), etc. follow the same code path.
                // So all explicit-path writes run first, while no `path` option is set.
                dfw.Save($"{tempDir.Path}TestSavePath2");

                dfw.Json($"{tempDir.Path}TestJsonPath");

                dfw.Parquet($"{tempDir.Path}TestParquetPath");

                dfw.Orc($"{tempDir.Path}TestOrcPath");

                dfw.Text($"{tempDir.Path}TestTextPath");

                dfw.Csv($"{tempDir.Path}TestCsvPath");

                // From here on the `path` option is set on the writer; only calls that
                // take no explicit path argument follow.
                dfw.Option("path", tempDir.Path).SaveAsTable("TestTable");

                dfw.InsertInto("TestTable");

                dfw.Option("path", $"{tempDir.Path}TestSavePath1").Save();
            }
        }
        public void TestSignaturesV2_3_X()
        {
            // Part 1: each configuration call on the writer is expected to return a DataFrameWriter.
            DataFrameWriter jsonWriter =
                _spark.Read()
                    .Schema("age INT, name STRING")
                    .Json($"{TestEnvironment.ResourceDirectory}people.json")
                    .Write();

            Assert.IsType<DataFrameWriter>(jsonWriter.Mode(SaveMode.Ignore));
            Assert.IsType<DataFrameWriter>(jsonWriter.Mode("overwrite"));
            Assert.IsType<DataFrameWriter>(jsonWriter.Format("json"));

            // One Option call per supported value type.
            Assert.IsType<DataFrameWriter>(jsonWriter.Option("stringOption", "value"));
            Assert.IsType<DataFrameWriter>(jsonWriter.Option("boolOption", true));
            Assert.IsType<DataFrameWriter>(jsonWriter.Option("longOption", 1L));
            Assert.IsType<DataFrameWriter>(jsonWriter.Option("doubleOption", 3D));

            var bulkOptions = new Dictionary<string, string>
            {
                { "option1", "value1" },
                { "option2", "value2" }
            };
            Assert.IsType<DataFrameWriter>(jsonWriter.Options(bulkOptions));

            Assert.IsType<DataFrameWriter>(jsonWriter.PartitionBy("age"));
            Assert.IsType<DataFrameWriter>(jsonWriter.PartitionBy("age", "name"));

            Assert.IsType<DataFrameWriter>(jsonWriter.BucketBy(3, "age"));
            Assert.IsType<DataFrameWriter>(jsonWriter.BucketBy(3, "age", "name"));

            Assert.IsType<DataFrameWriter>(jsonWriter.SortBy("name"));

            // Part 2: invoke each output call once, writing under a temporary directory.
            using (var tempDir = new TemporaryDirectory())
            {
                DataFrameWriter csvWriter =
                    _spark.Read()
                        .Csv($"{TestEnvironment.ResourceDirectory}people.csv")
                        .Write();

                // TODO: Test dfw.Jdbc without running a local db.

                // Explicit-path writes come first, before any `path` option is set on the writer.
                csvWriter.Save($"{tempDir.Path}TestSavePath1");
                csvWriter.Json($"{tempDir.Path}TestJsonPath");
                csvWriter.Parquet($"{tempDir.Path}TestParquetPath");
                csvWriter.Orc($"{tempDir.Path}TestOrcPath");
                csvWriter.Text($"{tempDir.Path}TestTextPath");
                csvWriter.Csv($"{tempDir.Path}TestCsvPath");

                csvWriter.Option("path", tempDir.Path).SaveAsTable("TestTable");
                csvWriter.InsertInto("TestTable");

                // Kept last on purpose: in Spark 3.1.1+ combining the `path` option with
                // .Save(path) is not supported unless the
                // `spark.sql.legacy.pathOptionBehavior.enabled` conf is set, and
                // .Json(path), .Parquet(path), etc. go through the same code path.
                csvWriter.Option("path", $"{tempDir.Path}TestSavePath2").Save();
            }
        }