/// <summary>
/// Checks that a single <c>Option(key, value)</c> call on the writer reaches the
/// proxy as exactly one <c>Options</c> call carrying a one-entry dictionary.
/// </summary>
public void TestOption()
{
    // Arrange
    const string optionName = "path";
    const string optionValue = "path_value";

    mockDataFrameWriterProxy.Setup(
        proxy => proxy.Options(It.IsAny<Dictionary<string, string>>()));
    DataFrameWriter writer = new DataFrameWriter(mockDataFrameWriterProxy.Object);

    // Act
    writer.Option(optionName, optionValue);

    // Assert
    // The lookup is evaluated before the size check, so a dictionary that lacks the
    // key surfaces as an exception from the indexer rather than a plain mismatch.
    mockDataFrameWriterProxy.Verify(
        proxy => proxy.Options(
            It.Is<Dictionary<string, string>>(
                forwarded =>
                    forwarded[optionName] == optionValue && forwarded.Count == 1)),
        Times.Once);
}
/// <summary>
/// Exercises the <see cref="DataFrameWriter"/> API surface: the chainable
/// configuration methods are checked for their return type, and the writer
/// methods are invoked against a temporary directory.
/// </summary>
public void TestSignaturesV2_3_X()
{
    {
        DataFrameWriter dfw = _spark
            .Read()
            .Schema("age INT, name STRING")
            .Json($"{TestEnvironment.ResourceDirectory}people.json")
            .Write();

        Assert.IsType<DataFrameWriter>(dfw.Mode(SaveMode.Ignore));

        Assert.IsType<DataFrameWriter>(dfw.Mode("overwrite"));

        Assert.IsType<DataFrameWriter>(dfw.Format("json"));

        Assert.IsType<DataFrameWriter>(dfw.Option("stringOption", "value"));
        Assert.IsType<DataFrameWriter>(dfw.Option("boolOption", true));
        Assert.IsType<DataFrameWriter>(dfw.Option("longOption", 1L));
        Assert.IsType<DataFrameWriter>(dfw.Option("doubleOption", 3D));

        Assert.IsType<DataFrameWriter>(
            dfw.Options(
                new Dictionary<string, string>
                {
                    { "option1", "value1" },
                    { "option2", "value2" }
                }));

        Assert.IsType<DataFrameWriter>(dfw.PartitionBy("age"));
        Assert.IsType<DataFrameWriter>(dfw.PartitionBy("age", "name"));

        Assert.IsType<DataFrameWriter>(dfw.BucketBy(3, "age"));
        Assert.IsType<DataFrameWriter>(dfw.BucketBy(3, "age", "name"));

        Assert.IsType<DataFrameWriter>(dfw.SortBy("name"));
    }

    using (var tempDir = new TemporaryDirectory())
    {
        DataFrameWriter dfw = _spark
            .Read()
            .Csv($"{TestEnvironment.ResourceDirectory}people.csv")
            .Write();

        // TODO: Test dfw.Jdbc without running a local db.

        // Run every writer that takes an explicit path argument BEFORE the `path`
        // option is set on this writer. Option() is called on the same `dfw`
        // instance, so the option is expected to stay set for later calls, and
        // Spark 3.1+ rejects a `path` option combined with a path argument unless
        // `spark.sql.legacy.pathOptionBehavior.enabled` is set.
        dfw.Save($"{tempDir.Path}TestSavePath2");

        dfw.Json($"{tempDir.Path}TestJsonPath");

        dfw.Parquet($"{tempDir.Path}TestParquetPath");

        dfw.Orc($"{tempDir.Path}TestOrcPath");

        dfw.Text($"{tempDir.Path}TestTextPath");

        dfw.Csv($"{tempDir.Path}TestCsvPath");

        // From here on only calls without a path argument are made, so setting
        // the `path` option is safe.
        dfw.Option("path", tempDir.Path).SaveAsTable("TestTable");

        dfw.InsertInto("TestTable");

        dfw.Option("path", $"{tempDir.Path}TestSavePath1").Save();
    }
}
/// <summary>
/// Signature smoke test for <see cref="DataFrameWriter"/>: first the chainable
/// configuration calls (each must hand back a <see cref="DataFrameWriter"/>),
/// then the output calls, which write beneath a temporary directory.
/// </summary>
public void TestSignaturesV2_3_X()
{
    // Part 1: chainable configuration methods.
    string jsonSource = $"{TestEnvironment.ResourceDirectory}people.json";
    DataFrameWriter configWriter =
        _spark.Read().Schema("age INT, name STRING").Json(jsonSource).Write();

    Assert.IsType<DataFrameWriter>(configWriter.Mode(SaveMode.Ignore));
    Assert.IsType<DataFrameWriter>(configWriter.Mode("overwrite"));
    Assert.IsType<DataFrameWriter>(configWriter.Format("json"));

    Assert.IsType<DataFrameWriter>(configWriter.Option("stringOption", "value"));
    Assert.IsType<DataFrameWriter>(configWriter.Option("boolOption", true));
    Assert.IsType<DataFrameWriter>(configWriter.Option("longOption", 1L));
    Assert.IsType<DataFrameWriter>(configWriter.Option("doubleOption", 3D));

    var extraOptions = new Dictionary<string, string>
    {
        { "option1", "value1" },
        { "option2", "value2" }
    };
    Assert.IsType<DataFrameWriter>(configWriter.Options(extraOptions));

    Assert.IsType<DataFrameWriter>(configWriter.PartitionBy("age"));
    Assert.IsType<DataFrameWriter>(configWriter.PartitionBy("age", "name"));
    Assert.IsType<DataFrameWriter>(configWriter.BucketBy(3, "age"));
    Assert.IsType<DataFrameWriter>(configWriter.BucketBy(3, "age", "name"));
    Assert.IsType<DataFrameWriter>(configWriter.SortBy("name"));

    // Part 2: output methods, all targeting a scratch directory.
    using (var scratch = new TemporaryDirectory())
    {
        string csvSource = $"{TestEnvironment.ResourceDirectory}people.csv";
        DataFrameWriter outputWriter = _spark.Read().Csv(csvSource).Write();

        // TODO: Test dfw.Jdbc without running a local db.

        // Writers that receive their destination as an argument go first.
        outputWriter.Save($"{scratch.Path}TestSavePath1");
        outputWriter.Json($"{scratch.Path}TestJsonPath");
        outputWriter.Parquet($"{scratch.Path}TestParquetPath");
        outputWriter.Orc($"{scratch.Path}TestOrcPath");
        outputWriter.Text($"{scratch.Path}TestTextPath");
        outputWriter.Csv($"{scratch.Path}TestCsvPath");

        outputWriter.Option("path", scratch.Path).SaveAsTable("TestTable");
        outputWriter.InsertInto("TestTable");

        // Since Spark 3.1.1, combining the `path` option with .Save(path) is only
        // supported when `spark.sql.legacy.pathOptionBehavior.enabled` is set, and
        // .Json(path), .Parquet(path), etc. share that code path. The option-based
        // Save() is therefore kept as the final call.
        outputWriter.Option("path", $"{scratch.Path}TestSavePath2").Save();
    }
}