public void TestOptions()
{
    // Arrange: the proxy accepts any options dictionary.
    mockDataFrameWriterProxy.Setup(m => m.Options(It.IsAny<Dictionary<string, string>>()));
    var writer = new DataFrameWriter(mockDataFrameWriterProxy.Object);
    const string firstKey = "key1";
    const string firstValue = "value1";
    const string secondKey = "key2";
    const string secondValue = "value2";
    var options = new Dictionary<string, string>()
    {
        { firstKey, firstValue },
        { secondKey, secondValue }
    };

    // Act
    writer.Options(options);

    // Assert: the proxy received exactly the two pairs we supplied, once.
    mockDataFrameWriterProxy.Verify(
        m => m.Options(It.Is<Dictionary<string, string>>(
            dict => dict[firstKey] == firstValue &&
                    dict[secondKey] == secondValue &&
                    dict.Count == 2)),
        Times.Once);
}
public void TestSave()
{
    // Arrange
    mockDataFrameWriterProxy.Setup(m => m.Save());
    var writer = new DataFrameWriter(mockDataFrameWriterProxy.Object);

    // Act
    writer.Save();

    // Assert: Save() is forwarded to the proxy exactly once.
    mockDataFrameWriterProxy.Verify(m => m.Save(), Times.Once);
}
public void TestConvertToDelta()
{
    string partitionColumnName = "id_plus_one";
    DataFrame data = _spark.Range(0, 5).Select(
        Functions.Col("id"),
        Functions.Expr($"(`id` + 1) AS `{partitionColumnName}`"));

    // Run the same scenario against each DeltaTable.ConvertToDelta() overload:
    // write parquet, confirm it is not yet a Delta table, convert, then validate.
    void RunConversionTest(
        DataFrame frame,
        Func<string, DeltaTable> convert,
        string partitionColumn = null)
    {
        using var tempDirectory = new TemporaryDirectory();
        string dataPath = Path.Combine(tempDirectory.Path, "parquet-data");

        DataFrameWriter writer = frame.Write();
        if (!string.IsNullOrEmpty(partitionColumn))
        {
            writer = writer.PartitionBy(partitionColumn);
        }

        writer.Parquet(dataPath);
        Assert.False(DeltaTable.IsDeltaTable(dataPath));

        string identifier = $"parquet.`{dataPath}`";
        DeltaTable convertedTable = convert(identifier);
        ValidateRangeDataFrame(Enumerable.Range(0, 5), convertedTable.ToDF());
        Assert.True(DeltaTable.IsDeltaTable(dataPath));
    }

    // Overload without partition information.
    RunConversionTest(data, identifier => DeltaTable.ConvertToDelta(_spark, identifier));

    // Overload taking the partition schema as a DDL string.
    RunConversionTest(
        data.Repartition(Functions.Col(partitionColumnName)),
        identifier => DeltaTable.ConvertToDelta(
            _spark,
            identifier,
            $"{partitionColumnName} bigint"),
        partitionColumnName);

    // Overload taking the partition schema as a StructType.
    RunConversionTest(
        data.Repartition(Functions.Col(partitionColumnName)),
        identifier => DeltaTable.ConvertToDelta(
            _spark,
            identifier,
            new StructType(new[]
            {
                new StructField(partitionColumnName, new IntegerType())
            })),
        partitionColumnName);
}
public void TestFormat()
{
    // Arrange
    mockDataFrameWriterProxy.Setup(m => m.Format(It.IsAny<string>()));
    var writer = new DataFrameWriter(mockDataFrameWriterProxy.Object);

    // Act & Assert: each format string is forwarded verbatim to the proxy.
    foreach (string format in new[] { "parquet", "json" })
    {
        writer.Format(format);
        mockDataFrameWriterProxy.Verify(m => m.Format(format));
        mockDataFrameWriterProxy.Reset();
    }
}
public void TestStringMode()
{
    // Arrange
    mockDataFrameWriterProxy.Setup(m => m.Mode(It.IsAny<string>()));
    var writer = new DataFrameWriter(mockDataFrameWriterProxy.Object);

    // Act & Assert: each string mode is forwarded verbatim to the proxy.
    foreach (string mode in new[] { "append", "ignore", "overwrite", "error", "default" })
    {
        writer.Mode(mode);
        mockDataFrameWriterProxy.Verify(m => m.Mode(mode));
        mockDataFrameWriterProxy.Reset();
    }
}
public void TestSaveAsTable()
{
    // Arrange
    mockDataFrameWriterProxy.Setup(m => m.SaveAsTable(It.IsAny<string>()));
    var writer = new DataFrameWriter(mockDataFrameWriterProxy.Object);
    const string tableName = "table";

    // Act
    writer.SaveAsTable(tableName);

    // Assert: the table name reaches the proxy exactly once.
    mockDataFrameWriterProxy.Verify(m => m.SaveAsTable(tableName), Times.Once);
}
public void TestPartitionBy()
{
    // Arrange
    mockDataFrameWriterProxy.Setup(m => m.PartitionBy(It.IsAny<string[]>()));
    var writer = new DataFrameWriter(mockDataFrameWriterProxy.Object);
    string[] columnNames = { "col1", "col2", "col3" };

    // Act
    writer.PartitionBy(columnNames);

    // Assert: the same column array is forwarded to the proxy.
    mockDataFrameWriterProxy.Verify(m => m.PartitionBy(columnNames));
}
public void TestOption()
{
    // Arrange
    mockDataFrameWriterProxy.Setup(m => m.Options(It.IsAny<Dictionary<string, string>>()));
    var writer = new DataFrameWriter(mockDataFrameWriterProxy.Object);
    const string optionKey = "path";
    const string optionValue = "path_value";

    // Act
    writer.Option(optionKey, optionValue);

    // Assert: a single Option() call is delivered as a one-entry dictionary.
    mockDataFrameWriterProxy.Verify(
        m => m.Options(It.Is<Dictionary<string, string>>(
            dict => dict[optionKey] == optionValue && dict.Count == 1)),
        Times.Once);
}
public void TestSaveWithPath()
{
    // Arrange
    mockDataFrameWriterProxy.Setup(m => m.Save());
    mockDataFrameWriterProxy.Setup(m => m.Options(It.IsAny<Dictionary<string, string>>()));
    var writer = new DataFrameWriter(mockDataFrameWriterProxy.Object);
    const string savePath = "/path/to/save";

    // Act
    writer.Save(savePath);

    // Assert: Save(path) results in one Options() call carrying only the
    // "path" entry, followed by one parameterless Save() on the proxy.
    mockDataFrameWriterProxy.Verify(m => m.Save(), Times.Once);
    mockDataFrameWriterProxy.Verify(
        m => m.Options(It.Is<Dictionary<string, string>>(
            dict => dict["path"] == savePath && dict.Count == 1)),
        Times.Once);
}
public void TestJdbc()
{
    // Arrange
    mockDataFrameWriterProxy.Setup(m => m.Jdbc(
        It.IsAny<string>(),
        It.IsAny<string>(),
        It.IsAny<Dictionary<string, string>>()));
    var writer = new DataFrameWriter(mockDataFrameWriterProxy.Object);
    const string connectionUrl = "jdbc:subprotocol:subname";
    const string tableName = "table";
    var connectionProperties = new Dictionary<string, string>()
    {
        { "autocommit", "false" }
    };

    // Act
    writer.Jdbc(connectionUrl, tableName, connectionProperties);

    // Assert: url, table and properties are all forwarded unchanged, once.
    mockDataFrameWriterProxy.Verify(
        m => m.Jdbc(connectionUrl, tableName, connectionProperties),
        Times.Once);
}
public void TestMode()
{
    // Arrange
    mockDataFrameWriterProxy.Setup(m => m.Mode(It.IsAny<string>()));
    var writer = new DataFrameWriter(mockDataFrameWriterProxy.Object);

    // For these modes the enum name is forwarded as its string form.
    void AssertModeForwarded(SaveMode saveMode)
    {
        writer.Mode(saveMode);
        mockDataFrameWriterProxy.Verify(m => m.Mode(saveMode.ToString()));
        mockDataFrameWriterProxy.Reset();
    }

    AssertModeForwarded(SaveMode.Append);
    AssertModeForwarded(SaveMode.Ignore);
    AssertModeForwarded(SaveMode.Overwrite);

    // ErrorIfExists is special-cased: the proxy may receive either
    // "error" or "default".
    writer.Mode(SaveMode.ErrorIfExists);
    mockDataFrameWriterProxy.Verify(m => m.Mode(It.IsIn("error", "default")));
    mockDataFrameWriterProxy.Reset();
}
// Smoke-tests the Spark 2.3.x DataFrameWriter surface: every fluent builder
// call must return a DataFrameWriter, and the terminal write operations must
// run against a live session without throwing.
public void TestSignaturesV2_3_X()
{
    {
        DataFrameWriter dfw = _spark
            .Read()
            .Schema("age INT, name STRING")
            .Json($"{TestEnvironment.ResourceDirectory}people.json")
            .Write();

        // Each builder method returns the writer itself for chaining.
        Assert.IsType<DataFrameWriter>(dfw.Mode(SaveMode.Ignore));
        Assert.IsType<DataFrameWriter>(dfw.Mode("overwrite"));
        Assert.IsType<DataFrameWriter>(dfw.Format("json"));
        // Option() overloads for each supported value type.
        Assert.IsType<DataFrameWriter>(dfw.Option("stringOption", "value"));
        Assert.IsType<DataFrameWriter>(dfw.Option("boolOption", true));
        Assert.IsType<DataFrameWriter>(dfw.Option("longOption", 1L));
        Assert.IsType<DataFrameWriter>(dfw.Option("doubleOption", 3D));
        Assert.IsType<DataFrameWriter>(
            dfw.Options(
                new Dictionary<string, string>
                {
                    { "option1", "value1" },
                    { "option2", "value2" }
                }));
        Assert.IsType<DataFrameWriter>(dfw.PartitionBy("age"));
        Assert.IsType<DataFrameWriter>(dfw.PartitionBy("age", "name"));
        Assert.IsType<DataFrameWriter>(dfw.BucketBy(3, "age"));
        Assert.IsType<DataFrameWriter>(dfw.BucketBy(3, "age", "name"));
        Assert.IsType<DataFrameWriter>(dfw.SortBy("name"));
    }

    using (var tempDir = new TemporaryDirectory())
    {
        DataFrameWriter dfw = _spark
            .Read()
            .Csv($"{TestEnvironment.ResourceDirectory}people.csv")
            .Write();

        // TODO: Test dfw.Jdbc without running a local db.

        // SaveAsTable must run before InsertInto so "TestTable" exists.
        dfw.Option("path", tempDir.Path).SaveAsTable("TestTable");
        dfw.InsertInto("TestTable");
        // Terminal writers; each targets a distinct path under tempDir.
        dfw.Option("path", $"{tempDir.Path}TestSavePath1").Save();
        dfw.Save($"{tempDir.Path}TestSavePath2");
        dfw.Json($"{tempDir.Path}TestJsonPath");
        dfw.Parquet($"{tempDir.Path}TestParquetPath");
        dfw.Orc($"{tempDir.Path}TestOrcPath");
        dfw.Text($"{tempDir.Path}TestTextPath");
        dfw.Csv($"{tempDir.Path}TestCsvPath");
    }
}
// Smoke-tests the DataFrameWriter surface (variant ordered for Spark 3.1.1+,
// where the `path` option interacts with Save(path)/format writers): every
// fluent builder call must return a DataFrameWriter, and the terminal write
// operations must run against a live session without throwing.
public void TestSignaturesV2_3_X()
{
    {
        DataFrameWriter dfw = _spark
            .Read()
            .Schema("age INT, name STRING")
            .Json($"{TestEnvironment.ResourceDirectory}people.json")
            .Write();

        // Each builder method returns the writer itself for chaining.
        Assert.IsType<DataFrameWriter>(dfw.Mode(SaveMode.Ignore));
        Assert.IsType<DataFrameWriter>(dfw.Mode("overwrite"));
        Assert.IsType<DataFrameWriter>(dfw.Format("json"));
        // Option() overloads for each supported value type.
        Assert.IsType<DataFrameWriter>(dfw.Option("stringOption", "value"));
        Assert.IsType<DataFrameWriter>(dfw.Option("boolOption", true));
        Assert.IsType<DataFrameWriter>(dfw.Option("longOption", 1L));
        Assert.IsType<DataFrameWriter>(dfw.Option("doubleOption", 3D));
        Assert.IsType<DataFrameWriter>(
            dfw.Options(
                new Dictionary<string, string>
                {
                    { "option1", "value1" },
                    { "option2", "value2" }
                }));
        Assert.IsType<DataFrameWriter>(dfw.PartitionBy("age"));
        Assert.IsType<DataFrameWriter>(dfw.PartitionBy("age", "name"));
        Assert.IsType<DataFrameWriter>(dfw.BucketBy(3, "age"));
        Assert.IsType<DataFrameWriter>(dfw.BucketBy(3, "age", "name"));
        Assert.IsType<DataFrameWriter>(dfw.SortBy("name"));
    }

    using (var tempDir = new TemporaryDirectory())
    {
        DataFrameWriter dfw = _spark
            .Read()
            .Csv($"{TestEnvironment.ResourceDirectory}people.csv")
            .Write();

        // TODO: Test dfw.Jdbc without running a local db.

        // Path-based writers run before any `path` option is set on dfw.
        dfw.Save($"{tempDir.Path}TestSavePath1");
        dfw.Json($"{tempDir.Path}TestJsonPath");
        dfw.Parquet($"{tempDir.Path}TestParquetPath");
        dfw.Orc($"{tempDir.Path}TestOrcPath");
        dfw.Text($"{tempDir.Path}TestTextPath");
        dfw.Csv($"{tempDir.Path}TestCsvPath");

        // SaveAsTable must run before InsertInto so "TestTable" exists.
        dfw.Option("path", tempDir.Path).SaveAsTable("TestTable");
        dfw.InsertInto("TestTable");

        // In Spark 3.1.1+ setting the `path` Option and then calling .Save(path) is not
        // supported unless `spark.sql.legacy.pathOptionBehavior.enabled` conf is set.
        // .Json(path), .Parquet(path), etc follow the same code path so the conf
        // needs to be set in these scenarios as well.
        dfw.Option("path", $"{tempDir.Path}TestSavePath2").Save();
    }
}