Esempio n. 1
0
        // Checks that calling Options on a DataFrameWriter results in exactly one
        // Options call on the mocked proxy, and that the dictionary received by the
        // proxy holds precisely the two key/value pairs that were passed in.
        public void TestOptions()
        {
            // Arrange
            const string key1 = "key1";
            const string value1 = "value1";
            const string key2 = "key2";
            const string value2 = "value2";

            mockDataFrameWriterProxy.Setup(
                proxy => proxy.Options(It.IsAny<Dictionary<string, string>>()));
            var writerUnderTest = new DataFrameWriter(mockDataFrameWriterProxy.Object);

            var suppliedOptions = new Dictionary<string, string>
            {
                [key1] = value1,
                [key2] = value2
            };

            // Act
            writerUnderTest.Options(suppliedOptions);

            // Assert
            // The predicate looks up both keys first and only then checks the entry
            // count, so a dictionary with extra entries is rejected as well.
            Func<Times> exactlyOnce = Times.Once;
            mockDataFrameWriterProxy.Verify(
                proxy => proxy.Options(
                    It.Is<Dictionary<string, string>>(
                        received =>
                            received[key1] == value1 &&
                            received[key2] == value2 &&
                            received.Count == 2)),
                exactlyOnce);
        }
Esempio n. 2
0
        // Exercises the DataFrameWriter API surface: first the builder-style calls
        // (each is asserted to return a DataFrameWriter), then the calls that
        // actually write output underneath a temporary directory.
        public void TestSignaturesV2_3_X()
        {
            // Builder-style calls; nothing is written to disk in this scope.
            {
                DataFrameWriter dfw = _spark
                                      .Read()
                                      .Schema("age INT, name STRING")
                                      .Json($"{TestEnvironment.ResourceDirectory}people.json")
                                      .Write();

                Assert.IsType<DataFrameWriter>(dfw.Mode(SaveMode.Ignore));

                Assert.IsType<DataFrameWriter>(dfw.Mode("overwrite"));

                Assert.IsType<DataFrameWriter>(dfw.Format("json"));

                Assert.IsType<DataFrameWriter>(dfw.Option("stringOption", "value"));
                Assert.IsType<DataFrameWriter>(dfw.Option("boolOption", true));
                Assert.IsType<DataFrameWriter>(dfw.Option("longOption", 1L));
                Assert.IsType<DataFrameWriter>(dfw.Option("doubleOption", 3D));

                Assert.IsType<DataFrameWriter>(
                    dfw.Options(
                        new Dictionary<string, string>
                        {
                            { "option1", "value1" },
                            { "option2", "value2" }
                        }));

                Assert.IsType<DataFrameWriter>(dfw.PartitionBy("age"));
                Assert.IsType<DataFrameWriter>(dfw.PartitionBy("age", "name"));

                Assert.IsType<DataFrameWriter>(dfw.BucketBy(3, "age"));
                Assert.IsType<DataFrameWriter>(dfw.BucketBy(3, "age", "name"));

                Assert.IsType<DataFrameWriter>(dfw.SortBy("name"));
            }

            // Calls that write data; every target lives under the temporary directory.
            using (var tempDir = new TemporaryDirectory())
            {
                DataFrameWriter dfw = _spark
                                      .Read()
                                      .Csv($"{TestEnvironment.ResourceDirectory}people.csv")
                                      .Write();

                // TODO: Test dfw.Jdbc without running a local db.

                // Writes that take an explicit path run first, before any `path`
                // option has been set on this writer (see the note further down).
                dfw.Save($"{tempDir.Path}TestSavePath1");

                dfw.Json($"{tempDir.Path}TestJsonPath");

                dfw.Parquet($"{tempDir.Path}TestParquetPath");

                dfw.Orc($"{tempDir.Path}TestOrcPath");

                dfw.Text($"{tempDir.Path}TestTextPath");

                dfw.Csv($"{tempDir.Path}TestCsvPath");

                dfw.Option("path", tempDir.Path).SaveAsTable("TestTable");

                dfw.InsertInto("TestTable");

                // In Spark 3.1.1+ setting the `path` Option and then calling .Save(path) is not
                // supported unless `spark.sql.legacy.pathOptionBehavior.enabled` conf is set.
                // .Json(path), .Parquet(path), etc follow the same code path, so every call
                // that sets the `path` option is kept after the explicit-path writes above.
                dfw.Option("path", $"{tempDir.Path}TestSavePath2").Save();
            }
        }
Esempio n. 3
0
        // Walks through the DataFrameWriter API: the configuration calls are each
        // asserted to yield a DataFrameWriter, and afterwards the output-producing
        // calls are invoked against locations inside a temporary directory.
        public void TestSignaturesV2_3_X()
        {
            // Part 1: configuration calls on a writer built from the JSON resource.
            var jsonPeople = _spark
                .Read()
                .Schema("age INT, name STRING")
                .Json($"{TestEnvironment.ResourceDirectory}people.json");
            DataFrameWriter configWriter = jsonPeople.Write();

            Assert.IsType<DataFrameWriter>(configWriter.Mode(SaveMode.Ignore));
            Assert.IsType<DataFrameWriter>(configWriter.Mode("overwrite"));
            Assert.IsType<DataFrameWriter>(configWriter.Format("json"));

            Assert.IsType<DataFrameWriter>(configWriter.Option("stringOption", "value"));
            Assert.IsType<DataFrameWriter>(configWriter.Option("boolOption", true));
            Assert.IsType<DataFrameWriter>(configWriter.Option("longOption", 1L));
            Assert.IsType<DataFrameWriter>(configWriter.Option("doubleOption", 3D));

            var bulkOptions = new Dictionary<string, string>
            {
                ["option1"] = "value1",
                ["option2"] = "value2"
            };
            Assert.IsType<DataFrameWriter>(configWriter.Options(bulkOptions));

            Assert.IsType<DataFrameWriter>(configWriter.PartitionBy("age"));
            Assert.IsType<DataFrameWriter>(configWriter.PartitionBy("age", "name"));

            Assert.IsType<DataFrameWriter>(configWriter.BucketBy(3, "age"));
            Assert.IsType<DataFrameWriter>(configWriter.BucketBy(3, "age", "name"));

            Assert.IsType<DataFrameWriter>(configWriter.SortBy("name"));

            // Part 2: output-producing calls on a writer built from the CSV resource.
            using (var scratch = new TemporaryDirectory())
            {
                var csvPeople = _spark
                    .Read()
                    .Csv($"{TestEnvironment.ResourceDirectory}people.csv");
                DataFrameWriter outputWriter = csvPeople.Write();

                // TODO: Test dfw.Jdbc without running a local db.

                // Explicit-path writes come before anything that sets the `path` option.
                outputWriter.Save($"{scratch.Path}TestSavePath1");
                outputWriter.Json($"{scratch.Path}TestJsonPath");
                outputWriter.Parquet($"{scratch.Path}TestParquetPath");
                outputWriter.Orc($"{scratch.Path}TestOrcPath");
                outputWriter.Text($"{scratch.Path}TestTextPath");
                outputWriter.Csv($"{scratch.Path}TestCsvPath");

                outputWriter.Option("path", scratch.Path).SaveAsTable("TestTable");
                outputWriter.InsertInto("TestTable");

                // From Spark 3.1.1 onwards, combining the `path` Option with .Save(path)
                // is rejected unless the `spark.sql.legacy.pathOptionBehavior.enabled`
                // conf is set. .Json(path), .Parquet(path), etc go through the same code
                // path and would need that conf too, which is why this call is last.
                outputWriter.Option("path", $"{scratch.Path}TestSavePath2").Save();
            }
        }