/// <summary>
/// Calls the SparkSession members covered by this test and asserts the runtime
/// type of each returned object.
/// </summary>
public void TestSignaturesV2_3_X()
{
    var session = _spark;

    // Context accessor and the static builder factory.
    Assert.IsType<SparkContext>(session.SparkContext);
    Assert.IsType<Builder>(SparkSession.Builder());

    // Clear the default session, install ours, then read it back.
    SparkSession.ClearDefaultSession();
    SparkSession.SetDefaultSession(session);
    Assert.IsType<SparkSession>(SparkSession.GetDefaultSession());

    // Configuration, child sessions and the batch reader.
    Assert.IsType<RuntimeConfig>(session.Conf());
    Assert.IsType<SparkSession>(session.NewSession());
    Assert.IsType<DataFrameReader>(session.Read());

    // Every Range overload: end; start/end; start/end/step; start/end/step/partitions.
    Assert.IsType<DataFrame>(session.Range(10));
    Assert.IsType<DataFrame>(session.Range(10, 100));
    Assert.IsType<DataFrame>(session.Range(10, 100, 10));
    Assert.IsType<DataFrame>(session.Range(10, 100, 10, 5));

    // Table() needs a registered view to resolve, so create one first.
    session.Range(10).CreateOrReplaceTempView("testView");
    Assert.IsType<DataFrame>(session.Table("testView"));

    // Streaming reader, UDF registration and catalog accessors.
    Assert.IsType<DataStreamReader>(session.ReadStream());
    Assert.IsType<UdfRegistration>(session.Udf());
    Assert.IsType<Catalog>(session.Catalog());
}
/// <summary>
/// Encodes the "id" column of a one-row range with ToAvro, decodes it with
/// FromAvro using a JSON schema string, and asserts that the result is a Column.
/// </summary>
public void TestSignaturesV2_4_X()
{
    // Schema text handed to FromAvro for decoding.
    const string jsonSchema = "{\"type\":\"long\", \"name\":\"col\"}";

    DataFrame singleRow = _spark.Range(1);
    Column idColumn = singleRow.Col("id");
    Column avroEncoded = ToAvro(idColumn);

    Column decoded = FromAvro(avroEncoded, jsonSchema);
    Assert.IsType<Column>(decoded);
}
/// <summary>
/// Writes a small parquet data set into a temporary directory, deletes it through
/// the Hadoop FileSystem wrapper, and checks both the returned flags and the state
/// of the directory on disk.
/// </summary>
public void TestDelete()
{
    // Declaration order is significant: using-declarations dispose in reverse order.
    using FileSystem hadoopFs = FileSystem.Get(_spark.SparkContext.HadoopConfiguration());
    using var scratch = new TemporaryDirectory();

    string tablePath = Path.Combine(scratch.Path, "temp-table");

    // Materialize 25 rows as parquet so there is something to delete.
    var writer = _spark.Range(25).Write().Format("parquet");
    writer.Save(tablePath);
    Assert.True(Directory.Exists(tablePath));

    // The first delete is expected to report success; the repeat is expected to
    // report false because the path is already gone.
    Assert.True(hadoopFs.Delete(tablePath, true));
    Assert.False(hadoopFs.Delete(tablePath, true));

    Assert.False(Directory.Exists(tablePath));
}
/// <summary>
/// Builds a 1000-row range DataFrame, adds a literal column and then a modulo
/// column, printing a label and the first five rows after each step.
/// </summary>
/// <param name="spark">Session used to create the range DataFrame.</param>
static void CreateUsingRange(SparkSession spark)
{
    // Step 1: the bare range.
    Console.WriteLine("spark.Range(1000)");
    var ids = spark.Range(1000);
    ids.Show(5);
    // +---+
    // | id|
    // +---+
    // |  0|
    // |  1|
    // |  2|
    // |  3|
    // |  4|
    // +---+

    // Step 2: append a constant string column.
    Console.WriteLine("spark.Range(1000).WithColumn");
    var withLiteral = ids.WithColumn("Another Column", Functions.Lit("Literal"));
    withLiteral.Show(5);
    // +---+--------------+
    // | id|Another Column|
    // +---+--------------+
    // |  0|       Literal|
    // |  1|       Literal|
    // |  2|       Literal|
    // |  3|       Literal|
    // |  4|       Literal|
    // +---+--------------+

    // Step 3: append id modulo 2.
    Console.WriteLine("spark.Range(1000).WithColumn");
    var idModTwo = Functions.Pmod(Functions.Col("id"), Functions.Lit(2));
    var withMod = withLiteral.WithColumn("Mod", idModTwo);
    withMod.Show(5);
    // +---+--------------+---+
    // | id|Another Column|Mod|
    // +---+--------------+---+
    // |  0|       Literal|  0|
    // |  1|       Literal|  1|
    // |  2|       Literal|  0|
    // |  3|       Literal|  1|
    // |  4|       Literal|  0|
    // +---+--------------+---+
}
/// <summary>
/// Runs a Delta Lake walkthrough (write, overwrite, load, update, delete, merge)
/// against a freshly named table directory next to the executing assembly and
/// returns a text log of the first-column values seen after each step.
/// </summary>
/// <param name="awsSettings">Injected settings; not read by this method.</param>
/// <returns>
/// The accumulated log when every step succeeds; otherwise only the message of
/// the exception that was thrown.
/// </returns>
public string DeltaTest([FromServices] IAWSSettings awsSettings)
{
    // Fully qualified so no additional using directive is required.
    var log = new System.Text.StringBuilder();

    try
    {
        SparkSession spark = SparkSession
            .Builder()
            .AppName("DeltaTest")
            .GetOrCreate();

        string tempDirectory = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location);

        // "HH" is the 24-hour clock. The previous "hh" (12-hour) could yield the
        // same suffix for a morning and an afternoon run on the same day, making
        // the non-overwriting Save below target an already existing table.
        string dt = DateTime.Now.ToString("MMddHHmmss");
        string path = Path.Combine(tempDirectory, $"delta-table{dt}");

        // Write data to a Delta table.
        DataFrame data = spark.Range(0, 5);
        AppendDeltaTestRows(log, "Write data to a Delta table >> spark.Range(0, 5)" + " ", data);
        data.Write().Format("delta").Save(path);

        // Create a second iteration of the table.
        data = spark.Range(5, 10);
        // The heading now names the range that is actually written (it used to say 0, 5).
        AppendDeltaTestRows(log, "Create a second iteration of the table >> spark.Range(5, 10)" + " ", data);
        data.Write().Format("delta").Mode("overwrite").Save(path);

        // Load the data into a DeltaTable object.
        DeltaTable deltaTable = DeltaTable.ForPath(path);
        AppendDeltaTestRows(log, "Load the data into a DeltaTable object >> DeltaTable.ForPath" + " ", deltaTable.ToDF());

        // Update every even value by adding 100 to it.
        deltaTable.Update(
            condition: Functions.Expr("id % 2 == 0"),
            set: new Dictionary<string, Column>() { { "id", Functions.Expr("id + 100") } });
        AppendDeltaTestRows(log, "Update every even value by adding 100 to it." + " ", deltaTable.ToDF());

        // Delete every even value.
        deltaTable.Delete(condition: Functions.Expr("id % 2 == 0"));
        AppendDeltaTestRows(log, "Delete every even value id % 2 == 0" + " ", deltaTable.ToDF());

        // Upsert (merge) new data.
        DataFrame newData = spark.Range(0, 20).As("newData").ToDF();
        AppendDeltaTestRows(log, "Upsert (merge) new data" + Environment.NewLine, newData);

        deltaTable.As("oldData")
            .Merge(newData, "oldData.id = newData.id")
            .WhenMatched()
            .Update(new Dictionary<string, Column>() { { "id", Functions.Col("newData.id") } })
            .WhenNotMatched()
            .InsertExpr(new Dictionary<string, string>() { { "id", "newData.id" } })
            .Execute();

        spark.Stop();
    }
    catch (Exception ex)
    {
        // Any failure replaces the partial log with the exception message.
        return ex.Message;
    }

    return log.ToString();
}

/// <summary>
/// Appends a heading, then the first value of every collected row followed by
/// " | ", then a single trailing space.
/// </summary>
/// <param name="log">Buffer that receives the text.</param>
/// <param name="heading">Text written before the rows, including its own terminator.</param>
/// <param name="frame">Data whose rows are collected and listed.</param>
private static void AppendDeltaTestRows(System.Text.StringBuilder log, string heading, DataFrame frame)
{
    // The heading goes in before Collect() runs, so it is recorded even when collecting throws.
    log.Append(heading);

    foreach (var row in frame.Collect())
    {
        log.Append(row.Values[0]).Append(" | ");
    }

    log.Append(" ");
}
/// <summary>
/// Runs write, overwrite, load, update, delete and merge against a Delta table in
/// a temporary directory, validating the table contents with
/// ValidateRangeDataFrame after each mutating step.
/// </summary>
public void TestTutorialScenario()
{
    using var scratch = new TemporaryDirectory();
    string tablePath = Path.Combine(scratch.Path, "delta-table");

    // First version of the table: ids 0 through 4.
    DataFrame batch = _spark.Range(0, 5);
    batch.Write().Format("delta").Save(tablePath);

    // Validate the frame that was written (this is the source frame, not a read-back).
    ValidateRangeDataFrame(Enumerable.Range(0, 5), batch);

    // Second version: overwrite with ids 5 through 9.
    batch = _spark.Range(5, 10);
    batch.Write().Format("delta").Mode("overwrite").Save(tablePath);

    // Open the stored table and confirm it now holds 5 through 9.
    DeltaTable table = DeltaTable.ForPath(tablePath);
    ValidateRangeDataFrame(Enumerable.Range(5, 5), table.ToDF());

    // Add 100 to every even id.
    Column evenForUpdate = Functions.Expr("id % 2 == 0");
    var addHundred = new Dictionary<string, Column>
    {
        { "id", Functions.Expr("id + 100") }
    };
    table.Update(condition: evenForUpdate, set: addHundred);

    // Expected ids afterwards: 5, 7, 9, 106, 108.
    ValidateRangeDataFrame(new List<int> { 5, 7, 9, 106, 108 }, table.ToDF());

    // Remove every even id.
    table.Delete(condition: Functions.Expr("id % 2 == 0"));

    // Expected ids afterwards: 5, 7, 9.
    ValidateRangeDataFrame(new List<int> { 5, 7, 9 }, table.ToDF());

    // Upsert ids 0 through 19: matched rows take the incoming id, unmatched rows are inserted.
    DataFrame incoming = _spark.Range(0, 20).As("newData").ToDF();
    table.As("oldData")
        .Merge(incoming, "oldData.id = newData.id")
        .WhenMatched()
        .Update(new Dictionary<string, Column> { { "id", Functions.Col("newData.id") } })
        .WhenNotMatched()
        .InsertExpr(new Dictionary<string, string> { { "id", "newData.id" } })
        .Execute();

    // The merged table should contain the sequence 0 through 19.
    ValidateRangeDataFrame(Enumerable.Range(0, 20), table.ToDF());
}
/// <summary>
/// Calls the DataFrameReader configuration and load methods covered by this test
/// and asserts the runtime type of each returned object.
/// </summary>
public void TestSignaturesV2_3_X()
{
    DataFrameReader reader = _spark.Read();

    // Fluent configuration calls are expected to hand back a DataFrameReader.
    Assert.IsType<DataFrameReader>(reader.Format("json"));

    var structSchema = new StructType(new[]
    {
        new StructField("age", new IntegerType()),
        new StructField("name", new StringType())
    });
    Assert.IsType<DataFrameReader>(reader.Schema(structSchema));
    Assert.IsType<DataFrameReader>(reader.Schema("age INT, name STRING"));

    // One Option call per supported value type.
    Assert.IsType<DataFrameReader>(reader.Option("stringOption", "value"));
    Assert.IsType<DataFrameReader>(reader.Option("boolOption", true));
    Assert.IsType<DataFrameReader>(reader.Option("longOption", 1L));
    Assert.IsType<DataFrameReader>(reader.Option("doubleOption", 3D));

    var bulkOptions = new Dictionary<string, string>
    {
        { "option1", "value1" },
        { "option2", "value2" }
    };
    Assert.IsType<DataFrameReader>(reader.Options(bulkOptions));

    // Generic Load plus the JSON shortcut, with zero, one and two paths.
    string jsonFile = $"{TestEnvironment.ResourceDirectory}people.json";
    Assert.IsType<DataFrame>(reader.Load());
    Assert.IsType<DataFrame>(reader.Load(jsonFile));
    Assert.IsType<DataFrame>(reader.Load(jsonFile, jsonFile));
    Assert.IsType<DataFrame>(reader.Json(jsonFile));
    Assert.IsType<DataFrame>(reader.Json(jsonFile, jsonFile));

    // CSV.
    string csvFile = $"{TestEnvironment.ResourceDirectory}people.csv";
    Assert.IsType<DataFrame>(reader.Csv(csvFile));
    Assert.IsType<DataFrame>(reader.Csv(csvFile, csvFile));

    // Parquet.
    string parquetFile = $"{TestEnvironment.ResourceDirectory}users.parquet";
    Assert.IsType<DataFrame>(reader.Parquet(parquetFile));
    Assert.IsType<DataFrame>(reader.Parquet(parquetFile, parquetFile));

    // ORC.
    string orcFile = $"{TestEnvironment.ResourceDirectory}users.orc";
    Assert.IsType<DataFrame>(reader.Orc(orcFile));
    Assert.IsType<DataFrame>(reader.Orc(orcFile, orcFile));

    // A new reader is obtained here, so the Text and Table calls below do not
    // use the instance configured above.
    reader = _spark.Read();

    string textFile = $"{TestEnvironment.ResourceDirectory}people.txt";
    Assert.IsType<DataFrame>(reader.Text(textFile));
    Assert.IsType<DataFrame>(reader.Text(textFile, textFile));

    // Table() needs a registered view to resolve.
    _spark.Range(10).CreateOrReplaceTempView("testView");
    Assert.IsType<DataFrame>(reader.Table("testView"));
}
/// <summary>
/// Calls each overload of the static column functions covered by this test and
/// asserts the runtime type of the returned object (Column, or DataFrame for the
/// few calls that produce one).
/// </summary>
public void TestSignaturesV2_3_X()
{
    // ---------------------------------------------------------------
    // Basic functions
    // ---------------------------------------------------------------
    Column col = Column("col1");
    Assert.IsType<Column>(col);
    Assert.IsType<Column>(Col("col2"));
    Assert.IsType<Column>(Lit(1));
    Assert.IsType<Column>(Lit("some column"));
    Assert.IsType<Column>(Lit(col));

    // ---------------------------------------------------------------
    // Sort functions
    // ---------------------------------------------------------------
    Assert.IsType<Column>(Asc("col"));
    Assert.IsType<Column>(AscNullsFirst("col"));
    Assert.IsType<Column>(AscNullsLast("col"));
    Assert.IsType<Column>(Desc("col"));
    Assert.IsType<Column>(DescNullsFirst("col"));
    Assert.IsType<Column>(DescNullsLast("col"));

    // ---------------------------------------------------------------
    // Aggregate functions
    // ---------------------------------------------------------------
    Assert.IsType<Column>(Column("col"));

    Assert.IsType<Column>(ApproxCountDistinct(col));
    Assert.IsType<Column>(ApproxCountDistinct("col"));
    Assert.IsType<Column>(ApproxCountDistinct(col, 0.05));
    Assert.IsType<Column>(ApproxCountDistinct("col", 0.05));

    Assert.IsType<Column>(Avg(col));
    Assert.IsType<Column>(Avg("col"));

    Assert.IsType<Column>(CollectList(col));
    Assert.IsType<Column>(CollectList("col"));

    Assert.IsType<Column>(CollectSet(col));
    Assert.IsType<Column>(CollectSet("col"));

    Assert.IsType<Column>(Corr(col, col));
    Assert.IsType<Column>(Corr("col1", "col2"));

    Assert.IsType<Column>(Count(col));
    Assert.IsType<Column>(Count("col"));

    Assert.IsType<Column>(CountDistinct(col));
    Assert.IsType<Column>(CountDistinct(col, col));
    Assert.IsType<Column>(CountDistinct(col, col, col));
    Assert.IsType<Column>(CountDistinct("col1"));
    Assert.IsType<Column>(CountDistinct("col1", "col2"));
    Assert.IsType<Column>(CountDistinct("col1", "col2", "col3"));

    Assert.IsType<Column>(CovarPop(col, col));
    Assert.IsType<Column>(CovarPop("col1", "col2"));

    Assert.IsType<Column>(CovarSamp(col, col));
    Assert.IsType<Column>(CovarSamp("col1", "col2"));

    Assert.IsType<Column>(First(col));
    Assert.IsType<Column>(First(col, true));
    Assert.IsType<Column>(First(col, false));
    Assert.IsType<Column>(First("col"));
    Assert.IsType<Column>(First("col", true));
    Assert.IsType<Column>(First("col", false));

    Assert.IsType<Column>(Grouping(col));
    Assert.IsType<Column>(Grouping("col"));

    Assert.IsType<Column>(GroupingId());
    Assert.IsType<Column>(GroupingId(col));
    Assert.IsType<Column>(GroupingId(col, col));
    Assert.IsType<Column>(GroupingId("col1"));
    Assert.IsType<Column>(GroupingId("col1", "col2"));
    Assert.IsType<Column>(GroupingId("col1", "col2", "col3"));

    Assert.IsType<Column>(Kurtosis(col));
    Assert.IsType<Column>(Kurtosis("col"));

    Assert.IsType<Column>(Last(col));
    Assert.IsType<Column>(Last(col, true));
    Assert.IsType<Column>(Last(col, false));
    Assert.IsType<Column>(Last("col"));
    Assert.IsType<Column>(Last("col", true));
    Assert.IsType<Column>(Last("col", false));

    Assert.IsType<Column>(Max(col));
    Assert.IsType<Column>(Max("col"));

    Assert.IsType<Column>(Mean(col));
    Assert.IsType<Column>(Mean("col"));

    Assert.IsType<Column>(Min(col));
    Assert.IsType<Column>(Min("col"));

    Assert.IsType<Column>(Skewness(col));
    Assert.IsType<Column>(Skewness("col"));

    Assert.IsType<Column>(Stddev(col));
    Assert.IsType<Column>(Stddev("col"));

    Assert.IsType<Column>(StddevSamp(col));
    Assert.IsType<Column>(StddevSamp("col"));

    Assert.IsType<Column>(StddevPop(col));
    Assert.IsType<Column>(StddevPop("col"));

    Assert.IsType<Column>(Sum(col));
    Assert.IsType<Column>(Sum("col"));

    Assert.IsType<Column>(SumDistinct(col));
    Assert.IsType<Column>(SumDistinct("col"));

    Assert.IsType<Column>(Variance(col));
    Assert.IsType<Column>(Variance("col"));

    Assert.IsType<Column>(VarSamp(col));
    Assert.IsType<Column>(VarSamp("col"));

    Assert.IsType<Column>(VarPop(col));
    Assert.IsType<Column>(VarPop("col"));

    // ---------------------------------------------------------------
    // Window functions
    // ---------------------------------------------------------------
    Assert.IsType<Column>(UnboundedPreceding());
    Assert.IsType<Column>(UnboundedFollowing());
    Assert.IsType<Column>(CurrentRow());
    Assert.IsType<Column>(CumeDist());
    Assert.IsType<Column>(DenseRank());

    Assert.IsType<Column>(Lag(col, 0));
    Assert.IsType<Column>(Lag(col, 2, "col2"));
    Assert.IsType<Column>(Lag("col", 0));
    Assert.IsType<Column>(Lag("col", 2, "col2"));

    Assert.IsType<Column>(Lead(col, 0));
    Assert.IsType<Column>(Lead(col, 2, "col2"));
    Assert.IsType<Column>(Lead("col", 0));
    Assert.IsType<Column>(Lead("col", 2, "col2"));

    Assert.IsType<Column>(Ntile(100));
    Assert.IsType<Column>(PercentRank());
    Assert.IsType<Column>(Rank());
    Assert.IsType<Column>(RowNumber());

    // ---------------------------------------------------------------
    // Non-aggregate functions
    // ---------------------------------------------------------------
    Assert.IsType<Column>(Column("col"));
    Assert.IsType<Column>(Abs(col));

    Assert.IsType<Column>(Array());
    Assert.IsType<Column>(Array(col));
    Assert.IsType<Column>(Array(col, col));
    Assert.IsType<Column>(Array("col1"));
    Assert.IsType<Column>(Array("col1", "col2"));
    Assert.IsType<Column>(Array("col1", "col2", "col3"));

    Assert.IsType<Column>(Map());
    Assert.IsType<Column>(Map(col));
    Assert.IsType<Column>(Map(col, col));

    // Broadcast takes a DataFrame, so load one from the test resources.
    DataFrame people = _spark
        .Read()
        .Json($"{TestEnvironment.ResourceDirectory}people.json");
    Assert.IsType<DataFrame>(Broadcast(people));

    Assert.IsType<DataFrame>(_spark.Range(10));
    Assert.IsType<DataFrame>(_spark.Range(10, 100));
    Assert.IsType<DataFrame>(_spark.Range(10, 100, 10));
    Assert.IsType<DataFrame>(_spark.Range(10, 100, 10, 5));

    Assert.IsType<Column>(Coalesce());
    Assert.IsType<Column>(Coalesce(col));
    Assert.IsType<Column>(Coalesce(col, col));

    Assert.IsType<Column>(InputFileName());
    Assert.IsType<Column>(IsNaN(col));
    Assert.IsType<Column>(IsNull(col));
    Assert.IsType<Column>(MonotonicallyIncreasingId());
    Assert.IsType<Column>(NaNvl(col, col));
    Assert.IsType<Column>(Negate(col));
    Assert.IsType<Column>(Not(col));

    Assert.IsType<Column>(Rand(12345));
    Assert.IsType<Column>(Rand());
    Assert.IsType<Column>(Randn(12345));
    Assert.IsType<Column>(Randn());

    Assert.IsType<Column>(SparkPartitionId());

    Assert.IsType<Column>(Sqrt(col));
    Assert.IsType<Column>(Sqrt("col"));

    Assert.IsType<Column>(Struct());
    Assert.IsType<Column>(Struct(col));
    Assert.IsType<Column>(Struct(col, col));
    Assert.IsType<Column>(Struct("col1"));
    Assert.IsType<Column>(Struct("col1", "col2"));
    Assert.IsType<Column>(Struct("col1", "col2", "col3"));

    Assert.IsType<Column>(When(col, col));
    Assert.IsType<Column>(When(col, "col"));
    Assert.IsType<Column>(When(col, 12345));

    Assert.IsType<Column>(BitwiseNOT(col));
    Assert.IsType<Column>(Expr("expr"));

    // ---------------------------------------------------------------
    // Math functions
    // ---------------------------------------------------------------
    Assert.IsType<Column>(Column("col"));

    Assert.IsType<Column>(Acos(col));
    Assert.IsType<Column>(Acos("col"));

    Assert.IsType<Column>(Asin(col));
    Assert.IsType<Column>(Asin("col"));

    Assert.IsType<Column>(Atan(col));
    Assert.IsType<Column>(Atan("col"));

    Assert.IsType<Column>(Atan2(col, col));
    Assert.IsType<Column>(Atan2(col, "x"));
    Assert.IsType<Column>(Atan2("y", col));
    Assert.IsType<Column>(Atan2("y", "x"));
    Assert.IsType<Column>(Atan2(col, 0.5));
    Assert.IsType<Column>(Atan2("y", 0.5));
    Assert.IsType<Column>(Atan2(0.5, col));
    Assert.IsType<Column>(Atan2(0.5, "x"));

    Assert.IsType<Column>(Bin(col));
    Assert.IsType<Column>(Bin("col"));

    Assert.IsType<Column>(Cbrt(col));
    Assert.IsType<Column>(Cbrt("col"));

    Assert.IsType<Column>(Ceil(col));
    Assert.IsType<Column>(Ceil("col"));

    Assert.IsType<Column>(Conv(col, 2, 10));

    Assert.IsType<Column>(Cos(col));
    Assert.IsType<Column>(Cos("col"));

    Assert.IsType<Column>(Cosh(col));
    Assert.IsType<Column>(Cosh("col"));

    Assert.IsType<Column>(Exp(col));
    Assert.IsType<Column>(Exp("col"));

    Assert.IsType<Column>(Expm1(col));
    Assert.IsType<Column>(Expm1("col"));

    Assert.IsType<Column>(Factorial(col));

    Assert.IsType<Column>(Floor(col));
    Assert.IsType<Column>(Floor("col"));

    Assert.IsType<Column>(Greatest());
    Assert.IsType<Column>(Greatest(col));
    Assert.IsType<Column>(Greatest(col, col));
    Assert.IsType<Column>(Greatest("col1"));
    Assert.IsType<Column>(Greatest("col1", "col2"));
    Assert.IsType<Column>(Greatest("col1", "col2", "col3"));

    Assert.IsType<Column>(Hex(col));
    Assert.IsType<Column>(Unhex(col));

    Assert.IsType<Column>(Hypot(col, col));
    Assert.IsType<Column>(Hypot(col, "right"));
    Assert.IsType<Column>(Hypot("left", col));
    Assert.IsType<Column>(Hypot("left", "right"));
    Assert.IsType<Column>(Hypot(col, 0.5));
    Assert.IsType<Column>(Hypot("left", 0.5));
    Assert.IsType<Column>(Hypot(0.5, col));
    Assert.IsType<Column>(Hypot(0.5, "right"));

    Assert.IsType<Column>(Least());
    Assert.IsType<Column>(Least(col));
    Assert.IsType<Column>(Least(col, col));
    Assert.IsType<Column>(Least("col1"));
    Assert.IsType<Column>(Least("col1", "col2"));
    Assert.IsType<Column>(Least("col1", "col2", "col3"));

    Assert.IsType<Column>(Log(col));
    Assert.IsType<Column>(Log("col"));
    Assert.IsType<Column>(Log(2.0, col));
    Assert.IsType<Column>(Log(2.0, "col"));

    Assert.IsType<Column>(Log10(col));
    Assert.IsType<Column>(Log10("col"));

    Assert.IsType<Column>(Log1p(col));
    Assert.IsType<Column>(Log1p("col"));

    Assert.IsType<Column>(Log2(col));
    Assert.IsType<Column>(Log2("col"));

    Assert.IsType<Column>(Pow(col, col));
    Assert.IsType<Column>(Pow(col, "right"));
    Assert.IsType<Column>(Pow("left", col));
    Assert.IsType<Column>(Pow("left", "right"));
    Assert.IsType<Column>(Pow(col, 0.5));
    Assert.IsType<Column>(Pow("left", 0.5));
    Assert.IsType<Column>(Pow(0.5, col));
    Assert.IsType<Column>(Pow(0.5, "right"));

    Assert.IsType<Column>(Pmod(col, col));

    Assert.IsType<Column>(Rint(col));
    Assert.IsType<Column>(Rint("col"));

    Assert.IsType<Column>(Round(col));
    Assert.IsType<Column>(Round(col, 10));

    Assert.IsType<Column>(Bround(col));
    Assert.IsType<Column>(Bround(col, 10));

    Assert.IsType<Column>(ShiftLeft(col, 4));
    Assert.IsType<Column>(ShiftRight(col, 4));
    Assert.IsType<Column>(ShiftRightUnsigned(col, 4));

    Assert.IsType<Column>(Signum(col));
    Assert.IsType<Column>(Signum("col"));

    Assert.IsType<Column>(Sin(col));
    Assert.IsType<Column>(Sin("col"));

    Assert.IsType<Column>(Sinh(col));
    Assert.IsType<Column>(Sinh("col"));

    Assert.IsType<Column>(Tan(col));
    Assert.IsType<Column>(Tan("col"));

    Assert.IsType<Column>(Tanh(col));
    Assert.IsType<Column>(Tanh("col"));

    Assert.IsType<Column>(Degrees(col));
    Assert.IsType<Column>(Degrees("col"));

    Assert.IsType<Column>(Radians(col));
    Assert.IsType<Column>(Radians("col"));

    // ---------------------------------------------------------------
    // Miscellaneous functions
    // ---------------------------------------------------------------
    Assert.IsType<Column>(Md5(col));
    Assert.IsType<Column>(Sha1(col));
    Assert.IsType<Column>(Sha2(col, 224));
    Assert.IsType<Column>(Crc32(col));

    Assert.IsType<Column>(Hash());
    Assert.IsType<Column>(Hash(col));
    Assert.IsType<Column>(Hash(col, col));

    // ---------------------------------------------------------------
    // String functions
    // ---------------------------------------------------------------
    Assert.IsType<Column>(Ascii(col));
    Assert.IsType<Column>(Base64(col));

    Assert.IsType<Column>(ConcatWs(";"));
    Assert.IsType<Column>(ConcatWs(";", col));
    Assert.IsType<Column>(ConcatWs(";", col, col));

    Assert.IsType<Column>(Decode(col, "UTF-8"));
    Assert.IsType<Column>(Encode(col, "UTF-8"));

    Assert.IsType<Column>(FormatNumber(col, 2));

    Assert.IsType<Column>(FormatString("%s %d"));
    Assert.IsType<Column>(FormatString("%s %d", col));
    Assert.IsType<Column>(FormatString("%s %d", col, col));

    Assert.IsType<Column>(InitCap(col));
    Assert.IsType<Column>(Instr(col, "abc"));
    Assert.IsType<Column>(Length(col));
    Assert.IsType<Column>(Lower(col));
    Assert.IsType<Column>(Levenshtein(col, col));

    Assert.IsType<Column>(Locate("abc", col));
    Assert.IsType<Column>(Locate("abc", col, 3));

    Assert.IsType<Column>(Lpad(col, 3, "pad"));

    Assert.IsType<Column>(Ltrim(col));
    Assert.IsType<Column>(Ltrim(col, "\n"));

    Assert.IsType<Column>(RegexpExtract(col, "[a-z]", 0));
    Assert.IsType<Column>(RegexpReplace(col, "[a-z]", "hello"));
    Assert.IsType<Column>(RegexpReplace(col, col, col));

    Assert.IsType<Column>(Unbase64(col));
    Assert.IsType<Column>(Rpad(col, 3, "pad"));
    Assert.IsType<Column>(Repeat(col, 3));

    Assert.IsType<Column>(Rtrim(col));
    Assert.IsType<Column>(Rtrim(col, "\n"));

    Assert.IsType<Column>(Soundex(col));
    Assert.IsType<Column>(Split(col, "\t"));
    Assert.IsType<Column>(Substring(col, 0, 5));
    Assert.IsType<Column>(SubstringIndex(col, ";", 5));
    Assert.IsType<Column>(Translate(col, "abc", "edf"));

    Assert.IsType<Column>(Trim(col));
    Assert.IsType<Column>(Trim(col, "\n"));

    Assert.IsType<Column>(Upper(col));

    // ---------------------------------------------------------------
    // DateTime functions
    // ---------------------------------------------------------------
    Assert.IsType<Column>(AddMonths(col, 3));
    Assert.IsType<Column>(CurrentDate());
    Assert.IsType<Column>(CurrentTimestamp());
    Assert.IsType<Column>(DateFormat(col, "format"));
    Assert.IsType<Column>(DateAdd(col, 5));
    Assert.IsType<Column>(DateSub(col, 5));
    Assert.IsType<Column>(DateDiff(col, col));

    Assert.IsType<Column>(Year(col));
    Assert.IsType<Column>(Quarter(col));
    Assert.IsType<Column>(Month(col));
    Assert.IsType<Column>(DayOfWeek(col));
    Assert.IsType<Column>(DayOfMonth(col));
    Assert.IsType<Column>(DayOfYear(col));
    Assert.IsType<Column>(Hour(col));
    Assert.IsType<Column>(LastDay(col));
    Assert.IsType<Column>(Minute(col));
    Assert.IsType<Column>(MonthsBetween(col, col));
    Assert.IsType<Column>(NextDay(col, "Mon"));
    Assert.IsType<Column>(Second(col));
    Assert.IsType<Column>(WeekOfYear(col));

    Assert.IsType<Column>(FromUnixTime(col));
    Assert.IsType<Column>(FromUnixTime(col, "yyyy-MM-dd HH:mm:ss"));

    Assert.IsType<Column>(UnixTimestamp());
    Assert.IsType<Column>(UnixTimestamp(col));
    Assert.IsType<Column>(UnixTimestamp(col, "yyyy-MM-dd HH:mm:ss"));

    Assert.IsType<Column>(ToTimestamp(col));
    Assert.IsType<Column>(ToTimestamp(col, "yyyy-MM-dd HH:mm:ss"));

    Assert.IsType<Column>(ToDate(col));
    Assert.IsType<Column>(ToDate(col, "yyyy-MM-dd HH:mm:ss"));

    Assert.IsType<Column>(Trunc(col, "yyyy"));
    Assert.IsType<Column>(DateTrunc("mon", col));

    Assert.IsType<Column>(FromUtcTimestamp(col, "GMT+1"));
    Assert.IsType<Column>(ToUtcTimestamp(col, "GMT+1"));

    Assert.IsType<Column>(Window(col, "1 minute", "10 seconds", "5 seconds"));
    Assert.IsType<Column>(Window(col, "1 minute", "10 seconds"));
    Assert.IsType<Column>(Window(col, "1 minute"));

    // ---------------------------------------------------------------
    // Collection functions
    // ---------------------------------------------------------------
    Assert.IsType<Column>(ArrayContains(col, 12345));
    Assert.IsType<Column>(ArrayContains(col, "str"));

    Assert.IsType<Column>(Concat());
    Assert.IsType<Column>(Concat(col));
    Assert.IsType<Column>(Concat(col, col));

    Assert.IsType<Column>(Explode(col));
    Assert.IsType<Column>(ExplodeOuter(col));
    Assert.IsType<Column>(PosExplode(col));
    Assert.IsType<Column>(PosExplodeOuter(col));

    Assert.IsType<Column>(GetJsonObject(col, "abc.json"));

    Assert.IsType<Column>(JsonTuple(col, "a"));
    Assert.IsType<Column>(JsonTuple(col, "a", "b"));

    // Shared by the FromJson and ToJson overloads that accept options.
    var jsonOptions = new Dictionary<string, string>
    {
        { "hello", "world" }
    };

    Assert.IsType<Column>(FromJson(col, "a Int"));
    Assert.IsType<Column>(FromJson(col, "a Int", jsonOptions));

    Assert.IsType<Column>(ToJson(col));
    Assert.IsType<Column>(ToJson(col, jsonOptions));

    Assert.IsType<Column>(Size(col));

    Assert.IsType<Column>(SortArray(col));
    Assert.IsType<Column>(SortArray(col, true));
    Assert.IsType<Column>(SortArray(col, false));

    Assert.IsType<Column>(Reverse(col));
    Assert.IsType<Column>(MapKeys(col));
    Assert.IsType<Column>(MapValues(col));

    // ---------------------------------------------------------------
    // Udf functions
    // ---------------------------------------------------------------
    // TestUdf is defined outside this block; it runs before the CallUDF checks.
    TestUdf();

    Assert.IsType<Column>(CallUDF("udf"));
    Assert.IsType<Column>(CallUDF("udf", col));
    Assert.IsType<Column>(CallUDF("udf", col, col));
}