/// <summary>
/// Sample entry point: builds a 1000-row ID frame and shows the result of adding to the
/// ID column through three differently constructed UDFs.
/// </summary>
/// <param name="args">Command-line arguments (not read by this method).</param>
static void Main(string[] args)
{
    var session = SparkSession.Builder().GetOrCreate();
    var ids = session.Sql("SELECT ID FROM range(1000)");

    // Overwrite the shared AddAmount value here. Per the original author's note,
    // this assignment is expected to be ignored.
    AddAmount = 991923;

    // 1) Vectorized (Arrow) UDF that delegates to Add100.
    var arrowAdd = VectorUdf<Int64DataFrameColumn, Int64DataFrameColumn>(batch => Add100(batch));
    var arrowResult = ids.Select(ids["ID"], arrowAdd(ids["ID"]));
    arrowResult.Show();

    // 2) Pickling-based UDF that delegates to Add100Pickle.
    var pickledAdd = Udf<int, int>(value => Add100Pickle(value));
    var pickledResult = ids.Select(ids["ID"], pickledAdd(ids["ID"]));
    pickledResult.Show();

    // 3) Pickling-based UDF written as an anonymous lambda that reads AddAmount directly.
    var inlineAdd = Udf<int, int>(value => value + AddAmount);
    var inlineResult = ids.Select(ids["ID"], inlineAdd(ids["ID"]));
    inlineResult.Show();
}
/// <summary>
/// Signature check for the Udf factory overloads: every arity from zero to ten
/// arguments is invoked, and a range of return types is passed through the
/// single-argument overload. The method makes no assertions.
/// </summary>
private void TestUdf()
{
    // Arity coverage: zero through ten input columns.
    Column column = Udf(() => 1)();

    column = Udf<int, int>(x1 => 1)(column);

    column = Udf<int, int, int>((x1, x2) => 1)(column, column);

    column = Udf<int, int, int, int>((x1, x2, x3) => 1)(column, column, column);

    column = Udf<int, int, int, int, int>(
        (x1, x2, x3, x4) => 1)(column, column, column, column);

    column = Udf<int, int, int, int, int, int>(
        (x1, x2, x3, x4, x5) => 1)(column, column, column, column, column);

    column = Udf<int, int, int, int, int, int, int>(
        (x1, x2, x3, x4, x5, x6) => 1)(column, column, column, column, column, column);

    column = Udf<int, int, int, int, int, int, int, int>(
        (x1, x2, x3, x4, x5, x6, x7) => 1)(
            column, column, column, column, column, column, column);

    column = Udf<int, int, int, int, int, int, int, int, int>(
        (x1, x2, x3, x4, x5, x6, x7, x8) => 1)(
            column, column, column, column, column, column, column, column);

    column = Udf<int, int, int, int, int, int, int, int, int, int>(
        (x1, x2, x3, x4, x5, x6, x7, x8, x9) => 1)(
            column, column, column, column, column, column, column, column, column);

    column = Udf<int, int, int, int, int, int, int, int, int, int, int>(
        (x1, x2, x3, x4, x5, x6, x7, x8, x9, x10) => 1)(
            column, column, column, column, column, column, column, column, column, column);

    // Return-type coverage, part 1: simple types echoed straight back.
    Udf<string, string>(value => value);
    Udf<byte[], byte[]>(value => value);
    Udf<bool, bool>(value => value);
    Udf<decimal, decimal>(value => value);
    Udf<double, double>(value => value);
    Udf<float, float>(value => value);
    Udf<byte, byte>(value => value);
    Udf<int, int>(value => value);
    Udf<long, long>(value => value);
    Udf<short, short>(value => value);
    Udf<Date, Date>(value => value);
    Udf<Timestamp, Timestamp>(value => value);

    // Return-type coverage, part 2: array-like results.
    Udf<string, string[]>(value => new[] { value });
    Udf<string, IEnumerable<string>>(value => new[] { value });
    Udf<string, IEnumerable<IEnumerable<string>>>(value => new[] { new[] { value } });

    // Return-type coverage, part 3: map-like results.
    Udf<string, Dictionary<string, string>>(
        value => new Dictionary<string, string> { { value, value } });
    Udf<string, IDictionary<string, string>>(
        value => new Dictionary<string, string> { { value, value } });
    Udf<string, IDictionary<string, string[]>>(
        value => new Dictionary<string, string[]> { { value, new[] { value } } });
}
/// <summary>
/// Creates a select-statement column from a user-defined function.
/// </summary>
/// <param name="arg">The UDF that the column represents.</param>
public Column(Udf arg)
    : this((Chainer)arg)
{
    // The upcast above selects the constructor overload for a Chainer argument;
    // the original Udf reference is then handed to SetArgType.
    SetArgType(arg);
}
/// <summary>
/// Creates a scalar argument from a user-defined function.
/// </summary>
/// <param name="arg">The user-defined function supplied as the argument.</param>
internal ScalarArgument(Udf arg)
    : this((Chainer)arg)
{
    // The upcast above selects the constructor overload for a Chainer argument;
    // the original Udf reference is then handed to SetArgType.
    SetArgType(arg);
}
/// <summary>
/// Creates a variable argument from a user-defined function.
/// </summary>
/// <param name="arg">The user-defined function supplied as the argument.</param>
internal VariableArgument(Udf arg)
    : this((Chainer)arg)
{
    // The upcast above selects the constructor overload for a Chainer argument;
    // the original Udf reference is then handed to SetArgType.
    SetArgType(arg);
}
/// <summary>
/// Creates a table argument from a user-defined function.
/// </summary>
/// <param name="arg">The user-defined function supplied as the argument.</param>
internal Table(Udf arg)
    : this((Chainer)arg)
{
    // The upcast above selects the constructor overload for a Chainer argument;
    // the original Udf reference is then handed to SetArgType.
    SetArgType(arg);
}
/// <summary>
/// Creates a function argument from a user-defined function.
/// </summary>
/// <param name="arg">The user-defined function supplied as the argument.</param>
internal FunctionArgument(Udf arg)
    : this((Chainer)arg)
{
    // The upcast above selects the constructor overload for a Chainer argument;
    // the original Udf reference is then handed to SetArgType.
    SetArgType(arg);
}
/// <summary>
/// Signature smoke test: invokes each listed function overload once and assigns the
/// result to a <c>Column</c> (or <c>DataFrame</c>) local. The method contains no
/// assertions; it only checks that every call compiles and runs.
/// </summary>
public void TestSignaturesV2_3_X()
{
    // ---- Basic functions ----
    Column column = Column("col1");
    column = Col("col2");
    column = Lit(1);
    column = Lit("some column");
    column = Lit(column);

    // ---- Sort functions ----
    column = Asc("col");
    column = AscNullsFirst("col");
    column = AscNullsLast("col");
    column = Desc("col");
    column = DescNullsFirst("col");
    column = DescNullsLast("col");

    // ---- Aggregate functions ----
    column = Column("col");

    column = ApproxCountDistinct(column);
    column = ApproxCountDistinct("col");
    column = ApproxCountDistinct(column, 0.05);
    column = ApproxCountDistinct("col", 0.05);

    column = Avg(column);
    column = Avg("col");

    column = CollectList(column);
    column = CollectList("col");

    column = CollectSet(column);
    column = CollectSet("col");

    column = Corr(column, column);
    column = Corr("col1", "col2");

    column = Count(column);
    column = Count("col");

    column = CountDistinct(column);
    column = CountDistinct(column, column);
    column = CountDistinct(column, column, column);
    column = CountDistinct("col1");
    column = CountDistinct("col1", "col2");
    column = CountDistinct("col1", "col2", "col3");

    column = CovarPop(column, column);
    column = CovarPop("col1", "col2");

    column = CovarSamp(column, column);
    column = CovarSamp("col1", "col2");

    column = First(column);
    column = First(column, true);
    column = First(column, false);
    column = First("col");
    column = First("col", true);
    column = First("col", false);

    column = Grouping(column);
    column = Grouping("col");

    column = GroupingId();
    column = GroupingId(column);
    column = GroupingId(column, column);
    column = GroupingId("col1");
    column = GroupingId("col1", "col2");
    column = GroupingId("col1", "col2", "col3");

    column = Kurtosis(column);
    column = Kurtosis("col");

    column = Last(column);
    column = Last(column, true);
    column = Last(column, false);
    column = Last("col");
    column = Last("col", true);
    column = Last("col", false);

    column = Max(column);
    column = Max("col");

    column = Mean(column);
    column = Mean("col");

    column = Min(column);
    column = Min("col");

    column = Skewness(column);
    column = Skewness("col");

    column = Stddev(column);
    column = Stddev("col");

    column = StddevSamp(column);
    column = StddevSamp("col");

    column = StddevPop(column);
    column = StddevPop("col");

    column = Sum(column);
    column = Sum("col");

    column = SumDistinct(column);
    column = SumDistinct("col");

    column = Variance(column);
    column = Variance("col");

    column = VarSamp(column);
    column = VarSamp("col");

    column = VarPop(column);
    column = VarPop("col");

    // ---- Window functions ----
    column = UnboundedPreceding();
    column = UnboundedFollowing();
    column = CurrentRow();

    column = CumeDist();
    column = DenseRank();

    column = Lag(column, 0);
    column = Lag(column, 2, "col2");
    column = Lag("col", 0);
    column = Lag("col", 2, "col2");

    column = Lead(column, 0);
    column = Lead(column, 2, "col2");
    column = Lead("col", 0);
    column = Lead("col", 2, "col2");

    column = Ntile(100);
    column = PercentRank();
    column = Rank();
    column = RowNumber();

    // ---- Non-aggregate functions ----
    column = Column("col");

    column = Abs(column);

    column = Array();
    column = Array(column);
    column = Array(column, column);
    column = Array("col1");
    column = Array("col1", "col2");
    column = Array("col1", "col2", "col3");

    column = Map();
    column = Map(column);
    column = Map(column, column);

    // Broadcast takes a DataFrame, so one is read from the people.json test resource.
    DataFrame people = _spark
        .Read()
        .Json(TestEnvironment.ResourceDirectory + "people.json");
    people = Broadcast(people);

    column = Coalesce();
    column = Coalesce(column);
    column = Coalesce(column, column);

    column = InputFileName();
    column = IsNaN(column);
    column = IsNull(column);
    column = MonotonicallyIncreasingId();
    column = NaNvl(column, column);
    column = Negate(column);
    column = Not(column);

    column = Rand(12345);
    column = Rand();
    column = Randn(12345);
    column = Randn();

    column = SparkPartitionId();

    column = Sqrt(column);
    column = Sqrt("col");

    column = Struct();
    column = Struct(column);
    column = Struct(column, column);
    column = Struct("col1");
    column = Struct("col1", "col2");
    column = Struct("col1", "col2", "col3");

    column = When(column, column);
    column = When(column, "col");
    column = When(column, 12345);

    column = BitwiseNOT(column);
    column = Expr("expr");

    // ---- Math functions ----
    column = Column("col");

    column = Acos(column);
    column = Acos("col");

    column = Asin(column);
    column = Asin("col");

    column = Atan(column);
    column = Atan("col");

    column = Atan2(column, column);
    column = Atan2(column, "x");
    column = Atan2("y", column);
    column = Atan2("y", "x");
    column = Atan2(column, 0.5);
    column = Atan2("y", 0.5);
    column = Atan2(0.5, column);
    column = Atan2(0.5, "x");

    column = Bin(column);
    column = Bin("col");

    column = Cbrt(column);
    column = Cbrt("col");

    column = Ceil(column);
    column = Ceil("col");

    column = Conv(column, 2, 10);

    column = Cos(column);
    column = Cos("col");

    column = Cosh(column);
    column = Cosh("col");

    column = Exp(column);
    column = Exp("col");

    column = Expm1(column);
    column = Expm1("col");

    column = Factorial(column);

    column = Floor(column);
    column = Floor("col");

    column = Greatest();
    column = Greatest(column);
    column = Greatest(column, column);
    column = Greatest("col1");
    column = Greatest("col1", "col2");
    column = Greatest("col1", "col2", "col3");

    column = Hex(column);
    column = Unhex(column);

    column = Hypot(column, column);
    column = Hypot(column, "right");
    column = Hypot("left", column);
    column = Hypot("left", "right");
    column = Hypot(column, 0.5);
    column = Hypot("left", 0.5);
    column = Hypot(0.5, column);
    column = Hypot(0.5, "right");

    column = Least();
    column = Least(column);
    column = Least(column, column);
    column = Least("col1");
    column = Least("col1", "col2");
    column = Least("col1", "col2", "col3");

    column = Log(column);
    column = Log("col");
    column = Log(2.0, column);
    column = Log(2.0, "col");

    column = Log10(column);
    column = Log10("col");

    column = Log1p(column);
    column = Log1p("col");

    column = Log2(column);
    column = Log2("col");

    column = Pow(column, column);
    column = Pow(column, "right");
    column = Pow("left", column);
    column = Pow("left", "right");
    column = Pow(column, 0.5);
    column = Pow("left", 0.5);
    column = Pow(0.5, column);
    column = Pow(0.5, "right");

    column = Pmod(column, column);

    column = Rint(column);
    column = Rint("col");

    column = Round(column);
    column = Round(column, 10);

    column = Bround(column);
    column = Bround(column, 10);

    column = ShiftLeft(column, 4);
    column = ShiftRight(column, 4);
    column = ShiftRightUnsigned(column, 4);

    column = Signum(column);
    column = Signum("col");

    column = Sin(column);
    column = Sin("col");

    column = Sinh(column);
    column = Sinh("col");

    column = Tan(column);
    column = Tan("col");

    column = Tanh(column);
    column = Tanh("col");

    column = Degrees(column);
    column = Degrees("col");

    column = Radians(column);
    column = Radians("col");

    // ---- Miscellaneous functions ----
    column = Md5(column);
    column = Sha1(column);
    column = Sha2(column, 224);
    column = Crc32(column);

    column = Hash();
    column = Hash(column);
    column = Hash(column, column);

    // ---- String functions ----
    column = Ascii(column);
    column = Base64(column);

    column = ConcatWs(";");
    column = ConcatWs(";", column);
    column = ConcatWs(";", column, column);

    column = Decode(column, "UTF-8");
    column = Encode(column, "UTF-8");

    column = FormatNumber(column, 2);

    column = FormatString("%s %d");
    column = FormatString("%s %d", column);
    column = FormatString("%s %d", column, column);

    column = InitCap(column);
    column = Instr(column, "abc");
    column = Length(column);
    column = Lower(column);
    column = Levenshtein(column, column);

    column = Locate("abc", column);
    column = Locate("abc", column, 3);

    column = Lpad(column, 3, "pad");

    column = Ltrim(column);
    column = Ltrim(column, "\n");

    column = RegexpExtract(column, "[a-z]", 0);

    column = RegexpReplace(column, "[a-z]", "hello");
    column = RegexpReplace(column, column, column);

    column = Unbase64(column);
    column = Rpad(column, 3, "pad");
    column = Repeat(column, 3);

    column = Rtrim(column);
    column = Rtrim(column, "\n");

    column = Soundex(column);
    column = Split(column, "\t");
    column = Substring(column, 0, 5);
    column = SubstringIndex(column, ";", 5);
    column = Translate(column, "abc", "edf");

    column = Trim(column);
    column = Trim(column, "\n");

    column = Upper(column);

    // ---- DateTime functions ----
    column = AddMonths(column, 3);
    column = CurrentDate();
    column = CurrentTimestamp();
    column = DateFormat(column, "format");
    column = DateAdd(column, 5);
    column = DateSub(column, 5);
    column = DateDiff(column, column);

    column = Year(column);
    column = Quarter(column);
    column = Month(column);
    column = DayOfWeek(column);
    column = DayOfMonth(column);
    column = DayOfYear(column);
    column = Hour(column);
    column = LastDay(column);
    column = Minute(column);
    column = MonthsBetween(column, column);
    column = NextDay(column, "Mon");
    column = Second(column);
    column = WeekOfYear(column);

    column = FromUnixTime(column);
    column = FromUnixTime(column, "yyyy-MM-dd HH:mm:ss");

    column = UnixTimestamp();
    column = UnixTimestamp(column);
    column = UnixTimestamp(column, "yyyy-MM-dd HH:mm:ss");

    column = ToTimestamp(column);
    column = ToTimestamp(column, "yyyy-MM-dd HH:mm:ss");

    column = ToDate(column);
    column = ToDate(column, "yyyy-MM-dd HH:mm:ss");

    column = Trunc(column, "yyyy");
    column = DateTrunc("mon", column);

    column = FromUtcTimestamp(column, "GMT+1");
    column = ToUtcTimestamp(column, "GMT+1");

    column = Window(column, "1 minute", "10 seconds");
    column = Window(column, "1 minute", "10 seconds", "5 seconds");
    column = Window(column, "1 minute");

    // ---- Collection functions ----
    column = ArrayContains(column, 12345);
    column = ArrayContains(column, "str");

    column = Concat();
    column = Concat(column);
    column = Concat(column, column);

    column = Explode(column);
    column = ExplodeOuter(column);
    column = PosExplode(column);
    column = PosExplodeOuter(column);

    column = GetJsonObject(column, "abc.json");

    column = JsonTuple(column, "a");
    column = JsonTuple(column, "a", "b");

    // Options dictionary shared by the FromJson and ToJson overloads that accept one.
    Dictionary<string, string> jsonOptions =
        new Dictionary<string, string>() { { "hello", "world" } };

    column = FromJson(column, "a Int");
    column = FromJson(column, "a Int", jsonOptions);

    column = ToJson(column);
    column = ToJson(column, jsonOptions);

    column = Size(column);

    column = SortArray(column);
    column = SortArray(column, true);
    column = SortArray(column, false);

    column = Reverse(column);
    column = MapKeys(column);
    column = MapValues(column);

    // ---- Udf functions (zero through ten arguments) ----
    column = Udf(() => 1)();

    column = Udf<int, int>(x1 => 1)(column);

    column = Udf<int, int, int>((x1, x2) => 1)(column, column);

    column = Udf<int, int, int, int>((x1, x2, x3) => 1)(column, column, column);

    column = Udf<int, int, int, int, int>(
        (x1, x2, x3, x4) => 1)(column, column, column, column);

    column = Udf<int, int, int, int, int, int>(
        (x1, x2, x3, x4, x5) => 1)(column, column, column, column, column);

    column = Udf<int, int, int, int, int, int, int>(
        (x1, x2, x3, x4, x5, x6) => 1)(column, column, column, column, column, column);

    column = Udf<int, int, int, int, int, int, int, int>(
        (x1, x2, x3, x4, x5, x6, x7) => 1)(
            column, column, column, column, column, column, column);

    column = Udf<int, int, int, int, int, int, int, int, int>(
        (x1, x2, x3, x4, x5, x6, x7, x8) => 1)(
            column, column, column, column, column, column, column, column);

    column = Udf<int, int, int, int, int, int, int, int, int, int>(
        (x1, x2, x3, x4, x5, x6, x7, x8, x9) => 1)(
            column, column, column, column, column, column, column, column, column);

    column = Udf<int, int, int, int, int, int, int, int, int, int, int>(
        (x1, x2, x3, x4, x5, x6, x7, x8, x9, x10) => 1)(
            column, column, column, column, column, column, column, column, column, column);

    column = CallUDF("udf");
    column = CallUDF("udf", column);
    column = CallUDF("udf", column, column);
}
/// <summary>
/// Creates an ordering argument from a user-defined function.
/// </summary>
/// <param name="arg">The user-defined function supplied as the argument.</param>
internal OrderingArgument(Udf arg)
    : this((Chainer)arg)
{
    // The upcast above selects the constructor overload for a Chainer argument;
    // the original Udf reference is then handed to SetArgType.
    SetArgType(arg);
}
/// <summary>
/// Creates an aggregate argument from a user-defined function.
/// </summary>
/// <param name="arg">The user-defined function supplied as the argument.</param>
internal AggregateArgument(Udf arg)
    : this((Chainer)arg)
{
    // The upcast above selects the constructor overload for a Chainer argument;
    // the original Udf reference is then handed to SetArgType.
    SetArgType(arg);
}
/// <summary>
/// Creates a grouping argument from a user-defined function.
/// </summary>
/// <param name="arg">The user-defined function supplied as the argument.</param>
internal GroupingArgument(Udf arg)
    : this((Chainer)arg)
{
    // The upcast above selects the constructor overload for a Chainer argument;
    // the original Udf reference is then handed to SetArgType.
    SetArgType(arg);
}
/// <summary>
/// Requests ascending sort order for the values produced by the given scalar user-defined
/// function. ASC is the default sort order, and null values are treated as the lowest
/// possible values.
/// </summary>
/// <param name="column">A scalar user-defined function expression on which to sort the query result set.</param>
/// <returns>A new <see cref="OrderedChainer"/> built from <paramref name="column"/> and <see cref="SortOrder.Asc"/>.</returns>
public static OrderedChainer AsAsc(this Udf column)
{
    var ascending = new OrderedChainer(column, SortOrder.Asc);
    return ascending;
}
/// <summary>
/// Creates a value scalar argument from a user-defined function by passing it
/// unchanged to the base-class constructor; no additional work is done here.
/// </summary>
/// <param name="arg">The user-defined function forwarded to the base constructor.</param>
internal ValueScalarArgument(Udf arg) : base(arg) { }
/// <summary>
/// Creates a numeric argument from a user-defined function.
/// </summary>
/// <param name="arg">The user-defined function supplied as the argument.</param>
internal NumericArgument(Udf arg)
    : this((Chainer)arg)
{
    // The upcast above selects the constructor overload for a Chainer argument;
    // the original Udf reference is then handed to SetArgType.
    SetArgType(arg);
}
/// <summary>
/// Applies a collation to the column produced by a user-defined function.
/// </summary>
/// <param name="column">The user-defined function acting as the column.</param>
/// <param name="collation">The collation name; defaults to <c>DATABASE_DEFAULT</c> when omitted.</param>
/// <returns>A new <see cref="CollateChainer"/> built from <paramref name="column"/> and <paramref name="collation"/>.</returns>
public static CollateChainer Collate(this Udf column, string collation = "DATABASE_DEFAULT")
{
    var collated = new CollateChainer(column, collation);
    return collated;
}