/// <summary>
/// Maps each group of the current DataFrame using a UDF and
/// returns the result as a DataFrame.
///
/// The user-defined function should take an Apache Arrow RecordBatch
/// and return another Apache Arrow RecordBatch. For each group, all
/// columns are passed together as a RecordBatch to the user-function and
/// the returned RecordBatches are combined as a DataFrame.
///
/// The returned <see cref="RecordBatch"/> can be of arbitrary length and its
/// schema must match <paramref name="returnType"/>.
/// </summary>
/// <param name="returnType">
/// The <see cref="StructType"/> that represents the shape of the return data set.
/// </param>
/// <param name="func">A grouped map user-defined function.</param>
/// <returns>New DataFrame object with the UDF applied.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="returnType"/> or <paramref name="func"/> is null.
/// </exception>
public DataFrame Apply(StructType returnType, Func<RecordBatch, RecordBatch> func)
{
    // Validate up front so callers get a named-argument error instead of a
    // NullReferenceException raised part-way through building the UDF.
    if (returnType is null)
    {
        throw new ArgumentNullException(nameof(returnType));
    }

    if (func is null)
    {
        throw new ArgumentNullException(nameof(func));
    }

    // Adapt the user's function to the delegate shape that gets serialized.
    ArrowGroupedMapWorkerFunction.ExecuteDelegate wrapper =
        new ArrowGroupedMapUdfWrapper(func).Execute;

    // The UDF is named after the user's method and carries the serialized
    // wrapper together with the JSON form of the declared return schema.
    UserDefinedFunction udf = UserDefinedFunction.Create(
        Reference.Jvm,
        func.Method.ToString(),
        CommandSerDe.Serialize(
            wrapper,
            CommandSerDe.SerializedMode.Row,
            CommandSerDe.SerializedMode.Row),
        UdfUtils.PythonEvalType.SQL_GROUPED_MAP_PANDAS_UDF,
        returnType.Json);

    // Every column of the underlying DataFrame is passed to the UDF.
    IReadOnlyList<string> columnNames = _dataFrame.Columns();
    var columns = new Column[columnNames.Count];
    for (int i = 0; i < columnNames.Count; ++i)
    {
        columns[i] = _dataFrame[columnNames[i]];
    }

    Column udfColumn = udf.Apply(columns);

    // Invoke "flatMapGroupsInPandas" through Reference with the UDF expression
    // and wrap the returned object reference in a DataFrame.
    return new DataFrame((JvmObjectReference)Reference.Invoke(
        "flatMapGroupsInPandas",
        udfColumn.Expr()));
}