Exemplo n.º 1
0
        /// <summary>
        /// Picks the command executor that matches <paramref name="evalType"/> and lets it
        /// run the commands on the data read from the input stream, writing the results
        /// to the output stream.
        /// </summary>
        /// <param name="inputStream">Input stream to read data from</param>
        /// <param name="outputStream">Output stream to write results to</param>
        /// <param name="evalType">Evaluation type for the current commands</param>
        /// <param name="commands">Contains the commands to execute</param>
        /// <returns>Statistics captured during the Execute() run</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="commands"/> is null.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="commands"/> is empty, or at least one command has a serializer
        /// or deserializer mode other than Row.
        /// </exception>
        /// <exception cref="NotSupportedException">
        /// <paramref name="evalType"/> is neither SQL_SCALAR_PANDAS_UDF nor SQL_BATCHED_UDF.
        /// </exception>
        internal static CommandExecutorStat Execute(
            Stream inputStream,
            Stream outputStream,
            UdfUtils.PythonEvalType evalType,
            SqlCommand[] commands)
        {
            // Fail with a descriptive argument exception instead of a NullReferenceException
            // on the Length access below. ArgumentNullException derives from
            // ArgumentException, so handlers for the latter keep working.
            if (commands == null)
            {
                throw new ArgumentNullException(nameof(commands));
            }

            if (commands.Length == 0)
            {
                throw new ArgumentException("Commands cannot be empty.");
            }

            // Only Row mode is accepted on both the serializer and the deserializer side.
            if (commands.Any(c =>
                             (c.SerializerMode != CommandSerDe.SerializedMode.Row) ||
                             (c.DeserializerMode != CommandSerDe.SerializedMode.Row)))
            {
                throw new ArgumentException("Unexpected serialization mode found.");
            }

            SqlCommandExecutor executor;

            if (evalType == UdfUtils.PythonEvalType.SQL_SCALAR_PANDAS_UDF)
            {
                executor = new ArrowSqlCommandExecutor();
            }
            else if (evalType == UdfUtils.PythonEvalType.SQL_BATCHED_UDF)
            {
                executor = new PicklingSqlCommandExecutor();
            }
            else
            {
                throw new NotSupportedException($"{evalType} is not supported.");
            }

            return executor.ExecuteCore(inputStream, outputStream, commands);
        }
Exemplo n.º 2
0
 /// <summary>
 /// Creates a <see cref="UserDefinedFunction"/> using the JVM bridge exposed by
 /// SparkEnvironment.JvmBridge. All other arguments are forwarded unchanged to the
 /// overload that takes the bridge explicitly.
 /// </summary>
 /// <param name="name">Name of the udf</param>
 /// <param name="command">Command bytes forwarded to the bridge-taking overload</param>
 /// <param name="evalType">The EvalType of the function</param>
 /// <param name="returnType">Return type of the function, forwarded as-is</param>
 /// <returns>The function produced by the bridge-taking overload</returns>
 internal static UserDefinedFunction Create(
     string name,
     byte[] command,
     UdfUtils.PythonEvalType evalType,
     string returnType)
 {
     var defaultBridge = SparkEnvironment.JvmBridge;

     return Create(defaultBridge, name, command, evalType, returnType);
 }
Exemplo n.º 3
0
        /// <summary>
        /// Serializes the wrapped delegate, builds a <see cref="UserDefinedFunction"/>
        /// from it and registers that function under the given name by invoking
        /// "registerPython" on the underlying JVM object.
        /// </summary>
        /// <param name="name">Name of the udf</param>
        /// <param name="func">Wrapped UDF function</param>
        /// <param name="evalType">The EvalType of the function</param>
        /// <param name="returnType">The return type of the function in JSON format</param>
        private void Register(string name, Delegate func, UdfUtils.PythonEvalType evalType, string returnType)
        {
            // Row mode is used for both the serializer and the deserializer side.
            byte[] serializedFunc = CommandSerDe.Serialize(
                func, CommandSerDe.SerializedMode.Row, CommandSerDe.SerializedMode.Row);

            UserDefinedFunction wrappedUdf =
                UserDefinedFunction.Create(_jvmObject.Jvm, name, serializedFunc, evalType, returnType);

            _jvmObject.Invoke("registerPython", name, wrappedUdf);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Serializes the wrapped delegate, builds a UserDefinedFunction whose return
        /// type is derived from <typeparamref name="TResult"/>, and registers it under
        /// the given name by invoking "registerPython" on the underlying JVM object.
        /// </summary>
        /// <typeparam name="TResult">Return type of the udf</typeparam>
        /// <param name="name">Name of the udf</param>
        /// <param name="func">Wrapped UDF function</param>
        /// <param name="evalType">The EvalType of the function.</param>
        internal void Register<TResult>(string name, Delegate func, UdfUtils.PythonEvalType evalType)
        {
            // Row mode is used for both the serializer and the deserializer side.
            byte[] serializedFunc = CommandSerDe.Serialize(
                func, CommandSerDe.SerializedMode.Row, CommandSerDe.SerializedMode.Row);

            // The bridge is read before the return type is resolved, matching the
            // left-to-right argument evaluation of a single nested call.
            var bridge = _jvmObject.Jvm;
            var returnType = UdfUtils.GetReturnType(typeof(TResult));

            var wrappedUdf = UserDefinedFunction.Create(
                bridge, name, serializedFunc, evalType, returnType);

            _jvmObject.Invoke("registerPython", name, wrappedUdf);
        }
Exemplo n.º 5
0
 /// <summary>
 /// Creates a <see cref="UserDefinedFunction"/> that wraps a JVM-side
 /// org.apache.spark.sql.execution.python.UserDefinedPythonFunction instance
 /// constructed through the supplied bridge.
 /// </summary>
 /// <param name="jvm">Bridge used to construct the JVM-side objects</param>
 /// <param name="name">Name of the udf</param>
 /// <param name="command">Command bytes handed to UdfUtils.CreatePythonFunction</param>
 /// <param name="evalType">The EvalType of the function; passed to the JVM as an int</param>
 /// <param name="returnType">Return type of the function, parsed with DataType.FromJson</param>
 /// <returns>The wrapper around the newly constructed JVM object</returns>
 internal static UserDefinedFunction Create(
     IJvmBridge jvm,
     string name,
     byte[] command,
     UdfUtils.PythonEvalType evalType,
     string returnType)
 {
     const string udfClassName =
         "org.apache.spark.sql.execution.python.UserDefinedPythonFunction";

     // Built in the same order in which they are passed to the constructor below.
     var pythonFunction = UdfUtils.CreatePythonFunction(jvm, command);
     var dataType = DataType.FromJson(jvm, returnType);

     var jvmUdf = jvm.CallConstructor(
         udfClassName,
         name,
         pythonFunction,
         dataType,
         (int)evalType,
         true); // udfDeterministic

     return new UserDefinedFunction(jvmUdf);
 }
Exemplo n.º 6
0
        /// <summary>
        /// Serializes the wrapped delegate, constructs the JVM-side
        /// UserDefinedPythonFunction for it directly, and registers the resulting
        /// udf under the given name by invoking "registerPython" on the underlying
        /// JVM object.
        /// </summary>
        /// <typeparam name="TResult">Return type of the udf</typeparam>
        /// <param name="name">Name of the udf</param>
        /// <param name="func">Wrapped UDF function</param>
        /// <param name="evalType">The EvalType of the function.</param>
        internal void Register<TResult>(string name, Delegate func, UdfUtils.PythonEvalType evalType)
        {
            const string udfClassName =
                "org.apache.spark.sql.execution.python.UserDefinedPythonFunction";

            // Row mode is used for both the serializer and the deserializer side.
            byte[] serializedFunc = CommandSerDe.Serialize(
                func, CommandSerDe.SerializedMode.Row, CommandSerDe.SerializedMode.Row);

            JvmObjectReference wrappedFunc =
                UdfUtils.CreatePythonFunction(_jvmObject.Jvm, serializedFunc);

            var jvmUdf = _jvmObject.Jvm.CallConstructor(
                udfClassName,
                name,
                wrappedFunc,
                GetDataType<TResult>(),
                (int)evalType,
                true); // udfDeterministic

            var registeredUdf = new UserDefinedFunction(jvmUdf);

            _jvmObject.Invoke("registerPython", name, registeredUdf);
        }
Exemplo n.º 7
0
        /// <summary>
        /// Creates a <see cref="UserDefinedFunction"/> through SparkEnvironment.JvmBridge:
        /// resolves the return data type, builds the JVM-side python function via
        /// SQLUtils.createPythonFunction and wraps both in a UserDefinedPythonFunction.
        /// </summary>
        /// <param name="name">Name of the udf</param>
        /// <param name="command">Command bytes passed to createPythonFunction</param>
        /// <param name="evalType">The EvalType of the function; passed to the JVM as an int</param>
        /// <param name="returnType">
        /// Return type of the function; it is wrapped in double quotes before being
        /// handed to DataType.fromJson
        /// </param>
        /// <returns>The wrapper around the newly constructed JVM object</returns>
        internal static UserDefinedFunction Create(
            string name,
            byte[] command,
            UdfUtils.PythonEvalType evalType,
            string returnType)
        {
            IJvmBridge bridge = SparkEnvironment.JvmBridge;

            JvmObjectReference envVars = bridge.CallConstructor("java.util.Hashtable");
            // The same list reference is passed for both the python includes and
            // the broadcast variables below.
            JvmObjectReference sharedList = bridge.CallConstructor("java.util.ArrayList");

            object parsedType = bridge.CallStaticJavaMethod(
                "org.apache.spark.sql.types.DataType",
                "fromJson",
                $"\"{returnType}\"");
            var dataType = (JvmObjectReference)parsedType;

            object createdFunction = bridge.CallStaticJavaMethod(
                "org.apache.spark.sql.api.dotnet.SQLUtils",
                "createPythonFunction",
                command,
                envVars,    // Environment variables
                sharedList, // Python includes
                SparkEnvironment.ConfigurationService.GetWorkerExePath(),
                "1.0",
                sharedList, // Broadcast variables
                null);      // Accumulator
            var pythonFunction = (JvmObjectReference)createdFunction;

            var jvmUdf = bridge.CallConstructor(
                "org.apache.spark.sql.execution.python.UserDefinedPythonFunction",
                name,
                pythonFunction,
                dataType,
                (int)evalType,
                true); // udfDeterministic

            return new UserDefinedFunction(jvmUdf);
        }
Exemplo n.º 8
0
        /// <summary>
        /// Creates a <see cref="UserDefinedFunction"/> through the supplied bridge:
        /// builds the JVM-side python function, resolves the return data type with
        /// DataType.fromJson and wraps both in a UserDefinedPythonFunction.
        /// </summary>
        /// <param name="jvm">Bridge used to construct the JVM-side objects</param>
        /// <param name="name">Name of the udf</param>
        /// <param name="command">Command bytes handed to UdfUtils.CreatePythonFunction</param>
        /// <param name="evalType">The EvalType of the function; passed to the JVM as an int</param>
        /// <param name="returnType">Return type of the function, passed to DataType.fromJson</param>
        /// <returns>The wrapper around the newly constructed JVM object</returns>
        internal static UserDefinedFunction Create(
            IJvmBridge jvm,
            string name,
            byte[] command,
            UdfUtils.PythonEvalType evalType,
            string returnType)
        {
            const string udfClassName =
                "org.apache.spark.sql.execution.python.UserDefinedPythonFunction";

            var wrappedFunc = UdfUtils.CreatePythonFunction(jvm, command);

            // The interpolation is kept on purpose: it turns a null returnType into
            // an empty string before it reaches the JVM call.
            object parsedType = jvm.CallStaticJavaMethod(
                "org.apache.spark.sql.types.DataType",
                "fromJson",
                $"{returnType}");
            var dataType = (JvmObjectReference)parsedType;

            var jvmUdf = jvm.CallConstructor(
                udfClassName,
                name,
                wrappedFunc,
                dataType,
                (int)evalType,
                true); // udfDeterministic

            return new UserDefinedFunction(jvmUdf);
        }
Exemplo n.º 9
0
 /// <summary>
 /// Registers a wrapped udf whose return type is derived from
 /// <typeparamref name="TResult"/>; the actual registration is delegated to the
 /// overload that takes the return type explicitly.
 /// </summary>
 /// <typeparam name="TResult">Return type of the udf</typeparam>
 /// <param name="name">Name of the udf</param>
 /// <param name="func">Wrapped UDF function</param>
 /// <param name="evalType">The EvalType of the function</param>
 internal void Register<TResult>(string name, Delegate func, UdfUtils.PythonEvalType evalType)
 {
     Type resultType = typeof(TResult);

     Register(name, func, evalType, UdfUtils.GetReturnType(resultType));
 }