Exemplo n.º 1
0
        /// <summary>
        /// Validates the evaluation type and then delegates reading of the SqlCommands
        /// to the processor that matches the given Spark version.
        /// </summary>
        /// <param name="evalType">Evaluation type for the current commands</param>
        /// <param name="stream">Stream to read from</param>
        /// <param name="version">Spark version</param>
        /// <returns>SqlCommand objects returned by the version-specific processor</returns>
        /// <exception cref="NotImplementedException">
        /// The evaluation type is not one of SQL_BATCHED_UDF, SQL_SCALAR_PANDAS_UDF
        /// or SQL_GROUPED_MAP_PANDAS_UDF.
        /// </exception>
        /// <exception cref="NotSupportedException">
        /// The version is neither 2.3.x nor 2.4.x.
        /// </exception>
        private static SqlCommand[] ReadSqlCommands(
            PythonEvalType evalType,
            Stream stream,
            Version version)
        {
            // The evaluation type is checked before the version, so an unknown
            // evaluation type is reported even when the version is unsupported too.
            bool isHandledEvalType =
                evalType == PythonEvalType.SQL_BATCHED_UDF ||
                evalType == PythonEvalType.SQL_SCALAR_PANDAS_UDF ||
                evalType == PythonEvalType.SQL_GROUPED_MAP_PANDAS_UDF;

            if (!isHandledEvalType)
            {
                throw new NotImplementedException($"{evalType} is not supported.");
            }

            if (version.Major == 2 && version.Minor == 3)
            {
                return SqlCommandProcessorV2_3_X.Process(evalType, stream);
            }

            if (version.Major == 2 && version.Minor == 4)
            {
                return SqlCommandProcessorV2_4_X.Process(evalType, stream);
            }

            // Every other major/minor combination has no processor.
            throw new NotSupportedException($"Spark {version} not supported.");
        }
Exemplo n.º 2
0
            /// <summary>
            /// Reads the SqlCommands from the stream and, for the scalar and grouped map
            /// pandas evaluation types, consumes the string that follows them.
            /// </summary>
            /// <param name="evalType">Evaluation type for the current commands</param>
            /// <param name="stream">Stream to read from</param>
            /// <returns>SqlCommand objects read from the stream</returns>
            internal static SqlCommand[] Process(PythonEvalType evalType, Stream stream)
            {
                // The commands come first on the stream; the extra string follows them.
                SqlCommand[] commands = ReadSqlCommands(evalType, stream);

                bool isFollowedByTimezone =
                    evalType == PythonEvalType.SQL_SCALAR_PANDAS_UDF ||
                    evalType == PythonEvalType.SQL_GROUPED_MAP_PANDAS_UDF;

                if (isFollowedByTimezone)
                {
                    // Timezone information. The value is read only to advance the stream;
                    // it will stay unused until timestamp columns are supported in Arrow.
                    SerDe.ReadString(stream);
                }

                return commands;
            }
Exemplo n.º 3
0
        /// <summary>
        /// Read SqlCommands from the stream based on the given version.
        /// The evaluation type is validated first, then the Spark major/minor version
        /// selects the processor that reads the commands.
        /// </summary>
        /// <param name="evalType">Evaluation type for the current commands</param>
        /// <param name="stream">Stream to read from</param>
        /// <param name="version">Spark version</param>
        /// <returns>SqlCommand objects</returns>
        /// <exception cref="NotImplementedException">
        /// The evaluation type is not one of SQL_BATCHED_UDF, SQL_SCALAR_PANDAS_UDF
        /// or SQL_GROUPED_MAP_PANDAS_UDF.
        /// </exception>
        /// <exception cref="NotSupportedException">
        /// The version is neither 2.4.x nor 3.x.
        /// </exception>
        private static SqlCommand[] ReadSqlCommands(
            PythonEvalType evalType,
            Stream stream,
            Version version)
        {
            // Reject unknown evaluation types before looking at the version.
            if ((evalType != PythonEvalType.SQL_BATCHED_UDF) &&
                (evalType != PythonEvalType.SQL_SCALAR_PANDAS_UDF) &&
                (evalType != PythonEvalType.SQL_GROUPED_MAP_PANDAS_UDF))
            {
                throw new NotImplementedException($"{evalType} is not supported.");
            }

            // Dispatch on (major, minor). Any 3.x minor version is routed to the same
            // processor as 2.4; everything else is reported as unsupported.
            return((version.Major, version.Minor) switch
            {
                (2, 4) => SqlCommandProcessorV2_4_X.Process(evalType, stream),
                (3, _) => SqlCommandProcessorV2_4_X.Process(evalType, stream),
                _ => throw new NotSupportedException($"Spark {version} not supported.")
            });
Exemplo n.º 4
0
            /// <summary>
            /// Consumes the configuration entries that precede the commands for the
            /// pandas evaluation types, then reads the SqlCommands from the stream.
            /// </summary>
            /// <param name="evalType">Evaluation type for the current commands</param>
            /// <param name="stream">Stream to read from</param>
            /// <returns>SqlCommand objects read from the stream</returns>
            internal static SqlCommand[] Process(PythonEvalType evalType, Stream stream)
            {
                bool hasLeadingConf =
                    evalType == PythonEvalType.SQL_SCALAR_PANDAS_UDF ||
                    evalType == PythonEvalType.SQL_GROUPED_MAP_PANDAS_UDF ||
                    evalType == PythonEvalType.SQL_GROUPED_AGG_PANDAS_UDF ||
                    evalType == PythonEvalType.SQL_WINDOW_AGG_PANDAS_UDF;

                if (hasLeadingConf)
                {
                    // An entry count followed by that many pairs of strings.
                    for (int remaining = SerDe.ReadInt32(stream); remaining > 0; --remaining)
                    {
                        // Both strings of the pair are read only to advance the stream;
                        // the setting itself is currently unused. When Arrow supports
                        // timestamp type, "spark.sql.session.timeZone" can be retrieved
                        // from here.
                        SerDe.ReadString(stream);
                        SerDe.ReadString(stream);
                    }
                }

                return ReadSqlCommands(evalType, stream);
            }
Exemplo n.º 5
0
        /// <summary>
        /// Reads the UDF commands from the stream. The stream carries the number of
        /// UDFs and then, for each UDF, its argument offsets followed by its chain of
        /// serialized functions.
        /// </summary>
        /// <param name="evalType">Evaluation type for the current commands</param>
        /// <param name="stream">Stream to read from</param>
        /// <returns>SqlCommand objects, one per UDF read from the stream</returns>
        /// <exception cref="InvalidDataException">
        /// A function's size field is not positive, or a grouped map evaluation is
        /// asked to combine more than one UDF or function.
        /// </exception>
        private static SqlCommand[] ReadSqlCommands(
            PythonEvalType evalType,
            Stream stream)
        {
            int udfCount = SerDe.ReadInt32(stream);
            var result = new SqlCommand[udfCount];

            for (int udfIndex = 0; udfIndex < udfCount; ++udfIndex)
            {
                var current = new SqlCommand();

                // Argument offsets: a count followed by that many integers.
                int offsetCount = SerDe.ReadInt32(stream);
                var offsets = new int[offsetCount];
                current.ArgOffsets = offsets;
                for (int slot = 0; slot < offsetCount; ++slot)
                {
                    offsets[slot] = SerDe.ReadInt32(stream);
                }

                // Chained functions: a count followed by that many serialized functions,
                // which are folded into a single worker function for the command.
                current.NumChainedFunctions = SerDe.ReadInt32(stream);
                for (int link = 0; link < current.NumChainedFunctions; ++link)
                {
                    // The size is only validated here; it is not passed on to the
                    // deserializer, which reads from the stream itself.
                    int payloadSize = SerDe.ReadInt32(stream);
                    if (payloadSize <= 0)
                    {
                        throw new InvalidDataException(
                            $"Invalid command size: {payloadSize}");
                    }

                    CommandSerDe.SerializedMode serMode;
                    CommandSerDe.SerializedMode deserMode;

                    if (evalType == PythonEvalType.SQL_SCALAR_PANDAS_UDF)
                    {
                        var arrowFunction = new ArrowWorkerFunction(
                            CommandSerDe.Deserialize<ArrowWorkerFunction.ExecuteDelegate>(
                                stream,
                                out serMode,
                                out deserMode,
                                out string _));

                        // The first function stands alone; later ones are chained onto it.
                        if (current.WorkerFunction == null)
                        {
                            current.WorkerFunction = arrowFunction;
                        }
                        else
                        {
                            current.WorkerFunction = ArrowWorkerFunction.Chain(
                                (ArrowWorkerFunction)current.WorkerFunction,
                                arrowFunction);
                        }
                    }
                    else if (evalType == PythonEvalType.SQL_GROUPED_MAP_PANDAS_UDF)
                    {
                        // Checked before deserializing: exactly one UDF with exactly
                        // one function is allowed for grouped map.
                        if ((udfCount != 1) || (current.WorkerFunction != null))
                        {
                            throw new InvalidDataException(
                                "Grouped map UDFs do not support combining multiple UDFs");
                        }

                        current.WorkerFunction = new ArrowGroupedMapWorkerFunction(
                            CommandSerDe.Deserialize<ArrowGroupedMapWorkerFunction.ExecuteDelegate>(
                                stream,
                                out serMode,
                                out deserMode,
                                out string _));
                    }
                    else
                    {
                        var picklingFunction = new PicklingWorkerFunction(
                            CommandSerDe.Deserialize<PicklingWorkerFunction.ExecuteDelegate>(
                                stream,
                                out serMode,
                                out deserMode,
                                out string _));

                        // The first function stands alone; later ones are chained onto it.
                        if (current.WorkerFunction == null)
                        {
                            current.WorkerFunction = picklingFunction;
                        }
                        else
                        {
                            current.WorkerFunction = PicklingWorkerFunction.Chain(
                                (PicklingWorkerFunction)current.WorkerFunction,
                                picklingFunction);
                        }
                    }

                    // The modes of the most recently read function are the ones kept.
                    current.SerializerMode = serMode;
                    current.DeserializerMode = deserMode;
                }

                result[udfIndex] = current;
            }

            return result;
        }