示例#1
0
        /// <summary>
        /// Serializes the delegate of a pickling worker function with <c>CommandSerDe</c>,
        /// deserializes it again, and checks that both serialization modes, the run mode,
        /// and the output of the restored function match what was put in.
        /// </summary>
        public void TestCommandSerDeForSqlPickling()
        {
            var greetingUdf = new Sql.PicklingUdfWrapper<string, string>((str) => $"hello {str}");
            var originalFunction = new Sql.PicklingWorkerFunction(greetingUdf.Execute);

            // Both the serializer and the deserializer mode are written as Row.
            var payload = Utils.CommandSerDe.Serialize(
                originalFunction.Func,
                Utils.CommandSerDe.SerializedMode.Row,
                Utils.CommandSerDe.SerializedMode.Row);

            using (var payloadStream = new MemoryStream(payload))
            {
                // Restore the delegate first, then wrap it in a fresh worker function.
                var restoredDelegate =
                    Utils.CommandSerDe.Deserialize<Sql.PicklingWorkerFunction.ExecuteDelegate>(
                        payloadStream,
                        out Utils.CommandSerDe.SerializedMode serializerMode,
                        out Utils.CommandSerDe.SerializedMode deserializerMode,
                        out var runMode);
                var restoredFunction = new Sql.PicklingWorkerFunction(restoredDelegate);

                // The metadata read back must match what was serialized.
                Assert.Equal(Utils.CommandSerDe.SerializedMode.Row, serializerMode);
                Assert.Equal(Utils.CommandSerDe.SerializedMode.Row, deserializerMode);
                Assert.Equal("N", runMode);

                // The restored function must still apply the original UDF.
                var output = restoredFunction.Func(0, new[] { "spark" }, new[] { 0 });
                Assert.Equal("hello spark", output);
            }
        }
        /// <summary>
        /// Runs a single-UDF SQL_BATCHED_UDF command payload against an input stream that
        /// holds only the end-of-data marker, and checks that the marker is consumed, no
        /// entries are reported as processed, and nothing is written to the output stream.
        /// </summary>
        /// <param name="sparkVersion">Version passed to the <c>CommandExecutor</c> constructor.</param>
        /// <param name="ipcOptions">Not used by this test; explicitly discarded.</param>
        public void TestPicklingSqlCommandExecutorWithEmptyInput(
            Version sparkVersion,
            IpcOptions ipcOptions)
        {
            _ = ipcOptions;

            var prefixUdf = new Sql.PicklingUdfWrapper<string, string>((str) => $"udf: {str}");

            // A payload carrying exactly one command that wraps the UDF above.
            var payload = new Worker.CommandPayload()
            {
                EvalType = UdfUtils.PythonEvalType.SQL_BATCHED_UDF,
                Commands = new[]
                {
                    new SqlCommand()
                    {
                        ArgOffsets = new[] { 0 },
                        NumChainedFunctions = 1,
                        WorkerFunction = new Sql.PicklingWorkerFunction(prefixUdf.Execute),
                        SerializerMode = CommandSerDe.SerializedMode.Row,
                        DeserializerMode = CommandSerDe.SerializedMode.Row
                    }
                }
            };

            using var inputStream = new MemoryStream();
            using var outputStream = new MemoryStream();

            // Empty-input scenario: the only thing on the wire is
            // SpecialLengths.END_OF_DATA_SECTION.
            SerDe.Write(inputStream, (int)SpecialLengths.END_OF_DATA_SECTION);
            inputStream.Seek(0, SeekOrigin.Begin);

            var executor = new CommandExecutor(sparkVersion);
            CommandExecutorStat stat = executor.Execute(inputStream, outputStream, 0, payload);

            // The whole input stream must have been read, with no entries processed.
            Assert.Equal(inputStream.Length, inputStream.Position);
            Assert.Equal(0, stat.NumEntriesProcessed);

            // Nothing may have been written to the output stream.
            Assert.Equal(0, outputStream.Length);
        }
示例#3
0
        /// <summary>
        /// Runs a SQL_BATCHED_UDF payload holding two commands (a string-formatting UDF on
        /// column 0 and an integer-multiplication UDF on columns 1 and 2) over pickled input
        /// rows, and checks that every input row yields one output row with one column per
        /// UDF.
        /// </summary>
        public void TestPicklingSqlCommandExecutorWithMultiCommands()
        {
            var udfWrapper1 = new Sql.PicklingUdfWrapper<string, string>((str) => $"udf: {str}");
            var udfWrapper2 = new Sql.PicklingUdfWrapper<int, int, int>(
                (arg1, arg2) => arg1 * arg2);

            // First command: applies the string UDF to the column at offset 0.
            var command1 = new SqlCommand()
            {
                ArgOffsets = new[] { 0 },
                NumChainedFunctions = 1,
                WorkerFunction = new Sql.PicklingWorkerFunction(udfWrapper1.Execute),
                SerializerMode = CommandSerDe.SerializedMode.Row,
                DeserializerMode = CommandSerDe.SerializedMode.Row
            };

            // Second command: multiplies the columns at offsets 1 and 2.
            var command2 = new SqlCommand()
            {
                ArgOffsets = new[] { 1, 2 },
                NumChainedFunctions = 1,
                WorkerFunction = new Sql.PicklingWorkerFunction(udfWrapper2.Execute),
                SerializerMode = CommandSerDe.SerializedMode.Row,
                DeserializerMode = CommandSerDe.SerializedMode.Row
            };

            var commandPayload = new Worker.CommandPayload()
            {
                EvalType = UdfUtils.PythonEvalType.SQL_BATCHED_UDF,
                Commands = new[] { command1, command2 }
            };

            using (var inputStream = new MemoryStream())
            using (var outputStream = new MemoryStream())
            {
                // Single source of truth for the row count used by every loop and
                // assertion below.
                const int numRows = 10;

                // Write test data to the input stream: one length-prefixed pickled batch
                // per row, each batch containing a single (string, int, int) row.
                var pickler = new Pickler();
                for (int i = 0; i < numRows; ++i)
                {
                    byte[] pickled = pickler.dumps(
                        new[] { new object[] { i.ToString(), i, i } });
                    SerDe.Write(inputStream, pickled.Length);
                    SerDe.Write(inputStream, pickled);
                }
                SerDe.Write(inputStream, (int)SpecialLengths.END_OF_DATA_SECTION);
                inputStream.Seek(0, SeekOrigin.Begin);

                CommandExecutorStat stat = new CommandExecutor().Execute(
                    inputStream,
                    outputStream,
                    0,
                    commandPayload);

                // Validate all the data on the stream is read.
                Assert.Equal(inputStream.Length, inputStream.Position);
                Assert.Equal(numRows, stat.NumEntriesProcessed);

                // Validate the output stream.
                outputStream.Seek(0, SeekOrigin.Begin);
                var unpickler = new Unpickler();

                // One row was written as a batch above, thus need to read 'numRows' batches.
                var rows = new List<object[]>(numRows);
                for (int i = 0; i < numRows; ++i)
                {
                    int length = SerDe.ReadInt32(outputStream);
                    byte[] pickledBytes = SerDe.ReadBytes(outputStream, length);

                    // Direct casts so that an unexpected payload shape fails right here
                    // with an InvalidCastException instead of surfacing later as a
                    // NullReferenceException.
                    var batch = (ArrayList)unpickler.loads(pickledBytes);
                    rows.Add((object[])batch[0]);
                }

                Assert.Equal(numRows, rows.Count);

                for (int i = 0; i < numRows; ++i)
                {
                    // There were two UDFs each of which produces one column.
                    object[] columns = rows[i];
                    Assert.Equal($"udf: {i}", (string)columns[0]);
                    Assert.Equal(i * i, (int)columns[1]);
                }

                // Validate all the data on the stream is read.
                Assert.Equal(outputStream.Length, outputStream.Position);
            }
        }