// Round-trips a pickling worker function through CommandSerDe and verifies
// that the serializer/deserializer modes, the run mode, and the UDF's
// behavior all survive serialization.
public void TestCommandSerDeForSqlPickling()
{
    var wrapper = new Sql.PicklingUdfWrapper<string, string>((str) => $"hello {str}");
    var original = new Sql.PicklingWorkerFunction(wrapper.Execute);

    byte[] payload = Utils.CommandSerDe.Serialize(
        original.Func,
        Utils.CommandSerDe.SerializedMode.Row,
        Utils.CommandSerDe.SerializedMode.Row);

    using var stream = new MemoryStream(payload);
    var roundTripped = new Sql.PicklingWorkerFunction(
        Utils.CommandSerDe.Deserialize<Sql.PicklingWorkerFunction.ExecuteDelegate>(
            stream,
            out Utils.CommandSerDe.SerializedMode serializerMode,
            out Utils.CommandSerDe.SerializedMode deserializerMode,
            out var runMode));

    // Both modes were serialized as Row above; "N" is the run mode that
    // Serialize recorded for this function.
    Assert.Equal(Utils.CommandSerDe.SerializedMode.Row, serializerMode);
    Assert.Equal(Utils.CommandSerDe.SerializedMode.Row, deserializerMode);
    Assert.Equal("N", runMode);

    // The deserialized function must behave exactly like the original UDF.
    var output = roundTripped.Func(0, new[] { "spark" }, new[] { 0 });
    Assert.Equal("hello spark", output);
}
// With no input rows — the stream carries only END_OF_DATA_SECTION — the
// executor should consume the entire input, report zero processed entries,
// and write nothing to the output stream.
public void TestPicklingSqlCommandExecutorWithEmptyInput(
    Version sparkVersion, IpcOptions ipcOptions)
{
    // The pickling path does not use the IPC options.
    _ = ipcOptions;

    var wrapper = new Sql.PicklingUdfWrapper<string, string>((str) => $"udf: {str}");
    var command = new SqlCommand()
    {
        ArgOffsets = new[] { 0 },
        NumChainedFunctions = 1,
        WorkerFunction = new Sql.PicklingWorkerFunction(wrapper.Execute),
        SerializerMode = CommandSerDe.SerializedMode.Row,
        DeserializerMode = CommandSerDe.SerializedMode.Row
    };
    var payload = new Worker.CommandPayload()
    {
        EvalType = UdfUtils.PythonEvalType.SQL_BATCHED_UDF,
        Commands = new[] { command }
    };

    using var input = new MemoryStream();
    using var output = new MemoryStream();

    // Empty-input scenario: send only the end-of-data marker.
    SerDe.Write(input, (int)SpecialLengths.END_OF_DATA_SECTION);
    input.Seek(0, SeekOrigin.Begin);

    CommandExecutorStat stat = new CommandExecutor(sparkVersion).Execute(
        input, output, 0, payload);

    // Entire input consumed, no entries processed, nothing written back.
    Assert.Equal(input.Length, input.Position);
    Assert.Equal(0, stat.NumEntriesProcessed);
    Assert.Equal(0, output.Length);
}
// Runs two chained SQL commands — a string-formatting UDF over column 0 and
// an int-multiplying UDF over columns 1 and 2 — on pickled input rows, then
// verifies that every output row carries one column per command.
public void TestPicklingSqlCommandExecutorWithMultiCommands()
{
    var udfWrapper1 = new Sql.PicklingUdfWrapper<string, string>((str) => $"udf: {str}");
    var udfWrapper2 = new Sql.PicklingUdfWrapper<int, int, int>(
        (arg1, arg2) => arg1 * arg2);

    var command1 = new SqlCommand()
    {
        ArgOffsets = new[] { 0 },
        NumChainedFunctions = 1,
        WorkerFunction = new Sql.PicklingWorkerFunction(udfWrapper1.Execute),
        SerializerMode = CommandSerDe.SerializedMode.Row,
        DeserializerMode = CommandSerDe.SerializedMode.Row
    };
    var command2 = new SqlCommand()
    {
        ArgOffsets = new[] { 1, 2 },
        NumChainedFunctions = 1,
        WorkerFunction = new Sql.PicklingWorkerFunction(udfWrapper2.Execute),
        SerializerMode = CommandSerDe.SerializedMode.Row,
        DeserializerMode = CommandSerDe.SerializedMode.Row
    };
    var commandPayload = new Worker.CommandPayload()
    {
        EvalType = UdfUtils.PythonEvalType.SQL_BATCHED_UDF,
        Commands = new[] { command1, command2 }
    };

    using (var inputStream = new MemoryStream())
    using (var outputStream = new MemoryStream())
    {
        int numRows = 10;

        // Write test data to the input stream: each row is pickled as a
        // single-row batch of (string, int, int), length-prefixed.
        var pickler = new Pickler();
        for (int i = 0; i < numRows; ++i)
        {
            byte[] pickled = pickler.dumps(
                new[] { new object[] { i.ToString(), i, i } });
            SerDe.Write(inputStream, pickled.Length);
            SerDe.Write(inputStream, pickled);
        }
        SerDe.Write(inputStream, (int)SpecialLengths.END_OF_DATA_SECTION);
        inputStream.Seek(0, SeekOrigin.Begin);

        CommandExecutorStat stat = new CommandExecutor().Execute(
            inputStream, outputStream, 0, commandPayload);

        // Validate all the data on the stream is read.
        Assert.Equal(inputStream.Length, inputStream.Position);
        // Use numRows instead of the previous hard-coded 10 so the assertion
        // stays in sync with the row count written above.
        Assert.Equal(numRows, stat.NumEntriesProcessed);

        // Validate the output stream.
        outputStream.Seek(0, SeekOrigin.Begin);
        var unpickler = new Unpickler();

        // One row was written as a batch above, thus need to read 'numRows' batches.
        List<object[]> rows = new List<object[]>();
        for (int i = 0; i < numRows; ++i)
        {
            int length = SerDe.ReadInt32(outputStream);
            byte[] pickledBytes = SerDe.ReadBytes(outputStream, length);
            rows.Add((unpickler.loads(pickledBytes) as ArrayList)[0] as object[]);
        }
        Assert.Equal(numRows, rows.Count);

        for (int i = 0; i < numRows; ++i)
        {
            // There were two UDFs each of which produces one column.
            object[] columns = rows[i];
            Assert.Equal($"udf: {i}", (string)columns[0]);
            Assert.Equal(i * i, (int)columns[1]);
        }

        // Validate all the data on the stream is read.
        Assert.Equal(outputStream.Length, outputStream.Position);
    }
}