Пример #1
0
        public void TestCommandSerDeForSqlArrow()
        {
            // Arrange: a column-level UDF that prefixes every string with "hello ".
            var greetingUdf = new Sql.ArrowUdfWrapper<StringArray, StringArray>(
                (strings) =>
                {
                    string[] greetings = Enumerable.Range(0, strings.Length)
                        .Select(i => $"hello {strings.GetString(i)}")
                        .ToArray();
                    return (StringArray)ToArrowArray(greetings);
                });
            var originalFunction = new ArrowWorkerFunction(greetingUdf.Execute);

            // Act: serialize the delegate, then read it back from an in-memory stream.
            var payload = Utils.CommandSerDe.Serialize(
                originalFunction.Func,
                Utils.CommandSerDe.SerializedMode.Row,
                Utils.CommandSerDe.SerializedMode.Row);

            using (var payloadStream = new MemoryStream(payload))
            {
                var restoredDelegate =
                    Utils.CommandSerDe.Deserialize<ArrowWorkerFunction.ExecuteDelegate>(
                        payloadStream,
                        out Utils.CommandSerDe.SerializedMode restoredSerializerMode,
                        out Utils.CommandSerDe.SerializedMode restoredDeserializerMode,
                        out var restoredRunMode);
                var restoredFunction = new ArrowWorkerFunction(restoredDelegate);

                // Assert: the modes and the run mode survive the round trip.
                Assert.Equal(Utils.CommandSerDe.SerializedMode.Row, restoredSerializerMode);
                Assert.Equal(Utils.CommandSerDe.SerializedMode.Row, restoredDeserializerMode);
                Assert.Equal("N", restoredRunMode);

                // Assert: the restored delegate still computes the same result.
                Apache.Arrow.IArrowArray input = ToArrowArray(new[] { "spark" });
                Apache.Arrow.IArrowArray output =
                    restoredFunction.Func(new[] { input }, new[] { 0 });
                ArrowTestUtils.AssertEquals("hello spark", output);
            }
        }
Пример #2
0
        public void TestInvalidChainingArrow()
        {
            // Two-column UDF: formats each row as "<string>:<number>".
            var pairFormatter = new ArrowUdfWrapper<Int32Array, StringArray, StringArray>(
                (numbers, strings) =>
                {
                    string[] formatted = Enumerable.Range(0, strings.Length)
                        .Select(i => $"{strings.GetString(i)}:{numbers.Values[i]}")
                        .ToArray();
                    return (StringArray)ToArrowArray(formatted);
                });
            var func1 = new ArrowWorkerFunction(pairFormatter.Execute);

            // Single-column UDF: prefixes each row with "outer1:".
            var prefixer = new ArrowUdfWrapper<StringArray, StringArray>(
                (strings) =>
                {
                    string[] prefixed = Enumerable.Range(0, strings.Length)
                        .Select(i => $"outer1:{strings.GetString(i)}")
                        .ToArray();
                    return (StringArray)ToArrowArray(prefixed);
                });
            var func2 = new ArrowWorkerFunction(prefixer.Execute);

            var input = new IArrowArray[]
            {
                ToArrowArray(new[] { 100 }),
                ToArrowArray(new[] { "name" })
            };

            // func2 is placed first in the chain here, so the order does not align
            // with the two-column input and the invocation is expected to throw.
            ArrowWorkerFunction misorderedChain = ArrowWorkerFunction.Chain(func2, func1);

            Assert.ThrowsAny<Exception>(() => misorderedChain.Func(input, new[] { 0, 1 }));
        }
Пример #3
0
        public void TestCommandSerDeForSqlArrow()
        {
            // Arrange: a scalar UDF that prefixes its argument with "hello ".
            var originalFunction = new ArrowWorkerFunction(
                new ArrowUdfWrapper<string, string>((str) => $"hello {str}").Execute);

            // Act: serialize the delegate, then read it back from an in-memory stream.
            var payload = Utils.CommandSerDe.Serialize(
                originalFunction.Func,
                Utils.CommandSerDe.SerializedMode.Row,
                Utils.CommandSerDe.SerializedMode.Row);

            using (var payloadStream = new MemoryStream(payload))
            {
                var restoredDelegate =
                    Utils.CommandSerDe.Deserialize<ArrowWorkerFunction.ExecuteDelegate>(
                        payloadStream,
                        out Utils.CommandSerDe.SerializedMode restoredSerializerMode,
                        out Utils.CommandSerDe.SerializedMode restoredDeserializerMode,
                        out var restoredRunMode);
                var restoredFunction = new ArrowWorkerFunction(restoredDelegate);

                // Assert: the modes and the run mode survive the round trip.
                Assert.Equal(Utils.CommandSerDe.SerializedMode.Row, restoredSerializerMode);
                Assert.Equal(Utils.CommandSerDe.SerializedMode.Row, restoredDeserializerMode);
                Assert.Equal("N", restoredRunMode);

                // Assert: the restored delegate still computes the same result.
                Apache.Arrow.IArrowArray input = ArrowArrayHelpers.ToArrowArray(new[] { "spark" });
                Apache.Arrow.IArrowArray output =
                    restoredFunction.Func(0, new[] { input }, new[] { 0 });
                ArrowTestUtils.AssertEquals("hello spark", output);
            }
        }
Пример #4
0
        public void TestChainingArrowWorkerFunction()
        {
            // Innermost function: takes (number, string) and yields "<string>:<number>".
            var pairFormatter = new ArrowUdfWrapper<int, string, string>(
                (number, str) => $"{str}:{number}");
            var func1 = new ArrowWorkerFunction(pairFormatter.Execute);

            // First wrapper: prefixes its argument with "outer1:".
            var firstPrefixer = new ArrowUdfWrapper<string, string>(
                (str) => $"outer1:{str}");
            var func2 = new ArrowWorkerFunction(firstPrefixer.Execute);

            // Second wrapper: prefixes its argument with "outer2:".
            var secondPrefixer = new ArrowUdfWrapper<string, string>(
                (str) => $"outer2:{str}");
            var func3 = new ArrowWorkerFunction(secondPrefixer.Execute);

            var input = new Apache.Arrow.IArrowArray[]
            {
                ToArrowArray(new[] { 100 }),
                ToArrowArray(new[] { "name" })
            };

            // One level of chaining: func2 is applied to the output of func1.
            var oneLevelChain = ArrowWorkerFunction.Chain(func1, func2);
            var oneLevelResult = oneLevelChain.Func(0, input, new[] { 0, 1 });

            ArrowTestUtils.AssertEquals("outer1:name:100", oneLevelResult);

            // Two levels of chaining: func3 is applied to the output of the first chain.
            var twoLevelChain = ArrowWorkerFunction.Chain(oneLevelChain, func3);
            var twoLevelResult = twoLevelChain.Func(0, input, new[] { 0, 1 });

            ArrowTestUtils.AssertEquals("outer2:outer1:name:100", twoLevelResult);
        }
Пример #5
0
        public void TestArrowWorkerFunction()
        {
            // Identity UDF: hands back the very column it receives.
            var identityUdf = new ArrowUdfWrapper<StringArray, StringArray>((str) => str);
            var func = new ArrowWorkerFunction(identityUdf.Execute);

            string[] input = { "arg1" };
            var output = func.Func(new[] { ToArrowArray(input) }, new[] { 0 });

            // The single output row must equal the single input row.
            ArrowTestUtils.AssertEquals(input[0], output);
        }
Пример #6
0
        public void TestChainingArrowWorkerFunction()
        {
            // Innermost function: formats each row as "<string>:<number>".
            var pairFormatter = new ArrowUdfWrapper<Int32Array, StringArray, StringArray>(
                (numbers, strings) =>
                {
                    string[] formatted = Enumerable.Range(0, strings.Length)
                        .Select(i => $"{strings.GetString(i)}:{numbers.Values[i]}")
                        .ToArray();
                    return (StringArray)ToArrowArray(formatted);
                });
            var func1 = new ArrowWorkerFunction(pairFormatter.Execute);

            // First wrapper: prefixes each row with "outer1:".
            var firstPrefixer = new ArrowUdfWrapper<StringArray, StringArray>(
                (strings) =>
                {
                    string[] prefixed = Enumerable.Range(0, strings.Length)
                        .Select(i => $"outer1:{strings.GetString(i)}")
                        .ToArray();
                    return (StringArray)ToArrowArray(prefixed);
                });
            var func2 = new ArrowWorkerFunction(firstPrefixer.Execute);

            // Second wrapper: prefixes each row with "outer2:".
            var secondPrefixer = new ArrowUdfWrapper<StringArray, StringArray>(
                (strings) =>
                {
                    string[] prefixed = Enumerable.Range(0, strings.Length)
                        .Select(i => $"outer2:{strings.GetString(i)}")
                        .ToArray();
                    return (StringArray)ToArrowArray(prefixed);
                });
            var func3 = new ArrowWorkerFunction(secondPrefixer.Execute);

            var input = new IArrowArray[]
            {
                ToArrowArray(new[] { 100 }),
                ToArrowArray(new[] { "name" })
            };

            // One level of chaining: func2 is applied to the output of func1.
            ArrowWorkerFunction oneLevelChain = ArrowWorkerFunction.Chain(func1, func2);
            var oneLevelResult = oneLevelChain.Func(input, new[] { 0, 1 });

            AssertEquals("outer1:name:100", oneLevelResult);

            // Two levels of chaining: func3 is applied to the output of the first chain.
            ArrowWorkerFunction twoLevelChain = ArrowWorkerFunction.Chain(oneLevelChain, func3);
            var twoLevelResult = twoLevelChain.Func(input, new[] { 0, 1 });

            AssertEquals("outer2:outer1:name:100", twoLevelResult);
        }
Пример #7
0
        public void TestArrowWorkerFunctionForBool()
        {
            // UDF under test: true when the flag is set or the string contains "true".
            var flagOrContainsTrue = new ArrowUdfWrapper<string, bool, bool>(
                (str, flag) => flag || str.Contains("true"));
            var func = new ArrowWorkerFunction(flagOrContainsTrue.Execute);

            var input = new IArrowArray[]
            {
                ToArrowArray(new[] { "arg1_true", "arg1_true", "arg1_false", "arg1_false" }),
                ToArrowArray(new[] { true, false, true, false }),
            };

            var results = (BooleanArray)func.Func(0, input, new[] { 0, 1 });

            // Only the last row has neither the flag nor a "true" substring.
            bool[] expected = { true, true, true, false };
            Assert.Equal(expected.Length, results.Length);
            for (int row = 0; row < expected.Length; ++row)
            {
                Assert.Equal(expected[row], results.GetBoolean(row));
            }
        }
Пример #8
0
        public void TestInvalidChainingArrow()
        {
            // Two-argument UDF: yields "<string>:<number>".
            var pairFormatter = new ArrowUdfWrapper<int, string, string>(
                (number, str) => $"{str}:{number}");
            var func1 = new ArrowWorkerFunction(pairFormatter.Execute);

            // One-argument UDF: prefixes its argument with "outer1:".
            var prefixer = new ArrowUdfWrapper<string, string>(
                (str) => $"outer1:{str}");
            var func2 = new ArrowWorkerFunction(prefixer.Execute);

            var input = new Apache.Arrow.IArrowArray[]
            {
                ToArrowArray(new[] { 100 }),
                ToArrowArray(new[] { "name" })
            };

            // func2 is placed first in the chain here, so the order does not align
            // with the two-column input and the invocation is expected to throw.
            var misorderedChain = ArrowWorkerFunction.Chain(func2, func1);

            Assert.ThrowsAny<Exception>(() => misorderedChain.Func(0, input, new[] { 0, 1 }));
        }
Пример #9
0
        public void TestArrowWorkerFunctionForBool()
        {
            // UDF under test: per row, true when the flag is set or the string
            // contains "true".
            var flagOrContainsTrue = new ArrowUdfWrapper<StringArray, BooleanArray, BooleanArray>(
                (strings, flags) =>
                {
                    bool[] matches = Enumerable.Range(0, strings.Length)
                        .Select(i => flags.GetBoolean(i) || strings.GetString(i).Contains("true"))
                        .ToArray();
                    return (BooleanArray)ToArrowArray(matches);
                });
            var func = new ArrowWorkerFunction(flagOrContainsTrue.Execute);

            var input = new IArrowArray[]
            {
                ToArrowArray(new[] { "arg1_true", "arg1_true", "arg1_false", "arg1_false" }),
                ToArrowArray(new[] { true, false, true, false }),
            };

            var results = (BooleanArray)func.Func(input, new[] { 0, 1 });

            // Only the last row has neither the flag nor a "true" substring.
            bool[] expected = { true, true, true, false };
            Assert.Equal(expected.Length, results.Length);
            for (int row = 0; row < expected.Length; ++row)
            {
                Assert.Equal(expected[row], results.GetBoolean(row));
            }
        }
Пример #10
0
        /// <summary>
        /// Reads the SQL UDF commands from the stream. For every UDF this reads its
        /// argument offsets followed by its chained functions, combining the chained
        /// functions into a single worker function in the order they are read.
        /// </summary>
        /// <param name="evalType">Evaluation type for the current commands</param>
        /// <param name="stream">Stream to read from</param>
        /// <returns>SqlCommand objects, one per UDF read from the stream</returns>
        /// <exception cref="InvalidDataException">
        /// Thrown when a command size read from the stream is not positive, or when a
        /// grouped map evaluation type is combined with more than one UDF or function.
        /// </exception>
        private static SqlCommand[] ReadSqlCommands(
            PythonEvalType evalType,
            Stream stream)
        {
            int udfCount = SerDe.ReadInt32(stream);
            var result = new SqlCommand[udfCount];

            for (int udfIndex = 0; udfIndex < udfCount; ++udfIndex)
            {
                var command = new SqlCommand();

                // Argument offsets: a count followed by that many Int32 values.
                int offsetCount = SerDe.ReadInt32(stream);
                int[] offsets = new int[offsetCount];
                command.ArgOffsets = offsets;
                for (int offsetIndex = 0; offsetIndex < offsetCount; ++offsetIndex)
                {
                    offsets[offsetIndex] = SerDe.ReadInt32(stream);
                }

                // Chained functions: a count followed by that many serialized commands.
                int chainLength = SerDe.ReadInt32(stream);
                command.NumChainedFunctions = chainLength;
                for (int chainIndex = 0; chainIndex < chainLength; ++chainIndex)
                {
                    int commandSize = SerDe.ReadInt32(stream);
                    if (commandSize <= 0)
                    {
                        throw new InvalidDataException(
                            $"Invalid command size: {commandSize}");
                    }

                    CommandSerDe.SerializedMode serializerMode;
                    CommandSerDe.SerializedMode deserializerMode;
                    if (evalType == PythonEvalType.SQL_SCALAR_PANDAS_UDF)
                    {
                        var nextFunction = new ArrowWorkerFunction(
                            CommandSerDe.Deserialize<ArrowWorkerFunction.ExecuteDelegate>(
                                stream,
                                out serializerMode,
                                out deserializerMode,
                                out string _));

                        // The first function stands alone; later ones are chained onto it.
                        if (command.WorkerFunction == null)
                        {
                            command.WorkerFunction = nextFunction;
                        }
                        else
                        {
                            command.WorkerFunction = ArrowWorkerFunction.Chain(
                                (ArrowWorkerFunction)command.WorkerFunction,
                                nextFunction);
                        }
                    }
                    else if (evalType == PythonEvalType.SQL_GROUPED_MAP_PANDAS_UDF)
                    {
                        // Rejected before deserializing: exactly one UDF with exactly
                        // one function is accepted for this evaluation type.
                        if ((udfCount != 1) || (command.WorkerFunction != null))
                        {
                            throw new InvalidDataException(
                                "Grouped map UDFs do not support combining multiple UDFs");
                        }

                        command.WorkerFunction = new ArrowGroupedMapWorkerFunction(
                            CommandSerDe.Deserialize<ArrowGroupedMapWorkerFunction.ExecuteDelegate>(
                                stream,
                                out serializerMode,
                                out deserializerMode,
                                out string _));
                    }
                    else
                    {
                        var nextFunction = new PicklingWorkerFunction(
                            CommandSerDe.Deserialize<PicklingWorkerFunction.ExecuteDelegate>(
                                stream,
                                out serializerMode,
                                out deserializerMode,
                                out string _));

                        // The first function stands alone; later ones are chained onto it.
                        if (command.WorkerFunction == null)
                        {
                            command.WorkerFunction = nextFunction;
                        }
                        else
                        {
                            command.WorkerFunction = PicklingWorkerFunction.Chain(
                                (PicklingWorkerFunction)command.WorkerFunction,
                                nextFunction);
                        }
                    }

                    // The modes of the most recently read function are the ones kept.
                    command.SerializerMode = serializerMode;
                    command.DeserializerMode = deserializerMode;
                }

                result[udfIndex] = command;
            }

            return result;
        }