Ejemplo n.º 1
0
        // Serializes an Arrow UDF that prefixes every input string with "hello ", deserializes
        // it again, and verifies the recovered serializer/deserializer modes, the run mode, and
        // the output produced by the deserialized function.
        public void TestCommandSerDeForSqlArrow()
        {
            Utils.CommandSerDe.SerializedMode rowMode = Utils.CommandSerDe.SerializedMode.Row;

            var udfWrapper = new Sql.ArrowUdfWrapper<StringArray, StringArray>(
                (values) =>
                {
                    // Build "hello <value>" for each element of the incoming array.
                    var greetings = new string[values.Length];
                    for (int row = 0; row < greetings.Length; ++row)
                    {
                        greetings[row] = $"hello {values.GetString(row)}";
                    }

                    return (StringArray)ToArrowArray(greetings);
                });

            var workerFunction = new ArrowWorkerFunction(udfWrapper.Execute);
            byte[] serializedCommand =
                Utils.CommandSerDe.Serialize(workerFunction.Func, rowMode, rowMode);

            using (var stream = new MemoryStream(serializedCommand))
            {
                ArrowWorkerFunction.ExecuteDelegate restoredDelegate =
                    Utils.CommandSerDe.Deserialize<ArrowWorkerFunction.ExecuteDelegate>(
                        stream,
                        out Utils.CommandSerDe.SerializedMode serializerMode,
                        out Utils.CommandSerDe.SerializedMode deserializerMode,
                        out var runMode);
                var restoredFunction = new ArrowWorkerFunction(restoredDelegate);

                // The metadata written by Serialize must come back unchanged.
                Assert.Equal(rowMode, serializerMode);
                Assert.Equal(rowMode, deserializerMode);
                Assert.Equal("N", runMode);

                // The restored delegate must still apply the "hello " prefix.
                Apache.Arrow.IArrowArray input = ToArrowArray(new[] { "spark" });
                int[] argOffsets = new[] { 0 };
                Apache.Arrow.IArrowArray result =
                    restoredFunction.Func(new[] { input }, argOffsets);
                ArrowTestUtils.AssertEquals("hello spark", result);
            }
        }
Ejemplo n.º 2
0
        // Checks a udfWrapper whose internal UDF concatenates all of its input strings by
        // executing it against two different argOffsets windows over the same input.
        private void ValidateArrowWrapper(int numArgs, dynamic udfWrapper)
        {
            // Allocate numArgs + 1 inputs so that the window starting at offset 1 still has
            // numArgs columns to read; this validates the indexing inside the wrapper.
            var input = new IArrowArray[numArgs + 1];
            var inputStrings = new List<string>();

            for (int position = 0; position < input.Length; position++)
            {
                string value = $"arg{position}";
                inputStrings.Add(value);
                input[position] = ArrowArrayHelpers.ToArrowArray(new string[] { value });
            }

            // firstOffset == 0: argOffsets { 0, 1, 2 } for numArgs == 3, expecting "arg0arg1arg2".
            // firstOffset == 1: argOffsets { 1, 2, 3 } for numArgs == 3, expecting "arg1arg2arg3".
            foreach (int firstOffset in new[] { 0, 1 })
            {
                string expected = string.Join("", inputStrings.GetRange(firstOffset, numArgs));
                int[] argOffsets = Enumerable.Range(firstOffset, numArgs).ToArray();

                ArrowTestUtils.AssertEquals(expected, udfWrapper.Execute(0, input, argOffsets));
            }
        }
Ejemplo n.º 3
0
        // Serializes an Arrow UDF built from a string -> string function ("hello " prefix),
        // deserializes it, and verifies the recovered modes, the run mode, and the output of
        // the deserialized function.
        public void TestCommandSerDeForSqlArrow()
        {
            Utils.CommandSerDe.SerializedMode rowMode = Utils.CommandSerDe.SerializedMode.Row;

            var udfWrapper = new ArrowUdfWrapper<string, string>(
                (str) => $"hello {str}");
            var workerFunction = new ArrowWorkerFunction(udfWrapper.Execute);

            byte[] serializedCommand =
                Utils.CommandSerDe.Serialize(workerFunction.Func, rowMode, rowMode);

            using (var stream = new MemoryStream(serializedCommand))
            {
                ArrowWorkerFunction.ExecuteDelegate restoredDelegate =
                    Utils.CommandSerDe.Deserialize<ArrowWorkerFunction.ExecuteDelegate>(
                        stream,
                        out Utils.CommandSerDe.SerializedMode serializerMode,
                        out Utils.CommandSerDe.SerializedMode deserializerMode,
                        out var runMode);
                var restoredFunction = new ArrowWorkerFunction(restoredDelegate);

                // The metadata written by Serialize must come back unchanged.
                Assert.Equal(rowMode, serializerMode);
                Assert.Equal(rowMode, deserializerMode);
                Assert.Equal("N", runMode);

                // Run the restored delegate on a single-element column.
                Apache.Arrow.IArrowArray input =
                    ArrowArrayHelpers.ToArrowArray(new[] { "spark" });
                int[] argOffsets = new[] { 0 };
                Apache.Arrow.IArrowArray result =
                    restoredFunction.Func(0, new[] { input }, argOffsets);
                ArrowTestUtils.AssertEquals("hello spark", result);
            }
        }
Ejemplo n.º 4
0
        // Serializes a DataFrame UDF that prefixes every string with "hello ", deserializes it,
        // and verifies the recovered modes, the run mode, and the output of the deserialized
        // function.
        public void TestCommandSerDeForSqlArrowDataFrame()
        {
            Utils.CommandSerDe.SerializedMode rowMode = Utils.CommandSerDe.SerializedMode.Row;

            var udfWrapper =
                new Sql.DataFrameUdfWrapper<ArrowStringDataFrameColumn, ArrowStringDataFrameColumn>(
                    (strings) => strings.Apply(cur => $"hello {cur}"));
            var workerFunction = new DataFrameWorkerFunction(udfWrapper.Execute);

            byte[] serializedCommand =
                Utils.CommandSerDe.Serialize(workerFunction.Func, rowMode, rowMode);

            using (var stream = new MemoryStream(serializedCommand))
            {
                DataFrameWorkerFunction.ExecuteDelegate restoredDelegate =
                    Utils.CommandSerDe.Deserialize<DataFrameWorkerFunction.ExecuteDelegate>(
                        stream,
                        out Utils.CommandSerDe.SerializedMode serializerMode,
                        out Utils.CommandSerDe.SerializedMode deserializerMode,
                        out var runMode);
                var restoredFunction = new DataFrameWorkerFunction(restoredDelegate);

                // The metadata written by Serialize must come back unchanged.
                Assert.Equal(rowMode, serializerMode);
                Assert.Equal(rowMode, deserializerMode);
                Assert.Equal("N", runMode);

                // Wrap a single-element Arrow string array as a DataFrame column and run it.
                var arrowStrings = (StringArray)ToArrowArray(new[] { "spark" });
                ArrowStringDataFrameColumn stringColumn =
                    ToArrowStringDataFrameColumn(arrowStrings);
                int[] argOffsets = new[] { 0 };

                DataFrameColumn result =
                    restoredFunction.Func(new[] { stringColumn }, argOffsets);
                ArrowTestUtils.AssertEquals("hello spark", result);
            }
        }
Ejemplo n.º 5
0
        // Chains Arrow worker functions built from scalar UDFs and verifies the output after
        // one and after two levels of chaining.
        public void TestChainingArrowWorkerFunction()
        {
            // Innermost function: combines an int column and a string column as "<str>:<number>".
            var joinWrapper = new ArrowUdfWrapper<int, string, string>(
                (number, str) => $"{str}:{number}");
            var func1 = new ArrowWorkerFunction(joinWrapper.Execute);

            // Outer functions: each one adds its own prefix to the previous result.
            var firstPrefixWrapper = new ArrowUdfWrapper<string, string>(
                (str) => $"outer1:{str}");
            var func2 = new ArrowWorkerFunction(firstPrefixWrapper.Execute);

            var secondPrefixWrapper = new ArrowUdfWrapper<string, string>(
                (str) => $"outer2:{str}");
            var func3 = new ArrowWorkerFunction(secondPrefixWrapper.Execute);

            var numberColumn = ToArrowArray(new[] { 100 });
            var nameColumn = ToArrowArray(new[] { "name" });
            Apache.Arrow.IArrowArray[] input = new[] { numberColumn, nameColumn };

            // Validate one-level chaining.
            ArrowWorkerFunction chainedFunc1 = ArrowWorkerFunction.Chain(func1, func2);
            var oneLevelResult = chainedFunc1.Func(0, input, new[] { 0, 1 });
            ArrowTestUtils.AssertEquals("outer1:name:100", oneLevelResult);

            // Validate two-level chaining.
            ArrowWorkerFunction chainedFunc2 = ArrowWorkerFunction.Chain(chainedFunc1, func3);
            var twoLevelResult = chainedFunc2.Func(0, input, new[] { 0, 1 });
            ArrowTestUtils.AssertEquals("outer2:outer1:name:100", twoLevelResult);
        }
Ejemplo n.º 6
0
        // Runs an identity UDF through ArrowWorkerFunction and verifies the single input value
        // comes back unchanged.
        public void TestArrowWorkerFunction()
        {
            var identityWrapper = new ArrowUdfWrapper<StringArray, StringArray>(
                (str) => str);
            var func = new ArrowWorkerFunction(identityWrapper.Execute);

            string[] input = { "arg1" };
            var inputColumn = ToArrowArray(input);
            int[] argOffsets = new[] { 0 };

            var output = func.Func(new[] { inputColumn }, argOffsets);

            ArrowTestUtils.AssertEquals(input[0], output);
        }
Ejemplo n.º 7
0
        // Runs an identity UDF through DataFrameWorkerFunction and verifies the single input
        // value comes back unchanged.
        public void TestDataFrameWorkerFunction()
        {
            var identityWrapper =
                new DataFrameUdfWrapper<ArrowStringDataFrameColumn, ArrowStringDataFrameColumn>(
                    (str) => str);
            var func = new DataFrameWorkerFunction(identityWrapper.Execute);

            string[] input = { "arg1" };
            var arrowStrings = (StringArray)ToArrowArray(input);
            ArrowStringDataFrameColumn stringColumn = ToArrowStringDataFrameColumn(arrowStrings);
            int[] argOffsets = new[] { 0 };

            var output = func.Func(new[] { stringColumn }, argOffsets);

            ArrowTestUtils.AssertEquals(input[0], output);
        }
Ejemplo n.º 8
0
        // Chains DataFrame worker functions whose UDFs are written with Apply and verifies the
        // output after one and after two levels of chaining.
        public void TestChainingDataFrameWorkerFunction()
        {
            // Innermost function: produces "<string>:<number>" per row. The counter is advanced
            // on every Apply callback to pick the matching element of the number column.
            var joinWrapper =
                new DataFrameUdfWrapper<Int32DataFrameColumn, ArrowStringDataFrameColumn, ArrowStringDataFrameColumn>(
                    (numbers, strings) =>
                    {
                        long rowIndex = 0;
                        return strings.Apply(cur => $"{cur}:{numbers[rowIndex++]}");
                    });
            var func1 = new DataFrameWorkerFunction(joinWrapper.Execute);

            // Outer functions: each one adds its own prefix to the previous result.
            var firstPrefixWrapper =
                new DataFrameUdfWrapper<ArrowStringDataFrameColumn, ArrowStringDataFrameColumn>(
                    (strings) => strings.Apply(cur => $"outer1:{cur}"));
            var func2 = new DataFrameWorkerFunction(firstPrefixWrapper.Execute);

            var secondPrefixWrapper =
                new DataFrameUdfWrapper<ArrowStringDataFrameColumn, ArrowStringDataFrameColumn>(
                    (strings) => strings.Apply(cur => $"outer2:{cur}"));
            var func3 = new DataFrameWorkerFunction(secondPrefixWrapper.Execute);

            string[] inputString = { "name" };
            var arrowStrings = (StringArray)ToArrowArray(inputString);
            ArrowStringDataFrameColumn stringColumn = ToArrowStringDataFrameColumn(arrowStrings);
            var numberColumn = new Int32DataFrameColumn("Int", new List<int> { 100 });
            DataFrameColumn[] input = { numberColumn, stringColumn };

            // Validate one-level chaining.
            DataFrameWorkerFunction chainedFunc1 = DataFrameWorkerFunction.Chain(func1, func2);
            var oneLevelResult = chainedFunc1.Func(input, new[] { 0, 1 });
            ArrowTestUtils.AssertEquals("outer1:name:100", oneLevelResult);

            // Validate two-level chaining.
            DataFrameWorkerFunction chainedFunc2 = DataFrameWorkerFunction.Chain(chainedFunc1, func3);
            var twoLevelResult = chainedFunc2.Func(input, new[] { 0, 1 });
            ArrowTestUtils.AssertEquals("outer2:outer1:name:100", twoLevelResult);
        }
Ejemplo n.º 9
0
        // Chains Arrow worker functions whose UDFs operate on whole Arrow arrays and verifies
        // the output after one and after two levels of chaining.
        public void TestChainingArrowWorkerFunction()
        {
            // Innermost function: produces "<string>:<number>" for each row.
            var joinWrapper = new ArrowUdfWrapper<Int32Array, StringArray, StringArray>(
                (numbers, strings) =>
                {
                    var joined = new string[strings.Length];
                    for (int row = 0; row < joined.Length; ++row)
                    {
                        joined[row] = $"{strings.GetString(row)}:{numbers.Values[row]}";
                    }

                    return (StringArray)ToArrowArray(joined);
                });
            var func1 = new ArrowWorkerFunction(joinWrapper.Execute);

            // First outer function: prefixes each row with "outer1:".
            var firstPrefixWrapper = new ArrowUdfWrapper<StringArray, StringArray>(
                (strings) =>
                {
                    var prefixed = new string[strings.Length];
                    for (int row = 0; row < prefixed.Length; ++row)
                    {
                        prefixed[row] = $"outer1:{strings.GetString(row)}";
                    }

                    return (StringArray)ToArrowArray(prefixed);
                });
            var func2 = new ArrowWorkerFunction(firstPrefixWrapper.Execute);

            // Second outer function: prefixes each row with "outer2:".
            var secondPrefixWrapper = new ArrowUdfWrapper<StringArray, StringArray>(
                (strings) =>
                {
                    var prefixed = new string[strings.Length];
                    for (int row = 0; row < prefixed.Length; ++row)
                    {
                        prefixed[row] = $"outer2:{strings.GetString(row)}";
                    }

                    return (StringArray)ToArrowArray(prefixed);
                });
            var func3 = new ArrowWorkerFunction(secondPrefixWrapper.Execute);

            var numberColumn = ToArrowArray(new[] { 100 });
            var nameColumn = ToArrowArray(new[] { "name" });
            Apache.Arrow.IArrowArray[] input = new[] { numberColumn, nameColumn };

            // Validate one-level chaining.
            ArrowWorkerFunction chainedFunc1 = ArrowWorkerFunction.Chain(func1, func2);
            var oneLevelResult = chainedFunc1.Func(input, new[] { 0, 1 });
            ArrowTestUtils.AssertEquals("outer1:name:100", oneLevelResult);

            // Validate two-level chaining.
            ArrowWorkerFunction chainedFunc2 = ArrowWorkerFunction.Chain(chainedFunc1, func3);
            var twoLevelResult = chainedFunc2.Func(input, new[] { 0, 1 });
            ArrowTestUtils.AssertEquals("outer2:outer1:name:100", twoLevelResult);
        }
Ejemplo n.º 10
0
        // Chains DataFrame worker functions whose UDFs rebuild their output column from an
        // Arrow string array, and verifies the output after one and after two levels of
        // chaining.
        public void TestChainingDataFrameWorkerFunction()
        {
            // Innermost function: produces "<string>:<number>" for each row.
            var joinWrapper =
                new DataFrameUdfWrapper<PrimitiveDataFrameColumn<int>, ArrowStringDataFrameColumn, ArrowStringDataFrameColumn>(
                    (numbers, strings) =>
                    {
                        var joined = new string[(int)strings.Length];
                        for (int row = 0; row < joined.Length; ++row)
                        {
                            joined[row] = $"{strings[row]}:{numbers[row]}";
                        }

                        return ToArrowStringDataFrameColumn((StringArray)ToArrowArray(joined));
                    });
            var func1 = new DataFrameWorkerFunction(joinWrapper.Execute);

            // First outer function: prefixes each row with "outer1:".
            var firstPrefixWrapper =
                new DataFrameUdfWrapper<ArrowStringDataFrameColumn, ArrowStringDataFrameColumn>(
                    (strings) =>
                    {
                        var prefixed = new string[(int)strings.Length];
                        for (int row = 0; row < prefixed.Length; ++row)
                        {
                            prefixed[row] = $"outer1:{strings[row]}";
                        }

                        return ToArrowStringDataFrameColumn((StringArray)ToArrowArray(prefixed));
                    });
            var func2 = new DataFrameWorkerFunction(firstPrefixWrapper.Execute);

            // Second outer function: prefixes each row with "outer2:".
            var secondPrefixWrapper =
                new DataFrameUdfWrapper<ArrowStringDataFrameColumn, ArrowStringDataFrameColumn>(
                    (strings) =>
                    {
                        var prefixed = new string[(int)strings.Length];
                        for (int row = 0; row < prefixed.Length; ++row)
                        {
                            prefixed[row] = $"outer2:{strings[row]}";
                        }

                        return ToArrowStringDataFrameColumn((StringArray)ToArrowArray(prefixed));
                    });
            var func3 = new DataFrameWorkerFunction(secondPrefixWrapper.Execute);

            string[] inputString = { "name" };
            var arrowStrings = (StringArray)ToArrowArray(inputString);
            ArrowStringDataFrameColumn stringColumn = ToArrowStringDataFrameColumn(arrowStrings);
            var numberColumn = new PrimitiveDataFrameColumn<int>("Int", new List<int> { 100 });
            DataFrameColumn[] input = { numberColumn, stringColumn };

            // Validate one-level chaining.
            DataFrameWorkerFunction chainedFunc1 = DataFrameWorkerFunction.Chain(func1, func2);
            var oneLevelResult = chainedFunc1.Func(input, new[] { 0, 1 });
            ArrowTestUtils.AssertEquals("outer1:name:100", oneLevelResult);

            // Validate two-level chaining.
            DataFrameWorkerFunction chainedFunc2 = DataFrameWorkerFunction.Chain(chainedFunc1, func3);
            var twoLevelResult = chainedFunc2.Func(input, new[] { 0, 1 });
            ArrowTestUtils.AssertEquals("outer2:outer1:name:100", twoLevelResult);
        }