// Serializes an Arrow UDF with CommandSerDe, deserializes it from a stream, and
// verifies both the reported modes and that the restored function still
// prefixes its input with "hello ".
public void TestCommandSerDeForSqlArrow()
{
    var rowMode = Utils.CommandSerDe.SerializedMode.Row;

    var greetingUdf = new Sql.ArrowUdfWrapper<StringArray, StringArray>(
        strings =>
        {
            string[] greetings =
                (from i in Enumerable.Range(0, strings.Length)
                 select $"hello {strings.GetString(i)}").ToArray();
            return (StringArray)ToArrowArray(greetings);
        });
    var originalFunction = new ArrowWorkerFunction(greetingUdf.Execute);

    byte[] payload = Utils.CommandSerDe.Serialize(originalFunction.Func, rowMode, rowMode);

    using (var stream = new MemoryStream(payload))
    {
        ArrowWorkerFunction.ExecuteDelegate restoredDelegate =
            Utils.CommandSerDe.Deserialize<ArrowWorkerFunction.ExecuteDelegate>(
                stream,
                out Utils.CommandSerDe.SerializedMode serializerMode,
                out Utils.CommandSerDe.SerializedMode deserializerMode,
                out var runMode);
        var restoredFunction = new ArrowWorkerFunction(restoredDelegate);

        // The metadata read back must match what was passed to Serialize.
        Assert.Equal(rowMode, serializerMode);
        Assert.Equal(rowMode, deserializerMode);
        Assert.Equal("N", runMode);

        // Run the restored function on a single-element column.
        Apache.Arrow.IArrowArray input = ToArrowArray(new[] { "spark" });
        Apache.Arrow.IArrowArray result =
            restoredFunction.Func(new[] { input }, new[] { 0 });
        ArrowTestUtils.AssertEquals("hello spark", result);
    }
}
// Validates the given udfWrapper, whose internal UDF concatenates all the input strings.
// numArgs is the number of arguments the wrapped UDF consumes.
private void ValidateArrowWrapper(int numArgs, dynamic udfWrapper)
{
    // Build one column more than numArgs so that a shifted argOffsets window
    // can be used to check the indexing inside ArrowUdfWrapper.
    var columns = new IArrowArray[numArgs + 1];
    var columnValues = new List<string>();
    for (int i = 0; i < columns.Length; ++i)
    {
        string value = $"arg{i}";
        columnValues.Add(value);
        columns[i] = ArrowArrayHelpers.ToArrowArray(new string[] { value });
    }

    // Window starting at offset 0: with numArgs == 3 the offsets are { 0, 1, 2 }
    // and the expected result is "arg0arg1arg2".
    string expectedFromZero = string.Join("", columnValues.GetRange(0, numArgs));
    int[] offsetsFromZero = Enumerable.Range(0, numArgs).ToArray();
    ArrowTestUtils.AssertEquals(
        expectedFromZero,
        udfWrapper.Execute(0, columns, offsetsFromZero));

    // Window starting at offset 1: with numArgs == 3 the offsets are { 1, 2, 3 }
    // and the expected result is "arg1arg2arg3".
    string expectedFromOne = string.Join("", columnValues.GetRange(1, numArgs));
    int[] offsetsFromOne = Enumerable.Range(1, numArgs).ToArray();
    ArrowTestUtils.AssertEquals(
        expectedFromOne,
        udfWrapper.Execute(0, columns, offsetsFromOne));
}
// Serializes a string-to-string Arrow UDF with CommandSerDe, deserializes it
// from a stream, and verifies the reported modes and the restored function's output.
public void TestCommandSerDeForSqlArrow()
{
    var rowMode = Utils.CommandSerDe.SerializedMode.Row;

    var greetingUdf = new ArrowUdfWrapper<string, string>(name => $"hello {name}");
    var originalFunction = new ArrowWorkerFunction(greetingUdf.Execute);

    byte[] payload = Utils.CommandSerDe.Serialize(originalFunction.Func, rowMode, rowMode);

    using (var stream = new MemoryStream(payload))
    {
        ArrowWorkerFunction.ExecuteDelegate restoredDelegate =
            Utils.CommandSerDe.Deserialize<ArrowWorkerFunction.ExecuteDelegate>(
                stream,
                out Utils.CommandSerDe.SerializedMode serializerMode,
                out Utils.CommandSerDe.SerializedMode deserializerMode,
                out var runMode);
        var restoredFunction = new ArrowWorkerFunction(restoredDelegate);

        // The metadata read back must match what was passed to Serialize.
        Assert.Equal(rowMode, serializerMode);
        Assert.Equal(rowMode, deserializerMode);
        Assert.Equal("N", runMode);

        // Run the restored function on a single-element column.
        Apache.Arrow.IArrowArray input =
            ArrowArrayHelpers.ToArrowArray(new[] { "spark" });
        Apache.Arrow.IArrowArray result =
            restoredFunction.Func(0, new[] { input }, new[] { 0 });
        ArrowTestUtils.AssertEquals("hello spark", result);
    }
}
// Serializes a DataFrame-based UDF with CommandSerDe, deserializes it from a
// stream, and verifies the reported modes and the restored function's output.
public void TestCommandSerDeForSqlArrowDataFrame()
{
    var rowMode = Utils.CommandSerDe.SerializedMode.Row;

    var greetingUdf =
        new Sql.DataFrameUdfWrapper<ArrowStringDataFrameColumn, ArrowStringDataFrameColumn>(
            strings => strings.Apply(value => $"hello {value}"));
    var originalFunction = new DataFrameWorkerFunction(greetingUdf.Execute);

    byte[] payload = Utils.CommandSerDe.Serialize(originalFunction.Func, rowMode, rowMode);

    using (var stream = new MemoryStream(payload))
    {
        DataFrameWorkerFunction.ExecuteDelegate restoredDelegate =
            Utils.CommandSerDe.Deserialize<DataFrameWorkerFunction.ExecuteDelegate>(
                stream,
                out Utils.CommandSerDe.SerializedMode serializerMode,
                out Utils.CommandSerDe.SerializedMode deserializerMode,
                out var runMode);
        var restoredFunction = new DataFrameWorkerFunction(restoredDelegate);

        // The metadata read back must match what was passed to Serialize.
        Assert.Equal(rowMode, serializerMode);
        Assert.Equal(rowMode, deserializerMode);
        Assert.Equal("N", runMode);

        // Wrap a single-element Arrow string array as a DataFrame column and
        // run the restored function on it.
        var arrowStrings = (StringArray)ToArrowArray(new[] { "spark" });
        ArrowStringDataFrameColumn stringColumn = ToArrowStringDataFrameColumn(arrowStrings);
        DataFrameColumn result = restoredFunction.Func(new[] { stringColumn }, new[] { 0 });
        ArrowTestUtils.AssertEquals("hello spark", result);
    }
}
// Verifies that ArrowWorkerFunction.Chain composes functions so that the
// output of the inner function is fed to the outer one, for one and two levels.
public void TestChainingArrowWorkerFunction()
{
    // Innermost function: takes (int, string) and renders "<string>:<int>".
    var joinUdf = new ArrowUdfWrapper<int, string, string>(
        (number, text) => $"{text}:{number}");
    var innerFunc = new ArrowWorkerFunction(joinUdf.Execute);

    // Two outer functions, each adding its own prefix.
    var firstPrefixUdf = new ArrowUdfWrapper<string, string>(text => $"outer1:{text}");
    var firstOuterFunc = new ArrowWorkerFunction(firstPrefixUdf.Execute);

    var secondPrefixUdf = new ArrowUdfWrapper<string, string>(text => $"outer2:{text}");
    var secondOuterFunc = new ArrowWorkerFunction(secondPrefixUdf.Execute);

    Apache.Arrow.IArrowArray[] input = new[]
    {
        ToArrowArray(new[] { 100 }),
        ToArrowArray(new[] { "name" })
    };

    // Validate one-level chaining.
    var oneLevel = ArrowWorkerFunction.Chain(innerFunc, firstOuterFunc);
    var oneLevelResult = oneLevel.Func(0, input, new[] { 0, 1 });
    ArrowTestUtils.AssertEquals("outer1:name:100", oneLevelResult);

    // Validate two-level chaining.
    var twoLevel = ArrowWorkerFunction.Chain(oneLevel, secondOuterFunc);
    var twoLevelResult = twoLevel.Func(0, input, new[] { 0, 1 });
    ArrowTestUtils.AssertEquals("outer2:outer1:name:100", twoLevelResult);
}
// Verifies that an ArrowWorkerFunction built from an identity UDF returns
// its single input column unchanged.
public void TestArrowWorkerFunction()
{
    var identityUdf = new ArrowUdfWrapper<StringArray, StringArray>(strings => strings);
    var workerFunction = new ArrowWorkerFunction(identityUdf.Execute);

    string[] input = { "arg1" };
    var result = workerFunction.Func(new[] { ToArrowArray(input) }, new[] { 0 });

    ArrowTestUtils.AssertEquals(input[0], result);
}
// Verifies that a DataFrameWorkerFunction built from an identity UDF returns
// its single input column unchanged.
public void TestDataFrameWorkerFunction()
{
    var identityUdf =
        new DataFrameUdfWrapper<ArrowStringDataFrameColumn, ArrowStringDataFrameColumn>(
            strings => strings);
    var workerFunction = new DataFrameWorkerFunction(identityUdf.Execute);

    string[] input = { "arg1" };

    // Wrap the Arrow string array as a DataFrame column before invoking.
    var arrowStrings = (StringArray)ToArrowArray(input);
    ArrowStringDataFrameColumn stringColumn = ToArrowStringDataFrameColumn(arrowStrings);

    var result = workerFunction.Func(new[] { stringColumn }, new[] { 0 });

    ArrowTestUtils.AssertEquals(input[0], result);
}
// Verifies that DataFrameWorkerFunction.Chain composes functions so that the
// output of the inner function is fed to the outer one, for one and two levels.
public void TestChainingDataFrameWorkerFunction()
{
    // Innermost function: pairs each string with the number at the same position,
    // rendering "<string>:<number>".
    var joinUdf =
        new DataFrameUdfWrapper<Int32DataFrameColumn, ArrowStringDataFrameColumn, ArrowStringDataFrameColumn>(
            (numbers, strings) =>
            {
                // Apply only passes the current string, so the position in
                // numbers is tracked with a counter.
                long position = 0;
                return strings.Apply(value => $"{value}:{numbers[position++]}");
            });
    var innerFunc = new DataFrameWorkerFunction(joinUdf.Execute);

    // Two outer functions, each adding its own prefix.
    var firstPrefixUdf =
        new DataFrameUdfWrapper<ArrowStringDataFrameColumn, ArrowStringDataFrameColumn>(
            strings => strings.Apply(value => $"outer1:{value}"));
    var firstOuterFunc = new DataFrameWorkerFunction(firstPrefixUdf.Execute);

    var secondPrefixUdf =
        new DataFrameUdfWrapper<ArrowStringDataFrameColumn, ArrowStringDataFrameColumn>(
            strings => strings.Apply(value => $"outer2:{value}"));
    var secondOuterFunc = new DataFrameWorkerFunction(secondPrefixUdf.Execute);

    string[] inputString = { "name" };
    var arrowStrings = (StringArray)ToArrowArray(inputString);
    ArrowStringDataFrameColumn stringColumn = ToArrowStringDataFrameColumn(arrowStrings);

    var input = new DataFrameColumn[]
    {
        new Int32DataFrameColumn("Int", new List<int>() { 100 }),
        stringColumn
    };

    // Validate one-level chaining.
    DataFrameWorkerFunction oneLevel = DataFrameWorkerFunction.Chain(innerFunc, firstOuterFunc);
    var oneLevelResult = oneLevel.Func(input, new[] { 0, 1 });
    ArrowTestUtils.AssertEquals("outer1:name:100", oneLevelResult);

    // Validate two-level chaining.
    DataFrameWorkerFunction twoLevel = DataFrameWorkerFunction.Chain(oneLevel, secondOuterFunc);
    var twoLevelResult = twoLevel.Func(input, new[] { 0, 1 });
    ArrowTestUtils.AssertEquals("outer2:outer1:name:100", twoLevelResult);
}
// Verifies that ArrowWorkerFunction.Chain composes array-based functions so
// that the output of the inner function is fed to the outer one, for one and
// two levels.
public void TestChainingArrowWorkerFunction()
{
    // Innermost function: pairs each string with the number at the same index,
    // rendering "<string>:<number>".
    var joinUdf = new ArrowUdfWrapper<Int32Array, StringArray, StringArray>(
        (numbers, strings) =>
        {
            var joined = new string[strings.Length];
            for (int i = 0; i < joined.Length; ++i)
            {
                joined[i] = $"{strings.GetString(i)}:{numbers.Values[i]}";
            }

            return (StringArray)ToArrowArray(joined);
        });
    var innerFunc = new ArrowWorkerFunction(joinUdf.Execute);

    // First outer function: prefixes every element with "outer1:".
    var firstPrefixUdf = new ArrowUdfWrapper<StringArray, StringArray>(
        strings =>
        {
            var prefixed = new string[strings.Length];
            for (int i = 0; i < prefixed.Length; ++i)
            {
                prefixed[i] = $"outer1:{strings.GetString(i)}";
            }

            return (StringArray)ToArrowArray(prefixed);
        });
    var firstOuterFunc = new ArrowWorkerFunction(firstPrefixUdf.Execute);

    // Second outer function: prefixes every element with "outer2:".
    var secondPrefixUdf = new ArrowUdfWrapper<StringArray, StringArray>(
        strings =>
        {
            var prefixed = new string[strings.Length];
            for (int i = 0; i < prefixed.Length; ++i)
            {
                prefixed[i] = $"outer2:{strings.GetString(i)}";
            }

            return (StringArray)ToArrowArray(prefixed);
        });
    var secondOuterFunc = new ArrowWorkerFunction(secondPrefixUdf.Execute);

    Apache.Arrow.IArrowArray[] input = new[]
    {
        ToArrowArray(new[] { 100 }),
        ToArrowArray(new[] { "name" })
    };

    // Validate one-level chaining.
    var oneLevel = ArrowWorkerFunction.Chain(innerFunc, firstOuterFunc);
    var oneLevelResult = oneLevel.Func(input, new[] { 0, 1 });
    ArrowTestUtils.AssertEquals("outer1:name:100", oneLevelResult);

    // Validate two-level chaining.
    var twoLevel = ArrowWorkerFunction.Chain(oneLevel, secondOuterFunc);
    var twoLevelResult = twoLevel.Func(input, new[] { 0, 1 });
    ArrowTestUtils.AssertEquals("outer2:outer1:name:100", twoLevelResult);
}
// Verifies that DataFrameWorkerFunction.Chain composes functions so that the
// output of the inner function is fed to the outer one, for one and two levels.
// Each UDF builds a new Arrow string array and wraps it as a DataFrame column.
public void TestChainingDataFrameWorkerFunction()
{
    // Innermost function: pairs each string with the number at the same index,
    // rendering "<string>:<number>".
    var joinUdf =
        new DataFrameUdfWrapper<PrimitiveDataFrameColumn<int>, ArrowStringDataFrameColumn, ArrowStringDataFrameColumn>(
            (numbers, strings) =>
            {
                string[] joined =
                    (from i in Enumerable.Range(0, (int)strings.Length)
                     select $"{strings[i]}:{numbers[i]}").ToArray();
                var arrowResult = (StringArray)ToArrowArray(joined);
                return ToArrowStringDataFrameColumn(arrowResult);
            });
    var innerFunc = new DataFrameWorkerFunction(joinUdf.Execute);

    // First outer function: prefixes every element with "outer1:".
    var firstPrefixUdf =
        new DataFrameUdfWrapper<ArrowStringDataFrameColumn, ArrowStringDataFrameColumn>(
            strings =>
            {
                string[] prefixed =
                    (from i in Enumerable.Range(0, (int)strings.Length)
                     select $"outer1:{strings[i]}").ToArray();
                var arrowResult = (StringArray)ToArrowArray(prefixed);
                return ToArrowStringDataFrameColumn(arrowResult);
            });
    var firstOuterFunc = new DataFrameWorkerFunction(firstPrefixUdf.Execute);

    // Second outer function: prefixes every element with "outer2:".
    var secondPrefixUdf =
        new DataFrameUdfWrapper<ArrowStringDataFrameColumn, ArrowStringDataFrameColumn>(
            strings =>
            {
                string[] prefixed =
                    (from i in Enumerable.Range(0, (int)strings.Length)
                     select $"outer2:{strings[i]}").ToArray();
                var arrowResult = (StringArray)ToArrowArray(prefixed);
                return ToArrowStringDataFrameColumn(arrowResult);
            });
    var secondOuterFunc = new DataFrameWorkerFunction(secondPrefixUdf.Execute);

    string[] inputString = { "name" };
    var arrowStrings = (StringArray)ToArrowArray(inputString);
    ArrowStringDataFrameColumn stringColumn = ToArrowStringDataFrameColumn(arrowStrings);

    var input = new DataFrameColumn[]
    {
        new PrimitiveDataFrameColumn<int>("Int", new List<int>() { 100 }),
        stringColumn
    };

    // Validate one-level chaining.
    DataFrameWorkerFunction oneLevel = DataFrameWorkerFunction.Chain(innerFunc, firstOuterFunc);
    var oneLevelResult = oneLevel.Func(input, new[] { 0, 1 });
    ArrowTestUtils.AssertEquals("outer1:name:100", oneLevelResult);

    // Validate two-level chaining.
    DataFrameWorkerFunction twoLevel = DataFrameWorkerFunction.Chain(oneLevel, secondOuterFunc);
    var twoLevelResult = twoLevel.Func(input, new[] { 0, 1 });
    ArrowTestUtils.AssertEquals("outer2:outer1:name:100", twoLevelResult);
}