/// <summary>
/// Checks that <c>DataFrameWorkerFunction.Chain</c> composes DataFrame-based UDFs
/// one and two levels deep, using an <c>Int32DataFrameColumn</c> and an
/// <c>ArrowStringDataFrameColumn</c> as the inputs of the innermost function.
/// </summary>
public void TestChainingDataFrameWorkerFunction()
{
    // Innermost UDF: formats every string together with the number at the same position.
    var joinUdf =
        new DataFrameUdfWrapper<Int32DataFrameColumn, ArrowStringDataFrameColumn, ArrowStringDataFrameColumn>(
            (numbers, strings) =>
            {
                // Running position into 'numbers'; advanced once per string visited by Apply.
                long position = 0;
                return strings.Apply(cur => $"{cur}:{numbers[position++]}");
            });

    // Outer UDFs: each one prepends its own marker to every string.
    var outer1Udf = new DataFrameUdfWrapper<ArrowStringDataFrameColumn, ArrowStringDataFrameColumn>(
        strings => strings.Apply(cur => $"outer1:{cur}"));
    var outer2Udf = new DataFrameUdfWrapper<ArrowStringDataFrameColumn, ArrowStringDataFrameColumn>(
        strings => strings.Apply(cur => $"outer2:{cur}"));

    var func1 = new DataFrameWorkerFunction(joinUdf.Execute);
    var func2 = new DataFrameWorkerFunction(outer1Udf.Execute);
    var func3 = new DataFrameWorkerFunction(outer2Udf.Execute);

    // Input: one Int32 column holding 100 and one string column holding "name".
    var nameArray = (StringArray)ToArrowArray(new string[] { "name" });
    ArrowStringDataFrameColumn nameColumn = ToArrowStringDataFrameColumn(nameArray);
    var intColumn = new Int32DataFrameColumn("Int", new List<int> { 100 });
    var input = new DataFrameColumn[] { intColumn, nameColumn };

    // Validate one-level chaining.
    DataFrameWorkerFunction chainedFunc1 = DataFrameWorkerFunction.Chain(func1, func2);
    var oneLevelResult = chainedFunc1.Func(input, new int[] { 0, 1 });
    ArrowTestUtils.AssertEquals("outer1:name:100", oneLevelResult);

    // Validate two-level chaining.
    DataFrameWorkerFunction chainedFunc2 = DataFrameWorkerFunction.Chain(chainedFunc1, func3);
    var twoLevelResult = chainedFunc2.Func(input, new int[] { 0, 1 });
    ArrowTestUtils.AssertEquals("outer2:outer1:name:100", twoLevelResult);
}
/// <summary>
/// Checks that <c>ArrowWorkerFunction.Chain</c> composes Arrow-array-based UDFs
/// one and two levels deep and that the chained results match the expected strings.
/// </summary>
public void TestChainingArrowWorkerFunction()
{
    // Innermost UDF: builds "string:number" for every row of the two input arrays.
    var joinUdf = new ArrowUdfWrapper<Int32Array, StringArray, StringArray>(
        (numbers, strings) =>
        {
            var joined = new string[strings.Length];
            for (int row = 0; row < joined.Length; ++row)
            {
                joined[row] = $"{strings.GetString(row)}:{numbers.Values[row]}";
            }

            return (StringArray)ToArrowArray(joined);
        });

    // First outer UDF: prepends "outer1:" to every row.
    var outer1Udf = new ArrowUdfWrapper<StringArray, StringArray>(
        strings =>
        {
            var prefixed = new string[strings.Length];
            for (int row = 0; row < prefixed.Length; ++row)
            {
                prefixed[row] = $"outer1:{strings.GetString(row)}";
            }

            return (StringArray)ToArrowArray(prefixed);
        });

    // Second outer UDF: prepends "outer2:" to every row.
    var outer2Udf = new ArrowUdfWrapper<StringArray, StringArray>(
        strings =>
        {
            var prefixed = new string[strings.Length];
            for (int row = 0; row < prefixed.Length; ++row)
            {
                prefixed[row] = $"outer2:{strings.GetString(row)}";
            }

            return (StringArray)ToArrowArray(prefixed);
        });

    var func1 = new ArrowWorkerFunction(joinUdf.Execute);
    var func2 = new ArrowWorkerFunction(outer1Udf.Execute);
    var func3 = new ArrowWorkerFunction(outer2Udf.Execute);

    // Input: one integer array holding 100 and one string array holding "name".
    var input = new Apache.Arrow.IArrowArray[]
    {
        ToArrowArray(new[] { 100 }),
        ToArrowArray(new[] { "name" })
    };

    // Validate one-level chaining.
    var chainedFunc1 = ArrowWorkerFunction.Chain(func1, func2);
    var oneLevelResult = chainedFunc1.Func(input, new int[] { 0, 1 });
    ArrowTestUtils.AssertEquals("outer1:name:100", oneLevelResult);

    // Validate two-level chaining.
    var chainedFunc2 = ArrowWorkerFunction.Chain(chainedFunc1, func3);
    var twoLevelResult = chainedFunc2.Func(input, new int[] { 0, 1 });
    ArrowTestUtils.AssertEquals("outer2:outer1:name:100", twoLevelResult);
}
/// <summary>
/// Round-trips a DataFrame UDF through <c>Utils.CommandSerDe</c>: serializes the worker
/// function's delegate, deserializes it from a stream, checks the reported serializer /
/// deserializer modes and run mode, and finally invokes the deserialized function.
/// </summary>
public void TestCommandSerDeForSqlArrowDataFrame()
{
    // UDF under test: prefixes every input string with "hello ".
    // The lambda captures no outer state; only its parameter is used.
    var udfWrapper =
        new Sql.DataFrameUdfWrapper<ArrowStringDataFrameColumn, ArrowStringDataFrameColumn>(
            strings =>
            {
                int rowCount = (int)strings.Length;
                var greetings = new string[rowCount];
                for (int row = 0; row < rowCount; ++row)
                {
                    greetings[row] = $"hello {strings[row]}";
                }

                var greetingArray = (StringArray)ToArrowArray(greetings);
                return ToArrowStringDataFrameColumn(greetingArray);
            });

    var workerFunction = new DataFrameWorkerFunction(udfWrapper.Execute);

    byte[] serializedCommand = Utils.CommandSerDe.Serialize(
        workerFunction.Func,
        Utils.CommandSerDe.SerializedMode.Row,
        Utils.CommandSerDe.SerializedMode.Row);

    using (var stream = new MemoryStream(serializedCommand))
    {
        var restoredDelegate =
            Utils.CommandSerDe.Deserialize<DataFrameWorkerFunction.ExecuteDelegate>(
                stream,
                out Utils.CommandSerDe.SerializedMode serializerMode,
                out Utils.CommandSerDe.SerializedMode deserializerMode,
                out var runMode);
        var deserializedWorkerFunction = new DataFrameWorkerFunction(restoredDelegate);

        // The modes passed to Serialize (and the "N" run mode) must be read back unchanged.
        Assert.Equal(Utils.CommandSerDe.SerializedMode.Row, serializerMode);
        Assert.Equal(Utils.CommandSerDe.SerializedMode.Row, deserializerMode);
        Assert.Equal("N", runMode);

        // Run the deserialized function on a single-row string column.
        var sparkArray = (StringArray)ToArrowArray(new[] { "spark" });
        ArrowStringDataFrameColumn sparkColumn = ToArrowStringDataFrameColumn(sparkArray);

        DataFrameColumn result =
            deserializedWorkerFunction.Func(new[] { sparkColumn }, new int[] { 0 });
        ArrowTestUtils.AssertEquals("hello spark", result);
    }
}
/// <summary>
/// Checks that <c>DataFrameWorkerFunction.Chain</c> composes DataFrame-based UDFs
/// one and two levels deep, using a <c>PrimitiveDataFrameColumn&lt;int&gt;</c> and an
/// <c>ArrowStringDataFrameColumn</c> as the inputs of the innermost function.
/// </summary>
public void TestChainingDataFrameWorkerFunction()
{
    // Innermost UDF: builds "string:number" for every row.
    var joinUdf =
        new DataFrameUdfWrapper<PrimitiveDataFrameColumn<int>, ArrowStringDataFrameColumn, ArrowStringDataFrameColumn>(
            (numbers, strings) =>
            {
                string[] joined = Enumerable
                    .Range(0, (int)strings.Length)
                    .Select(row => $"{strings[row]}:{numbers[row]}")
                    .ToArray();
                var joinedArray = (StringArray)ToArrowArray(joined);
                return ToArrowStringDataFrameColumn(joinedArray);
            });

    // First outer UDF: prepends "outer1:" to every row.
    var outer1Udf = new DataFrameUdfWrapper<ArrowStringDataFrameColumn, ArrowStringDataFrameColumn>(
        strings =>
        {
            string[] prefixed = Enumerable
                .Range(0, (int)strings.Length)
                .Select(row => $"outer1:{strings[row]}")
                .ToArray();
            var prefixedArray = (StringArray)ToArrowArray(prefixed);
            return ToArrowStringDataFrameColumn(prefixedArray);
        });

    // Second outer UDF: prepends "outer2:" to every row.
    var outer2Udf = new DataFrameUdfWrapper<ArrowStringDataFrameColumn, ArrowStringDataFrameColumn>(
        strings =>
        {
            string[] prefixed = Enumerable
                .Range(0, (int)strings.Length)
                .Select(row => $"outer2:{strings[row]}")
                .ToArray();
            var prefixedArray = (StringArray)ToArrowArray(prefixed);
            return ToArrowStringDataFrameColumn(prefixedArray);
        });

    var func1 = new DataFrameWorkerFunction(joinUdf.Execute);
    var func2 = new DataFrameWorkerFunction(outer1Udf.Execute);
    var func3 = new DataFrameWorkerFunction(outer2Udf.Execute);

    // Input: one integer column holding 100 and one string column holding "name".
    var nameArray = (StringArray)ToArrowArray(new string[] { "name" });
    ArrowStringDataFrameColumn nameColumn = ToArrowStringDataFrameColumn(nameArray);
    var intColumn = new PrimitiveDataFrameColumn<int>("Int", new List<int> { 100 });
    var input = new DataFrameColumn[] { intColumn, nameColumn };

    // Validate one-level chaining.
    DataFrameWorkerFunction chainedFunc1 = DataFrameWorkerFunction.Chain(func1, func2);
    var oneLevelResult = chainedFunc1.Func(input, new int[] { 0, 1 });
    ArrowTestUtils.AssertEquals("outer1:name:100", oneLevelResult);

    // Validate two-level chaining.
    DataFrameWorkerFunction chainedFunc2 = DataFrameWorkerFunction.Chain(chainedFunc1, func3);
    var twoLevelResult = chainedFunc2.Func(input, new int[] { 0, 1 });
    ArrowTestUtils.AssertEquals("outer2:outer1:name:100", twoLevelResult);
}