/// <summary>
/// Runs a UDF taking a string column and a bool column through a
/// DataFrameWorkerFunction and checks that each flag ends up OR-ed with
/// whether the string in the same row contains "true".
/// </summary>
public void TestDataFrameWorkerFunctionForBool()
{
    // The UDF overwrites the bool column row by row and returns that same column.
    var udf = new DataFrameUdfWrapper<
        ArrowStringDataFrameColumn,
        PrimitiveDataFrameColumn<bool>,
        PrimitiveDataFrameColumn<bool>>(
        (texts, bits) =>
        {
            for (long row = 0; row < texts.Length; ++row)
            {
                bits[row] = bits[row].Value || texts[row].Contains("true");
            }

            return bits;
        });
    var workerFunction = new DataFrameWorkerFunction(udf.Execute);

    // Four input rows: two strings containing "true", two that do not.
    string[] rawStrings = { "arg1_true", "arg1_true", "arg1_false", "arg1_false" };
    var arrowStrings = (StringArray)ToArrowArray(rawStrings);
    ArrowStringDataFrameColumn textColumn = ToArrowStringDataFrameColumn(arrowStrings);

    // Flags alternate true, false, true, false (even indices are true).
    var flagColumn = new PrimitiveDataFrameColumn<bool>(
        "Bool",
        Enumerable.Range(0, 4).Select(n => n % 2 == 0));

    DataFrameColumn[] columns = { textColumn, flagColumn };
    int[] argOffsets = { 0, 1 };

    var output = (PrimitiveDataFrameColumn<bool>)workerFunction.Func(columns, argOffsets);

    Assert.Equal(4, output.Length);
    Assert.True(output[0]);
    Assert.True(output[1]);
    Assert.True(output[2]);
    Assert.False(output[3]);
}
/// <summary>
/// Serializes a DataFrameWorkerFunction delegate with CommandSerDe, deserializes
/// it again, and checks both the recovered modes and that the recovered function
/// still prefixes its input with "hello ".
/// </summary>
public void TestCommandSerDeForSqlArrowDataFrame()
{
    var wrapper = new Sql.DataFrameUdfWrapper<ArrowStringDataFrameColumn, ArrowStringDataFrameColumn>(
        (texts) => texts.Apply(value => $"hello {value}"));
    var original = new DataFrameWorkerFunction(wrapper.Execute);

    // Both the serializer and the deserializer mode are written as Row.
    byte[] payload = Utils.CommandSerDe.Serialize(
        original.Func,
        Utils.CommandSerDe.SerializedMode.Row,
        Utils.CommandSerDe.SerializedMode.Row);

    using var stream = new MemoryStream(payload);
    var restoredDelegate =
        Utils.CommandSerDe.Deserialize<DataFrameWorkerFunction.ExecuteDelegate>(
            stream,
            out Utils.CommandSerDe.SerializedMode readSerializerMode,
            out Utils.CommandSerDe.SerializedMode readDeserializerMode,
            out var readRunMode);
    var restored = new DataFrameWorkerFunction(restoredDelegate);

    Assert.Equal(Utils.CommandSerDe.SerializedMode.Row, readSerializerMode);
    Assert.Equal(Utils.CommandSerDe.SerializedMode.Row, readDeserializerMode);
    Assert.Equal("N", readRunMode);

    // Invoke the round-tripped function on a single-row string column.
    var arrowStrings = (StringArray)ToArrowArray(new[] { "spark" });
    ArrowStringDataFrameColumn textColumn = ToArrowStringDataFrameColumn(arrowStrings);
    DataFrameColumn output = restored.Func(new[] { textColumn }, new[] { 0 });

    AssertEquals("hello spark", output);
}
/// <summary>
/// Chains a one-argument UDF ahead of a two-argument UDF and checks that
/// invoking the resulting chain with two argument offsets throws.
/// </summary>
public void TestInvalidChainingDataFrame()
{
    // Two-argument UDF: appends ":<number>" to each string, walking the
    // numbers with a running index.
    var appendNumberUdf = new DataFrameUdfWrapper<
        Int32DataFrameColumn,
        ArrowStringDataFrameColumn,
        ArrowStringDataFrameColumn>(
        (ints, texts) =>
        {
            long next = 0;
            return texts.Apply(value => $"{value}:{ints[next++]}");
        });
    var func1 = new DataFrameWorkerFunction(appendNumberUdf.Execute);

    // One-argument UDF: prefixes each string with "outer1:".
    var prefixUdf = new DataFrameUdfWrapper<ArrowStringDataFrameColumn, ArrowStringDataFrameColumn>(
        (texts) => texts.Apply(value => $"outer1:{value}"));
    var func2 = new DataFrameWorkerFunction(prefixUdf.Execute);

    string[] names = { "name" };
    var arrowStrings = (StringArray)ToArrowArray(names);
    ArrowStringDataFrameColumn textColumn = ToArrowStringDataFrameColumn(arrowStrings);
    DataFrameColumn[] columns =
    {
        new Int32DataFrameColumn("Int", new List<int> { 100 }),
        textColumn
    };

    // func2 is placed first in the chain here, which is the misaligned order
    // under test; calling the chained function is expected to throw.
    DataFrameWorkerFunction misordered = DataFrameWorkerFunction.Chain(func2, func1);
    Assert.ThrowsAny<Exception>(() => misordered.Func(columns, new[] { 0, 1 }));
}
/// <summary>
/// Wraps an identity UDF in a DataFrameWorkerFunction and checks that the
/// single input string comes back unchanged.
/// </summary>
public void TestDataFrameWorkerFunction()
{
    var identityUdf = new DataFrameUdfWrapper<ArrowStringDataFrameColumn, ArrowStringDataFrameColumn>(
        (column) => column);
    var workerFunction = new DataFrameWorkerFunction(identityUdf.Execute);

    string[] input = { "arg1" };
    var arrowStrings = (StringArray)ToArrowArray(input);
    ArrowStringDataFrameColumn textColumn = ToArrowStringDataFrameColumn(arrowStrings);

    // Single column, referenced by argument offset 0.
    var output = workerFunction.Func(new[] { textColumn }, new[] { 0 });

    ArrowTestUtils.AssertEquals(input[0], output);
}
/// <summary>
/// Chains DataFrameWorkerFunctions one and two levels deep and checks that
/// each chained function's string transformation appears in the output.
/// </summary>
public void TestChainingDataFrameWorkerFunction()
{
    // Two-argument UDF: appends ":<number>" to each string, walking the
    // numbers with a running index.
    var appendNumberUdf = new DataFrameUdfWrapper<
        Int32DataFrameColumn,
        ArrowStringDataFrameColumn,
        ArrowStringDataFrameColumn>(
        (ints, texts) =>
        {
            long next = 0;
            return texts.Apply(value => $"{value}:{ints[next++]}");
        });
    var func1 = new DataFrameWorkerFunction(appendNumberUdf.Execute);

    // One-argument UDFs that add the "outer1:" and "outer2:" prefixes.
    var firstPrefixUdf = new DataFrameUdfWrapper<ArrowStringDataFrameColumn, ArrowStringDataFrameColumn>(
        (texts) => texts.Apply(value => $"outer1:{value}"));
    var func2 = new DataFrameWorkerFunction(firstPrefixUdf.Execute);

    var secondPrefixUdf = new DataFrameUdfWrapper<ArrowStringDataFrameColumn, ArrowStringDataFrameColumn>(
        (texts) => texts.Apply(value => $"outer2:{value}"));
    var func3 = new DataFrameWorkerFunction(secondPrefixUdf.Execute);

    string[] names = { "name" };
    var arrowStrings = (StringArray)ToArrowArray(names);
    ArrowStringDataFrameColumn textColumn = ToArrowStringDataFrameColumn(arrowStrings);
    DataFrameColumn[] columns =
    {
        new Int32DataFrameColumn("Int", new List<int> { 100 }),
        textColumn
    };

    // Validate one-level chaining: func1 followed by func2.
    DataFrameWorkerFunction oneLevel = DataFrameWorkerFunction.Chain(func1, func2);
    var oneLevelOutput = oneLevel.Func(columns, new[] { 0, 1 });
    ArrowTestUtils.AssertEquals("outer1:name:100", oneLevelOutput);

    // Validate two-level chaining: the previous chain followed by func3.
    DataFrameWorkerFunction twoLevel = DataFrameWorkerFunction.Chain(oneLevel, func3);
    var twoLevelOutput = twoLevel.Func(columns, new[] { 0, 1 });
    ArrowTestUtils.AssertEquals("outer2:outer1:name:100", twoLevelOutput);
}
/// <summary>
/// Chains a one-argument UDF ahead of a two-argument UDF and checks that
/// invoking the resulting chain with two argument offsets throws. The UDFs
/// build their output columns by hand from Arrow string arrays.
/// </summary>
public void TestInvalidChainingDataFrame()
{
    // Two-argument UDF: produces "<string>:<number>" for every row.
    var appendNumberUdf = new DataFrameUdfWrapper<
        PrimitiveDataFrameColumn<int>,
        ArrowStringDataFrameColumn,
        ArrowStringDataFrameColumn>(
        (ints, texts) =>
        {
            string[] joined = Enumerable.Range(0, (int)texts.Length)
                .Select(row => $"{texts[row]}:{ints[row]}")
                .ToArray();
            var arrowJoined = (StringArray)ToArrowArray(joined);
            return ToArrowStringDataFrameColumn(arrowJoined);
        });
    var func1 = new DataFrameWorkerFunction(appendNumberUdf.Execute);

    // One-argument UDF: produces "outer1:<string>" for every row.
    var prefixUdf = new DataFrameUdfWrapper<ArrowStringDataFrameColumn, ArrowStringDataFrameColumn>(
        (texts) =>
        {
            string[] prefixed = Enumerable.Range(0, (int)texts.Length)
                .Select(row => $"outer1:{texts[row]}")
                .ToArray();
            var arrowPrefixed = (StringArray)ToArrowArray(prefixed);
            return ToArrowStringDataFrameColumn(arrowPrefixed);
        });
    var func2 = new DataFrameWorkerFunction(prefixUdf.Execute);

    string[] names = { "name" };
    var arrowNames = (StringArray)ToArrowArray(names);
    ArrowStringDataFrameColumn textColumn = ToArrowStringDataFrameColumn(arrowNames);
    DataFrameColumn[] columns =
    {
        new PrimitiveDataFrameColumn<int>("Int", new List<int> { 100 }),
        textColumn
    };

    // func2 is placed first in the chain here, which is the misaligned order
    // under test; calling the chained function is expected to throw.
    DataFrameWorkerFunction misordered = DataFrameWorkerFunction.Chain(func2, func1);
    Assert.ThrowsAny<Exception>(() => misordered.Func(columns, new[] { 0, 1 }));
}
/// <summary>
/// Serializes a DataFrameWorkerFunction delegate with CommandSerDe, deserializes
/// it again, and checks both the recovered modes and that the recovered function
/// still prefixes its input with "hello ". The UDF builds its output column by
/// hand from an Arrow string array.
/// </summary>
public void TestCommandSerDeForSqlArrowDataFrame()
{
    var wrapper = new Sql.DataFrameUdfWrapper<ArrowStringDataFrameColumn, ArrowStringDataFrameColumn>(
        (texts) =>
        {
            string[] greeted = Enumerable.Range(0, (int)texts.Length)
                .Select(row => $"hello {texts[row]}")
                .ToArray();
            var arrowGreeted = (StringArray)ToArrowArray(greeted);
            return ToArrowStringDataFrameColumn(arrowGreeted);
        });
    var original = new DataFrameWorkerFunction(wrapper.Execute);

    // Both the serializer and the deserializer mode are written as Row.
    byte[] payload = Utils.CommandSerDe.Serialize(
        original.Func,
        Utils.CommandSerDe.SerializedMode.Row,
        Utils.CommandSerDe.SerializedMode.Row);

    using (var stream = new MemoryStream(payload))
    {
        var restoredDelegate =
            Utils.CommandSerDe.Deserialize<DataFrameWorkerFunction.ExecuteDelegate>(
                stream,
                out Utils.CommandSerDe.SerializedMode readSerializerMode,
                out Utils.CommandSerDe.SerializedMode readDeserializerMode,
                out var readRunMode);
        var restored = new DataFrameWorkerFunction(restoredDelegate);

        Assert.Equal(Utils.CommandSerDe.SerializedMode.Row, readSerializerMode);
        Assert.Equal(Utils.CommandSerDe.SerializedMode.Row, readDeserializerMode);
        Assert.Equal("N", readRunMode);

        // Invoke the round-tripped function on a single-row string column.
        var arrowStrings = (StringArray)ToArrowArray(new[] { "spark" });
        ArrowStringDataFrameColumn textColumn = ToArrowStringDataFrameColumn(arrowStrings);
        DataFrameColumn output = restored.Func(new[] { textColumn }, new[] { 0 });

        ArrowTestUtils.AssertEquals("hello spark", output);
    }
}
/// <summary>
/// Chains DataFrameWorkerFunctions one and two levels deep and checks that
/// each chained function's string transformation appears in the output. The
/// UDFs build their output columns by hand from Arrow string arrays.
/// </summary>
public void TestChainingDataFrameWorkerFunction()
{
    // Two-argument UDF: produces "<string>:<number>" for every row.
    var appendNumberUdf = new DataFrameUdfWrapper<
        PrimitiveDataFrameColumn<int>,
        ArrowStringDataFrameColumn,
        ArrowStringDataFrameColumn>(
        (ints, texts) =>
        {
            string[] joined = Enumerable.Range(0, (int)texts.Length)
                .Select(row => $"{texts[row]}:{ints[row]}")
                .ToArray();
            var arrowJoined = (StringArray)ToArrowArray(joined);
            return ToArrowStringDataFrameColumn(arrowJoined);
        });
    var func1 = new DataFrameWorkerFunction(appendNumberUdf.Execute);

    // One-argument UDF: produces "outer1:<string>" for every row.
    var firstPrefixUdf = new DataFrameUdfWrapper<ArrowStringDataFrameColumn, ArrowStringDataFrameColumn>(
        (texts) =>
        {
            string[] prefixed = Enumerable.Range(0, (int)texts.Length)
                .Select(row => $"outer1:{texts[row]}")
                .ToArray();
            var arrowPrefixed = (StringArray)ToArrowArray(prefixed);
            return ToArrowStringDataFrameColumn(arrowPrefixed);
        });
    var func2 = new DataFrameWorkerFunction(firstPrefixUdf.Execute);

    // One-argument UDF: produces "outer2:<string>" for every row.
    var secondPrefixUdf = new DataFrameUdfWrapper<ArrowStringDataFrameColumn, ArrowStringDataFrameColumn>(
        (texts) =>
        {
            string[] prefixed = Enumerable.Range(0, (int)texts.Length)
                .Select(row => $"outer2:{texts[row]}")
                .ToArray();
            var arrowPrefixed = (StringArray)ToArrowArray(prefixed);
            return ToArrowStringDataFrameColumn(arrowPrefixed);
        });
    var func3 = new DataFrameWorkerFunction(secondPrefixUdf.Execute);

    string[] names = { "name" };
    var arrowNames = (StringArray)ToArrowArray(names);
    ArrowStringDataFrameColumn textColumn = ToArrowStringDataFrameColumn(arrowNames);
    DataFrameColumn[] columns =
    {
        new PrimitiveDataFrameColumn<int>("Int", new List<int> { 100 }),
        textColumn
    };

    // Validate one-level chaining: func1 followed by func2.
    DataFrameWorkerFunction oneLevel = DataFrameWorkerFunction.Chain(func1, func2);
    var oneLevelOutput = oneLevel.Func(columns, new[] { 0, 1 });
    ArrowTestUtils.AssertEquals("outer1:name:100", oneLevelOutput);

    // Validate two-level chaining: the previous chain followed by func3.
    DataFrameWorkerFunction twoLevel = DataFrameWorkerFunction.Chain(oneLevel, func3);
    var twoLevelOutput = twoLevel.Func(columns, new[] { 0, 1 });
    ArrowTestUtils.AssertEquals("outer2:outer1:name:100", twoLevelOutput);
}