/// <summary>
/// Runs a two-column DataFrame UDF through <see cref="DataFrameWorkerFunction"/>:
/// every bool flag is OR-ed with whether the string in the same row contains "true".
/// </summary>
public void TestDataFrameWorkerFunctionForBool()
{
    var udf = new DataFrameUdfWrapper<
        ArrowStringDataFrameColumn,
        PrimitiveDataFrameColumn<bool>,
        PrimitiveDataFrameColumn<bool>>(
        (strings, flags) =>
        {
            // The flag column is updated in place and handed back as the result.
            for (long row = 0; row < strings.Length; ++row)
            {
                flags[row] = flags[row].Value || strings[row].Contains("true");
            }

            return flags;
        });
    var func = new DataFrameWorkerFunction(udf.Execute);

    // Rows 0-1 contain "true", rows 2-3 do not.
    var arrowStrings = (StringArray)ToArrowArray(
        new[] { "arg1_true", "arg1_true", "arg1_false", "arg1_false" });
    ArrowStringDataFrameColumn textColumn = ToArrowStringDataFrameColumn(arrowStrings);

    // Flags alternate: true for even row indices, false for odd ones.
    var flagColumn = new PrimitiveDataFrameColumn<bool>(
        "Bool",
        Enumerable.Range(0, 4).Select(x => x % 2 == 0));

    var input = new DataFrameColumn[] { textColumn, flagColumn };
    var results = (PrimitiveDataFrameColumn<bool>)func.Func(input, new[] { 0, 1 });

    Assert.Equal(4, results.Length);

    // Only the last row has both a false flag and a string without "true".
    Assert.True(results[0]);
    Assert.True(results[1]);
    Assert.True(results[2]);
    Assert.False(results[3]);
}
/// <summary>
/// Chains a one-argument string UDF in front of a two-argument (int, string) UDF and
/// checks that invoking the resulting function throws.
/// </summary>
public void TestInvalidChainingDataFrame()
{
    // Two-argument UDF: appends ":<number>" to every string, walking the numbers in step.
    var appendNumberUdf = new DataFrameUdfWrapper<
        Int32DataFrameColumn,
        ArrowStringDataFrameColumn,
        ArrowStringDataFrameColumn>(
        (numbers, strings) =>
        {
            long i = 0;
            return strings.Apply(cur => $"{cur}:{numbers[i++]}");
        });
    var func1 = new DataFrameWorkerFunction(appendNumberUdf.Execute);

    // One-argument UDF: prefixes every string with "outer1:".
    var prefixUdf = new DataFrameUdfWrapper<ArrowStringDataFrameColumn, ArrowStringDataFrameColumn>(
        strings => strings.Apply(cur => $"outer1:{cur}"));
    var func2 = new DataFrameWorkerFunction(prefixUdf.Execute);

    string[] inputString = { "name" };
    var arrowStrings = (StringArray)ToArrowArray(inputString);
    ArrowStringDataFrameColumn nameColumn = ToArrowStringDataFrameColumn(arrowStrings);

    var input = new DataFrameColumn[]
    {
        new Int32DataFrameColumn("Int", new List<int> { 100 }),
        nameColumn
    };

    // The order does not align since func2 is executed first.
    DataFrameWorkerFunction misorderedChain = DataFrameWorkerFunction.Chain(func2, func1);

    Assert.ThrowsAny<Exception>(() => misorderedChain.Func(input, new[] { 0, 1 }));
}
/// <summary>
/// Wraps an identity UDF in a <see cref="DataFrameWorkerFunction"/> and checks that a
/// single string column passes through with its value intact.
/// </summary>
public void TestDataFrameWorkerFunction()
{
    var identityUdf = new DataFrameUdfWrapper<ArrowStringDataFrameColumn, ArrowStringDataFrameColumn>(
        str => str);
    var func = new DataFrameWorkerFunction(identityUdf.Execute);

    string[] input = { "arg1" };
    var arrowStrings = (StringArray)ToArrowArray(input);
    ArrowStringDataFrameColumn textColumn = ToArrowStringDataFrameColumn(arrowStrings);

    // Argument offset 0 selects the only column that is passed in.
    var result = func.Func(new[] { textColumn }, new[] { 0 });

    ArrowTestUtils.AssertEquals(input[0], result);
}
/// <summary>
/// Checks one-level and two-level chaining of <see cref="DataFrameWorkerFunction"/>s:
/// an (int, string) UDF feeds its result into one, then two, string-prefixing UDFs.
/// </summary>
public void TestChainingDataFrameWorkerFunction()
{
    // Innermost UDF: appends ":<number>" to every string, walking the numbers in step.
    var appendNumberUdf = new DataFrameUdfWrapper<
        Int32DataFrameColumn,
        ArrowStringDataFrameColumn,
        ArrowStringDataFrameColumn>(
        (numbers, strings) =>
        {
            long i = 0;
            return strings.Apply(cur => $"{cur}:{numbers[i++]}");
        });
    var func1 = new DataFrameWorkerFunction(appendNumberUdf.Execute);

    // Outer UDFs: each one adds its own prefix to every string.
    var firstPrefixUdf = new DataFrameUdfWrapper<ArrowStringDataFrameColumn, ArrowStringDataFrameColumn>(
        strings => strings.Apply(cur => $"outer1:{cur}"));
    var func2 = new DataFrameWorkerFunction(firstPrefixUdf.Execute);

    var secondPrefixUdf = new DataFrameUdfWrapper<ArrowStringDataFrameColumn, ArrowStringDataFrameColumn>(
        strings => strings.Apply(cur => $"outer2:{cur}"));
    var func3 = new DataFrameWorkerFunction(secondPrefixUdf.Execute);

    string[] inputString = { "name" };
    var arrowStrings = (StringArray)ToArrowArray(inputString);
    ArrowStringDataFrameColumn nameColumn = ToArrowStringDataFrameColumn(arrowStrings);

    var input = new DataFrameColumn[]
    {
        new Int32DataFrameColumn("Int", new List<int> { 100 }),
        nameColumn
    };

    // Validate one-level chaining.
    DataFrameWorkerFunction oneLevelChain = DataFrameWorkerFunction.Chain(func1, func2);
    var oneLevelResult = oneLevelChain.Func(input, new[] { 0, 1 });
    ArrowTestUtils.AssertEquals("outer1:name:100", oneLevelResult);

    // Validate two-level chaining.
    DataFrameWorkerFunction twoLevelChain = DataFrameWorkerFunction.Chain(oneLevelChain, func3);
    var twoLevelResult = twoLevelChain.Func(input, new[] { 0, 1 });
    ArrowTestUtils.AssertEquals("outer2:outer1:name:100", twoLevelResult);
}
/// <summary>
/// Chains a one-argument string UDF in front of a two-argument (int, string) UDF and
/// checks that invoking the resulting function throws.
/// </summary>
public void TestInvalidChainingDataFrame()
{
    // Two-argument UDF: builds "<string>:<number>" for every row.
    var appendNumberUdf = new DataFrameUdfWrapper<
        PrimitiveDataFrameColumn<int>,
        ArrowStringDataFrameColumn,
        ArrowStringDataFrameColumn>(
        (numbers, strings) =>
        {
            var rowIndices = Enumerable.Range(0, (int)strings.Length);
            string[] joined = rowIndices
                .Select(i => $"{strings[i]}:{numbers[i]}")
                .ToArray();

            return ToArrowStringDataFrameColumn((StringArray)ToArrowArray(joined));
        });
    var func1 = new DataFrameWorkerFunction(appendNumberUdf.Execute);

    // One-argument UDF: builds "outer1:<string>" for every row.
    var prefixUdf = new DataFrameUdfWrapper<ArrowStringDataFrameColumn, ArrowStringDataFrameColumn>(
        strings =>
        {
            var rowIndices = Enumerable.Range(0, (int)strings.Length);
            string[] prefixed = rowIndices
                .Select(i => $"outer1:{strings[i]}")
                .ToArray();

            return ToArrowStringDataFrameColumn((StringArray)ToArrowArray(prefixed));
        });
    var func2 = new DataFrameWorkerFunction(prefixUdf.Execute);

    string[] inputString = { "name" };
    var arrowStrings = (StringArray)ToArrowArray(inputString);
    ArrowStringDataFrameColumn nameColumn = ToArrowStringDataFrameColumn(arrowStrings);

    var input = new DataFrameColumn[]
    {
        new PrimitiveDataFrameColumn<int>("Int", new List<int> { 100 }),
        nameColumn
    };

    // The order does not align since func2 is executed first.
    DataFrameWorkerFunction misorderedChain = DataFrameWorkerFunction.Chain(func2, func1);

    Assert.ThrowsAny<Exception>(() => misorderedChain.Func(input, new[] { 0, 1 }));
}
/// <summary>
/// Checks one-level and two-level chaining of <see cref="DataFrameWorkerFunction"/>s:
/// an (int, string) UDF feeds its result into one, then two, string-prefixing UDFs.
/// </summary>
public void TestChainingDataFrameWorkerFunction()
{
    // Innermost UDF: builds "<string>:<number>" for every row.
    var appendNumberUdf = new DataFrameUdfWrapper<
        PrimitiveDataFrameColumn<int>,
        ArrowStringDataFrameColumn,
        ArrowStringDataFrameColumn>(
        (numbers, strings) =>
        {
            var rowIndices = Enumerable.Range(0, (int)strings.Length);
            string[] joined = rowIndices
                .Select(i => $"{strings[i]}:{numbers[i]}")
                .ToArray();

            return ToArrowStringDataFrameColumn((StringArray)ToArrowArray(joined));
        });
    var func1 = new DataFrameWorkerFunction(appendNumberUdf.Execute);

    // First outer UDF: builds "outer1:<string>" for every row.
    var firstPrefixUdf = new DataFrameUdfWrapper<ArrowStringDataFrameColumn, ArrowStringDataFrameColumn>(
        strings =>
        {
            var rowIndices = Enumerable.Range(0, (int)strings.Length);
            string[] prefixed = rowIndices
                .Select(i => $"outer1:{strings[i]}")
                .ToArray();

            return ToArrowStringDataFrameColumn((StringArray)ToArrowArray(prefixed));
        });
    var func2 = new DataFrameWorkerFunction(firstPrefixUdf.Execute);

    // Second outer UDF: builds "outer2:<string>" for every row.
    var secondPrefixUdf = new DataFrameUdfWrapper<ArrowStringDataFrameColumn, ArrowStringDataFrameColumn>(
        strings =>
        {
            var rowIndices = Enumerable.Range(0, (int)strings.Length);
            string[] prefixed = rowIndices
                .Select(i => $"outer2:{strings[i]}")
                .ToArray();

            return ToArrowStringDataFrameColumn((StringArray)ToArrowArray(prefixed));
        });
    var func3 = new DataFrameWorkerFunction(secondPrefixUdf.Execute);

    string[] inputString = { "name" };
    var arrowStrings = (StringArray)ToArrowArray(inputString);
    ArrowStringDataFrameColumn nameColumn = ToArrowStringDataFrameColumn(arrowStrings);

    var input = new DataFrameColumn[]
    {
        new PrimitiveDataFrameColumn<int>("Int", new List<int> { 100 }),
        nameColumn
    };

    // Validate one-level chaining.
    DataFrameWorkerFunction oneLevelChain = DataFrameWorkerFunction.Chain(func1, func2);
    var oneLevelResult = oneLevelChain.Func(input, new[] { 0, 1 });
    ArrowTestUtils.AssertEquals("outer1:name:100", oneLevelResult);

    // Validate two-level chaining.
    DataFrameWorkerFunction twoLevelChain = DataFrameWorkerFunction.Chain(oneLevelChain, func3);
    var twoLevelResult = twoLevelChain.Func(input, new[] { 0, 1 });
    ArrowTestUtils.AssertEquals("outer2:outer1:name:100", twoLevelResult);
}