예제 #1
0
        /// <summary>
        /// Runs a two-column UDF (string column, bool column) through
        /// <c>DataFrameWorkerFunction</c> and checks the returned bool column.
        /// Each output flag is the input flag OR-ed with whether the string in
        /// the same row contains "true".
        /// </summary>
        public void TestDataFrameWorkerFunctionForBool()
        {
            var udf = new DataFrameUdfWrapper<ArrowStringDataFrameColumn, PrimitiveDataFrameColumn<bool>, PrimitiveDataFrameColumn<bool>>(
                (texts, bits) =>
                {
                    long row = 0;
                    while (row < texts.Length)
                    {
                        // The flag column is updated in place and handed back as the result.
                        bits[row] = bits[row].Value || texts[row].Contains("true");
                        ++row;
                    }

                    return bits;
                });
            var func = new DataFrameWorkerFunction(udf.Execute);

            // Rows 0 and 1 contain "true"; the generated flags are true for even rows only.
            var arrowStrings = (StringArray)ToArrowArray(new[] { "arg1_true", "arg1_true", "arg1_false", "arg1_false" });
            ArrowStringDataFrameColumn textColumn = ToArrowStringDataFrameColumn(arrowStrings);
            var flagColumn = new PrimitiveDataFrameColumn<bool>("Bool", Enumerable.Range(0, 4).Select(n => n % 2 == 0));
            var input = new DataFrameColumn[] { textColumn, flagColumn };
            var argOffsets = new[] { 0, 1 };

            var results = (PrimitiveDataFrameColumn<bool>)func.Func(input, argOffsets);

            Assert.Equal(4, results.Length);
            // Only the last row has neither a set flag nor a "true" string.
            Assert.True(results[0]);
            Assert.True(results[1]);
            Assert.True(results[2]);
            Assert.False(results[3]);
        }
예제 #2
0
        /// <summary>
        /// Chains a one-column UDF in front of a two-column UDF and expects
        /// invoking the chained function on two input columns to throw.
        /// </summary>
        public void TestInvalidChainingDataFrame()
        {
            // Two-column UDF: appends ":<number>" to every string.
            var appendNumberUdf = new DataFrameUdfWrapper<Int32DataFrameColumn, ArrowStringDataFrameColumn, ArrowStringDataFrameColumn>(
                (ints, texts) =>
                {
                    // Apply only hands over the string value, so a captured counter
                    // picks the number (presumably Apply visits rows in order).
                    long next = 0;
                    return texts.Apply(text => $"{text}:{ints[next++]}");
                });
            var appendNumber = new DataFrameWorkerFunction(appendNumberUdf.Execute);

            // One-column UDF: prefixes every string with "outer1:".
            var prefixUdf = new DataFrameUdfWrapper<ArrowStringDataFrameColumn, ArrowStringDataFrameColumn>(
                texts => texts.Apply(text => $"outer1:{text}"));
            var prefixOuter1 = new DataFrameWorkerFunction(prefixUdf.Execute);

            string[] names = { "name" };
            var arrowNames = (StringArray)ToArrowArray(names);
            ArrowStringDataFrameColumn nameColumn = ToArrowStringDataFrameColumn(arrowNames);
            var input = new DataFrameColumn[]
            {
                new Int32DataFrameColumn("Int", new List<int> { 100 }),
                nameColumn
            };

            // The order does not align: the one-column UDF is executed first,
            // ahead of the UDF that takes both input columns.
            DataFrameWorkerFunction misordered = DataFrameWorkerFunction.Chain(prefixOuter1, appendNumber);

            Assert.ThrowsAny<Exception>(() => misordered.Func(input, new[] { 0, 1 }));
        }
예제 #3
0
        /// <summary>
        /// Wraps an identity UDF over a string column in a
        /// <c>DataFrameWorkerFunction</c> and checks that invoking it on a
        /// single-value column yields that same value.
        /// </summary>
        public void TestDataFrameWorkerFunction()
        {
            // The UDF hands its argument column straight back.
            var identityUdf = new DataFrameUdfWrapper<ArrowStringDataFrameColumn, ArrowStringDataFrameColumn>(
                column => column);
            var func = new DataFrameWorkerFunction(identityUdf.Execute);

            string[] values = { "arg1" };
            var arrowValues = (StringArray)ToArrowArray(values);
            ArrowStringDataFrameColumn valueColumn = ToArrowStringDataFrameColumn(arrowValues);

            var actual = func.Func(new[] { valueColumn }, new[] { 0 });

            ArrowTestUtils.AssertEquals(values[0], actual);
        }
예제 #4
0
        /// <summary>
        /// Verifies one-level and two-level chaining of
        /// <c>DataFrameWorkerFunction</c> instances: a two-column UDF that joins
        /// a string with a number, followed by UDFs that add the "outer1:" and
        /// "outer2:" prefixes.
        /// </summary>
        public void TestChainingDataFrameWorkerFunction()
        {
            // Two-column UDF: appends ":<number>" to every string.
            var appendNumberUdf = new DataFrameUdfWrapper<Int32DataFrameColumn, ArrowStringDataFrameColumn, ArrowStringDataFrameColumn>(
                (ints, texts) =>
                {
                    // Apply only hands over the string value, so a captured counter
                    // picks the number (presumably Apply visits rows in order).
                    long next = 0;
                    return texts.Apply(text => $"{text}:{ints[next++]}");
                });
            var appendNumber = new DataFrameWorkerFunction(appendNumberUdf.Execute);

            // One-column UDFs: each adds its own prefix.
            var outer1Udf = new DataFrameUdfWrapper<ArrowStringDataFrameColumn, ArrowStringDataFrameColumn>(
                texts => texts.Apply(text => $"outer1:{text}"));
            var prefixOuter1 = new DataFrameWorkerFunction(outer1Udf.Execute);

            var outer2Udf = new DataFrameUdfWrapper<ArrowStringDataFrameColumn, ArrowStringDataFrameColumn>(
                texts => texts.Apply(text => $"outer2:{text}"));
            var prefixOuter2 = new DataFrameWorkerFunction(outer2Udf.Execute);

            string[] names = { "name" };
            var arrowNames = (StringArray)ToArrowArray(names);
            ArrowStringDataFrameColumn nameColumn = ToArrowStringDataFrameColumn(arrowNames);
            var input = new DataFrameColumn[]
            {
                new Int32DataFrameColumn("Int", new List<int> { 100 }),
                nameColumn
            };

            // One level: the joined "name:100" value gets the "outer1:" prefix.
            DataFrameWorkerFunction oneLevel = DataFrameWorkerFunction.Chain(appendNumber, prefixOuter1);
            var oneLevelResult = oneLevel.Func(input, new[] { 0, 1 });

            ArrowTestUtils.AssertEquals("outer1:name:100", oneLevelResult);

            // Two levels: the already chained function is extended with the "outer2:" prefix.
            DataFrameWorkerFunction twoLevel = DataFrameWorkerFunction.Chain(oneLevel, prefixOuter2);
            var twoLevelResult = twoLevel.Func(input, new[] { 0, 1 });

            ArrowTestUtils.AssertEquals("outer2:outer1:name:100", twoLevelResult);
        }
예제 #5
0
        /// <summary>
        /// Chains a one-column UDF in front of a two-column UDF and expects
        /// invoking the chained function on two input columns to throw.
        /// </summary>
        public void TestInvalidChainingDataFrame()
        {
            // Two-column UDF: builds "<string>:<number>" for every row.
            var joinUdf = new DataFrameUdfWrapper<PrimitiveDataFrameColumn<int>, ArrowStringDataFrameColumn, ArrowStringDataFrameColumn>(
                (ints, texts) =>
                {
                    int rowCount = (int)texts.Length;
                    var joined = new string[rowCount];
                    for (int row = 0; row < rowCount; ++row)
                    {
                        joined[row] = $"{texts[row]}:{ints[row]}";
                    }

                    return ToArrowStringDataFrameColumn((StringArray)ToArrowArray(joined));
                });
            var joinWithNumber = new DataFrameWorkerFunction(joinUdf.Execute);

            // One-column UDF: prefixes every row with "outer1:".
            var prefixUdf = new DataFrameUdfWrapper<ArrowStringDataFrameColumn, ArrowStringDataFrameColumn>(
                texts =>
                {
                    int rowCount = (int)texts.Length;
                    var prefixed = new string[rowCount];
                    for (int row = 0; row < rowCount; ++row)
                    {
                        prefixed[row] = $"outer1:{texts[row]}";
                    }

                    return ToArrowStringDataFrameColumn((StringArray)ToArrowArray(prefixed));
                });
            var prefixOuter1 = new DataFrameWorkerFunction(prefixUdf.Execute);

            string[] names = { "name" };
            var arrowNames = (StringArray)ToArrowArray(names);
            ArrowStringDataFrameColumn nameColumn = ToArrowStringDataFrameColumn(arrowNames);
            var input = new DataFrameColumn[]
            {
                new PrimitiveDataFrameColumn<int>("Int", new List<int> { 100 }),
                nameColumn
            };

            // The order does not align: the one-column UDF is executed first,
            // ahead of the UDF that takes both input columns.
            DataFrameWorkerFunction misordered = DataFrameWorkerFunction.Chain(prefixOuter1, joinWithNumber);

            Assert.ThrowsAny<Exception>(() => misordered.Func(input, new[] { 0, 1 }));
        }
예제 #6
0
        /// <summary>
        /// Verifies one-level and two-level chaining of
        /// <c>DataFrameWorkerFunction</c> instances whose UDFs rebuild their
        /// result as a fresh Arrow-backed string column: a two-column join of
        /// string and number, followed by the "outer1:" and "outer2:" prefixes.
        /// </summary>
        public void TestChainingDataFrameWorkerFunction()
        {
            // Two-column UDF: builds "<string>:<number>" for every row.
            var joinUdf = new DataFrameUdfWrapper<PrimitiveDataFrameColumn<int>, ArrowStringDataFrameColumn, ArrowStringDataFrameColumn>(
                (ints, texts) =>
                {
                    int rowCount = (int)texts.Length;
                    string[] joined = Enumerable.Range(0, rowCount)
                        .Select(row => $"{texts[row]}:{ints[row]}")
                        .ToArray();
                    return ToArrowStringDataFrameColumn((StringArray)ToArrowArray(joined));
                });
            var joinWithNumber = new DataFrameWorkerFunction(joinUdf.Execute);

            // One-column UDF: prefixes every row with "outer1:".
            var outer1Udf = new DataFrameUdfWrapper<ArrowStringDataFrameColumn, ArrowStringDataFrameColumn>(
                texts =>
                {
                    int rowCount = (int)texts.Length;
                    string[] prefixed = Enumerable.Range(0, rowCount)
                        .Select(row => $"outer1:{texts[row]}")
                        .ToArray();
                    return ToArrowStringDataFrameColumn((StringArray)ToArrowArray(prefixed));
                });
            var prefixOuter1 = new DataFrameWorkerFunction(outer1Udf.Execute);

            // One-column UDF: prefixes every row with "outer2:".
            var outer2Udf = new DataFrameUdfWrapper<ArrowStringDataFrameColumn, ArrowStringDataFrameColumn>(
                texts =>
                {
                    int rowCount = (int)texts.Length;
                    string[] prefixed = Enumerable.Range(0, rowCount)
                        .Select(row => $"outer2:{texts[row]}")
                        .ToArray();
                    return ToArrowStringDataFrameColumn((StringArray)ToArrowArray(prefixed));
                });
            var prefixOuter2 = new DataFrameWorkerFunction(outer2Udf.Execute);

            string[] names = { "name" };
            var arrowNames = (StringArray)ToArrowArray(names);
            ArrowStringDataFrameColumn nameColumn = ToArrowStringDataFrameColumn(arrowNames);
            var input = new DataFrameColumn[]
            {
                new PrimitiveDataFrameColumn<int>("Int", new List<int> { 100 }),
                nameColumn
            };

            // One level: the joined "name:100" value gets the "outer1:" prefix.
            DataFrameWorkerFunction oneLevel = DataFrameWorkerFunction.Chain(joinWithNumber, prefixOuter1);
            var oneLevelResult = oneLevel.Func(input, new[] { 0, 1 });

            ArrowTestUtils.AssertEquals("outer1:name:100", oneLevelResult);

            // Two levels: the already chained function is extended with the "outer2:" prefix.
            DataFrameWorkerFunction twoLevel = DataFrameWorkerFunction.Chain(oneLevel, prefixOuter2);
            var twoLevelResult = twoLevel.Func(input, new[] { 0, 1 });

            ArrowTestUtils.AssertEquals("outer2:outer1:name:100", twoLevelResult);
        }