Пример #1
0
 /**
  * Runs parse on every child of the given function node, in list order.
  * @param node the function node whose children are visited
  */
 private void addChildren(ExprNodeGenericFuncDesc node)
 {
     List<ExprNodeDesc> operands = node.getChildren();

     foreach (ExprNodeDesc operand in operands)
     {
         parse(operand);
     }
 }
Пример #2
0
        /**
         * Box every child from index start onwards into an array of literals.
         * @param expr the parent node
         * @param type the type used when boxing each literal
         * @param start index of the first child to box; earlier children are skipped
         * @return the boxed literals, or null as soon as a non-constant child is met
         */
        private static object[] getLiteralList(ExprNodeGenericFuncDesc expr,
                                               PredicateLeaf.Type type,
                                               int start)
        {
            List<ExprNodeDesc> operands = expr.getChildren();
            int total = operands.Count;
            object[] literals = new object[total - start];
            int next = 0;

            // only the children after the leading (variable) slots are boxed
            foreach (ExprNodeDesc operand in operands.subList(start, total))
            {
                ExprNodeConstantDesc constant = operand as ExprNodeConstantDesc;

                if (constant == null)
                {
                    // a non-literal operand means the whole list is unusable
                    return null;
                }
                literals[next] = boxLiteral(constant, type);
                next++;
            }
            return literals;
        }
Пример #3
0
        /**
         * Return the boxed literal at the given child position.
         * @param expr the parent node
         * @param type the type of the expression
         * @param position the child position to check
         * @return the boxed literal, or null when the position lies outside the
         *         child list or the child at that position is not a constant
         */
        private static object getLiteral(ExprNodeGenericFuncDesc expr,
                                         PredicateLeaf.Type type,
                                         int position)
        {
            List<ExprNodeDesc> children = expr.getChildren();

            // Same range guard getColumnName applies: a missing operand is
            // reported as "no literal" (null) instead of an index exception.
            if (position < 0 || position >= children.Count)
            {
                return null;
            }

            ExprNodeConstantDesc constant = children[position] as ExprNodeConstantDesc;

            if (constant == null)
            {
                return null;
            }
            return boxLiteral(constant, type);
        }
Пример #4
0
        /**
         * Look up the column referenced by the child in the given slot.
         * @param expr the expression to look in
         * @param variable the slot the variable is expected in
         * @return the column name, or null when the slot is out of range or the
         *         child in that slot is not a column reference
         */
        private static string getColumnName(ExprNodeGenericFuncDesc expr,
                                            int variable)
        {
            List<ExprNodeDesc> operands = expr.getChildren();
            bool slotExists = variable >= 0 && variable < operands.Count;

            if (!slotExists)
            {
                return null;
            }

            ExprNodeColumnDesc column = operands[variable] as ExprNodeColumnDesc;

            if (column == null)
            {
                return null;
            }
            return column.getColumn();
        }
Пример #5
0
        /**
         * Locate the constant operand of a two-child expression and box it.
         * @param expr the parent node to check
         * @param type the type used when boxing the literal
         * @return the boxed literal; null when the node does not have exactly two
         *         children, when no constant child exists, or when a second
         *         constant is met after a non-null literal was already boxed
         */
        private static object findLiteral(ExprNodeGenericFuncDesc expr,
                                          PredicateLeaf.Type type)
        {
            List<ExprNodeDesc> operands = expr.getChildren();

            if (operands.Count != 2)
            {
                return null;
            }

            object literal = null;

            foreach (ExprNodeDesc operand in operands)
            {
                ExprNodeConstantDesc constant = operand as ExprNodeConstantDesc;

                if (constant == null)
                {
                    continue;
                }
                if (literal != null)
                {
                    // a literal was already found, so the answer is ambiguous
                    return null;
                }
                literal = boxLiteral(constant, type);
            }
            return literal;
        }
 /**
  * Converts the given predicate expression into a SearchArgument.
  * @param expression the predicate to convert
  * @return the result of buildSearchArgument on a converter built from the expression
  */
 public static SearchArgument create(ExprNodeGenericFuncDesc expression)
 {
     ConvertAstToSearchArg converter = new ConvertAstToSearchArg(expression);

     return converter.buildSearchArgument();
 }
Пример #7
0
        /**
         * Writes the test data uncompressed, sets the min/max split size
         * configuration entries to 1000/5000, and checks how many splits
         * OrcInputFormat returns for the filter "userid <= N" over a series of
         * N values.
         */
        public void testSplitEliminationSmallMaxSplit()
        {
            ObjectInspector rowInspector = ObjectInspectorFactory.getReflectionObjectInspector(typeof(AllTypesRow));

            using (Stream output = File.OpenWrite(testFilePath))
            {
                using (Writer orcWriter = OrcFile.createWriter(testFilePath, output, conf, rowInspector,
                                                               100000, CompressionKind.NONE, 10000, 10000))
                {
                    writeData(orcWriter);
                }
            }
            conf.set(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMINSPLITSIZE"), "1000");
            conf.set(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMAXSPLITSIZE"), "5000");
            InputFormat @in = new OrcInputFormat();

            FileInputFormat.setInputPaths(conf, testFilePath.ToString());

            // filter: userid <= 100
            GenericUDF lessOrEqual = new GenericUDFOPEqualOrLessThan();
            List<ExprNodeDesc> operands = new List<ExprNodeDesc>();

            operands.Add(new ExprNodeColumnDesc(typeof(long), "userid", "T", false));
            operands.Add(new ExprNodeConstantDesc(100));

            ExprNodeGenericFuncDesc filter = new ExprNodeGenericFuncDesc(rowInspector, lessOrEqual, operands);

            conf.set("hive.io.filter.expr.serialized", Utilities.serializeExpression(filter));
            Assert.Equal(5, @in.getSplits(conf, 1).Length);

            // remaining cases: only the constant operand changes between checks
            int[] upperBounds    = { 1, 2, 5, 13, 29, 70 };
            int[] expectedSplits = { 0, 1, 2, 3, 4, 5 };

            for (int i = 0; i < upperBounds.Length; i++)
            {
                operands[1] = new ExprNodeConstantDesc(upperBounds[i]);
                filter      = new ExprNodeGenericFuncDesc(rowInspector, lessOrEqual, operands);
                conf.set("hive.io.filter.expr.serialized", Utilities.serializeExpression(filter));
                Assert.Equal(expectedSplits[i], @in.getSplits(conf, 1).Length);
            }
        }
Пример #8
0
 /**
  * Builds a SearchArgument from the given predicate expression.
  * @param expression the predicate to convert
  * @return whatever buildSearchArgument yields for a converter created from the expression
  */
 public static SearchArgument create(ExprNodeGenericFuncDesc expression)
 {
     ConvertAstToSearchArg translator = new ConvertAstToSearchArg(expression);
     SearchArgument sarg = translator.buildSearchArgument();

     return sarg;
 }
Пример #9
0
 /**
  * Creates the converter and immediately parses the given predicate.
  * @param expression the expression to translate.
  */
 ConvertAstToSearchArg(ExprNodeGenericFuncDesc expression)
 {
     // all of the translation work happens in parse
     this.parse(expression);
 }
        /**
         * Collect the boxed literals of all children starting at the given index.
         * @param expr the parent node
         * @param type the type used when boxing each literal
         * @param start index of the first child to include
         * @return the boxed literals, or null if any included child is not a constant
         */
        private static object[] getLiteralList(ExprNodeGenericFuncDesc expr,
                                               PredicateLeaf.Type type,
                                               int start)
        {
            List<ExprNodeDesc> operands = expr.getChildren();
            int operandCount = operands.Count;
            object[] boxed = new object[operandCount - start];

            // children before 'start' (the variable) are not part of the list
            int filled = 0;
            foreach (ExprNodeDesc operand in operands.subList(start, operandCount))
            {
                ExprNodeConstantDesc constant = operand as ExprNodeConstantDesc;
                if (constant == null)
                {
                    // give up on the first non-literal operand
                    return null;
                }
                boxed[filled] = boxLiteral(constant, type);
                filled++;
            }
            return boxed;
        }
 /**
  * Return the boxed literal at the given child position.
  * @param expr the parent node
  * @param type the type of the expression
  * @param position the child position to check
  * @return the boxed literal, or null when the position lies outside the
  *         child list or the child at that position is not a constant
  */
 private static object getLiteral(ExprNodeGenericFuncDesc expr,
                                  PredicateLeaf.Type type,
                                  int position)
 {
     List<ExprNodeDesc> children = expr.getChildren();

     // Same range guard getColumnName applies: a missing operand is
     // reported as "no literal" (null) instead of an index exception.
     if (position < 0 || position >= children.Count)
     {
         return null;
     }

     ExprNodeConstantDesc constant = children[position] as ExprNodeConstantDesc;
     if (constant == null)
     {
         return null;
     }
     return boxLiteral(constant, type);
 }
 /**
  * Fetch the name of the column that sits in the given child slot.
  * @param expr the expression to look in
  * @param variable the slot the variable is expected in
  * @return the column name, or null when the slot is out of range or does
  *         not hold a column reference
  */
 private static string getColumnName(ExprNodeGenericFuncDesc expr,
                                     int variable)
 {
     List<ExprNodeDesc> operands = expr.getChildren();
     bool inRange = variable >= 0 && variable < operands.Count;
     if (!inRange)
     {
         return null;
     }

     ExprNodeColumnDesc columnRef = operands[variable] as ExprNodeColumnDesc;
     if (columnRef == null)
     {
         return null;
     }
     return columnRef.getColumn();
 }
 /**
  * Search a two-child expression for its constant operand and box it.
  * @param expr the parent node to check
  * @param type the type used when boxing the literal
  * @return the boxed literal; null when the node does not have exactly two
  *         children, when neither child is a constant, or when another
  *         constant follows an already boxed non-null literal
  */
 private static object findLiteral(ExprNodeGenericFuncDesc expr,
                                   PredicateLeaf.Type type)
 {
     List<ExprNodeDesc> operands = expr.getChildren();
     if (operands.Count != 2)
     {
         return null;
     }

     object found = null;
     foreach (ExprNodeDesc operand in operands)
     {
         ExprNodeConstantDesc constant = operand as ExprNodeConstantDesc;
         if (constant == null)
         {
             continue;
         }
         if (found != null)
         {
             // second literal after a boxed one: no single answer
             return null;
         }
         found = boxLiteral(constant, type);
     }
     return found;
 }
        /**
         * Writes the test data uncompressed, sets the min/max split size
         * configuration entries to 1000/150000, and checks the number of splits
         * OrcInputFormat returns for several AND-combined predicates over the
         * "userid" and "subtype" columns.
         *
         * The operand lists (childExpr, childExpr1, childExpr2) and the udf
         * variables are mutated and reused from one scenario to the next, so
         * the statement order matters.
         */
        public void testSplitEliminationComplexExpr()
        {
            ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(typeof(AllTypesRow));

            using (Stream file = File.OpenWrite(testFilePath))
            using (Writer writer = OrcFile.createWriter(testFilePath, file, conf, inspector,
                100000, CompressionKind.NONE, 10000, 10000))
            {
                writeData(writer);
            }

            conf.set(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMINSPLITSIZE"), "1000");
            conf.set(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMAXSPLITSIZE"), "150000");
            InputFormat @in = new OrcInputFormat();
            FileInputFormat.setInputPaths(conf, testFilePath.ToString());

            // predicate expression: userid <= 100 and subtype <= 1000.0
            GenericUDF udf = new GenericUDFOPEqualOrLessThan();
            List<ExprNodeDesc> childExpr = new List<ExprNodeDesc>();
            ExprNodeColumnDesc col = new ExprNodeColumnDesc(typeof(long), "userid", "T", false);
            ExprNodeConstantDesc con = new ExprNodeConstantDesc(100);
            childExpr.Add(col);
            childExpr.Add(con);
            ExprNodeGenericFuncDesc en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);

            GenericUDF udf1 = new GenericUDFOPEqualOrLessThan();
            List<ExprNodeDesc> childExpr1 = new List<ExprNodeDesc>();
            ExprNodeColumnDesc col1 = new ExprNodeColumnDesc(typeof(double), "subtype", "T", false);
            ExprNodeConstantDesc con1 = new ExprNodeConstantDesc(1000.0);
            childExpr1.Add(col1);
            childExpr1.Add(con1);
            ExprNodeGenericFuncDesc en1 = new ExprNodeGenericFuncDesc(inspector, udf1, childExpr1);

            // AND node joining the two comparisons above
            GenericUDF udf2 = new GenericUDFOPAnd();
            List<ExprNodeDesc> childExpr2 = new List<ExprNodeDesc>();
            childExpr2.Add(en);
            childExpr2.Add(en1);
            ExprNodeGenericFuncDesc en2 = new ExprNodeGenericFuncDesc(inspector, udf2, childExpr2);

            string sargStr = Utilities.serializeExpression(en2);
            conf.set("hive.io.filter.expr.serialized", sargStr);
            InputSplit[] splits = @in.getSplits(conf, 1);
            Assert.Equal(2, splits.Length);

            // predicate expression: userid <= 2 and subtype <= 0.0
            con = new ExprNodeConstantDesc(2);
            childExpr[1] = con;
            en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);

            con1 = new ExprNodeConstantDesc(0.0);
            childExpr1[1] = con1;
            en1 = new ExprNodeGenericFuncDesc(inspector, udf1, childExpr1);

            childExpr2[0] = en;
            childExpr2[1] = en1;
            en2 = new ExprNodeGenericFuncDesc(inspector, udf2, childExpr2);

            sargStr = Utilities.serializeExpression(en2);
            conf.set("hive.io.filter.expr.serialized", sargStr);
            splits = @in.getSplits(conf, 1);
            // no stripe will satisfy the predicate
            Assert.Equal(0, splits.Length);

            // predicate expression: userid <= 2 and subtype <= 1.0
            con = new ExprNodeConstantDesc(2);
            childExpr[1] = con;
            en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);

            con1 = new ExprNodeConstantDesc(1.0);
            childExpr1[1] = con1;
            en1 = new ExprNodeGenericFuncDesc(inspector, udf1, childExpr1);

            childExpr2[0] = en;
            childExpr2[1] = en1;
            en2 = new ExprNodeGenericFuncDesc(inspector, udf2, childExpr2);

            sargStr = Utilities.serializeExpression(en2);
            conf.set("hive.io.filter.expr.serialized", sargStr);
            splits = @in.getSplits(conf, 1);
            // only the first stripe will satisfy the condition, hence a single split
            Assert.Equal(1, splits.Length);

            // predicate expression: userid = 13 and subtype <= 80.0
            // (udf switches to equality here; udf1 is still "<=")
            udf = new GenericUDFOPEqual();
            con = new ExprNodeConstantDesc(13);
            childExpr[1] = con;
            en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);

            con1 = new ExprNodeConstantDesc(80.0);
            childExpr1[1] = con1;
            en1 = new ExprNodeGenericFuncDesc(inspector, udf1, childExpr1);

            childExpr2[0] = en;
            childExpr2[1] = en1;
            en2 = new ExprNodeGenericFuncDesc(inspector, udf2, childExpr2);

            sargStr = Utilities.serializeExpression(en2);
            conf.set("hive.io.filter.expr.serialized", sargStr);
            splits = @in.getSplits(conf, 1);
            // two splits are expected (original note: the first two stripes
            // satisfy the condition)
            Assert.Equal(2, splits.Length);

            // predicate expression: userid = 13 and subtype = 80.0
            udf = new GenericUDFOPEqual();
            con = new ExprNodeConstantDesc(13);
            childExpr[1] = con;
            en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);

            udf1 = new GenericUDFOPEqual();
            con1 = new ExprNodeConstantDesc(80.0);
            childExpr1[1] = con1;
            en1 = new ExprNodeGenericFuncDesc(inspector, udf1, childExpr1);

            childExpr2[0] = en;
            childExpr2[1] = en1;
            en2 = new ExprNodeGenericFuncDesc(inspector, udf2, childExpr2);

            sargStr = Utilities.serializeExpression(en2);
            conf.set("hive.io.filter.expr.serialized", sargStr);
            splits = @in.getSplits(conf, 1);
            // only the second stripe will satisfy the condition, hence a single split
            Assert.Equal(1, splits.Length);
        }
        /**
         * Writes the test data uncompressed, sets the min/max split size
         * configuration entries to 1000/5000, and verifies the number of splits
         * OrcInputFormat produces for the filter "userid <= N" as N varies.
         */
        public void testSplitEliminationSmallMaxSplit()
        {
            ObjectInspector rowInspector = ObjectInspectorFactory.getReflectionObjectInspector(typeof(AllTypesRow));

            using (Stream output = File.OpenWrite(testFilePath))
            {
                using (Writer orcWriter = OrcFile.createWriter(testFilePath, output, conf, rowInspector,
                    100000, CompressionKind.NONE, 10000, 10000))
                {
                    writeData(orcWriter);
                }
            }
            conf.set(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMINSPLITSIZE"), "1000");
            conf.set(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMAXSPLITSIZE"), "5000");
            InputFormat @in = new OrcInputFormat();
            FileInputFormat.setInputPaths(conf, testFilePath.ToString());

            // filter: userid <= 100
            GenericUDF lessOrEqual = new GenericUDFOPEqualOrLessThan();
            List<ExprNodeDesc> operands = new List<ExprNodeDesc>();
            operands.Add(new ExprNodeColumnDesc(typeof(long), "userid", "T", false));
            operands.Add(new ExprNodeConstantDesc(100));
            ExprNodeGenericFuncDesc filter = new ExprNodeGenericFuncDesc(rowInspector, lessOrEqual, operands);
            conf.set("hive.io.filter.expr.serialized", Utilities.serializeExpression(filter));
            Assert.Equal(5, @in.getSplits(conf, 1).Length);

            // each remaining case swaps in a new constant and re-checks the split count
            int[] upperBounds = { 1, 2, 5, 13, 29, 70 };
            int[] expectedSplits = { 0, 1, 2, 3, 4, 5 };
            for (int i = 0; i < upperBounds.Length; i++)
            {
                operands[1] = new ExprNodeConstantDesc(upperBounds[i]);
                filter = new ExprNodeGenericFuncDesc(rowInspector, lessOrEqual, operands);
                conf.set("hive.io.filter.expr.serialized", Utilities.serializeExpression(filter));
                Assert.Equal(expectedSplits[i], @in.getSplits(conf, 1).Length);
            }
        }
Пример #16
0
        /**
         * Emit a predicate leaf for the given operator, reading the column from
         * the child at index variable.
         *
         * A YES_NO_NULL literal is emitted instead when the column name cannot
         * be determined or getType yields null.
         * NOTE(review): if PredicateLeaf.Type is an enum the null test can never
         * succeed - confirm what getType returns for unsupported types.
         *
         * @param operator the operator type that was found
         * @param expression the expression to check
         * @param variable the index of the child holding the column
         */
        private void createLeaf(PredicateLeaf.Operator @operator,
                                ExprNodeGenericFuncDesc expression,
                                int variable)
        {
            string column = getColumnName(expression, variable);

            if (column == null)
            {
                builder.literal(TruthValue.YES_NO_NULL);
                return;
            }

            PredicateLeaf.Type type = getType(expression.getChildren().get(variable));

            if (type == null)
            {
                builder.literal(TruthValue.YES_NO_NULL);
                return;
            }

            // if the variable was on the right, the comparison is flipped:
            // "<" becomes NOT "<=" and "<=" becomes NOT "<"
            bool negate = false;

            if (variable != 0)
            {
                switch (@operator)
                {
                case PredicateLeaf.Operator.LESS_THAN:
                    @operator = PredicateLeaf.Operator.LESS_THAN_EQUALS;
                    negate    = true;
                    break;

                case PredicateLeaf.Operator.LESS_THAN_EQUALS:
                    @operator = PredicateLeaf.Operator.LESS_THAN;
                    negate    = true;
                    break;
                }
            }

            if (negate)
            {
                builder.startNot();
            }

            switch (@operator)
            {
            case PredicateLeaf.Operator.IS_NULL:
                builder.isNull(column, type);
                break;

            case PredicateLeaf.Operator.EQUALS:
            {
                object literal = findLiteral(expression, type);
                builder.equals(column, type, literal);
                break;
            }

            case PredicateLeaf.Operator.NULL_SAFE_EQUALS:
            {
                object literal = findLiteral(expression, type);
                builder.nullSafeEquals(column, type, literal);
                break;
            }

            case PredicateLeaf.Operator.LESS_THAN:
            {
                object literal = findLiteral(expression, type);
                builder.lessThan(column, type, literal);
                break;
            }

            case PredicateLeaf.Operator.LESS_THAN_EQUALS:
            {
                object literal = findLiteral(expression, type);
                builder.lessThanEquals(column, type, literal);
                break;
            }

            case PredicateLeaf.Operator.IN:
            {
                // the literals follow the variable slot
                object[] literals = getLiteralList(expression, type, variable + 1);
                builder.@in(column, type, literals);
                break;
            }

            case PredicateLeaf.Operator.BETWEEN:
            {
                // the two bounds are the two children after the variable slot
                object lower = getLiteral(expression, type, variable + 1);
                object upper = getLiteral(expression, type, variable + 2);
                builder.between(column, type, lower, upper);
                break;
            }
            }

            if (negate)
            {
                builder.end();
            }
        }
 /**
  * Parses each child of the given function node in order.
  * @param node the function node whose children are parsed
  */
 private void addChildren(ExprNodeGenericFuncDesc node)
 {
     List<ExprNodeDesc> children = node.getChildren();

     foreach (ExprNodeDesc current in children)
     {
         parse(current);
     }
 }
Пример #18
0
 /**
  * Build a leaf for an expression whose variable position is not known in
  * advance: the slot is looked up with findVariable and the work is handed
  * to the three-argument overload.
  * @param operator the operator type that was found
  * @param expression the expression to check
  */
 private void createLeaf(PredicateLeaf.Operator @operator,
                         ExprNodeGenericFuncDesc expression)
 {
     int variableSlot = findVariable(expression);

     createLeaf(@operator, expression, variableSlot);
 }
        /**
         * Add a predicate leaf for the given operator to the builder, taking the
         * column from the child at index variable.
         *
         * When the column name cannot be resolved, or getType yields null, a
         * YES_NO_NULL literal is added instead.
         * NOTE(review): if PredicateLeaf.Type is an enum the null test can never
         * succeed - confirm what getType returns for unsupported types.
         *
         * @param operator the operator type that was found
         * @param expression the expression to check
         * @param variable the index of the child holding the column
         */
        private void createLeaf(PredicateLeaf.Operator @operator,
                                ExprNodeGenericFuncDesc expression,
                                int variable)
        {
            string column = getColumnName(expression, variable);
            if (column == null)
            {
                builder.literal(TruthValue.YES_NO_NULL);
                return;
            }

            PredicateLeaf.Type type = getType(expression.getChildren().get(variable));
            if (type == null)
            {
                builder.literal(TruthValue.YES_NO_NULL);
                return;
            }

            // if the variable was on the right, flip the comparison:
            // "<" turns into NOT "<=" and "<=" turns into NOT "<"
            bool wrapInNot = false;
            if (variable != 0)
            {
                switch (@operator)
                {
                    case PredicateLeaf.Operator.LESS_THAN:
                        @operator = PredicateLeaf.Operator.LESS_THAN_EQUALS;
                        wrapInNot = true;
                        break;
                    case PredicateLeaf.Operator.LESS_THAN_EQUALS:
                        @operator = PredicateLeaf.Operator.LESS_THAN;
                        wrapInNot = true;
                        break;
                }
            }

            if (wrapInNot)
            {
                builder.startNot();
            }

            switch (@operator)
            {
                case PredicateLeaf.Operator.IS_NULL:
                    builder.isNull(column, type);
                    break;
                case PredicateLeaf.Operator.EQUALS:
                {
                    object literal = findLiteral(expression, type);
                    builder.equals(column, type, literal);
                    break;
                }
                case PredicateLeaf.Operator.NULL_SAFE_EQUALS:
                {
                    object literal = findLiteral(expression, type);
                    builder.nullSafeEquals(column, type, literal);
                    break;
                }
                case PredicateLeaf.Operator.LESS_THAN:
                {
                    object literal = findLiteral(expression, type);
                    builder.lessThan(column, type, literal);
                    break;
                }
                case PredicateLeaf.Operator.LESS_THAN_EQUALS:
                {
                    object literal = findLiteral(expression, type);
                    builder.lessThanEquals(column, type, literal);
                    break;
                }
                case PredicateLeaf.Operator.IN:
                {
                    // the literals follow the variable slot
                    object[] literals = getLiteralList(expression, type, variable + 1);
                    builder.@in(column, type, literals);
                    break;
                }
                case PredicateLeaf.Operator.BETWEEN:
                {
                    // the two bounds are the two children after the variable slot
                    object lower = getLiteral(expression, type, variable + 1);
                    object upper = getLiteral(expression, type, variable + 2);
                    builder.between(column, type, lower, upper);
                    break;
                }
            }

            if (wrapInNot)
            {
                builder.end();
            }
        }
Пример #20
0
        /**
         * Do the recursive parse of the Hive ExprNodeDesc into our ExpressionTree.
         *
         * Every path through this method adds exactly one node to the builder:
         * a logical group (or/and/not) around the parsed children, a leaf
         * created via createLeaf (possibly wrapped in a NOT), a boolean column
         * truth test, or a YES_NO_NULL literal for anything not recognized.
         *
         * @param expression the Hive ExprNodeDesc
         */
        private void parse(ExprNodeDesc expression)
        {
            // Most of the stuff we can handle are generic function descriptions, so
            // handle the special cases.
            // The check is on the exact runtime type, so subclasses of
            // ExprNodeGenericFuncDesc also take this branch.
            if (expression.GetType() != typeof(ExprNodeGenericFuncDesc))
            {
                // if it is a reference to a boolean column, convert it to a truth test.
                if (expression is ExprNodeColumnDesc)
                {
                    ExprNodeColumnDesc columnDesc = (ExprNodeColumnDesc)expression;
                    if (columnDesc.getTypeString().Equals("boolean"))
                    {
                        builder.equals(columnDesc.getColumn(), PredicateLeaf.Type.BOOLEAN,
                                       true);
                        return;
                    }
                }

                // otherwise, we don't know what to do so make it a maybe
                builder.literal(TruthValue.YES_NO_NULL);
                return;
            }

            // get the kind of expression
            ExprNodeGenericFuncDesc expr = (ExprNodeGenericFuncDesc)expression;
            Type op = expr.getGenericUDF().GetType();

            // handle the logical operators
            if (op == typeof(GenericUDFOPOr))
            {
                builder.startOr();
                addChildren(expr);
                builder.end();
            }
            else if (op == typeof(GenericUDFOPAnd))
            {
                builder.startAnd();
                addChildren(expr);
                builder.end();
            }
            else if (op == typeof(GenericUDFOPNot))
            {
                builder.startNot();
                addChildren(expr);
                builder.end();
            }
            // comparison operators: the variable position is found by createLeaf
            else if (op == typeof(GenericUDFOPEqual))
            {
                createLeaf(PredicateLeaf.Operator.EQUALS, expr);
            }
            else if (op == typeof(GenericUDFOPNotEqual))
            {
                // "!=" is expressed as NOT (=)
                builder.startNot();
                createLeaf(PredicateLeaf.Operator.EQUALS, expr);
                builder.end();
            }
            else if (op == typeof(GenericUDFOPEqualNS))
            {
                createLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, expr);
            }
            else if (op == typeof(GenericUDFOPGreaterThan))
            {
                // ">" is expressed as NOT (<=)
                builder.startNot();
                createLeaf(PredicateLeaf.Operator.LESS_THAN_EQUALS, expr);
                builder.end();
            }
            else if (op == typeof(GenericUDFOPEqualOrGreaterThan))
            {
                // ">=" is expressed as NOT (<)
                builder.startNot();
                createLeaf(PredicateLeaf.Operator.LESS_THAN, expr);
                builder.end();
            }
            else if (op == typeof(GenericUDFOPLessThan))
            {
                createLeaf(PredicateLeaf.Operator.LESS_THAN, expr);
            }
            else if (op == typeof(GenericUDFOPEqualOrLessThan))
            {
                createLeaf(PredicateLeaf.Operator.LESS_THAN_EQUALS, expr);
            }
            // operators with a fixed variable slot (passed as the last argument)
            else if (op == typeof(GenericUDFIn))
            {
                createLeaf(PredicateLeaf.Operator.IN, expr, 0);
            }
            else if (op == typeof(GenericUDFBetween))
            {
                createLeaf(PredicateLeaf.Operator.BETWEEN, expr, 1);
            }
            else if (op == typeof(GenericUDFOPNull))
            {
                createLeaf(PredicateLeaf.Operator.IS_NULL, expr, 0);
            }
            else if (op == typeof(GenericUDFOPNotNull))
            {
                // "is not null" is expressed as NOT (is null)
                builder.startNot();
                createLeaf(PredicateLeaf.Operator.IS_NULL, expr, 0);
                builder.end();
            }
            else
            {
                // otherwise, we didn't understand it, so mark it maybe
                builder.literal(TruthValue.YES_NO_NULL);
            }
        }
 /**
  * Create a leaf for an expression without a fixed variable slot: the slot
  * is obtained from findVariable and passed on to the three-argument
  * overload.
  * @param operator the operator type that was found
  * @param expression the expression to check
  */
 private void createLeaf(PredicateLeaf.Operator @operator,
                         ExprNodeGenericFuncDesc expression)
 {
     int slot = findVariable(expression);

     createLeaf(@operator, expression, slot);
 }
 /**
  * Constructs the converter by parsing the original predicate right away.
  * @param expression the expression to translate.
  */
 ConvertAstToSearchArg(ExprNodeGenericFuncDesc expression)
 {
     // the constructor does nothing except run the parse
     this.parse(expression);
 }
Пример #23
0
        /**
         * Writes an ORC file, then checks how many splits OrcInputFormat reports
         * for a series of "userid <op> X and subtype <op> Y" predicates that are
         * passed through the "hive.io.filter.expr.serialized" configuration key.
         */
        public void testSplitEliminationComplexExpr()
        {
            ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(typeof(AllTypesRow));

            using (Stream file = File.OpenWrite(testFilePath))
            using (Writer writer = OrcFile.createWriter(testFilePath, file, conf, inspector,
                                                        100000, CompressionKind.NONE, 10000, 10000))
            {
                writeData(writer);
            }

            conf.set(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMINSPLITSIZE"), "1000");
            conf.set(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMAXSPLITSIZE"), "150000");
            InputFormat @in = new OrcInputFormat();

            FileInputFormat.setInputPaths(conf, testFilePath.ToString());

            // predicate expression: userid <= 100 and subtype <= 1000.0
            Assert.Equal(2, countSplitsForUserIdAndSubtype(@in, inspector,
                                                           new GenericUDFOPEqualOrLessThan(), 100,
                                                           new GenericUDFOPEqualOrLessThan(), 1000.0));

            // userid <= 2 and subtype <= 0.0: no stripe will satisfy the predicate
            Assert.Equal(0, countSplitsForUserIdAndSubtype(@in, inspector,
                                                           new GenericUDFOPEqualOrLessThan(), 2,
                                                           new GenericUDFOPEqualOrLessThan(), 0.0));

            // userid <= 2 and subtype <= 1.0: only first stripe will satisfy
            // the condition and hence single split
            Assert.Equal(1, countSplitsForUserIdAndSubtype(@in, inspector,
                                                           new GenericUDFOPEqualOrLessThan(), 2,
                                                           new GenericUDFOPEqualOrLessThan(), 1.0));

            // userid = 13 and subtype <= 80.0: first two stripes will satisfy
            // the condition, so two splits are expected
            Assert.Equal(2, countSplitsForUserIdAndSubtype(@in, inspector,
                                                           new GenericUDFOPEqual(), 13,
                                                           new GenericUDFOPEqualOrLessThan(), 80.0));

            // userid = 13 and subtype = 80.0: only the second stripe will satisfy
            // the condition and hence single split
            Assert.Equal(1, countSplitsForUserIdAndSubtype(@in, inspector,
                                                           new GenericUDFOPEqual(), 13,
                                                           new GenericUDFOPEqual(), 80.0));
        }

        /**
         * Builds the predicate
         * "userid <useridOp> useridBound AND subtype <subtypeOp> subtypeBound",
         * stores its serialized form in conf under "hive.io.filter.expr.serialized"
         * and returns how many splits the given input format then reports.
         * @param format the input format whose getSplits is queried
         * @param inspector the object inspector handed to every function node
         * @param useridOp comparison applied to the "userid" column
         * @param useridBound constant the "userid" column is compared with
         * @param subtypeOp comparison applied to the "subtype" column
         * @param subtypeBound constant the "subtype" column is compared with
         * @return the length of the split array returned by getSplits(conf, 1)
         */
        private int countSplitsForUserIdAndSubtype(InputFormat format,
                                                   ObjectInspector inspector,
                                                   GenericUDF useridOp,
                                                   int useridBound,
                                                   GenericUDF subtypeOp,
                                                   double subtypeBound)
        {
            // Left conjunct: userid <useridOp> useridBound
            List <ExprNodeDesc> useridArgs = new List <ExprNodeDesc>();
            useridArgs.Add(new ExprNodeColumnDesc(typeof(long), "userid", "T", false));
            useridArgs.Add(new ExprNodeConstantDesc(useridBound));
            ExprNodeGenericFuncDesc useridCheck = new ExprNodeGenericFuncDesc(inspector, useridOp, useridArgs);

            // Right conjunct: subtype <subtypeOp> subtypeBound
            List <ExprNodeDesc> subtypeArgs = new List <ExprNodeDesc>();
            subtypeArgs.Add(new ExprNodeColumnDesc(typeof(double), "subtype", "T", false));
            subtypeArgs.Add(new ExprNodeConstantDesc(subtypeBound));
            ExprNodeGenericFuncDesc subtypeCheck = new ExprNodeGenericFuncDesc(inspector, subtypeOp, subtypeArgs);

            // Combine both comparisons with AND.
            GenericUDF          and       = new GenericUDFOPAnd();
            List <ExprNodeDesc> conjuncts = new List <ExprNodeDesc>();
            conjuncts.Add(useridCheck);
            conjuncts.Add(subtypeCheck);
            ExprNodeGenericFuncDesc predicate = new ExprNodeGenericFuncDesc(inspector, and, conjuncts);

            // Publish the predicate through the configuration before asking for splits.
            string sargStr = Utilities.serializeExpression(predicate);
            conf.set("hive.io.filter.expr.serialized", sargStr);

            InputSplit[] splits = format.getSplits(conf, 1);
            return splits.Length;
        }