Example #1
        // Normalizes the constant held by an ExprNodeConstantDesc into the
        // canonical representation expected for the given PredicateLeaf.Type:
        // long, string, double, Timestamp, Date, HiveDecimalWritable, or bool.
        // The Java-style helpers below (Number, toString, and so on) presumably
        // come from the port's compatibility shims.
        private static object boxLiteral(ExprNodeConstantDesc constantDesc,
                                         PredicateLeaf.Type type)
        {
            object lit = constantDesc.getValue();
            if (lit == null)
            {
                return null;
            }
            switch (type)
            {
            case PredicateLeaf.Type.LONG:
                return ((Number)lit).longValue();

            case PredicateLeaf.Type.STRING:
                if (lit is HiveChar)
                {
                    return ((HiveChar)lit).getPaddedValue();
                }
                else if (lit is String)
                {
                    return lit;
                }
                else
                {
                    return lit.toString();
                }

            case PredicateLeaf.Type.FLOAT:
                if (lit is Float)
                {
                    // converting a float directly to a double causes annoying
                    // conversion problems
                    return Double.parseDouble(lit.toString());
                }
                else
                {
                    return ((Number)lit).doubleValue();
                }

            case PredicateLeaf.Type.TIMESTAMP:
                return Timestamp.valueOf(lit.toString());

            case PredicateLeaf.Type.DATE:
                return Date.valueOf(lit.toString());

            case PredicateLeaf.Type.DECIMAL:
                LOG.warn("boxing " + lit);
                return new HiveDecimalWritable(lit.toString());

            case PredicateLeaf.Type.BOOLEAN:
                return lit;

            default:
                throw new ArgumentException("Unknown literal " + type);
            }
        }
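For readers without the Hive shim types at hand, below is a minimal self-contained C# sketch of the same normalization idea using only BCL types. SargType and BoxLiteral are hypothetical stand-ins for PredicateLeaf.Type and the method above, not names from the port.

    using System;
    using System.Globalization;

    enum SargType { Long, String, Float, Timestamp, Date, Decimal, Boolean }

    static class SargLiterals
    {
        // Maps an arbitrary constant to the single canonical CLR type used
        // for each predicate-leaf kind, mirroring the switch above.
        public static object BoxLiteral(object lit, SargType type)
        {
            if (lit == null) return null;
            switch (type)
            {
            case SargType.Long:
                return Convert.ToInt64(lit, CultureInfo.InvariantCulture);
            case SargType.String:
                return Convert.ToString(lit, CultureInfo.InvariantCulture);
            case SargType.Float:
                // Widening float -> double directly drags binary noise along
                // ((double)1.1f == 1.100000023841858); round-tripping through
                // a string recovers the value as written.
                return lit is float f
                    ? double.Parse(f.ToString("R", CultureInfo.InvariantCulture),
                                   CultureInfo.InvariantCulture)
                    : Convert.ToDouble(lit, CultureInfo.InvariantCulture);
            case SargType.Timestamp:
            case SargType.Date:
                // Simplification: the port uses Timestamp/Date shim types here.
                return DateTime.Parse(Convert.ToString(lit, CultureInfo.InvariantCulture),
                                      CultureInfo.InvariantCulture);
            case SargType.Decimal:
                return decimal.Parse(Convert.ToString(lit, CultureInfo.InvariantCulture),
                                     CultureInfo.InvariantCulture);
            case SargType.Boolean:
                return (bool)lit;
            default:
                throw new ArgumentException("Unknown literal " + type);
            }
        }
    }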
        public void testSplitEliminationComplexExpr()
        {
            ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(typeof(AllTypesRow));

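            // The positional arguments presumably mirror Java's
            // OrcFile.createWriter: 100,000-byte stripe size, no compression,
            // 10,000-byte buffer, and a 10,000-row index stride.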
            using (Stream file = File.OpenWrite(testFilePath))
            using (Writer writer = OrcFile.createWriter(testFilePath, file, conf, inspector,
                100000, CompressionKind.NONE, 10000, 10000))
            {
                writeData(writer);
            }

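            // Split sizing knobs: minimum 1,000 bytes and maximum 150,000
            // bytes, large enough for several adjacent surviving stripes to
            // coalesce into a single split.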
            conf.set(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMINSPLITSIZE"), "1000");
            conf.set(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMAXSPLITSIZE"), "150000");
            InputFormat @in = new OrcInputFormat();
            FileInputFormat.setInputPaths(conf, testFilePath.ToString());

            // predicate expression: userid <= 100 and subtype <= 1000.0
            GenericUDF udf = new GenericUDFOPEqualOrLessThan();
            List<ExprNodeDesc> childExpr = new List<ExprNodeDesc>();
            ExprNodeColumnDesc col = new ExprNodeColumnDesc(typeof(long), "userid", "T", false);
            ExprNodeConstantDesc con = new ExprNodeConstantDesc(100);
            childExpr.Add(col);
            childExpr.Add(con);
            ExprNodeGenericFuncDesc en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);

            GenericUDF udf1 = new GenericUDFOPEqualOrLessThan();
            List<ExprNodeDesc> childExpr1 = new List<ExprNodeDesc>();
            ExprNodeColumnDesc col1 = new ExprNodeColumnDesc(typeof(double), "subtype", "T", false);
            ExprNodeConstantDesc con1 = new ExprNodeConstantDesc(1000.0);
            childExpr1.Add(col1);
            childExpr1.Add(con1);
            ExprNodeGenericFuncDesc en1 = new ExprNodeGenericFuncDesc(inspector, udf1, childExpr1);

            GenericUDF udf2 = new GenericUDFOPAnd();
            List<ExprNodeDesc> childExpr2 = new List<ExprNodeDesc>();
            childExpr2.Add(en);
            childExpr2.Add(en1);
            ExprNodeGenericFuncDesc en2 = new ExprNodeGenericFuncDesc(inspector, udf2, childExpr2);

            string sargStr = Utilities.serializeExpression(en2);
            conf.set("hive.io.filter.expr.serialized", sargStr);
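            // The wide-open predicate keeps every stripe; the 150,000-byte max
            // split size then packs the surviving stripes into two splits.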
            InputSplit[] splits = @in.getSplits(conf, 1);
            Assert.Equal(2, splits.Length);

            con = new ExprNodeConstantDesc(2);
            childExpr[1] = con;
            en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);

            con1 = new ExprNodeConstantDesc(0.0);
            childExpr1[1] = con1;
            en1 = new ExprNodeGenericFuncDesc(inspector, udf1, childExpr1);

            childExpr2[0] = en;
            childExpr2[1] = en1;
            en2 = new ExprNodeGenericFuncDesc(inspector, udf2, childExpr2);

            sargStr = Utilities.serializeExpression(en2);
            conf.set("hive.io.filter.expr.serialized", sargStr);
            splits = @in.getSplits(conf, 1);
            // no stripe will satisfy the predicate
            Assert.Equal(0, splits.Length);

            con = new ExprNodeConstantDesc(2);
            childExpr[1] = con;
            en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);

            con1 = new ExprNodeConstantDesc(1.0);
            childExpr1[1] = con1;
            en1 = new ExprNodeGenericFuncDesc(inspector, udf1, childExpr1);

            childExpr2[0] = en;
            childExpr2[1] = en1;
            en2 = new ExprNodeGenericFuncDesc(inspector, udf2, childExpr2);

            sargStr = Utilities.serializeExpression(en2);
            conf.set("hive.io.filter.expr.serialized", sargStr);
            splits = @in.getSplits(conf, 1);
            // only the first stripe satisfies the condition, hence a single split
            Assert.Equal(1, splits.Length);

            udf = new GenericUDFOPEqual();
            con = new ExprNodeConstantDesc(13);
            childExpr[1] = con;
            en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);

            con1 = new ExprNodeConstantDesc(80.0);
            childExpr1[1] = con1;
            en1 = new ExprNodeGenericFuncDesc(inspector, udf1, childExpr1);

            childExpr2[0] = en;
            childExpr2[1] = en1;
            en2 = new ExprNodeGenericFuncDesc(inspector, udf2, childExpr2);

            sargStr = Utilities.serializeExpression(en2);
            conf.set("hive.io.filter.expr.serialized", sargStr);
            splits = @in.getSplits(conf, 1);
            // two disjoint runs of stripes satisfy the condition, hence two splits
            Assert.Equal(2, splits.Length);

            udf = new GenericUDFOPEqual();
            con = new ExprNodeConstantDesc(13);
            childExpr[1] = con;
            en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);

            udf1 = new GenericUDFOPEqual();
            con1 = new ExprNodeConstantDesc(80.0);
            childExpr1[1] = con1;
            en1 = new ExprNodeGenericFuncDesc(inspector, udf1, childExpr1);

            childExpr2[0] = en;
            childExpr2[1] = en1;
            en2 = new ExprNodeGenericFuncDesc(inspector, udf2, childExpr2);

            sargStr = Utilities.serializeExpression(en2);
            conf.set("hive.io.filter.expr.serialized", sargStr);
            splits = @in.getSplits(conf, 1);
            // only the second stripe satisfies the condition, hence a single split
            Assert.Equal(1, splits.Length);
        }
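The test pushes the predicate down the long way, serializing an ExprNodeDesc tree with Utilities.serializeExpression and handing it to the reader through hive.io.filter.expr.serialized. For comparison, here is a minimal sketch of the more direct route, assuming the port mirrors Hive's Java SearchArgumentFactory fluent builder; the names below come from the org.apache.hadoop.hive.ql.io.sarg API and may not exist in this codebase.

    // Hypothetical: builds the same predicate as the first block of the test
    // above without constructing and serializing an expression tree.
    SearchArgument sarg = SearchArgumentFactory.newBuilder()
        .startAnd()
            .lessThanEquals("userid", 100L)      // userid <= 100
            .lessThanEquals("subtype", 1000.0)   // subtype <= 1000.0
        .end()
        .build();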
        public void testSplitEliminationSmallMaxSplit()
        {
            ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(typeof(AllTypesRow));

            using (Stream file = File.OpenWrite(testFilePath))
            using (Writer writer = OrcFile.createWriter(testFilePath, file, conf, inspector,
                100000, CompressionKind.NONE, 10000, 10000))
            {
                writeData(writer);
            }
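            // The 5,000-byte max split size is smaller than a stripe, so each
            // surviving stripe becomes its own split: the split counts below
            // equal the number of stripes that pass predicate elimination.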
            conf.set(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMINSPLITSIZE"), "1000");
            conf.set(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMAXSPLITSIZE"), "5000");
            InputFormat @in = new OrcInputFormat();
            FileInputFormat.setInputPaths(conf, testFilePath.ToString());

            GenericUDF udf = new GenericUDFOPEqualOrLessThan();
            List<ExprNodeDesc> childExpr = new List<ExprNodeDesc>();
            ExprNodeColumnDesc col = new ExprNodeColumnDesc(typeof(long), "userid", "T", false);
            ExprNodeConstantDesc con = new ExprNodeConstantDesc(100);
            childExpr.Add(col);
            childExpr.Add(con);
            ExprNodeGenericFuncDesc en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
            string sargStr = Utilities.serializeExpression(en);
            conf.set("hive.io.filter.expr.serialized", sargStr);
            InputSplit[] splits = @in.getSplits(conf, 1);
            Assert.Equal(5, splits.Length);

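            // The bounds below admit 0 through 5 stripes in turn, which implies
            // per-stripe minimum userids of 2, 5, 13, 29, and 70 in the data
            // written above.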
            con = new ExprNodeConstantDesc(1);
            childExpr[1] = con;
            en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
            sargStr = Utilities.serializeExpression(en);
            conf.set("hive.io.filter.expr.serialized", sargStr);
            splits = @in.getSplits(conf, 1);
            Assert.Equal(0, splits.Length);

            con = new ExprNodeConstantDesc(2);
            childExpr[1] = con;
            en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
            sargStr = Utilities.serializeExpression(en);
            conf.set("hive.io.filter.expr.serialized", sargStr);
            splits = @in.getSplits(conf, 1);
            Assert.Equal(1, splits.Length);

            con = new ExprNodeConstantDesc(5);
            childExpr[1] = con;
            en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
            sargStr = Utilities.serializeExpression(en);
            conf.set("hive.io.filter.expr.serialized", sargStr);
            splits = @in.getSplits(conf, 1);
            Assert.Equal(2, splits.Length);

            con = new ExprNodeConstantDesc(13);
            childExpr[1] = con;
            en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
            sargStr = Utilities.serializeExpression(en);
            conf.set("hive.io.filter.expr.serialized", sargStr);
            splits = @in.getSplits(conf, 1);
            Assert.Equal(3, splits.Length);

            con = new ExprNodeConstantDesc(29);
            childExpr[1] = con;
            en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
            sargStr = Utilities.serializeExpression(en);
            conf.set("hive.io.filter.expr.serialized", sargStr);
            splits = @in.getSplits(conf, 1);
            Assert.Equal(4, splits.Length);

            con = new ExprNodeConstantDesc(70);
            childExpr[1] = con;
            en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
            sargStr = Utilities.serializeExpression(en);
            conf.set("hive.io.filter.expr.serialized", sargStr);
            splits = @in.getSplits(conf, 1);
            Assert.Equal(5, splits.Length);
        }
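Both tests ultimately exercise how surviving stripes are grouped into splits under the size knobs. The following is a simplified illustration of that packing logic, not the port's actual split generator; PackStripes is a hypothetical helper. Adjacent stripes accumulate into one split until the size cap is hit, and any gap left by an eliminated stripe forces a new split.

    using System.Collections.Generic;

    static class SplitPacking
    {
        // Packs contiguous stripe ranges into splits: keep appending adjacent
        // stripes while the accumulated split stays within maxSize; a gap (an
        // eliminated stripe) or an oversized result starts a new split.
        public static List<(long offset, long length)> PackStripes(
            IEnumerable<(long offset, long length)> stripes, long maxSize)
        {
            var splits = new List<(long offset, long length)>();
            long start = -1, size = 0;
            foreach (var s in stripes)
            {
                bool adjacent = start >= 0 && start + size == s.offset;
                if (start < 0 || !adjacent || size + s.length > maxSize)
                {
                    if (start >= 0) splits.Add((start, size));
                    start = s.offset;
                    size = s.length;
                }
                else
                {
                    size += s.length;
                }
            }
            if (start >= 0) splits.Add((start, size));
            return splits;
        }
    }

Five adjacent 40,000-byte stripes packed with maxSize 150,000 come out as two splits (three stripes, then two); with maxSize 5,000 every stripe lands in its own split, which is the shape of the assertions in the two tests above.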