// Verifies the hasNull flag and value counts that ORC records in its column statistics,
// both at file level and per stripe.
//
// 20,000 rows are written; the first field is always bytes(1, 2, 3) and the second (string)
// field is either a literal or null. Per the labels below the intended layout is:
//   stripe 1: five row groups of 1000 rows (RG1 and RG3 non-null, RG2/RG4/RG5 null)
//   stripe 2: 5000 null rows
//   stripe 3: 5000 non-null rows
//   stripe 4: 5000 null rows
// so 7000 rows (1000 + 1000 + 5000) carry a non-null string.
public void testHasNull()
        {
            ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(typeof(SimpleStruct));

            // File.Create truncates an existing file. File.OpenWrite does not, so a longer file left
            // over from an earlier run would keep stale trailing bytes after the newly written data.
            using (Stream file = File.Create(TestFilePath))
                using (Writer writer = OrcFile.createWriter(TestFilePath, file, OrcFile.writerOptions(conf)
                                                            .inspector(inspector)
                                                            .rowIndexStride(1000)
                                                            .stripeSize(10000)
                                                            .bufferSize(10000)))
                {
                    // Appends `count` rows whose string field is `value` (null allowed).
                    System.Action<string, int> addRows = (value, count) =>
                    {
                        for (int i = 0; i < count; i++)
                        {
                            writer.addRow(new SimpleStruct(bytes(1, 2, 3), value));
                        }
                    };

                    // STRIPE 1
                    addRows("RG1", 1000);      // RG1
                    addRows(null, 1000);       // RG2
                    addRows("RG3", 1000);      // RG3
                    addRows(null, 1000);       // RG4
                    addRows(null, 1000);       // RG5
                    // STRIPE 2
                    addRows(null, 5000);
                    // STRIPE 3
                    addRows("STRIPE-3", 5000);
                    // STRIPE 4
                    addRows(null, 5000);
                }

            Reader reader = OrcFile.createReader(TestFilePath, OrcFile.readerOptions(conf));

            // check the file level stats: only the column at index 2 is expected to contain nulls
            ColumnStatistics[] stats = reader.getStatistics();
            Assert.Equal(20000, stats[0].getNumberOfValues());
            Assert.Equal(20000, stats[1].getNumberOfValues());
            Assert.Equal(7000, stats[2].getNumberOfValues());
            Assert.False(stats[0].hasNull());
            Assert.False(stats[1].hasNull());
            Assert.True(stats[2].hasNull());

            // check the stripe level stats
            List <StripeStatistics> stripeStats = reader.getStripeStatistics();

            // stripe 1 stats: mixed null / non-null strings
            var stripe1Columns = stripeStats[0].getColumnStatistics();
            Assert.False(stripe1Columns[0].hasNull());
            Assert.False(stripe1Columns[1].hasNull());
            Assert.True(stripe1Columns[2].hasNull());

            // stripe 2 stats: all strings null
            var stripe2Columns = stripeStats[1].getColumnStatistics();
            Assert.False(stripe2Columns[0].hasNull());
            Assert.False(stripe2Columns[1].hasNull());
            Assert.True(stripe2Columns[2].hasNull());

            // stripe 3 stats: no null strings at all
            var stripe3Columns = stripeStats[2].getColumnStatistics();
            Assert.False(stripe3Columns[0].hasNull());
            Assert.False(stripe3Columns[1].hasNull());
            Assert.False(stripe3Columns[2].hasNull());

            // stripe 4 stats: all strings null
            var stripe4Columns = stripeStats[3].getColumnStatistics();
            Assert.False(stripe4Columns[0].hasNull());
            Assert.False(stripe4Columns[1].hasNull());
            Assert.True(stripe4Columns[2].hasNull());

            // NOTE(review): the disabled section below still uses Java-style APIs (FileOutputStream,
            // File.separator, System.SetOut) and would need porting before it can be enabled.
#if false
            // Test file dump
            TextWriter       origOut        = System.Console.Out;
            string           outputFilename = "orc-file-has-null.out";
            FileOutputStream myOut          = new FileOutputStream(workDir + File.separator + outputFilename);

            // replace stdout and run command
            System.Console.SetOut(new StreamWriter(myOut));
            FileDump.main(new String[] { testFilePath.toString(), "--rowindex=2" });
            System.Console.Out.Flush();
            System.SetOut(origOut);

            TestFileDump.checkOutput(outputFilename, workDir + File.separator + outputFilename);
#endif
        }
Exemplo n.º 2
0
        /// <summary>
        /// Writes the JSON fields describing one column's statistics: "count" and "hasNull" for
        /// every column, followed by the kind-specific fields and a "type" field when
        /// <paramref name="cs"/> is one of the recognised statistics kinds.
        /// </summary>
        /// <param name="writer">JSON writer the key/value pairs are appended to.</param>
        /// <param name="cs">Statistics to serialise; nothing is written when it is null.</param>
        private static void writeColumnStatistics(JsonWriter writer, ColumnStatistics cs)
        {
            if (cs == null)
            {
                return;
            }

            // Fields common to every statistics kind.
            writer.key("count").value(cs.getNumberOfValues());
            writer.key("hasNull").value(cs.hasNull());

            // Kind-specific fields; the order of the type checks is significant and kept as-is.
            if (cs is BinaryColumnStatistics)
            {
                BinaryColumnStatistics binaryStats = (BinaryColumnStatistics)cs;
                writer.key("totalLength").value(binaryStats.getSum());
                writer.key("type").value(OrcProto.Type.Types.Kind.BINARY.ToString());
            }
            else if (cs is BooleanColumnStatistics)
            {
                BooleanColumnStatistics booleanStats = (BooleanColumnStatistics)cs;
                writer.key("trueCount").value(booleanStats.getTrueCount());
                writer.key("falseCount").value(booleanStats.getFalseCount());
                writer.key("type").value(OrcProto.Type.Types.Kind.BOOLEAN.ToString());
            }
            else if (cs is IntegerColumnStatistics)
            {
                IntegerColumnStatistics integerStats = (IntegerColumnStatistics)cs;
                writer.key("min").value(integerStats.getMinimum());
                writer.key("max").value(integerStats.getMaximum());
                // The sum is only emitted when the statistics report it as defined.
                if (integerStats.isSumDefined())
                {
                    writer.key("sum").value(integerStats.getSum());
                }
                writer.key("type").value(OrcProto.Type.Types.Kind.LONG.ToString());
            }
            else if (cs is DoubleColumnStatistics)
            {
                DoubleColumnStatistics doubleStats = (DoubleColumnStatistics)cs;
                writer.key("min").value(doubleStats.getMinimum());
                writer.key("max").value(doubleStats.getMaximum());
                writer.key("sum").value(doubleStats.getSum());
                writer.key("type").value(OrcProto.Type.Types.Kind.DOUBLE.ToString());
            }
            else if (cs is StringColumnStatistics)
            {
                StringColumnStatistics stringStats = (StringColumnStatistics)cs;
                writer.key("min").value(stringStats.getMinimum());
                writer.key("max").value(stringStats.getMaximum());
                writer.key("totalLength").value(stringStats.getSum());
                writer.key("type").value(OrcProto.Type.Types.Kind.STRING.ToString());
            }
            else if (cs is DateColumnStatistics)
            {
                DateColumnStatistics dateStats = (DateColumnStatistics)cs;
                // min/max output is currently compiled out; only the type is written.
                if (dateStats.getMaximum() != null)
                {
#if false
                    writer.key("min").value(dateStats.getMinimum());
                    writer.key("max").value(dateStats.getMaximum());
#endif
                }
                writer.key("type").value(OrcProto.Type.Types.Kind.DATE.ToString());
            }
            else if (cs is TimestampColumnStatistics)
            {
                TimestampColumnStatistics timestampStats = (TimestampColumnStatistics)cs;
                // min/max output is currently compiled out; only the type is written.
                if (timestampStats.getMaximum() != null)
                {
#if false
                    writer.key("min").value(timestampStats.getMinimum());
                    writer.key("max").value(timestampStats.getMaximum());
#endif
                }
                writer.key("type").value(OrcProto.Type.Types.Kind.TIMESTAMP.ToString());
            }
            else if (cs is DecimalColumnStatistics)
            {
                DecimalColumnStatistics decimalStats = (DecimalColumnStatistics)cs;
                // min/max/sum output is currently compiled out; only the type is written.
                if (decimalStats.getMaximum() != null)
                {
#if false
                    writer.key("min").value(decimalStats.getMinimum());
                    writer.key("max").value(decimalStats.getMaximum());
                    writer.key("sum").value(decimalStats.getSum());
#endif
                }
                writer.key("type").value(OrcProto.Type.Types.Kind.DECIMAL.ToString());
            }
        }
Exemplo n.º 3
0
 /// <summary>
 /// Serialises one column's statistics as JSON key/value pairs: "count" and "hasNull" first,
 /// then the fields specific to the statistics kind, and finally "type" when the kind is recognised.
 /// </summary>
 /// <param name="writer">JSON writer receiving the key/value pairs.</param>
 /// <param name="cs">Statistics to write; a null value produces no output.</param>
 private static void writeColumnStatistics(JsonWriter writer, ColumnStatistics cs)
 {
     if (cs == null) {
         return;
     }

     writer.key("count").value(cs.getNumberOfValues());
     writer.key("hasNull").value(cs.hasNull());

     // Name of the matched type kind. It is written last under "type" and stays null
     // (so no "type" field is emitted) when cs matches none of the kinds below.
     string kindName = null;

     if (cs is BinaryColumnStatistics) {
         BinaryColumnStatistics binary = (BinaryColumnStatistics)cs;
         writer.key("totalLength").value(binary.getSum());
         kindName = OrcProto.Type.Types.Kind.BINARY.ToString();
     } else if (cs is BooleanColumnStatistics) {
         BooleanColumnStatistics boolean = (BooleanColumnStatistics)cs;
         writer.key("trueCount").value(boolean.getTrueCount());
         writer.key("falseCount").value(boolean.getFalseCount());
         kindName = OrcProto.Type.Types.Kind.BOOLEAN.ToString();
     } else if (cs is IntegerColumnStatistics) {
         IntegerColumnStatistics integer = (IntegerColumnStatistics)cs;
         writer.key("min").value(integer.getMinimum());
         writer.key("max").value(integer.getMaximum());
         // The sum is optional: it is written only when reported as defined.
         if (integer.isSumDefined()) {
             writer.key("sum").value(integer.getSum());
         }
         kindName = OrcProto.Type.Types.Kind.LONG.ToString();
     } else if (cs is DoubleColumnStatistics) {
         DoubleColumnStatistics floating = (DoubleColumnStatistics)cs;
         writer.key("min").value(floating.getMinimum());
         writer.key("max").value(floating.getMaximum());
         writer.key("sum").value(floating.getSum());
         kindName = OrcProto.Type.Types.Kind.DOUBLE.ToString();
     } else if (cs is StringColumnStatistics) {
         StringColumnStatistics text = (StringColumnStatistics)cs;
         writer.key("min").value(text.getMinimum());
         writer.key("max").value(text.getMaximum());
         writer.key("totalLength").value(text.getSum());
         kindName = OrcProto.Type.Types.Kind.STRING.ToString();
     } else if (cs is DateColumnStatistics) {
         DateColumnStatistics date = (DateColumnStatistics)cs;
         // The min/max fields are compiled out for now; the maximum is still queried.
         if (date.getMaximum() != null) {
     #if false
             writer.key("min").value(date.getMinimum());
             writer.key("max").value(date.getMaximum());
     #endif
         }
         kindName = OrcProto.Type.Types.Kind.DATE.ToString();
     } else if (cs is TimestampColumnStatistics) {
         TimestampColumnStatistics timestamp = (TimestampColumnStatistics)cs;
         // The min/max fields are compiled out for now; the maximum is still queried.
         if (timestamp.getMaximum() != null) {
     #if false
             writer.key("min").value(timestamp.getMinimum());
             writer.key("max").value(timestamp.getMaximum());
     #endif
         }
         kindName = OrcProto.Type.Types.Kind.TIMESTAMP.ToString();
     } else if (cs is DecimalColumnStatistics) {
         DecimalColumnStatistics fixedPoint = (DecimalColumnStatistics)cs;
         // The min/max/sum fields are compiled out for now; the maximum is still queried.
         if (fixedPoint.getMaximum() != null) {
     #if false
             writer.key("min").value(fixedPoint.getMinimum());
             writer.key("max").value(fixedPoint.getMaximum());
             writer.key("sum").value(fixedPoint.getSum());
     #endif
         }
         kindName = OrcProto.Type.Types.Kind.DECIMAL.ToString();
     }

     if (kindName != null) {
         writer.key("type").value(kindName);
     }
 }