// Verifies OrcStruct equality, hash-code consistency and ToString() output
// for structs of equal and differing field counts, including null fields.
public void testStruct()
        {
            // The three leading string fields are the same for every struct.
            string[] words = { "hop", "on", "pop" };

            OrcStruct st1 = new OrcStruct(4);
            OrcStruct st2 = new OrcStruct(4);
            OrcStruct st3 = new OrcStruct(3);

            for (int idx = 0; idx < words.Length; idx++)
            {
                st1.setFieldValue(idx, words[idx]);
                st2.setFieldValue(idx, words[idx]);
                st3.setFieldValue(idx, words[idx]);
            }
            // Only the four-field structs carry the trailing integer.
            st1.setFieldValue(3, 42);
            st2.setFieldValue(3, 42);

            // Equality: never equal to null, equal to an identical struct,
            // and not equal to a struct with a different field count.
            Assert.False(st1.Equals(null));
            Assert.Equal(st1, st2);
            Assert.False(st1.Equals(st3));

            // Equal structs must hash alike; exact values are only checked
            // when string hashing is deterministic for the build.
            Assert.Equal(st1.GetHashCode(), st2.GetHashCode());
#if PREDICTABLE_STRING_HASH
            Assert.Equal(11241, st1.GetHashCode());
            Assert.Equal(11204, st3.GetHashCode());
#endif

            Assert.Equal("{hop, on, pop, 42}", st1.ToString());

            // A null field on one side only breaks equality in both directions...
            st1.setFieldValue(3, null);
            Assert.False(st1.Equals(st2));
            Assert.False(st2.Equals(st1));

            // ...and matching nulls restore it.
            st2.setFieldValue(3, null);
            Assert.Equal(st1, st2);
        }
 // Builds a vectorized reader on top of an existing ACID row reader.
 //
 // inner - row reader that supplies the key, value and object inspector
 // conf  - configuration handed to the row-batch context
 // split - file split handed to the row-batch context
 //
 // Every checked failure raised while initializing the row-batch context is
 // rethrown as an IOException that carries the original exception as its cause.
 VectorizedOrcAcidRowReader(AcidInputFormat.RowReader <OrcStruct> inner,
                            Configuration conf,
                            FileSplit split)
 {
     // Key, value and inspector are all obtained from the wrapped reader.
     this.innerReader     = inner;
     this.key             = inner.createKey();
     this.rowBatchCtx     = new VectorizedRowBatchCtx();
     this.value           = inner.createValue();
     this.objectInspector = inner.getObjectInspector();
     try
     {
         rowBatchCtx.init(conf, split);
     }
     // Each handler below performs the same translation to IOException.
     catch (ClassNotFoundException e)
     {
         throw new IOException("Failed to initialize context", e);
     }
     catch (SerDeException e)
     {
         throw new IOException("Failed to initialize context", e);
     }
     catch (InstantiationException e)
     {
         throw new IOException("Failed to initialize context", e);
     }
     catch (IllegalAccessException e)
     {
         throw new IOException("Failed to initialize context", e);
     }
     catch (HiveException e)
     {
         throw new IOException("Failed to initialize context", e);
     }
 }
        // Builds an object inspector from a type string and checks the type name
        // it reports, its struct-field lookups, and the list and map inspectors
        // obtained from fields 12 and 10.
        public void testInspectorFromTypeInfo()
        {
            // The inspector must report exactly the type string it was built from.
            const string typeName =
                "struct<c1:boolean,c2:tinyint,c3:smallint,c4:int,c5:bigint," +
                "c6:float,c7:double,c8:binary,c9:string,c10:struct<c1:int>," +
                "c11:map<int,int>,c12:uniontype<int>,c13:array<timestamp>>";
            const int fieldCount = 13;

            TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName);
            StructObjectInspector inspector =
                (StructObjectInspector)OrcStruct.createObjectInspector(typeInfo);

            Assert.Equal(typeName, inspector.getTypeName());
            Assert.Null(inspector.getAllStructFieldRefs()[0].getFieldComment());
            Assert.Null(inspector.getStructFieldRef("UNKNOWN"));

            // Fill field i with the value i and record the same values in a list.
            OrcStruct s1 = new OrcStruct(fieldCount);
            List <object> list = new List <object>();
            for (int pos = 0; pos < fieldCount; ++pos)
            {
                s1.setFieldValue(pos, pos);
                list.Add(pos);
            }
            Assert.Equal(list, inspector.getStructFieldsDataAsList(s1));

            // Field 12 (c13) is the array column.
            ListObjectInspector listOI =
                (ListObjectInspector)inspector.getAllStructFieldRefs()[12].getFieldObjectInspector();
            Assert.Equal(ObjectInspectorCategory.LIST, listOI.getCategory());
            Assert.Equal(10, listOI.getListElement(list, 10));
            // Indexes outside the list yield null.
            Assert.Null(listOI.getListElement(list, -1));
            Assert.Null(listOI.getListElement(list, 13));
            Assert.Equal(13, listOI.getListLength(list));

            // Field 10 (c11) is the map column; the sample map is k -> 2k.
            Dictionary <object, object> map = new Dictionary <object, object>();
            for (int k = 1; k <= 3; k++)
            {
                map.Add(k, 2 * k);
            }
            MapObjectInspector mapOI =
                (MapObjectInspector)inspector.getAllStructFieldRefs()[10].getFieldObjectInspector();
            Assert.Equal(3, mapOI.getMapSize(map));
            Assert.Equal(4, mapOI.getMapValueElement(map, 2));
        }
 private static void setRow(OrcStruct event,
        // Reads the file at TestFilePath twice, once through the vectorized batch
        // reader and once through the row-by-row reader, and asserts that every
        // value, null flag, and the per-column isRepeating / noNulls flags agree
        // with the expectations hard-coded below.
        private void checkVectorizedReader()
        {
            // Expected per-column flags for columns 0..9 of every batch.
            bool[] expectedRepeating =
            {
                false, false, false, true, false, false, false, false, false, false
            };
            bool[] expectedNoNulls =
            {
                false, false, true, true, false, false, false, false, false, false
            };

            Reader vreader = OrcFile.createReader(TestFilePath,
                                                  OrcFile.readerOptions(conf));
            Reader reader = OrcFile.createReader(TestFilePath,
                                                 OrcFile.readerOptions(conf));

            using (RecordReaderImpl vrr = (RecordReaderImpl)vreader.rows())
            using (RecordReaderImpl rr = (RecordReaderImpl)reader.rows())
            {
                VectorizedRowBatch batch = null;

                // Check Vectorized ORC reader against ORC row reader
                while (vrr.hasNext())
                {
                    batch = vrr.nextBatch(batch);
                    for (int i = 0; i < batch.size; i++)
                    {
                        OrcStruct row = (OrcStruct)rr.next();
                        for (int j = 0; j < batch.cols.Length; j++)
                        {
                            object       a  = row.getFieldValue(j);
                            ColumnVector cv = batch.cols[j];
                            // if the value is repeating, use row 0
                            int rowId = cv.isRepeating ? 0 : i;

                            // make sure the null flag agrees
                            if (a == null)
                            {
                                Assert.True(!cv.noNulls && cv.isNull[rowId]);
                            }
                            else if (a is bool)
                            {
                                // bool values are stored as 1's and 0's, so convert and compare
                                long temp = (bool)a ? 1 : 0;
                                long b    = ((LongColumnVector)cv).vector[rowId];
                                Assert.Equal(temp.ToString(), b.ToString());
                            }
                            else if (a is Timestamp)
                            {
                                // Timestamps are stored as a long nanosecond count:
                                // milliseconds scaled to nanoseconds plus the
                                // sub-millisecond remainder of the nanos field.
                                Timestamp t = (Timestamp)a;
                                long timeInNanoSec = (t.Milliseconds * 1000000)
                                                     + (t.getNanos() % 1000000);
                                long b = ((LongColumnVector)cv).vector[rowId];
                                Assert.Equal(timeInNanoSec.ToString(), b.ToString());
                            }
                            else if (a is Date)
                            {
                                // Dates are stored as a long day count, so compare days
                                Date adt = (Date)a;
                                long b   = ((LongColumnVector)cv).vector[rowId];
                                Assert.Equal(adt.Days, (int)b);
                            }
                            else if (a is HiveDecimal)
                            {
                                // Index with rowId, not i: a repeating vector must be
                                // read at slot 0, as every other branch here does.
                                HiveDecimal dec = (HiveDecimal)a;
                                HiveDecimal b   = ((DecimalColumnVector)cv).vector[rowId];
                                Assert.Equal(dec, b);
                            }
                            else if (a is double)
                            {
                                double b = ((DoubleColumnVector)cv).vector[rowId];
                                Assert.Equal(a.ToString(), b.ToString());
                            }
                            else if (a is string)
                            {
                                // Strings are stored as UTF-8 byte slices
                                BytesColumnVector bcv = (BytesColumnVector)cv;
                                string            b   = Encoding.UTF8.GetString(bcv.vector[rowId], bcv.start[rowId], bcv.length[rowId]);
                                Assert.Equal((string)a, b);
                            }
                            else if (a is int || a is long || a is sbyte || a is short)
                            {
                                // All integer widths share the long vector
                                Assert.Equal(a.ToString(),
                                             ((LongColumnVector)cv).vector[rowId].ToString());
                            }
                            else
                            {
                                // Fail loudly and say which type was not handled
                                Assert.True(false,
                                            "Unexpected value type " + a.GetType() + " in column " + j);
                            }
                        }
                    }

                    // Check repeating and non-null flags for each column
                    for (int c = 0; c < expectedRepeating.Length; c++)
                    {
                        Assert.Equal(expectedRepeating[c], batch.cols[c].isRepeating);
                    }
                    for (int c = 0; c < expectedNoNulls.Length; c++)
                    {
                        Assert.Equal(expectedNoNulls[c], batch.cols[c].noNulls);
                    }
                }
                // The row reader must be exhausted once the batch reader is
                Assert.Equal(false, rr.hasNext());
            }
        }
        // Writes 20000 rows whose first and last rows have null a/b columns,
        // using a stripe size of 100000 and no compression, then verifies the
        // file statistics, the reader's inspector, which stripes carry a PRESENT
        // stream, and the content of the first, second-to-last and last rows.
        public void testMultiStripeWithNull()
        {
            ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(typeof(MyStruct));

            // File.Create truncates an existing file; File.OpenWrite would leave
            // stale trailing bytes behind when a longer file already exists.
            using (Stream file = File.Create(TestFilePath))
            using (Writer writer = OrcFile.createWriter(TestFilePath, file, OrcFile.writerOptions(conf)
                                                        .inspector(inspector)
                                                        .stripeSize(100000)
                                                        .compress(CompressionKind.NONE)
                                                        .bufferSize(10000)))
            {
                Random rand = new Random(100);
                // first row: null a and b
                writer.addRow(new MyStruct(null, null, true, new List <InnerStruct> {
                    new InnerStruct(100)
                }));
                // 19998 middle rows; rand.Next(1) has an exclusive upper bound of 1
                // and so always yields 0, which the min/max/sum checks rely on
                for (int i = 2; i < 20000; i++)
                {
                    writer.addRow(new MyStruct(rand.Next(1), "a", true, new List <InnerStruct> {
                        new InnerStruct(100)
                    }));
                }
                // last row: null a and b
                writer.addRow(new MyStruct(null, null, true, new List <InnerStruct> {
                    new InnerStruct(100)
                }));
            }

            Reader reader = OrcFile.createReader(TestFilePath,
                                                 OrcFile.readerOptions(conf));

            // check the stats
            ColumnStatistics[] stats = reader.getStatistics();
            Assert.Equal(20000, reader.getNumberOfRows());
            Assert.Equal(20000, stats[0].getNumberOfValues());

            Assert.Equal(0, ((IntegerColumnStatistics)stats[1]).getMaximum());
            Assert.Equal(0, ((IntegerColumnStatistics)stats[1]).getMinimum());
            Assert.Equal(true, ((IntegerColumnStatistics)stats[1]).isSumDefined());
            Assert.Equal(0, ((IntegerColumnStatistics)stats[1]).getSum());
            Assert.Equal("count: 19998 hasNull: True min: 0 max: 0 sum: 0",
                         stats[1].ToString());

            Assert.Equal("a", ((StringColumnStatistics)stats[2]).getMaximum());
            Assert.Equal("a", ((StringColumnStatistics)stats[2]).getMinimum());
            Assert.Equal(19998, stats[2].getNumberOfValues());
            Assert.Equal("count: 19998 hasNull: True min: a max: a sum: 19998",
                         stats[2].ToString());

            // check the inspectors
            StructObjectInspector readerInspector =
                (StructObjectInspector)reader.getObjectInspector();

            Assert.Equal(ObjectInspectorCategory.STRUCT,
                         readerInspector.getCategory());
            Assert.Equal("struct<a:int,b:string,c:boolean,list:array<struct<z:int>>>",
                         readerInspector.getTypeName());

            using (RecordReader rows = reader.rows())
            {
                List <bool> expected = new List <bool>();
                foreach (StripeInformation sinfo in reader.getStripes())
                {
                    expected.Add(false);
                }
                // only the first and last stripe will have PRESENT stream
                expected[0] = true;
                expected[expected.Count - 1] = true;

                List <bool> got = new List <bool>();
                // check if the stripe footer contains PRESENT stream
                foreach (StripeInformation sinfo in reader.getStripes())
                {
                    OrcProto.StripeFooter sf =
                        ((RecordReaderImpl)rows).readStripeFooter(sinfo);
                    got.Add(sf.ToString().IndexOf(OrcProto.Stream.Types.Kind.PRESENT.ToString()) != -1);
                }
                Assert.Equal(expected, got);

                // row 1
                OrcStruct row = (OrcStruct)rows.next();
                Assert.NotNull(row);
                Assert.Null(row.getFieldValue(0));
                Assert.Null(row.getFieldValue(1));
                Assert.Equal(true, row.getFieldValue(2));
                Assert.Equal(100, ((OrcStruct)((IList <object>)row.getFieldValue(3))[0]).
                             getFieldValue(0));

                rows.seekToRow(19998);
                // last-1 row
                row = (OrcStruct)rows.next();
                Assert.NotNull(row);
                Assert.NotNull(row.getFieldValue(1));
                Assert.Equal(0, row.getFieldValue(0));
                Assert.Equal(true, row.getFieldValue(2));
                Assert.Equal(100, ((OrcStruct)((IList <object>)row.getFieldValue(3))[0]).
                             getFieldValue(0));

                // last row
                row = (OrcStruct)rows.next();
                Assert.NotNull(row);
                Assert.Null(row.getFieldValue(0));
                Assert.Null(row.getFieldValue(1));
                Assert.Equal(true, row.getFieldValue(2));
                Assert.Equal(100, ((OrcStruct)((IList <object>)row.getFieldValue(3))[0]).
                             getFieldValue(0));
            }
        }
        // Writes eight rows, one with a null "a" and one with a null "b", without
        // setting a compression kind on the writer options, then verifies the
        // file statistics, the reader's inspector, which stripes carry a PRESENT
        // stream, and the content of the first three rows.
        public void testColumnsWithNullAndCompression()
        {
            ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(typeof(MyStruct));

            // File.Create truncates an existing file; File.OpenWrite would leave
            // stale trailing bytes behind when a longer file already exists.
            using (Stream file = File.Create(TestFilePath))
            using (Writer writer = OrcFile.createWriter(TestFilePath, file, OrcFile.writerOptions(conf)
                                                        .inspector(inspector)
                                                        .stripeSize(100000)
                                                        .bufferSize(10000)))
            {
                writer.addRow(new MyStruct(3, "a", true,
                                           Lists.newArrayList(new InnerStruct(100))));
                // null integer column
                writer.addRow(new MyStruct(null, "b", true,
                                           Lists.newArrayList(new InnerStruct(100))));
                // null string column
                writer.addRow(new MyStruct(3, null, false,
                                           Lists.newArrayList(new InnerStruct(100))));
                writer.addRow(new MyStruct(3, "d", true,
                                           Lists.newArrayList(new InnerStruct(100))));
                writer.addRow(new MyStruct(2, "e", true,
                                           Lists.newArrayList(new InnerStruct(100))));
                writer.addRow(new MyStruct(2, "f", true,
                                           Lists.newArrayList(new InnerStruct(100))));
                writer.addRow(new MyStruct(2, "g", true,
                                           Lists.newArrayList(new InnerStruct(100))));
                writer.addRow(new MyStruct(2, "h", true,
                                           Lists.newArrayList(new InnerStruct(100))));
            }

            Reader reader = OrcFile.createReader(TestFilePath,
                                                 OrcFile.readerOptions(conf));

            // check the stats
            ColumnStatistics[] stats = reader.getStatistics();
            Assert.Equal(8, reader.getNumberOfRows());
            Assert.Equal(8, stats[0].getNumberOfValues());

            // seven non-null integers: 3 + 3 + 3 + 2 + 2 + 2 + 2 = 17
            Assert.Equal(3, ((IntegerColumnStatistics)stats[1]).getMaximum());
            Assert.Equal(2, ((IntegerColumnStatistics)stats[1]).getMinimum());
            Assert.Equal(true, ((IntegerColumnStatistics)stats[1]).isSumDefined());
            Assert.Equal(17, ((IntegerColumnStatistics)stats[1]).getSum());
            Assert.Equal("count: 7 hasNull: True min: 2 max: 3 sum: 17",
                         stats[1].ToString());

            Assert.Equal("h", ((StringColumnStatistics)stats[2]).getMaximum());
            Assert.Equal("a", ((StringColumnStatistics)stats[2]).getMinimum());
            Assert.Equal(7, stats[2].getNumberOfValues());
            Assert.Equal("count: 7 hasNull: True min: a max: h sum: 7",
                         stats[2].ToString());

            // check the inspectors
            StructObjectInspector readerInspector = (StructObjectInspector)reader.getObjectInspector();

            Assert.Equal(ObjectInspectorCategory.STRUCT,
                         readerInspector.getCategory());
            Assert.Equal("struct<a:int,b:string,c:boolean,list:array<struct<z:int>>>",
                         readerInspector.getTypeName());

            using (RecordReader rows = reader.rows())
            {
                // only the last stripe will have PRESENT stream
                List <bool> expected = new List <bool>();
                foreach (StripeInformation sinfo in reader.getStripes())
                {
                    expected.Add(false);
                }
                expected[expected.Count - 1] = true;

                List <bool> got = new List <bool>();
                // check if the stripe footer contains PRESENT stream
                foreach (StripeInformation sinfo in reader.getStripes())
                {
                    OrcProto.StripeFooter sf = ((RecordReaderImpl)rows).readStripeFooter(sinfo);
                    got.Add(sf.ToString().IndexOf(OrcProto.Stream.Types.Kind.PRESENT.ToString()) != -1);
                }
                Assert.Equal(expected, got);

                // row 1
                OrcStruct row = (OrcStruct)rows.next();
                Assert.NotNull(row);
                Assert.Equal(3, row.getFieldValue(0));
                Assert.Equal("a", row.getFieldValue(1).ToString());
                Assert.Equal(true, row.getFieldValue(2));
                Assert.Equal(100, ((OrcStruct)((IList <object>)row.getFieldValue(3))[0]).
                             getFieldValue(0));

                // row 2
                row = (OrcStruct)rows.next();
                Assert.NotNull(row);
                Assert.Null(row.getFieldValue(0));
                Assert.Equal("b", row.getFieldValue(1).ToString());
                Assert.Equal(true, row.getFieldValue(2));
                Assert.Equal(100, ((OrcStruct)((IList <object>)row.getFieldValue(3))[0]).
                             getFieldValue(0));

                // row 3
                row = (OrcStruct)rows.next();
                Assert.NotNull(row);
                Assert.Null(row.getFieldValue(1));
                Assert.Equal(3, row.getFieldValue(0));
                Assert.Equal(false, row.getFieldValue(2));
                Assert.Equal(100, ((OrcStruct)((IList <object>)row.getFieldValue(3))[0]).
                             getFieldValue(0));
            }
        }