/// <summary>
        /// Finds the record identifier column (if there is one) and returns a possibly new
        /// ObjectInspector that strains out the record id for the underlying writer.
        /// When a record id column is requested, the record-id related fields of this
        /// instance (recIdField, originalTxnField, origTxnInspector, rowIdField,
        /// rowIdInspector, recIdInspector) are populated as a side effect.
        /// </summary>
        /// <param name="inspector">Row inspector; must be a StructObjectInspector.</param>
        /// <param name="rowIdColNum">Index of the record identifier column; a negative value
        /// means there is none and the inspector is returned unchanged.</param>
        /// <returns>
        /// <paramref name="inspector"/> itself when <paramref name="rowIdColNum"/> is negative,
        /// otherwise a RecIdStrippingObjectInspector built over it.
        /// </returns>
        /// <exception cref="InvalidOperationException">
        /// The inspector is null or not a StructObjectInspector, or the stripping inspector
        /// reports no record id field for <paramref name="rowIdColNum"/>.
        /// </exception>
        private ObjectInspector findRecId(ObjectInspector inspector, int rowIdColNum)
        {
            if (!(inspector is StructObjectInspector))
            {
                // "is" is also false for null, so name that case explicitly instead of
                // failing with a NullReferenceException while building the message.
                throw new InvalidOperationException("Serious problem, expected a StructObjectInspector, but got a " +
                                                    (inspector == null ? "null" : inspector.GetType().FullName));
            }
            if (rowIdColNum < 0)
            {
                return inspector;
            }

            RecIdStrippingObjectInspector newInspector =
                new RecIdStrippingObjectInspector(inspector, rowIdColNum);
            recIdField = newInspector.getRecId();
            if (recIdField == null)
            {
                // Presumably this happens when rowIdColNum is past the last field of the
                // struct - TODO confirm against getRecId(). Fail with a clear message either way.
                throw new InvalidOperationException("Serious problem, no record identifier field found at column " +
                                                    rowIdColNum);
            }

            // Resolve the record id's struct inspector once and reuse it below.
            StructObjectInspector recIdStruct =
                (StructObjectInspector)recIdField.getFieldObjectInspector();
            IList<StructField> fields = recIdStruct.getAllStructFieldRefs();

            // Go by position, not field name, as field names aren't guaranteed.  The order of fields
            // in RecordIdentifier is transactionId, bucketId, rowId
            originalTxnField = fields[0];
            origTxnInspector = (LongObjectInspector)originalTxnField.getFieldObjectInspector();
            rowIdField       = fields[2];
            rowIdInspector   = (LongObjectInspector)rowIdField.getFieldObjectInspector();

            recIdInspector = recIdStruct;
            return newInspector;
        }
        /// <summary>
        /// Reads the bundled "orc-file-11-format.orc" resource while the time zone named by
        /// <paramref name="readerTimeZone"/> is handed to TestHelpers.SetTimeZoneInfo, and
        /// checks the "ts" column of the first row and of the row reached by seeking to 7499.
        /// </summary>
        /// <param name="readerTimeZone">Time zone identifier passed to TestHelpers.SetTimeZoneInfo.</param>
        public void testReadTimestampFormat_0_11(string readerTimeZone)
        {
            string oldFilePath = Path.Combine(TestHelpers.ResourcesDirectory, "orc-file-11-format.orc");

            using (TestHelpers.SetTimeZoneInfo(readerTimeZone))
            {
                Reader reader = OrcFile.createReader(oldFilePath, OrcFile.readerOptions(conf));

                StructObjectInspector    readerInspector = (StructObjectInspector)reader.getObjectInspector();
                IList<StructField>       fields          = readerInspector.getAllStructFieldRefs();
                TimestampObjectInspector tso             = (TimestampObjectInspector)readerInspector
                                                           .getStructFieldRef("ts").getFieldObjectInspector();

                using (RecordReader rows = reader.rows())
                {
                    // check the contents of the first row
                    object row = rows.next();
                    Assert.NotNull(row);
                    Assert.Equal(Timestamp.Parse("2000-03-12 15:00:00"),
                                 tso.getPrimitiveJavaObject(readerInspector.getStructFieldData(row, fields[12])));

                    // jump to row 7499 and check its contents; the hasNext() assertion below
                    // shows it is the final row of the file
                    Assert.True(rows.hasNext());
                    rows.seekToRow(7499);
                    row = rows.next();
                    Assert.Equal(Timestamp.Parse("2000-03-12 15:00:01"),
                                 tso.getPrimitiveJavaObject(readerInspector.getStructFieldData(row, fields[12])));

                    Assert.False(rows.hasNext());
                }
            }
        }
        /// <summary>
        /// Builds an object inspector from a 13-column struct type string via
        /// OrcStruct.createObjectInspector and checks its type name, its field lookups, and
        /// the list and map inspectors it exposes for the array and map columns.
        /// </summary>
        public void testInspectorFromTypeInfo()
        {
            TypeInfo typeInfo =
                TypeInfoUtils.getTypeInfoFromTypeString("struct<c1:boolean,c2:tinyint" +
                                                        ",c3:smallint,c4:int,c5:bigint,c6:float,c7:double,c8:binary," +
                                                        "c9:string,c10:struct<c1:int>,c11:map<int,int>,c12:uniontype<int>" +
                                                        ",c13:array<timestamp>>");
            StructObjectInspector inspector = (StructObjectInspector)
                                              OrcStruct.createObjectInspector(typeInfo);

            // The type name must round-trip unchanged.
            Assert.Equal("struct<c1:boolean,c2:tinyint,c3:smallint,c4:int,c5:" +
                         "bigint,c6:float,c7:double,c8:binary,c9:string,c10:struct<" +
                         "c1:int>,c11:map<int,int>,c12:uniontype<int>,c13:array<timestamp>>",
                         inspector.getTypeName());
            Assert.Null(inspector.getAllStructFieldRefs()[0].getFieldComment());
            Assert.Null(inspector.getStructFieldRef("UNKNOWN"));

            // Fill every field of a 13-field struct with its own index.
            OrcStruct s1 = new OrcStruct(13);
            for (int i = 0; i < 13; ++i)
            {
                s1.setFieldValue(i, i);
            }

            List<object> list = new List<object> {
                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12
            };

            Assert.Equal(list, inspector.getStructFieldsDataAsList(s1));

            // Field 12 is c13:array<timestamp>; its inspector is exercised against the plain list.
            ListObjectInspector listOI = (ListObjectInspector)
                                         inspector.getAllStructFieldRefs()[12].getFieldObjectInspector();

            Assert.Equal(ObjectInspectorCategory.LIST, listOI.getCategory());
            Assert.Equal(10, listOI.getListElement(list, 10));
            // Out-of-range indices are expected to yield null.
            Assert.Null(listOI.getListElement(list, -1));
            Assert.Null(listOI.getListElement(list, 13));
            Assert.Equal(13, listOI.getListLength(list));

            Dictionary<object, object> map = new Dictionary<object, object>()
            {
                { 1, 2 },
                { 2, 4 },
                { 3, 6 },
            };

            // Field 10 is c11:map<int,int>.
            MapObjectInspector mapOI = (MapObjectInspector)
                                       inspector.getAllStructFieldRefs()[10].getFieldObjectInspector();

            Assert.Equal(3, mapOI.getMapSize(map));
            Assert.Equal(4, mapOI.getMapValueElement(map, 2));
        }
            /// <summary>
            /// Wraps a struct inspector so that the field at position
            /// <paramref name="rowIdColNum"/> is kept aside as the record id while every
            /// other field is copied, in order, into the exposed field list.
            /// </summary>
            /// <param name="oi">Inspector to wrap; must be a StructObjectInspector.</param>
            /// <param name="rowIdColNum">Position of the record id field within the wrapped
            /// struct. If no field has this position, this constructor never assigns recId.</param>
            /// <exception cref="InvalidOperationException">
            /// <paramref name="oi"/> is null or not a StructObjectInspector.
            /// </exception>
            public RecIdStrippingObjectInspector(ObjectInspector oi, int rowIdColNum)
            {
                StructObjectInspector structInspector = oi as StructObjectInspector;

                if (structInspector == null)
                {
                    // Covers both a wrong type and null; avoid dereferencing a null oi
                    // while building the message.
                    throw new InvalidOperationException("Serious problem, expected a StructObjectInspector, " +
                                                        "but got a " + (oi == null ? "null" : oi.GetType().Name));
                }
                wrapped = structInspector;

                // Fetch the field list once and reuse it for sizing and iteration.
                IList<StructField> wrappedFields = wrapped.getAllStructFieldRefs();

                fields = new List<StructField>(wrappedFields.Count);
                for (int i = 0; i < wrappedFields.Count; i++)
                {
                    if (i == rowIdColNum)
                    {
                        recId = wrappedFields[i];
                    }
                    else
                    {
                        fields.Add(wrappedFields[i]);
                    }
                }
            }
        /// <summary>
        /// Creates an updater that writes to the file AcidUtils.createFilename derives from
        /// <paramref name="path"/> and <paramref name="options"/>. Construction also tries
        /// to write an ACID_FORMAT version marker under <paramref name="path"/>, optionally
        /// opens the side file returned by getSideFile, configures and creates the ORC
        /// writer, and prepares the reusable event struct.
        /// </summary>
        /// <param name="path">Base path; the ACID_FORMAT marker is created beneath it and the
        /// output file name is derived from it.</param>
        /// <param name="options">Supplies the bucket, file system, configuration, transaction
        /// id range, reporter, row inspector and record id column used below.</param>
        OrcRecordUpdater(Path path,
                         AcidOutputFormat.Options options)
        {
            this.options = options;
            this.bucket.set(options.getBucket());
            this.path = AcidUtils.createFilename(path, options);

            // Prefer the file system carried by the options; otherwise resolve it from the path.
            FileSystem fs = options.getFilesystem();
            if (fs == null)
            {
                fs = path.getFileSystem(options.getConfiguration());
            }
            this.fs = fs;

            // Best effort: an IOException while writing the version marker is only logged
            // (at debug level) and does not abort construction.
            try
            {
                FSDataOutputStream strm = fs.create(new Path(path, ACID_FORMAT), false);
                try
                {
                    strm.writeInt(ORC_ACID_VERSION);
                }
                finally
                {
                    // Close even when the write fails so the stream is not leaked.
                    strm.close();
                }
            }
            catch (IOException ioe)
            {
                if (LOG.isDebugEnabled())
                {
                    LOG.debug("Failed to create " + path + "/" + ACID_FORMAT + " with " +
                              ioe);
                }
            }

            // The side file is only opened when the transaction id range spans more than one
            // id and this is not a base write.
            if (options.getMinimumTransactionId() != options.getMaximumTransactionId() &&
                !options.isWritingBase())
            {
                flushLengths = fs.create(getSideFile(this.path), true, 8,
                                         options.getReporter());
            }
            else
            {
                flushLengths = null;
            }

            // Use caller-supplied ORC writer options when available, else build defaults
            // from the configuration.
            OrcFile.WriterOptions writerOptions = null;
            if (options is OrcOptions)
            {
                writerOptions = ((OrcOptions)options).getOrcOptions();
            }
            if (writerOptions == null)
            {
                writerOptions = OrcFile.writerOptions( /* options.getTableProperties(), */
                    options.getConfiguration());
            }
            writerOptions.fileSystem(fs).callback(indexBuilder);
            if (!options.isWritingBase())
            {
                // Non-base writes: no block padding, and the DELTA_* buffer and stripe sizes.
                writerOptions.blockPadding(false);
                writerOptions.bufferSize(DELTA_BUFFER_SIZE);
                writerOptions.stripeSize(DELTA_STRIPE_SIZE);
            }

            rowInspector = (StructObjectInspector)options.getInspector();
            // findRecId strips the record id column (if any) before the event schema is built.
            writerOptions.inspector(createEventSchema(findRecId(options.getInspector(),
                                                                options.getRecordIdColumn())));
            this.writer = OrcFile.createWriter(this.path, writerOptions);

            // Pre-populate the event struct with the field holders of this instance.
            item        = new OrcStruct(FIELDS);
            item.setFieldValue(OPERATION, operation);
            item.setFieldValue(CURRENT_TRANSACTION, currentTransaction);
            item.setFieldValue(ORIGINAL_TRANSACTION, originalTransaction);
            item.setFieldValue(BUCKET, bucket);
            item.setFieldValue(ROW_ID, rowId);
        }
예제 #6
0
        /// <summary>
        /// Writes four SimpleStruct rows (with one null binary and one null string), then
        /// reads the file back and checks row counts, raw data sizes, per-column statistics,
        /// the reader's inspectors and the contents of every row.
        /// </summary>
        public void testStringAndBinaryStatistics()
        {
            ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(typeof(SimpleStruct));

            // File.Create truncates an existing file; File.OpenWrite would leave any stale
            // trailing bytes of an older, longer file in place after the new data.
            using (Stream file = File.Create(TestFilePath))
            using (Writer writer = OrcFile.createWriter(TestFilePath, file, OrcFile.writerOptions(conf)
                                                        .inspector(inspector)
                                                        .stripeSize(100000)
                                                        .bufferSize(10000)))
            {
                writer.addRow(new SimpleStruct(bytes(0, 1, 2, 3, 4), "foo"));
                writer.addRow(new SimpleStruct(bytes(0, 1, 2, 3), "bar"));
                writer.addRow(new SimpleStruct(bytes(0, 1, 2, 3, 4, 5), null));
                writer.addRow(new SimpleStruct(null, "hi"));
                writer.close();

                // The totals are queried after close().
                Assert.Equal(4, writer.getNumberOfRows());
                Assert.Equal(273, writer.getRawDataSize());
            }

            Reader reader = OrcFile.createReader(TestFilePath,
                                                 OrcFile.readerOptions(conf));

            Assert.Equal(4, reader.getNumberOfRows());
            Assert.Equal(273, reader.getRawDataSize());
            Assert.Equal(15, reader.getRawDataSizeOfColumns(Lists.newArrayList("bytes1")));
            Assert.Equal(258, reader.getRawDataSizeOfColumns(Lists.newArrayList("string1")));
            Assert.Equal(273, reader.getRawDataSizeOfColumns(Lists.newArrayList("bytes1", "string1")));

            // check the stats
            ColumnStatistics[] stats = reader.getStatistics();
            Assert.Equal(4, stats[0].getNumberOfValues());
            Assert.Equal("count: 4 hasNull: False", stats[0].ToString());

            Assert.Equal(3, stats[1].getNumberOfValues());
            Assert.Equal(15, ((BinaryColumnStatistics)stats[1]).getSum());
            Assert.Equal("count: 3 hasNull: True sum: 15", stats[1].ToString());

            Assert.Equal(3, stats[2].getNumberOfValues());
            Assert.Equal("bar", ((StringColumnStatistics)stats[2]).getMinimum());
            Assert.Equal("hi", ((StringColumnStatistics)stats[2]).getMaximum());
            Assert.Equal(8, ((StringColumnStatistics)stats[2]).getSum());
            Assert.Equal("count: 3 hasNull: True min: bar max: hi sum: 8",
                         stats[2].ToString());

            // check the inspectors
            StructObjectInspector readerInspector =
                (StructObjectInspector)reader.getObjectInspector();

            Assert.Equal(ObjectInspectorCategory.STRUCT, readerInspector.getCategory());
            Assert.Equal("struct<bytes1:binary,string1:string>", readerInspector.getTypeName());
            IList<StructField>    fields = readerInspector.getAllStructFieldRefs();
            BinaryObjectInspector bi     = (BinaryObjectInspector)readerInspector.
                                           getStructFieldRef("bytes1").getFieldObjectInspector();
            StringObjectInspector st = (StringObjectInspector)readerInspector.
                                       getStructFieldRef("string1").getFieldObjectInspector();

            using (RecordReader rows = reader.rows())
            {
                object row = rows.next();
                Assert.NotNull(row);
                // check the contents of the first row
                Assert.Equal(bytes(0, 1, 2, 3, 4), bi.get(
                                 readerInspector.getStructFieldData(row, fields[0])));
                Assert.Equal("foo", st.getPrimitiveJavaObject(readerInspector.
                                                              getStructFieldData(row, fields[1])));

                // check the contents of the second row
                Assert.True(rows.hasNext());
                row = rows.next();
                Assert.Equal(bytes(0, 1, 2, 3), bi.get(
                                 readerInspector.getStructFieldData(row, fields[0])));
                Assert.Equal("bar", st.getPrimitiveJavaObject(readerInspector.
                                                              getStructFieldData(row, fields[1])));

                // check the contents of the third row (null string)
                Assert.True(rows.hasNext());
                row = rows.next();
                Assert.Equal(bytes(0, 1, 2, 3, 4, 5), bi.get(
                                 readerInspector.getStructFieldData(row, fields[0])));
                Assert.Null(st.getPrimitiveJavaObject(readerInspector.
                                                      getStructFieldData(row, fields[1])));

                // check the contents of the fourth row (null binary)
                Assert.True(rows.hasNext());
                row = rows.next();
                Assert.Null(bi.get(
                                readerInspector.getStructFieldData(row, fields[0])));
                Assert.Equal("hi", st.getPrimitiveJavaObject(readerInspector.
                                                             getStructFieldData(row, fields[1])));

                Assert.False(rows.hasNext());
            }
        }
예제 #7
0
 /// <summary>
 /// Wraps a struct inspector so that the field at position
 /// <paramref name="rowIdColNum"/> is kept aside as the record id while every
 /// other field is copied, in order, into the exposed field list.
 /// </summary>
 /// <param name="oi">Inspector to wrap; must be a StructObjectInspector.</param>
 /// <param name="rowIdColNum">Position of the record id field within the wrapped
 /// struct. If no field has this position, this constructor never assigns recId.</param>
 /// <exception cref="InvalidOperationException">
 /// <paramref name="oi"/> is null or not a StructObjectInspector.
 /// </exception>
 public RecIdStrippingObjectInspector(ObjectInspector oi, int rowIdColNum)
 {
     StructObjectInspector structInspector = oi as StructObjectInspector;
     if (structInspector == null)
     {
         // Covers both a wrong type and null; avoid dereferencing a null oi
         // while building the message.
         throw new InvalidOperationException("Serious problem, expected a StructObjectInspector, " +
             "but got a " + (oi == null ? "null" : oi.GetType().Name));
     }
     wrapped = structInspector;

     // Fetch the field list once and reuse it for sizing and iteration.
     IList<StructField> wrappedFields = wrapped.getAllStructFieldRefs();
     fields = new List<StructField>(wrappedFields.Count);
     for (int i = 0; i < wrappedFields.Count; i++)
     {
         if (i == rowIdColNum)
         {
             recId = wrappedFields[i];
         }
         else
         {
             fields.Add(wrappedFields[i]);
         }
     }
 }
        /// <summary>
        /// Writes 20000 uncompressed MyStruct rows in which only the first and last rows carry
        /// nulls, then checks the statistics, the reader's inspector, which stripe footers
        /// mention a PRESENT stream, and the contents of the first, next-to-last and last rows.
        /// </summary>
        public void testMultiStripeWithNull()
        {
            ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(typeof(MyStruct));

            // File.Create truncates an existing file; File.OpenWrite would leave any stale
            // trailing bytes of an older, longer file in place after the new data.
            using (Stream file = File.Create(TestFilePath))
            using (Writer writer = OrcFile.createWriter(TestFilePath, file, OrcFile.writerOptions(conf)
                                                        .inspector(inspector)
                                                        .stripeSize(100000)
                                                        .compress(CompressionKind.NONE)
                                                        .bufferSize(10000)))
            {
                Random rand = new Random(100);
                writer.addRow(new MyStruct(null, null, true, new List<InnerStruct> {
                    new InnerStruct(100)
                }));
                // 19998 rows without nulls; rand.Next(1) can only return 0, which is what the
                // min/max/sum assertions below rely on.
                for (int i = 2; i < 20000; i++)
                {
                    writer.addRow(new MyStruct(rand.Next(1), "a", true, new List<InnerStruct> {
                        new InnerStruct(100)
                    }));
                }
                writer.addRow(new MyStruct(null, null, true, new List<InnerStruct> {
                    new InnerStruct(100)
                }));
            }

            Reader reader = OrcFile.createReader(TestFilePath,
                                                 OrcFile.readerOptions(conf));

            // check the stats
            ColumnStatistics[] stats = reader.getStatistics();
            Assert.Equal(20000, reader.getNumberOfRows());
            Assert.Equal(20000, stats[0].getNumberOfValues());

            Assert.Equal(0, ((IntegerColumnStatistics)stats[1]).getMaximum());
            Assert.Equal(0, ((IntegerColumnStatistics)stats[1]).getMinimum());
            Assert.True(((IntegerColumnStatistics)stats[1]).isSumDefined());
            Assert.Equal(0, ((IntegerColumnStatistics)stats[1]).getSum());
            Assert.Equal("count: 19998 hasNull: True min: 0 max: 0 sum: 0",
                         stats[1].ToString());

            Assert.Equal("a", ((StringColumnStatistics)stats[2]).getMaximum());
            Assert.Equal("a", ((StringColumnStatistics)stats[2]).getMinimum());
            Assert.Equal(19998, stats[2].getNumberOfValues());
            Assert.Equal("count: 19998 hasNull: True min: a max: a sum: 19998",
                         stats[2].ToString());

            // check the inspectors
            StructObjectInspector readerInspector =
                (StructObjectInspector)reader.getObjectInspector();

            Assert.Equal(ObjectInspectorCategory.STRUCT,
                         readerInspector.getCategory());
            Assert.Equal("struct<a:int,b:string,c:boolean,list:array<struct<z:int>>>",
                         readerInspector.getTypeName());

            using (RecordReader rows = reader.rows())
            {
                // One entry per stripe, all false to start with.
                List<bool> expected = new List<bool>();
                foreach (StripeInformation sinfo in reader.getStripes())
                {
                    expected.Add(false);
                }
                // only the first and last stripe will have PRESENT stream
                expected[0] = true;
                expected[expected.Count - 1] = true;

                List<bool> got = new List<bool>();
                // check if the stripe footer contains PRESENT stream
                foreach (StripeInformation sinfo in reader.getStripes())
                {
                    OrcProto.StripeFooter sf =
                        ((RecordReaderImpl)rows).readStripeFooter(sinfo);
                    got.Add(sf.ToString().IndexOf(OrcProto.Stream.Types.Kind.PRESENT.ToString()) != -1);
                }
                Assert.Equal(expected, got);

                // row 1
                OrcStruct row = (OrcStruct)rows.next();
                Assert.NotNull(row);
                Assert.Null(row.getFieldValue(0));
                Assert.Null(row.getFieldValue(1));
                Assert.Equal(true, row.getFieldValue(2));
                Assert.Equal(100, ((OrcStruct)((IList<object>)row.getFieldValue(3))[0]).
                             getFieldValue(0));

                rows.seekToRow(19998);
                // last-1 row
                row = (OrcStruct)rows.next();
                Assert.NotNull(row);
                Assert.NotNull(row.getFieldValue(1));
                Assert.Equal(0, row.getFieldValue(0));
                Assert.Equal(true, row.getFieldValue(2));
                Assert.Equal(100, ((OrcStruct)((IList<object>)row.getFieldValue(3))[0]).
                             getFieldValue(0));

                // last row
                row = (OrcStruct)rows.next();
                Assert.NotNull(row);
                Assert.Null(row.getFieldValue(0));
                Assert.Null(row.getFieldValue(1));
                Assert.Equal(true, row.getFieldValue(2));
                Assert.Equal(100, ((OrcStruct)((IList<object>)row.getFieldValue(3))[0]).
                             getFieldValue(0));
            }
        }
        /// <summary>
        /// Writes eight MyStruct rows, one with a null int and one with a null string, using
        /// the writer's default compression setting, then checks the statistics, the reader's
        /// inspector, which stripe footers mention a PRESENT stream, and the first three rows.
        /// </summary>
        public void testColumnsWithNullAndCompression()
        {
            ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(typeof(MyStruct));

            // File.Create truncates an existing file; File.OpenWrite would leave any stale
            // trailing bytes of an older, longer file in place after the new data.
            using (Stream file = File.Create(TestFilePath))
            using (Writer writer = OrcFile.createWriter(TestFilePath, file, OrcFile.writerOptions(conf)
                                                        .inspector(inspector)
                                                        .stripeSize(100000)
                                                        .bufferSize(10000)))
            {
                writer.addRow(new MyStruct(3, "a", true,
                                           Lists.newArrayList(new InnerStruct(100))));
                writer.addRow(new MyStruct(null, "b", true,
                                           Lists.newArrayList(new InnerStruct(100))));
                writer.addRow(new MyStruct(3, null, false,
                                           Lists.newArrayList(new InnerStruct(100))));
                writer.addRow(new MyStruct(3, "d", true,
                                           Lists.newArrayList(new InnerStruct(100))));
                writer.addRow(new MyStruct(2, "e", true,
                                           Lists.newArrayList(new InnerStruct(100))));
                writer.addRow(new MyStruct(2, "f", true,
                                           Lists.newArrayList(new InnerStruct(100))));
                writer.addRow(new MyStruct(2, "g", true,
                                           Lists.newArrayList(new InnerStruct(100))));
                writer.addRow(new MyStruct(2, "h", true,
                                           Lists.newArrayList(new InnerStruct(100))));
            }

            Reader reader = OrcFile.createReader(TestFilePath,
                                                 OrcFile.readerOptions(conf));

            // check the stats
            ColumnStatistics[] stats = reader.getStatistics();
            Assert.Equal(8, reader.getNumberOfRows());
            Assert.Equal(8, stats[0].getNumberOfValues());

            Assert.Equal(3, ((IntegerColumnStatistics)stats[1]).getMaximum());
            Assert.Equal(2, ((IntegerColumnStatistics)stats[1]).getMinimum());
            Assert.True(((IntegerColumnStatistics)stats[1]).isSumDefined());
            Assert.Equal(17, ((IntegerColumnStatistics)stats[1]).getSum());
            Assert.Equal("count: 7 hasNull: True min: 2 max: 3 sum: 17",
                         stats[1].ToString());

            Assert.Equal("h", ((StringColumnStatistics)stats[2]).getMaximum());
            Assert.Equal("a", ((StringColumnStatistics)stats[2]).getMinimum());
            Assert.Equal(7, stats[2].getNumberOfValues());
            Assert.Equal("count: 7 hasNull: True min: a max: h sum: 7",
                         stats[2].ToString());

            // check the inspectors
            StructObjectInspector readerInspector = (StructObjectInspector)reader.getObjectInspector();

            Assert.Equal(ObjectInspectorCategory.STRUCT,
                         readerInspector.getCategory());
            Assert.Equal("struct<a:int,b:string,c:boolean,list:array<struct<z:int>>>",
                         readerInspector.getTypeName());

            using (RecordReader rows = reader.rows())
            {
                // only the last stripe will have PRESENT stream
                List<bool> expected = new List<bool>();
                foreach (StripeInformation sinfo in reader.getStripes())
                {
                    expected.Add(false);
                }
                expected[expected.Count - 1] = true;

                List<bool> got = new List<bool>();
                // check if the stripe footer contains PRESENT stream
                foreach (StripeInformation sinfo in reader.getStripes())
                {
                    OrcProto.StripeFooter sf = ((RecordReaderImpl)rows).readStripeFooter(sinfo);
                    got.Add(sf.ToString().IndexOf(OrcProto.Stream.Types.Kind.PRESENT.ToString()) != -1);
                }
                Assert.Equal(expected, got);

                // row 1
                OrcStruct row = (OrcStruct)rows.next();
                Assert.NotNull(row);
                Assert.Equal(3, row.getFieldValue(0));
                Assert.Equal("a", row.getFieldValue(1).ToString());
                Assert.Equal(true, row.getFieldValue(2));
                Assert.Equal(100, ((OrcStruct)((IList<object>)row.getFieldValue(3))[0]).
                             getFieldValue(0));

                // row 2 (null int)
                row = (OrcStruct)rows.next();
                Assert.NotNull(row);
                Assert.Null(row.getFieldValue(0));
                Assert.Equal("b", row.getFieldValue(1).ToString());
                Assert.Equal(true, row.getFieldValue(2));
                Assert.Equal(100, ((OrcStruct)((IList<object>)row.getFieldValue(3))[0]).
                             getFieldValue(0));

                // row 3 (null string)
                row = (OrcStruct)rows.next();
                Assert.NotNull(row);
                Assert.Null(row.getFieldValue(1));
                Assert.Equal(3, row.getFieldValue(0));
                Assert.Equal(false, row.getFieldValue(2));
                Assert.Equal(100, ((OrcStruct)((IList<object>)row.getFieldValue(3))[0]).
                             getFieldValue(0));
            }
        }