// Find the record identifier column (if there is one) and return a possibly new
// ObjectInspector that will strain out the record id for the underlying writer.
//
// inspector   - inspector for the incoming rows; must be a StructObjectInspector.
// rowIdColNum - position of the record identifier column; a negative value means
//               there is no such column and the inspector is returned unchanged.
//
// When a record id column is present this also populates recIdField, recIdInspector,
// originalTxnField, origTxnInspector, rowIdField and rowIdInspector.
//
// Throws InvalidOperationException when the inspector is not a struct inspector or
// when rowIdColNum does not select any field of the struct.
private ObjectInspector findRecId(ObjectInspector inspector, int rowIdColNum)
{
    if (!(inspector is StructObjectInspector))
    {
        // A null inspector must not turn the diagnostic itself into a NullReferenceException.
        string actualType = inspector == null ? "null" : inspector.GetType().FullName;
        throw new InvalidOperationException(
            "Serious problem, expected a StructObjectInspector, but got a " + actualType);
    }

    if (rowIdColNum < 0)
    {
        return inspector;
    }

    RecIdStrippingObjectInspector newInspector =
        new RecIdStrippingObjectInspector(inspector, rowIdColNum);
    recIdField = newInspector.getRecId();
    if (recIdField == null)
    {
        // The stripping inspector only records a field when the index matches one.
        throw new InvalidOperationException(
            "Record identifier column " + rowIdColNum + " is not a field of the row struct");
    }

    recIdInspector = (StructObjectInspector)recIdField.getFieldObjectInspector();
    IList<StructField> fields = recIdInspector.getAllStructFieldRefs();

    // Go by position, not field name, as field names aren't guaranteed. The order of fields
    // in RecordIdentifier is transactionId, bucketId, rowId
    originalTxnField = fields[0];
    origTxnInspector = (LongObjectInspector)originalTxnField.getFieldObjectInspector();
    rowIdField = fields[2];
    rowIdInspector = (LongObjectInspector)rowIdField.getFieldObjectInspector();
    return newInspector;
}
// Opens the bundled "orc-file-11-format.orc" resource while the given reader time zone
// is in effect and checks the "ts" column of the first row and of the row reached by
// seeking to row 7499, after which no further rows are expected.
public void testReadTimestampFormat_0_11(string readerTimeZone)
{
    string legacyFile = Path.Combine(TestHelpers.ResourcesDirectory, "orc-file-11-format.orc");
    using (TestHelpers.SetTimeZoneInfo(readerTimeZone))
    {
        Reader reader = OrcFile.createReader(legacyFile, OrcFile.readerOptions(conf));
        StructObjectInspector rowInspector = (StructObjectInspector)reader.getObjectInspector();
        IList<StructField> columns = rowInspector.getAllStructFieldRefs();
        TimestampObjectInspector tsInspector =
            (TimestampObjectInspector)rowInspector.getStructFieldRef("ts").getFieldObjectInspector();

        using (RecordReader rows = reader.rows())
        {
            // first row
            object current = rows.next();
            Assert.NotNull(current);
            var expectedFirst = Timestamp.Parse("2000-03-12 15:00:00");
            var actualFirst = tsInspector.getPrimitiveJavaObject(
                rowInspector.getStructFieldData(current, columns[12]));
            Assert.Equal(expectedFirst, actualFirst);

            // jump to row 7499 and check it
            Assert.True(rows.hasNext());
            rows.seekToRow(7499);
            current = rows.next();
            var expectedLast = Timestamp.Parse("2000-03-12 15:00:01");
            var actualLast = tsInspector.getPrimitiveJavaObject(
                rowInspector.getStructFieldData(current, columns[12]));
            Assert.Equal(expectedLast, actualLast);

            Assert.False(rows.hasNext());
        }
    }
}
// Builds an OrcStruct object inspector from a 13-column struct type string and checks
// its type name, field lookups, struct field extraction, and the list and map
// inspectors taken from columns c13 and c11.
public void testInspectorFromTypeInfo()
{
    TypeInfo structType = TypeInfoUtils.getTypeInfoFromTypeString("struct<c1:boolean,c2:tinyint" +
        ",c3:smallint,c4:int,c5:bigint,c6:float,c7:double,c8:binary," +
        "c9:string,c10:struct<c1:int>,c11:map<int,int>,c12:uniontype<int>" +
        ",c13:array<timestamp>>");
    StructObjectInspector inspector =
        (StructObjectInspector)OrcStruct.createObjectInspector(structType);

    Assert.Equal("struct<c1:boolean,c2:tinyint,c3:smallint,c4:int,c5:" +
        "bigint,c6:float,c7:double,c8:binary,c9:string,c10:struct<" +
        "c1:int>,c11:map<int,int>,c12:uniontype<int>,c13:array<timestamp>>",
        inspector.getTypeName());
    Assert.Null(inspector.getAllStructFieldRefs()[0].getFieldComment());
    Assert.Null(inspector.getStructFieldRef("UNKNOWN"));

    // Each field of the struct holds its own index.
    OrcStruct s1 = new OrcStruct(13);
    for (int column = 0; column < 13; ++column)
    {
        s1.setFieldValue(column, column);
    }

    // The expected field list is therefore 0..12.
    List<object> list = new List<object>();
    for (int value = 0; value <= 12; ++value)
    {
        list.Add(value);
    }
    Assert.Equal(list, inspector.getStructFieldsDataAsList(s1));

    // c13 (index 12) is the array column.
    ListObjectInspector listOI =
        (ListObjectInspector)inspector.getAllStructFieldRefs()[12].getFieldObjectInspector();
    Assert.Equal(ObjectInspectorCategory.LIST, listOI.getCategory());
    Assert.Equal(10, listOI.getListElement(list, 10));
    Assert.Equal(null, listOI.getListElement(list, -1));
    Assert.Equal(null, listOI.getListElement(list, 13));
    Assert.Equal(13, listOI.getListLength(list));

    // c11 (index 10) is the map column.
    Dictionary<object, object> map = new Dictionary<object, object>();
    map.Add(1, 2);
    map.Add(2, 4);
    map.Add(3, 6);
    MapObjectInspector mapOI =
        (MapObjectInspector)inspector.getAllStructFieldRefs()[10].getFieldObjectInspector();
    Assert.Equal(3, mapOI.getMapSize(map));
    Assert.Equal(4, mapOI.getMapValueElement(map, 2));
}
// Wraps a struct inspector and hides the record identifier column from it.
//
// oi          - inspector to wrap; must be a StructObjectInspector.
// rowIdColNum - position of the record identifier column. The field at that position
//               is remembered in recId and left out of fields; if no field has that
//               position, recId is left unassigned and every field is kept.
//
// Throws InvalidOperationException when oi is not a StructObjectInspector (including null).
public RecIdStrippingObjectInspector(ObjectInspector oi, int rowIdColNum)
{
    if (!(oi is StructObjectInspector))
    {
        // A null inspector must not turn the diagnostic itself into a NullReferenceException.
        string actualType = oi == null ? "null" : oi.GetType().Name;
        throw new InvalidOperationException("Serious problem, expected a StructObjectInspector, " +
            "but got a " + actualType);
    }

    wrapped = (StructObjectInspector)oi;

    // Fetch the field list once and reuse it for both sizing and iteration.
    IList<StructField> wrappedFields = wrapped.getAllStructFieldRefs();
    fields = new List<StructField>(wrappedFields.Count);
    for (int i = 0; i < wrappedFields.Count; i++)
    {
        if (i == rowIdColNum)
        {
            recId = wrappedFields[i];
        }
        else
        {
            fields.Add(wrappedFields[i]);
        }
    }
}
// Creates an updater that writes ACID events for one bucket.
//
// path    - directory the output file name is derived from (via AcidUtils.createFilename);
//           the ACID_FORMAT marker file is created directly under it.
// options - ACID output options supplying the bucket, file system, configuration,
//           transaction range, reporter, inspector and record id column.
//
// Steps, in order:
//   1. resolve the file system (from the options, else from the path);
//   2. try to write the ACID_FORMAT marker file (failure is only logged at debug level);
//   3. open the side file for flush lengths when the transaction range is not a single
//      transaction and this is not a base write;
//   4. configure and create the ORC writer;
//   5. build the reusable event struct.
OrcRecordUpdater(Path path, AcidOutputFormat.Options options)
{
    this.options = options;
    this.bucket.set(options.getBucket());
    this.path = AcidUtils.createFilename(path, options);

    FileSystem fs = options.getFilesystem();
    if (fs == null)
    {
        fs = path.getFileSystem(options.getConfiguration());
    }
    this.fs = fs;

    try
    {
        // overwrite = false: creation fails if the marker already exists.
        FSDataOutputStream strm = fs.create(new Path(path, ACID_FORMAT), false);
        try
        {
            strm.writeInt(ORC_ACID_VERSION);
        }
        finally
        {
            // Close even when the write fails so the stream is not leaked.
            strm.close();
        }
    }
    catch (IOException ioe)
    {
        // Best effort: a failure here is deliberately not propagated.
        if (LOG.isDebugEnabled())
        {
            LOG.debug("Failed to create " + path + "/" + ACID_FORMAT + " with " + ioe);
        }
    }

    if (options.getMinimumTransactionId() != options.getMaximumTransactionId()
        && !options.isWritingBase())
    {
        flushLengths = fs.create(getSideFile(this.path), true, 8, options.getReporter());
    }
    else
    {
        flushLengths = null;
    }

    // Prefer ORC-specific options when the caller supplied them.
    OrcFile.WriterOptions writerOptions = null;
    if (options is OrcOptions)
    {
        writerOptions = ((OrcOptions)options).getOrcOptions();
    }
    if (writerOptions == null)
    {
        writerOptions = OrcFile.writerOptions( /* options.getTableProperties(), */
            options.getConfiguration());
    }
    writerOptions.fileSystem(fs).callback(indexBuilder);

    if (!options.isWritingBase())
    {
        // Non-base writes use the delta buffer/stripe sizes and no block padding.
        writerOptions.blockPadding(false);
        writerOptions.bufferSize(DELTA_BUFFER_SIZE);
        writerOptions.stripeSize(DELTA_STRIPE_SIZE);
    }

    rowInspector = (StructObjectInspector)options.getInspector();
    writerOptions.inspector(createEventSchema(findRecId(options.getInspector(),
        options.getRecordIdColumn())));
    this.writer = OrcFile.createWriter(this.path, writerOptions);

    // One event struct is allocated here and its slots are bound to the updater's fields.
    item = new OrcStruct(FIELDS);
    item.setFieldValue(OPERATION, operation);
    item.setFieldValue(CURRENT_TRANSACTION, currentTransaction);
    item.setFieldValue(ORIGINAL_TRANSACTION, originalTransaction);
    item.setFieldValue(BUCKET, bucket);
    item.setFieldValue(ROW_ID, rowId);
}
// Writes four SimpleStruct rows (one with a null string, one with null bytes), then
// reads the file back and checks row counts, raw data sizes, the binary and string
// column statistics, the reader's inspector, and the contents of every row.
public void testStringAndBinaryStatistics()
{
    ObjectInspector inspector =
        ObjectInspectorFactory.getReflectionObjectInspector(typeof(SimpleStruct));

    // File.Create truncates an existing file. File.OpenWrite would keep any bytes of a
    // previous, longer file beyond what this run writes.
    using (Stream file = File.Create(TestFilePath))
    using (Writer writer = OrcFile.createWriter(TestFilePath, file, OrcFile.writerOptions(conf)
        .inspector(inspector)
        .stripeSize(100000)
        .bufferSize(10000)))
    {
        writer.addRow(new SimpleStruct(bytes(0, 1, 2, 3, 4), "foo"));
        writer.addRow(new SimpleStruct(bytes(0, 1, 2, 3), "bar"));
        writer.addRow(new SimpleStruct(bytes(0, 1, 2, 3, 4, 5), null));
        writer.addRow(new SimpleStruct(null, "hi"));
        writer.close();

        // These are queried on the writer after it has been closed.
        Assert.Equal(4, writer.getNumberOfRows());
        Assert.Equal(273, writer.getRawDataSize());
    }

    Reader reader = OrcFile.createReader(TestFilePath, OrcFile.readerOptions(conf));
    Assert.Equal(4, reader.getNumberOfRows());
    Assert.Equal(273, reader.getRawDataSize());
    Assert.Equal(15, reader.getRawDataSizeOfColumns(Lists.newArrayList("bytes1")));
    Assert.Equal(258, reader.getRawDataSizeOfColumns(Lists.newArrayList("string1")));
    Assert.Equal(273, reader.getRawDataSizeOfColumns(Lists.newArrayList("bytes1", "string1")));

    // check the stats
    ColumnStatistics[] stats = reader.getStatistics();
    Assert.Equal(4, stats[0].getNumberOfValues());
    Assert.Equal("count: 4 hasNull: False", stats[0].ToString());

    Assert.Equal(3, stats[1].getNumberOfValues());
    Assert.Equal(15, ((BinaryColumnStatistics)stats[1]).getSum());
    Assert.Equal("count: 3 hasNull: True sum: 15", stats[1].ToString());

    Assert.Equal(3, stats[2].getNumberOfValues());
    Assert.Equal("bar", ((StringColumnStatistics)stats[2]).getMinimum());
    Assert.Equal("hi", ((StringColumnStatistics)stats[2]).getMaximum());
    Assert.Equal(8, ((StringColumnStatistics)stats[2]).getSum());
    Assert.Equal("count: 3 hasNull: True min: bar max: hi sum: 8", stats[2].ToString());

    // check the inspectors
    StructObjectInspector readerInspector = (StructObjectInspector)reader.getObjectInspector();
    Assert.Equal(ObjectInspectorCategory.STRUCT, readerInspector.getCategory());
    Assert.Equal("struct<bytes1:binary,string1:string>", readerInspector.getTypeName());
    IList<StructField> fields = readerInspector.getAllStructFieldRefs();
    BinaryObjectInspector bi = (BinaryObjectInspector)readerInspector.
        getStructFieldRef("bytes1").getFieldObjectInspector();
    StringObjectInspector st = (StringObjectInspector)readerInspector.
        getStructFieldRef("string1").getFieldObjectInspector();

    using (RecordReader rows = reader.rows())
    {
        // check the contents of the first row
        object row = rows.next();
        Assert.NotNull(row);
        Assert.Equal(bytes(0, 1, 2, 3, 4), bi.get(
            readerInspector.getStructFieldData(row, fields[0])));
        Assert.Equal("foo", st.getPrimitiveJavaObject(readerInspector.
            getStructFieldData(row, fields[1])));

        // check the contents of the second row
        Assert.True(rows.hasNext());
        row = rows.next();
        Assert.Equal(bytes(0, 1, 2, 3), bi.get(
            readerInspector.getStructFieldData(row, fields[0])));
        Assert.Equal("bar", st.getPrimitiveJavaObject(readerInspector.
            getStructFieldData(row, fields[1])));

        // check the contents of the third row (null string)
        Assert.True(rows.hasNext());
        row = rows.next();
        Assert.Equal(bytes(0, 1, 2, 3, 4, 5), bi.get(
            readerInspector.getStructFieldData(row, fields[0])));
        Assert.Null(st.getPrimitiveJavaObject(readerInspector.
            getStructFieldData(row, fields[1])));

        // check the contents of the fourth row (null bytes)
        Assert.True(rows.hasNext());
        row = rows.next();
        Assert.Null(bi.get(
            readerInspector.getStructFieldData(row, fields[0])));
        Assert.Equal("hi", st.getPrimitiveJavaObject(readerInspector.
            getStructFieldData(row, fields[1])));

        Assert.False(rows.hasNext());
    }
}
// Builds an inspector that exposes every field of the wrapped struct inspector except
// the one at position rowIdColNum, which is kept separately in recId.
// Throws InvalidOperationException when oi is not a StructObjectInspector.
public RecIdStrippingObjectInspector(ObjectInspector oi, int rowIdColNum)
{
    StructObjectInspector structInspector = oi as StructObjectInspector;
    if (structInspector == null)
    {
        throw new InvalidOperationException("Serious problem, expected a StructObjectInspector, " +
            "but got a " + oi.GetType().Name);
    }
    wrapped = structInspector;

    IList<StructField> allFields = wrapped.getAllStructFieldRefs();
    fields = new List<StructField>(wrapped.getAllStructFieldRefs().Count);

    // Walk the fields in order, diverting the record id column and keeping the rest.
    int position = 0;
    foreach (StructField candidate in allFields)
    {
        if (position != rowIdColNum)
        {
            fields.Add(candidate);
        }
        else
        {
            recId = candidate;
        }
        position++;
    }
}
// Writes 20000 uncompressed MyStruct rows where only the first and the last row carry
// nulls in columns a and b, then reads the file back and checks the column statistics,
// the reader's inspector, which stripe footers mention a PRESENT stream, and the
// contents of the first row and of the two rows read after seeking to row 19998.
public void testMultiStripeWithNull()
{
    ObjectInspector inspector =
        ObjectInspectorFactory.getReflectionObjectInspector(typeof(MyStruct));

    // File.Create truncates an existing file. File.OpenWrite would keep any bytes of a
    // previous, longer file beyond what this run writes.
    using (Stream file = File.Create(TestFilePath))
    using (Writer writer = OrcFile.createWriter(TestFilePath, file, OrcFile.writerOptions(conf)
        .inspector(inspector)
        .stripeSize(100000)
        .compress(CompressionKind.NONE)
        .bufferSize(10000)))
    {
        Random rand = new Random(100);
        writer.addRow(new MyStruct(null, null, true, new List<InnerStruct> { new InnerStruct(100) }));
        for (int i = 2; i < 20000; i++)
        {
            // rand.Next(1) always yields 0, which is what the min/max/sum checks below expect.
            writer.addRow(new MyStruct(rand.Next(1), "a", true,
                new List<InnerStruct> { new InnerStruct(100) }));
        }
        writer.addRow(new MyStruct(null, null, true, new List<InnerStruct> { new InnerStruct(100) }));
    }

    Reader reader = OrcFile.createReader(TestFilePath, OrcFile.readerOptions(conf));

    // check the stats
    ColumnStatistics[] stats = reader.getStatistics();
    Assert.Equal(20000, reader.getNumberOfRows());
    Assert.Equal(20000, stats[0].getNumberOfValues());

    Assert.Equal(0, ((IntegerColumnStatistics)stats[1]).getMaximum());
    Assert.Equal(0, ((IntegerColumnStatistics)stats[1]).getMinimum());
    Assert.True(((IntegerColumnStatistics)stats[1]).isSumDefined());
    Assert.Equal(0, ((IntegerColumnStatistics)stats[1]).getSum());
    Assert.Equal("count: 19998 hasNull: True min: 0 max: 0 sum: 0", stats[1].ToString());

    Assert.Equal("a", ((StringColumnStatistics)stats[2]).getMaximum());
    Assert.Equal("a", ((StringColumnStatistics)stats[2]).getMinimum());
    Assert.Equal(19998, stats[2].getNumberOfValues());
    Assert.Equal("count: 19998 hasNull: True min: a max: a sum: 19998", stats[2].ToString());

    // check the inspectors
    StructObjectInspector readerInspector = (StructObjectInspector)reader.getObjectInspector();
    Assert.Equal(ObjectInspectorCategory.STRUCT, readerInspector.getCategory());
    Assert.Equal("struct<a:int,b:string,c:boolean,list:array<struct<z:int>>>",
        readerInspector.getTypeName());

    using (RecordReader rows = reader.rows())
    {
        // only the first and last stripe will have PRESENT stream
        List<bool> expected = new List<bool>();
        foreach (StripeInformation sinfo in reader.getStripes())
        {
            expected.Add(false);
        }
        expected[0] = true;
        expected[expected.Count - 1] = true;

        // check if the stripe footer contains PRESENT stream
        string presentKind = OrcProto.Stream.Types.Kind.PRESENT.ToString();
        List<bool> got = new List<bool>();
        foreach (StripeInformation sinfo in reader.getStripes())
        {
            OrcProto.StripeFooter sf = ((RecordReaderImpl)rows).readStripeFooter(sinfo);
            got.Add(sf.ToString().IndexOf(presentKind, StringComparison.Ordinal) != -1);
        }
        Assert.Equal(expected, got);

        // row 1
        OrcStruct row = (OrcStruct)rows.next();
        Assert.NotNull(row);
        Assert.Null(row.getFieldValue(0));
        Assert.Null(row.getFieldValue(1));
        Assert.Equal(true, row.getFieldValue(2));
        Assert.Equal(100, ((OrcStruct)((IList<object>)row.getFieldValue(3))[0]).
            getFieldValue(0));

        rows.seekToRow(19998);

        // last-1 row
        row = (OrcStruct)rows.next();
        Assert.NotNull(row);
        Assert.NotNull(row.getFieldValue(1));
        Assert.Equal(0, row.getFieldValue(0));
        Assert.Equal(true, row.getFieldValue(2));
        Assert.Equal(100, ((OrcStruct)((IList<object>)row.getFieldValue(3))[0]).
            getFieldValue(0));

        // last row
        row = (OrcStruct)rows.next();
        Assert.NotNull(row);
        Assert.Null(row.getFieldValue(0));
        Assert.Null(row.getFieldValue(1));
        Assert.Equal(true, row.getFieldValue(2));
        Assert.Equal(100, ((OrcStruct)((IList<object>)row.getFieldValue(3))[0]).
            getFieldValue(0));
    }
}
// Writes eight MyStruct rows (row 2 has a null int, row 3 a null string) with the
// writer's default compression, then reads the file back and checks the column
// statistics, the reader's inspector, which stripe footers mention a PRESENT stream,
// and the contents of the first three rows.
public void testColumnsWithNullAndCompression()
{
    ObjectInspector inspector =
        ObjectInspectorFactory.getReflectionObjectInspector(typeof(MyStruct));

    // File.Create truncates an existing file. File.OpenWrite would keep any bytes of a
    // previous, longer file beyond what this run writes.
    using (Stream file = File.Create(TestFilePath))
    using (Writer writer = OrcFile.createWriter(TestFilePath, file, OrcFile.writerOptions(conf)
        .inspector(inspector)
        .stripeSize(100000)
        .bufferSize(10000)))
    {
        writer.addRow(new MyStruct(3, "a", true, Lists.newArrayList(new InnerStruct(100))));
        writer.addRow(new MyStruct(null, "b", true, Lists.newArrayList(new InnerStruct(100))));
        writer.addRow(new MyStruct(3, null, false, Lists.newArrayList(new InnerStruct(100))));
        writer.addRow(new MyStruct(3, "d", true, Lists.newArrayList(new InnerStruct(100))));
        writer.addRow(new MyStruct(2, "e", true, Lists.newArrayList(new InnerStruct(100))));
        writer.addRow(new MyStruct(2, "f", true, Lists.newArrayList(new InnerStruct(100))));
        writer.addRow(new MyStruct(2, "g", true, Lists.newArrayList(new InnerStruct(100))));
        writer.addRow(new MyStruct(2, "h", true, Lists.newArrayList(new InnerStruct(100))));
    }

    Reader reader = OrcFile.createReader(TestFilePath, OrcFile.readerOptions(conf));

    // check the stats
    ColumnStatistics[] stats = reader.getStatistics();
    Assert.Equal(8, reader.getNumberOfRows());
    Assert.Equal(8, stats[0].getNumberOfValues());

    Assert.Equal(3, ((IntegerColumnStatistics)stats[1]).getMaximum());
    Assert.Equal(2, ((IntegerColumnStatistics)stats[1]).getMinimum());
    Assert.True(((IntegerColumnStatistics)stats[1]).isSumDefined());
    Assert.Equal(17, ((IntegerColumnStatistics)stats[1]).getSum());
    Assert.Equal("count: 7 hasNull: True min: 2 max: 3 sum: 17", stats[1].ToString());

    Assert.Equal("h", ((StringColumnStatistics)stats[2]).getMaximum());
    Assert.Equal("a", ((StringColumnStatistics)stats[2]).getMinimum());
    Assert.Equal(7, stats[2].getNumberOfValues());
    Assert.Equal("count: 7 hasNull: True min: a max: h sum: 7", stats[2].ToString());

    // check the inspectors
    StructObjectInspector readerInspector = (StructObjectInspector)reader.getObjectInspector();
    Assert.Equal(ObjectInspectorCategory.STRUCT, readerInspector.getCategory());
    Assert.Equal("struct<a:int,b:string,c:boolean,list:array<struct<z:int>>>",
        readerInspector.getTypeName());

    using (RecordReader rows = reader.rows())
    {
        // only the last stripe will have PRESENT stream
        List<bool> expected = new List<bool>();
        foreach (StripeInformation sinfo in reader.getStripes())
        {
            expected.Add(false);
        }
        expected[expected.Count - 1] = true;

        // check if the stripe footer contains PRESENT stream
        string presentKind = OrcProto.Stream.Types.Kind.PRESENT.ToString();
        List<bool> got = new List<bool>();
        foreach (StripeInformation sinfo in reader.getStripes())
        {
            OrcProto.StripeFooter sf = ((RecordReaderImpl)rows).readStripeFooter(sinfo);
            got.Add(sf.ToString().IndexOf(presentKind, StringComparison.Ordinal) != -1);
        }
        Assert.Equal(expected, got);

        // row 1
        OrcStruct row = (OrcStruct)rows.next();
        Assert.NotNull(row);
        Assert.Equal(3, row.getFieldValue(0));
        Assert.Equal("a", row.getFieldValue(1).ToString());
        Assert.Equal(true, row.getFieldValue(2));
        Assert.Equal(100, ((OrcStruct)((IList<object>)row.getFieldValue(3))[0]).
            getFieldValue(0));

        // row 2 (null int)
        row = (OrcStruct)rows.next();
        Assert.NotNull(row);
        Assert.Null(row.getFieldValue(0));
        Assert.Equal("b", row.getFieldValue(1).ToString());
        Assert.Equal(true, row.getFieldValue(2));
        Assert.Equal(100, ((OrcStruct)((IList<object>)row.getFieldValue(3))[0]).
            getFieldValue(0));

        // row 3 (null string)
        row = (OrcStruct)rows.next();
        Assert.NotNull(row);
        Assert.Null(row.getFieldValue(1));
        Assert.Equal(3, row.getFieldValue(0));
        Assert.Equal(false, row.getFieldValue(2));
        Assert.Equal(100, ((OrcStruct)((IList<object>)row.getFieldValue(3))[0]).
            getFieldValue(0));
    }
}