void next(OrcStruct next) { if (recordReader.hasNext()) { long nextRowId = recordReader.getRowNumber(); // have to do initialization here, because the super's constructor // calls next and thus we need to initialize before our constructor // runs if (next == null) { nextRecord = new OrcStruct(OrcRecordUpdater.FIELDS); IntWritable operation = new IntWritable(OrcRecordUpdater.INSERT_OPERATION); nextRecord.setFieldValue(OrcRecordUpdater.OPERATION, operation); nextRecord.setFieldValue(OrcRecordUpdater.CURRENT_TRANSACTION, new LongWritable(0)); nextRecord.setFieldValue(OrcRecordUpdater.ORIGINAL_TRANSACTION, new LongWritable(0)); nextRecord.setFieldValue(OrcRecordUpdater.BUCKET, new IntWritable(bucket)); nextRecord.setFieldValue(OrcRecordUpdater.ROW_ID, new LongWritable(nextRowId)); nextRecord.setFieldValue(OrcRecordUpdater.ROW, recordReader.next(null)); } else { nextRecord = next; ((IntWritable)next.getFieldValue(OrcRecordUpdater.OPERATION)) .set(OrcRecordUpdater.INSERT_OPERATION); ((LongWritable)next.getFieldValue(OrcRecordUpdater.ORIGINAL_TRANSACTION)) .set(0); ((IntWritable)next.getFieldValue(OrcRecordUpdater.BUCKET)) .set(bucket); ((LongWritable)next.getFieldValue(OrcRecordUpdater.CURRENT_TRANSACTION)) .set(0); ((LongWritable)next.getFieldValue(OrcRecordUpdater.ROW_ID)) .set(0); nextRecord.setFieldValue(OrcRecordUpdater.ROW, recordReader.next(OrcRecordUpdater.getRow(next))); } key.setValues(0L, bucket, nextRowId, 0L, 0); if (maxKey != null && key.compareRow(maxKey) > 0) { if (LOG.isDebugEnabled()) { LOG.debug("key " + key + " > maxkey " + maxKey); } nextRecord = null; recordReader.close(); } } else { nextRecord = null; recordReader.close(); } }
/** Returns the ROW field of the given struct cast to OrcStruct, or null when the struct itself is null. */
static OrcStruct getRow(OrcStruct @struct)
{
    return @struct == null
        ? null
        : (OrcStruct)@struct.getFieldValue(ROW);
}
/** Returns the int held by the BUCKET field of the given struct. */
internal static int getBucket(OrcStruct @struct)
{
    IntWritable bucketField = (IntWritable)@struct.getFieldValue(BUCKET);
    return bucketField.get();
}
/** Returns the long held by the ORIGINAL_TRANSACTION field of the given struct. */
internal static long getOriginalTransaction(OrcStruct @struct)
{
    LongWritable originalTxn = (LongWritable)@struct.getFieldValue(ORIGINAL_TRANSACTION);
    return originalTxn.get();
}
/** Returns the long held by the CURRENT_TRANSACTION field of the given struct. */
internal static long getCurrentTransaction(OrcStruct @struct)
{
    LongWritable currentTxn = (LongWritable)@struct.getFieldValue(CURRENT_TRANSACTION);
    return currentTxn.get();
}
/** Returns the int held by the OPERATION field of the given struct. */
internal static int getOperation(OrcStruct @struct)
{
    IntWritable operationField = (IntWritable)@struct.getFieldValue(OPERATION);
    return operationField.get();
}
/** Returns the long held by the CURRENT_TRANSACTION field of the given struct. */
internal static long getCurrentTransaction(OrcStruct @struct)
{
    object rawField = @struct.getFieldValue(CURRENT_TRANSACTION);
    return ((LongWritable)rawField).get();
}
/**
 * Generate an ORC file with a range of dates and times, then read it back
 * and assert that every row matches what was written.
 *
 * For each year in [minYear, maxYear) and each ms in [1000, 2000) one row is
 * written holding a timestamp parsed from the year and ms and a date built
 * from the year.
 */
public void createOrcDateFile(string path, int minYear, int maxYear)
{
    // Type 0 is the root struct; its two fields refer to types 1 and 2.
    List<OrcProto.Type> schemaTypes = new List<OrcProto.Type>
    {
        OrcProto.Type.CreateBuilder().SetKind(OrcProto.Type.Types.Kind.STRUCT)
            .AddFieldNames("time").AddFieldNames("date")
            .AddSubtypes(1).AddSubtypes(2).Build(),
        OrcProto.Type.CreateBuilder().SetKind(OrcProto.Type.Types.Kind.TIMESTAMP).Build(),
        OrcProto.Type.CreateBuilder().SetKind(OrcProto.Type.Types.Kind.DATE).Build(),
    };
    ObjectInspector inspector = OrcStruct.createObjectInspector(0, schemaTypes);

    // Write phase: a single row object is refilled and appended repeatedly.
    using (Stream file = FileOpenWrite(path))
    using (Writer writer = OrcFile.createWriter(path, file,
        OrcFile.writerOptions(conf)
            .inspector(inspector)
            .stripeSize(100000)
            .bufferSize(10000)
            .blockPadding(false)))
    {
        OrcStruct outRow = new OrcStruct(2);
        for (int year = minYear; year < maxYear; ++year)
        {
            for (int ms = 1000; ms < 2000; ++ms)
            {
                var time = Timestamp.Parse(year + "-05-05 12:34:56." + ms);
                outRow.setFieldValue(0, time);
                var date = new Date(year - 1900, 11, 25);
                outRow.setFieldValue(1, date);
                writer.addRow(outRow);
            }
        }
    }

    // Read phase: walk the same (year, ms) sequence and compare row by row.
    Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(conf));
    using (RecordReader rows = reader.rows())
    {
        for (int year = minYear; year < maxYear; ++year)
        {
            for (int ms = 1000; ms < 2000; ++ms)
            {
                OrcStruct readBack = (OrcStruct)rows.next();
                var expectedTime = Timestamp.Parse(year + "-05-05 12:34:56." + ms);
                Assert.Equal(expectedTime, readBack.getFieldValue(0));
                var expectedDate = new Date(year - 1900, 11, 25);
                Assert.Equal(expectedDate, readBack.getFieldValue(1));
            }
        }
    }
}
/**
 * Writes rows of (timestamp, union of int/string, decimal) to TestFilePath
 * and reads them back.
 *
 * Checks, in order: the column-inclusion masks computed from the writer's
 * schema, the reader's metadata key count and row count, the decimal column
 * statistics, that stripes are laid out back to back, every row value in
 * write order, and finally a seek back to row 1.
 */
public void testUnionAndTimestamp()
{
    // Type ids: 0 root struct, 1 timestamp, 2 union, 3 int, 4 string, 5 decimal.
    List<OrcProto.Type> schemaTypes = new List<OrcProto.Type>
    {
        OrcProto.Type.CreateBuilder().SetKind(OrcProto.Type.Types.Kind.STRUCT)
            .AddFieldNames("time").AddFieldNames("union").AddFieldNames("decimal")
            .AddSubtypes(1).AddSubtypes(2).AddSubtypes(5).Build(),
        OrcProto.Type.CreateBuilder().SetKind(OrcProto.Type.Types.Kind.TIMESTAMP).Build(),
        OrcProto.Type.CreateBuilder().SetKind(OrcProto.Type.Types.Kind.UNION)
            .AddSubtypes(3).AddSubtypes(4).Build(),
        OrcProto.Type.CreateBuilder().SetKind(OrcProto.Type.Types.Kind.INT).Build(),
        OrcProto.Type.CreateBuilder().SetKind(OrcProto.Type.Types.Kind.STRING).Build(),
        OrcProto.Type.CreateBuilder().SetKind(OrcProto.Type.Types.Kind.DECIMAL).Build(),
    };

    ObjectInspector inspector = OrcStruct.createObjectInspector(0, schemaTypes);
    // Running maximum of the decimals written; compared with the column statistics later.
    HiveDecimal largestDecimal = HiveDecimal.Parse("10000000000000000000");
    OrcStruct record = new OrcStruct(3);
    OrcUnion unionValue = new OrcUnion();
    Random rng;

    using (Stream file = FileOpenWrite(TestFilePath))
    using (Writer writer = OrcFile.createWriter(TestFilePath, file,
        OrcFile.writerOptions(conf)
            .inspector(inspector)
            .stripeSize(1000)
            .compress(CompressionKind.NONE)
            .bufferSize(100)
            .blockPadding(false)))
    {
        // Row 1: timestamp, union tag 0 (int 42), decimal.
        record.setFieldValue(1, unionValue);
        record.setFieldValue(0, Timestamp.Parse("2000-03-12 15:00:00"));
        HiveDecimal dec = HiveDecimal.Parse("12345678.6547456");
        record.setFieldValue(2, dec);
        unionValue.set((byte)0, 42);
        writer.addRow(record);

        // Row 2: timestamp with nanoseconds, union tag 1 (string), negative decimal.
        record.setFieldValue(0, Timestamp.Parse("2000-03-20 12:00:00.123456789"));
        unionValue.set((byte)1, "hello");
        dec = HiveDecimal.Parse("-5643.234");
        record.setFieldValue(2, dec);
        writer.addRow(record);

        // Row 3: every field null.
        record.setFieldValue(0, null);
        record.setFieldValue(1, null);
        record.setFieldValue(2, null);
        writer.addRow(record);

        // Rows 4 and 5: union present but holding null under tag 0, then tag 1.
        record.setFieldValue(1, unionValue);
        unionValue.set((byte)0, null);
        writer.addRow(record);
        unionValue.set((byte)1, null);
        writer.addRow(record);

        // Row 6.
        unionValue.set((byte)0, 200000);
        record.setFieldValue(0, Timestamp.Parse("1970-01-01 00:00:00"));
        dec = HiveDecimal.Parse("10000000000000000000");
        record.setFieldValue(2, dec);
        writer.addRow(record);

        // One row per year, with decimals drawn from a fixed-seed generator
        // so the read side can regenerate the same sequence.
        rng = new Random(42);
        for (int year = 1970; year < 2038; ++year)
        {
            record.setFieldValue(0, Timestamp.Parse(year + "-05-05 12:34:56." + year));
            bool evenYear = (year & 1) == 0;
            if (evenYear)
            {
                unionValue.set((byte)0, (year * year));
            }
            else
            {
                unionValue.set((byte)1, (year * year).ToString());
            }
            dec = HiveDecimal.create(rng.NextBigInteger(64), rng.Next(18));
            record.setFieldValue(2, dec);
            if (largestDecimal.CompareTo(dec) < 0)
            {
                largestDecimal = dec;
            }
            writer.addRow(record);
        }

        // let's add a lot of constant rows to test the rle
        record.setFieldValue(0, null);
        unionValue.set((byte)0, 1732050807);
        record.setFieldValue(2, null);
        for (int n = 0; n < 5000; ++n)
        {
            writer.addRow(record);
        }

        // Three trailing rows that differ only in the union's int value.
        foreach (int tail in new int[] { 0, 10, 138 })
        {
            unionValue.set((byte)0, tail);
            writer.addRow(record);
        }
        writer.close();

        // Column-inclusion masks are indexed by type id (0..5).
        TypeDescription schema = writer.getSchema();
        Assert.Equal(5, schema.getMaximumId());
        Assert.Equal(
            new bool[] { false, false, false, false, false, false },
            OrcUtils.includeColumns("", schema));
        Assert.Equal(
            new bool[] { false, true, false, false, false, true },
            OrcUtils.includeColumns("time,decimal", schema));
        Assert.Equal(
            new bool[] { false, false, true, true, true, false },
            OrcUtils.includeColumns("union", schema));
    }

    Reader reader = OrcFile.createReader(TestFilePath, OrcFile.readerOptions(conf));
    Assert.Equal(0, reader.getMetadataKeys().Count);
    Assert.Equal(5077, reader.getNumberOfRows());

    DecimalColumnStatistics decimalStats = (DecimalColumnStatistics)reader.getStatistics()[5];
    Assert.Equal(71, decimalStats.getNumberOfValues());
    Assert.Equal(HiveDecimal.Parse("-5643.234"), decimalStats.getMinimum());
    Assert.Equal(largestDecimal, decimalStats.getMaximum());
    // TODO: fix this
    // Assert.Equal(null,stats.getSum());

    // Each stripe after the first must start where the previous one ended.
    int stripesSeen = 0;
    int rowsInStripes = 0;
    long expectedOffset = -1;
    foreach (StripeInformation stripe in reader.getStripes())
    {
        stripesSeen += 1;
        rowsInStripes += (int)stripe.getNumberOfRows();
        if (expectedOffset >= 0)
        {
            Assert.Equal(expectedOffset, stripe.getOffset());
            expectedOffset += stripe.getLength();
        }
        else
        {
            expectedOffset = stripe.getOffset() + stripe.getLength();
        }
    }
    Assert.Equal(reader.getNumberOfRows(), rowsInStripes);
    Assert.Equal(2, stripesSeen);
    Assert.Equal(reader.getContentLength(), expectedOffset);

    using (RecordReader rows = reader.rows())
    {
        Assert.Equal(0, rows.getRowNumber());
        Assert.Equal(0.0, rows.getProgress(), 6);
        Assert.Equal(true, rows.hasNext());

        // Row 1.
        record = (OrcStruct)rows.next();
        Assert.Equal(1, rows.getRowNumber());
        inspector = reader.getObjectInspector();
        Assert.Equal("struct<time:timestamp,union:uniontype<int,string>,decimal:decimal(38,18)>",
            inspector.getTypeName());
        Assert.Equal(Timestamp.Parse("2000-03-12 15:00:00"), record.getFieldValue(0));
        unionValue = (OrcUnion)record.getFieldValue(1);
        Assert.Equal(0, unionValue.getTag());
        Assert.Equal(42, unionValue.getObject());
        Assert.Equal(HiveDecimal.Parse("12345678.6547456"), record.getFieldValue(2));

        // Row 2. NOTE(review): from here on the union is often asserted through the
        // reference fetched from an earlier row rather than re-read from the new
        // row; presumably the reader reuses the same OrcUnion instance - confirm.
        record = (OrcStruct)rows.next();
        Assert.Equal(2, rows.getRowNumber());
        Assert.Equal(Timestamp.Parse("2000-03-20 12:00:00.123456789"), record.getFieldValue(0));
        Assert.Equal(1, unionValue.getTag());
        Assert.Equal("hello", unionValue.getObject());
        Assert.Equal(HiveDecimal.Parse("-5643.234"), record.getFieldValue(2));

        // Row 3: all null.
        record = (OrcStruct)rows.next();
        Assert.Equal(null, record.getFieldValue(0));
        Assert.Equal(null, record.getFieldValue(1));
        Assert.Equal(null, record.getFieldValue(2));

        // Row 4: union re-fetched after the null row.
        record = (OrcStruct)rows.next();
        Assert.Equal(null, record.getFieldValue(0));
        unionValue = (OrcUnion)record.getFieldValue(1);
        Assert.Equal(0, unionValue.getTag());
        Assert.Equal(null, unionValue.getObject());
        Assert.Equal(null, record.getFieldValue(2));

        // Row 5.
        record = (OrcStruct)rows.next();
        Assert.Equal(null, record.getFieldValue(0));
        Assert.Equal(1, unionValue.getTag());
        Assert.Equal(null, unionValue.getObject());
        Assert.Equal(null, record.getFieldValue(2));

        // Row 6.
        record = (OrcStruct)rows.next();
        Assert.Equal(Timestamp.Parse("1970-01-01 00:00:00"), record.getFieldValue(0));
        Assert.Equal(200000, unionValue.getObject());
        Assert.Equal(HiveDecimal.Parse("10000000000000000000"), record.getFieldValue(2));

        // Per-year rows; the generator is reseeded to replay the written decimals.
        rng = new Random(42);
        for (int year = 1970; year < 2038; ++year)
        {
            record = (OrcStruct)rows.next();
            Assert.Equal(Timestamp.Parse(year + "-05-05 12:34:56." + year), record.getFieldValue(0));
            bool evenYear = (year & 1) == 0;
            if (evenYear)
            {
                Assert.Equal(0, unionValue.getTag());
                Assert.Equal(year * year, unionValue.getObject());
            }
            else
            {
                Assert.Equal(1, unionValue.getTag());
                Assert.Equal((year * year).ToString(), unionValue.getObject());
            }
            Assert.Equal(HiveDecimal.create(rng.NextBigInteger(64), rng.Next(18)),
                record.getFieldValue(2));
        }

        // The 5000 constant rows.
        for (int n = 0; n < 5000; ++n)
        {
            record = (OrcStruct)rows.next();
            Assert.Equal(1732050807, unionValue.getObject());
        }

        // The three trailing rows.
        foreach (int tail in new int[] { 0, 10, 138 })
        {
            record = (OrcStruct)rows.next();
            Assert.Equal(tail, unionValue.getObject());
        }

        Assert.Equal(false, rows.hasNext());
        Assert.Equal(1.0, rows.getProgress(), 5);
        Assert.Equal(reader.getNumberOfRows(), rows.getRowNumber());

        // Seek back to row index 1 and re-check the second row written.
        rows.seekToRow(1);
        record = (OrcStruct)rows.next();
        Assert.Equal(Timestamp.Parse("2000-03-20 12:00:00.123456789"), record.getFieldValue(0));
        Assert.Equal(1, unionValue.getTag());
        Assert.Equal("hello", unionValue.getObject());
        Assert.Equal(HiveDecimal.Parse("-5643.234"), record.getFieldValue(2));
    }
}
/** Returns the long held by the ROW_ID field of the given struct. */
internal static long getRowId(OrcStruct @struct)
{
    LongWritable rowIdField = (LongWritable)@struct.getFieldValue(ROW_ID);
    return rowIdField.get();
}
/** Returns the ROW field of the given struct cast to OrcStruct; a null struct yields null. */
static OrcStruct getRow(OrcStruct @struct)
{
    if (@struct != null)
    {
        return (OrcStruct)@struct.getFieldValue(ROW);
    }
    return null;
}
/** Returns the long held by the ORIGINAL_TRANSACTION field of the given struct. */
internal static long getOriginalTransaction(OrcStruct @struct)
{
    object rawField = @struct.getFieldValue(ORIGINAL_TRANSACTION);
    return ((LongWritable)rawField).get();
}
/** Returns the int held by the OPERATION field of the given struct. */
internal static int getOperation(OrcStruct @struct)
{
    object rawField = @struct.getFieldValue(OPERATION);
    return ((IntWritable)rawField).get();
}
/** Returns the long held by the ROW_ID field of the given struct. */
internal static long getRowId(OrcStruct @struct)
{
    object rawField = @struct.getFieldValue(ROW_ID);
    return ((LongWritable)rawField).get();
}
/**
 * Asserts that an expected inner struct matches the one that was read.
 *
 * When both are non-null, int1 and string1 are compared with fields 0 and 1
 * of the actual struct. When either is null, both are asserted to be null.
 */
private void compareInner(InnerStruct expect, OrcStruct actual)
{
    bool bothPresent = expect != null && actual != null;
    if (bothPresent)
    {
        Assert.Equal(expect.int1, actual.getFieldValue(0));
        Assert.Equal(expect.string1, actual.getFieldValue(1));
        return;
    }

    // At least one side is null, so the other must be null as well.
    Assert.Equal(null, expect);
    Assert.Equal(null, actual);
}
/** Returns the int held by the BUCKET field of the given struct. */
internal static int getBucket(OrcStruct @struct)
{
    object rawField = @struct.getFieldValue(BUCKET);
    return ((IntWritable)rawField).get();
}