/// <summary>
/// Exercises OrcStruct equality, hash codes and string rendering for
/// structs with the same and with differing field counts, and again after
/// the last field has been reset to null.
/// </summary>
public void testStruct()
{
    // The three leading string fields are shared by every struct below.
    string[] words = { "hop", "on", "pop" };

    OrcStruct first = new OrcStruct(4);
    OrcStruct second = new OrcStruct(4);
    OrcStruct shorter = new OrcStruct(3);

    // Fill the first struct; a populated struct never equals null.
    for (int idx = 0; idx < words.Length; idx++)
    {
        first.setFieldValue(idx, words[idx]);
    }
    first.setFieldValue(3, 42);
    Assert.Equal(false, first.Equals(null));

    // An identically populated struct compares equal.
    for (int idx = 0; idx < words.Length; idx++)
    {
        second.setFieldValue(idx, words[idx]);
    }
    second.setFieldValue(3, 42);
    Assert.Equal(first, second);

    // A struct with one field fewer does not.
    for (int idx = 0; idx < words.Length; idx++)
    {
        shorter.setFieldValue(idx, words[idx]);
    }
    Assert.Equal(false, first.Equals(shorter));

    // Exact hash values are only checked when the build defines
    // PREDICTABLE_STRING_HASH.
#if PREDICTABLE_STRING_HASH
    Assert.Equal(11241, first.GetHashCode());
#endif
    Assert.Equal(first.GetHashCode(), second.GetHashCode());
#if PREDICTABLE_STRING_HASH
    Assert.Equal(11204, shorter.GetHashCode());
#endif

    Assert.Equal("{hop, on, pop, 42}", first.ToString());

    // Nulling a field on one side breaks equality in both directions ...
    first.setFieldValue(3, null);
    Assert.Equal(false, first.Equals(second));
    Assert.Equal(false, second.Equals(first));

    // ... and nulling the same field on the other side restores it.
    second.setFieldValue(3, null);
    Assert.Equal(first, second);
}
/// <summary>
/// Wraps a row-oriented ACID reader. Stores the inner reader together with
/// the key, value and object inspector it supplies, then initializes a
/// fresh VectorizedRowBatchCtx from the configuration and split.
/// </summary>
/// <param name="inner">Row reader whose key, value and inspector are reused.</param>
/// <param name="conf">Configuration passed to the batch context.</param>
/// <param name="split">File split passed to the batch context.</param>
/// <remarks>
/// Every failure type caught from the context initialization is rethrown
/// as an IOException that carries the original exception as its cause.
/// </remarks>
VectorizedOrcAcidRowReader(AcidInputFormat.RowReader<OrcStruct> inner,
                           Configuration conf,
                           FileSplit split)
{
    innerReader = inner;
    key = inner.createKey();
    rowBatchCtx = new VectorizedRowBatchCtx();
    value = inner.createValue();
    objectInspector = inner.getObjectInspector();

    try
    {
        rowBatchCtx.init(conf, split);
    }
    catch (ClassNotFoundException classMissing)
    {
        throw new IOException("Failed to initialize context", classMissing);
    }
    catch (SerDeException serdeFailure)
    {
        throw new IOException("Failed to initialize context", serdeFailure);
    }
    catch (InstantiationException instantiationFailure)
    {
        throw new IOException("Failed to initialize context", instantiationFailure);
    }
    catch (IllegalAccessException accessFailure)
    {
        throw new IOException("Failed to initialize context", accessFailure);
    }
    catch (HiveException hiveFailure)
    {
        throw new IOException("Failed to initialize context", hiveFailure);
    }
}
/// <summary>
/// Creates an object inspector from a 13-field struct type string and
/// checks its type name, field lookups, struct field extraction, and the
/// list and map inspectors taken from fields 12 and 10.
/// </summary>
public void testInspectorFromTypeInfo()
{
    const int fieldCount = 13;

    TypeInfo typeInfo =
        TypeInfoUtils.getTypeInfoFromTypeString("struct<c1:boolean,c2:tinyint" +
            ",c3:smallint,c4:int,c5:bigint,c6:float,c7:double,c8:binary," +
            "c9:string,c10:struct<c1:int>,c11:map<int,int>,c12:uniontype<int>" +
            ",c13:array<timestamp>>");
    StructObjectInspector inspector =
        (StructObjectInspector)OrcStruct.createObjectInspector(typeInfo);

    // The inspector reports the same type string it was built from.
    Assert.Equal("struct<c1:boolean,c2:tinyint,c3:smallint,c4:int,c5:" +
        "bigint,c6:float,c7:double,c8:binary,c9:string,c10:struct<" +
        "c1:int>,c11:map<int,int>,c12:uniontype<int>,c13:array<timestamp>>",
        inspector.getTypeName());
    Assert.Equal(null, inspector.getAllStructFieldRefs()[0].getFieldComment());
    Assert.Equal(null, inspector.getStructFieldRef("UNKNOWN"));

    // Field k of the struct holds the integer k; the expected list mirrors that.
    OrcStruct s1 = new OrcStruct(fieldCount);
    List<object> list = new List<object>();
    for (int k = 0; k < fieldCount; ++k)
    {
        s1.setFieldValue(k, k);
        list.Add(k);
    }
    Assert.Equal(list, inspector.getStructFieldsDataAsList(s1));

    // List inspector: in-range lookups return the element, out-of-range
    // lookups return null.
    ListObjectInspector listOI =
        (ListObjectInspector)inspector.getAllStructFieldRefs()[12].getFieldObjectInspector();
    Assert.Equal(ObjectInspectorCategory.LIST, listOI.getCategory());
    Assert.Equal(10, listOI.getListElement(list, 10));
    Assert.Equal(null, listOI.getListElement(list, -1));
    Assert.Equal(null, listOI.getListElement(list, 13));
    Assert.Equal(13, listOI.getListLength(list));

    // Map inspector: each key n maps to 2 * n.
    Dictionary<object, object> map = new Dictionary<object, object>();
    for (int n = 1; n <= 3; n++)
    {
        map.Add(n, 2 * n);
    }
    MapObjectInspector mapOI =
        (MapObjectInspector)inspector.getAllStructFieldRefs()[10].getFieldObjectInspector();
    Assert.Equal(3, mapOI.getMapSize(map));
    Assert.Equal(4, mapOI.getMapValueElement(map, 2));
}
private static void setRow(OrcStruct event,
/// <summary>
/// Reads the test file twice, once through the vectorized batch API and
/// once row by row, and asserts that every value and null flag agrees.
/// After each batch it also asserts the expected isRepeating and noNulls
/// flags of the first ten columns, and finally that the row reader has no
/// rows left once the batch reader is exhausted.
/// </summary>
private void checkVectorizedReader()
{
    Reader vreader = OrcFile.createReader(TestFilePath, OrcFile.readerOptions(conf));
    Reader reader = OrcFile.createReader(TestFilePath, OrcFile.readerOptions(conf));
    using (RecordReaderImpl vrr = (RecordReaderImpl)vreader.rows())
    using (RecordReaderImpl rr = (RecordReaderImpl)reader.rows())
    {
        VectorizedRowBatch batch = null;

        // Check Vectorized ORC reader against ORC row reader
        while (vrr.hasNext())
        {
            batch = vrr.nextBatch(batch);
            for (int i = 0; i < batch.size; i++)
            {
                OrcStruct row = (OrcStruct)rr.next();
                for (int j = 0; j < batch.cols.Length; j++)
                {
                    object a = (row.getFieldValue(j));
                    ColumnVector cv = batch.cols[j];

                    // if the value is repeating, use row 0
                    int rowId = cv.isRepeating ? 0 : i;

                    // make sure the null flag agrees
                    if (a == null)
                    {
                        Assert.True(!cv.noNulls && cv.isNull[rowId]);
                    }
                    else if (a is bool)
                    {
                        // bool values are stored as 1's and 0's, so convert and compare
                        long temp = (bool)a ? 1 : 0;
                        long b = ((LongColumnVector)cv).vector[rowId];
                        Assert.Equal(temp.ToString(), b.ToString());
                    }
                    else if (a is Timestamp)
                    {
                        // Timestamps are stored as long, so convert and compare
                        Timestamp t = (Timestamp)a;
                        // Timestamp.getTime() is overridden and is
                        //   long time = super.getTime();
                        //   return (time + (nanos / 1000000));
                        long timeInNanoSec = (t.Milliseconds * 1000000) + (t.getNanos() % 1000000);
                        long b = ((LongColumnVector)cv).vector[rowId];
                        Assert.Equal(timeInNanoSec.ToString(), b.ToString());
                    }
                    else if (a is Date)
                    {
                        // Dates are stored as long, so convert and compare
                        Date adt = (Date)a;
                        long b = ((LongColumnVector)cv).vector[rowId];
                        // Assert.Equal(adt, Date.daysToMillis((int)b));
                        Assert.Equal(adt.Days, (int)b);
                    }
                    else if (a is HiveDecimal)
                    {
                        // Compare the decimal directly. Index with rowId, not i,
                        // so that a repeating decimal vector is read from slot 0
                        // exactly like every other column type in this method.
                        HiveDecimal dec = (HiveDecimal)a;
                        HiveDecimal b = ((DecimalColumnVector)cv).vector[rowId];
                        Assert.Equal(dec, b);
                    }
                    else if (a is double)
                    {
                        double b = ((DoubleColumnVector)cv).vector[rowId];
                        Assert.Equal(a.ToString(), b.ToString());
                    }
                    else if (a is string)
                    {
                        // Strings are compared after decoding the vector's byte
                        // slice (buffer, start, length) as UTF-8.
                        BytesColumnVector bcv = (BytesColumnVector)cv;
                        string b = Encoding.UTF8.GetString(bcv.vector[rowId], bcv.start[rowId], bcv.length[rowId]);
                        Assert.Equal((string)a, b);
                    }
                    else if (a is int || a is long || a is sbyte || a is short)
                    {
                        Assert.Equal(a.ToString(), ((LongColumnVector)cv).vector[rowId].ToString());
                    }
                    else
                    {
                        // Any other runtime type is unexpected: fail the test.
                        Assert.True(false);
                    }
                }
            }

            // Check repeating
            Assert.Equal(false, batch.cols[0].isRepeating);
            Assert.Equal(false, batch.cols[1].isRepeating);
            Assert.Equal(false, batch.cols[2].isRepeating);
            Assert.Equal(true, batch.cols[3].isRepeating);
            Assert.Equal(false, batch.cols[4].isRepeating);
            Assert.Equal(false, batch.cols[5].isRepeating);
            Assert.Equal(false, batch.cols[6].isRepeating);
            Assert.Equal(false, batch.cols[7].isRepeating);
            Assert.Equal(false, batch.cols[8].isRepeating);
            Assert.Equal(false, batch.cols[9].isRepeating);

            // Check non null
            Assert.Equal(false, batch.cols[0].noNulls);
            Assert.Equal(false, batch.cols[1].noNulls);
            Assert.Equal(true, batch.cols[2].noNulls);
            Assert.Equal(true, batch.cols[3].noNulls);
            Assert.Equal(false, batch.cols[4].noNulls);
            Assert.Equal(false, batch.cols[5].noNulls);
            Assert.Equal(false, batch.cols[6].noNulls);
            Assert.Equal(false, batch.cols[7].noNulls);
            Assert.Equal(false, batch.cols[8].noNulls);
            Assert.Equal(false, batch.cols[9].noNulls);
        }

        // Both readers must run out of rows at the same time.
        Assert.Equal(false, rr.hasNext());
    }
}
/// <summary>
/// Writes 20000 uncompressed rows whose first and last rows have null
/// a/b fields, then checks file statistics, the reader's inspector, which
/// stripe footers mention a PRESENT stream, and the first, second-to-last
/// and last rows.
/// </summary>
public void testMultiStripeWithNull()
{
    ObjectInspector inspector =
        ObjectInspectorFactory.getReflectionObjectInspector(typeof(MyStruct));

    using (Stream file = File.OpenWrite(TestFilePath))
    using (Writer writer = OrcFile.createWriter(TestFilePath, file,
        OrcFile.writerOptions(conf)
            .inspector(inspector)
            .stripeSize(100000)
            .compress(CompressionKind.NONE)
            .bufferSize(10000)))
    {
        Random rng = new Random(100);

        // One null row, 19998 populated rows, one null row.
        writer.addRow(new MyStruct(null, null, true,
            new List<InnerStruct> { new InnerStruct(100) }));
        for (int n = 2; n < 20000; n++)
        {
            writer.addRow(new MyStruct(rng.Next(1), "a", true,
                new List<InnerStruct> { new InnerStruct(100) }));
        }
        writer.addRow(new MyStruct(null, null, true,
            new List<InnerStruct> { new InnerStruct(100) }));
    }

    Reader reader = OrcFile.createReader(TestFilePath, OrcFile.readerOptions(conf));

    // check the stats
    ColumnStatistics[] stats = reader.getStatistics();
    Assert.Equal(20000, reader.getNumberOfRows());
    Assert.Equal(20000, stats[0].getNumberOfValues());

    IntegerColumnStatistics intStats = (IntegerColumnStatistics)stats[1];
    Assert.Equal(0, intStats.getMaximum());
    Assert.Equal(0, intStats.getMinimum());
    Assert.Equal(true, intStats.isSumDefined());
    Assert.Equal(0, intStats.getSum());
    Assert.Equal("count: 19998 hasNull: True min: 0 max: 0 sum: 0",
        stats[1].ToString());

    StringColumnStatistics strStats = (StringColumnStatistics)stats[2];
    Assert.Equal("a", strStats.getMaximum());
    Assert.Equal("a", strStats.getMinimum());
    Assert.Equal(19998, stats[2].getNumberOfValues());
    Assert.Equal("count: 19998 hasNull: True min: a max: a sum: 19998",
        stats[2].ToString());

    // check the inspectors
    StructObjectInspector readerInspector =
        (StructObjectInspector)reader.getObjectInspector();
    Assert.Equal(ObjectInspectorCategory.STRUCT, readerInspector.getCategory());
    Assert.Equal("struct<a:int,b:string,c:boolean,list:array<struct<z:int>>>",
        readerInspector.getTypeName());

    using (RecordReader rows = reader.rows())
    {
        // only the first and last stripe will have PRESENT stream
        List<bool> expected = new List<bool>();
        foreach (StripeInformation sinfo in reader.getStripes())
        {
            expected.Add(false);
        }
        expected[0] = true;
        expected[expected.Count - 1] = true;

        // check if the stripe footer contains PRESENT stream
        List<bool> got = new List<bool>();
        foreach (StripeInformation sinfo in reader.getStripes())
        {
            OrcProto.StripeFooter footer = ((RecordReaderImpl)rows).readStripeFooter(sinfo);
            string footerText = footer.ToString();
            string presentName = OrcProto.Stream.Types.Kind.PRESENT.ToString();
            got.Add(footerText.IndexOf(presentName) != -1);
        }
        Assert.Equal(expected, got);

        IList<object> listField;
        OrcStruct innerStruct;

        // row 1
        OrcStruct current = (OrcStruct)rows.next();
        Assert.NotNull(current);
        Assert.Null(current.getFieldValue(0));
        Assert.Null(current.getFieldValue(1));
        Assert.Equal(true, current.getFieldValue(2));
        listField = (IList<object>)current.getFieldValue(3);
        innerStruct = (OrcStruct)listField[0];
        Assert.Equal(100, innerStruct.getFieldValue(0));

        rows.seekToRow(19998);

        // last-1 row
        current = (OrcStruct)rows.next();
        Assert.NotNull(current);
        Assert.NotNull(current.getFieldValue(1));
        Assert.Equal(0, current.getFieldValue(0));
        Assert.Equal(true, current.getFieldValue(2));
        listField = (IList<object>)current.getFieldValue(3);
        innerStruct = (OrcStruct)listField[0];
        Assert.Equal(100, innerStruct.getFieldValue(0));

        // last row
        current = (OrcStruct)rows.next();
        Assert.NotNull(current);
        Assert.Null(current.getFieldValue(0));
        Assert.Null(current.getFieldValue(1));
        Assert.Equal(true, current.getFieldValue(2));
        listField = (IList<object>)current.getFieldValue(3);
        innerStruct = (OrcStruct)listField[0];
        Assert.Equal(100, innerStruct.getFieldValue(0));
    }
}
/// <summary>
/// Writes eight rows, two of which have a null a or b field, without
/// overriding the writer's compression option, then checks file
/// statistics, the reader's inspector, which stripe footers mention a
/// PRESENT stream, and the first three rows.
/// </summary>
public void testColumnsWithNullAndCompression()
{
    ObjectInspector inspector =
        ObjectInspectorFactory.getReflectionObjectInspector(typeof(MyStruct));

    using (Stream file = File.OpenWrite(TestFilePath))
    using (Writer writer = OrcFile.createWriter(TestFilePath, file,
        OrcFile.writerOptions(conf)
            .inspector(inspector)
            .stripeSize(100000)
            .bufferSize(10000)))
    {
        writer.addRow(new MyStruct(3, "a", true,
            Lists.newArrayList(new InnerStruct(100))));
        writer.addRow(new MyStruct(null, "b", true,
            Lists.newArrayList(new InnerStruct(100))));
        writer.addRow(new MyStruct(3, null, false,
            Lists.newArrayList(new InnerStruct(100))));
        writer.addRow(new MyStruct(3, "d", true,
            Lists.newArrayList(new InnerStruct(100))));
        writer.addRow(new MyStruct(2, "e", true,
            Lists.newArrayList(new InnerStruct(100))));
        writer.addRow(new MyStruct(2, "f", true,
            Lists.newArrayList(new InnerStruct(100))));
        writer.addRow(new MyStruct(2, "g", true,
            Lists.newArrayList(new InnerStruct(100))));
        writer.addRow(new MyStruct(2, "h", true,
            Lists.newArrayList(new InnerStruct(100))));
    }

    Reader reader = OrcFile.createReader(TestFilePath, OrcFile.readerOptions(conf));

    // check the stats
    ColumnStatistics[] stats = reader.getStatistics();
    Assert.Equal(8, reader.getNumberOfRows());
    Assert.Equal(8, stats[0].getNumberOfValues());

    IntegerColumnStatistics intStats = (IntegerColumnStatistics)stats[1];
    Assert.Equal(3, intStats.getMaximum());
    Assert.Equal(2, intStats.getMinimum());
    Assert.Equal(true, intStats.isSumDefined());
    Assert.Equal(17, intStats.getSum());
    Assert.Equal("count: 7 hasNull: True min: 2 max: 3 sum: 17",
        stats[1].ToString());

    StringColumnStatistics strStats = (StringColumnStatistics)stats[2];
    Assert.Equal("h", strStats.getMaximum());
    Assert.Equal("a", strStats.getMinimum());
    Assert.Equal(7, stats[2].getNumberOfValues());
    Assert.Equal("count: 7 hasNull: True min: a max: h sum: 7",
        stats[2].ToString());

    // check the inspectors
    StructObjectInspector readerInspector =
        (StructObjectInspector)reader.getObjectInspector();
    Assert.Equal(ObjectInspectorCategory.STRUCT, readerInspector.getCategory());
    Assert.Equal("struct<a:int,b:string,c:boolean,list:array<struct<z:int>>>",
        readerInspector.getTypeName());

    using (RecordReader rows = reader.rows())
    {
        // only the last stripe will have PRESENT stream
        List<bool> expected = new List<bool>();
        foreach (StripeInformation sinfo in reader.getStripes())
        {
            expected.Add(false);
        }
        expected[expected.Count - 1] = true;

        // check if the stripe footer contains PRESENT stream
        List<bool> got = new List<bool>();
        foreach (StripeInformation sinfo in reader.getStripes())
        {
            OrcProto.StripeFooter footer = ((RecordReaderImpl)rows).readStripeFooter(sinfo);
            string footerText = footer.ToString();
            string presentName = OrcProto.Stream.Types.Kind.PRESENT.ToString();
            got.Add(footerText.IndexOf(presentName) != -1);
        }
        Assert.Equal(expected, got);

        IList<object> listField;
        OrcStruct innerStruct;

        // row 1
        OrcStruct current = (OrcStruct)rows.next();
        Assert.NotNull(current);
        Assert.Equal(3, current.getFieldValue(0));
        Assert.Equal("a", current.getFieldValue(1).ToString());
        Assert.Equal(true, current.getFieldValue(2));
        listField = (IList<object>)current.getFieldValue(3);
        innerStruct = (OrcStruct)listField[0];
        Assert.Equal(100, innerStruct.getFieldValue(0));

        // row 2
        current = (OrcStruct)rows.next();
        Assert.NotNull(current);
        Assert.Null(current.getFieldValue(0));
        Assert.Equal("b", current.getFieldValue(1).ToString());
        Assert.Equal(true, current.getFieldValue(2));
        listField = (IList<object>)current.getFieldValue(3);
        innerStruct = (OrcStruct)listField[0];
        Assert.Equal(100, innerStruct.getFieldValue(0));

        // row 3
        current = (OrcStruct)rows.next();
        Assert.NotNull(current);
        Assert.Null(current.getFieldValue(1));
        Assert.Equal(3, current.getFieldValue(0));
        Assert.Equal(false, current.getFieldValue(2));
        listField = (IList<object>)current.getFieldValue(3);
        innerStruct = (OrcStruct)listField[0];
        Assert.Equal(100, innerStruct.getFieldValue(0));
    }
}