/// <summary>
/// Exercises <c>Random.NextBigInteger(max)</c>: results must be non-negative and below the
/// requested bound (a bound of zero yields zero), and a negative bound must throw.
/// </summary>
public void NextBigIntegerTest()
{
    Random rand = new Random();

    // Sanity check that reversing a byte array produces the mirrored sequence.
    byte[] aaa = new byte[] { 10, 20, 30, 40, 50 };
    byte[] bbb = aaa.Reverse().ToArray();
    bool isSqeEq = bbb.SequenceEqual(new byte[] { 50, 40, 30, 20, 10 });
    Assert.IsTrue(isSqeEq);

    BigInteger b0 = rand.NextBigInteger(0);
    BigInteger b1 = rand.NextBigInteger(1);
    BigInteger b2 = rand.NextBigInteger(2);
    BigInteger b3 = rand.NextBigInteger(100);
    BigInteger b4 = rand.NextBigInteger(BigInteger.Pow(2, 127));

    Assert.IsTrue(b0 >= 0);
    Assert.IsTrue(b0 == 0);

    Assert.IsTrue(b1 >= 0);
    Assert.IsTrue(b1 < 1);

    Assert.IsTrue(b2 >= 0);
    Assert.IsTrue(b2 < 2);

    Assert.IsTrue(b3 >= 0);
    Assert.IsTrue(b3 < 100);

    Assert.IsTrue(b4 >= 0);
    // NOTE(review): the requested bound is 2^127 but the check allows up to 2^130;
    // confirm whether this slack is intentional before tightening it.
    Assert.IsTrue(b4 < BigInteger.Pow(2, 130));

    // A negative bound must be rejected. The outcome is recorded in a flag and asserted
    // after the try block: an Assert.Fail() placed inside the try would raise an exception
    // that the catch (Exception) clause swallows, so the test could never fail here.
    bool threw = false;
    try
    {
        rand.NextBigInteger(-BigInteger.Pow(2, 127));
    }
    catch (Exception)
    {
        threw = true;
    }
    Assert.IsTrue(threw);
}
/// <summary>
/// Writes rows made of a timestamp, an int/string union and a decimal one row at a time,
/// then reads the file back and verifies column selection, decimal statistics, stripe
/// contiguity, every row value and seeking to a row.
/// </summary>
public void testUnionAndTimestamp()
{
    // Type tree by id: 0 = struct(time, union, decimal), 1 = timestamp,
    // 2 = union(3, 4), 3 = int, 4 = string, 5 = decimal.
    List<OrcProto.Type> types = new List<OrcProto.Type>();
    types.Add(OrcProto.Type.CreateBuilder().SetKind(OrcProto.Type.Types.Kind.STRUCT).
        AddFieldNames("time").AddFieldNames("union").AddFieldNames("decimal").
        AddSubtypes(1).AddSubtypes(2).AddSubtypes(5).Build());
    types.Add(OrcProto.Type.CreateBuilder().SetKind(OrcProto.Type.Types.Kind.TIMESTAMP).
        Build());
    types.Add(OrcProto.Type.CreateBuilder().SetKind(OrcProto.Type.Types.Kind.UNION).
        AddSubtypes(3).AddSubtypes(4).Build());
    types.Add(OrcProto.Type.CreateBuilder().SetKind(OrcProto.Type.Types.Kind.INT).
        Build());
    types.Add(OrcProto.Type.CreateBuilder().SetKind(OrcProto.Type.Types.Kind.STRING).
        Build());
    types.Add(OrcProto.Type.CreateBuilder().SetKind(OrcProto.Type.Types.Kind.DECIMAL).
        Build());

    ObjectInspector inspector = OrcStruct.createObjectInspector(0, types);

    // Tracks the largest decimal written so the statistics maximum can be checked later.
    HiveDecimal maxValue = HiveDecimal.Parse("10000000000000000000");
    OrcStruct row = new OrcStruct(3);
    OrcUnion union = new OrcUnion();
    Random rand;

    using (Stream file = FileOpenWrite(TestFilePath))
    using (Writer writer = OrcFile.createWriter(TestFilePath, file, OrcFile.writerOptions(conf)
        .inspector(inspector)
        .stripeSize(1000)
        .compress(CompressionKind.NONE)
        .bufferSize(100)
        .blockPadding(false)))
    {
        // The same row and union instances are mutated and re-added for every row,
        // so the statement order below is significant.
        row.setFieldValue(1, union);
        row.setFieldValue(0, Timestamp.Parse("2000-03-12 15:00:00"));
        HiveDecimal value = HiveDecimal.Parse("12345678.6547456");
        row.setFieldValue(2, value);
        union.set((byte)0, 42);
        writer.addRow(row);

        row.setFieldValue(0, Timestamp.Parse("2000-03-20 12:00:00.123456789"));
        union.set((byte)1, "hello");
        value = HiveDecimal.Parse("-5643.234");
        row.setFieldValue(2, value);
        writer.addRow(row);

        // Row with every field null.
        row.setFieldValue(0, null);
        row.setFieldValue(1, null);
        row.setFieldValue(2, null);
        writer.addRow(row);

        // Rows with a non-null union holding a null value, once per tag.
        row.setFieldValue(1, union);
        union.set((byte)0, null);
        writer.addRow(row);
        union.set((byte)1, null);
        writer.addRow(row);

        union.set((byte)0, 200000);
        row.setFieldValue(0, Timestamp.Parse("1970-01-01 00:00:00"));
        value = HiveDecimal.Parse("10000000000000000000");
        row.setFieldValue(2, value);
        writer.addRow(row);

        // 68 rows with generated values; the fixed seed lets the read phase regenerate
        // the same decimal sequence.
        rand = new Random(42);
        for (int i = 1970; i < 2038; ++i)
        {
            row.setFieldValue(0, Timestamp.Parse(i + "-05-05 12:34:56." + i));
            if ((i & 1) == 0)
            {
                union.set((byte)0, (i * i));
            }
            else
            {
                union.set((byte)1, (i * i).ToString());
            }
            value = HiveDecimal.create(rand.NextBigInteger(64), rand.Next(18));
            row.setFieldValue(2, value);
            if (maxValue.CompareTo(value) < 0)
            {
                maxValue = value;
            }
            writer.addRow(row);
        }

        // let's add a lot of constant rows to test the rle
        row.setFieldValue(0, null);
        union.set((byte)0, 1732050807);
        row.setFieldValue(2, null);
        for (int i = 0; i < 5000; ++i)
        {
            writer.addRow(row);
        }

        union.set((byte)0, 0);
        writer.addRow(row);
        union.set((byte)0, 10);
        writer.addRow(row);
        union.set((byte)0, 138);
        writer.addRow(row);
        writer.close();

        // Column-selection flags are indexed by type id (0..5).
        TypeDescription schema = writer.getSchema();
        Assert.Equal(5, schema.getMaximumId());
        bool[] expected = new bool[] { false, false, false, false, false, false };
        bool[] included = OrcUtils.includeColumns("", schema);
        Assert.Equal(expected, included);

        expected = new bool[] { false, true, false, false, false, true };
        included = OrcUtils.includeColumns("time,decimal", schema);
        Assert.Equal(expected, included);

        expected = new bool[] { false, false, true, true, true, false };
        included = OrcUtils.includeColumns("union", schema);
        Assert.Equal(expected, included);
    }

    Reader reader = OrcFile.createReader(TestFilePath, OrcFile.readerOptions(conf));
    Assert.Equal(0, reader.getMetadataKeys().Count);
    // 6 hand-written rows + 68 generated rows + 5000 constant rows + 3 trailing rows.
    Assert.Equal(5077, reader.getNumberOfRows());
    DecimalColumnStatistics stats = (DecimalColumnStatistics)reader.getStatistics()[5];
    // 3 hand-written rows and the 68 generated rows carry a non-null decimal.
    Assert.Equal(71, stats.getNumberOfValues());
    Assert.Equal(HiveDecimal.Parse("-5643.234"), stats.getMinimum());
    Assert.Equal(maxValue, stats.getMaximum());
    // TODO: fix this
    // Assert.Equal(null,stats.getSum());

    // Stripes must be laid out back to back and together hold every row.
    int stripeCount = 0;
    int rowCount = 0;
    long currentOffset = -1;
    foreach (StripeInformation stripe in reader.getStripes())
    {
        stripeCount += 1;
        rowCount += (int)stripe.getNumberOfRows();
        if (currentOffset < 0)
        {
            currentOffset = stripe.getOffset() + stripe.getLength();
        }
        else
        {
            Assert.Equal(currentOffset, stripe.getOffset());
            currentOffset += stripe.getLength();
        }
    }
    Assert.Equal(reader.getNumberOfRows(), rowCount);
    Assert.Equal(2, stripeCount);
    Assert.Equal(reader.getContentLength(), currentOffset);

    using (RecordReader rows = reader.rows())
    {
        Assert.Equal(0, rows.getRowNumber());
        Assert.Equal(0.0, rows.getProgress(), 6);
        Assert.True(rows.hasNext());
        row = (OrcStruct)rows.next();
        Assert.Equal(1, rows.getRowNumber());
        inspector = reader.getObjectInspector();
        Assert.Equal("struct<time:timestamp,union:uniontype<int,string>,decimal:decimal(38,18)>",
            inspector.getTypeName());
        Assert.Equal(Timestamp.Parse("2000-03-12 15:00:00"), row.getFieldValue(0));
        union = (OrcUnion)row.getFieldValue(1);
        Assert.Equal(0, union.getTag());
        Assert.Equal(42, union.getObject());
        Assert.Equal(HiveDecimal.Parse("12345678.6547456"), row.getFieldValue(2));

        // The union is re-fetched from each row instead of assuming the reader hands
        // back the same OrcUnion instance on every call to next().
        row = (OrcStruct)rows.next();
        Assert.Equal(2, rows.getRowNumber());
        Assert.Equal(Timestamp.Parse("2000-03-20 12:00:00.123456789"), row.getFieldValue(0));
        union = (OrcUnion)row.getFieldValue(1);
        Assert.Equal(1, union.getTag());
        Assert.Equal("hello", union.getObject());
        Assert.Equal(HiveDecimal.Parse("-5643.234"), row.getFieldValue(2));

        row = (OrcStruct)rows.next();
        Assert.Null(row.getFieldValue(0));
        Assert.Null(row.getFieldValue(1));
        Assert.Null(row.getFieldValue(2));

        row = (OrcStruct)rows.next();
        Assert.Null(row.getFieldValue(0));
        union = (OrcUnion)row.getFieldValue(1);
        Assert.Equal(0, union.getTag());
        Assert.Null(union.getObject());
        Assert.Null(row.getFieldValue(2));

        row = (OrcStruct)rows.next();
        Assert.Null(row.getFieldValue(0));
        union = (OrcUnion)row.getFieldValue(1);
        Assert.Equal(1, union.getTag());
        Assert.Null(union.getObject());
        Assert.Null(row.getFieldValue(2));

        row = (OrcStruct)rows.next();
        Assert.Equal(Timestamp.Parse("1970-01-01 00:00:00"), row.getFieldValue(0));
        union = (OrcUnion)row.getFieldValue(1);
        Assert.Equal(200000, union.getObject());
        Assert.Equal(HiveDecimal.Parse("10000000000000000000"), row.getFieldValue(2));

        // Replay the generator with the seed used while writing.
        rand = new Random(42);
        for (int i = 1970; i < 2038; ++i)
        {
            row = (OrcStruct)rows.next();
            Assert.Equal(Timestamp.Parse(i + "-05-05 12:34:56." + i), row.getFieldValue(0));
            union = (OrcUnion)row.getFieldValue(1);
            if ((i & 1) == 0)
            {
                Assert.Equal(0, union.getTag());
                Assert.Equal(i * i, union.getObject());
            }
            else
            {
                Assert.Equal(1, union.getTag());
                Assert.Equal((i * i).ToString(), union.getObject());
            }
            Assert.Equal(HiveDecimal.create(rand.NextBigInteger(64), rand.Next(18)),
                row.getFieldValue(2));
        }

        for (int i = 0; i < 5000; ++i)
        {
            row = (OrcStruct)rows.next();
            union = (OrcUnion)row.getFieldValue(1);
            Assert.Equal(1732050807, union.getObject());
        }

        row = (OrcStruct)rows.next();
        union = (OrcUnion)row.getFieldValue(1);
        Assert.Equal(0, union.getObject());
        row = (OrcStruct)rows.next();
        union = (OrcUnion)row.getFieldValue(1);
        Assert.Equal(10, union.getObject());
        row = (OrcStruct)rows.next();
        union = (OrcUnion)row.getFieldValue(1);
        Assert.Equal(138, union.getObject());
        Assert.False(rows.hasNext());
        Assert.Equal(1.0, rows.getProgress(), 5);
        Assert.Equal(reader.getNumberOfRows(), rows.getRowNumber());

        // Seek back to the second row and re-check its contents.
        rows.seekToRow(1);
        row = (OrcStruct)rows.next();
        Assert.Equal(Timestamp.Parse("2000-03-20 12:00:00.123456789"), row.getFieldValue(0));
        union = (OrcUnion)row.getFieldValue(1);
        Assert.Equal(1, union.getTag());
        Assert.Equal("hello", union.getObject());
        Assert.Equal(HiveDecimal.Parse("-5643.234"), row.getFieldValue(2));
    }
}
/// <summary>
/// Writes rows made of a timestamp, an int/string union and a decimal through vectorized
/// row batches, then reads the file back and verifies column selection, decimal
/// statistics, stripe contiguity, every row value and seeking to a row.
/// </summary>
public void testUnionAndTimestamp()
{
    TypeDescription schema = TypeDescription.createStruct()
        .addField("time", TypeDescription.createTimestamp())
        .addField("union", TypeDescription.createUnion()
            .addUnionChild(TypeDescription.createInt())
            .addUnionChild(TypeDescription.createString()))
        .addField("decimal", TypeDescription.createDecimal()
            .withPrecision(38)
            .withScale(18));

    // Tracks the largest decimal written so the statistics maximum can be checked later.
    HiveDecimal maxValue = HiveDecimal.Parse("10000000000000000000");
    // Fixed seed so the read phase can regenerate the same decimal sequence.
    Random rand = new Random(42);

    using (Stream file = File.OpenWrite(TestFilePath))
    using (Writer writer = OrcFile.createWriter(TestFilePath, file, OrcFile.writerOptions(conf)
        .setSchema(schema)
        .stripeSize(1000)
        .compress(CompressionKind.NONE)
        .bufferSize(100)
        .blockPadding(false)))
    {
        // NOTE(review): setUnion is defined outside this block; its arguments appear to be
        // (batch, row index, timestamp, union tag, int value, string value, decimal) — confirm.
        VectorizedRowBatch batch = schema.createRowBatch();
        batch.size = 6;
        setUnion(batch, 0, Timestamp.Parse("2000-03-12 15:00:00"), 0, 42, null,
            HiveDecimal.Parse("12345678.6547456"));
        setUnion(batch, 1, Timestamp.Parse("2000-03-20 12:00:00.123456789"),
            1, null, "hello", HiveDecimal.Parse("-5643.234"));

        setUnion(batch, 2, null, null, null, null, null);
        setUnion(batch, 3, null, 0, null, null, null);
        setUnion(batch, 4, null, 1, null, null, null);

        setUnion(batch, 5, Timestamp.Parse("1970-01-01 00:00:00"), 0, 200000, null,
            HiveDecimal.Parse("10000000000000000000"));
        writer.addRowBatch(batch);

        // 68 rows with generated values, alternating between the two union tags.
        batch.reset();
        for (int i = 1970; i < 2038; ++i)
        {
            Timestamp ts = Timestamp.Parse(i + "-05-05 12:34:56." + i);
            HiveDecimal dec = HiveDecimal.create(rand.NextBigInteger(64), rand.Next(18));
            if ((i & 1) == 0)
            {
                setUnion(batch, batch.size++, ts, 0, i * i, null, dec);
            }
            else
            {
                setUnion(batch, batch.size++, ts, 1, null, (i * i).ToString(), dec);
            }
            if (maxValue.CompareTo(dec) < 0)
            {
                maxValue = dec;
            }
        }
        writer.addRowBatch(batch);
        batch.reset();

        // let's add a lot of constant rows to test the rle
        batch.size = 1000;
        for (int c = 0; c < batch.cols.Length; ++c)
        {
            batch.cols[c].setRepeating(true);
        }
        setUnion(batch, 0, null, 0, 1732050807, null, null);
        // The same 1000-row repeating batch is written five times (5000 rows).
        for (int i = 0; i < 5; ++i)
        {
            writer.addRowBatch(batch);
        }

        batch.reset();
        batch.size = 3;
        setUnion(batch, 0, null, 0, 0, null, null);
        setUnion(batch, 1, null, 0, 10, null, null);
        setUnion(batch, 2, null, 0, 138, null, null);
        writer.addRowBatch(batch);

        // Capture the writer's view of the schema for the column-selection checks below.
        schema = writer.getSchema();
    }

    Reader reader = OrcFile.createReader(TestFilePath, OrcFile.readerOptions(conf));

    // Column-selection flags are indexed by type id (0..5).
    Assert.Equal(5, schema.getMaximumId());
    bool[] expected = new bool[] { false, false, false, false, false, false };
    bool[] included = OrcUtils.includeColumns("", schema);
    Assert.Equal(expected, included);

    expected = new bool[] { false, true, false, false, false, true };
    included = OrcUtils.includeColumns("time,decimal", schema);
    Assert.Equal(expected, included);

    expected = new bool[] { false, false, true, true, true, false };
    included = OrcUtils.includeColumns("union", schema);
    Assert.Equal(expected, included);

    Assert.Equal(0, reader.getMetadataKeys().Count);
    // 6 hand-written rows + 68 generated rows + 5000 constant rows + 3 trailing rows.
    Assert.Equal(5077, reader.getNumberOfRows());
    DecimalColumnStatistics stats = (DecimalColumnStatistics)reader.getStatistics()[5];
    // 3 hand-written rows and the 68 generated rows carry a non-null decimal.
    Assert.Equal(71, stats.getNumberOfValues());
    Assert.Equal(HiveDecimal.Parse("-5643.234"), stats.getMinimum());
    Assert.Equal(maxValue, stats.getMaximum());
    // TODO: fix this
    // Assert.Equal(null,stats.getSum());

    // Stripes must be laid out back to back and together hold every row.
    int stripeCount = 0;
    int rowCount = 0;
    long currentOffset = -1;
    foreach (StripeInformation stripe in reader.getStripes())
    {
        stripeCount += 1;
        rowCount += (int)stripe.getNumberOfRows();
        if (currentOffset < 0)
        {
            currentOffset = stripe.getOffset() + stripe.getLength();
        }
        else
        {
            Assert.Equal(currentOffset, stripe.getOffset());
            currentOffset += stripe.getLength();
        }
    }
    Assert.Equal(reader.getNumberOfRows(), rowCount);
    Assert.Equal(2, stripeCount);
    Assert.Equal(reader.getContentLength(), currentOffset);

    using (RecordReader rows = reader.rows())
    {
        Assert.Equal(0, rows.getRowNumber());
        Assert.Equal(0.0, rows.getProgress(), 6);
        Assert.True(rows.hasNext());
        OrcStruct row = (OrcStruct)rows.next();
        Assert.Equal(1, rows.getRowNumber());
        ObjectInspector inspector = reader.getObjectInspector();
        Assert.Equal("struct<time:timestamp,union:uniontype<int,string>,decimal:decimal(38,18)>",
            inspector.getTypeName());
        Assert.Equal(Timestamp.Parse("2000-03-12 15:00:00"), row.getFieldValue(0));
        OrcUnion union = (OrcUnion)row.getFieldValue(1);
        Assert.Equal(0, union.getTag());
        Assert.Equal(42, union.getObject());
        Assert.Equal(HiveDecimal.Parse("12345678.6547456"), row.getFieldValue(2));

        row = (OrcStruct)rows.next();
        Assert.Equal(2, rows.getRowNumber());
        Assert.Equal(Timestamp.Parse("2000-03-20 12:00:00.123456789"), row.getFieldValue(0));
        union = (OrcUnion)row.getFieldValue(1);
        Assert.Equal(1, union.getTag());
        Assert.Equal("hello", union.getObject());
        Assert.Equal(HiveDecimal.Parse("-5643.234"), row.getFieldValue(2));

        row = (OrcStruct)rows.next();
        Assert.Null(row.getFieldValue(0));
        Assert.Null(row.getFieldValue(1));
        Assert.Null(row.getFieldValue(2));

        row = (OrcStruct)rows.next();
        Assert.Null(row.getFieldValue(0));
        union = (OrcUnion)row.getFieldValue(1);
        Assert.Equal(0, union.getTag());
        Assert.Null(union.getObject());
        Assert.Null(row.getFieldValue(2));

        row = (OrcStruct)rows.next();
        Assert.Null(row.getFieldValue(0));
        union = (OrcUnion)row.getFieldValue(1);
        Assert.Equal(1, union.getTag());
        Assert.Null(union.getObject());
        Assert.Null(row.getFieldValue(2));

        row = (OrcStruct)rows.next();
        Assert.Equal(Timestamp.Parse("1970-01-01 00:00:00"), row.getFieldValue(0));
        union = (OrcUnion)row.getFieldValue(1);
        Assert.Equal(200000, union.getObject());
        Assert.Equal(HiveDecimal.Parse("10000000000000000000"), row.getFieldValue(2));

        // Replay the generator with the seed used while writing.
        rand = new Random(42);
        for (int i = 1970; i < 2038; ++i)
        {
            row = (OrcStruct)rows.next();
            Assert.Equal(Timestamp.Parse(i + "-05-05 12:34:56." + i), row.getFieldValue(0));
            union = (OrcUnion)row.getFieldValue(1);
            if ((i & 1) == 0)
            {
                Assert.Equal(0, union.getTag());
                Assert.Equal(i * i, union.getObject());
            }
            else
            {
                Assert.Equal(1, union.getTag());
                Assert.Equal((i * i).ToString(), union.getObject());
            }
            Assert.Equal(
                HiveDecimal.create(rand.NextBigInteger(64), rand.Next(18)),
                row.getFieldValue(2));
        }

        for (int i = 0; i < 5000; ++i)
        {
            row = (OrcStruct)rows.next();
            union = (OrcUnion)row.getFieldValue(1);
            Assert.Equal(1732050807, union.getObject());
        }

        row = (OrcStruct)rows.next();
        union = (OrcUnion)row.getFieldValue(1);
        Assert.Equal(0, union.getObject());
        row = (OrcStruct)rows.next();
        union = (OrcUnion)row.getFieldValue(1);
        Assert.Equal(10, union.getObject());
        row = (OrcStruct)rows.next();
        union = (OrcUnion)row.getFieldValue(1);
        Assert.Equal(138, union.getObject());
        Assert.False(rows.hasNext());
        Assert.Equal(1.0, rows.getProgress(), 5);
        Assert.Equal(reader.getNumberOfRows(), rows.getRowNumber());

        // Seek back to the second row and re-check its contents.
        rows.seekToRow(1);
        row = (OrcStruct)rows.next();
        Assert.Equal(Timestamp.Parse("2000-03-20 12:00:00.123456789"), row.getFieldValue(0));
        union = (OrcUnion)row.getFieldValue(1);
        Assert.Equal(1, union.getTag());
        Assert.Equal("hello", union.getObject());
        Assert.Equal(HiveDecimal.Parse("-5643.234"), row.getFieldValue(2));
    }
}