/// <summary>
/// Checks that the <c>NextBigInteger</c> extension on <see cref="Random"/> returns
/// non-negative values below the requested bound, and that a negative bound throws.
/// </summary>
public void NextBigIntegerTest()
        {
            Random rand = new Random();

            // Sanity check of LINQ Reverse on a byte array; independent of NextBigInteger.
            byte[] aaa = new byte[] { 10, 20, 30, 40, 50 };
            byte[] bbb = aaa.Reverse().ToArray();
            bool isSqeEq = bbb.SequenceEqual(new byte[] { 50, 40, 30, 20, 10 });
            Assert.IsTrue(isSqeEq);

            BigInteger b0 = rand.NextBigInteger(0);
            BigInteger b1 = rand.NextBigInteger(1);
            BigInteger b2 = rand.NextBigInteger(2);
            BigInteger b3 = rand.NextBigInteger(100);
            BigInteger b4 = rand.NextBigInteger(BigInteger.Pow(2, 127));

            // A bound of 0 can only produce 0.
            Assert.IsTrue(b0 >= 0);
            Assert.IsTrue(b0 == 0);
            Assert.IsTrue(b1 >= 0);
            Assert.IsTrue(b1 < 1);
            Assert.IsTrue(b2 >= 0);
            Assert.IsTrue(b2 < 2);
            Assert.IsTrue(b3 >= 0);
            Assert.IsTrue(b3 < 100);
            Assert.IsTrue(b4 >= 0);
            // NOTE(review): the bound requested above is 2^127 but this check allows up to 2^130.
            // Presumably deliberate slack - confirm against NextBigInteger and tighten if possible.
            Assert.IsTrue(b4 < BigInteger.Pow(2, 130));

            // A negative bound is expected to throw. The outcome is recorded in a flag and
            // asserted after the try block: an Assert.Fail() placed inside the try would throw
            // an exception that the catch below would swallow, hiding the failure.
            bool threw = false;
            try
            {
                rand.NextBigInteger(-BigInteger.Pow(2, 127));
            }
            catch (Exception)
            {
                threw = true;
            }
            Assert.IsTrue(threw);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Writes rows of (timestamp, union of int and string, decimal) to an ORC file one row
        /// at a time, then verifies column selection, decimal statistics, stripe layout and
        /// every row read back, including a seek back to the second row.
        /// </summary>
        public void testUnionAndTimestamp()
        {
            // Type table ids: 0 = struct, 1 = timestamp, 2 = union, 3 = int, 4 = string, 5 = decimal.
            List<OrcProto.Type> types = new List<OrcProto.Type>();
            types.Add(OrcProto.Type.CreateBuilder().SetKind(OrcProto.Type.Types.Kind.STRUCT).
                AddFieldNames("time").AddFieldNames("union").AddFieldNames("decimal").
                AddSubtypes(1).AddSubtypes(2).AddSubtypes(5).Build());
            types.Add(OrcProto.Type.CreateBuilder().SetKind(OrcProto.Type.Types.Kind.TIMESTAMP).
                Build());
            types.Add(OrcProto.Type.CreateBuilder().SetKind(OrcProto.Type.Types.Kind.UNION).
                AddSubtypes(3).AddSubtypes(4).Build());
            types.Add(OrcProto.Type.CreateBuilder().SetKind(OrcProto.Type.Types.Kind.INT).
                Build());
            types.Add(OrcProto.Type.CreateBuilder().SetKind(OrcProto.Type.Types.Kind.STRING).
                Build());
            types.Add(OrcProto.Type.CreateBuilder().SetKind(OrcProto.Type.Types.Kind.DECIMAL).
                Build());

            ObjectInspector inspector = OrcStruct.createObjectInspector(0, types);

            // Running maximum of the decimal column; starts at the largest fixed value written below.
            HiveDecimal maxValue = HiveDecimal.Parse("10000000000000000000");
            OrcStruct row = new OrcStruct(3);
            OrcUnion union = new OrcUnion();
            Random rand;

            using (Stream file = FileOpenWrite(TestFilePath))
            using (Writer writer = OrcFile.createWriter(TestFilePath, file, OrcFile.writerOptions(conf)
                .inspector(inspector)
                .stripeSize(1000)
                .compress(CompressionKind.NONE)
                .bufferSize(100)
                .blockPadding(false)))
            {
                // The same row and union objects are mutated and re-added for every row.
                // Rows 1-6: fixed values, including all-null and null-inside-union cases.
                row.setFieldValue(1, union);
                row.setFieldValue(0, Timestamp.Parse("2000-03-12 15:00:00"));
                HiveDecimal value = HiveDecimal.Parse("12345678.6547456");
                row.setFieldValue(2, value);
                union.set((byte)0, 42);
                writer.addRow(row);
                row.setFieldValue(0, Timestamp.Parse("2000-03-20 12:00:00.123456789"));
                union.set((byte)1, "hello");
                value = HiveDecimal.Parse("-5643.234");
                row.setFieldValue(2, value);
                writer.addRow(row);
                row.setFieldValue(0, null);
                row.setFieldValue(1, null);
                row.setFieldValue(2, null);
                writer.addRow(row);
                row.setFieldValue(1, union);
                union.set((byte)0, null);
                writer.addRow(row);
                union.set((byte)1, null);
                writer.addRow(row);
                union.set((byte)0, 200000);
                row.setFieldValue(0, Timestamp.Parse("1970-01-01 00:00:00"));
                value = HiveDecimal.Parse("10000000000000000000");
                row.setFieldValue(2, value);
                writer.addRow(row);

                // 68 rows (1970..2037): even years store an int in the union, odd years a string.
                // The fixed seed lets the read-back phase regenerate the same decimals.
                rand = new Random(42);
                for (int i = 1970; i < 2038; ++i)
                {
                    row.setFieldValue(0, Timestamp.Parse(i + "-05-05 12:34:56." + i));
                    if ((i & 1) == 0)
                    {
                        union.set((byte)0, (i * i));
                    }
                    else
                    {
                        union.set((byte)1, (i * i).ToString());
                    }
                    value = HiveDecimal.create(rand.NextBigInteger(64), rand.Next(18));
                    row.setFieldValue(2, value);
                    if (maxValue.CompareTo(value) < 0)
                    {
                        maxValue = value;
                    }
                    writer.addRow(row);
                }
                // let's add a lot of constant rows to test the rle
                row.setFieldValue(0, null);
                union.set((byte)0, 1732050807);
                row.setFieldValue(2, null);
                for (int i = 0; i < 5000; ++i)
                {
                    writer.addRow(row);
                }
                union.set((byte)0, 0);
                writer.addRow(row);
                union.set((byte)0, 10);
                writer.addRow(row);
                union.set((byte)0, 138);
                writer.addRow(row);
                writer.close();

                // Column-selection masks are indexed by type id (0..5, see the type table above).
                TypeDescription schema = writer.getSchema();
                Assert.Equal(5, schema.getMaximumId());
                bool[] expected = new bool[] { false, false, false, false, false, false };
                bool[] included = OrcUtils.includeColumns("", schema);
                Assert.Equal(expected, included);

                expected = new bool[] { false, true, false, false, false, true };
                included = OrcUtils.includeColumns("time,decimal", schema);
                Assert.Equal(expected, included);

                expected = new bool[] { false, false, true, true, true, false };
                included = OrcUtils.includeColumns("union", schema);
                Assert.Equal(expected, included);
            }

            Reader reader = OrcFile.createReader(TestFilePath, OrcFile.readerOptions(conf));

            Assert.Equal(0, reader.getMetadataKeys().Count);
            // 6 fixed rows + 68 generated rows + 5000 constant rows + 3 trailing rows.
            Assert.Equal(5077, reader.getNumberOfRows());
            DecimalColumnStatistics stats =
                (DecimalColumnStatistics)reader.getStatistics()[5];
            // Non-null decimals written: 3 among the fixed rows plus the 68 generated rows.
            Assert.Equal(71, stats.getNumberOfValues());
            Assert.Equal(HiveDecimal.Parse("-5643.234"), stats.getMinimum());
            Assert.Equal(maxValue, stats.getMaximum());
            // TODO: fix this
            //    Assert.Equal(null,stats.getSum());

            // Each stripe must start where the previous one ended, and the last stripe must
            // end at the reader's reported content length.
            int stripeCount = 0;
            int rowCount = 0;
            long currentOffset = -1;
            foreach (StripeInformation stripe in reader.getStripes())
            {
                stripeCount += 1;
                rowCount += (int)stripe.getNumberOfRows();
                if (currentOffset < 0)
                {
                    currentOffset = stripe.getOffset() + stripe.getLength();
                }
                else
                {
                    Assert.Equal(currentOffset, stripe.getOffset());
                    currentOffset += stripe.getLength();
                }
            }
            Assert.Equal(reader.getNumberOfRows(), rowCount);
            Assert.Equal(2, stripeCount);
            Assert.Equal(reader.getContentLength(), currentOffset);

            using (RecordReader rows = reader.rows())
            {
                // The union is re-read from every row that has one, so the assertions do not
                // depend on the reader handing back the same OrcUnion instance each time.
                Assert.Equal(0, rows.getRowNumber());
                Assert.Equal(0.0, rows.getProgress(), 6);
                Assert.True(rows.hasNext());
                row = (OrcStruct)rows.next();
                Assert.Equal(1, rows.getRowNumber());
                inspector = reader.getObjectInspector();
                Assert.Equal("struct<time:timestamp,union:uniontype<int,string>,decimal:decimal(38,18)>",
                    inspector.getTypeName());
                Assert.Equal(Timestamp.Parse("2000-03-12 15:00:00"), row.getFieldValue(0));
                union = (OrcUnion)row.getFieldValue(1);
                Assert.Equal(0, union.getTag());
                Assert.Equal(42, union.getObject());
                Assert.Equal(HiveDecimal.Parse("12345678.6547456"), row.getFieldValue(2));
                row = (OrcStruct)rows.next();
                Assert.Equal(2, rows.getRowNumber());
                Assert.Equal(Timestamp.Parse("2000-03-20 12:00:00.123456789"), row.getFieldValue(0));
                union = (OrcUnion)row.getFieldValue(1);
                Assert.Equal(1, union.getTag());
                Assert.Equal("hello", union.getObject());
                Assert.Equal(HiveDecimal.Parse("-5643.234"), row.getFieldValue(2));
                row = (OrcStruct)rows.next();
                Assert.Null(row.getFieldValue(0));
                Assert.Null(row.getFieldValue(1));
                Assert.Null(row.getFieldValue(2));
                row = (OrcStruct)rows.next();
                Assert.Null(row.getFieldValue(0));
                union = (OrcUnion)row.getFieldValue(1);
                Assert.Equal(0, union.getTag());
                Assert.Null(union.getObject());
                Assert.Null(row.getFieldValue(2));
                row = (OrcStruct)rows.next();
                Assert.Null(row.getFieldValue(0));
                union = (OrcUnion)row.getFieldValue(1);
                Assert.Equal(1, union.getTag());
                Assert.Null(union.getObject());
                Assert.Null(row.getFieldValue(2));
                row = (OrcStruct)rows.next();
                Assert.Equal(Timestamp.Parse("1970-01-01 00:00:00"), row.getFieldValue(0));
                union = (OrcUnion)row.getFieldValue(1);
                Assert.Equal(200000, union.getObject());
                Assert.Equal(HiveDecimal.Parse("10000000000000000000"), row.getFieldValue(2));

                // Same seed and call order as the write phase, so the expected decimals match.
                rand = new Random(42);
                for (int i = 1970; i < 2038; ++i)
                {
                    row = (OrcStruct)rows.next();
                    Assert.Equal(Timestamp.Parse(i + "-05-05 12:34:56." + i), row.getFieldValue(0));
                    union = (OrcUnion)row.getFieldValue(1);
                    if ((i & 1) == 0)
                    {
                        Assert.Equal(0, union.getTag());
                        Assert.Equal(i * i, union.getObject());
                    }
                    else
                    {
                        Assert.Equal(1, union.getTag());
                        Assert.Equal((i * i).ToString(), union.getObject());
                    }
                    Assert.Equal(HiveDecimal.create(rand.NextBigInteger(64), rand.Next(18)), row.getFieldValue(2));
                }
                for (int i = 0; i < 5000; ++i)
                {
                    row = (OrcStruct)rows.next();
                    union = (OrcUnion)row.getFieldValue(1);
                    Assert.Equal(1732050807, union.getObject());
                }
                row = (OrcStruct)rows.next();
                union = (OrcUnion)row.getFieldValue(1);
                Assert.Equal(0, union.getObject());
                row = (OrcStruct)rows.next();
                union = (OrcUnion)row.getFieldValue(1);
                Assert.Equal(10, union.getObject());
                row = (OrcStruct)rows.next();
                union = (OrcUnion)row.getFieldValue(1);
                Assert.Equal(138, union.getObject());
                Assert.False(rows.hasNext());
                Assert.Equal(1.0, rows.getProgress(), 5);
                Assert.Equal(reader.getNumberOfRows(), rows.getRowNumber());

                // Seek back to row index 1 and check that the second row is returned again.
                rows.seekToRow(1);
                row = (OrcStruct)rows.next();
                Assert.Equal(Timestamp.Parse("2000-03-20 12:00:00.123456789"), row.getFieldValue(0));
                union = (OrcUnion)row.getFieldValue(1);
                Assert.Equal(1, union.getTag());
                Assert.Equal("hello", union.getObject());
                Assert.Equal(HiveDecimal.Parse("-5643.234"), row.getFieldValue(2));
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Writes rows of (timestamp, union of int and string, decimal) to an ORC file using
        /// vectorized row batches, then verifies column selection, decimal statistics, stripe
        /// layout and every row read back, including a seek back to the second row.
        /// </summary>
        public void testUnionAndTimestamp()
        {
            TypeDescription schema = TypeDescription.createStruct()
                .addField("time", TypeDescription.createTimestamp())
                .addField("union", TypeDescription.createUnion()
                    .addUnionChild(TypeDescription.createInt())
                    .addUnionChild(TypeDescription.createString()))
                .addField("decimal", TypeDescription.createDecimal()
                    .withPrecision(38)
                    .withScale(18));
            // Running maximum of the decimal column; starts at the largest fixed value written below.
            HiveDecimal maxValue = HiveDecimal.Parse("10000000000000000000");
            // Fixed seed so the read-back phase can regenerate the same decimals.
            Random rand = new Random(42);

            using (Stream file = File.OpenWrite(TestFilePath))
            using (Writer writer = OrcFile.createWriter(TestFilePath, file, OrcFile.writerOptions(conf)
                .setSchema(schema)
                .stripeSize(1000)
                .compress(CompressionKind.NONE)
                .bufferSize(100)
                .blockPadding(false)))
            {
                // NOTE(review): setUnion is defined outside this block; its arguments appear to be
                // (batch, row index, timestamp, union tag, int value, string value, decimal) - confirm.
                // First batch: six fixed rows, including all-null and null-inside-union cases.
                VectorizedRowBatch batch = schema.createRowBatch();
                batch.size = 6;
                setUnion(batch, 0, Timestamp.Parse("2000-03-12 15:00:00"), 0, 42, null,
                         HiveDecimal.Parse("12345678.6547456"));
                setUnion(batch, 1, Timestamp.Parse("2000-03-20 12:00:00.123456789"),
                    1, null, "hello", HiveDecimal.Parse("-5643.234"));

                setUnion(batch, 2, null, null, null, null, null);
                setUnion(batch, 3, null, 0, null, null, null);
                setUnion(batch, 4, null, 1, null, null, null);

                setUnion(batch, 5, Timestamp.Parse("1970-01-01 00:00:00"), 0, 200000,
                    null, HiveDecimal.Parse("10000000000000000000"));
                writer.addRowBatch(batch);

                // Second batch: 68 rows (1970..2037); even years carry an int, odd years a string.
                batch.reset();
                for (int i = 1970; i < 2038; ++i)
                {
                    Timestamp ts = Timestamp.Parse(i + "-05-05 12:34:56." + i);
                    HiveDecimal dec = HiveDecimal.create(rand.NextBigInteger(64), rand.Next(18));
                    if ((i & 1) == 0)
                    {
                        setUnion(batch, batch.size++, ts, 0, i * i, null, dec);
                    }
                    else
                    {
                        setUnion(batch, batch.size++, ts, 1, null, (i * i).ToString(), dec);
                    }
                    if (maxValue.CompareTo(dec) < 0)
                    {
                        maxValue = dec;
                    }
                }
                writer.addRowBatch(batch);
                batch.reset();

                // let's add a lot of constant rows to test the rle
                // The same 1000-row batch is written five times (5000 rows). Every column is
                // flagged as repeating and only entry 0 is filled in; presumably that entry
                // stands for all rows of the batch - confirm against setRepeating.
                batch.size = 1000;
                for (int c = 0; c < batch.cols.Length; ++c)
                {
                    batch.cols[c].setRepeating(true);
                }
                setUnion(batch, 0, null, 0, 1732050807, null, null);
                for (int i = 0; i < 5; ++i)
                {
                    writer.addRowBatch(batch);
                }

                // Final batch: three rows with distinct int values.
                batch.reset();
                batch.size = 3;
                setUnion(batch, 0, null, 0, 0, null, null);
                setUnion(batch, 1, null, 0, 10, null, null);
                setUnion(batch, 2, null, 0, 138, null, null);
                writer.addRowBatch(batch);
                schema = writer.getSchema();
            }

            Reader reader = OrcFile.createReader(TestFilePath, OrcFile.readerOptions(conf));

            // Column-selection masks have six entries, one per column id 0..5.
            Assert.Equal(5, schema.getMaximumId());
            bool[] expected = new bool[] { false, false, false, false, false, false };
            bool[] included = OrcUtils.includeColumns("", schema);
            Assert.Equal(expected, included);

            expected = new bool[] { false, true, false, false, false, true };
            included = OrcUtils.includeColumns("time,decimal", schema);
            Assert.Equal(expected, included);

            expected = new bool[] { false, false, true, true, true, false };
            included = OrcUtils.includeColumns("union", schema);
            Assert.Equal(expected, included);

            Assert.Equal(0, reader.getMetadataKeys().Count);
            // 6 fixed rows + 68 generated rows + 5000 constant rows + 3 trailing rows.
            Assert.Equal(5077, reader.getNumberOfRows());
            DecimalColumnStatistics stats =
                (DecimalColumnStatistics)reader.getStatistics()[5];
            // Non-null decimals written: 3 in the first batch plus the 68 generated rows.
            Assert.Equal(71, stats.getNumberOfValues());
            Assert.Equal(HiveDecimal.Parse("-5643.234"), stats.getMinimum());
            Assert.Equal(maxValue, stats.getMaximum());
            // TODO: fix this
            //    Assert.Equal(null,stats.getSum());

            // Each stripe must start where the previous one ended, and the last stripe must
            // end at the reader's reported content length.
            int stripeCount = 0;
            int rowCount = 0;
            long currentOffset = -1;
            foreach (StripeInformation stripe in reader.getStripes())
            {
                stripeCount += 1;
                rowCount += (int)stripe.getNumberOfRows();
                if (currentOffset < 0)
                {
                    currentOffset = stripe.getOffset() + stripe.getLength();
                }
                else
                {
                    Assert.Equal(currentOffset, stripe.getOffset());
                    currentOffset += stripe.getLength();
                }
            }
            Assert.Equal(reader.getNumberOfRows(), rowCount);
            Assert.Equal(2, stripeCount);
            Assert.Equal(reader.getContentLength(), currentOffset);
            using (RecordReader rows = reader.rows())
            {
                Assert.Equal(0, rows.getRowNumber());
                Assert.Equal(0.0, rows.getProgress(), 6);
                Assert.True(rows.hasNext());
                OrcStruct row = (OrcStruct)rows.next();
                Assert.Equal(1, rows.getRowNumber());
                ObjectInspector inspector = reader.getObjectInspector();
                Assert.Equal("struct<time:timestamp,union:uniontype<int,string>,decimal:decimal(38,18)>",
                    inspector.getTypeName());
                Assert.Equal(Timestamp.Parse("2000-03-12 15:00:00"), row.getFieldValue(0));
                OrcUnion union = (OrcUnion)row.getFieldValue(1);
                Assert.Equal(0, union.getTag());
                Assert.Equal(42, union.getObject());
                Assert.Equal(HiveDecimal.Parse("12345678.6547456"), row.getFieldValue(2));
                row = (OrcStruct)rows.next();
                Assert.Equal(2, rows.getRowNumber());
                Assert.Equal(Timestamp.Parse("2000-03-20 12:00:00.123456789"), row.getFieldValue(0));
                union = (OrcUnion)row.getFieldValue(1);
                Assert.Equal(1, union.getTag());
                Assert.Equal("hello", union.getObject());
                Assert.Equal(HiveDecimal.Parse("-5643.234"), row.getFieldValue(2));
                row = (OrcStruct)rows.next();
                Assert.Null(row.getFieldValue(0));
                Assert.Null(row.getFieldValue(1));
                Assert.Null(row.getFieldValue(2));
                row = (OrcStruct)rows.next();
                Assert.Null(row.getFieldValue(0));
                union = (OrcUnion)row.getFieldValue(1);
                Assert.Equal(0, union.getTag());
                Assert.Null(union.getObject());
                Assert.Null(row.getFieldValue(2));
                row = (OrcStruct)rows.next();
                Assert.Null(row.getFieldValue(0));
                union = (OrcUnion)row.getFieldValue(1);
                Assert.Equal(1, union.getTag());
                Assert.Null(union.getObject());
                Assert.Null(row.getFieldValue(2));
                row = (OrcStruct)rows.next();
                Assert.Equal(Timestamp.Parse("1970-01-01 00:00:00"), row.getFieldValue(0));
                union = (OrcUnion)row.getFieldValue(1);
                Assert.Equal(200000, union.getObject());
                Assert.Equal(HiveDecimal.Parse("10000000000000000000"), row.getFieldValue(2));

                // Same seed and call order as the write phase, so the expected decimals match.
                rand = new Random(42);
                for (int i = 1970; i < 2038; ++i)
                {
                    row = (OrcStruct)rows.next();
                    Assert.Equal(Timestamp.Parse(i + "-05-05 12:34:56." + i),
                        row.getFieldValue(0));
                    union = (OrcUnion)row.getFieldValue(1);
                    if ((i & 1) == 0)
                    {
                        Assert.Equal(0, union.getTag());
                        Assert.Equal(i * i, union.getObject());
                    }
                    else
                    {
                        Assert.Equal(1, union.getTag());
                        Assert.Equal((i * i).ToString(), union.getObject());
                    }
                    Assert.Equal(
                        HiveDecimal.create(rand.NextBigInteger(64), rand.Next(18)),
                        row.getFieldValue(2));
                }
                for (int i = 0; i < 5000; ++i)
                {
                    row = (OrcStruct)rows.next();
                    union = (OrcUnion)row.getFieldValue(1);
                    Assert.Equal(1732050807, union.getObject());
                }
                row = (OrcStruct)rows.next();
                union = (OrcUnion)row.getFieldValue(1);
                Assert.Equal(0, union.getObject());
                row = (OrcStruct)rows.next();
                union = (OrcUnion)row.getFieldValue(1);
                Assert.Equal(10, union.getObject());
                row = (OrcStruct)rows.next();
                union = (OrcUnion)row.getFieldValue(1);
                Assert.Equal(138, union.getObject());
                Assert.False(rows.hasNext());
                Assert.Equal(1.0, rows.getProgress(), 5);
                Assert.Equal(reader.getNumberOfRows(), rows.getRowNumber());

                // Seek back to row index 1 and check that the second row is returned again.
                rows.seekToRow(1);
                row = (OrcStruct)rows.next();
                Assert.Equal(Timestamp.Parse("2000-03-20 12:00:00.123456789"), row.getFieldValue(0));
                union = (OrcUnion)row.getFieldValue(1);
                Assert.Equal(1, union.getTag());
                Assert.Equal("hello", union.getObject());
                Assert.Equal(HiveDecimal.Parse("-5643.234"), row.getFieldValue(2));
            }
        }