Пример #1
0
        /// <summary>
        /// Determines whether <paramref name="other"/> is an OrcUnion with the same
        /// tag and an equal held value.
        /// </summary>
        /// <param name="other">The object to compare against; may be null.</param>
        /// <returns>
        /// true when <paramref name="other"/> is an OrcUnion whose tag matches and whose
        /// held value is equal to this one's (both null, or value-equal); otherwise false.
        /// </returns>
        public override bool Equals(object other)
        {
            OrcUnion union = other as OrcUnion;

            // Null or a non-OrcUnion argument can never be equal.
            if (union == null)
            {
                return false;
            }

            if (tag != union.tag)
            {
                return false;
            }

            // Compare the payloads by value, not by reference: separately boxed ints or
            // non-interned strings with the same content must be treated as equal.
            // The static object.Equals handles null on either side before delegating
            // to the payload's own Equals.
            return object.Equals(@object, union.@object);
        }
Пример #2
0
        /// <summary>
        /// Writes rows with a timestamp, a union (int or string) and a decimal field to an
        /// ORC file, then reads the file back and checks the schema, column inclusion masks,
        /// decimal statistics, stripe layout and every row value, including a seek back to
        /// the second row.
        /// </summary>
        public void testUnionAndTimestamp()
        {
            // Flattened type list, indexed by type id:
            //   0 = struct { time -> 1, union -> 2, decimal -> 5 }
            //   1 = timestamp, 2 = union { 3, 4 }, 3 = int, 4 = string, 5 = decimal
            List<OrcProto.Type> types = new List<OrcProto.Type>();
            types.Add(OrcProto.Type.CreateBuilder().SetKind(OrcProto.Type.Types.Kind.STRUCT).
                AddFieldNames("time").AddFieldNames("union").AddFieldNames("decimal").
                AddSubtypes(1).AddSubtypes(2).AddSubtypes(5).Build());
            types.Add(OrcProto.Type.CreateBuilder().SetKind(OrcProto.Type.Types.Kind.TIMESTAMP).
                Build());
            types.Add(OrcProto.Type.CreateBuilder().SetKind(OrcProto.Type.Types.Kind.UNION).
                AddSubtypes(3).AddSubtypes(4).Build());
            types.Add(OrcProto.Type.CreateBuilder().SetKind(OrcProto.Type.Types.Kind.INT).
                Build());
            types.Add(OrcProto.Type.CreateBuilder().SetKind(OrcProto.Type.Types.Kind.STRING).
                Build());
            types.Add(OrcProto.Type.CreateBuilder().SetKind(OrcProto.Type.Types.Kind.DECIMAL).
                Build());

            ObjectInspector inspector = OrcStruct.createObjectInspector(0, types);

            // Running maximum of every decimal written; starts at the largest fixed value
            // written below and is raised inside the random loop when a larger one appears.
            HiveDecimal maxValue = HiveDecimal.Parse("10000000000000000000");
            // A single row and a single union instance are mutated and re-added for every row.
            OrcStruct row = new OrcStruct(3);
            OrcUnion union = new OrcUnion();
            Random rand;

            using (Stream file = FileOpenWrite(TestFilePath))
            using (Writer writer = OrcFile.createWriter(TestFilePath, file, OrcFile.writerOptions(conf)
                .inspector(inspector)
                .stripeSize(1000)
                .compress(CompressionKind.NONE)
                .bufferSize(100)
                .blockPadding(false)))
            {
                // Row 1: timestamp, union tag 0 (int 42), decimal.
                row.setFieldValue(1, union);
                row.setFieldValue(0, Timestamp.Parse("2000-03-12 15:00:00"));
                HiveDecimal value = HiveDecimal.Parse("12345678.6547456");
                row.setFieldValue(2, value);
                union.set((byte)0, 42);
                writer.addRow(row);
                // Row 2: timestamp with fractional seconds, union tag 1 (string), negative decimal.
                row.setFieldValue(0, Timestamp.Parse("2000-03-20 12:00:00.123456789"));
                union.set((byte)1, "hello");
                value = HiveDecimal.Parse("-5643.234");
                row.setFieldValue(2, value);
                writer.addRow(row);
                // Row 3: every field null.
                row.setFieldValue(0, null);
                row.setFieldValue(1, null);
                row.setFieldValue(2, null);
                writer.addRow(row);
                // Row 4: union present with tag 0 but a null value; time and decimal remain null.
                row.setFieldValue(1, union);
                union.set((byte)0, null);
                writer.addRow(row);
                // Row 5: same as row 4 but with tag 1.
                union.set((byte)1, null);
                writer.addRow(row);
                // Row 6: all three fields populated again.
                union.set((byte)0, 200000);
                row.setFieldValue(0, Timestamp.Parse("1970-01-01 00:00:00"));
                value = HiveDecimal.Parse("10000000000000000000");
                row.setFieldValue(2, value);
                writer.addRow(row);
                // 68 rows (i = 1970..2037): even i stores i*i as an int under tag 0, odd i
                // stores it as a string under tag 1. The decimal comes from a fixed-seed
                // Random so the read side can replay the identical sequence.
                rand = new Random(42);
                for (int i = 1970; i < 2038; ++i)
                {
                    row.setFieldValue(0, Timestamp.Parse(i + "-05-05 12:34:56." + i));
                    if ((i & 1) == 0)
                    {
                        union.set((byte)0, (i * i));
                    }
                    else
                    {
                        union.set((byte)1, (i * i).ToString());
                    }
                    value = HiveDecimal.create(rand.NextBigInteger(64), rand.Next(18));
                    row.setFieldValue(2, value);
                    if (maxValue.CompareTo(value) < 0)
                    {
                        maxValue = value;
                    }
                    writer.addRow(row);
                }
                // let's add a lot of constant rows to test the rle
                row.setFieldValue(0, null);
                union.set((byte)0, 1732050807);
                row.setFieldValue(2, null);
                for (int i = 0; i < 5000; ++i)
                {
                    writer.addRow(row);
                }
                // Three trailing rows that differ only in the union's int value.
                union.set((byte)0, 0);
                writer.addRow(row);
                union.set((byte)0, 10);
                writer.addRow(row);
                union.set((byte)0, 138);
                writer.addRow(row);
                // Closed explicitly here, before the enclosing using block disposes the writer.
                writer.close();

                // The schema has ids 0..5, so inclusion masks have six entries.
                TypeDescription schema = writer.getSchema();
                Assert.Equal(5, schema.getMaximumId());
                // An empty column list selects nothing.
                bool[] expected = new bool[] { false, false, false, false, false, false };
                bool[] included = OrcUtils.includeColumns("", schema);
                Assert.Equal(expected, included);

                // "time" is id 1 and "decimal" is id 5.
                expected = new bool[] { false, true, false, false, false, true };
                included = OrcUtils.includeColumns("time,decimal", schema);
                Assert.Equal(expected, included);

                // Selecting "union" (id 2) also selects its children (ids 3 and 4).
                expected = new bool[] { false, false, true, true, true, false };
                included = OrcUtils.includeColumns("union", schema);
                Assert.Equal(expected, included);
            }

            Reader reader = OrcFile.createReader(TestFilePath, OrcFile.readerOptions(conf));

            Assert.Equal(0, reader.getMetadataKeys().Count);
            // 6 hand-written rows + 68 loop rows + 5000 constant rows + 3 trailing rows.
            Assert.Equal(5077, reader.getNumberOfRows());
            // Statistics for column id 5 (the decimal field).
            DecimalColumnStatistics stats =
                (DecimalColumnStatistics)reader.getStatistics()[5];
            // Non-null decimals: rows 1, 2 and 6 plus the 68 loop rows.
            Assert.Equal(71, stats.getNumberOfValues());
            Assert.Equal(HiveDecimal.Parse("-5643.234"), stats.getMinimum());
            Assert.Equal(maxValue, stats.getMaximum());
            // TODO: fix this
            //    Assert.Equal(null,stats.getSum());
            // Walk the stripes: each one must start where the previous one ended, and
            // their row counts must add up to the file's row count.
            int stripeCount = 0;
            int rowCount = 0;
            long currentOffset = -1;
            foreach (StripeInformation stripe in reader.getStripes())
            {
                stripeCount += 1;
                rowCount += (int)stripe.getNumberOfRows();
                if (currentOffset < 0)
                {
                    // First stripe: just record where it ends.
                    currentOffset = stripe.getOffset() + stripe.getLength();
                }
                else
                {
                    Assert.Equal(currentOffset, stripe.getOffset());
                    currentOffset += stripe.getLength();
                }
            }
            Assert.Equal(reader.getNumberOfRows(), rowCount);
            Assert.Equal(2, stripeCount);
            Assert.Equal(reader.getContentLength(), currentOffset);

            // NOTE(review): below, `union` is fetched from the row only twice (first row and
            // fourth row) and then re-checked after later rows.next() calls without being
            // re-fetched. That validates the current row only if the reader reuses the same
            // OrcUnion instance between calls - confirm against the RecordReader implementation.
            using (RecordReader rows = reader.rows())
            {
                Assert.Equal(0, rows.getRowNumber());
                Assert.Equal(0.0, rows.getProgress(), 6);
                Assert.Equal(true, rows.hasNext());
                // Row 1.
                row = (OrcStruct)rows.next();
                Assert.Equal(1, rows.getRowNumber());
                inspector = reader.getObjectInspector();
                Assert.Equal("struct<time:timestamp,union:uniontype<int,string>,decimal:decimal(38,18)>",
                    inspector.getTypeName());
                Assert.Equal(Timestamp.Parse("2000-03-12 15:00:00"), row.getFieldValue(0));
                union = (OrcUnion)row.getFieldValue(1);
                Assert.Equal(0, union.getTag());
                Assert.Equal(42, union.getObject());
                Assert.Equal(HiveDecimal.Parse("12345678.6547456"), row.getFieldValue(2));
                // Row 2.
                row = (OrcStruct)rows.next();
                Assert.Equal(2, rows.getRowNumber());
                Assert.Equal(Timestamp.Parse("2000-03-20 12:00:00.123456789"), row.getFieldValue(0));
                Assert.Equal(1, union.getTag());
                Assert.Equal("hello", union.getObject());
                Assert.Equal(HiveDecimal.Parse("-5643.234"), row.getFieldValue(2));
                // Row 3: all fields null.
                row = (OrcStruct)rows.next();
                Assert.Equal(null, row.getFieldValue(0));
                Assert.Equal(null, row.getFieldValue(1));
                Assert.Equal(null, row.getFieldValue(2));
                // Row 4: union present (tag 0) with a null value; re-fetch it because the
                // previous row's union field was null.
                row = (OrcStruct)rows.next();
                Assert.Equal(null, row.getFieldValue(0));
                union = (OrcUnion)row.getFieldValue(1);
                Assert.Equal(0, union.getTag());
                Assert.Equal(null, union.getObject());
                Assert.Equal(null, row.getFieldValue(2));
                // Row 5: tag 1 with a null value.
                row = (OrcStruct)rows.next();
                Assert.Equal(null, row.getFieldValue(0));
                Assert.Equal(1, union.getTag());
                Assert.Equal(null, union.getObject());
                Assert.Equal(null, row.getFieldValue(2));
                // Row 6.
                row = (OrcStruct)rows.next();
                Assert.Equal(Timestamp.Parse("1970-01-01 00:00:00"), row.getFieldValue(0));
                Assert.Equal(200000, union.getObject());
                Assert.Equal(HiveDecimal.Parse("10000000000000000000"), row.getFieldValue(2));
                // Re-seed with the same value used while writing so the expected decimals
                // are regenerated in the same order.
                rand = new Random(42);
                for (int i = 1970; i < 2038; ++i)
                {
                    row = (OrcStruct)rows.next();
                    Assert.Equal(Timestamp.Parse(i + "-05-05 12:34:56." + i), row.getFieldValue(0));
                    if ((i & 1) == 0)
                    {
                        Assert.Equal(0, union.getTag());
                        Assert.Equal(i * i, union.getObject());
                    }
                    else
                    {
                        Assert.Equal(1, union.getTag());
                        Assert.Equal((i * i).ToString(), union.getObject());
                    }
                    Assert.Equal(HiveDecimal.create(rand.NextBigInteger(64), rand.Next(18)), row.getFieldValue(2));
                }
                // The 5000 constant rows.
                for (int i = 0; i < 5000; ++i)
                {
                    row = (OrcStruct)rows.next();
                    Assert.Equal(1732050807, union.getObject());
                }
                // The three trailing rows.
                row = (OrcStruct)rows.next();
                Assert.Equal(0, union.getObject());
                row = (OrcStruct)rows.next();
                Assert.Equal(10, union.getObject());
                row = (OrcStruct)rows.next();
                Assert.Equal(138, union.getObject());
                // End of file reached.
                Assert.Equal(false, rows.hasNext());
                Assert.Equal(1.0, rows.getProgress(), 5);
                Assert.Equal(reader.getNumberOfRows(), rows.getRowNumber());
                // Seek back to row index 1; the next row read must be the second row written.
                rows.seekToRow(1);
                row = (OrcStruct)rows.next();
                Assert.Equal(Timestamp.Parse("2000-03-20 12:00:00.123456789"), row.getFieldValue(0));
                Assert.Equal(1, union.getTag());
                Assert.Equal("hello", union.getObject());
                Assert.Equal(HiveDecimal.Parse("-5643.234"), row.getFieldValue(2));
            }
        }
Пример #3
0
 /// <summary>
 /// Checks OrcUnion equality, hash codes and string formatting as the tag and the
 /// held value of two instances are changed step by step.
 /// </summary>
 public void testUnion()
 {
     OrcUnion un1 = new OrcUnion();
     OrcUnion un2 = new OrcUnion();

     // Same tag, same value: equal, with matching hash codes.
     un1.set((byte)0, "hi");
     un2.set((byte)0, "hi");
     Assert.Equal(un1, un2);
     Assert.Equal(un1.GetHashCode(), un2.GetHashCode());

     // Same tag, one value null: unequal in both directions.
     un2.set((byte)0, null);
     Assert.False(un1.Equals(un2));
     Assert.False(un2.Equals(un1));

     // Same tag, both values null: equal again.
     un1.set((byte)0, null);
     Assert.Equal(un1, un2);

     // Same value under different tags: unequal, and the hash codes differ.
     un2.set((byte)0, "hi");
     un1.set((byte)1, "hi");
     Assert.False(un1.Equals(un2));
     Assert.NotEqual(un1.GetHashCode(), un2.GetHashCode());

     // Same tag, different values: unequal.
     un2.set((byte)1, "byte");
     Assert.False(un1.Equals(un2));

     Assert.Equal("union(1, hi)", un1.ToString());

     // Comparing against null must return false rather than throw.
     Assert.False(un1.Equals(null));
 }