/**
 * Advances to the next row of recordReader and publishes it through
 * nextRecord and key.
 *
 * When the reader has another row, the row is wrapped in an OrcStruct laid
 * out according to OrcRecordUpdater.FIELDS: the operation is
 * INSERT_OPERATION, both transaction fields are 0, the bucket is this
 * reader's bucket and the row id is the row number the reader reported
 * before the row was consumed. key is then updated with the same bucket and
 * row number.
 *
 * nextRecord is set to null and the reader is closed when either the reader
 * is exhausted or the new key compares greater than maxKey.
 *
 * @param next a struct to recycle for the new record, or null to allocate a
 *             fresh one
 */
void next(OrcStruct next)
 {
     if (recordReader.hasNext())
     {
         // Capture the row number before next() consumes the row.
         long nextRowId = recordReader.getRowNumber();
         // have to do initialization here, because the super's constructor
         // calls next and thus we need to initialize before our constructor
         // runs
         if (next == null)
         {
             nextRecord = new OrcStruct(OrcRecordUpdater.FIELDS);
             IntWritable operation =
                 new IntWritable(OrcRecordUpdater.INSERT_OPERATION);
             nextRecord.setFieldValue(OrcRecordUpdater.OPERATION, operation);
             nextRecord.setFieldValue(OrcRecordUpdater.CURRENT_TRANSACTION,
                                      new LongWritable(0));
             nextRecord.setFieldValue(OrcRecordUpdater.ORIGINAL_TRANSACTION,
                                      new LongWritable(0));
             nextRecord.setFieldValue(OrcRecordUpdater.BUCKET,
                                      new IntWritable(bucket));
             nextRecord.setFieldValue(OrcRecordUpdater.ROW_ID,
                                      new LongWritable(nextRowId));
             nextRecord.setFieldValue(OrcRecordUpdater.ROW,
                                      recordReader.next(null));
         }
         else
         {
             // Recycle the caller's struct: overwrite its writables in place.
             nextRecord = next;
             ((IntWritable)next.getFieldValue(OrcRecordUpdater.OPERATION))
             .set(OrcRecordUpdater.INSERT_OPERATION);
             ((LongWritable)next.getFieldValue(OrcRecordUpdater.ORIGINAL_TRANSACTION))
             .set(0);
             ((IntWritable)next.getFieldValue(OrcRecordUpdater.BUCKET))
             .set(bucket);
             ((LongWritable)next.getFieldValue(OrcRecordUpdater.CURRENT_TRANSACTION))
             .set(0);
             // The recycled struct must carry the current row number, just
             // like a freshly allocated one and like the key set below. It
             // used to be reset to 0 for every reused record.
             ((LongWritable)next.getFieldValue(OrcRecordUpdater.ROW_ID))
             .set(nextRowId);
             nextRecord.setFieldValue(OrcRecordUpdater.ROW,
                                      recordReader.next(OrcRecordUpdater.getRow(next)));
         }
         key.setValues(0L, bucket, nextRowId, 0L, 0);
         if (maxKey != null && key.compareRow(maxKey) > 0)
         {
             // Past the upper bound: stop producing records.
             if (LOG.isDebugEnabled())
             {
                 LOG.debug("key " + key + " > maxkey " + maxKey);
             }
             nextRecord = null;
             recordReader.close();
         }
     }
     else
     {
         // Reader exhausted.
         nextRecord = null;
         recordReader.close();
     }
 }
 /**
  * Extracts the value stored in the ROW field of the given struct.
  *
  * @param struct the struct to read from; may be null
  * @return null when struct is null, otherwise the ROW field value cast to
  *         OrcStruct
  */
 static OrcStruct getRow(OrcStruct @struct)
 {
     return @struct == null ? null : (OrcStruct)@struct.getFieldValue(ROW);
 }
 /**
  * Reads the BUCKET field of the given struct.
  *
  * @param struct the struct to read from; it is dereferenced without a null
  *               check
  * @return the int held by the IntWritable stored in the BUCKET field
  */
 internal static int getBucket(OrcStruct @struct)
 {
     IntWritable bucketField = (IntWritable)@struct.getFieldValue(BUCKET);
     return bucketField.get();
 }
 /**
  * Reads the ORIGINAL_TRANSACTION field of the given struct.
  *
  * @param struct the struct to read from; it is dereferenced without a null
  *               check
  * @return the long held by the LongWritable stored in the
  *         ORIGINAL_TRANSACTION field
  */
 internal static long getOriginalTransaction(OrcStruct @struct)
 {
     LongWritable originalTxn =
         (LongWritable)@struct.getFieldValue(ORIGINAL_TRANSACTION);
     return originalTxn.get();
 }
 /**
  * Reads the CURRENT_TRANSACTION field of the given struct.
  *
  * @param struct the struct to read from; it is dereferenced without a null
  *               check
  * @return the long held by the LongWritable stored in the
  *         CURRENT_TRANSACTION field
  */
 internal static long getCurrentTransaction(OrcStruct @struct)
 {
     LongWritable currentTxn =
         (LongWritable)@struct.getFieldValue(CURRENT_TRANSACTION);
     return currentTxn.get();
 }
 /**
  * Reads the OPERATION field of the given struct.
  *
  * @param struct the struct to read from; it is dereferenced without a null
  *               check
  * @return the int held by the IntWritable stored in the OPERATION field
  */
 internal static int getOperation(OrcStruct @struct)
 {
     IntWritable operationField = (IntWritable)@struct.getFieldValue(OPERATION);
     return operationField.get();
 }
 /**
  * Advances to the next row of recordReader and publishes it through
  * nextRecord and key.
  *
  * When the reader has another row, the row is wrapped in an OrcStruct laid
  * out according to OrcRecordUpdater.FIELDS: the operation is
  * INSERT_OPERATION, both transaction fields are 0, the bucket is this
  * reader's bucket and the row id is the row number the reader reported
  * before the row was consumed. key is then updated with the same bucket and
  * row number.
  *
  * nextRecord is set to null and the reader is closed when either the reader
  * is exhausted or the new key compares greater than maxKey.
  *
  * @param next a struct to recycle for the new record, or null to allocate a
  *             fresh one
  */
 void next(OrcStruct next)
 {
     if (recordReader.hasNext())
     {
         // Capture the row number before next() consumes the row.
         long nextRowId = recordReader.getRowNumber();
         // have to do initialization here, because the super's constructor
         // calls next and thus we need to initialize before our constructor
         // runs
         if (next == null)
         {
             nextRecord = new OrcStruct(OrcRecordUpdater.FIELDS);
             IntWritable operation =
                 new IntWritable(OrcRecordUpdater.INSERT_OPERATION);
             nextRecord.setFieldValue(OrcRecordUpdater.OPERATION, operation);
             nextRecord.setFieldValue(OrcRecordUpdater.CURRENT_TRANSACTION,
                 new LongWritable(0));
             nextRecord.setFieldValue(OrcRecordUpdater.ORIGINAL_TRANSACTION,
                 new LongWritable(0));
             nextRecord.setFieldValue(OrcRecordUpdater.BUCKET,
                 new IntWritable(bucket));
             nextRecord.setFieldValue(OrcRecordUpdater.ROW_ID,
                 new LongWritable(nextRowId));
             nextRecord.setFieldValue(OrcRecordUpdater.ROW,
                 recordReader.next(null));
         }
         else
         {
             // Recycle the caller's struct: overwrite its writables in place.
             nextRecord = next;
             ((IntWritable)next.getFieldValue(OrcRecordUpdater.OPERATION))
                 .set(OrcRecordUpdater.INSERT_OPERATION);
             ((LongWritable)next.getFieldValue(OrcRecordUpdater.ORIGINAL_TRANSACTION))
                 .set(0);
             ((IntWritable)next.getFieldValue(OrcRecordUpdater.BUCKET))
                 .set(bucket);
             ((LongWritable)next.getFieldValue(OrcRecordUpdater.CURRENT_TRANSACTION))
                 .set(0);
             // The recycled struct must carry the current row number, just
             // like a freshly allocated one and like the key set below. It
             // used to be reset to 0 for every reused record.
             ((LongWritable)next.getFieldValue(OrcRecordUpdater.ROW_ID))
                 .set(nextRowId);
             nextRecord.setFieldValue(OrcRecordUpdater.ROW,
                 recordReader.next(OrcRecordUpdater.getRow(next)));
         }
         key.setValues(0L, bucket, nextRowId, 0L, 0);
         if (maxKey != null && key.compareRow(maxKey) > 0)
         {
             // Past the upper bound: stop producing records.
             if (LOG.isDebugEnabled())
             {
                 LOG.debug("key " + key + " > maxkey " + maxKey);
             }
             nextRecord = null;
             recordReader.close();
         }
     }
     else
     {
         // Reader exhausted.
         nextRecord = null;
         recordReader.close();
     }
 }
 /**
  * Returns the long stored in the CURRENT_TRANSACTION field of the struct.
  *
  * @param struct the struct to read from; no null check is performed
  * @return the value of the LongWritable found in the CURRENT_TRANSACTION
  *         field
  */
 internal static long getCurrentTransaction(OrcStruct @struct)
 {
     object rawField = @struct.getFieldValue(CURRENT_TRANSACTION);
     LongWritable txnWritable = (LongWritable)rawField;
     return txnWritable.get();
 }
        /**
         * Writes an ORC file of (time, date) rows to the given path, then reads
         * the file back and asserts that every row matches what was written.
         *
         * One row is produced for each year in [minYear, maxYear) combined with
         * each ms value in [1000, 2000): the time field is parsed from
         * "year-05-05 12:34:56.ms" and the date field is
         * new Date(year - 1900, 11, 25).
         *
         * @param path    location of the ORC file to write and re-read
         * @param minYear first year to generate (inclusive)
         * @param maxYear year at which generation stops (exclusive)
         */
        public void createOrcDateFile(string path, int minYear, int maxYear)
        {
            // Type 0 is the root struct; its subtypes 1 and 2 are the
            // "time" and "date" fields declared right after it.
            List<OrcProto.Type> types = new List<OrcProto.Type>
            {
                OrcProto.Type.CreateBuilder()
                    .SetKind(OrcProto.Type.Types.Kind.STRUCT)
                    .AddFieldNames("time")
                    .AddFieldNames("date")
                    .AddSubtypes(1)
                    .AddSubtypes(2)
                    .Build(),
                OrcProto.Type.CreateBuilder()
                    .SetKind(OrcProto.Type.Types.Kind.TIMESTAMP)
                    .Build(),
                OrcProto.Type.CreateBuilder()
                    .SetKind(OrcProto.Type.Types.Kind.DATE)
                    .Build()
            };

            ObjectInspector inspector = OrcStruct.createObjectInspector(0, types);

            // Write phase: a single OrcStruct is refilled and appended per row.
            using (Stream file = FileOpenWrite(path))
            using (Writer writer = OrcFile.createWriter(path, file, OrcFile.writerOptions(conf)
                .inspector(inspector)
                .stripeSize(100000)
                .bufferSize(10000)
                .blockPadding(false)))
            {
                OrcStruct outRow = new OrcStruct(2);
                for (int year = minYear; year < maxYear; year++)
                {
                    for (int ms = 1000; ms < 2000; ms++)
                    {
                        string timeText = year + "-05-05 12:34:56." + ms;
                        outRow.setFieldValue(0, Timestamp.Parse(timeText));
                        outRow.setFieldValue(1, new Date(year - 1900, 11, 25));
                        writer.addRow(outRow);
                    }
                }
            }

            // Read phase: replay the same loops and compare row by row.
            Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(conf));
            using (RecordReader rows = reader.rows())
            {
                for (int year = minYear; year < maxYear; year++)
                {
                    for (int ms = 1000; ms < 2000; ms++)
                    {
                        OrcStruct inRow = (OrcStruct)rows.next();
                        string timeText = year + "-05-05 12:34:56." + ms;
                        Assert.Equal(Timestamp.Parse(timeText), inRow.getFieldValue(0));
                        Assert.Equal(new Date(year - 1900, 11, 25), inRow.getFieldValue(1));
                    }
                }
            }
        }
Beispiel #10
0
        /**
         * Round-trip test for a struct with timestamp, union<int,string> and
         * decimal columns.
         *
         * The test writes 5077 rows to TestFilePath, checks the column-inclusion
         * arrays returned by OrcUtils.includeColumns for the writer's schema,
         * then reopens the file and verifies the row count, the decimal column
         * statistics, that stripes are laid out back to back, every row's
         * contents, and finally that seekToRow(1) repositions on the second row.
         */
        public void testUnionAndTimestamp()
        {
            // Type ids: 0 = root struct, 1 = timestamp ("time"),
            // 2 = union ("union") with subtypes 3 = int and 4 = string,
            // 5 = decimal ("decimal").
            List<OrcProto.Type> types = new List<OrcProto.Type>();
            types.Add(OrcProto.Type.CreateBuilder().SetKind(OrcProto.Type.Types.Kind.STRUCT).
                AddFieldNames("time").AddFieldNames("union").AddFieldNames("decimal").
                AddSubtypes(1).AddSubtypes(2).AddSubtypes(5).Build());
            types.Add(OrcProto.Type.CreateBuilder().SetKind(OrcProto.Type.Types.Kind.TIMESTAMP).
                Build());
            types.Add(OrcProto.Type.CreateBuilder().SetKind(OrcProto.Type.Types.Kind.UNION).
                AddSubtypes(3).AddSubtypes(4).Build());
            types.Add(OrcProto.Type.CreateBuilder().SetKind(OrcProto.Type.Types.Kind.INT).
                Build());
            types.Add(OrcProto.Type.CreateBuilder().SetKind(OrcProto.Type.Types.Kind.STRING).
                Build());
            types.Add(OrcProto.Type.CreateBuilder().SetKind(OrcProto.Type.Types.Kind.DECIMAL).
                Build());

            ObjectInspector inspector = OrcStruct.createObjectInspector(0, types);

            // Running maximum of the decimals written; seeded with the largest
            // explicitly written value and raised by the random rows below.
            HiveDecimal maxValue = HiveDecimal.Parse("10000000000000000000");
            // A single row and a single union instance are mutated and re-added
            // for every row written.
            OrcStruct row = new OrcStruct(3);
            OrcUnion union = new OrcUnion();
            Random rand;

            using (Stream file = FileOpenWrite(TestFilePath))
            using (Writer writer = OrcFile.createWriter(TestFilePath, file, OrcFile.writerOptions(conf)
                .inspector(inspector)
                .stripeSize(1000)
                .compress(CompressionKind.NONE)
                .bufferSize(100)
                .blockPadding(false)))
            {
                // Row 1: timestamp, union tag 0 (int 42), decimal.
                row.setFieldValue(1, union);
                row.setFieldValue(0, Timestamp.Parse("2000-03-12 15:00:00"));
                HiveDecimal value = HiveDecimal.Parse("12345678.6547456");
                row.setFieldValue(2, value);
                union.set((byte)0, 42);
                writer.addRow(row);
                // Row 2: timestamp with nanoseconds, union tag 1 (string), negative decimal.
                row.setFieldValue(0, Timestamp.Parse("2000-03-20 12:00:00.123456789"));
                union.set((byte)1, "hello");
                value = HiveDecimal.Parse("-5643.234");
                row.setFieldValue(2, value);
                writer.addRow(row);
                // Row 3: all three fields null.
                row.setFieldValue(0, null);
                row.setFieldValue(1, null);
                row.setFieldValue(2, null);
                writer.addRow(row);
                // Rows 4 and 5: non-null union holding a null value under tag 0, then tag 1.
                row.setFieldValue(1, union);
                union.set((byte)0, null);
                writer.addRow(row);
                union.set((byte)1, null);
                writer.addRow(row);
                // Row 6: epoch timestamp, int union, the initial maxValue decimal.
                union.set((byte)0, 200000);
                row.setFieldValue(0, Timestamp.Parse("1970-01-01 00:00:00"));
                value = HiveDecimal.Parse("10000000000000000000");
                row.setFieldValue(2, value);
                writer.addRow(row);
                // 68 rows (i = 1970..2037): the union alternates int/string by the
                // parity of i, decimals come from a Random seeded with 42 so the
                // read phase can replay the same sequence.
                rand = new Random(42);
                for (int i = 1970; i < 2038; ++i)
                {
                    row.setFieldValue(0, Timestamp.Parse(i + "-05-05 12:34:56." + i));
                    if ((i & 1) == 0)
                    {
                        union.set((byte)0, (i * i));
                    }
                    else
                    {
                        union.set((byte)1, (i * i).ToString());
                    }
                    value = HiveDecimal.create(rand.NextBigInteger(64), rand.Next(18));
                    row.setFieldValue(2, value);
                    if (maxValue.CompareTo(value) < 0)
                    {
                        maxValue = value;
                    }
                    writer.addRow(row);
                }
                // let's add a lot of constant rows to test the rle
                row.setFieldValue(0, null);
                union.set((byte)0, 1732050807);
                row.setFieldValue(2, null);
                for (int i = 0; i < 5000; ++i)
                {
                    writer.addRow(row);
                }
                // Three trailing rows with distinct int union values.
                // Total written: 6 + 68 + 5000 + 3 = 5077 rows.
                union.set((byte)0, 0);
                writer.addRow(row);
                union.set((byte)0, 10);
                writer.addRow(row);
                union.set((byte)0, 138);
                writer.addRow(row);
                writer.close();

                // The expected arrays below have six slots, matching type ids 0..5.
                TypeDescription schema = writer.getSchema();
                Assert.Equal(5, schema.getMaximumId());
                bool[] expected = new bool[] { false, false, false, false, false, false };
                bool[] included = OrcUtils.includeColumns("", schema);
                Assert.Equal(expected, included);

                // "time" is id 1 and "decimal" is id 5.
                expected = new bool[] { false, true, false, false, false, true };
                included = OrcUtils.includeColumns("time,decimal", schema);
                Assert.Equal(expected, included);

                // "union" is id 2; its subtypes 3 and 4 are expected to be included too.
                expected = new bool[] { false, false, true, true, true, false };
                included = OrcUtils.includeColumns("union", schema);
                Assert.Equal(expected, included);
            }

            Reader reader = OrcFile.createReader(TestFilePath, OrcFile.readerOptions(conf));

            Assert.Equal(0, reader.getMetadataKeys().Count);
            Assert.Equal(5077, reader.getNumberOfRows());
            // Index 5 is the decimal column. 71 = 3 explicit non-null decimals
            // (rows 1, 2 and 6) + 68 random ones.
            DecimalColumnStatistics stats =
                (DecimalColumnStatistics)reader.getStatistics()[5];
            Assert.Equal(71, stats.getNumberOfValues());
            Assert.Equal(HiveDecimal.Parse("-5643.234"), stats.getMinimum());
            Assert.Equal(maxValue, stats.getMaximum());
            // TODO: fix this
            //    Assert.Equal(null,stats.getSum());
            // Walk the stripes, checking that each one starts exactly where the
            // previous one ended and summing their row counts.
            int stripeCount = 0;
            int rowCount = 0;
            long currentOffset = -1;
            foreach (StripeInformation stripe in reader.getStripes())
            {
                stripeCount += 1;
                rowCount += (int)stripe.getNumberOfRows();
                if (currentOffset < 0)
                {
                    // First stripe: just record where it ends.
                    currentOffset = stripe.getOffset() + stripe.getLength();
                }
                else
                {
                    Assert.Equal(currentOffset, stripe.getOffset());
                    currentOffset += stripe.getLength();
                }
            }
            Assert.Equal(reader.getNumberOfRows(), rowCount);
            Assert.Equal(2, stripeCount);
            Assert.Equal(reader.getContentLength(), currentOffset);

            using (RecordReader rows = reader.rows())
            {
                Assert.Equal(0, rows.getRowNumber());
                Assert.Equal(0.0, rows.getProgress(), 6);
                Assert.Equal(true, rows.hasNext());
                // Row 1.
                row = (OrcStruct)rows.next();
                Assert.Equal(1, rows.getRowNumber());
                inspector = reader.getObjectInspector();
                Assert.Equal("struct<time:timestamp,union:uniontype<int,string>,decimal:decimal(38,18)>",
                    inspector.getTypeName());
                Assert.Equal(Timestamp.Parse("2000-03-12 15:00:00"), row.getFieldValue(0));
                union = (OrcUnion)row.getFieldValue(1);
                Assert.Equal(0, union.getTag());
                Assert.Equal(42, union.getObject());
                Assert.Equal(HiveDecimal.Parse("12345678.6547456"), row.getFieldValue(2));
                // Row 2.
                // NOTE(review): `union` is not re-fetched from the new row here or
                // in most later steps; these assertions appear to rely on the
                // reader handing back the same OrcUnion instance - confirm.
                row = (OrcStruct)rows.next();
                Assert.Equal(2, rows.getRowNumber());
                Assert.Equal(Timestamp.Parse("2000-03-20 12:00:00.123456789"), row.getFieldValue(0));
                Assert.Equal(1, union.getTag());
                Assert.Equal("hello", union.getObject());
                Assert.Equal(HiveDecimal.Parse("-5643.234"), row.getFieldValue(2));
                // Row 3: all null.
                row = (OrcStruct)rows.next();
                Assert.Equal(null, row.getFieldValue(0));
                Assert.Equal(null, row.getFieldValue(1));
                Assert.Equal(null, row.getFieldValue(2));
                // Row 4: union re-fetched because the previous row's union field was null.
                row = (OrcStruct)rows.next();
                Assert.Equal(null, row.getFieldValue(0));
                union = (OrcUnion)row.getFieldValue(1);
                Assert.Equal(0, union.getTag());
                Assert.Equal(null, union.getObject());
                Assert.Equal(null, row.getFieldValue(2));
                // Row 5.
                row = (OrcStruct)rows.next();
                Assert.Equal(null, row.getFieldValue(0));
                Assert.Equal(1, union.getTag());
                Assert.Equal(null, union.getObject());
                Assert.Equal(null, row.getFieldValue(2));
                // Row 6.
                row = (OrcStruct)rows.next();
                Assert.Equal(Timestamp.Parse("1970-01-01 00:00:00"), row.getFieldValue(0));
                Assert.Equal(200000, union.getObject());
                Assert.Equal(HiveDecimal.Parse("10000000000000000000"), row.getFieldValue(2));
                // Re-seed with 42 to replay the decimals generated while writing.
                rand = new Random(42);
                for (int i = 1970; i < 2038; ++i)
                {
                    row = (OrcStruct)rows.next();
                    Assert.Equal(Timestamp.Parse(i + "-05-05 12:34:56." + i), row.getFieldValue(0));
                    if ((i & 1) == 0)
                    {
                        Assert.Equal(0, union.getTag());
                        Assert.Equal(i * i, union.getObject());
                    }
                    else
                    {
                        Assert.Equal(1, union.getTag());
                        Assert.Equal((i * i).ToString(), union.getObject());
                    }
                    Assert.Equal(HiveDecimal.create(rand.NextBigInteger(64), rand.Next(18)), row.getFieldValue(2));
                }
                // The 5000 constant rows.
                for (int i = 0; i < 5000; ++i)
                {
                    row = (OrcStruct)rows.next();
                    Assert.Equal(1732050807, union.getObject());
                }
                // The three trailing rows, after which the reader must be exhausted.
                row = (OrcStruct)rows.next();
                Assert.Equal(0, union.getObject());
                row = (OrcStruct)rows.next();
                Assert.Equal(10, union.getObject());
                row = (OrcStruct)rows.next();
                Assert.Equal(138, union.getObject());
                Assert.Equal(false, rows.hasNext());
                Assert.Equal(1.0, rows.getProgress(), 5);
                Assert.Equal(reader.getNumberOfRows(), rows.getRowNumber());
                // Seeking to row index 1 must yield the second row written.
                rows.seekToRow(1);
                row = (OrcStruct)rows.next();
                Assert.Equal(Timestamp.Parse("2000-03-20 12:00:00.123456789"), row.getFieldValue(0));
                Assert.Equal(1, union.getTag());
                Assert.Equal("hello", union.getObject());
                Assert.Equal(HiveDecimal.Parse("-5643.234"), row.getFieldValue(2));
            }
        }
 /**
  * Reads the ROW_ID field of the given struct.
  *
  * @param struct the struct to read from; it is dereferenced without a null
  *               check
  * @return the long held by the LongWritable stored in the ROW_ID field
  */
 internal static long getRowId(OrcStruct @struct)
 {
     LongWritable rowIdField = (LongWritable)@struct.getFieldValue(ROW_ID);
     return rowIdField.get();
 }
 /**
  * Returns the ROW field of the given struct as an OrcStruct.
  *
  * @param struct the struct to read from; may be null
  * @return the ROW field value cast to OrcStruct, or null when struct itself
  *         is null
  */
 static OrcStruct getRow(OrcStruct @struct)
 {
     if (@struct != null)
     {
         object rowField = @struct.getFieldValue(ROW);
         return (OrcStruct)rowField;
     }
     return null;
 }
 /**
  * Returns the long stored in the ORIGINAL_TRANSACTION field of the struct.
  *
  * @param struct the struct to read from; no null check is performed
  * @return the value of the LongWritable found in the ORIGINAL_TRANSACTION
  *         field
  */
 internal static long getOriginalTransaction(OrcStruct @struct)
 {
     object rawField = @struct.getFieldValue(ORIGINAL_TRANSACTION);
     LongWritable txnWritable = (LongWritable)rawField;
     return txnWritable.get();
 }
 /**
  * Returns the int stored in the OPERATION field of the struct.
  *
  * @param struct the struct to read from; no null check is performed
  * @return the value of the IntWritable found in the OPERATION field
  */
 internal static int getOperation(OrcStruct @struct)
 {
     object rawField = @struct.getFieldValue(OPERATION);
     IntWritable opWritable = (IntWritable)rawField;
     return opWritable.get();
 }
 /**
  * Returns the long stored in the ROW_ID field of the struct.
  *
  * @param struct the struct to read from; no null check is performed
  * @return the value of the LongWritable found in the ROW_ID field
  */
 internal static long getRowId(OrcStruct @struct)
 {
     object rawField = @struct.getFieldValue(ROW_ID);
     LongWritable idWritable = (LongWritable)rawField;
     return idWritable.get();
 }
Beispiel #16
0
 /**
  * Asserts that an actual OrcStruct matches the expected InnerStruct.
  *
  * If either argument is null, both are asserted to be null. Otherwise
  * field 0 of actual is compared with expect.int1 and field 1 with
  * expect.string1.
  *
  * @param expect the expected values; may be null
  * @param actual the struct read back; may be null
  */
 private void compareInner(InnerStruct expect, OrcStruct actual)
 {
     bool bothPresent = expect != null && actual != null;
     if (!bothPresent)
     {
         // At least one side is missing, so both must be.
         Assert.Equal(null, expect);
         Assert.Equal(null, actual);
         return;
     }

     Assert.Equal(expect.int1, actual.getFieldValue(0));
     Assert.Equal(expect.string1, actual.getFieldValue(1));
 }
 /**
  * Returns the int stored in the BUCKET field of the struct.
  *
  * @param struct the struct to read from; no null check is performed
  * @return the value of the IntWritable found in the BUCKET field
  */
 internal static int getBucket(OrcStruct @struct)
 {
     object rawField = @struct.getFieldValue(BUCKET);
     IntWritable bucketWritable = (IntWritable)rawField;
     return bucketWritable.get();
 }