예제 #1
0
 // Exercises OrcStruct equality, hashing and string rendering using two
 // four-field structs with identical contents and one three-field struct.
 public void testStruct()
 {
     OrcStruct full = new OrcStruct(4);
     OrcStruct twin = new OrcStruct(4);
     OrcStruct shorter = new OrcStruct(3);

     // All three structs share the same leading three string fields.
     string[] words = { "hop", "on", "pop" };
     foreach (OrcStruct target in new OrcStruct[] { full, twin, shorter })
     {
         for (int slot = 0; slot < words.Length; ++slot)
         {
             target.setFieldValue(slot, words[slot]);
         }
     }
     // Only the four-field structs carry the trailing integer.
     full.setFieldValue(3, 42);
     twin.setFieldValue(3, 42);

     // Equality: null, same contents, different field count.
     Assert.Equal(false, full.Equals(null));
     Assert.Equal(full, twin);
     Assert.Equal(false, full.Equals(shorter));

     // Hash codes: exact values are only checked when string hashing is stable.
     #if PREDICTABLE_STRING_HASH
     Assert.Equal(11241, full.GetHashCode());
     #endif
     Assert.Equal(full.GetHashCode(), twin.GetHashCode());
     #if PREDICTABLE_STRING_HASH
     Assert.Equal(11204, shorter.GetHashCode());
     #endif

     Assert.Equal("{hop, on, pop, 42}", full.ToString());

     // Clearing a field on one side breaks equality in both directions ...
     full.setFieldValue(3, null);
     Assert.Equal(false, full.Equals(twin));
     Assert.Equal(false, twin.Equals(full));

     // ... and clearing it on the other side restores it.
     twin.setFieldValue(3, null);
     Assert.Equal(full, twin);
 }
예제 #2
0
        /**
         * Compares this struct with another object field by field.
         * @param other the object to compare against; may be null or of any type
         * @return true only when other is an OrcStruct with the same number of
         *         fields and every pair of fields is either both null or equal
         */
        public override bool Equals(object other)
        {
            // `as` yields null for a null argument AND for an argument of a
            // different type, so the cast result is what has to be checked;
            // checking `other` would let a foreign type through and fail on
            // oth.fields below.
            OrcStruct oth = other as OrcStruct;

            if (object.ReferenceEquals(oth, null))
            {
                return(false);
            }

            if (fields.Length != oth.fields.Length)
            {
                return(false);
            }

            for (int i = 0; i < fields.Length; ++i)
            {
                if (fields[i] == null)
                {
                    // a null field only matches another null field
                    if (oth.fields[i] != null)
                    {
                        return(false);
                    }
                }
                else if (!fields[i].Equals(oth.fields[i]))
                {
                    return(false);
                }
            }
            return(true);
        }
            // Advances this reader to its next record, refreshing `key` from the
            // record's identifying fields. When the reader is exhausted, or the
            // new key sorts after maxKey, nextRecord is cleared and the reader is
            // disposed.
            void next(OrcStruct next)
            {
                if (!recordReader.hasNext())
                {
                    nextRecord = null;
                    recordReader.Dispose();
                    return;
                }

                nextRecord = (OrcStruct)recordReader.next(next);

                // rebuild the key from the freshly read record
                key.setValues(
                    OrcRecordUpdater.getOriginalTransaction(nextRecord),
                    OrcRecordUpdater.getBucket(nextRecord),
                    OrcRecordUpdater.getRowId(nextRecord),
                    OrcRecordUpdater.getCurrentTransaction(nextRecord),
                    statementId);

                // stop as soon as we have moved past the upper bound
                bool pastUpperBound = maxKey != null && key.compareRow(maxKey) > 0;
                if (pastUpperBound)
                {
                    LOG.debug("key " + key + " > maxkey " + maxKey);
                    nextRecord = null;
                    recordReader.Dispose();
                }
            }
 /**
  * Advances to the next row of the underlying reader and wraps it in an
  * insert event: operation INSERT_OPERATION, both transaction ids 0, the
  * configured bucket, and the row number reported by the reader as row id.
  * When the reader is exhausted, or the new key sorts after maxKey,
  * nextRecord is cleared and the reader is closed.
  * @param next a previously returned event struct to reuse, or null to
  *             allocate a fresh one
  */
 void next(OrcStruct next)
 {
     if (recordReader.hasNext())
     {
         // must be captured before recordReader.next() is called below
         long nextRowId = recordReader.getRowNumber();
         // have to do initialization here, because the super's constructor
         // calls next and thus we need to initialize before our constructor
         // runs
         if (next == null)
         {
             nextRecord = new OrcStruct(OrcRecordUpdater.FIELDS);
             IntWritable operation =
                 new IntWritable(OrcRecordUpdater.INSERT_OPERATION);
             nextRecord.setFieldValue(OrcRecordUpdater.OPERATION, operation);
             nextRecord.setFieldValue(OrcRecordUpdater.CURRENT_TRANSACTION,
                                      new LongWritable(0));
             nextRecord.setFieldValue(OrcRecordUpdater.ORIGINAL_TRANSACTION,
                                      new LongWritable(0));
             nextRecord.setFieldValue(OrcRecordUpdater.BUCKET,
                                      new IntWritable(bucket));
             nextRecord.setFieldValue(OrcRecordUpdater.ROW_ID,
                                      new LongWritable(nextRowId));
             nextRecord.setFieldValue(OrcRecordUpdater.ROW,
                                      recordReader.next(null));
         }
         else
         {
             // reuse the caller's struct and its writable wrappers in place
             nextRecord = next;
             ((IntWritable)next.getFieldValue(OrcRecordUpdater.OPERATION))
             .set(OrcRecordUpdater.INSERT_OPERATION);
             ((LongWritable)next.getFieldValue(OrcRecordUpdater.ORIGINAL_TRANSACTION))
             .set(0);
             ((IntWritable)next.getFieldValue(OrcRecordUpdater.BUCKET))
             .set(bucket);
             ((LongWritable)next.getFieldValue(OrcRecordUpdater.CURRENT_TRANSACTION))
             .set(0);
             // the recycled record must carry the same row id as a freshly
             // allocated one (and as the key below), not a constant 0
             ((LongWritable)next.getFieldValue(OrcRecordUpdater.ROW_ID))
             .set(nextRowId);
             nextRecord.setFieldValue(OrcRecordUpdater.ROW,
                                      recordReader.next(OrcRecordUpdater.getRow(next)));
         }
         key.setValues(0L, bucket, nextRowId, 0L, 0);
         // stop once we have moved past the upper bound
         if (maxKey != null && key.compareRow(maxKey) > 0)
         {
             if (LOG.isDebugEnabled())
             {
                 LOG.debug("key " + key + " > maxkey " + maxKey);
             }
             nextRecord = null;
             recordReader.close();
         }
     }
     else
     {
         nextRecord = null;
         recordReader.close();
     }
 }
예제 #5
0
            /**
             * Builds a union inspector with one child inspector per alternative
             * listed in the given union type information.
             * @param info the union type whose alternatives are inspected
             */
            public OrcUnionObjectInspector(UnionTypeInfo info)
            {
                List <TypeInfo> unionChildren = info.getAllUnionObjectTypeInfos();

                this.children = new List <ObjectInspector>(unionChildren.Count);
                // walk the list fetched above instead of asking info for it again
                foreach (TypeInfo child in unionChildren)
                {
                    this.children.Add(OrcStruct.createObjectInspector(child));
                }
            }
예제 #6
0
 /**
  * Builds a union inspector from the file's type list: one child inspector
  * is created for every subtype id of the type found at columnId.
  * @param columnId index into types of the union type itself
  * @param types the full type list; subtype ids index into it
  */
 public OrcUnionObjectInspector(int columnId, IList <OrcProto.Type> types)
 {
     OrcProto.Type unionType = types[columnId];
     int childCount = unionType.SubtypesCount;

     children = new List <ObjectInspector>(childCount);
     for (int child = 0; child < childCount; ++child)
     {
         int childColumn = (int)unionType.SubtypesList[child];
         children.Add(OrcStruct.createObjectInspector(childColumn, types));
     }
 }
 // Returns the ROW field of an event struct cast to OrcStruct, or null when
 // no event struct is supplied.
 static OrcStruct getRow(OrcStruct @struct)
 {
     return @struct == null
         ? null
         : (OrcStruct)@struct.getFieldValue(ROW);
 }
예제 #8
0
            // Stores fieldValue at the offset described by field, enlarging the
            // struct first when the offset lies beyond its current field count.
            // Returns the struct that was passed in.
            public object setStructFieldData(object @struct, StructField field, object fieldValue)
            {
                OrcStruct target = (OrcStruct)@struct;
                int slot = ((Field)field).offset;

                // make room so that `slot` becomes a valid index
                if (slot >= target.getNumFields())
                {
                    target.setNumFields(slot + 1);
                }

                target.setFieldValue(slot, fieldValue);
                return @struct;
            }
예제 #9
0
            // Copies every field of the given OrcStruct, in order, into a new
            // list. A null input yields null.
            public override List <object> getStructFieldsDataAsList(object @object)
            {
                if (@object == null)
                {
                    return null;
                }

                OrcStruct source = (OrcStruct)@object;
                int fieldCount = source.fields.Length;
                List <object> copy = new List <object>(fieldCount);

                for (int i = 0; i < fieldCount; ++i)
                {
                    copy.Add(source.fields[i]);
                }
                return copy;
            }
예제 #10
0
            // Returns the value stored at the offset described by field, or null
            // when the struct is null or has no field at that offset.
            public override object getStructFieldData(object @object, StructField field)
            {
                if (@object == null)
                {
                    return null;
                }

                int slot = ((Field)field).offset;
                OrcStruct source = (OrcStruct)@object;

                // offsets past the end read as null
                return slot < source.fields.Length ? source.fields[slot] : null;
            }
예제 #11
0
            // Decodes the metadata and footer sections out of footerBuffer using
            // the given compression settings, and builds the root object
            // inspector from the footer's type list. The buffer's position is
            // put back to where it was on entry before the inspector is created.
            public MetaInfoObjExtractor(CompressionKind compressionKind, int bufferSize, int metadataSize,
                                        ByteBuffer footerBuffer)
            {
                this.compressionKind = compressionKind;
                this.bufferSize = bufferSize;
                this.metadataSize = metadataSize;
                this.codec = WriterImpl.createCodec(compressionKind);

                // the metadata section comes first; the footer takes the rest
                int start = footerBuffer.position();
                int footerLength = footerBuffer.limit() - footerBuffer.position() - metadataSize;
                int footerStart = start + metadataSize;

                this.metadata = extractMetadata(footerBuffer, start, metadataSize, codec, bufferSize);
                this.footer = extractFooter(footerBuffer, footerStart, footerLength, codec, bufferSize);

                footerBuffer.position(start);
                this.inspector = OrcStruct.createObjectInspector(0, footer.TypesList);
            }
예제 #12
0
        // Builds an inspector from a 13-column struct type string and checks the
        // type name round-trip, field lookup, and the struct, list and map
        // accessors of the resulting inspectors.
        public void testInspectorFromTypeInfo()
        {
            TypeInfo structType =
                TypeInfoUtils.getTypeInfoFromTypeString("struct<c1:boolean,c2:tinyint" +
                    ",c3:smallint,c4:int,c5:bigint,c6:float,c7:double,c8:binary," +
                    "c9:string,c10:struct<c1:int>,c11:map<int,int>,c12:uniontype<int>" +
                    ",c13:array<timestamp>>");
            StructObjectInspector inspector =
                (StructObjectInspector)OrcStruct.createObjectInspector(structType);

            // type name and field metadata
            Assert.Equal("struct<c1:boolean,c2:tinyint,c3:smallint,c4:int,c5:" +
                "bigint,c6:float,c7:double,c8:binary,c9:string,c10:struct<" +
                "c1:int>,c11:map<int,int>,c12:uniontype<int>,c13:array<timestamp>>",
                inspector.getTypeName());
            Assert.Equal(null,
                inspector.getAllStructFieldRefs()[0].getFieldComment());
            Assert.Equal(null, inspector.getStructFieldRef("UNKNOWN"));

            // struct whose i-th field holds i, plus the matching expected list
            OrcStruct filled = new OrcStruct(13);
            List<object> expected = new List<object>();
            for (int i = 0; i < 13; ++i)
            {
                filled.setFieldValue(i, i);
                expected.Add(i);
            }
            Assert.Equal(expected, inspector.getStructFieldsDataAsList(filled));

            // list inspector of the last column (c13)
            ListObjectInspector listInspector = (ListObjectInspector)
                inspector.getAllStructFieldRefs()[12].getFieldObjectInspector();
            Assert.Equal(ObjectInspectorCategory.LIST, listInspector.getCategory());
            Assert.Equal(10, listInspector.getListElement(expected, 10));
            Assert.Equal(null, listInspector.getListElement(expected, -1));
            Assert.Equal(null, listInspector.getListElement(expected, 13));
            Assert.Equal(13, listInspector.getListLength(expected));

            // map inspector of column c11
            Dictionary<object, object> doubled = new Dictionary<object, object>();
            doubled.Add(1, 2);
            doubled.Add(2, 4);
            doubled.Add(3, 6);
            MapObjectInspector mapInspector = (MapObjectInspector)
                inspector.getAllStructFieldRefs()[10].getFieldObjectInspector();
            Assert.Equal(3, mapInspector.getMapSize(doubled));
            Assert.Equal(4, mapInspector.getMapValueElement(doubled, 2));
        }
예제 #13
0
 // True when the operation recorded in the event struct is DELETE_OPERATION.
 public bool isDelete(OrcStruct value)
 {
     var operation = OrcRecordUpdater.getOperation(value);
     return operation == OrcRecordUpdater.DELETE_OPERATION;
 }
예제 #14
0
 // Asserts that actual mirrors expect: field 0 equals int1 and field 1
 // equals string1. If either side is null, both are required to be null.
 private void compareInner(InnerStruct expect, OrcStruct actual)
 {
     bool bothPresent = expect != null && actual != null;
     if (bothPresent)
     {
         Assert.Equal(expect.int1, actual.getFieldValue(0));
         Assert.Equal(expect.string1, actual.getFieldValue(1));
         return;
     }

     // at least one side is missing, so the other must be missing too
     Assert.Equal(null, expect);
     Assert.Equal(null, actual);
 }
예제 #15
0
 // Reads the CURRENT_TRANSACTION field of an event struct as a long.
 internal static long getCurrentTransaction(OrcStruct @struct)
 {
     LongWritable transaction = (LongWritable)@struct.getFieldValue(CURRENT_TRANSACTION);
     return transaction.get();
 }
 // Reads the BUCKET field of an event struct as an int.
 internal static int getBucket(OrcStruct @struct)
 {
     IntWritable bucketField = (IntWritable)@struct.getFieldValue(BUCKET);
     return bucketField.get();
 }
예제 #17
0
 // Reads the BUCKET field of an event struct as an int.
 internal static int getBucket(OrcStruct @struct)
 {
     IntWritable bucketField = (IntWritable)@struct.getFieldValue(BUCKET);
     return bucketField.get();
 }
예제 #18
0
            // Advances this reader to its next record, refreshing `key` from the
            // record's identifying fields. When the reader is exhausted, or the
            // new key sorts after maxKey, nextRecord is cleared and the reader is
            // disposed.
            void next(OrcStruct next)
            {
                if (!recordReader.hasNext())
                {
                    nextRecord = null;
                    recordReader.Dispose();
                    return;
                }

                nextRecord = (OrcStruct)recordReader.next(next);

                // rebuild the key from the freshly read record
                key.setValues(
                    OrcRecordUpdater.getOriginalTransaction(nextRecord),
                    OrcRecordUpdater.getBucket(nextRecord),
                    OrcRecordUpdater.getRowId(nextRecord),
                    OrcRecordUpdater.getCurrentTransaction(nextRecord),
                    statementId);

                // stop as soon as we have moved past the upper bound
                bool pastUpperBound = maxKey != null && key.compareRow(maxKey) > 0;
                if (pastUpperBound)
                {
                    LOG.debug("key " + key + " > maxkey " + maxKey);
                    nextRecord = null;
                    recordReader.Dispose();
                }
            }
 // Reads the CURRENT_TRANSACTION field of an event struct as a long.
 internal static long getCurrentTransaction(OrcStruct @struct)
 {
     LongWritable transaction = (LongWritable)@struct.getFieldValue(CURRENT_TRANSACTION);
     return transaction.get();
 }
예제 #20
0
 // Reads the ROW_ID field of an event struct as a long.
 internal static long getRowId(OrcStruct @struct)
 {
     LongWritable rowIdField = (LongWritable)@struct.getFieldValue(ROW_ID);
     return rowIdField.get();
 }
예제 #21
0
 // Returns the ROW field of an event struct cast to OrcStruct, or null when
 // no event struct is supplied.
 static OrcStruct getRow(OrcStruct @struct)
 {
     return @struct == null
         ? null
         : (OrcStruct)@struct.getFieldValue(ROW);
 }
예제 #22
0
 /**
  * Sets up an updater that writes ACID events beneath the given path.
  * The constructor resolves the output file name and file system, makes a
  * best-effort attempt to write the ACID format version marker, optionally
  * opens the side file, configures and creates the ORC writer, and
  * pre-populates the reusable event struct.
  * @param path directory the output file name is derived from
  * @param options output options (bucket, file system, configuration,
  *                transaction range, inspector, record id column, ...)
  */
 OrcRecordUpdater(Path path,
                  AcidOutputFormat.Options options)
 {
     this.options = options;
     this.bucket.set(options.getBucket());
     this.path = AcidUtils.createFilename(path, options);
     FileSystem fs = options.getFilesystem();
     if (fs == null)
     {
         fs = path.getFileSystem(options.getConfiguration());
     }
     this.fs = fs;
     try
     {
         // Best effort: create(..., false) does not overwrite, and any
         // IOException here is only logged at debug level.
         FSDataOutputStream strm = fs.create(new Path(path, ACID_FORMAT), false);
         try
         {
             strm.writeInt(ORC_ACID_VERSION);
         }
         finally
         {
             // close even when the write fails so the stream is not leaked
             strm.close();
         }
     }
     catch (IOException ioe)
     {
         if (LOG.isDebugEnabled())
         {
             LOG.debug("Failed to create " + path + "/" + ACID_FORMAT + " with " +
                 ioe);
         }
     }
     // the side file is only opened for non-base output that spans more
     // than one transaction id
     if (options.getMinimumTransactionId() != options.getMaximumTransactionId()
         && !options.isWritingBase())
     {
         flushLengths = fs.create(getSideFile(this.path), true, 8,
             options.getReporter());
     }
     else
     {
         flushLengths = null;
     }
     // prefer writer options carried by OrcOptions, else build defaults
     OrcFile.WriterOptions writerOptions = null;
     if (options is OrcOptions)
     {
         writerOptions = ((OrcOptions)options).getOrcOptions();
     }
     if (writerOptions == null)
     {
         writerOptions = OrcFile.writerOptions( /* options.getTableProperties(), */
             options.getConfiguration());
     }
     writerOptions.fileSystem(fs).callback(indexBuilder);
     if (!options.isWritingBase())
     {
         // non-base output: no block padding, delta-specific buffer/stripe sizes
         writerOptions.blockPadding(false);
         writerOptions.bufferSize(DELTA_BUFFER_SIZE);
         writerOptions.stripeSize(DELTA_STRIPE_SIZE);
     }
     rowInspector = (StructObjectInspector)options.getInspector();
     writerOptions.inspector(createEventSchema(findRecId(options.getInspector(),
         options.getRecordIdColumn())));
     this.writer = OrcFile.createWriter(this.path, writerOptions);
     // the event struct shares this updater's field objects
     item = new OrcStruct(FIELDS);
     item.setFieldValue(OPERATION, operation);
     item.setFieldValue(CURRENT_TRANSACTION, currentTransaction);
     item.setFieldValue(ORIGINAL_TRANSACTION, originalTransaction);
     item.setFieldValue(BUCKET, bucket);
     item.setFieldValue(ROW_ID, rowId);
 }
예제 #23
0
 /**
  * Makes this struct share the field storage of another struct, discarding
  * this struct's own reference to its previous fields.
  * @param other the struct whose fields this one will point to
  */
 void linkFields(OrcStruct other)
 {
     this.fields = other.fields;
 }
예제 #24
0
        /**
         * Constructor that lets the user specify additional options.
         * Reader state is filled from one of three sources, in this order of
         * preference: file metadata supplied in the options, footer meta info
         * supplied in the options, or the footer extracted from the file itself.
         * @param streamCreator factory invoked to open the file when the footer
         *                      has to be extracted from it
         * @param path pathname for file
         * @param options options for reading
         */
        public ReaderImpl(Func <Stream> streamCreator, string path, OrcFile.ReaderOptions options)
        {
            this.streamCreator = streamCreator;
            this.path          = path;
            this.conf          = options.getConfiguration();

            FileMetadata fileMetadata = options.getFileMetadata();

            if (fileMetadata != null)
            {
                // everything comes from the supplied metadata; the file is not opened
                this.compressionKind       = fileMetadata.getCompressionKind();
                this.bufferSize            = fileMetadata.getCompressionBufferSize();
                this.codec                 = WriterImpl.createCodec(compressionKind);
                this.metadataSize          = fileMetadata.getMetadataSize();
                this.stripeStats           = fileMetadata.getStripeStats();
                this.versionList           = fileMetadata.getVersionList();
                this.writerVersion         = OrcFile.WriterVersionHelpers.from(fileMetadata.getWriterVersionNum());
                this.types                 = fileMetadata.getTypes();
                this.rowIndexStride        = fileMetadata.getRowIndexStride();
                this.contentLength         = fileMetadata.getContentLength();
                this.numberOfRows          = fileMetadata.getNumberOfRows();
                this.fileStats             = fileMetadata.getFileStats();
                this.stripes               = fileMetadata.getStripes();
                this.inspector             = OrcStruct.createObjectInspector(0, fileMetadata.getTypes());
                this.footerByteBuffer      = null; // not cached and not needed here
                this.userMetadata          = null; // not cached and not needed here
                this.footerMetaAndPsBuffer = null;
            }
            else
            {
                FileMetaInfo footerMetaData;
                if (options.getFileMetaInfo() != null)
                {
                    // caller already has the footer meta info; reuse it
                    footerMetaData             = options.getFileMetaInfo();
                    this.footerMetaAndPsBuffer = null;
                }
                else
                {
                    // open the file just long enough to pull out the footer
                    using (Stream file = streamCreator())
                    {
                        footerMetaData             = extractMetaInfoFromFooter(file, path, options.getMaxLength());
                        this.footerMetaAndPsBuffer = footerMetaData.footerMetaAndPsBuffer;
                    }
                }
                MetaInfoObjExtractor rInfo =
                    new MetaInfoObjExtractor(footerMetaData.compressionKind,
                                             footerMetaData.bufferSize,
                                             footerMetaData.metadataSize,
                                             footerMetaData.footerBuffer
                                             );
                this.footerByteBuffer = footerMetaData.footerBuffer;
                this.compressionKind  = rInfo.compressionKind;
                this.codec            = rInfo.codec;
                this.bufferSize       = rInfo.bufferSize;
                this.metadataSize     = rInfo.metadataSize;
                this.stripeStats      = rInfo.metadata.StripeStatsList;
                this.types            = rInfo.footer.TypesList;
                // NOTE(review): the (int) casts below presumably narrow wider footer
                // values; confirm that content lengths / row counts above
                // int.MaxValue cannot occur, or widen the target fields.
                this.rowIndexStride   = (int)rInfo.footer.RowIndexStride;
                this.contentLength    = (int)rInfo.footer.ContentLength;
                this.numberOfRows     = (int)rInfo.footer.NumberOfRows;
                this.userMetadata     = rInfo.footer.MetadataList;
                this.fileStats        = rInfo.footer.StatisticsList;
                this.inspector        = rInfo.inspector;
                this.versionList      = footerMetaData.versionList.Select(v => (int)v).ToList();
                this.writerVersion    = footerMetaData.writerVersion;
                this.stripes          = convertProtoStripesToStripes(rInfo.footer.StripesList);
            }
        }
예제 #25
0
        // Round-trip test for a struct<time:timestamp, union:uniontype<int,string>,
        // decimal:decimal> schema. It writes 5077 rows (6 hand-built rows, 68
        // generated rows, 5000 constant rows and 3 trailing rows), checks the
        // column-inclusion helper against the writer schema, then reads the file
        // back and verifies statistics, stripe layout, every row, and seekToRow.
        public void testUnionAndTimestamp()
        {
            // Type list: 0 = root struct, 1 = timestamp, 2 = union, 3 = int,
            // 4 = string, 5 = decimal (ids match the AddSubtypes calls below).
            List<OrcProto.Type> types = new List<OrcProto.Type>();
            types.Add(OrcProto.Type.CreateBuilder().SetKind(OrcProto.Type.Types.Kind.STRUCT).
                AddFieldNames("time").AddFieldNames("union").AddFieldNames("decimal").
                AddSubtypes(1).AddSubtypes(2).AddSubtypes(5).Build());
            types.Add(OrcProto.Type.CreateBuilder().SetKind(OrcProto.Type.Types.Kind.TIMESTAMP).
                Build());
            types.Add(OrcProto.Type.CreateBuilder().SetKind(OrcProto.Type.Types.Kind.UNION).
                AddSubtypes(3).AddSubtypes(4).Build());
            types.Add(OrcProto.Type.CreateBuilder().SetKind(OrcProto.Type.Types.Kind.INT).
                Build());
            types.Add(OrcProto.Type.CreateBuilder().SetKind(OrcProto.Type.Types.Kind.STRING).
                Build());
            types.Add(OrcProto.Type.CreateBuilder().SetKind(OrcProto.Type.Types.Kind.DECIMAL).
                Build());

            ObjectInspector inspector = OrcStruct.createObjectInspector(0, types);

            // running maximum of the decimal column, compared with the file
            // statistics after the write
            HiveDecimal maxValue = HiveDecimal.Parse("10000000000000000000");
            OrcStruct row = new OrcStruct(3);
            OrcUnion union = new OrcUnion();
            Random rand;

            using (Stream file = FileOpenWrite(TestFilePath))
            using (Writer writer = OrcFile.createWriter(TestFilePath, file, OrcFile.writerOptions(conf)
                .inspector(inspector)
                .stripeSize(1000)
                .compress(CompressionKind.NONE)
                .bufferSize(100)
                .blockPadding(false)))
            {
                // The same row and union objects are mutated and re-added for
                // every row written.
                row.setFieldValue(1, union);
                row.setFieldValue(0, Timestamp.Parse("2000-03-12 15:00:00"));
                HiveDecimal value = HiveDecimal.Parse("12345678.6547456");
                row.setFieldValue(2, value);
                union.set((byte)0, 42);
                writer.addRow(row);
                row.setFieldValue(0, Timestamp.Parse("2000-03-20 12:00:00.123456789"));
                union.set((byte)1, "hello");
                value = HiveDecimal.Parse("-5643.234");
                row.setFieldValue(2, value);
                writer.addRow(row);
                // a row with every column null
                row.setFieldValue(0, null);
                row.setFieldValue(1, null);
                row.setFieldValue(2, null);
                writer.addRow(row);
                // non-null union holding a null value, once per tag
                row.setFieldValue(1, union);
                union.set((byte)0, null);
                writer.addRow(row);
                union.set((byte)1, null);
                writer.addRow(row);
                union.set((byte)0, 200000);
                row.setFieldValue(0, Timestamp.Parse("1970-01-01 00:00:00"));
                value = HiveDecimal.Parse("10000000000000000000");
                row.setFieldValue(2, value);
                writer.addRow(row);
                // 68 generated rows; the read side replays the same seed to
                // regenerate the expected decimals
                rand = new Random(42);
                for (int i = 1970; i < 2038; ++i)
                {
                    row.setFieldValue(0, Timestamp.Parse(i + "-05-05 12:34:56." + i));
                    // even years store an int, odd years store a string
                    if ((i & 1) == 0)
                    {
                        union.set((byte)0, (i * i));
                    }
                    else
                    {
                        union.set((byte)1, (i * i).ToString());
                    }
                    value = HiveDecimal.create(rand.NextBigInteger(64), rand.Next(18));
                    row.setFieldValue(2, value);
                    if (maxValue.CompareTo(value) < 0)
                    {
                        maxValue = value;
                    }
                    writer.addRow(row);
                }
                // let's add a lot of constant rows to test the rle
                row.setFieldValue(0, null);
                union.set((byte)0, 1732050807);
                row.setFieldValue(2, null);
                for (int i = 0; i < 5000; ++i)
                {
                    writer.addRow(row);
                }
                union.set((byte)0, 0);
                writer.addRow(row);
                union.set((byte)0, 10);
                writer.addRow(row);
                union.set((byte)0, 138);
                writer.addRow(row);
                writer.close();

                // column-inclusion flags are indexed by type id (0..5)
                TypeDescription schema = writer.getSchema();
                Assert.Equal(5, schema.getMaximumId());
                bool[] expected = new bool[] { false, false, false, false, false, false };
                bool[] included = OrcUtils.includeColumns("", schema);
                Assert.Equal(expected, included);

                expected = new bool[] { false, true, false, false, false, true };
                included = OrcUtils.includeColumns("time,decimal", schema);
                Assert.Equal(expected, included);

                // selecting the union also selects both of its children
                expected = new bool[] { false, false, true, true, true, false };
                included = OrcUtils.includeColumns("union", schema);
                Assert.Equal(expected, included);
            }

            Reader reader = OrcFile.createReader(TestFilePath, OrcFile.readerOptions(conf));

            Assert.Equal(0, reader.getMetadataKeys().Count);
            Assert.Equal(5077, reader.getNumberOfRows());
            // decimal column: 3 hand-built + 68 generated non-null values = 71
            DecimalColumnStatistics stats =
                (DecimalColumnStatistics)reader.getStatistics()[5];
            Assert.Equal(71, stats.getNumberOfValues());
            Assert.Equal(HiveDecimal.Parse("-5643.234"), stats.getMinimum());
            Assert.Equal(maxValue, stats.getMaximum());
            // TODO: fix this
            //    Assert.Equal(null,stats.getSum());
            // stripes must be contiguous and together cover all rows and the
            // whole content length
            int stripeCount = 0;
            int rowCount = 0;
            long currentOffset = -1;
            foreach (StripeInformation stripe in reader.getStripes())
            {
                stripeCount += 1;
                rowCount += (int)stripe.getNumberOfRows();
                if (currentOffset < 0)
                {
                    currentOffset = stripe.getOffset() + stripe.getLength();
                }
                else
                {
                    Assert.Equal(currentOffset, stripe.getOffset());
                    currentOffset += stripe.getLength();
                }
            }
            Assert.Equal(reader.getNumberOfRows(), rowCount);
            Assert.Equal(2, stripeCount);
            Assert.Equal(reader.getContentLength(), currentOffset);

            using (RecordReader rows = reader.rows())
            {
                // Rows are checked in the order they were written. Where `union`
                // is not re-fetched from the row, the assertions rely on the
                // previously fetched union object reflecting the current row.
                Assert.Equal(0, rows.getRowNumber());
                Assert.Equal(0.0, rows.getProgress(), 6);
                Assert.Equal(true, rows.hasNext());
                row = (OrcStruct)rows.next();
                Assert.Equal(1, rows.getRowNumber());
                inspector = reader.getObjectInspector();
                Assert.Equal("struct<time:timestamp,union:uniontype<int,string>,decimal:decimal(38,18)>",
                    inspector.getTypeName());
                Assert.Equal(Timestamp.Parse("2000-03-12 15:00:00"), row.getFieldValue(0));
                union = (OrcUnion)row.getFieldValue(1);
                Assert.Equal(0, union.getTag());
                Assert.Equal(42, union.getObject());
                Assert.Equal(HiveDecimal.Parse("12345678.6547456"), row.getFieldValue(2));
                row = (OrcStruct)rows.next();
                Assert.Equal(2, rows.getRowNumber());
                Assert.Equal(Timestamp.Parse("2000-03-20 12:00:00.123456789"), row.getFieldValue(0));
                Assert.Equal(1, union.getTag());
                Assert.Equal("hello", union.getObject());
                Assert.Equal(HiveDecimal.Parse("-5643.234"), row.getFieldValue(2));
                // the all-null row
                row = (OrcStruct)rows.next();
                Assert.Equal(null, row.getFieldValue(0));
                Assert.Equal(null, row.getFieldValue(1));
                Assert.Equal(null, row.getFieldValue(2));
                row = (OrcStruct)rows.next();
                Assert.Equal(null, row.getFieldValue(0));
                union = (OrcUnion)row.getFieldValue(1);
                Assert.Equal(0, union.getTag());
                Assert.Equal(null, union.getObject());
                Assert.Equal(null, row.getFieldValue(2));
                row = (OrcStruct)rows.next();
                Assert.Equal(null, row.getFieldValue(0));
                Assert.Equal(1, union.getTag());
                Assert.Equal(null, union.getObject());
                Assert.Equal(null, row.getFieldValue(2));
                row = (OrcStruct)rows.next();
                Assert.Equal(Timestamp.Parse("1970-01-01 00:00:00"), row.getFieldValue(0));
                Assert.Equal(200000, union.getObject());
                Assert.Equal(HiveDecimal.Parse("10000000000000000000"), row.getFieldValue(2));
                // replay the generator with the seed used while writing
                rand = new Random(42);
                for (int i = 1970; i < 2038; ++i)
                {
                    row = (OrcStruct)rows.next();
                    Assert.Equal(Timestamp.Parse(i + "-05-05 12:34:56." + i), row.getFieldValue(0));
                    if ((i & 1) == 0)
                    {
                        Assert.Equal(0, union.getTag());
                        Assert.Equal(i * i, union.getObject());
                    }
                    else
                    {
                        Assert.Equal(1, union.getTag());
                        Assert.Equal((i * i).ToString(), union.getObject());
                    }
                    Assert.Equal(HiveDecimal.create(rand.NextBigInteger(64), rand.Next(18)), row.getFieldValue(2));
                }
                // the 5000 constant rows followed by the three trailing rows
                for (int i = 0; i < 5000; ++i)
                {
                    row = (OrcStruct)rows.next();
                    Assert.Equal(1732050807, union.getObject());
                }
                row = (OrcStruct)rows.next();
                Assert.Equal(0, union.getObject());
                row = (OrcStruct)rows.next();
                Assert.Equal(10, union.getObject());
                row = (OrcStruct)rows.next();
                Assert.Equal(138, union.getObject());
                Assert.Equal(false, rows.hasNext());
                Assert.Equal(1.0, rows.getProgress(), 5);
                Assert.Equal(reader.getNumberOfRows(), rows.getRowNumber());
                // seeking back to row index 1 must return the second row written
                rows.seekToRow(1);
                row = (OrcStruct)rows.next();
                Assert.Equal(Timestamp.Parse("2000-03-20 12:00:00.123456789"), row.getFieldValue(0));
                Assert.Equal(1, union.getTag());
                Assert.Equal("hello", union.getObject());
                Assert.Equal(HiveDecimal.Parse("-5643.234"), row.getFieldValue(2));
            }
        }
예제 #26
0
 /**
  * Makes this struct share the field storage of another struct, discarding
  * this struct's own reference to its previous fields.
  * @param other the struct whose fields this one will point to
  */
 void linkFields(OrcStruct other)
 {
     this.fields = other.fields;
 }
예제 #27
0
        /**
         * Produce the next merged event.
         *
         * Each pass of the loop takes the primary reader's pending record,
         * advances that reader, re-selects the primary reader if required, and
         * then decides whether the record starts a new row.  Records whose
         * current transaction is rejected by validTxnList are skipped.
         *
         * @param recordIdentifier receives the key of each record examined; it is
         *        cast to ReaderKey below, so it must be a ReaderKey instance
         * @param prev the output struct; its fields are linked to (not copied
         *        from) the last record examined that was not skipped
         * @return true if the loop ended with keysSame cleared, i.e. a record was
         *         accepted as a new row; false if primary was (or became) null
         *         while keysSame was still set
         */
        public bool next(RecordIdentifier recordIdentifier,
                            OrcStruct prev)
        {
            bool keysSame = true;
            while (keysSame && primary != null)
            {

                // The primary's nextRecord is the next value to return
                OrcStruct current = primary.nextRecord;
                // Capture the key now: primary.next() below moves the reader on.
                recordIdentifier.set(primary.key);

                // Advance the primary reader to the next record
                primary.next(extraValue);

                // Save the current record as the new extraValue for next time so that
                // we minimize allocations
                extraValue = current;

                // now that the primary reader has advanced, we need to see if we
                // continue to read it or move to the secondary.
                if (primary.nextRecord == null ||
                    primary.key.compareTo(secondaryKey) > 0)
                {

                    // if the primary isn't done, push it back into the readers
                    if (primary.nextRecord != null)
                    {
                        readers.put(primary.key, primary);
                    }

                    // update primary and secondaryKey
                    Map.Entry<ReaderKey, ReaderPair> entry = readers.pollFirstEntry();
                    if (entry != null)
                    {
                        primary = entry.getValue();
                        if (readers.isEmpty())
                        {
                            secondaryKey = null;
                        }
                        else
                        {
                            secondaryKey = readers.firstKey();
                        }
                    }
                    else
                    {
                        // No readers left; the while condition ends the loop.
                        primary = null;
                    }
                }

                // if this transaction isn't ok, skip over it
                if (!validTxnList.isTxnValid(
                    ((ReaderKey)recordIdentifier).getCurrentTransactionId()))
                {
                    // keysSame is still true here, so the loop simply moves on to
                    // the next record (if any) without touching prev or prevKey.
                    continue;
                }

                /*for multi-statement txns, you may have multiple events for the same
                * row in the same (current) transaction.  We want to collapse these to just the last one
                * regardless whether we are minor compacting.  Consider INSERT/UPDATE/UPDATE of the
                * same row in the same txn.  There is no benefit passing along anything except the last
                * event.  If we did want to pass it along, we'd have to include statementId in the row
                * returned so that compaction could write it out or make minor minor compaction understand
                * how to write out delta files in delta_xxx_yyy_stid format.  There doesn't seem to be any
                * value in this.*/
                bool isSameRow = prevKey.isSameRow((ReaderKey)recordIdentifier);
                // if we are collapsing, figure out if this is a new row
                if (collapse || isSameRow)
                {
                    keysSame = (collapse && prevKey.compareRow(recordIdentifier) == 0) || (isSameRow);
                    if (!keysSame)
                    {
                        // Remember this key so later records can be compared against it.
                        prevKey.set(recordIdentifier);
                    }
                }
                else
                {
                    keysSame = false;
                }

                // set the output record by fiddling with the pointers so that we can
                // avoid a copy.
                prev.linkFields(current);
            }
            return !keysSame;
        }
예제 #28
0
 /**
  * Read the value stored in the ORIGINAL_TRANSACTION field of an event struct.
  * @param struct the event struct to read from
  * @return the long held by that field's LongWritable
  */
 internal static long getOriginalTransaction(OrcStruct @struct)
 {
     LongWritable originalTxn = (LongWritable)@struct.getFieldValue(ORIGINAL_TRANSACTION);
     return originalTxn.get();
 }
예제 #29
0
 /**
  * Load the next row from recordReader and wrap it as an insert event in
  * nextRecord, updating key to match.
  *
  * The event always carries INSERT_OPERATION, transaction ids of 0, this
  * pair's bucket and the reader's row number as its row id.  When the
  * reader has no more rows, or the new key sorts after maxKey, nextRecord
  * is set to null and the reader is closed.
  *
  * @param next a previously returned event struct to reuse, or null to
  *        allocate a fresh one
  */
 void next(OrcStruct next)
 {
     if (recordReader.hasNext())
     {
         // Row number must be read before recordReader.next() consumes the row.
         long nextRowId = recordReader.getRowNumber();
         // have to do initialization here, because the super's constructor
         // calls next and thus we need to initialize before our constructor
         // runs
         if (next == null)
         {
             nextRecord = new OrcStruct(OrcRecordUpdater.FIELDS);
             IntWritable operation =
                 new IntWritable(OrcRecordUpdater.INSERT_OPERATION);
             nextRecord.setFieldValue(OrcRecordUpdater.OPERATION, operation);
             nextRecord.setFieldValue(OrcRecordUpdater.CURRENT_TRANSACTION,
                 new LongWritable(0));
             nextRecord.setFieldValue(OrcRecordUpdater.ORIGINAL_TRANSACTION,
                 new LongWritable(0));
             nextRecord.setFieldValue(OrcRecordUpdater.BUCKET,
                 new IntWritable(bucket));
             nextRecord.setFieldValue(OrcRecordUpdater.ROW_ID,
                 new LongWritable(nextRowId));
             nextRecord.setFieldValue(OrcRecordUpdater.ROW,
                 recordReader.next(null));
         }
         else
         {
             // Reuse the caller's struct and its writables to avoid allocating.
             nextRecord = next;
             ((IntWritable)next.getFieldValue(OrcRecordUpdater.OPERATION))
                 .set(OrcRecordUpdater.INSERT_OPERATION);
             ((LongWritable)next.getFieldValue(OrcRecordUpdater.ORIGINAL_TRANSACTION))
                 .set(0);
             ((IntWritable)next.getFieldValue(OrcRecordUpdater.BUCKET))
                 .set(bucket);
             ((LongWritable)next.getFieldValue(OrcRecordUpdater.CURRENT_TRANSACTION))
                 .set(0);
             // Stamp the actual row number, exactly as the allocation branch
             // above and key.setValues() below do; writing 0 here would make a
             // reused record disagree with its own key.
             ((LongWritable)next.getFieldValue(OrcRecordUpdater.ROW_ID))
                 .set(nextRowId);
             nextRecord.setFieldValue(OrcRecordUpdater.ROW,
                 recordReader.next(OrcRecordUpdater.getRow(next)));
         }
         key.setValues(0L, bucket, nextRowId, 0L, 0);
         if (maxKey != null && key.compareRow(maxKey) > 0)
         {
             // Past the upper bound of the range being read: stop here.
             if (LOG.isDebugEnabled())
             {
                 LOG.debug("key " + key + " > maxkey " + maxKey);
             }
             nextRecord = null;
             recordReader.close();
         }
     }
     else
     {
         // Reader exhausted.
         nextRecord = null;
         recordReader.close();
     }
 }
        /**
         * Create a reader that merge sorts the ACID events together.
         * @param conf the configuration
         * @param collapseEvents should the events on the same row be collapsed
         * @param reader the reader for the base file, or null when there is no
         *        base file to read (baseReader is then left null)
         * @param isOriginal is the base file a pre-acid file
         * @param bucket the bucket we are reading
         * @param validTxnList the transaction list stored on this merger
         * @param options the options to read with
         * @param deltaDirectory the list of delta directories to include; may be
         *        null, in which case no deltas are read
         * @throws IOException if no desired row type description can be obtained
         *         from conf
         */
        OrcRawRecordMerger(Configuration conf,
                           bool collapseEvents,
                           Reader reader,
                           bool isOriginal,
                           int bucket,
                           ValidTxnList validTxnList,
                           Reader.Options options,
                           Path[] deltaDirectory)
        {
            this.conf         = conf;
            this.collapse     = collapseEvents;
            this.offset       = options.getOffset();
            this.length       = options.getLength();
            this.validTxnList = validTxnList;
            TypeDescription typeDescr = OrcUtils.getDesiredRowTypeDescr(conf);

            // The event schema is derived from the row schema, so it is mandatory.
            if (typeDescr == null)
            {
                throw new IOException(ErrorMsg.SCHEMA_REQUIRED_TO_READ_ACID_TABLES.getErrorCodedMsg());
            }

            objectInspector = OrcRecordUpdater.createEventSchema
                                  (OrcStruct.createObjectInspector(0, OrcUtils.getOrcTypes(typeDescr)));

            // modify the options to reflect the event instead of the base row
            Reader.Options eventOptions = createEventOptions(options);
            if (reader == null)
            {
                baseReader = null;
            }
            else
            {
                // find the min/max based on the offset and length
                if (isOriginal)
                {
                    discoverOriginalKeyBounds(reader, bucket, options);
                }
                else
                {
                    discoverKeyBounds(reader, options);
                }
                LOG.info("min key = " + minKey + ", max key = " + maxKey);
                // use the min/max instead of the byte range
                ReaderPair pair;
                ReaderKey  key = new ReaderKey();
                if (isOriginal)
                {
                    // Work on a copy so the caller's options are not modified.
                    options = options.clone();
                    options.range(options.getOffset(), Long.MAX_VALUE);
                    pair = new OriginalReaderPair(key, reader, bucket, minKey, maxKey,
                                                  options);
                }
                else
                {
                    pair = new ReaderPair(key, reader, bucket, minKey, maxKey,
                                          eventOptions, 0);
                }

                // if there is at least one record, put it in the map
                if (pair.nextRecord != null)
                {
                    readers.put(key, pair);
                }
                baseReader = pair.recordReader;
            }

            // we always want to read all of the deltas
            eventOptions.range(0, Long.MAX_VALUE);
            if (deltaDirectory != null)
            {
                foreach (Path delta in deltaDirectory)
                {
                    ReaderKey             key       = new ReaderKey();
                    Path                  deltaFile = AcidUtils.createBucketFile(delta, bucket);
                    AcidUtils.ParsedDelta deltaDir  = AcidUtils.parsedDelta(delta);
                    FileSystem            fs        = deltaFile.getFileSystem(conf);
                    long                  length    = getLastFlushLength(fs, deltaFile);
                    // Skip deltas whose flush length is -1 or whose bucket file is absent.
                    if (length != -1 && fs.exists(deltaFile))
                    {
                        Reader deltaReader = OrcFile.createReader(deltaFile,
                                                                  OrcFile.readerOptions(conf).maxLength(length));
                        Reader.Options deltaEventOptions = null;
                        if (eventOptions.getSearchArgument() != null)
                        {
                            // Turn off the sarg before pushing it to delta.  We never want to push a sarg to a delta as
                            // it can produce wrong results (if the latest valid version of the record is filtered out by
                            // the sarg) or ArrayOutOfBounds errors (when the sarg is applied to a delete record)
                            // unless the delta only has insert events
                            OrcRecordUpdater.AcidStats acidStats = OrcRecordUpdater.parseAcidStats(deltaReader);
                            if (acidStats.deletes > 0 || acidStats.updates > 0)
                            {
                                deltaEventOptions = eventOptions.clone().searchArgument(null, null);
                            }
                        }
                        ReaderPair deltaPair;
                        // Use the sarg-free options when they were created above,
                        // otherwise the shared eventOptions.
                        deltaPair = new ReaderPair(key, deltaReader, bucket, minKey,
                                                   maxKey, deltaEventOptions != null ? deltaEventOptions : eventOptions, deltaDir.getStatementId());
                        if (deltaPair.nextRecord != null)
                        {
                            readers.put(key, deltaPair);
                        }
                    }
                }
            }

            // get the first record
            Map.Entry <ReaderKey, ReaderPair> entry = readers.pollFirstEntry();
            if (entry == null)
            {
                // Nothing to read from the base or any delta.
                columns = 0;
                primary = null;
            }
            else
            {
                primary = entry.getValue();
                if (readers.isEmpty())
                {
                    secondaryKey = null;
                }
                else
                {
                    secondaryKey = readers.firstKey();
                }
                // get the number of columns in the user's rows
                columns = primary.getColumns();
            }
        }
 /**
  * Read the value stored in the OPERATION field of an event struct.
  * @param struct the event struct to read from
  * @return the int held by that field's IntWritable
  */
 internal static int getOperation(OrcStruct @struct)
 {
     IntWritable operationField = (IntWritable)@struct.getFieldValue(OPERATION);
     return operationField.get();
 }
        /**
         * Produce the next merged event.
         *
         * Each pass of the loop takes the primary reader's pending record,
         * advances that reader, re-selects the primary reader if required, and
         * then decides whether the record starts a new row.  Records whose
         * current transaction is rejected by validTxnList are skipped.
         *
         * @param recordIdentifier receives the key of each record examined; it is
         *        cast to ReaderKey below, so it must be a ReaderKey instance
         * @param prev the output struct; its fields are linked to (not copied
         *        from) the last record examined that was not skipped
         * @return true if the loop ended with keysSame cleared, i.e. a record was
         *         accepted as a new row; false if primary was (or became) null
         *         while keysSame was still set
         */
        public bool next(RecordIdentifier recordIdentifier,
                         OrcStruct prev)
        {
            bool keysSame = true;

            while (keysSame && primary != null)
            {
                // The primary's nextRecord is the next value to return
                OrcStruct current = primary.nextRecord;
                // Capture the key now: primary.next() below moves the reader on.
                recordIdentifier.set(primary.key);

                // Advance the primary reader to the next record
                primary.next(extraValue);

                // Save the current record as the new extraValue for next time so that
                // we minimize allocations
                extraValue = current;

                // now that the primary reader has advanced, we need to see if we
                // continue to read it or move to the secondary.
                if (primary.nextRecord == null ||
                    primary.key.compareTo(secondaryKey) > 0)
                {
                    // if the primary isn't done, push it back into the readers
                    if (primary.nextRecord != null)
                    {
                        readers.put(primary.key, primary);
                    }

                    // update primary and secondaryKey
                    Map.Entry <ReaderKey, ReaderPair> entry = readers.pollFirstEntry();
                    if (entry != null)
                    {
                        primary = entry.getValue();
                        if (readers.isEmpty())
                        {
                            secondaryKey = null;
                        }
                        else
                        {
                            secondaryKey = readers.firstKey();
                        }
                    }
                    else
                    {
                        // No readers left; the while condition ends the loop.
                        primary = null;
                    }
                }

                // if this transaction isn't ok, skip over it
                if (!validTxnList.isTxnValid(
                        ((ReaderKey)recordIdentifier).getCurrentTransactionId()))
                {
                    // keysSame is still true here, so the loop simply moves on to
                    // the next record (if any) without touching prev or prevKey.
                    continue;
                }

                /*for multi-statement txns, you may have multiple events for the same
                 * row in the same (current) transaction.  We want to collapse these to just the last one
                 * regardless whether we are minor compacting.  Consider INSERT/UPDATE/UPDATE of the
                 * same row in the same txn.  There is no benefit passing along anything except the last
                 * event.  If we did want to pass it along, we'd have to include statementId in the row
                 * returned so that compaction could write it out or make minor minor compaction understand
                 * how to write out delta files in delta_xxx_yyy_stid format.  There doesn't seem to be any
                 * value in this.*/
                bool isSameRow = prevKey.isSameRow((ReaderKey)recordIdentifier);
                // if we are collapsing, figure out if this is a new row
                if (collapse || isSameRow)
                {
                    keysSame = (collapse && prevKey.compareRow(recordIdentifier) == 0) || (isSameRow);
                    if (!keysSame)
                    {
                        // Remember this key so later records can be compared against it.
                        prevKey.set(recordIdentifier);
                    }
                }
                else
                {
                    keysSame = false;
                }

                // set the output record by fiddling with the pointers so that we can
                // avoid a copy.
                prev.linkFields(current);
            }
            return(!keysSame);
        }
 /**
  * Fetch the ORIGINAL_TRANSACTION field of an event struct as a long.
  * @param struct the event struct to read from
  * @return the value held by the field's LongWritable
  */
 internal static long getOriginalTransaction(OrcStruct @struct)
 {
     object rawField = @struct.getFieldValue(ORIGINAL_TRANSACTION);
     return ((LongWritable)rawField).get();
 }
 /**
  * Tell whether an event struct records a delete.
  * @param value the event struct to inspect
  * @return true when the struct's operation equals
  *         OrcRecordUpdater.DELETE_OPERATION
  */
 public bool isDelete(OrcStruct value)
 {
     int operation = OrcRecordUpdater.getOperation(value);
     return operation == OrcRecordUpdater.DELETE_OPERATION;
 }
 /**
  * Read the value stored in the ROW_ID field of an event struct.
  * @param struct the event struct to read from
  * @return the long held by that field's LongWritable
  */
 internal static long getRowId(OrcStruct @struct)
 {
     LongWritable rowIdField = (LongWritable)@struct.getFieldValue(ROW_ID);
     return rowIdField.get();
 }
예제 #36
0
 /**
  * Fetch the OPERATION field of an event struct as an int.
  * @param struct the event struct to read from
  * @return the value held by the field's IntWritable
  */
 internal static int getOperation(OrcStruct @struct)
 {
     object rawField = @struct.getFieldValue(OPERATION);
     return ((IntWritable)rawField).get();
 }
        /**
         * Create an updater that writes ACID events to the file derived from
         * path and options.
         *
         * Besides opening the ORC writer, the constructor makes a best-effort
         * attempt to create the ACID_FORMAT marker file under path (failures
         * are only logged at debug level), and opens the flush-length side
         * file when the transaction range spans more than one id and a base is
         * not being written.
         *
         * @param path the directory the output file name is derived from
         * @param options the ACID output options (bucket, file system,
         *        transaction range, inspector, ...)
         */
        OrcRecordUpdater(Path path,
                         AcidOutputFormat.Options options)
        {
            this.options = options;
            this.bucket.set(options.getBucket());
            this.path = AcidUtils.createFilename(path, options);
            FileSystem fs = options.getFilesystem();

            // Fall back to the path's own file system when none was supplied.
            if (fs == null)
            {
                fs = path.getFileSystem(options.getConfiguration());
            }
            this.fs = fs;
            try
            {
                // overwrite == false: creation fails if the marker already exists,
                // which is handled by the catch below.
                FSDataOutputStream strm = fs.create(new Path(path, ACID_FORMAT), false);
                try
                {
                    strm.writeInt(ORC_ACID_VERSION);
                }
                finally
                {
                    // Always release the stream, even when the write fails.
                    strm.close();
                }
            }
            catch (IOException ioe)
            {
                if (LOG.isDebugEnabled())
                {
                    LOG.debug("Failed to create " + path + "/" + ACID_FORMAT + " with " +
                              ioe);
                }
            }
            if (options.getMinimumTransactionId() != options.getMaximumTransactionId() &&
                !options.isWritingBase())
            {
                flushLengths = fs.create(getSideFile(this.path), true, 8,
                                         options.getReporter());
            }
            else
            {
                flushLengths = null;
            }
            // Prefer ORC-specific writer options when the caller supplied them.
            OrcFile.WriterOptions writerOptions = null;
            if (options is OrcOptions)
            {
                writerOptions = ((OrcOptions)options).getOrcOptions();
            }
            if (writerOptions == null)
            {
                writerOptions = OrcFile.writerOptions( /* options.getTableProperties(), */
                    options.getConfiguration());
            }
            writerOptions.fileSystem(fs).callback(indexBuilder);
            if (!options.isWritingBase())
            {
                // Non-base files get their own padding, buffer and stripe settings.
                writerOptions.blockPadding(false);
                writerOptions.bufferSize(DELTA_BUFFER_SIZE);
                writerOptions.stripeSize(DELTA_STRIPE_SIZE);
            }
            rowInspector = (StructObjectInspector)options.getInspector();
            writerOptions.inspector(createEventSchema(findRecId(options.getInspector(),
                                                                options.getRecordIdColumn())));
            this.writer = OrcFile.createWriter(this.path, writerOptions);
            // The reusable event struct shares this updater's writable fields, so
            // updating those fields updates the event that gets written.
            item        = new OrcStruct(FIELDS);
            item.setFieldValue(OPERATION, operation);
            item.setFieldValue(CURRENT_TRANSACTION, currentTransaction);
            item.setFieldValue(ORIGINAL_TRANSACTION, originalTransaction);
            item.setFieldValue(BUCKET, bucket);
            item.setFieldValue(ROW_ID, rowId);
        }
예제 #38
0
        /**
         * Write an ORC file of (time, date) rows and verify it by reading it back.
         *
         * For every year in [minYear, maxYear) and every ms in [1000, 2000) one
         * row is written; the file is then reopened and each row is asserted to
         * equal the values that were written, in the same order.
         *
         * @param path where the ORC file is written
         * @param minYear first year to generate (inclusive)
         * @param maxYear year to stop at (exclusive)
         */
        public void createOrcDateFile(string path, int minYear, int maxYear)
        {
            // Schema: struct<time:timestamp, date:date>, root type first.
            OrcProto.Type.Builder rootBuilder = OrcProto.Type.CreateBuilder();
            rootBuilder.SetKind(OrcProto.Type.Types.Kind.STRUCT);
            List<OrcProto.Type> schema = new List<OrcProto.Type>();
            schema.Add(rootBuilder
                .AddFieldNames("time")
                .AddFieldNames("date")
                .AddSubtypes(1)
                .AddSubtypes(2)
                .Build());
            schema.Add(OrcProto.Type.CreateBuilder()
                .SetKind(OrcProto.Type.Types.Kind.TIMESTAMP)
                .Build());
            schema.Add(OrcProto.Type.CreateBuilder()
                .SetKind(OrcProto.Type.Types.Kind.DATE)
                .Build());

            ObjectInspector rowInspector = OrcStruct.createObjectInspector(0, schema);

            // Write phase.
            using (Stream output = FileOpenWrite(path))
            {
                using (Writer orcWriter = OrcFile.createWriter(path, output, OrcFile.writerOptions(conf)
                    .inspector(rowInspector)
                    .stripeSize(100000)
                    .bufferSize(10000)
                    .blockPadding(false)))
                {
                    OrcStruct outRow = new OrcStruct(2);
                    for (int y = minYear; y < maxYear; ++y)
                    {
                        for (int millis = 1000; millis < 2000; ++millis)
                        {
                            string stamp = y + "-05-05 12:34:56." + millis;
                            outRow.setFieldValue(0, Timestamp.Parse(stamp));
                            outRow.setFieldValue(1, new Date(y - 1900, 11, 25));
                            orcWriter.addRow(outRow);
                        }
                    }
                }
            }

            // Read-back phase: rows must come back in the order they were written.
            Reader orcReader = OrcFile.createReader(path, OrcFile.readerOptions(conf));
            using (RecordReader rowReader = orcReader.rows())
            {
                for (int y = minYear; y < maxYear; ++y)
                {
                    for (int millis = 1000; millis < 2000; ++millis)
                    {
                        OrcStruct inRow = (OrcStruct)rowReader.next();
                        string stamp = y + "-05-05 12:34:56." + millis;
                        Assert.Equal(Timestamp.Parse(stamp), inRow.getFieldValue(0));
                        Assert.Equal(new Date(y - 1900, 11, 25), inRow.getFieldValue(1));
                    }
                }
            }
        }