public OrcUnionObjectInspector(int columnId, IList <OrcProto.Type> types)
{
    // Look up the union's type node and build one child inspector per branch.
    OrcProto.Type unionType = types[columnId];
    children = new List <ObjectInspector>(unionType.SubtypesCount);
    foreach (var subtype in unionType.SubtypesList)
    {
        children.Add(OrcStruct.createObjectInspector((int)subtype, types));
    }
}
            public OrcUnionObjectInspector(UnionTypeInfo info)
            {
                // Materialize the branch list once and reuse it for both the
                // capacity hint and the iteration; the original code invoked
                // getAllUnionObjectTypeInfos() a second time for the loop.
                List <TypeInfo> unionChildren = info.getAllUnionObjectTypeInfos();

                this.children = new List <ObjectInspector>(unionChildren.Count);
                foreach (TypeInfo child in unionChildren)
                {
                    this.children.Add(OrcStruct.createObjectInspector(child));
                }
            }
// Example #3 (0) — separator artifact from the code-example source this file was scraped from
            public MetaInfoObjExtractor(CompressionKind compressionKind, int bufferSize, int metadataSize,
                                        ByteBuffer footerBuffer)
            {
                // Record the compression parameters and derive the codec from them.
                this.compressionKind = compressionKind;
                this.bufferSize = bufferSize;
                this.metadataSize = metadataSize;
                this.codec = WriterImpl.createCodec(compressionKind);

                // The buffer holds the metadata section followed by the footer.
                // Remember where we started so the buffer can be rewound afterwards.
                int start = footerBuffer.position();
                int footerLength = footerBuffer.limit() - start - metadataSize;

                this.metadata = extractMetadata(footerBuffer, start, metadataSize, codec, bufferSize);
                this.footer = extractFooter(footerBuffer, start + metadataSize, footerLength, codec, bufferSize);

                // Restore the caller's position and build the row inspector from the footer's types.
                footerBuffer.position(start);
                this.inspector = OrcStruct.createObjectInspector(0, footer.TypesList);
            }
        /**
         * Create a reader that merge sorts the ACID events together.
         * @param conf the configuration
         * @param collapseEvents should the events on the same row be collapsed
         * @param reader the reader for the base file, or null if there is no base
         * @param isOriginal is the base file a pre-acid file
         * @param bucket the bucket we are reading
         * @param validTxnList the list of valid transactions to filter events by
         * @param options the options to read with
         * @param deltaDirectory the list of delta directories to include
         * @throws IOException if the desired row schema cannot be determined
         */
        OrcRawRecordMerger(Configuration conf,
                           bool collapseEvents,
                           Reader reader,
                           bool isOriginal,
                           int bucket,
                           ValidTxnList validTxnList,
                           Reader.Options options,
                           Path[] deltaDirectory)
        {
            this.conf         = conf;
            this.collapse     = collapseEvents;
            this.offset       = options.getOffset();
            this.length       = options.getLength();
            this.validTxnList = validTxnList;
            // The row schema must be known up front so the ACID event schema
            // can be built around it below.
            TypeDescription typeDescr = OrcUtils.getDesiredRowTypeDescr(conf);

            if (typeDescr == null)
            {
                throw new IOException(ErrorMsg.SCHEMA_REQUIRED_TO_READ_ACID_TABLES.getErrorCodedMsg());
            }

            objectInspector = OrcRecordUpdater.createEventSchema
                                  (OrcStruct.createObjectInspector(0, OrcUtils.getOrcTypes(typeDescr)));

            // modify the options to reflect the event instead of the base row
            Reader.Options eventOptions = createEventOptions(options);
            // A null reader means there is no base file; only deltas contribute.
            if (reader == null)
            {
                baseReader = null;
            }
            else
            {
                // find the min/max based on the offset and length
                if (isOriginal)
                {
                    discoverOriginalKeyBounds(reader, bucket, options);
                }
                else
                {
                    discoverKeyBounds(reader, options);
                }
                LOG.info("min key = " + minKey + ", max key = " + maxKey);
                // use the min/max instead of the byte range
                ReaderPair pair;
                ReaderKey  key = new ReaderKey();
                if (isOriginal)
                {
                    // Pre-acid files are read with the original row options but
                    // an unbounded byte range; the key bounds do the limiting.
                    options = options.clone();
                    options.range(options.getOffset(), Long.MAX_VALUE);
                    pair = new OriginalReaderPair(key, reader, bucket, minKey, maxKey,
                                                  options);
                }
                else
                {
                    pair = new ReaderPair(key, reader, bucket, minKey, maxKey,
                                          eventOptions, 0);
                }

                // if there is at least one record, put it in the map
                if (pair.nextRecord != null)
                {
                    readers.put(key, pair);
                }
                baseReader = pair.recordReader;
            }

            // we always want to read all of the deltas
            eventOptions.range(0, Long.MAX_VALUE);
            if (deltaDirectory != null)
            {
                foreach (Path delta in deltaDirectory)
                {
                    ReaderKey             key       = new ReaderKey();
                    Path                  deltaFile = AcidUtils.createBucketFile(delta, bucket);
                    AcidUtils.ParsedDelta deltaDir  = AcidUtils.parsedDelta(delta);
                    FileSystem            fs        = deltaFile.getFileSystem(conf);
                    long                  length    = getLastFlushLength(fs, deltaFile);
                    // -1 presumably means no flush length is recorded for this
                    // bucket file — skip it (TODO confirm getLastFlushLength contract).
                    if (length != -1 && fs.exists(deltaFile))
                    {
                        Reader deltaReader = OrcFile.createReader(deltaFile,
                                                                  OrcFile.readerOptions(conf).maxLength(length));
                        Reader.Options deltaEventOptions = null;
                        if (eventOptions.getSearchArgument() != null)
                        {
                            // Turn off the sarg before pushing it to delta.  We never want to push a sarg to a delta as
                            // it can produce wrong results (if the latest valid version of the record is filtered out by
                            // the sarg) or ArrayOutOfBounds errors (when the sarg is applied to a delete record)
                            // unless the delta only has insert events
                            OrcRecordUpdater.AcidStats acidStats = OrcRecordUpdater.parseAcidStats(deltaReader);
                            if (acidStats.deletes > 0 || acidStats.updates > 0)
                            {
                                deltaEventOptions = eventOptions.clone().searchArgument(null, null);
                            }
                        }
                        ReaderPair deltaPair;
                        deltaPair = new ReaderPair(key, deltaReader, bucket, minKey,
                                                   maxKey, deltaEventOptions != null ? deltaEventOptions : eventOptions, deltaDir.getStatementId());
                        // Only register the delta if it actually produced a record.
                        if (deltaPair.nextRecord != null)
                        {
                            readers.put(key, deltaPair);
                        }
                    }
                }
            }

            // get the first record
            Map.Entry <ReaderKey, ReaderPair> entry = readers.pollFirstEntry();
            if (entry == null)
            {
                // No readers produced any records; the merger is empty.
                columns = 0;
                primary = null;
            }
            else
            {
                primary = entry.getValue();
                // secondaryKey tracks the next-smallest key after the primary,
                // or null when the primary is the only remaining reader.
                if (readers.isEmpty())
                {
                    secondaryKey = null;
                }
                else
                {
                    secondaryKey = readers.firstKey();
                }
                // get the number of columns in the user's rows
                columns = primary.getColumns();
            }
        }
// Example #5 (0) — separator artifact from the code-example source this file was scraped from
        /**
         * Constructor that lets the user specify additional options.
         * @param streamCreator factory that opens the underlying file stream
         * @param path pathname for file
         * @param options options for reading
         * @throws IOException if the file tail cannot be read
         */
        public ReaderImpl(Func <Stream> streamCreator, string path, OrcFile.ReaderOptions options)
        {
            this.streamCreator = streamCreator;
            this.path = path;
            this.conf = options.getConfiguration();

            FileMetadata cached = options.getFileMetadata();
            if (cached != null)
            {
                // Everything needed was supplied externally; no file access required.
                this.compressionKind = cached.getCompressionKind();
                this.bufferSize = cached.getCompressionBufferSize();
                this.codec = WriterImpl.createCodec(compressionKind);
                this.metadataSize = cached.getMetadataSize();
                this.stripeStats = cached.getStripeStats();
                this.versionList = cached.getVersionList();
                this.writerVersion = OrcFile.WriterVersionHelpers.from(cached.getWriterVersionNum());
                this.types = cached.getTypes();
                this.rowIndexStride = cached.getRowIndexStride();
                this.contentLength = cached.getContentLength();
                this.numberOfRows = cached.getNumberOfRows();
                this.fileStats = cached.getFileStats();
                this.stripes = cached.getStripes();
                this.inspector = OrcStruct.createObjectInspector(0, cached.getTypes());
                // These fields are only populated when the footer is read from the file.
                this.footerByteBuffer = null;
                this.userMetadata = null;
                this.footerMetaAndPsBuffer = null;
                return;
            }

            // No cached metadata: obtain the footer info, either from the options
            // or by reading the tail of the file itself.
            FileMetaInfo metaInfo;
            if (options.getFileMetaInfo() != null)
            {
                metaInfo = options.getFileMetaInfo();
                this.footerMetaAndPsBuffer = null;
            }
            else
            {
                using (Stream file = streamCreator())
                {
                    metaInfo = extractMetaInfoFromFooter(file, path, options.getMaxLength());
                    this.footerMetaAndPsBuffer = metaInfo.footerMetaAndPsBuffer;
                }
            }

            // Decode the serialized metadata and footer sections.
            var extracted = new MetaInfoObjExtractor(metaInfo.compressionKind,
                                                     metaInfo.bufferSize,
                                                     metaInfo.metadataSize,
                                                     metaInfo.footerBuffer);
            this.footerByteBuffer = metaInfo.footerBuffer;
            this.compressionKind = extracted.compressionKind;
            this.codec = extracted.codec;
            this.bufferSize = extracted.bufferSize;
            this.metadataSize = extracted.metadataSize;
            this.stripeStats = extracted.metadata.StripeStatsList;
            this.types = extracted.footer.TypesList;
            this.rowIndexStride = (int)extracted.footer.RowIndexStride;
            this.contentLength = (int)extracted.footer.ContentLength;
            this.numberOfRows = (int)extracted.footer.NumberOfRows;
            this.userMetadata = extracted.footer.MetadataList;
            this.fileStats = extracted.footer.StatisticsList;
            this.inspector = extracted.inspector;
            this.versionList = metaInfo.versionList.Select(v => (int)v).ToList();
            this.writerVersion = metaInfo.writerVersion;
            this.stripes = convertProtoStripesToStripes(extracted.footer.StripesList);
        }