/**
         * Convert from the row include/sarg/columnNames to the event equivalent
         * for the underlying file.
         * @param options options for the row reader
         * @return a cloned options object that is modified for the event reader
         */
        static Reader.Options createEventOptions(Reader.Options options)
        {
            // Every change below is applied to a clone; the clone is what gets returned.
            Reader.Options eventOptions = options.clone();
            // Same starting offset as the row options, but with an open-ended upper bound.
            eventOptions.range(options.getOffset(), Int64.MaxValue);

            // Number of leading slots that are prepended to the row-level arrays.
            int shift = OrcRecordUpdater.FIELDS;

            bool[] rowInclude = options.getInclude();
            if (rowInclude != null)
            {
                // The base row is always needed. NOTE: this writes into the array handed
                // back by the caller's options, not into a copy.
                rowInclude[0] = true;

                bool[] eventInclude = new bool[rowInclude.Length + shift];
                // Mark the prepended slots as included ...
                Arrays.fill(eventInclude, 0, shift, true);
                // ... and copy the row-level flags in behind them.
                for (int dst = shift; dst < eventInclude.Length; ++dst)
                {
                    eventInclude[dst] = rowInclude[dst - shift];
                }
                eventOptions.include(eventInclude);
            }

            string[] rowColumnNames = options.getColumnNames();
            if (rowColumnNames != null)
            {
                // Shift the names by the same amount; the prepended slots stay null.
                string[] eventColumnNames = new string[rowColumnNames.Length + shift];
                for (int dst = shift; dst < eventColumnNames.Length; ++dst)
                {
                    eventColumnNames[dst] = rowColumnNames[dst - shift];
                }
                // Re-register the original search argument against the shifted names.
                eventOptions.searchArgument(options.getSearchArgument(), eventColumnNames);
            }

            return eventOptions;
        }
        /**
         * Find the key range for original bucket files.
         * @param reader the reader
         * @param bucket the bucket number we are reading
         * @param options the options for reading with
         */
        private void discoverOriginalKeyBounds(Reader reader, int bucket,
                                               Reader.Options options)
        {
            long splitStart      = options.getOffset();
            long splitEnd        = options.getMaxOffset();
            long rowsBeforeSplit = 0;
            long rowsInSplit     = 0;
            // Set when a stripe starting at or after splitEnd is found, i.e. the
            // requested range does not run to the end of the file.
            bool stoppedEarly    = false;

            foreach (StripeInformation stripe in reader.getStripes())
            {
                long stripeStart = stripe.getOffset();

                // Stripe starts before the requested range: only count its rows.
                if (stripeStart < splitStart)
                {
                    rowsBeforeSplit += stripe.getNumberOfRows();
                    continue;
                }

                // Stripe starts inside the requested range.
                if (stripeStart < splitEnd)
                {
                    rowsInSplit += stripe.getNumberOfRows();
                    continue;
                }

                // First stripe at or past the end of the range: nothing more to count.
                stoppedEarly = true;
                break;
            }

            // minKey is the id of the last row that precedes the range; it is left
            // untouched when no rows precede it.
            if (rowsBeforeSplit > 0)
            {
                minKey = new RecordIdentifier(0, bucket, rowsBeforeSplit - 1);
            }

            // maxKey is the id of the last row inside the range; it is left untouched
            // when the range covers the tail of the file.
            if (stoppedEarly)
            {
                maxKey = new RecordIdentifier(0, bucket, rowsBeforeSplit + rowsInSplit - 1);
            }
        }
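        // Example #3 (separator left over from a snippet listing; the constructor below appears to belong to a different class)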
            /**
             * Sets up a vectorized reader over the byte range described by a file split.
             *
             * Exceptions raised while initialising the row-batch context propagate to the
             * caller unchanged.
             *
             * @param file the ORC reader for the underlying file
             * @param conf the configuration used to derive the included columns and the
             *             search argument, and to initialise the row-batch context
             * @param fileSplit the split whose start and length define the range to read
             */
            VectorizedOrcRecordReader(Reader file, Configuration conf,
                                      FileSplit fileSplit)
            {
                List<OrcProto.Type> types = file.getTypes();

                Reader.Options options = new Reader.Options();
                // Remember the split boundaries and restrict the reader to them.
                this.offset = fileSplit.getStart();
                this.length = fileSplit.getLength();
                options.range(offset, length);
                options.include(OrcInputFormat.genIncludedColumns(types, conf, true));
                OrcInputFormat.setSearchArgument(options, types, conf, true);

                this.reader = file.rowsOptions(options);

                // The previous catch-and-rethrow wrapper around these two statements only
                // re-threw the same exception (and declared an unused variable), so it was
                // removed; failures still surface to the caller exactly as before.
                rbCtx = new VectorizedRowBatchCtx();
                rbCtx.init(conf, fileSplit);
            }
        /**
         * Find the key range for bucket files.
         * @param reader the reader
         * @param options the options for reading with
         */
        private void discoverKeyBounds(Reader reader,
                                       Reader.Options options)
        {
            // One key per stripe, as produced by OrcRecordUpdater.parseKeyIndex.
            RecordIdentifier[] stripeKeys = OrcRecordUpdater.parseKeyIndex(reader);
            long splitStart     = options.getOffset();
            long splitEnd       = options.getMaxOffset();
            int  stripesSkipped = 0;
            int  stripesCovered = 0;
            // Set when a stripe starting at or after splitEnd is found, i.e. the
            // requested range does not run to the end of the file.
            bool stoppedEarly   = false;
            List<StripeInformation> allStripes = reader.getStripes();

            foreach (StripeInformation stripe in allStripes)
            {
                long stripeStart = stripe.getOffset();

                // Stripe starts before the requested range.
                if (stripeStart < splitStart)
                {
                    ++stripesSkipped;
                    continue;
                }

                // Stripe starts inside the requested range.
                if (stripeStart < splitEnd)
                {
                    ++stripesCovered;
                    continue;
                }

                // First stripe at or past the end of the range.
                stoppedEarly = true;
                break;
            }

            // minKey comes from the last stripe before the range; left untouched when
            // the range begins at the first stripe.
            if (stripesSkipped > 0)
            {
                minKey = stripeKeys[stripesSkipped - 1];
            }

            // maxKey comes from the last stripe inside the range; left untouched when
            // the range covers the tail of the file.
            if (stoppedEarly)
            {
                maxKey = stripeKeys[stripesSkipped + stripesCovered - 1];
            }
        }
        /**
         * Create a reader that merge sorts the ACID events together.
         * @param conf the configuration
         * @param collapseEvents should the events on the same row be collapsed
         * @param isOriginal is the base file a pre-acid file
         * @param bucket the bucket we are reading
         * @param options the options to read with
         * @param deltaDirectory the list of delta directories to include
         * @throws IOException if no desired row type description can be obtained from the configuration
         */
        OrcRawRecordMerger(Configuration conf,
                           bool collapseEvents,
                           Reader reader,
                           bool isOriginal,
                           int bucket,
                           ValidTxnList validTxnList,
                           Reader.Options options,
                           Path[] deltaDirectory)
        {
            // Overview: builds one ReaderPair for the base file (when a reader is supplied)
            // and one per delta directory, registers every pair that has a first record in
            // `readers`, and finally takes the first entry of `readers` as the primary reader.
            this.conf         = conf;
            this.collapse     = collapseEvents;
            this.offset       = options.getOffset();
            this.length       = options.getLength();
            this.validTxnList = validTxnList;
            TypeDescription typeDescr = OrcUtils.getDesiredRowTypeDescr(conf);

            // A row schema must be obtainable from the configuration; otherwise fail fast.
            if (typeDescr == null)
            {
                throw new IOException(ErrorMsg.SCHEMA_REQUIRED_TO_READ_ACID_TABLES.getErrorCodedMsg());
            }

            // Wrap the row-level object inspector in the event schema.
            objectInspector = OrcRecordUpdater.createEventSchema
                                  (OrcStruct.createObjectInspector(0, OrcUtils.getOrcTypes(typeDescr)));

            // modify the options to reflect the event instead of the base row
            Reader.Options eventOptions = createEventOptions(options);
            if (reader == null)
            {
                // No base file: only the deltas (if any) contribute records.
                baseReader = null;
            }
            else
            {
                // find the min/max based on the offset and length
                if (isOriginal)
                {
                    discoverOriginalKeyBounds(reader, bucket, options);
                }
                else
                {
                    discoverKeyBounds(reader, options);
                }
                LOG.info("min key = " + minKey + ", max key = " + maxKey);
                // use the min/max instead of the byte range
                ReaderPair pair;
                ReaderKey  key = new ReaderKey();
                if (isOriginal)
                {
                    // Original files are read with the row-level options (not the event
                    // options): clone them and open the range up to the end of the file.
                    options = options.clone();
                    options.range(options.getOffset(), Long.MAX_VALUE);
                    pair = new OriginalReaderPair(key, reader, bucket, minKey, maxKey,
                                                  options);
                }
                else
                {
                    // Base files written in the event layout use the event options and
                    // pass 0 as the last constructor argument.
                    pair = new ReaderPair(key, reader, bucket, minKey, maxKey,
                                          eventOptions, 0);
                }

                // if there is at least one record, put it in the map
                if (pair.nextRecord != null)
                {
                    readers.put(key, pair);
                }
                // The base record reader is kept even when the pair had no first record.
                baseReader = pair.recordReader;
            }

            // we always want to read all of the deltas
            // NOTE(review): this mutates the same eventOptions instance that was handed to the
            // base ReaderPair above (non-original case); whether that matters depends on when
            // ReaderPair consumes the range, which is not visible here — confirm.
            eventOptions.range(0, Long.MAX_VALUE);
            if (deltaDirectory != null)
            {
                foreach (Path delta in deltaDirectory)
                {
                    ReaderKey             key       = new ReaderKey();
                    Path                  deltaFile = AcidUtils.createBucketFile(delta, bucket);
                    AcidUtils.ParsedDelta deltaDir  = AcidUtils.parsedDelta(delta);
                    FileSystem            fs        = deltaFile.getFileSystem(conf);
                    long                  length    = getLastFlushLength(fs, deltaFile);
                    // Skip this delta when getLastFlushLength reports -1 or the bucket file
                    // does not exist.
                    if (length != -1 && fs.exists(deltaFile))
                    {
                        // The delta reader is created with maxLength(length), i.e. the value
                        // returned by getLastFlushLength.
                        Reader deltaReader = OrcFile.createReader(deltaFile,
                                                                  OrcFile.readerOptions(conf).maxLength(length));
                        // Stays null unless the search argument has to be stripped for this delta.
                        Reader.Options deltaEventOptions = null;
                        if (eventOptions.getSearchArgument() != null)
                        {
                            // Turn off the sarg before pushing it to delta.  We never want to push a sarg to a delta as
                            // it can produce wrong results (if the latest valid version of the record is filtered out by
                            // the sarg) or ArrayOutOfBounds errors (when the sarg is applied to a delete record)
                            // unless the delta only has insert events
                            OrcRecordUpdater.AcidStats acidStats = OrcRecordUpdater.parseAcidStats(deltaReader);
                            if (acidStats.deletes > 0 || acidStats.updates > 0)
                            {
                                // Use a clone so the shared eventOptions keep their sarg for other deltas.
                                deltaEventOptions = eventOptions.clone().searchArgument(null, null);
                            }
                        }
                        ReaderPair deltaPair;
                        // Delta pairs share the base file's min/max keys and carry the
                        // statement id parsed from the delta directory.
                        deltaPair = new ReaderPair(key, deltaReader, bucket, minKey,
                                                   maxKey, deltaEventOptions != null ? deltaEventOptions : eventOptions, deltaDir.getStatementId());
                        // Only deltas that produced a first record take part in the merge.
                        if (deltaPair.nextRecord != null)
                        {
                            readers.put(key, deltaPair);
                        }
                    }
                }
            }

            // get the first record
            // NOTE(review): assumes pollFirstEntry removes the returned entry from `readers`
            // (as java.util.TreeMap does), so firstKey() below yields the next reader — confirm
            // for this port.
            Map.Entry <ReaderKey, ReaderPair> entry = readers.pollFirstEntry();
            if (entry == null)
            {
                // No reader produced a record: nothing to merge.
                columns = 0;
                primary = null;
            }
            else
            {
                primary = entry.getValue();
                if (readers.isEmpty())
                {
                    secondaryKey = null;
                }
                else
                {
                    secondaryKey = readers.firstKey();
                }
                // get the number of columns in the user's rows
                columns = primary.getColumns();
            }
        }
 /**
  * Creates a reader pair by forwarding every argument unchanged to the base
  * constructor and supplying 0 as its final argument.
  * NOTE(review): the final base argument looks like the statement id (delta pairs
  * pass deltaDir.getStatementId() in that position) — confirm against the base class.
  */
 OriginalReaderPair(ReaderKey key,
                    Reader reader,
                    int bucket,
                    RecordIdentifier minKey,
                    RecordIdentifier maxKey,
                    Reader.Options options)
     : base(key, reader, bucket, minKey, maxKey, options, 0)
 {
     // All initialisation happens in the base constructor.
 }