/**
         * Translate row-level reader options (include flags, search argument and
         * column names) into the equivalent options for the event layout of the
         * underlying file, where every row column is preceded by
         * OrcRecordUpdater.FIELDS event columns.
         *
         * The returned options are a clone of the input whose range starts at the
         * input's offset and extends to Int64.MaxValue.
         *
         * Note: when an include array is present, its first element is set to
         * true in place, so the array held by the caller's options is modified.
         *
         * @param options options for the row reader
         * @return a cloned options object that is modified for the event reader
         */
        static Reader.Options createEventOptions(Reader.Options options)
        {
            int shift = OrcRecordUpdater.FIELDS;

            Reader.Options eventOptions = options.clone();
            eventOptions.range(options.getOffset(), Int64.MaxValue);

            // Shift the include flags past the leading event columns.
            bool[] rowInclude = options.getInclude();
            if (rowInclude != null)
            {
                // The base row must always be read (this writes into the caller's array).
                rowInclude[0] = true;

                bool[] eventInclude = new bool[rowInclude.Length + shift];
                // Every leading event column is always included.
                Arrays.fill(eventInclude, 0, shift, true);
                rowInclude.CopyTo(eventInclude, shift);
                eventOptions.include(eventInclude);
            }

            // Shift the column names the same way; the leading event slots stay null.
            string[] rowColumnNames = options.getColumnNames();
            if (rowColumnNames != null)
            {
                string[] eventColumnNames = new string[rowColumnNames.Length + shift];
                rowColumnNames.CopyTo(eventColumnNames, shift);
                eventOptions.searchArgument(options.getSearchArgument(), eventColumnNames);
            }

            return eventOptions;
        }
// Example no. 2
// 0
            /**
             * Create a vectorized record reader over the portion of a file that is
             * covered by the given split.
             *
             * The byte range read is [fileSplit.getStart(), +fileSplit.getLength());
             * the included columns and the search argument are derived from the
             * file's types and the configuration via OrcInputFormat.
             *
             * Any exception raised while setting up the row batch context
             * propagates unchanged to the caller.
             *
             * @param file the reader for the file to scan
             * @param conf the configuration
             * @param fileSplit the split describing the byte range to read
             */
            VectorizedOrcRecordReader(Reader file, Configuration conf,
                                      FileSplit fileSplit)
            {
                List <OrcProto.Type> types = file.getTypes();

                // Restrict the read to the byte range of this split.
                this.offset = fileSplit.getStart();
                this.length = fileSplit.getLength();

                Reader.Options options = new Reader.Options();
                options.range(offset, length);
                options.include(OrcInputFormat.genIncludedColumns(types, conf, true));
                OrcInputFormat.setSearchArgument(options, types, conf, true);

                this.reader = file.rowsOptions(options);

                // No try/catch here: the previous catch block only re-threw the
                // exception it caught, so letting it propagate is equivalent.
                rbCtx = new VectorizedRowBatchCtx();
                rbCtx.init(conf, fileSplit);
            }
        /**
         * Create a reader that merge sorts the ACID events together.
         *
         * The constructor opens one ReaderPair for the base file (when a base
         * reader is supplied) and one for each delta bucket file that exists and
         * has a usable flush length, registers every pair that has a first record
         * in the readers map, and then pulls the first entry as the primary.
         *
         * @param conf the configuration
         * @param collapseEvents should the events on the same row be collapsed
         * @param reader the reader for the base file, or null when there is no
         *               base file to read
         * @param isOriginal is the base file a pre-acid file
         * @param bucket the bucket we are reading
         * @param validTxnList stored on this merger as given
         *                     (NOTE(review): presumably the transactions visible
         *                     to this read - confirm at the use sites)
         * @param options the options to read with
         * @param deltaDirectory the list of delta directories to include, may be
         *                       null
         * @throws IOException if no desired row type description can be obtained
         *                     from the configuration
         */
        OrcRawRecordMerger(Configuration conf,
                           bool collapseEvents,
                           Reader reader,
                           bool isOriginal,
                           int bucket,
                           ValidTxnList validTxnList,
                           Reader.Options options,
                           Path[] deltaDirectory)
        {
            this.conf         = conf;
            this.collapse     = collapseEvents;
            this.offset       = options.getOffset();
            this.length       = options.getLength();
            this.validTxnList = validTxnList;
            TypeDescription typeDescr = OrcUtils.getDesiredRowTypeDescr(conf);

            if (typeDescr == null)
            {
                throw new IOException(ErrorMsg.SCHEMA_REQUIRED_TO_READ_ACID_TABLES.getErrorCodedMsg());
            }

            objectInspector = OrcRecordUpdater.createEventSchema
                                  (OrcStruct.createObjectInspector(0, OrcUtils.getOrcTypes(typeDescr)));

            // modify the options to reflect the event instead of the base row
            Reader.Options eventOptions = createEventOptions(options);
            if (reader == null)
            {
                baseReader = null;
            }
            else
            {
                // find the min/max based on the offset and length
                if (isOriginal)
                {
                    discoverOriginalKeyBounds(reader, bucket, options);
                }
                else
                {
                    discoverKeyBounds(reader, options);
                }
                LOG.info("min key = " + minKey + ", max key = " + maxKey);
                // use the min/max instead of the byte range
                ReaderPair pair;
                ReaderKey  key = new ReaderKey();
                if (isOriginal)
                {
                    // A pre-acid file is read with the row options (not the event
                    // options), opened up from the original offset to the end.
                    Reader.Options originalOptions = options.clone();
                    originalOptions.range(originalOptions.getOffset(), long.MaxValue);
                    pair = new OriginalReaderPair(key, reader, bucket, minKey, maxKey,
                                                  originalOptions);
                }
                else
                {
                    pair = new ReaderPair(key, reader, bucket, minKey, maxKey,
                                          eventOptions, 0);
                }

                // if there is at least one record, put it in the map
                if (pair.nextRecord != null)
                {
                    readers.put(key, pair);
                }
                baseReader = pair.recordReader;
            }

            // we always want to read all of the deltas
            // NOTE(review): eventOptions is mutated here after the base ReaderPair
            // above may already have received it; keep this ordering unless
            // ReaderPair is confirmed not to depend on it.
            eventOptions.range(0, long.MaxValue);
            if (deltaDirectory != null)
            {
                foreach (Path delta in deltaDirectory)
                {
                    ReaderKey             key         = new ReaderKey();
                    Path                  deltaFile   = AcidUtils.createBucketFile(delta, bucket);
                    AcidUtils.ParsedDelta deltaDir    = AcidUtils.parsedDelta(delta);
                    FileSystem            fs          = deltaFile.getFileSystem(conf);
                    // named flushLength so it cannot be confused with the this.length field
                    long                  flushLength = getLastFlushLength(fs, deltaFile);
                    if (flushLength != -1 && fs.exists(deltaFile))
                    {
                        Reader deltaReader = OrcFile.createReader(deltaFile,
                                                                  OrcFile.readerOptions(conf).maxLength(flushLength));
                        // By default the delta is read with the shared event options.
                        Reader.Options deltaEventOptions = eventOptions;
                        if (eventOptions.getSearchArgument() != null)
                        {
                            // Turn off the sarg before pushing it to delta.  We never want to push a sarg to a delta as
                            // it can produce wrong results (if the latest valid version of the record is filtered out by
                            // the sarg) or ArrayOutOfBounds errors (when the sarg is applied to a delete record)
                            // unless the delta only has insert events
                            OrcRecordUpdater.AcidStats acidStats = OrcRecordUpdater.parseAcidStats(deltaReader);
                            if (acidStats.deletes > 0 || acidStats.updates > 0)
                            {
                                deltaEventOptions = eventOptions.clone().searchArgument(null, null);
                            }
                        }
                        ReaderPair deltaPair = new ReaderPair(key, deltaReader, bucket, minKey,
                                                              maxKey, deltaEventOptions, deltaDir.getStatementId());
                        if (deltaPair.nextRecord != null)
                        {
                            readers.put(key, deltaPair);
                        }
                    }
                }
            }

            // get the first record
            Map.Entry <ReaderKey, ReaderPair> entry = readers.pollFirstEntry();
            if (entry == null)
            {
                // nothing to read at all
                columns = 0;
                primary = null;
            }
            else
            {
                primary = entry.getValue();
                // the next reader in key order, if any, bounds how far the primary may advance
                // NOTE(review): inferred from the field name secondaryKey - confirm in next()
                if (readers.isEmpty())
                {
                    secondaryKey = null;
                }
                else
                {
                    secondaryKey = readers.firstKey();
                }
                // get the number of columns in the user's rows
                columns = primary.getColumns();
            }
        }