void next(OrcStruct next) { if (recordReader.hasNext()) { nextRecord = (OrcStruct)recordReader.next(next); // set the key key.setValues(OrcRecordUpdater.getOriginalTransaction(nextRecord), OrcRecordUpdater.getBucket(nextRecord), OrcRecordUpdater.getRowId(nextRecord), OrcRecordUpdater.getCurrentTransaction(nextRecord), statementId); // if this record is larger than maxKey, we need to stop if (maxKey != null && key.compareRow(maxKey) > 0) { LOG.debug("key " + key + " > maxkey " + maxKey); nextRecord = null; recordReader.Dispose(); } } else { nextRecord = null; recordReader.Dispose(); } }
void next(OrcStruct next) { if (recordReader.hasNext()) { long nextRowId = recordReader.getRowNumber(); // have to do initialization here, because the super's constructor // calls next and thus we need to initialize before our constructor // runs if (next == null) { nextRecord = new OrcStruct(OrcRecordUpdater.FIELDS); IntWritable operation = new IntWritable(OrcRecordUpdater.INSERT_OPERATION); nextRecord.setFieldValue(OrcRecordUpdater.OPERATION, operation); nextRecord.setFieldValue(OrcRecordUpdater.CURRENT_TRANSACTION, new LongWritable(0)); nextRecord.setFieldValue(OrcRecordUpdater.ORIGINAL_TRANSACTION, new LongWritable(0)); nextRecord.setFieldValue(OrcRecordUpdater.BUCKET, new IntWritable(bucket)); nextRecord.setFieldValue(OrcRecordUpdater.ROW_ID, new LongWritable(nextRowId)); nextRecord.setFieldValue(OrcRecordUpdater.ROW, recordReader.next(null)); } else { nextRecord = next; ((IntWritable)next.getFieldValue(OrcRecordUpdater.OPERATION)) .set(OrcRecordUpdater.INSERT_OPERATION); ((LongWritable)next.getFieldValue(OrcRecordUpdater.ORIGINAL_TRANSACTION)) .set(0); ((IntWritable)next.getFieldValue(OrcRecordUpdater.BUCKET)) .set(bucket); ((LongWritable)next.getFieldValue(OrcRecordUpdater.CURRENT_TRANSACTION)) .set(0); ((LongWritable)next.getFieldValue(OrcRecordUpdater.ROW_ID)) .set(0); nextRecord.setFieldValue(OrcRecordUpdater.ROW, recordReader.next(OrcRecordUpdater.getRow(next))); } key.setValues(0L, bucket, nextRowId, 0L, 0); if (maxKey != null && key.compareRow(maxKey) > 0) { if (LOG.isDebugEnabled()) { LOG.debug("key " + key + " > maxkey " + maxKey); } nextRecord = null; recordReader.close(); } } else { nextRecord = null; recordReader.close(); } }
/**
 * Read the side file to get the last flush length.
 * @param fs the file system to use
 * @param deltaFile the path of the delta file
 * @return the last length read from the side file; -1 if the side file was
 *   opened but no length was read from it; Long.MAX_VALUE if the side file
 *   could not be opened. If reading fails part-way, the most recent value
 *   obtained before the failure is returned.
 */
private static long getLastFlushLength(FileSystem fs, Path deltaFile) {
  Path lengths = OrcRecordUpdater.getSideFile(deltaFile);
  long result = Long.MAX_VALUE;
  try {
    FSDataInputStream stream = fs.open(lengths);
    try {
      // the side file opened, so from here on "nothing read" is reported as -1
      result = -1;
      // keep only the last long in the file
      while (stream.available() > 0) {
        result = stream.readLong();
      }
    } finally {
      // close on every path so a failed read does not leak the stream
      stream.close();
    }
  } catch (IOException ioe) {
    // best effort: fall through and report whatever was determined so far
  }
  return(result);
}
/**
 * Find the key range for bucket files.
 * Walks the reader's stripes in order. A stripe whose offset is below the
 * options' offset is skipped; otherwise a stripe whose offset is below the
 * options' max offset is counted as covered; the walk stops at the first
 * stripe that is neither. minKey is taken from the key index entry just
 * before the first non-skipped stripe (only if something was skipped), and
 * maxKey from the entry of the last covered stripe (only if the walk stopped
 * before the end of the stripe list).
 * @param reader the reader
 * @param options the options for reading with
 */
private void discoverKeyBounds(Reader reader, Reader.Options options) {
  RecordIdentifier[] keyIndex = OrcRecordUpdater.parseKeyIndex(reader);
  long rangeStart = options.getOffset();
  long rangeEnd = options.getMaxOffset();

  int skipped = 0;
  int covered = 0;
  bool stoppedEarly = false;
  foreach (StripeInformation stripe in reader.getStripes()) {
    long stripeStart = stripe.getOffset();
    if (rangeStart > stripeStart) {
      skipped++;
      continue;
    }
    if (rangeEnd > stripeStart) {
      covered++;
      continue;
    }
    // first stripe at or past the end of the range: the range is not the tail
    stoppedEarly = true;
    break;
  }

  if (skipped != 0) {
    minKey = keyIndex[skipped - 1];
  }
  if (stoppedEarly) {
    // NOTE(review): if the walk stops on the very first stripe, skipped +
    // covered is 0 and this indexes -1 -- confirm callers never pass such a
    // range
    maxKey = keyIndex[skipped + covered - 1];
  }
}
/**
 * Tells whether the given event is a delete.
 * @param value the event to inspect
 * @return true when the operation that OrcRecordUpdater.getOperation reports
 *   for value equals OrcRecordUpdater.DELETE_OPERATION
 */
public bool isDelete(OrcStruct value) {
  return OrcRecordUpdater.DELETE_OPERATION == OrcRecordUpdater.getOperation(value);
}
/** * Create a reader that merge sorts the ACID events together. * @param conf the configuration * @param collapseEvents should the events on the same row be collapsed * @param isOriginal is the base file a pre-acid file * @param bucket the bucket we are reading * @param options the options to read with * @param deltaDirectory the list of delta directories to include * @ */ OrcRawRecordMerger(Configuration conf, bool collapseEvents, Reader reader, bool isOriginal, int bucket, ValidTxnList validTxnList, Reader.Options options, Path[] deltaDirectory) { this.conf = conf; this.collapse = collapseEvents; this.offset = options.getOffset(); this.length = options.getLength(); this.validTxnList = validTxnList; TypeDescription typeDescr = OrcUtils.getDesiredRowTypeDescr(conf); if (typeDescr == null) { throw new IOException(ErrorMsg.SCHEMA_REQUIRED_TO_READ_ACID_TABLES.getErrorCodedMsg()); } objectInspector = OrcRecordUpdater.createEventSchema (OrcStruct.createObjectInspector(0, OrcUtils.getOrcTypes(typeDescr))); // modify the options to reflect the event instead of the base row Reader.Options eventOptions = createEventOptions(options); if (reader == null) { baseReader = null; } else { // find the min/max based on the offset and length if (isOriginal) { discoverOriginalKeyBounds(reader, bucket, options); } else { discoverKeyBounds(reader, options); } LOG.info("min key = " + minKey + ", max key = " + maxKey); // use the min/max instead of the byte range ReaderPair pair; ReaderKey key = new ReaderKey(); if (isOriginal) { options = options.clone(); options.range(options.getOffset(), Long.MAX_VALUE); pair = new OriginalReaderPair(key, reader, bucket, minKey, maxKey, options); } else { pair = new ReaderPair(key, reader, bucket, minKey, maxKey, eventOptions, 0); } // if there is at least one record, put it in the map if (pair.nextRecord != null) { readers.put(key, pair); } baseReader = pair.recordReader; } // we always want to read all of the deltas eventOptions.range(0, 
Long.MAX_VALUE); if (deltaDirectory != null) { foreach (Path delta in deltaDirectory) { ReaderKey key = new ReaderKey(); Path deltaFile = AcidUtils.createBucketFile(delta, bucket); AcidUtils.ParsedDelta deltaDir = AcidUtils.parsedDelta(delta); FileSystem fs = deltaFile.getFileSystem(conf); long length = getLastFlushLength(fs, deltaFile); if (length != -1 && fs.exists(deltaFile)) { Reader deltaReader = OrcFile.createReader(deltaFile, OrcFile.readerOptions(conf).maxLength(length)); Reader.Options deltaEventOptions = null; if (eventOptions.getSearchArgument() != null) { // Turn off the sarg before pushing it to delta. We never want to push a sarg to a delta as // it can produce wrong results (if the latest valid version of the record is filtered out by // the sarg) or ArrayOutOfBounds errors (when the sarg is applied to a delete record) // unless the delta only has insert events OrcRecordUpdater.AcidStats acidStats = OrcRecordUpdater.parseAcidStats(deltaReader); if (acidStats.deletes > 0 || acidStats.updates > 0) { deltaEventOptions = eventOptions.clone().searchArgument(null, null); } } ReaderPair deltaPair; deltaPair = new ReaderPair(key, deltaReader, bucket, minKey, maxKey, deltaEventOptions != null ? deltaEventOptions : eventOptions, deltaDir.getStatementId()); if (deltaPair.nextRecord != null) { readers.put(key, deltaPair); } } } } // get the first record Map.Entry <ReaderKey, ReaderPair> entry = readers.pollFirstEntry(); if (entry == null) { columns = 0; primary = null; } else { primary = entry.getValue(); if (readers.isEmpty()) { secondaryKey = null; } else { secondaryKey = readers.firstKey(); } // get the number of columns in the user's rows columns = primary.getColumns(); } }