void next(OrcStruct next)
{
    if (recordReader.hasNext())
    {
        nextRecord = (OrcStruct)recordReader.next(next);
        // set the key
        key.setValues(OrcRecordUpdater.getOriginalTransaction(nextRecord),
            OrcRecordUpdater.getBucket(nextRecord),
            OrcRecordUpdater.getRowId(nextRecord),
            OrcRecordUpdater.getCurrentTransaction(nextRecord),
            statementId);

        // if this record is larger than maxKey, we need to stop
        if (maxKey != null && key.compareRow(maxKey) > 0)
        {
            LOG.debug("key " + key + " > maxkey " + maxKey);
            nextRecord = null;
            recordReader.Dispose();
        }
    }
    else
    {
        nextRecord = null;
        recordReader.Dispose();
    }
}
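// Illustrative only, not in the original source: a minimal sketch of how
// next(...) is driven from outside. The caller consumes nextRecord, then hands
// the same buffer back so the record reader can recycle it, exactly as the
// merge loop below does with extraValue. The names "countRows" and "pair" are
// assumptions for this sketch.
private static int countRows(ReaderPair pair)
{
    int n = 0;
    while (pair.nextRecord != null)
    {
        OrcStruct scratch = pair.nextRecord; // recycle the buffer just consumed
        n++;
        pair.next(scratch); // nulls nextRecord and disposes the reader past maxKey
    }
    return n;
}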
/**
 * Create a reader that reads from the first key larger than minKey to any
 * keys equal to maxKey.
 * @param key the key to read into
 * @param reader the ORC file reader
 * @param bucket the bucket number for the file
 * @param minKey only return keys larger than minKey if it is non-null
 * @param maxKey only return keys less than or equal to maxKey if it is
 *               non-null
 * @param options options to provide to read the rows.
 * @param statementId id of SQL statement within a transaction
 */
public ReaderPair(ReaderKey key, Reader reader, int bucket,
                  RecordIdentifier minKey, RecordIdentifier maxKey,
                  ReaderImpl.Options options, int statementId)
{
    this.reader = reader;
    this.key = key;
    this.maxKey = maxKey;
    this.bucket = bucket;
    // TODO use stripe statistics to jump over stripes
    recordReader = reader.rowsOptions(options);
    this.statementId = statementId;
    // advance the reader until we reach the minimum key
    do
    {
        next(nextRecord);
    } while (nextRecord != null &&
        (minKey != null && key.compareRow(minKey) <= 0));
}
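// Hedged usage sketch, not in the original source: pre-positioning a
// ReaderPair on a key range for one bucket. The concrete key bounds are
// invented for illustration, and the three-argument
// RecordIdentifier(transactionId, bucket, rowId) constructor is assumed to be
// available in this port.
private static ReaderPair openRange(ReaderKey key, Reader reader, int bucket,
    ReaderImpl.Options options, int statementId)
{
    RecordIdentifier minKey = new RecordIdentifier(40, bucket, 0);
    RecordIdentifier maxKey = new RecordIdentifier(50, bucket, 100);
    // The constructor's do/while has already advanced past minKey, so
    // pair.nextRecord is the first row with key > minKey, or null if the
    // range is empty.
    return new ReaderPair(key, reader, bucket, minKey, maxKey, options,
        statementId);
}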
public bool next(RecordIdentifier recordIdentifier, OrcStruct prev)
{
    bool keysSame = true;
    while (keysSame && primary != null)
    {
        // The primary's nextRecord is the next value to return
        OrcStruct current = primary.nextRecord;
        recordIdentifier.set(primary.key);

        // Advance the primary reader to the next record
        primary.next(extraValue);

        // Save the current record as the new extraValue for next time so that
        // we minimize allocations
        extraValue = current;

        // now that the primary reader has advanced, we need to see if we
        // continue to read it or move to the secondary.
        if (primary.nextRecord == null ||
            primary.key.compareTo(secondaryKey) > 0)
        {
            // if the primary isn't done, push it back into the readers
            if (primary.nextRecord != null)
            {
                readers.put(primary.key, primary);
            }

            // update primary and secondaryKey
            Map.Entry<ReaderKey, ReaderPair> entry = readers.pollFirstEntry();
            if (entry != null)
            {
                primary = entry.getValue();
                if (readers.isEmpty())
                {
                    secondaryKey = null;
                }
                else
                {
                    secondaryKey = readers.firstKey();
                }
            }
            else
            {
                primary = null;
            }
        }

        // if this transaction isn't ok, skip over it
        if (!validTxnList.isTxnValid(
            ((ReaderKey)recordIdentifier).getCurrentTransactionId()))
        {
            continue;
        }

        /* For multi-statement txns, you may have multiple events for the same
         * row in the same (current) transaction. We want to collapse these to
         * just the last one, regardless of whether we are minor compacting.
         * Consider INSERT/UPDATE/UPDATE of the same row in the same txn: there
         * is no benefit in passing along anything except the last event. If we
         * did want to pass it along, we'd have to include statementId in the
         * row returned so that compaction could write it out, or make minor
         * compaction understand how to write out delta files in
         * delta_xxx_yyy_stid format. There doesn't seem to be any value in
         * this. */
        bool isSameRow = prevKey.isSameRow((ReaderKey)recordIdentifier);
        // if we are collapsing, figure out if this is a new row
        if (collapse || isSameRow)
        {
            keysSame = (collapse && prevKey.compareRow(recordIdentifier) == 0)
                || isSameRow;
            if (!keysSame)
            {
                prevKey.set(recordIdentifier);
            }
        }
        else
        {
            keysSame = false;
        }

        // set the output record by fiddling with the pointers so that we can
        // avoid a copy.
        prev.linkFields(current);
    }
    return !keysSame;
}
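// Illustrative consumer, not in the original source: next(...) returns true
// while prev holds the last surviving event for a distinct row and false once
// every reader is exhausted. The enclosing merger type is assumed to be
// OrcRawRecordMerger here, and OrcRecordUpdater.FIELDS is assumed to give the
// ACID event field count; both are assumptions of this sketch.
private static int countDistinctRows(OrcRawRecordMerger merger)
{
    RecordIdentifier id = new RecordIdentifier();
    OrcStruct row = new OrcStruct(OrcRecordUpdater.FIELDS);
    int n = 0;
    while (merger.next(id, row)) // id and row are overwritten on each call
    {
        n++;
    }
    return n;
}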