/// <summary>
/// Orders keys by row identity first (via compareToInternal); when the rows
/// are equal, larger currentTransactionId and larger statementId compare as
/// SMALLER, so the most recent event for a row sorts first. A ReaderKey
/// compares as smaller than a plain RecordIdentifier for the same row.
/// </summary>
/// <param name="other">the key to compare against</param>
/// <returns>negative, zero, or positive per the ordering above</returns>
public int compareTo(RecordIdentifier other) {
    int rowOrder = compareToInternal(other);
    if (rowOrder != 0) {
        return(rowOrder);
    }
    if (other.GetType() != typeof(ReaderKey)) {
        // same row, but the other side is a bare RecordIdentifier:
        // this ReaderKey sorts before it
        return(-1);
    }
    ReaderKey that = (ReaderKey)other;
    if (currentTransactionId != that.currentTransactionId) {
        // descending: the newer transaction sorts earlier
        return(currentTransactionId < that.currentTransactionId ? +1 : -1);
    }
    if (statementId != that.statementId) {
        // descending: the later statement in the same txn sorts earlier
        return(statementId < that.statementId ? +1 : -1);
    }
    return(0);
}
/// <summary>
/// Exercises ReaderKey ordering: each positional field of setValues decides
/// the order in turn, equal keys compare as 0 and are equals(), and
/// comparisons against the RecordIdentifier base class stay consistent
/// from both sides.
/// </summary>
public void testOrdering() {
    ReaderKey keyA = new ReaderKey(100, 200, 1200, 300);
    ReaderKey keyB = new ReaderKey();
    keyB.setValues(100, 200, 1000, 200, 1);
    // keyB differs in the later fields and sorts before keyA
    assertTrue(keyB.compareTo(keyA) < 0);
    assertTrue(keyA.compareTo(keyB) > 0);
    Assert.Equal(false, keyA.equals(keyB));
    // after copying keyB into keyA they must be equal in both senses
    keyA.set(keyB);
    assertTrue(keyB.compareTo(keyA) == 0);
    Assert.Equal(true, keyB.equals(keyA));
    // bumping the rowId moves keyB after keyA
    keyB.setRowId(2000);
    assertTrue(keyB.compareTo(keyA) > 0);
    // a difference in the first setValues argument decides the order
    keyA.setValues(1, 2, 3, 4, -1);
    keyB.setValues(100, 2, 3, 4, -1);
    assertTrue(keyA.compareTo(keyB) < 0);
    assertTrue(keyB.compareTo(keyA) > 0);
    // a difference in the second argument decides the order
    keyA.setValues(1, 2, 3, 4, -1);
    keyB.setValues(1, 100, 3, 4, -1);
    assertTrue(keyA.compareTo(keyB) < 0);
    assertTrue(keyB.compareTo(keyA) > 0);
    // the fourth argument (transaction id) sorts DESCENDING: 100 < 4 here
    keyA.setValues(1, 2, 3, 100, -1);
    keyB.setValues(1, 2, 3, 4, -1);
    assertTrue(keyA.compareTo(keyB) < 0);
    assertTrue(keyB.compareTo(keyA) > 0);
    // ensure that we are consistent when comparing to the base class
    RecordIdentifier plainId = new RecordIdentifier(1, 2, 3);
    Assert.Equal(1, plainId.compareTo(keyA));
    Assert.Equal(-1, keyA.compareTo(plainId));
    Assert.Equal(false, plainId.equals(keyA));
    Assert.Equal(false, keyA.equals(plainId));
}
/// <summary>
/// Returns the document reader for the given key/entity pair, creating a
/// CachingDocumentReader around the store's reader and memoizing it in
/// _readersCache on first request.
/// </summary>
public IDocumentReader <TKey, TEntity> GetReader <TKey, TEntity>() {
    var cacheKey = ReaderKey.Create <TKey, TEntity>();
    // the factory lambda only runs when the key is not yet cached
    var entry = _readersCache.GetOrAdd(
        cacheKey,
        ignored => new CachingDocumentReader <TKey, TEntity>(
            this._store.GetReader <TKey, TEntity>(), this.Strategy));
    return((IDocumentReader <TKey, TEntity>)entry);
}
/**
 * Create a reader that reads from the first key larger than minKey to any
 * keys equal to maxKey.
 * @param key the key to read into
 * @param reader the ORC file reader
 * @param bucket the bucket number for the file
 * @param minKey only return keys larger than minKey if it is non-null
 * @param maxKey only return keys less than or equal to maxKey if it is
 *               non-null
 * @param options options to provide to read the rows.
 * @param statementId id of SQL statement within a transaction
 */
public ReaderPair(ReaderKey key, Reader reader, int bucket,
                  RecordIdentifier minKey, RecordIdentifier maxKey,
                  ReaderImpl.Options options, int statementId) {
    this.key = key;
    this.reader = reader;
    this.bucket = bucket;
    this.maxKey = maxKey;
    this.statementId = statementId;
    // TODO use stripe statistics to jump over stripes
    recordReader = reader.rowsOptions(options);
    // always read at least one record, then keep advancing while the key
    // is still at or below the minimum key (when a minimum was given)
    next(nextRecord);
    while (nextRecord != null && minKey != null && key.compareRow(minKey) <= 0) {
        next(nextRecord);
    }
}
/**
 * Create a reader that merge sorts the ACID events together.
 *
 * Builds one ReaderPair for the base file (when present) and one per delta
 * bucket file, seeds the sorted `readers` map with each pair's first record,
 * then pulls the smallest entry out as `primary`, with `secondaryKey`
 * tracking the next-smallest key (or null when no other reader remains).
 *
 * @param conf the configuration
 * @param collapseEvents should the events on the same row be collapsed
 * @param reader the reader for the base file, or null when there is none
 * @param isOriginal is the base file a pre-acid file
 * @param bucket the bucket we are reading
 * @param validTxnList the transactions this merger should treat as valid
 * @param options the options to read with
 * @param deltaDirectory the list of delta directories to include
 */
OrcRawRecordMerger(Configuration conf,
                   bool collapseEvents,
                   Reader reader,
                   bool isOriginal,
                   int bucket,
                   ValidTxnList validTxnList,
                   Reader.Options options,
                   Path[] deltaDirectory) {
    this.conf = conf;
    this.collapse = collapseEvents;
    this.offset = options.getOffset();
    this.length = options.getLength();
    this.validTxnList = validTxnList;
    TypeDescription typeDescr = OrcUtils.getDesiredRowTypeDescr(conf);
    if (typeDescr == null) {
        // an explicit row schema is mandatory for ACID reads
        throw new IOException(ErrorMsg.SCHEMA_REQUIRED_TO_READ_ACID_TABLES.getErrorCodedMsg());
    }
    objectInspector = OrcRecordUpdater.createEventSchema
        (OrcStruct.createObjectInspector(0, OrcUtils.getOrcTypes(typeDescr)));
    // modify the options to reflect the event instead of the base row
    Reader.Options eventOptions = createEventOptions(options);
    if (reader == null) {
        baseReader = null;
    } else {
        // find the min/max based on the offset and length
        // (these calls set the minKey/maxKey fields read below)
        if (isOriginal) {
            discoverOriginalKeyBounds(reader, bucket, options);
        } else {
            discoverKeyBounds(reader, options);
        }
        LOG.info("min key = " + minKey + ", max key = " + maxKey);
        // use the min/max instead of the byte range
        ReaderPair pair;
        ReaderKey key = new ReaderKey();
        if (isOriginal) {
            options = options.clone();
            options.range(options.getOffset(), Long.MAX_VALUE);
            pair = new OriginalReaderPair(key, reader, bucket, minKey, maxKey,
                                          options);
        } else {
            pair = new ReaderPair(key, reader, bucket, minKey, maxKey,
                                  eventOptions, 0);
        }
        // if there is at least one record, put it in the map
        if (pair.nextRecord != null) {
            readers.put(key, pair);
        }
        baseReader = pair.recordReader;
    }
    // we always want to read all of the deltas
    eventOptions.range(0, Long.MAX_VALUE);
    if (deltaDirectory != null) {
        foreach (Path delta in deltaDirectory) {
            ReaderKey key = new ReaderKey();
            Path deltaFile = AcidUtils.createBucketFile(delta, bucket);
            AcidUtils.ParsedDelta deltaDir = AcidUtils.parsedDelta(delta);
            FileSystem fs = deltaFile.getFileSystem(conf);
            // NOTE(review): -1 from getLastFlushLength appears to mean
            // "no readable length", so the delta is skipped -- confirm
            long length = getLastFlushLength(fs, deltaFile);
            if (length != -1 && fs.exists(deltaFile)) {
                Reader deltaReader = OrcFile.createReader(deltaFile,
                    OrcFile.readerOptions(conf).maxLength(length));
                Reader.Options deltaEventOptions = null;
                if (eventOptions.getSearchArgument() != null) {
                    // Turn off the sarg before pushing it to delta. We never want to push a sarg to a delta as
                    // it can produce wrong results (if the latest valid version of the record is filtered out by
                    // the sarg) or ArrayOutOfBounds errors (when the sarg is applied to a delete record)
                    // unless the delta only has insert events
                    OrcRecordUpdater.AcidStats acidStats = OrcRecordUpdater.parseAcidStats(deltaReader);
                    if (acidStats.deletes > 0 || acidStats.updates > 0) {
                        deltaEventOptions = eventOptions.clone().searchArgument(null, null);
                    }
                }
                ReaderPair deltaPair;
                deltaPair = new ReaderPair(key, deltaReader, bucket, minKey,
                    maxKey, deltaEventOptions != null ? deltaEventOptions : eventOptions,
                    deltaDir.getStatementId());
                // only register deltas that actually contain records in range
                if (deltaPair.nextRecord != null) {
                    readers.put(key, deltaPair);
                }
            }
        }
    }
    // get the first record
    Map.Entry<ReaderKey, ReaderPair> entry = readers.pollFirstEntry();
    if (entry == null) {
        columns = 0;
        primary = null;
    } else {
        primary = entry.getValue();
        if (readers.isEmpty()) {
            secondaryKey = null;
        } else {
            secondaryKey = readers.firstKey();
        }
        // get the number of columns in the user's rows
        columns = primary.getColumns();
    }
}
/**
 * This means 1 txn modified the same row more than once: true when the
 * other key names the same row AND the same current transaction id.
 */
private bool isSameRow(ReaderKey other) {
    if (compareRow(other) != 0) {
        return false;
    }
    return currentTransactionId == other.currentTransactionId;
}
/**
 * Create a reader pair over a pre-acid ("original") base file.
 * Chains straight to the ReaderPair constructor with statementId fixed
 * at 0 -- presumably because original files predate statement ids; confirm.
 * @param key the key to read into
 * @param reader the ORC file reader for the original file
 * @param bucket the bucket number for the file
 * @param minKey only return keys larger than minKey if it is non-null
 * @param maxKey only return keys less than or equal to maxKey if non-null
 * @param options options to provide to read the rows
 */
OriginalReaderPair(ReaderKey key, Reader reader, int bucket,
                   RecordIdentifier minKey, RecordIdentifier maxKey,
                   Reader.Options options)
    : base(key, reader, bucket, minKey, maxKey, options, 0) {
}
/**
 * Produce the next event in merge-sorted key order.
 * Fills recordIdentifier with the returned event's key and re-points prev's
 * fields at the event (no copy). Events from transactions rejected by
 * validTxnList are skipped; when collapsing, earlier events for the same
 * row are consumed until the last one.
 * @param recordIdentifier out-parameter receiving the key of the event
 * @param prev the row object linked to the current event's fields
 * @return true when an event was produced, false when the merge is exhausted
 */
public bool next(RecordIdentifier recordIdentifier, OrcStruct prev) {
    bool keysSame = true;
    while (keysSame && primary != null) {
        // The primary's nextRecord is the next value to return
        OrcStruct current = primary.nextRecord;
        recordIdentifier.set(primary.key);
        // Advance the primary reader to the next record
        primary.next(extraValue);
        // Save the current record as the new extraValue for next time so that
        // we minimize allocations
        extraValue = current;
        // now that the primary reader has advanced, we need to see if we
        // continue to read it or move to the secondary.
        if (primary.nextRecord == null || primary.key.compareTo(secondaryKey) > 0) {
            // if the primary isn't done, push it back into the readers
            if (primary.nextRecord != null) {
                readers.put(primary.key, primary);
            }
            // update primary and secondaryKey
            Map.Entry<ReaderKey, ReaderPair> entry = readers.pollFirstEntry();
            if (entry != null) {
                primary = entry.getValue();
                if (readers.isEmpty()) {
                    secondaryKey = null;
                } else {
                    secondaryKey = readers.firstKey();
                }
            } else {
                primary = null;
            }
        }
        // if this transaction isn't ok, skip over it
        if (!validTxnList.isTxnValid(
            ((ReaderKey)recordIdentifier).getCurrentTransactionId())) {
            continue;
        }
        /* for multi-statement txns, you may have multiple events for the same
         * row in the same (current) transaction. We want to collapse these to
         * just the last one regardless whether we are minor compacting.
         * Consider INSERT/UPDATE/UPDATE of the same row in the same txn. There
         * is no benefit passing along anything except the last event. If we
         * did want to pass it along, we'd have to include statementId in the
         * row returned so that compaction could write it out or make minor
         * compaction understand how to write out delta files in
         * delta_xxx_yyy_stid format. There doesn't seem to be any value in
         * this. */
        bool isSameRow = prevKey.isSameRow((ReaderKey)recordIdentifier);
        // if we are collapsing, figure out if this is a new row
        if (collapse || isSameRow) {
            keysSame = (collapse && prevKey.compareRow(recordIdentifier) == 0) || (isSameRow);
            if (!keysSame) {
                prevKey.set(recordIdentifier);
            }
        } else {
            keysSame = false;
        }
        // set the output record by fiddling with the pointers so that we can
        // avoid a copy.
        prev.linkFields(current);
    }
    return !keysSame;
}
/**
 * Produce the next event in merge-sorted key order.
 * Fills recordIdentifier with the returned event's key and re-points prev's
 * fields at the event (no copy). Events from transactions rejected by
 * validTxnList are skipped; when collapsing, earlier events for the same
 * row are consumed until the last one.
 * @param recordIdentifier out-parameter receiving the key of the event
 * @param prev the row object linked to the current event's fields
 * @return true when an event was produced, false when the merge is exhausted
 */
public bool next(RecordIdentifier recordIdentifier, OrcStruct prev) {
    bool keysSame = true;
    while (keysSame && primary != null) {
        // The primary's nextRecord is the next value to return
        OrcStruct current = primary.nextRecord;
        recordIdentifier.set(primary.key);
        // Advance the primary reader to the next record
        primary.next(extraValue);
        // Save the current record as the new extraValue for next time so that
        // we minimize allocations
        extraValue = current;
        // now that the primary reader has advanced, we need to see if we
        // continue to read it or move to the secondary.
        if (primary.nextRecord == null || primary.key.compareTo(secondaryKey) > 0) {
            // if the primary isn't done, push it back into the readers
            if (primary.nextRecord != null) {
                readers.put(primary.key, primary);
            }
            // update primary and secondaryKey
            Map.Entry <ReaderKey, ReaderPair> entry = readers.pollFirstEntry();
            if (entry != null) {
                primary = entry.getValue();
                if (readers.isEmpty()) {
                    secondaryKey = null;
                } else {
                    secondaryKey = readers.firstKey();
                }
            } else {
                primary = null;
            }
        }
        // if this transaction isn't ok, skip over it
        if (!validTxnList.isTxnValid(
            ((ReaderKey)recordIdentifier).getCurrentTransactionId())) {
            continue;
        }
        /* for multi-statement txns, you may have multiple events for the same
         * row in the same (current) transaction. We want to collapse these to
         * just the last one regardless whether we are minor compacting.
         * Consider INSERT/UPDATE/UPDATE of the same row in the same txn. There
         * is no benefit passing along anything except the last event. If we
         * did want to pass it along, we'd have to include statementId in the
         * row returned so that compaction could write it out or make minor
         * compaction understand how to write out delta files in
         * delta_xxx_yyy_stid format. There doesn't seem to be any value in
         * this. */
        bool isSameRow = prevKey.isSameRow((ReaderKey)recordIdentifier);
        // if we are collapsing, figure out if this is a new row
        if (collapse || isSameRow) {
            keysSame = (collapse && prevKey.compareRow(recordIdentifier) == 0) || (isSameRow);
            if (!keysSame) {
                prevKey.set(recordIdentifier);
            }
        } else {
            keysSame = false;
        }
        // set the output record by fiddling with the pointers so that we can
        // avoid a copy.
        prev.linkFields(current);
    }
    return(!keysSame);
}
/**
 * Create a reader that merge sorts the ACID events together.
 *
 * Builds one ReaderPair for the base file (when present) and one per delta
 * bucket file, seeds the sorted `readers` map with each pair's first record,
 * then pulls the smallest entry out as `primary`, with `secondaryKey`
 * tracking the next-smallest key (or null when no other reader remains).
 *
 * @param conf the configuration
 * @param collapseEvents should the events on the same row be collapsed
 * @param reader the reader for the base file, or null when there is none
 * @param isOriginal is the base file a pre-acid file
 * @param bucket the bucket we are reading
 * @param validTxnList the transactions this merger should treat as valid
 * @param options the options to read with
 * @param deltaDirectory the list of delta directories to include
 */
OrcRawRecordMerger(Configuration conf,
                   bool collapseEvents,
                   Reader reader,
                   bool isOriginal,
                   int bucket,
                   ValidTxnList validTxnList,
                   Reader.Options options,
                   Path[] deltaDirectory) {
    this.conf = conf;
    this.collapse = collapseEvents;
    this.offset = options.getOffset();
    this.length = options.getLength();
    this.validTxnList = validTxnList;
    TypeDescription typeDescr = OrcUtils.getDesiredRowTypeDescr(conf);
    if (typeDescr == null) {
        // an explicit row schema is mandatory for ACID reads
        throw new IOException(ErrorMsg.SCHEMA_REQUIRED_TO_READ_ACID_TABLES.getErrorCodedMsg());
    }
    objectInspector = OrcRecordUpdater.createEventSchema
        (OrcStruct.createObjectInspector(0, OrcUtils.getOrcTypes(typeDescr)));
    // modify the options to reflect the event instead of the base row
    Reader.Options eventOptions = createEventOptions(options);
    if (reader == null) {
        baseReader = null;
    } else {
        // find the min/max based on the offset and length
        // (these calls set the minKey/maxKey fields read below)
        if (isOriginal) {
            discoverOriginalKeyBounds(reader, bucket, options);
        } else {
            discoverKeyBounds(reader, options);
        }
        LOG.info("min key = " + minKey + ", max key = " + maxKey);
        // use the min/max instead of the byte range
        ReaderPair pair;
        ReaderKey key = new ReaderKey();
        if (isOriginal) {
            options = options.clone();
            options.range(options.getOffset(), Long.MAX_VALUE);
            pair = new OriginalReaderPair(key, reader, bucket, minKey, maxKey,
                                          options);
        } else {
            pair = new ReaderPair(key, reader, bucket, minKey, maxKey,
                                  eventOptions, 0);
        }
        // if there is at least one record, put it in the map
        if (pair.nextRecord != null) {
            readers.put(key, pair);
        }
        baseReader = pair.recordReader;
    }
    // we always want to read all of the deltas
    eventOptions.range(0, Long.MAX_VALUE);
    if (deltaDirectory != null) {
        foreach (Path delta in deltaDirectory) {
            ReaderKey key = new ReaderKey();
            Path deltaFile = AcidUtils.createBucketFile(delta, bucket);
            AcidUtils.ParsedDelta deltaDir = AcidUtils.parsedDelta(delta);
            FileSystem fs = deltaFile.getFileSystem(conf);
            // NOTE(review): -1 from getLastFlushLength appears to mean
            // "no readable length", so the delta is skipped -- confirm
            long length = getLastFlushLength(fs, deltaFile);
            if (length != -1 && fs.exists(deltaFile)) {
                Reader deltaReader = OrcFile.createReader(deltaFile,
                    OrcFile.readerOptions(conf).maxLength(length));
                Reader.Options deltaEventOptions = null;
                if (eventOptions.getSearchArgument() != null) {
                    // Turn off the sarg before pushing it to delta. We never want to push a sarg to a delta as
                    // it can produce wrong results (if the latest valid version of the record is filtered out by
                    // the sarg) or ArrayOutOfBounds errors (when the sarg is applied to a delete record)
                    // unless the delta only has insert events
                    OrcRecordUpdater.AcidStats acidStats = OrcRecordUpdater.parseAcidStats(deltaReader);
                    if (acidStats.deletes > 0 || acidStats.updates > 0) {
                        deltaEventOptions = eventOptions.clone().searchArgument(null, null);
                    }
                }
                ReaderPair deltaPair;
                deltaPair = new ReaderPair(key, deltaReader, bucket, minKey,
                    maxKey, deltaEventOptions != null ? deltaEventOptions : eventOptions,
                    deltaDir.getStatementId());
                // only register deltas that actually contain records in range
                if (deltaPair.nextRecord != null) {
                    readers.put(key, deltaPair);
                }
            }
        }
    }
    // get the first record
    Map.Entry <ReaderKey, ReaderPair> entry = readers.pollFirstEntry();
    if (entry == null) {
        columns = 0;
        primary = null;
    } else {
        primary = entry.getValue();
        if (readers.isEmpty()) {
            secondaryKey = null;
        } else {
            secondaryKey = readers.firstKey();
        }
        // get the number of columns in the user's rows
        columns = primary.getColumns();
    }
}
/**
 * This means 1 txn modified the same row more than once: the keys name the
 * same row and carry the same current transaction id.
 */
private bool isSameRow(ReaderKey other) {
    bool rowsMatch = compareRow(other) == 0;
    bool txnsMatch = currentTransactionId == other.currentTransactionId;
    return(rowsMatch && txnsMatch);
}
/// <summary>
/// Decrypts the reader key with the shared secret and returns it.
/// </summary>
/// <returns>the decrypted reader key string</returns>
public string GetReaderKey() {
    var sharedSecret = KeyHelper.SharedSecret;
    return(ReaderKey.Decrypt(sharedSecret));
}