/**
 * Builds a union inspector from the flattened ORC type list.
 *
 * The protobuf type at <paramref name="columnId"/> describes the union; one
 * child inspector is created for each of its subtype column ids.
 */
public OrcUnionObjectInspector(int columnId, IList<OrcProto.Type> types)
{
    OrcProto.Type unionType = types[columnId];
    // Pre-size the child list: one inspector per union alternative.
    children = new List<ObjectInspector>(unionType.SubtypesCount);
    foreach (var subtypeId in unionType.SubtypesList)
    {
        children.Add(OrcStruct.createObjectInspector((int)subtypeId, types));
    }
}
/**
 * Builds a union inspector from Hive type metadata.
 *
 * @param info the union type whose alternatives become child inspectors
 */
public OrcUnionObjectInspector(UnionTypeInfo info)
{
    // Fetch the alternatives once and reuse the list; the original code called
    // getAllUnionObjectTypeInfos() a second time in the loop, doing the work
    // (and possibly a list allocation) twice for the same data.
    List<TypeInfo> unionChildren = info.getAllUnionObjectTypeInfos();
    this.children = new List<ObjectInspector>(unionChildren.Count);
    foreach (TypeInfo child in unionChildren)
    {
        this.children.Add(OrcStruct.createObjectInspector(child));
    }
}
/**
 * Decodes the metadata and footer sections of an ORC file tail from a raw buffer.
 *
 * @param compressionKind codec family used to compress the tail sections
 * @param bufferSize      compression buffer size recorded in the postscript
 * @param metadataSize    byte length of the metadata section within the buffer
 * @param footerBuffer    buffer positioned at the start of the metadata section;
 *                        its position is restored before this constructor returns
 */
public MetaInfoObjExtractor(CompressionKind compressionKind, int bufferSize,
    int metadataSize, ByteBuffer footerBuffer)
{
    this.compressionKind = compressionKind;
    this.bufferSize = bufferSize;
    this.metadataSize = metadataSize;
    this.codec = WriterImpl.createCodec(compressionKind);

    // Remember where the tail starts so the buffer can be rewound afterwards.
    int startPosition = footerBuffer.position();
    // The footer occupies whatever remains after the metadata section.
    int footerLength = footerBuffer.limit() - startPosition - metadataSize;

    // Metadata comes first in the buffer, immediately followed by the footer.
    this.metadata = extractMetadata(footerBuffer, startPosition, metadataSize, codec, bufferSize);
    this.footer = extractFooter(footerBuffer, startPosition + metadataSize, footerLength,
        codec, bufferSize);

    // Restore the caller's view of the buffer before building the inspector.
    footerBuffer.position(startPosition);
    this.inspector = OrcStruct.createObjectInspector(0, footer.TypesList);
}
/**
 * Create a reader that merge sorts the ACID events together.
 *
 * Builds a reader pair for the (optional) base file plus one pair per delta
 * bucket file, seeds the sorted {@code readers} map with each pair's first
 * record, and pulls the smallest entry out as the primary cursor.
 *
 * @param conf the configuration
 * @param collapseEvents should the events on the same row be collapsed
 * @param reader the base-file reader, or null when there is no base
 * @param isOriginal is the base file a pre-acid file
 * @param bucket the bucket we are reading
 * @param validTxnList the transactions considered valid for this read
 * @param options the options to read with
 * @param deltaDirectory the list of delta directories to include
 * @throws IOException when the row schema required for ACID reads is missing
 */
OrcRawRecordMerger(Configuration conf, bool collapseEvents, Reader reader,
    bool isOriginal, int bucket, ValidTxnList validTxnList,
    Reader.Options options, Path[] deltaDirectory)
{
    this.conf = conf;
    this.collapse = collapseEvents;
    this.offset = options.getOffset();
    this.length = options.getLength();
    this.validTxnList = validTxnList;

    // ACID reads cannot infer the row schema; the caller must have put it in conf.
    TypeDescription typeDescr = OrcUtils.getDesiredRowTypeDescr(conf);
    if (typeDescr == null)
    {
        throw new IOException(ErrorMsg.SCHEMA_REQUIRED_TO_READ_ACID_TABLES.getErrorCodedMsg());
    }

    // The event schema wraps the user's row schema with the ACID event columns.
    objectInspector = OrcRecordUpdater.createEventSchema
        (OrcStruct.createObjectInspector(0, OrcUtils.getOrcTypes(typeDescr)));

    // modify the options to reflect the event instead of the base row
    Reader.Options eventOptions = createEventOptions(options);
    if (reader == null)
    {
        baseReader = null;
    }
    else
    {
        // find the min/max based on the offset and length
        // (these calls populate the minKey/maxKey fields used below)
        if (isOriginal)
        {
            discoverOriginalKeyBounds(reader, bucket, options);
        }
        else
        {
            discoverKeyBounds(reader, options);
        }
        LOG.info("min key = " + minKey + ", max key = " + maxKey);
        // use the min/max instead of the byte range
        ReaderPair pair;
        ReaderKey key = new ReaderKey();
        if (isOriginal)
        {
            // Original (pre-acid) files are read in full; the key bounds, not
            // the byte range, now限 limit what is consumed.
            options = options.clone();
            options.range(options.getOffset(), Long.MAX_VALUE);
            pair = new OriginalReaderPair(key, reader, bucket, minKey, maxKey, options);
        }
        else
        {
            pair = new ReaderPair(key, reader, bucket, minKey, maxKey, eventOptions, 0);
        }
        // if there is at least one record, put it in the map
        if (pair.nextRecord != null)
        {
            readers.put(key, pair);
        }
        baseReader = pair.recordReader;
    }

    // we always want to read all of the deltas
    eventOptions.range(0, Long.MAX_VALUE);
    if (deltaDirectory != null)
    {
        foreach (Path delta in deltaDirectory)
        {
            ReaderKey key = new ReaderKey();
            Path deltaFile = AcidUtils.createBucketFile(delta, bucket);
            AcidUtils.ParsedDelta deltaDir = AcidUtils.parsedDelta(delta);
            FileSystem fs = deltaFile.getFileSystem(conf);
            // -1 means no flush-length side file; fall through and skip below.
            long length = getLastFlushLength(fs, deltaFile);
            if (length != -1 && fs.exists(deltaFile))
            {
                Reader deltaReader = OrcFile.createReader(deltaFile,
                    OrcFile.readerOptions(conf).maxLength(length));
                Reader.Options deltaEventOptions = null;
                if (eventOptions.getSearchArgument() != null)
                {
                    // Turn off the sarg before pushing it to delta. We never want
                    // to push a sarg to a delta as it can produce wrong results
                    // (if the latest valid version of the record is filtered out
                    // by the sarg) or ArrayOutOfBounds errors (when the sarg is
                    // applied to a delete record) unless the delta only has
                    // insert events.
                    OrcRecordUpdater.AcidStats acidStats =
                        OrcRecordUpdater.parseAcidStats(deltaReader);
                    if (acidStats.deletes > 0 || acidStats.updates > 0)
                    {
                        deltaEventOptions = eventOptions.clone().searchArgument(null, null);
                    }
                }
                ReaderPair deltaPair;
                deltaPair = new ReaderPair(key, deltaReader, bucket, minKey, maxKey,
                    deltaEventOptions != null ? deltaEventOptions : eventOptions,
                    deltaDir.getStatementId());
                if (deltaPair.nextRecord != null)
                {
                    readers.put(key, deltaPair);
                }
            }
        }
    }

    // get the first record
    Map.Entry<ReaderKey, ReaderPair> entry = readers.pollFirstEntry();
    if (entry == null)
    {
        // No base and no deltas produced any rows.
        columns = 0;
        primary = null;
    }
    else
    {
        primary = entry.getValue();
        if (readers.isEmpty())
        {
            secondaryKey = null;
        }
        else
        {
            // Peek at the next-smallest key so next() knows when to re-sort.
            secondaryKey = readers.firstKey();
        }
        // get the number of columns in the user's rows
        columns = primary.getColumns();
    }
}
/**
 * Constructor that lets the user specify additional options.
 *
 * When the options carry pre-parsed {@code FileMetadata} (e.g. from a cache),
 * every field is copied from it and no I/O happens. Otherwise the file tail is
 * read (or taken from a supplied {@code FileMetaInfo}) and decoded via
 * {@code MetaInfoObjExtractor}.
 *
 * @param streamCreator factory that opens the underlying file stream
 * @param path pathname for file
 * @param options options for reading
 */
public ReaderImpl(Func<Stream> streamCreator, string path, OrcFile.ReaderOptions options)
{
    this.streamCreator = streamCreator;
    this.path = path;
    this.conf = options.getConfiguration();

    FileMetadata fileMetadata = options.getFileMetadata();
    if (fileMetadata != null)
    {
        // Fast path: all tail information was already parsed elsewhere.
        this.compressionKind = fileMetadata.getCompressionKind();
        this.bufferSize = fileMetadata.getCompressionBufferSize();
        this.codec = WriterImpl.createCodec(compressionKind);
        this.metadataSize = fileMetadata.getMetadataSize();
        this.stripeStats = fileMetadata.getStripeStats();
        this.versionList = fileMetadata.getVersionList();
        this.writerVersion =
            OrcFile.WriterVersionHelpers.from(fileMetadata.getWriterVersionNum());
        this.types = fileMetadata.getTypes();
        this.rowIndexStride = fileMetadata.getRowIndexStride();
        this.contentLength = fileMetadata.getContentLength();
        this.numberOfRows = fileMetadata.getNumberOfRows();
        this.fileStats = fileMetadata.getFileStats();
        this.stripes = fileMetadata.getStripes();
        this.inspector = OrcStruct.createObjectInspector(0, fileMetadata.getTypes());
        this.footerByteBuffer = null; // not cached and not needed here
        this.userMetadata = null; // not cached and not needed here
        this.footerMetaAndPsBuffer = null;
    }
    else
    {
        FileMetaInfo footerMetaData;
        if (options.getFileMetaInfo() != null)
        {
            // Caller already extracted the raw tail bytes; no stream needed.
            footerMetaData = options.getFileMetaInfo();
            this.footerMetaAndPsBuffer = null;
        }
        else
        {
            // Open the file just long enough to read the tail.
            using (Stream file = streamCreator())
            {
                footerMetaData = extractMetaInfoFromFooter(file, path, options.getMaxLength());
                this.footerMetaAndPsBuffer = footerMetaData.footerMetaAndPsBuffer;
            }
        }
        // Decode the raw metadata/footer buffers into protobuf objects.
        MetaInfoObjExtractor rInfo =
            new MetaInfoObjExtractor(footerMetaData.compressionKind,
                footerMetaData.bufferSize,
                footerMetaData.metadataSize,
                footerMetaData.footerBuffer
                );
        this.footerByteBuffer = footerMetaData.footerBuffer;
        this.compressionKind = rInfo.compressionKind;
        this.codec = rInfo.codec;
        this.bufferSize = rInfo.bufferSize;
        this.metadataSize = rInfo.metadataSize;
        this.stripeStats = rInfo.metadata.StripeStatsList;
        this.types = rInfo.footer.TypesList;
        this.rowIndexStride = (int)rInfo.footer.RowIndexStride;
        this.contentLength = (int)rInfo.footer.ContentLength;
        this.numberOfRows = (int)rInfo.footer.NumberOfRows;
        this.userMetadata = rInfo.footer.MetadataList;
        this.fileStats = rInfo.footer.StatisticsList;
        this.inspector = rInfo.inspector;
        // Protobuf stores versions as uint; the field expects ints.
        this.versionList = footerMetaData.versionList.Select(v => (int)v).ToList();
        this.writerVersion = footerMetaData.writerVersion;
        this.stripes = convertProtoStripesToStripes(rInfo.footer.StripesList);
    }
}