/**
 * Extracts the metadata and footer sections stored in footerBuffer and builds
 * the object inspector from the footer's type list.
 *
 * The metadata section is read starting at the buffer's current position and
 * spans metadataSize bytes; the footer is read from the bytes that follow, up
 * to the buffer's limit. The buffer's position is set back to its starting
 * value once both sections have been extracted.
 *
 * @param compressionKind compression kind used to create the codec
 * @param bufferSize buffer size handed to the extraction helpers
 * @param metadataSize number of bytes occupied by the metadata section
 * @param footerBuffer buffer positioned at the start of the metadata section
 */
public MetaInfoObjExtractor(CompressionKind compressionKind, int bufferSize, int metadataSize, ByteBuffer footerBuffer)
{
    this.compressionKind = compressionKind;
    this.bufferSize = bufferSize;
    this.codec = WriterImpl.createCodec(compressionKind);
    this.metadataSize = metadataSize;

    // Remember where the caller left the buffer so it can be restored below.
    int startPos = footerBuffer.position();
    int footerLength = footerBuffer.limit() - footerBuffer.position() - metadataSize;
    int footerStart = startPos + metadataSize;

    this.metadata = extractMetadata(footerBuffer, startPos, metadataSize, codec, bufferSize);
    this.footer = extractFooter(footerBuffer, footerStart, footerLength, codec, bufferSize);

    footerBuffer.position(startPos);
    this.inspector = OrcStruct.createObjectInspector(0, footer.TypesList);
}
/**
 * Extracts the necessary metadata from an externally stored buffer (fullFooterBuffer).
 *
 * The remaining bytes of the buffer are treated as, in order: the metadata
 * section, the footer, the PostScript, and a final single byte holding the
 * PostScript length. On return the buffer's position is at the start of the
 * metadata section and its limit is at the start of the PostScript.
 *
 * @param bb buffer whose remaining bytes end with the PostScript length byte
 * @param srcPath source path, passed through to extractPostScript
 * @return the stripe statistics, the footer, and the file meta info read from the buffer
 */
public static FooterInfo extractMetaInfoFromFooter(ByteBuffer bb, string srcPath)
{
    // Read the PostScript. Be very careful as some parts of this historically use bb position
    // and some use absolute offsets that have to take position into account.
    int startPos = bb.position();
    int lastBytePos = startPos + bb.remaining() - 1;
    int postScriptLength = bb.get(lastBytePos) & 0xff;
    int postScriptPos = lastBytePos - postScriptLength;
    OrcProto.PostScript postScript = extractPostScript(bb, srcPath, postScriptLength, postScriptPos);
    Debug.Assert(startPos == bb.position());

    // Extract PS information; each section sits directly before the one that follows it.
    int footerSize = (int)postScript.FooterLength;
    int metadataSize = (int)postScript.MetadataLength;
    int footerPos = postScriptPos - footerSize;
    int metadataPos = footerPos - metadataSize;
    var compressionKind = (CompressionKind)Enum.Parse(
        typeof(CompressionKind), postScript.Compression.ToString(), true);
    CompressionCodec codec = WriterImpl.createCodec(compressionKind);
    int bufferSize = (int)postScript.CompressionBlockSize;
    bb.position(metadataPos);
    bb.mark();

    // Extract metadata and footer.
    OrcProto.Metadata metadata = extractMetadata(bb, metadataPos, metadataSize, codec, bufferSize);
    var stripeStats = new List<StripeStatistics>(metadata.StripeStatsCount);
    foreach (OrcProto.StripeStatistics protoStats in metadata.StripeStatsList)
    {
        stripeStats.Add(new StripeStatistics(protoStats.ColStatsList));
    }
    OrcProto.Footer footer = extractFooter(bb, footerPos, footerSize, codec, bufferSize);

    // Narrow the buffer to the metadata + footer range before handing it on.
    bb.position(metadataPos);
    bb.limit(postScriptPos);

    // TODO: do we need footer buffer here? FileInfo/FileMetaInfo is a mess...
    var fileMetaInfo = new FileMetaInfo(
        compressionKind, bufferSize, metadataSize, bb, extractWriterVersion(postScript));
    return new FooterInfo(stripeStats, footer, fileMetaInfo);
}
/**
 * Constructor that lets the user specify additional options.
 *
 * Three sources are tried in order: a FileMetadata supplied through the
 * options, a FileMetaInfo supplied through the options, and finally the file
 * itself, whose footer is read from a stream obtained from streamCreator.
 *
 * @param streamCreator factory invoked to open the file when neither a
 *        FileMetadata nor a FileMetaInfo is supplied in the options
 * @param path pathname for file
 * @param options options for reading
 */
public ReaderImpl(Func<Stream> streamCreator, string path, OrcFile.ReaderOptions options)
{
    this.streamCreator = streamCreator;
    this.path = path;
    this.conf = options.getConfiguration();

    FileMetadata cachedMetadata = options.getFileMetadata();
    if (cachedMetadata != null)
    {
        // Everything comes from the supplied metadata; no footer is read or parsed.
        this.compressionKind = cachedMetadata.getCompressionKind();
        this.bufferSize = cachedMetadata.getCompressionBufferSize();
        this.codec = WriterImpl.createCodec(compressionKind);
        this.metadataSize = cachedMetadata.getMetadataSize();
        this.stripeStats = cachedMetadata.getStripeStats();
        this.versionList = cachedMetadata.getVersionList();
        this.writerVersion =
            OrcFile.WriterVersionHelpers.from(cachedMetadata.getWriterVersionNum());
        this.types = cachedMetadata.getTypes();
        this.rowIndexStride = cachedMetadata.getRowIndexStride();
        this.contentLength = cachedMetadata.getContentLength();
        this.numberOfRows = cachedMetadata.getNumberOfRows();
        this.fileStats = cachedMetadata.getFileStats();
        this.stripes = cachedMetadata.getStripes();
        this.inspector = OrcStruct.createObjectInspector(0, cachedMetadata.getTypes());
        this.footerByteBuffer = null; // not cached and not needed here
        this.userMetadata = null; // not cached and not needed here
        this.footerMetaAndPsBuffer = null;
        return;
    }

    FileMetaInfo footerInfo;
    if (options.getFileMetaInfo() == null)
    {
        // Nothing was supplied by the caller: read the footer from the file itself.
        using (Stream input = streamCreator())
        {
            footerInfo = extractMetaInfoFromFooter(input, path, options.getMaxLength());
            this.footerMetaAndPsBuffer = footerInfo.footerMetaAndPsBuffer;
        }
    }
    else
    {
        footerInfo = options.getFileMetaInfo();
        this.footerMetaAndPsBuffer = null;
    }

    var extracted = new MetaInfoObjExtractor(
        footerInfo.compressionKind,
        footerInfo.bufferSize,
        footerInfo.metadataSize,
        footerInfo.footerBuffer);

    this.footerByteBuffer = footerInfo.footerBuffer;
    this.compressionKind = extracted.compressionKind;
    this.codec = extracted.codec;
    this.bufferSize = extracted.bufferSize;
    this.metadataSize = extracted.metadataSize;
    this.stripeStats = extracted.metadata.StripeStatsList;
    this.types = extracted.footer.TypesList;
    // NOTE(review): the (int) casts below narrow the footer values; confirm against the
    // field types that content lengths / row counts above int.MaxValue cannot occur here.
    this.rowIndexStride = (int)extracted.footer.RowIndexStride;
    this.contentLength = (int)extracted.footer.ContentLength;
    this.numberOfRows = (int)extracted.footer.NumberOfRows;
    this.userMetadata = extracted.footer.MetadataList;
    this.fileStats = extracted.footer.StatisticsList;
    this.inspector = extracted.inspector;
    this.versionList = footerInfo.versionList.Select(v => (int)v).ToList();
    this.writerVersion = footerInfo.writerVersion;
    this.stripes = convertProtoStripesToStripes(extracted.footer.StripesList);
}