/**
 * Ctor used when creating file info during init and when getting a new one.
 * Every argument is stored as-is in the corresponding field; nothing is copied.
 *
 * @param compressionKind value stored in compressionKind
 * @param bufferSize value stored in bufferSize
 * @param metadataSize value stored in metadataSize
 * @param footerBuffer buffer stored in footerBuffer
 * @param versionList list stored in versionList
 * @param writerVersion value stored in writerVersion
 * @param fullFooterBuffer buffer stored in footerMetaAndPsBuffer
 */
public FileMetaInfo(
    CompressionKind compressionKind,
    int bufferSize,
    int metadataSize,
    ByteBuffer footerBuffer,
    IList<uint> versionList,
    OrcFile.WriterVersion writerVersion,
    ByteBuffer fullFooterBuffer)
{
    // Buffers first.
    this.footerMetaAndPsBuffer = fullFooterBuffer;
    this.footerBuffer = footerBuffer;

    // Version information.
    this.writerVersion = writerVersion;
    this.versionList = versionList;

    // Compression settings and sizes.
    this.compressionKind = compressionKind;
    this.bufferSize = bufferSize;
    this.metadataSize = metadataSize;
}
/**
 * Ctor used when creating file info during init and when getting a new one.
 * Plain field initialisation: each parameter is assigned to one field.
 *
 * @param compressionKind assigned to compressionKind
 * @param bufferSize assigned to bufferSize
 * @param metadataSize assigned to metadataSize
 * @param footerBuffer assigned to footerBuffer
 * @param versionList assigned to versionList
 * @param writerVersion assigned to writerVersion
 * @param fullFooterBuffer assigned to footerMetaAndPsBuffer
 */
public FileMetaInfo(
    CompressionKind compressionKind,
    int bufferSize,
    int metadataSize,
    ByteBuffer footerBuffer,
    IList<uint> versionList,
    OrcFile.WriterVersion writerVersion,
    ByteBuffer fullFooterBuffer)
{
    // Scalar settings.
    this.metadataSize = metadataSize;
    this.bufferSize = bufferSize;
    this.compressionKind = compressionKind;

    // Version information.
    this.versionList = versionList;
    this.writerVersion = writerVersion;

    // Note the name difference: fullFooterBuffer lands in footerMetaAndPsBuffer.
    this.footerBuffer = footerBuffer;
    this.footerMetaAndPsBuffer = fullFooterBuffer;
}
/**
 * Deserializes this split from the given input.
 *
 * Read order: the base class fields, one flags byte, the delta count followed
 * by that many DeltaMetaData records, then (only if the footer flag is set)
 * the FileMetaInfo fields, then (only if the file-id flag is set) the file id.
 *
 * @param in input to read the serialized split from
 */
public void readFields(DataInput @in)
{
    // deserialize path, offset, length using FileSplit
    base.readFields(@in);

    // A single byte carries all the boolean markers.
    byte flags = @in.readByte();
    hasFooter = (FOOTER_FLAG & flags) != 0;
    isOriginal = (ORIGINAL_FLAG & flags) != 0;
    hasBase = (BASE_FLAG & flags) != 0;
    bool hasFileId = (HAS_FILEID_FLAG & flags) != 0;

    // Drop any deltas left from a previous read before loading the new ones.
    deltas.Clear();
    int numDeltas = @in.readInt();
    for (int i = 0; i < numDeltas; i++)
    {
        AcidInputFormat.DeltaMetaData dmd = new AcidInputFormat.DeltaMetaData();
        dmd.readFields(@in);
        deltas.Add(dmd);
    }

    if (hasFooter)
    {
        // deserialize FileMetaInfo fields
        string compressionType = Text.readString(@in);
        int bufferSize = WritableUtils.readVInt(@in);
        int metadataSize = WritableUtils.readVInt(@in);

        // deserialize FileMetaInfo field footer
        int footerBuffSize = WritableUtils.readVInt(@in);
        ByteBuffer footerBuff = ByteBuffer.allocate(footerBuffSize);
        @in.readFully(footerBuff.array(), 0, footerBuffSize);
        OrcFile.WriterVersion writerVersion =
            ReaderImpl.getWriterVersion(WritableUtils.readVInt(@in));

        // The split-reading FileMetaInfo ctor takes a CompressionKind, not a
        // string, so convert the serialized name (case-insensitively).
        // NOTE(review): assumes the writer side serializes the enum member
        // name - confirm against the matching write method.
        CompressionKind compressionKind = (CompressionKind)System.Enum.Parse(
            typeof(CompressionKind), compressionType, true);

        fileMetaInfo = new FileMetaInfo(
            compressionKind, bufferSize, metadataSize, footerBuff, writerVersion);
    }

    if (hasFileId)
    {
        fileId = @in.readLong();
    }
}
/**
 * Ctor used when reading splits - no version list or full footer buffer.
 * Delegates to the seven-argument constructor, passing null for both the
 * version list and the full footer buffer.
 */
public FileMetaInfo(
    CompressionKind compressionKind,
    int bufferSize,
    int metadataSize,
    ByteBuffer footerBuffer,
    OrcFile.WriterVersion writerVersion)
    : this(
        compressionKind,
        bufferSize,
        metadataSize,
        footerBuffer,
        versionList: null,
        writerVersion: writerVersion,
        fullFooterBuffer: null)
{
}
/**
 * Reads the tail of the file, decodes the PostScript found there and returns
 * a FileMetaInfo describing the footer and metadata sections.
 *
 * The last min(size, DIRECTORY_SIZE_GUESS) bytes are read first. If the
 * PostScript reports that footer + metadata do not fit in those bytes, the
 * missing leading bytes are read and placed in front of what was already read.
 *
 * @param file stream to read from
 * @param path file name, passed on to the footer/PostScript helpers
 * @param maxFileLength length to treat as the file size; Int64.MaxValue means
 *        "use the stream's own Length"
 * @return file meta info whose footer buffer starts at the metadata/footer
 *         bytes (with that position marked) and ends before the PostScript
 */
private static FileMetaInfo extractMetaInfoFromFooter(Stream file, string path, long maxFileLength)
{
    // figure out the size of the file using the option or the stream itself
    long size = (maxFileLength == Int64.MaxValue) ? file.Length : maxFileLength;

    // read last bytes into buffer to get PostScript
    int readSize = (int)Math.Min(size, DIRECTORY_SIZE_GUESS);
    ByteBuffer buffer = ByteBuffer.allocate(readSize);
    Debug.Assert(buffer.position() == 0);
    file.readFully(size - readSize, buffer.array(), buffer.arrayOffset(), readSize);
    buffer.position(0);

    // the very last byte read is the length of the PostScript
    int psLen = buffer.get(readSize - 1) & 0xff;
    ensureOrcFooter(file, path, psLen, buffer);
    int psOffset = readSize - 1 - psLen;
    OrcProto.PostScript ps = extractPostScript(buffer, path, psLen, psOffset);

    int footerSize = (int)ps.FooterLength;
    int metadataSize = (int)ps.MetadataLength;
    OrcFile.WriterVersion writerVersion = extractWriterVersion(ps);

    // number of bytes of metadata + footer + PostScript + length byte that
    // were NOT covered by the first read (positive only when more is needed)
    int missing = psLen + 1 + footerSize + metadataSize - readSize;

    ByteBuffer fullFooterBuffer;
    if (missing > 0)
    {
        // more bytes need to be read: seek back to the right place, read the
        // extra bytes into the front of a larger buffer ...
        ByteBuffer combined = ByteBuffer.allocate(missing + readSize);
        file.readFully(
            size - readSize - missing,
            combined.array(),
            combined.arrayOffset() + combined.position(),
            missing);
        combined.position(missing);

        // ... and append the bytes that were already read
        combined.put(buffer);
        buffer = combined;
        buffer.position(0);
        fullFooterBuffer = buffer.slice();
        buffer.limit(footerSize + metadataSize);
    }
    else
    {
        // footer is already in the bytes in buffer, just adjust position, length
        buffer.position(psOffset - footerSize - metadataSize);
        fullFooterBuffer = buffer.slice();
        buffer.limit(psOffset);
    }

    // remember position for later
    buffer.mark();

    // map the PostScript compression value onto CompressionKind by name,
    // ignoring case
    string compressionName = ps.Compression.ToString();
    CompressionKind compressionKind =
        (CompressionKind)Enum.Parse(typeof(CompressionKind), compressionName, true);

    int compressionBlockSize = (int)ps.CompressionBlockSize;
    return new FileMetaInfo(
        compressionKind,
        compressionBlockSize,
        metadataSize,
        buffer,
        ps.VersionList,
        writerVersion,
        fullFooterBuffer);
}
/**
 * Constructor that lets the user specify additional options.
 *
 * Three sources of file information are tried, in this order:
 * the FileMetadata from the options, the FileMetaInfo from the options, and
 * finally the footer read from a stream obtained from streamCreator.
 *
 * @param streamCreator factory that opens the file; only invoked here when
 *        the options carry neither FileMetadata nor FileMetaInfo
 * @param path pathname for file
 * @param options options for reading
 */
public ReaderImpl(Func<Stream> streamCreator, string path, OrcFile.ReaderOptions options)
{
    this.streamCreator = streamCreator;
    this.path = path;
    this.conf = options.getConfiguration();

    FileMetadata fileMetadata = options.getFileMetadata();
    if (fileMetadata != null)
    {
        // Everything comes straight from the supplied metadata object.
        this.compressionKind = fileMetadata.getCompressionKind();
        this.bufferSize = fileMetadata.getCompressionBufferSize();
        this.codec = WriterImpl.createCodec(compressionKind);
        this.metadataSize = fileMetadata.getMetadataSize();
        this.stripeStats = fileMetadata.getStripeStats();
        this.versionList = fileMetadata.getVersionList();
        this.writerVersion = OrcFile.WriterVersionHelpers.from(fileMetadata.getWriterVersionNum());
        this.types = fileMetadata.getTypes();
        this.rowIndexStride = fileMetadata.getRowIndexStride();
        this.contentLength = fileMetadata.getContentLength();
        this.numberOfRows = fileMetadata.getNumberOfRows();
        this.fileStats = fileMetadata.getFileStats();
        this.stripes = fileMetadata.getStripes();
        this.inspector = OrcStruct.createObjectInspector(0, fileMetadata.getTypes());
        this.footerByteBuffer = null; // not cached and not needed here
        this.userMetadata = null; // not cached and not needed here
        this.footerMetaAndPsBuffer = null;
    }
    else
    {
        FileMetaInfo footerMetaData;
        if (options.getFileMetaInfo() != null)
        {
            footerMetaData = options.getFileMetaInfo();
            this.footerMetaAndPsBuffer = null;
        }
        else
        {
            // Nothing was supplied: open the file and parse its tail. The
            // stream is disposed as soon as the footer has been extracted.
            using (Stream file = streamCreator())
            {
                footerMetaData = extractMetaInfoFromFooter(file, path, options.getMaxLength());
                this.footerMetaAndPsBuffer = footerMetaData.footerMetaAndPsBuffer;
            }
        }

        MetaInfoObjExtractor rInfo = new MetaInfoObjExtractor(
            footerMetaData.compressionKind,
            footerMetaData.bufferSize,
            footerMetaData.metadataSize,
            footerMetaData.footerBuffer);

        this.footerByteBuffer = footerMetaData.footerBuffer;
        this.compressionKind = rInfo.compressionKind;
        this.codec = rInfo.codec;
        this.bufferSize = rInfo.bufferSize;
        this.metadataSize = rInfo.metadataSize;
        this.stripeStats = rInfo.metadata.StripeStatsList;
        this.types = rInfo.footer.TypesList;
        this.rowIndexStride = (int)rInfo.footer.RowIndexStride;
        // NOTE(review): the two casts below narrow to int; if contentLength /
        // numberOfRows are declared as long, large files would be truncated
        // here - confirm the field types.
        this.contentLength = (int)rInfo.footer.ContentLength;
        this.numberOfRows = (int)rInfo.footer.NumberOfRows;
        this.userMetadata = rInfo.footer.MetadataList;
        this.fileStats = rInfo.footer.StatisticsList;
        this.inspector = rInfo.inspector;

        // A FileMetaInfo built by the split-reading constructor has a null
        // version list; calling Select on it would throw, so keep it null.
        this.versionList = footerMetaData.versionList == null
            ? null
            : footerMetaData.versionList.Select(v => (int)v).ToList();

        this.writerVersion = footerMetaData.writerVersion;
        this.stripes = convertProtoStripesToStripes(rInfo.footer.StripesList);
    }
}