/**
 * Parses the ORC PostScript message out of the backing array of a buffer and
 * validates it.
 *
 * This method only calls hasArray(), array() and arrayOffset() on the buffer;
 * it never calls position() or limit() on it.
 *
 * @param bb          buffer holding the file tail; must be array-backed (asserted)
 * @param path        file path, forwarded to checkOrcVersion
 * @param psLen       length in bytes of the serialized PostScript
 * @param psAbsOffset offset of the PostScript inside bb, added to bb.arrayOffset()
 *                    (i.e. absolute, not relative to the buffer's position)
 * @return the parsed PostScript
 * @throws ArgumentException when the PostScript's compression kind is not one of
 *         NONE, ZLIB, SNAPPY or LZO
 */
private static OrcProto.PostScript extractPostScript(ByteBuffer bb, string path, int psLen, int psAbsOffset)
{
    // TODO: when PB is upgraded to 2.6, newInstance(ByteBuffer) method should be used here.
    Debug.Assert(bb.hasArray());
    CodedInputStream @in = CodedInputStream.CreateInstance(
        bb.array(), bb.arrayOffset() + psAbsOffset, psLen);
    OrcProto.PostScript ps = OrcProto.PostScript.ParseFrom(@in);

    // Pass the version list exactly as parsed. flip() is a ByteBuffer operation and has
    // no meaning on the PostScript's version list.
    checkOrcVersion(LOG, path, ps.VersionList);

    // Check compression codec.
    switch (ps.Compression)
    {
        case OrcProto.CompressionKind.NONE:
        case OrcProto.CompressionKind.ZLIB:
        case OrcProto.CompressionKind.SNAPPY:
        case OrcProto.CompressionKind.LZO:
            break;

        default:
            throw new ArgumentException("Unknown compression");
    }
    return ps;
}
/**
 * Extracts the necessary metadata from an externally stored buffer (fullFooterBuffer).
 *
 * The buffer is expected to end with the PostScript followed by a single byte holding
 * the PostScript length; the footer sits immediately before the PostScript and the
 * metadata section immediately before the footer.
 *
 * On return bb has its position set to the start of the metadata section and its limit
 * set to the start of the PostScript; that same buffer instance is handed to the
 * returned FileMetaInfo.
 *
 * @param bb      buffer whose remaining bytes end with the file tail
 * @param srcPath file path, forwarded to extractPostScript
 * @return the stripe statistics, footer and file meta info read from the buffer
 */
public static FooterInfo extractMetaInfoFromFooter(ByteBuffer bb, string srcPath)
{
    // Read the PostScript. Be very careful: some parts of this code historically use the
    // buffer position while others use absolute offsets that must account for it.
    int startPos = bb.position();
    int tailBytePos = startPos + bb.remaining() - 1;
    int psLen = bb.get(tailBytePos) & 0xff;
    int psAbsPos = tailBytePos - psLen;
    OrcProto.PostScript ps = extractPostScript(bb, srcPath, psLen, psAbsPos);
    Debug.Assert(startPos == bb.position());

    // Derive the section sizes and absolute offsets from the PostScript.
    int footerSize = (int)ps.FooterLength;
    int metadataSize = (int)ps.MetadataLength;
    int footerAbsPos = psAbsPos - footerSize;
    int metadataAbsPos = footerAbsPos - metadataSize;

    string compressionName = ps.Compression.ToString();
    CompressionKind compressionKind =
        (CompressionKind)Enum.Parse(typeof(CompressionKind), compressionName, true);
    CompressionCodec codec = WriterImpl.createCodec(compressionKind);
    int bufferSize = (int)ps.CompressionBlockSize;

    bb.position(metadataAbsPos);
    bb.mark();

    // Decode the metadata section, then wrap each stripe's column statistics.
    OrcProto.Metadata metadata =
        extractMetadata(bb, metadataAbsPos, metadataSize, codec, bufferSize);
    List<StripeStatistics> stats = new List<StripeStatistics>(metadata.StripeStatsCount);
    foreach (OrcProto.StripeStatistics stripeStats in metadata.StripeStatsList)
    {
        stats.Add(new StripeStatistics(stripeStats.ColStatsList));
    }

    OrcProto.Footer footer = extractFooter(bb, footerAbsPos, footerSize, codec, bufferSize);

    // Leave the buffer windowed over [metadata start, PostScript start).
    bb.position(metadataAbsPos);
    bb.limit(psAbsPos);

    // TODO: do we need footer buffer here? FileInfo/FileMetaInfo is a mess...
    OrcFile.WriterVersion writerVersion = extractWriterVersion(ps);
    FileMetaInfo fmi = new FileMetaInfo(
        compressionKind, bufferSize, metadataSize, bb, writerVersion);
    return new FooterInfo(stats, footer, fmi);
}
/**
 * Resolves the writer version recorded in a PostScript.
 *
 * @param ps parsed PostScript
 * @return OrcFile.WriterVersion.ORIGINAL when the PostScript carries no writer version,
 *         otherwise the result of getWriterVersion for the recorded value
 */
private static OrcFile.WriterVersion extractWriterVersion(OrcProto.PostScript ps)
{
    if (!ps.HasWriterVersion)
    {
        return OrcFile.WriterVersion.ORIGINAL;
    }
    return getWriterVersion((int)ps.WriterVersion);
}
/**
 * Reads the tail of an ORC file from a stream and builds its FileMetaInfo.
 *
 * A first read fetches up to DIRECTORY_SIZE_GUESS bytes from the end of the file. If
 * the PostScript reports a footer plus metadata larger than what was read, a second
 * read fetches the missing bytes and both pieces are combined in one buffer.
 *
 * @param file          stream to read from
 * @param path          file path, forwarded to ensureOrcFooter and extractPostScript
 * @param maxFileLength length to treat as the end of the file; Int64.MaxValue means
 *                      use file.Length instead
 * @return meta info carrying the footer buffer (limited to end before the PostScript)
 *         and a slice taken before that limit was applied (fullFooterBuffer)
 */
private static FileMetaInfo extractMetaInfoFromFooter(Stream file, string path, long maxFileLength)
{
    // Figure out the size of the file using the option or the stream itself
    // (the stream length stands in for fs.getFileStatus(path).getLen()).
    long size = (maxFileLength == Int64.MaxValue) ? file.Length : maxFileLength;

    // Read the last bytes into a buffer to get at the PostScript.
    int readSize = (int)Math.Min(size, DIRECTORY_SIZE_GUESS);
    ByteBuffer buffer = ByteBuffer.allocate(readSize);
    Debug.Assert(buffer.position() == 0);
    file.readFully(size - readSize, buffer.array(), buffer.arrayOffset(), readSize);
    buffer.position(0);

    // The final byte holds the PostScript length; the PostScript sits right before it.
    int psLen = buffer.get(readSize - 1) & 0xff;
    ensureOrcFooter(file, path, psLen, buffer);
    int psOffset = readSize - 1 - psLen;
    OrcProto.PostScript ps = extractPostScript(buffer, path, psLen, psOffset);

    int footerSize = (int)ps.FooterLength;
    int metadataSize = (int)ps.MetadataLength;
    OrcFile.WriterVersion writerVersion = extractWriterVersion(ps);

    // Check whether the first read fell short of the whole tail.
    ByteBuffer fullFooterBuffer;
    int missing = psLen + 1 + footerSize + metadataSize - readSize;
    if (missing > 0)
    {
        // More bytes are needed: read the part preceding what we already have into the
        // front of a larger buffer, then append the bytes that were already read.
        ByteBuffer combined = ByteBuffer.allocate(missing + readSize);
        file.readFully(
            size - readSize - missing,
            combined.array(),
            combined.arrayOffset() + combined.position(),
            missing);
        combined.position(missing);
        combined.put(buffer);

        buffer = combined;
        buffer.position(0);
        fullFooterBuffer = buffer.slice();
        buffer.limit(footerSize + metadataSize);
    }
    else
    {
        // The footer is already in the buffer; just adjust position and limit.
        buffer.position(psOffset - footerSize - metadataSize);
        fullFooterBuffer = buffer.slice();
        buffer.limit(psOffset);
    }

    // Remember the position for later.
    buffer.mark();

    string compressionName = ps.Compression.ToString();
    CompressionKind compressionKind =
        (CompressionKind)Enum.Parse(typeof(CompressionKind), compressionName, true);

    return new FileMetaInfo(
        compressionKind,
        (int)ps.CompressionBlockSize,
        metadataSize,
        buffer,
        ps.VersionList,
        writerVersion,
        fullFooterBuffer);
}