Example #1
0
        /**
         * Parses the ORC PostScript message out of the array backing bb and
         * checks that its compression kind is one this reader recognizes.
         *
         * bb          - buffer holding the PostScript; asserted to be array-backed.
         * path        - file path, forwarded to checkOrcVersion.
         * psLen       - number of bytes occupied by the serialized PostScript.
         * psAbsOffset - offset of the PostScript inside bb; bb.arrayOffset() is
         *               added to it to index the backing array.
         *
         * Returns the parsed PostScript.
         * Throws ArgumentException when the compression kind is not NONE, ZLIB,
         * SNAPPY or LZO.
         */
        private static OrcProto.PostScript extractPostScript(ByteBuffer bb, string path,
                                                             int psLen, int psAbsOffset)
        {
            // TODO: when PB is upgraded to 2.6, newInstance(ByteBuffer) method should be used here.
            Debug.Assert(bb.hasArray());

            // Parse straight from the backing array; the buffer's position is not used here.
            byte[] backingArray = bb.array();
            int    startIndex   = bb.arrayOffset() + psAbsOffset;
            CodedInputStream postScriptStream =
                CodedInputStream.CreateInstance(backingArray, startIndex, psLen);

            OrcProto.PostScript postScript = OrcProto.PostScript.ParseFrom(postScriptStream);

            // NOTE(review): flip() on the version list is defined outside this view;
            // confirm what it does and that calling it here is intended.
            checkOrcVersion(LOG, path, postScript.VersionList.flip());

            // Only hand the PostScript back when its compression codec is a known one.
            switch (postScript.Compression)
            {
            case OrcProto.CompressionKind.NONE:
            case OrcProto.CompressionKind.ZLIB:
            case OrcProto.CompressionKind.SNAPPY:
            case OrcProto.CompressionKind.LZO:
                return postScript;

            default:
                throw new ArgumentException("Unknown compression");
            }
        }
Example #2
0
        /**
         * Extracts the necessary metadata from an externally stored buffer (fullFooterBuffer).
         *
         * The last remaining byte of bb is read as the PostScript length; the
         * PostScript, footer and metadata sections are then located by walking
         * backwards from that byte.
         *
         * bb      - buffer whose remaining bytes end with the file tail. On return
         *           its position is at the start of the metadata section and its
         *           limit at the start of the PostScript; the same buffer instance
         *           is handed to the returned FileMetaInfo.
         * srcPath - file path, forwarded to extractPostScript.
         *
         * Returns a FooterInfo built from the stripe statistics, the footer and
         * the file meta info.
         */
        public static FooterInfo extractMetaInfoFromFooter(ByteBuffer bb, string srcPath)
        {
            // Read the PostScript. Be very careful: some parts of this code work with the
            // buffer position while others use absolute offsets that must account for it.
            int startPosition    = bb.position();
            int tailBytePosition = startPosition + bb.remaining() - 1;
            int postScriptLength = bb.get(tailBytePosition) & 0xff;
            int postScriptStart  = tailBytePosition - postScriptLength;

            OrcProto.PostScript ps = extractPostScript(bb, srcPath, postScriptLength, postScriptStart);
            // Reading the PostScript must not have moved the buffer position.
            Debug.Assert(startPosition == bb.position());

            // Section sizes come from the PostScript; absolute offsets are derived by
            // stepping backwards: metadata, then footer, then PostScript.
            int footerSize     = (int)ps.FooterLength;
            int metadataSize   = (int)ps.MetadataLength;
            int footerStart    = postScriptStart - footerSize;
            int metadataStart  = footerStart - metadataSize;

            string          compressionName = ps.Compression.ToString();
            CompressionKind compressionKind =
                (CompressionKind)Enum.Parse(typeof(CompressionKind), compressionName, true);
            CompressionCodec codec = WriterImpl.createCodec(compressionKind);
            int bufferSize = (int)ps.CompressionBlockSize;

            bb.position(metadataStart);
            bb.mark();

            // Extract the metadata first, then the footer.
            OrcProto.Metadata metadata =
                extractMetadata(bb, metadataStart, metadataSize, codec, bufferSize);

            List<StripeStatistics> stripeStats =
                new List<StripeStatistics>(metadata.StripeStatsCount);
            foreach (OrcProto.StripeStatistics protoStats in metadata.StripeStatsList)
            {
                stripeStats.Add(new StripeStatistics(protoStats.ColStatsList));
            }

            OrcProto.Footer footer = extractFooter(bb, footerStart, footerSize, codec, bufferSize);

            // Narrow the buffer to the metadata + footer region before handing it on.
            bb.position(metadataStart);
            bb.limit(postScriptStart);

            // TODO: do we need footer buffer here? FileInfo/FileMetaInfo is a mess...
            OrcFile.WriterVersion writerVersion = extractWriterVersion(ps);
            FileMetaInfo metaInfo = new FileMetaInfo(
                compressionKind, bufferSize, metadataSize, bb, writerVersion);

            return new FooterInfo(stripeStats, footer, metaInfo);
        }
Example #3
0
 /**
  * Resolves the writer version recorded in a PostScript.
  *
  * Returns OrcFile.WriterVersion.ORIGINAL when the PostScript carries no
  * writer version; otherwise the stored value is cast to int and mapped
  * through getWriterVersion.
  */
 private static OrcFile.WriterVersion extractWriterVersion(OrcProto.PostScript ps)
 {
     if (!ps.HasWriterVersion)
     {
         return OrcFile.WriterVersion.ORIGINAL;
     }

     int recordedVersion = (int)ps.WriterVersion;
     return getWriterVersion(recordedVersion);
 }
Example #4
0
        /**
         * Reads the tail of an ORC file from a stream and packages the PostScript
         * fields together with the footer/metadata bytes into a FileMetaInfo.
         *
         * Up to DIRECTORY_SIZE_GUESS bytes are read from the end of the file first.
         * If the PostScript reports that the footer and metadata do not fit in that
         * read, the missing leading bytes are fetched with a second read and
         * prepended.
         *
         * file          - stream to read from; it is not closed here.
         * path          - file path, forwarded to ensureOrcFooter and extractPostScript.
         * maxFileLength - file length to assume; Int64.MaxValue means "use file.Length".
         *
         * Returns the FileMetaInfo built from the PostScript and the tail buffers.
         */
        private static FileMetaInfo extractMetaInfoFromFooter(Stream file, string path, long maxFileLength)
        {
            // Figure out the size of the file: the stream length unless a limit was supplied.
            long size = (maxFileLength == Int64.MaxValue) ? file.Length : maxFileLength;

            // Read the last bytes of the file into a buffer to get at the PostScript.
            int        tailLength = (int)Math.Min(size, DIRECTORY_SIZE_GUESS);
            ByteBuffer buffer     = ByteBuffer.allocate(tailLength);

            Debug.Assert(buffer.position() == 0);
            file.readFully(size - tailLength,
                           buffer.array(), buffer.arrayOffset(), tailLength);
            buffer.position(0);

            // The very last byte read is the length of the PostScript.
            int psLen = buffer.get(tailLength - 1) & 0xff;

            ensureOrcFooter(file, path, psLen, buffer);
            int psOffset = tailLength - 1 - psLen;

            OrcProto.PostScript ps = extractPostScript(buffer, path, psLen, psOffset);

            int footerSize   = (int)ps.FooterLength;
            int metadataSize = (int)ps.MetadataLength;

            OrcFile.WriterVersion writerVersion = extractWriterVersion(ps);

            // Number of bytes of footer + metadata that the first read did not cover.
            int missingBytes = Math.Max(0, psLen + 1 + footerSize + metadataSize - tailLength);

            ByteBuffer fullFooterBuffer;

            if (missingBytes == 0)
            {
                // Footer is already in the bytes in buffer; just adjust position and limit.
                buffer.position(psOffset - footerSize - metadataSize);
                fullFooterBuffer = buffer.slice();
                buffer.limit(psOffset);
            }
            else
            {
                // More bytes are needed: read the part just before the first read
                // into the front of a larger buffer.
                ByteBuffer combined = ByteBuffer.allocate(missingBytes + tailLength);
                file.readFully(size - tailLength - missingBytes, combined.array(),
                               combined.arrayOffset() + combined.position(), missingBytes);
                combined.position(missingBytes);
                // Append the bytes that were already read.
                combined.put(buffer);

                buffer = combined;
                buffer.position(0);
                fullFooterBuffer = buffer.slice();
                buffer.limit(footerSize + metadataSize);
            }

            // Remember the position for later.
            buffer.mark();

            string          compressionName = ps.Compression.ToString();
            CompressionKind compressionKind =
                (CompressionKind)Enum.Parse(typeof(CompressionKind), compressionName, true);
            int compressionBlockSize = (int)ps.CompressionBlockSize;

            return new FileMetaInfo(
                compressionKind,
                compressionBlockSize,
                metadataSize,
                buffer,
                ps.VersionList,
                writerVersion,
                fullFooterBuffer);
        }