Exemplo n.º 1
0
 /**
  * Full constructor, used when creating file info during init and when
  * getting a new one. Every argument is stored as-is; fullFooterBuffer
  * is kept under the name footerMetaAndPsBuffer.
  */
 public FileMetaInfo(
     CompressionKind compressionKind,
     int bufferSize,
     int metadataSize,
     ByteBuffer footerBuffer,
     IList<uint> versionList,
     OrcFile.WriterVersion writerVersion,
     ByteBuffer fullFooterBuffer)
 {
     // Raw buffers.
     this.footerBuffer = footerBuffer;
     this.footerMetaAndPsBuffer = fullFooterBuffer;

     // Compression settings and section size.
     this.compressionKind = compressionKind;
     this.bufferSize = bufferSize;
     this.metadataSize = metadataSize;

     // Version information.
     this.versionList = versionList;
     this.writerVersion = writerVersion;
 }
Exemplo n.º 2
0
 /**
  * Ctor used when creating file info during init and when getting a new one.
  * All seven values are copied into this instance without transformation.
  */
 public FileMetaInfo(
     CompressionKind compressionKind, int bufferSize, int metadataSize,
     ByteBuffer footerBuffer, IList <uint> versionList,
     OrcFile.WriterVersion writerVersion, ByteBuffer fullFooterBuffer)
 {
     // Version information first, then compression settings, then buffers.
     this.writerVersion = writerVersion;
     this.versionList = versionList;

     this.compressionKind = compressionKind;
     this.bufferSize = bufferSize;
     this.metadataSize = metadataSize;

     this.footerBuffer = footerBuffer;
     // The "full" footer buffer is stored as footerMetaAndPsBuffer.
     this.footerMetaAndPsBuffer = fullFooterBuffer;
 }
Exemplo n.º 3
0
        /**
         * Restores this object's state from the given input.
         *
         * Read order: base-class fields, one flag byte, the delta count and
         * that many delta entries, then (only if flagged) the file meta info
         * fields, then (only if flagged) the file id.
         *
         * @param in source to deserialize from
         */
        public void readFields(DataInput @in)
        {
            // The base class deserializes its own fields first.
            base.readFields(@in);

            // One byte carries all boolean markers.
            byte flagBits = @in.readByte();

            hasFooter = 0 != (FOOTER_FLAG & flagBits);
            isOriginal = 0 != (ORIGINAL_FLAG & flagBits);
            hasBase = 0 != (BASE_FLAG & flagBits);
            bool fileIdPresent = 0 != (HAS_FILEID_FLAG & flagBits);

            // Drop any previously held deltas before loading the new ones.
            deltas.Clear();
            int deltaCount = @in.readInt();

            for (int remaining = deltaCount; remaining > 0; remaining--)
            {
                AcidInputFormat.DeltaMetaData delta = new AcidInputFormat.DeltaMetaData();
                delta.readFields(@in);
                deltas.Add(delta);
            }

            if (hasFooter)
            {
                // Scalar FileMetaInfo fields, in serialized order.
                // NOTE(review): the compression value is read as a string here;
                // confirm a FileMetaInfo constructor accepting a string exists.
                string compressionName = Text.readString(@in);
                int    compressionBufferSize = WritableUtils.readVInt(@in);
                int    metadataLength = WritableUtils.readVInt(@in);

                // Length-prefixed footer bytes, read straight into the backing array.
                int        footerLength = WritableUtils.readVInt(@in);
                ByteBuffer footerBytes = ByteBuffer.allocate(footerLength);
                @in.readFully(footerBytes.array(), 0, footerLength);

                // The writer version follows the footer bytes.
                OrcFile.WriterVersion version =
                    ReaderImpl.getWriterVersion(WritableUtils.readVInt(@in));

                fileMetaInfo = new FileMetaInfo(compressionName, compressionBufferSize,
                                                metadataLength, footerBytes, version);
            }

            if (fileIdPresent)
            {
                fileId = @in.readLong();
            }
        }
Exemplo n.º 4
0
 /**
  * Ctor used when reading splits. Delegates to the full constructor with
  * no version list and no full footer buffer (both passed as null).
  */
 public FileMetaInfo(
     CompressionKind compressionKind,
     int bufferSize,
     int metadataSize,
     ByteBuffer footerBuffer,
     OrcFile.WriterVersion writerVersion)
     : this(compressionKind, bufferSize, metadataSize, footerBuffer,
            versionList: null,
            writerVersion: writerVersion,
            fullFooterBuffer: null)
 {
 }
Exemplo n.º 5
0
        /**
         * Reads the tail of an ORC file and packages what it finds there into
         * a FileMetaInfo.
         *
         * The method reads up to DIRECTORY_SIZE_GUESS bytes from the end of the
         * file, takes the PostScript length from the very last byte, parses the
         * PostScript, and, if the footer and metadata sections did not fit in
         * the first read, reads the missing bytes and prepends them.
         *
         * NOTE(review): the comments below assume ByteBuffer follows
         * java.nio.ByteBuffer position/limit/slice/mark semantics - confirm
         * against the ByteBuffer implementation used by this project.
         *
         * @param file stream to read from
         * @param path passed through to ensureOrcFooter and extractPostScript
         * @param maxFileLength length to treat as the file size, or
         *        Int64.MaxValue to use file.Length instead
         * @return a FileMetaInfo holding the compression kind, compression
         *         block size, metadata length, the positioned footer buffer,
         *         the PostScript version list, the writer version, and a
         *         slice taken before the footer buffer's limit was narrowed
         */
        private static FileMetaInfo extractMetaInfoFromFooter(Stream file, string path, long maxFileLength)
        {
            // figure out the size of the file using the option or filesystem
            long size;

            if (maxFileLength == Int64.MaxValue)
            {
                // size = fs.getFileStatus(path).getLen();
                size = file.Length;
            }
            else
            {
                size = maxFileLength;
            }

            //read last bytes into buffer to get PostScript
            // Never read more than the file holds: readSize is the smaller of
            // the file size and DIRECTORY_SIZE_GUESS.
            int        readSize = (int)Math.Min(size, DIRECTORY_SIZE_GUESS);
            ByteBuffer buffer   = ByteBuffer.allocate(readSize);

            Debug.Assert(buffer.position() == 0);
            // Fill the buffer's backing array with the last readSize bytes,
            // i.e. starting at file offset (size - readSize).
            file.readFully((size - readSize),
                           buffer.array(), buffer.arrayOffset(), readSize);
            buffer.position(0);

            //read the PostScript
            //get length of PostScript
            // The last byte read is the PostScript length; "& 0xff" keeps it
            // in the 0..255 range.
            int psLen = buffer.get(readSize - 1) & 0xff;

            // Presumably validates that the tail looks like an ORC file and
            // fails otherwise - implementation not visible here.
            ensureOrcFooter(file, path, psLen, buffer);
            // The PostScript sits immediately before the length byte.
            int psOffset = readSize - 1 - psLen;

            OrcProto.PostScript ps = extractPostScript(buffer, path, psLen, psOffset);

            int footerSize   = (int)ps.FooterLength;
            int metadataSize = (int)ps.MetadataLength;

            OrcFile.WriterVersion writerVersion = extractWriterVersion(ps);

            //check if extra bytes need to be read
            // extra = bytes by which (length byte + PostScript + footer +
            // metadata) exceeds what was already read; clamped at zero.
            ByteBuffer fullFooterBuffer = null;
            int        extra            = Math.Max(0, psLen + 1 + footerSize + metadataSize - readSize);

            if (extra > 0)
            {
                //more bytes need to be read, seek back to the right place and read extra bytes
                // The new buffer is sized for the missing bytes plus everything
                // already read.
                ByteBuffer extraBuf = ByteBuffer.allocate(extra + readSize);
                // The missing bytes are the ones immediately preceding the
                // region read earlier.
                file.readFully((size - readSize - extra), extraBuf.array(),
                               extraBuf.arrayOffset() + extraBuf.position(), extra);
                extraBuf.position(extra);
                //append with already read bytes
                extraBuf.put(buffer);
                buffer = extraBuf;
                buffer.position(0);
                // Slice before narrowing the limit so the slice still covers
                // the whole tail.
                fullFooterBuffer = buffer.slice();
                // Restrict the working buffer to the first
                // footerSize + metadataSize bytes.
                buffer.limit(footerSize + metadataSize);
            }
            else
            {
                //footer is already in the bytes in buffer, just adjust position, length
                // Move to the start of the footer/metadata region, which ends
                // where the PostScript begins.
                buffer.position(psOffset - footerSize - metadataSize);
                // Slice before narrowing the limit (see the branch above).
                fullFooterBuffer = buffer.slice();
                // Stop the working buffer at the start of the PostScript.
                buffer.limit(psOffset);
            }

            // remember position for later
            buffer.mark();

            // Map the PostScript compression value onto CompressionKind by
            // name; the final "true" makes the name match case-insensitive.
            CompressionKind compressionKind = (CompressionKind)Enum.Parse(
                typeof(CompressionKind), ps.Compression.ToString(), true);

            return(new FileMetaInfo(
                       compressionKind,
                       (int)ps.CompressionBlockSize,
                       (int)ps.MetadataLength,
                       buffer,
                       ps.VersionList,
                       writerVersion,
                       fullFooterBuffer));
        }
Exemplo n.º 6
0
        /**
         * Constructor that lets the user specify additional options.
         *
         * File-level metadata is taken from the first available source, in
         * this order: options.getFileMetadata(), options.getFileMetaInfo(),
         * and finally the footer read from a stream produced by streamCreator.
         *
         * @param streamCreator factory for a stream over the file; stored on
         *        the reader and invoked here only when the footer has to be
         *        read from the file itself
         * @param path pathname for file
         * @param options options for reading
         */
        public ReaderImpl(Func <Stream> streamCreator, string path, OrcFile.ReaderOptions options)
        {
            this.streamCreator = streamCreator;
            this.path          = path;
            this.conf          = options.getConfiguration();

            FileMetadata fileMetadata = options.getFileMetadata();

            if (fileMetadata != null)
            {
                // Everything needed is already available from the supplied metadata.
                this.compressionKind       = fileMetadata.getCompressionKind();
                this.bufferSize            = fileMetadata.getCompressionBufferSize();
                this.codec                 = WriterImpl.createCodec(compressionKind);
                this.metadataSize          = fileMetadata.getMetadataSize();
                this.stripeStats           = fileMetadata.getStripeStats();
                this.versionList           = fileMetadata.getVersionList();
                this.writerVersion         = OrcFile.WriterVersionHelpers.from(fileMetadata.getWriterVersionNum());
                this.types                 = fileMetadata.getTypes();
                this.rowIndexStride        = fileMetadata.getRowIndexStride();
                this.contentLength         = fileMetadata.getContentLength();
                this.numberOfRows          = fileMetadata.getNumberOfRows();
                this.fileStats             = fileMetadata.getFileStats();
                this.stripes               = fileMetadata.getStripes();
                this.inspector             = OrcStruct.createObjectInspector(0, fileMetadata.getTypes());
                this.footerByteBuffer      = null; // not cached and not needed here
                this.userMetadata          = null; // not cached and not needed here
                this.footerMetaAndPsBuffer = null;
            }
            else
            {
                // Prefer footer info handed in through the options; fall back
                // to reading it from the file.
                FileMetaInfo footerMetaData = options.getFileMetaInfo();
                if (footerMetaData != null)
                {
                    this.footerMetaAndPsBuffer = null;
                }
                else
                {
                    // The stream is only needed to read the footer, so it is
                    // disposed as soon as the footer has been extracted.
                    using (Stream file = streamCreator())
                    {
                        footerMetaData             = extractMetaInfoFromFooter(file, path, options.getMaxLength());
                        this.footerMetaAndPsBuffer = footerMetaData.footerMetaAndPsBuffer;
                    }
                }
                MetaInfoObjExtractor rInfo =
                    new MetaInfoObjExtractor(footerMetaData.compressionKind,
                                             footerMetaData.bufferSize,
                                             footerMetaData.metadataSize,
                                             footerMetaData.footerBuffer
                                             );
                this.footerByteBuffer = footerMetaData.footerBuffer;
                this.compressionKind  = rInfo.compressionKind;
                this.codec            = rInfo.codec;
                this.bufferSize       = rInfo.bufferSize;
                this.metadataSize     = rInfo.metadataSize;
                this.stripeStats      = rInfo.metadata.StripeStatsList;
                this.types            = rInfo.footer.TypesList;
                // NOTE(review): the (int) casts below narrow the footer values;
                // confirm the target fields are really int-sized, since large
                // files could exceed Int32.MaxValue.
                this.rowIndexStride   = (int)rInfo.footer.RowIndexStride;
                this.contentLength    = (int)rInfo.footer.ContentLength;
                this.numberOfRows     = (int)rInfo.footer.NumberOfRows;
                this.userMetadata     = rInfo.footer.MetadataList;
                this.fileStats        = rInfo.footer.StatisticsList;
                this.inspector        = rInfo.inspector;
                // A FileMetaInfo built for reading splits carries no version
                // list (null). Calling Select on null would throw, so the
                // absence is propagated as null instead.
                this.versionList      = footerMetaData.versionList == null
                                        ? null
                                        : footerMetaData.versionList.Select(v => (int)v).ToList();
                this.writerVersion    = footerMetaData.writerVersion;
                this.stripes          = convertProtoStripesToStripes(rInfo.footer.StripesList);
            }
        }