Exemple #1
0
            /**
             * Builds the metadata, footer and object inspector from a buffer holding the
             * metadata section immediately followed by the footer section.
             *
             * The metadata is read from the buffer's current position for metadataSize bytes;
             * the footer is read from the byte after that up to the buffer's limit. The
             * buffer's position is put back to its initial value before the constructor returns.
             *
             * @param compressionKind compression kind used to create the codec
             * @param bufferSize buffer size passed through to the extract helpers
             * @param metadataSize number of bytes occupied by the metadata section
             * @param footerBuffer buffer positioned at the start of the metadata section
             */
            public MetaInfoObjExtractor(CompressionKind compressionKind, int bufferSize, int metadataSize,
                                        ByteBuffer footerBuffer)
            {
                this.compressionKind = compressionKind;
                this.bufferSize      = bufferSize;
                this.metadataSize    = metadataSize;
                this.codec           = WriterImpl.createCodec(compressionKind);

                // Remember where the caller left the buffer so it can be restored below.
                int metadataStart = footerBuffer.position();
                int footerStart   = metadataStart + metadataSize;
                // Whatever lies between the end of the metadata and the limit is the footer.
                int footerLength  = footerBuffer.limit() - metadataStart - metadataSize;

                this.metadata = extractMetadata(footerBuffer, metadataStart, metadataSize, codec, bufferSize);
                this.footer   = extractFooter(footerBuffer, footerStart, footerLength, codec, bufferSize);

                // Restore the position recorded above before handing the buffer back.
                footerBuffer.position(metadataStart);

                this.inspector = OrcStruct.createObjectInspector(0, footer.TypesList);
            }
Exemple #2
0
        /**
         * Extracts the necessary metadata from an externally stored buffer (fullFooterBuffer).
         *
         * The readable region of bb is laid out, front to back, as metadata, footer,
         * PostScript and a final single byte holding the PostScript length. All section
         * offsets are computed backwards from that final byte as absolute positions.
         *
         * Side effect: on return bb's position is the start of the metadata section and its
         * limit is the start of the PostScript; that same bb instance is handed to the
         * returned FileMetaInfo.
         *
         * @param bb buffer whose remaining bytes end with the file tail
         * @param srcPath path passed through to extractPostScript
         * @return the stripe statistics, the footer and the file meta info
         */
        public static FooterInfo extractMetaInfoFromFooter(ByteBuffer bb, string srcPath)
        {
            // Read the PostScript. Be very careful as some parts of this historically use bb position
            // and some use absolute offsets that have to take position into account.
            int startPos         = bb.position();
            int tailBytePos      = startPos + bb.remaining() - 1;
            int postScriptLength = bb.get(tailBytePos) & 0xff;
            int postScriptPos    = tailBytePos - postScriptLength;

            OrcProto.PostScript postScript = extractPostScript(bb, srcPath, postScriptLength, postScriptPos);
            // extractPostScript is expected to leave the buffer position where it found it.
            Debug.Assert(startPos == bb.position());

            // Extract PS information; each section sits directly in front of the one after it.
            int footerLength   = (int)postScript.FooterLength;
            int metadataLength = (int)postScript.MetadataLength;
            int footerPos      = postScriptPos - footerLength;
            int metadataPos    = footerPos - metadataLength;

            CompressionKind kind = (CompressionKind)Enum.Parse(
                typeof(CompressionKind), postScript.Compression.ToString(), true);
            CompressionCodec compressionCodec = WriterImpl.createCodec(kind);
            int blockSize = (int)postScript.CompressionBlockSize;

            // Position at the metadata start and set the mark there.
            bb.position(metadataPos);
            bb.mark();

            // Extract metadata and footer.
            OrcProto.Metadata fileMetadata = extractMetadata(
                bb, metadataPos, metadataLength, compressionCodec, blockSize);
            List <StripeStatistics> stripeStatistics = new List <StripeStatistics>(fileMetadata.StripeStatsCount);

            foreach (OrcProto.StripeStatistics protoStats in fileMetadata.StripeStatsList)
            {
                StripeStatistics converted = new StripeStatistics(protoStats.ColStatsList);
                stripeStatistics.Add(converted);
            }

            OrcProto.Footer fileFooter = extractFooter(bb, footerPos, footerLength, compressionCodec, blockSize);

            // Narrow the buffer to [metadata start, PostScript start) before passing it on.
            bb.position(metadataPos);
            bb.limit(postScriptPos);

            // TODO: do we need footer buffer here? FileInfo/FileMetaInfo is a mess...
            FileMetaInfo metaInfo = new FileMetaInfo(
                kind, blockSize, metadataLength, bb, extractWriterVersion(postScript));

            return new FooterInfo(stripeStatistics, fileFooter, metaInfo);
        }
Exemple #3
0
        /**
         * Constructor that lets the user specify additional options.
         *
         * Reader state is filled from the first source that is available, in this order:
         * options.getFileMetadata(), then options.getFileMetaInfo(), and finally the file
         * tail read from a stream obtained through streamCreator.
         *
         * @param streamCreator factory for a stream over the file; only invoked here when the
         *                      options carry neither FileMetadata nor FileMetaInfo
         * @param path pathname for file
         * @param options options for reading
         */
        public ReaderImpl(Func <Stream> streamCreator, string path, OrcFile.ReaderOptions options)
        {
            this.streamCreator = streamCreator;
            this.path          = path;
            conf               = options.getConfiguration();

            FileMetadata cached = options.getFileMetadata();

            if (cached != null)
            {
                // Everything needed is already available on the supplied FileMetadata.
                compressionKind = cached.getCompressionKind();
                codec           = WriterImpl.createCodec(compressionKind);
                bufferSize      = cached.getCompressionBufferSize();
                metadataSize    = cached.getMetadataSize();
                stripeStats     = cached.getStripeStats();
                versionList     = cached.getVersionList();
                writerVersion   = OrcFile.WriterVersionHelpers.from(cached.getWriterVersionNum());
                types           = cached.getTypes();
                rowIndexStride  = cached.getRowIndexStride();
                contentLength   = cached.getContentLength();
                numberOfRows    = cached.getNumberOfRows();
                fileStats       = cached.getFileStats();
                stripes         = cached.getStripes();
                inspector       = OrcStruct.createObjectInspector(0, cached.getTypes());

                // Not cached and not needed on this path.
                footerByteBuffer      = null;
                userMetadata          = null;
                footerMetaAndPsBuffer = null;
                return;
            }

            FileMetaInfo tailInfo;

            if (options.getFileMetaInfo() == null)
            {
                // Nothing supplied by the caller: read the tail from the file itself.
                using (Stream input = streamCreator())
                {
                    tailInfo              = extractMetaInfoFromFooter(input, path, options.getMaxLength());
                    footerMetaAndPsBuffer = tailInfo.footerMetaAndPsBuffer;
                }
            }
            else
            {
                tailInfo              = options.getFileMetaInfo();
                footerMetaAndPsBuffer = null;
            }

            MetaInfoObjExtractor extracted = new MetaInfoObjExtractor(
                tailInfo.compressionKind, tailInfo.bufferSize, tailInfo.metadataSize, tailInfo.footerBuffer);

            footerByteBuffer = tailInfo.footerBuffer;
            versionList      = tailInfo.versionList.Select(v => (int)v).ToList();
            writerVersion    = tailInfo.writerVersion;

            compressionKind = extracted.compressionKind;
            codec           = extracted.codec;
            bufferSize      = extracted.bufferSize;
            metadataSize    = extracted.metadataSize;
            inspector       = extracted.inspector;
            stripeStats     = extracted.metadata.StripeStatsList;

            // NOTE(review): the (int) casts below narrow the footer values; if ContentLength or
            // NumberOfRows can exceed the int range they would be truncated here - confirm.
            types          = extracted.footer.TypesList;
            rowIndexStride = (int)extracted.footer.RowIndexStride;
            contentLength  = (int)extracted.footer.ContentLength;
            numberOfRows   = (int)extracted.footer.NumberOfRows;
            userMetadata   = extracted.footer.MetadataList;
            fileStats      = extracted.footer.StatisticsList;
            stripes        = convertProtoStripesToStripes(extracted.footer.StripesList);
        }