Exemplo n.º 1
0
 /// <summary>
 /// Orders this wrapper relative to <paramref name="o"/> by the offset of the wrapped stripe.
 /// </summary>
 /// <param name="o">Wrapper to compare against.</param>
 /// <returns>-1 when this stripe's offset is smaller, 1 when it is larger, 0 when both are equal.</returns>
 public int CompareTo(OrcFileValueWrapper o)
 {
     var ownOffset = stripeInformation.getOffset();
     var otherOffset = o.getStripeInformation().getOffset();

     if (ownOffset < otherOffset)
     {
         return -1;
     }

     return ownOffset > otherOffset ? 1 : 0;
 }
        // True while iter.Current holds a stripe that the previous nextStripe call fetched as
        // look-ahead (to detect the last stripe) but that has not been handed out yet.
        private bool stripeLookaheadPending;

        /// <summary>
        /// Advances to the next stripe whose offset lies inside [start, end) and fills the
        /// key/value wrappers with its metadata.
        /// </summary>
        /// <param name="keyWrapper">Receives the input path and the file-level settings read from the reader.</param>
        /// <param name="valueWrapper">Receives the stripe information and the matching stripe statistics.</param>
        /// <returns>
        /// true when the wrappers were populated, or when the file was flagged as incompatible
        /// (skipFile is set in that case); false when no further stripe is available.
        /// </returns>
        protected bool nextStripe(OrcFileKeyWrapper keyWrapper, OrcFileValueWrapper valueWrapper)
        {
            // missing stripe stats (old format). If numRows is 0 then its an empty file and no statistics
            // is present. We have to differentiate no stats (empty file) vs missing stats (old format).
            if ((stripeStatistics == null || stripeStatistics.Count == 0) && reader.getNumberOfRows() > 0)
            {
                keyWrapper.setInputPath(path);
                keyWrapper.setIsIncompatFile(true);
                skipFile = true;
                return true;
            }

            // If the previous call already moved the enumerator onto an unconsumed stripe,
            // reuse that position; advancing again would silently drop the stripe.
            bool active = stripeLookaheadPending || iter.MoveNext();
            stripeLookaheadPending = false;

            while (active)
            {
                StripeInformation si = iter.Current;

                // if stripe offset is outside the split boundary then ignore the current
                // stripe as it will be handled by some other mapper.
                if (si.getOffset() >= start && si.getOffset() < end)
                {
                    valueWrapper.setStripeStatistics(stripeStatistics[stripeIdx++]);
                    valueWrapper.setStripeInformation(si);

                    // Look ahead one element to learn whether this is the last stripe; remember
                    // that the enumerator now sits on a stripe the next call must start from.
                    active = iter.MoveNext();
                    stripeLookaheadPending = active;
                    if (!active)
                    {
                        valueWrapper.setLastStripeInFile(true);
                        valueWrapper.setUserMetadata(((ReaderImpl)reader).getOrcProtoUserMetadata());
                    }

                    keyWrapper.setInputPath(path);
                    keyWrapper.setCompression(reader.getCompression());
                    keyWrapper.setCompressBufferSize(reader.getCompressionSize());
                    keyWrapper.setVersion(reader.getFileVersion());
                    keyWrapper.setRowIndexStride(reader.getRowIndexStride());
                    keyWrapper.setTypes(reader.getTypes());
                    return true;
                }

                // Skipped stripe: keep the statistics index aligned and move on to the next
                // stripe. Without advancing the enumerator the loop would never terminate.
                stripeIdx++;
                active = iter.MoveNext();
            }

            return false;
        }
Exemplo n.º 3
0
 /// <summary>
 /// Writes one JSON object describing <paramref name="stripe"/>: its offset, index length,
 /// data length, footer length and row count.
 /// </summary>
 /// <param name="writer">JSON writer that receives the object.</param>
 /// <param name="stripe">Stripe whose layout values are written.</param>
 private static void writeStripeInformation(JsonWriter writer, StripeInformation stripe)
 {
     writer.newObject();

     var offsetField = writer.key("offset");
     offsetField.value(stripe.getOffset());

     var indexLengthField = writer.key("indexLength");
     indexLengthField.value(stripe.getIndexLength());

     var dataLengthField = writer.key("dataLength");
     dataLengthField.value(stripe.getDataLength());

     var footerLengthField = writer.key("footerLength");
     footerLengthField.value(stripe.getFooterLength());

     var rowCountField = writer.key("rowCount");
     rowCountField.value(stripe.getNumberOfRows());

     writer.endObject();
 }
        /// <summary>
        /// Reads the footer bytes of <paramref name="stripe"/> from the file and parses them.
        /// </summary>
        /// <param name="stripe">Stripe whose footer is read.</param>
        /// <returns>The parsed stripe footer.</returns>
        public OrcProto.StripeFooter readStripeFooter(StripeInformation stripe)
        {
            // The footer position is the stripe offset plus the index and data lengths.
            long footerStart = stripe.getOffset() + stripe.getIndexLength() + stripe.getDataLength();
            int footerSize = (int)stripe.getFooterLength();

            ByteBuffer footerBytes = ByteBuffer.allocate(footerSize);
            file.readFully(footerStart, footerBytes.array(), footerBytes.arrayOffset(), footerSize);

            var chunks = new List<DiskRange> { new RecordReaderImpl.BufferChunk(footerBytes, 0) };
            var input = InStream.createCodedInputStream(null, "footer", chunks, footerSize, codec, bufferSize);
            return OrcProto.StripeFooter.ParseFrom(input);
        }
        /// <summary>
        /// Reads the row indexes of a stripe and, for sarg columns whose row index stream is
        /// directly followed by a bloom filter stream, the bloom filter index as well.
        /// </summary>
        /// <param name="stripe">Stripe whose index streams are read.</param>
        /// <param name="footer">Footer of the stripe; read via readStripeFooter when null.</param>
        /// <param name="included">Per-column flags selecting which indexes to read; null reads every column.</param>
        /// <param name="indexes">Array to fill; allocated with typeCount entries when null. Non-null entries are left untouched.</param>
        /// <param name="sargColumns">Per-column flags marking columns for which bloom filters are wanted; may be null.</param>
        /// <param name="bloomFilterIndices">Array to fill; allocated with typeCount entries when null.</param>
        /// <returns>An Index built from the row index and bloom filter index arrays.</returns>
        public RecordReaderImpl.Index readRowIndex(StripeInformation stripe,
            OrcProto.StripeFooter footer, bool[] included, OrcProto.RowIndex[] indexes,
            bool[] sargColumns, OrcProto.BloomFilterIndex[] bloomFilterIndices)
        {
            if (footer == null)
            {
                footer = readStripeFooter(stripe);
            }
            if (indexes == null)
            {
                indexes = new OrcProto.RowIndex[typeCount];
            }
            if (bloomFilterIndices == null)
            {
                bloomFilterIndices = new OrcProto.BloomFilterIndex[typeCount];
            }
            long offset = stripe.getOffset();
            IList<OrcProto.Stream> streams = footer.StreamsList;
            for (int i = 0; i < streams.Count; i++)
            {
                OrcProto.Stream stream = streams[i];
                // Stays null when the current stream is the last one in the footer.
                OrcProto.Stream nextStream = null;
                if (i < streams.Count - 1)
                {
                    nextStream = streams[i + 1];
                }
                int col = (int)stream.Column;
                int len = (int)stream.Length;
                // row index stream and bloom filter are interlaced, check if the sarg column contains bloom
                // filter and combine the io to read row index and bloom filters for that column together
                if (stream.HasKind && (stream.Kind == OrcProto.Stream.Types.Kind.ROW_INDEX))
                {
                    bool readBloomFilter = false;
                    // Guard against a ROW_INDEX stream that is the last stream: there is no
                    // following stream to inspect, so no bloom filter can be attached.
                    if (sargColumns != null && sargColumns[col] &&
                        nextStream != null &&
                        nextStream.Kind == OrcProto.Stream.Types.Kind.BLOOM_FILTER)
                    {
                        len += (int)nextStream.Length;
                        // The bloom filter stream is consumed together with the row index.
                        i += 1;
                        readBloomFilter = true;
                    }
                    if ((included == null || included[col]) && indexes[col] == null)
                    {
                        byte[] buffer = new byte[len];
                        file.readFully(offset, buffer, 0, buffer.Length);
                        ByteBuffer bb = ByteBuffer.wrap(buffer);
                        indexes[col] = OrcProto.RowIndex.ParseFrom(InStream.create(null, "index",
                            new List<DiskRange> { new RecordReaderImpl.BufferChunk(bb, 0) },
                            (long)stream.Length, codec, bufferSize));
                        if (readBloomFilter)
                        {
                            // The bloom filter bytes start right after the row index bytes.
                            bb.position((int)stream.Length);
                            bloomFilterIndices[col] = OrcProto.BloomFilterIndex.ParseFrom(InStream.create(
                                null, "bloom_filter", new List<DiskRange> { new RecordReaderImpl.BufferChunk(bb, 0) },
                                (long)nextStream.Length, codec, bufferSize));
                        }
                    }
                }
                // Advance past every stream (including a combined bloom filter) to keep the
                // file offset aligned with the next stream.
                offset += len;
            }

            RecordReaderImpl.Index index = new RecordReaderImpl.Index(indexes, bloomFilterIndices);
            return index;
        }
        /// <summary>
        /// Loads the footer section of <paramref name="stripe"/> from the file and parses it.
        /// </summary>
        /// <param name="stripe">Stripe whose footer is read.</param>
        /// <returns>The parsed stripe footer.</returns>
        public OrcProto.StripeFooter readStripeFooter(StripeInformation stripe)
        {
            // Footer start = stripe offset + index length + data length.
            long footerPosition = stripe.getOffset() + stripe.getIndexLength() + stripe.getDataLength();
            int footerLength = (int)stripe.getFooterLength();

            ByteBuffer footerBuffer = ByteBuffer.allocate(footerLength);
            file.readFully(footerPosition, footerBuffer.array(), footerBuffer.arrayOffset(), footerLength);

            var ranges = new List<DiskRange> { new RecordReaderImpl.BufferChunk(footerBuffer, 0) };
            var codedInput = InStream.createCodedInputStream(null, "footer", ranges, footerLength, codec, bufferSize);
            return OrcProto.StripeFooter.ParseFrom(codedInput);
        }
        /// <summary>
        /// Reads the row indexes of a stripe and, for sarg columns whose row index stream is
        /// directly followed by a bloom filter stream, the bloom filter index as well.
        /// </summary>
        /// <param name="stripe">Stripe whose index streams are read.</param>
        /// <param name="footer">Footer of the stripe; read via readStripeFooter when null.</param>
        /// <param name="included">Per-column flags selecting which indexes to read; null reads every column.</param>
        /// <param name="indexes">Array to fill; allocated with typeCount entries when null. Non-null entries are left untouched.</param>
        /// <param name="sargColumns">Per-column flags marking columns for which bloom filters are wanted; may be null.</param>
        /// <param name="bloomFilterIndices">Array to fill; allocated with typeCount entries when null.</param>
        /// <returns>An Index built from the row index and bloom filter index arrays.</returns>
        public RecordReaderImpl.Index readRowIndex(StripeInformation stripe,
                                                   OrcProto.StripeFooter footer, bool[] included, OrcProto.RowIndex[] indexes,
                                                   bool[] sargColumns, OrcProto.BloomFilterIndex[] bloomFilterIndices)
        {
            if (footer == null)
            {
                footer = readStripeFooter(stripe);
            }
            if (indexes == null)
            {
                indexes = new OrcProto.RowIndex[typeCount];
            }
            if (bloomFilterIndices == null)
            {
                bloomFilterIndices = new OrcProto.BloomFilterIndex[typeCount];
            }
            long offset = stripe.getOffset();
            IList <OrcProto.Stream> streams = footer.StreamsList;

            for (int i = 0; i < streams.Count; i++)
            {
                OrcProto.Stream stream     = streams[i];
                // Stays null when the current stream is the last one in the footer.
                OrcProto.Stream nextStream = null;
                if (i < streams.Count - 1)
                {
                    nextStream = streams[i + 1];
                }
                int col = (int)stream.Column;
                int len = (int)stream.Length;
                // row index stream and bloom filter are interlaced, check if the sarg column contains bloom
                // filter and combine the io to read row index and bloom filters for that column together
                if (stream.HasKind && (stream.Kind == OrcProto.Stream.Types.Kind.ROW_INDEX))
                {
                    bool readBloomFilter = false;
                    // Guard against a ROW_INDEX stream that is the last stream: there is no
                    // following stream to inspect, so no bloom filter can be attached.
                    if (sargColumns != null && sargColumns[col] &&
                        nextStream != null &&
                        nextStream.Kind == OrcProto.Stream.Types.Kind.BLOOM_FILTER)
                    {
                        len            += (int)nextStream.Length;
                        // The bloom filter stream is consumed together with the row index.
                        i              += 1;
                        readBloomFilter = true;
                    }
                    if ((included == null || included[col]) && indexes[col] == null)
                    {
                        byte[] buffer = new byte[len];
                        file.readFully(offset, buffer, 0, buffer.Length);
                        ByteBuffer bb = ByteBuffer.wrap(buffer);
                        indexes[col] = OrcProto.RowIndex.ParseFrom(InStream.create(null, "index",
                                                                                   new List <DiskRange> {
                            new RecordReaderImpl.BufferChunk(bb, 0)
                        },
                                                                                   (long)stream.Length, codec, bufferSize));
                        if (readBloomFilter)
                        {
                            // The bloom filter bytes start right after the row index bytes.
                            bb.position((int)stream.Length);
                            bloomFilterIndices[col] = OrcProto.BloomFilterIndex.ParseFrom(InStream.create(
                                                                                              null, "bloom_filter", new List <DiskRange> {
                                new RecordReaderImpl.BufferChunk(bb, 0)
                            },
                                                                                              (long)nextStream.Length, codec, bufferSize));
                        }
                    }
                }
                // Advance past every stream (including a combined bloom filter) to keep the
                // file offset aligned with the next stream.
                offset += len;
            }

            RecordReaderImpl.Index index = new RecordReaderImpl.Index(indexes, bloomFilterIndices);
            return(index);
        }
Exemplo n.º 8
0
 /// <summary>
 /// Emits a JSON object with the layout values of <paramref name="stripe"/>: offset,
 /// index length, data length, footer length and row count.
 /// </summary>
 /// <param name="writer">JSON writer that receives the object.</param>
 /// <param name="stripe">Stripe being described.</param>
 private static void writeStripeInformation(JsonWriter writer, StripeInformation stripe)
 {
     writer.newObject();

     var offsetEntry = writer.key("offset");
     offsetEntry.value(stripe.getOffset());

     var indexEntry = writer.key("indexLength");
     indexEntry.value(stripe.getIndexLength());

     var dataEntry = writer.key("dataLength");
     dataEntry.value(stripe.getDataLength());

     var footerEntry = writer.key("footerLength");
     footerEntry.value(stripe.getFooterLength());

     var rowsEntry = writer.key("rowCount");
     rowsEntry.value(stripe.getNumberOfRows());

     writer.endObject();
 }