Exemplo n.º 1
0
        public static void addRgFilteredStreamToRanges(OrcProto.Stream stream,
                                                       bool[] includedRowGroups, bool isCompressed, OrcProto.RowIndex index,
                                                       OrcProto.ColumnEncoding encoding, OrcProto.Type type, int compressionSize, bool hasNull,
                                                       long offset, long length, DiskRangeList.CreateHelper list, bool doMergeBuffers)
        {
            for (int group = 0; group < includedRowGroups.Length; ++group)
            {
                if (!includedRowGroups[group])
                {
                    continue;
                }
                int posn = getIndexPosition(
                    encoding.Kind, type.Kind, stream.Kind, isCompressed, hasNull);
                long start = (long)index.EntryList[group].PositionsList[posn];
                long nextGroupOffset;
                bool isLast = group == (includedRowGroups.Length - 1);
                nextGroupOffset = isLast ? length : (int)index.EntryList[group + 1].PositionsList[posn];

                start += offset;
                long end = offset + estimateRgEndOffset(
                    isCompressed, isLast, nextGroupOffset, length, compressionSize);
                list.addOrMerge(start, end, doMergeBuffers, true);
            }
        }
        public RecordReaderImpl.Index readRowIndex(StripeInformation stripe,
                                                   OrcProto.StripeFooter footer, bool[] included, OrcProto.RowIndex[] indexes,
                                                   bool[] sargColumns, OrcProto.BloomFilterIndex[] bloomFilterIndices)
        {
            if (footer == null)
            {
                footer = readStripeFooter(stripe);
            }
            if (indexes == null)
            {
                indexes = new OrcProto.RowIndex[typeCount];
            }
            if (bloomFilterIndices == null)
            {
                bloomFilterIndices = new OrcProto.BloomFilterIndex[typeCount];
            }
            long offset = stripe.getOffset();
            IList <OrcProto.Stream> streams = footer.StreamsList;

            for (int i = 0; i < streams.Count; i++)
            {
                OrcProto.Stream stream     = streams[i];
                OrcProto.Stream nextStream = null;
                if (i < streams.Count - 1)
                {
                    nextStream = streams[i + 1];
                }
                int col = (int)stream.Column;
                int len = (int)stream.Length;
                // row index stream and bloom filter are interlaced, check if the sarg column contains bloom
                // filter and combine the io to read row index and bloom filters for that column together
                if (stream.HasKind && (stream.Kind == OrcProto.Stream.Types.Kind.ROW_INDEX))
                {
                    bool readBloomFilter = false;
                    if (sargColumns != null && sargColumns[col] &&
                        nextStream.Kind == OrcProto.Stream.Types.Kind.BLOOM_FILTER)
                    {
                        len            += (int)nextStream.Length;
                        i              += 1;
                        readBloomFilter = true;
                    }
                    if ((included == null || included[col]) && indexes[col] == null)
                    {
                        byte[] buffer = new byte[len];
                        file.readFully(offset, buffer, 0, buffer.Length);
                        ByteBuffer bb = ByteBuffer.wrap(buffer);
                        indexes[col] = OrcProto.RowIndex.ParseFrom(InStream.create(null, "index",
                                                                                   new List <DiskRange> {
                            new RecordReaderImpl.BufferChunk(bb, 0)
                        },
                                                                                   (long)stream.Length, codec, bufferSize));
                        if (readBloomFilter)
                        {
                            bb.position((int)stream.Length);
                            bloomFilterIndices[col] = OrcProto.BloomFilterIndex.ParseFrom(InStream.create(
                                                                                              null, "bloom_filter", new List <DiskRange> {
                                new RecordReaderImpl.BufferChunk(bb, 0)
                            },
                                                                                              (long)nextStream.Length, codec, bufferSize));
                        }
                    }
                }
                offset += len;
            }

            RecordReaderImpl.Index index = new RecordReaderImpl.Index(indexes, bloomFilterIndices);
            return(index);
        }