/// <summary>
/// Orders two wrappers by the offset reported by their stripe information.
/// </summary>
/// <param name="o">The wrapper to compare this instance against.</param>
/// <returns>
/// -1 when this wrapper's stripe offset is smaller than <paramref name="o"/>'s,
/// 1 when it is larger, and 0 when both offsets are equal.
/// </returns>
public int CompareTo(OrcFileValueWrapper o)
{
    long ownOffset = stripeInformation.getOffset();
    long otherOffset = o.getStripeInformation().getOffset();

    if (ownOffset < otherOffset)
    {
        return -1;
    }

    return ownOffset > otherOffset ? 1 : 0;
}
// True when an earlier nextStripe call already advanced iter onto a stripe (while
// checking for end-of-file) that has not been handed out yet.
// NOTE(review): assumes iter is only advanced by nextStripe - confirm against the rest of the class.
private bool stripePrefetched;

/// <summary>
/// Fills the key/value wrappers with the next stripe whose offset lies in [start, end).
/// </summary>
/// <remarks>
/// When stripe statistics are absent but the reader reports rows, the file is flagged
/// as incompatible on the key wrapper, skipFile is set and true is returned without
/// touching the stripe enumerator.
/// </remarks>
/// <param name="keyWrapper">Receives the input path and file-level settings read from the reader.</param>
/// <param name="valueWrapper">Receives the stripe information and its statistics.</param>
/// <returns>
/// true when the wrappers were populated (or the file was flagged incompatible);
/// false when the enumerator has no further stripe inside the split.
/// </returns>
protected bool nextStripe(OrcFileKeyWrapper keyWrapper, OrcFileValueWrapper valueWrapper)
{
    // Missing stripe stats (old format). If the row count is 0 then it is an empty file and
    // no statistics are present, so "no stats" (empty file) has to be told apart from
    // "missing stats" (old format).
    bool statsMissing = stripeStatistics == null || stripeStatistics.Count == 0;
    if (statsMissing && reader.getNumberOfRows() > 0)
    {
        keyWrapper.setInputPath(path);
        keyWrapper.setIsIncompatFile(true);
        skipFile = true;
        return true;
    }

    // If the previous call looked ahead, iter.Current is already the next unread stripe;
    // calling MoveNext again here would silently skip it.
    bool active = stripePrefetched || iter.MoveNext();
    stripePrefetched = false;

    while (active)
    {
        StripeInformation si = iter.Current;
        long stripeOffset = si.getOffset();

        // If the stripe offset is outside the split boundary then ignore the current
        // stripe as it will be handled by some other mapper.
        if (stripeOffset < start || stripeOffset >= end)
        {
            // Keep the statistics index in step with the enumerator, and advance the
            // enumerator so the loop actually moves on to the following stripe.
            stripeIdx++;
            active = iter.MoveNext();
            continue;
        }

        valueWrapper.setStripeStatistics(stripeStatistics[stripeIdx++]);
        valueWrapper.setStripeInformation(si);

        // Look ahead to find out whether this was the last stripe of the file, and
        // remember that the enumerator now sits on a stripe not yet returned.
        stripePrefetched = iter.MoveNext();
        if (!stripePrefetched)
        {
            valueWrapper.setLastStripeInFile(true);
            valueWrapper.setUserMetadata(((ReaderImpl)reader).getOrcProtoUserMetadata());
        }

        keyWrapper.setInputPath(path);
        keyWrapper.setCompression(reader.getCompression());
        keyWrapper.setCompressBufferSize(reader.getCompressionSize());
        keyWrapper.setVersion(reader.getFileVersion());
        keyWrapper.setRowIndexStride(reader.getRowIndexStride());
        keyWrapper.setTypes(reader.getTypes());
        return true;
    }

    return false;
}
/// <summary>
/// Writes one stripe as a JSON object with the keys "offset", "indexLength",
/// "dataLength", "footerLength" and "rowCount", in that order.
/// </summary>
/// <param name="writer">The JSON writer that receives the object.</param>
/// <param name="stripe">The stripe whose values are written.</param>
private static void writeStripeInformation(JsonWriter writer, StripeInformation stripe)
{
    writer.newObject();

    // Each key is emitted before its value is read from the stripe.
    var offsetEntry = writer.key("offset");
    offsetEntry.value(stripe.getOffset());

    var indexLengthEntry = writer.key("indexLength");
    indexLengthEntry.value(stripe.getIndexLength());

    var dataLengthEntry = writer.key("dataLength");
    dataLengthEntry.value(stripe.getDataLength());

    var footerLengthEntry = writer.key("footerLength");
    footerLengthEntry.value(stripe.getFooterLength());

    var rowCountEntry = writer.key("rowCount");
    rowCountEntry.value(stripe.getNumberOfRows());

    writer.endObject();
}
/// <summary>
/// Reads the footer bytes of a stripe from the file and parses them into a stripe footer message.
/// </summary>
/// <param name="stripe">The stripe whose footer is read.</param>
/// <returns>The parsed stripe footer.</returns>
public OrcProto.StripeFooter readStripeFooter(StripeInformation stripe)
{
    // The read starts at the stripe offset plus the index and data lengths.
    long footerStart = stripe.getOffset() + stripe.getIndexLength() + stripe.getDataLength();
    int footerLength = (int)stripe.getFooterLength();

    // Pull the raw footer bytes into a buffer.
    ByteBuffer footerBytes = ByteBuffer.allocate(footerLength);
    file.readFully(footerStart, footerBytes.array(), footerBytes.arrayOffset(), footerLength);

    // Wrap the buffer as a single chunk and hand it to InStream with the codec and buffer size.
    List<DiskRange> chunks = new List<DiskRange> { new RecordReaderImpl.BufferChunk(footerBytes, 0) };
    var footerInput = InStream.createCodedInputStream(null, "footer", chunks, footerLength, codec, bufferSize);
    return OrcProto.StripeFooter.ParseFrom(footerInput);
}
/// <summary>
/// Reads the row indexes (and, for sarg columns, the bloom filter that directly follows)
/// listed in a stripe's footer.
/// </summary>
/// <param name="stripe">The stripe to read from; its offset is where the first stream starts.</param>
/// <param name="footer">The stripe footer, or null to read it via readStripeFooter.</param>
/// <param name="included">Per-column flags selecting which row indexes to read; null reads every column.</param>
/// <param name="indexes">Row indexes already loaded, indexed by column; null allocates a new array of typeCount entries. Non-null entries are not re-read.</param>
/// <param name="sargColumns">Per-column flags marking columns whose bloom filter should be read; null disables bloom filter reads.</param>
/// <param name="bloomFilterIndices">Destination for bloom filters, indexed by column; null allocates a new array of typeCount entries.</param>
/// <returns>An index object wrapping the row index and bloom filter arrays.</returns>
public RecordReaderImpl.Index readRowIndex(StripeInformation stripe, OrcProto.StripeFooter footer, bool[] included, OrcProto.RowIndex[] indexes, bool[] sargColumns, OrcProto.BloomFilterIndex[] bloomFilterIndices)
{
    if (footer == null)
    {
        footer = readStripeFooter(stripe);
    }
    if (indexes == null)
    {
        indexes = new OrcProto.RowIndex[typeCount];
    }
    if (bloomFilterIndices == null)
    {
        bloomFilterIndices = new OrcProto.BloomFilterIndex[typeCount];
    }

    long offset = stripe.getOffset();
    IList<OrcProto.Stream> streams = footer.StreamsList;
    for (int i = 0; i < streams.Count; i++)
    {
        OrcProto.Stream stream = streams[i];
        // null when the current stream is the last one listed in the footer
        OrcProto.Stream nextStream = (i < streams.Count - 1) ? streams[i + 1] : null;
        int col = (int)stream.Column;
        int len = (int)stream.Length;

        // Row index stream and bloom filter are interlaced: check whether the sarg column
        // is followed by a bloom filter and combine the IO so the row index and bloom
        // filter for that column are read together.
        if (stream.HasKind && stream.Kind == OrcProto.Stream.Types.Kind.ROW_INDEX)
        {
            bool readBloomFilter = false;
            // Guard against a trailing ROW_INDEX stream, which has no successor to inspect.
            if (sargColumns != null && sargColumns[col]
                && nextStream != null
                && nextStream.Kind == OrcProto.Stream.Types.Kind.BLOOM_FILTER)
            {
                len += (int)nextStream.Length;
                i += 1; // the bloom filter stream is consumed together with this one
                readBloomFilter = true;
            }

            if ((included == null || included[col]) && indexes[col] == null)
            {
                byte[] buffer = new byte[len];
                file.readFully(offset, buffer, 0, buffer.Length);
                ByteBuffer bb = ByteBuffer.wrap(buffer);
                indexes[col] = OrcProto.RowIndex.ParseFrom(InStream.create(null, "index",
                    new List<DiskRange> { new RecordReaderImpl.BufferChunk(bb, 0) },
                    (long)stream.Length, codec, bufferSize));
                if (readBloomFilter)
                {
                    // The bloom filter bytes start right after the row index bytes.
                    bb.position((int)stream.Length);
                    bloomFilterIndices[col] = OrcProto.BloomFilterIndex.ParseFrom(InStream.create(
                        null, "bloom_filter",
                        new List<DiskRange> { new RecordReaderImpl.BufferChunk(bb, 0) },
                        (long)nextStream.Length, codec, bufferSize));
                }
            }
        }

        // Advance past this stream (and the bloom filter, when it was merged into len).
        offset += len;
    }

    return new RecordReaderImpl.Index(indexes, bloomFilterIndices);
}
/// <summary>
/// Loads a stripe's footer from the file and decodes it.
/// </summary>
/// <param name="stripe">The stripe that supplies the offset and section lengths.</param>
/// <returns>The stripe footer parsed from the bytes that were read.</returns>
public OrcProto.StripeFooter readStripeFooter(StripeInformation stripe)
{
    // Footer position = stripe offset + index length + data length.
    long tailOffset = stripe.getOffset() + stripe.getIndexLength() + stripe.getDataLength();
    int tailSize = (int)stripe.getFooterLength();

    // Read the footer bytes.
    ByteBuffer tail = ByteBuffer.allocate(tailSize);
    file.readFully(tailOffset, tail.array(), tail.arrayOffset(), tailSize);

    // Present the buffer as one chunk and let InStream build the coded input for parsing.
    List<DiskRange> ranges = new List<DiskRange> { new RecordReaderImpl.BufferChunk(tail, 0) };
    var codedInput = InStream.createCodedInputStream(null, "footer", ranges, tailSize, codec, bufferSize);
    return OrcProto.StripeFooter.ParseFrom(codedInput);
}
/// <summary>
/// Walks the streams listed in a stripe footer and loads the row index of each selected
/// column, together with the bloom filter that immediately follows it for sarg columns.
/// </summary>
/// <param name="stripe">The stripe to read from; reading starts at its offset.</param>
/// <param name="footer">The stripe footer; when null it is obtained from readStripeFooter.</param>
/// <param name="included">Per-column selection flags; null selects every column.</param>
/// <param name="indexes">Per-column row indexes; null allocates typeCount entries. Entries that are already non-null are left untouched.</param>
/// <param name="sargColumns">Per-column flags for which bloom filters are wanted; null means none.</param>
/// <param name="bloomFilterIndices">Per-column bloom filters; null allocates typeCount entries.</param>
/// <returns>An index object built from the row index and bloom filter arrays.</returns>
public RecordReaderImpl.Index readRowIndex(StripeInformation stripe, OrcProto.StripeFooter footer, bool[] included, OrcProto.RowIndex[] indexes, bool[] sargColumns, OrcProto.BloomFilterIndex[] bloomFilterIndices)
{
    if (footer == null)
    {
        footer = readStripeFooter(stripe);
    }
    if (indexes == null)
    {
        indexes = new OrcProto.RowIndex[typeCount];
    }
    if (bloomFilterIndices == null)
    {
        bloomFilterIndices = new OrcProto.BloomFilterIndex[typeCount];
    }

    long offset = stripe.getOffset();
    IList<OrcProto.Stream> streams = footer.StreamsList;
    for (int i = 0; i < streams.Count; i++)
    {
        OrcProto.Stream stream = streams[i];
        // Stays null for the final stream in the list.
        OrcProto.Stream nextStream = null;
        if (i + 1 < streams.Count)
        {
            nextStream = streams[i + 1];
        }
        int col = (int)stream.Column;
        int len = (int)stream.Length;

        // Row index stream and bloom filter are interlaced: check whether the sarg column
        // has a bloom filter next and, if so, read both with a single IO.
        if (stream.HasKind && stream.Kind == OrcProto.Stream.Types.Kind.ROW_INDEX)
        {
            // A ROW_INDEX that is the last stream has no successor, so nextStream must be
            // checked for null before its kind is inspected.
            bool readBloomFilter = sargColumns != null
                && sargColumns[col]
                && nextStream != null
                && nextStream.Kind == OrcProto.Stream.Types.Kind.BLOOM_FILTER;
            if (readBloomFilter)
            {
                len += (int)nextStream.Length;
                i += 1; // skip the bloom filter stream on the next iteration
            }

            if ((included == null || included[col]) && indexes[col] == null)
            {
                byte[] buffer = new byte[len];
                file.readFully(offset, buffer, 0, buffer.Length);
                ByteBuffer bb = ByteBuffer.wrap(buffer);
                indexes[col] = OrcProto.RowIndex.ParseFrom(InStream.create(null, "index",
                    new List<DiskRange> { new RecordReaderImpl.BufferChunk(bb, 0) },
                    (long)stream.Length, codec, bufferSize));
                if (readBloomFilter)
                {
                    // Position the buffer just past the row index bytes before decoding.
                    bb.position((int)stream.Length);
                    bloomFilterIndices[col] = OrcProto.BloomFilterIndex.ParseFrom(InStream.create(
                        null, "bloom_filter",
                        new List<DiskRange> { new RecordReaderImpl.BufferChunk(bb, 0) },
                        (long)nextStream.Length, codec, bufferSize));
                }
            }
        }

        // Move to the start of the next unread stream; len covers the bloom filter too
        // when it was merged above.
        offset += len;
    }

    return new RecordReaderImpl.Index(indexes, bloomFilterIndices);
}