// Walks every stream listed in the first stripe's footer of demo-12-zlib.orc and
// deserializes each RowIndex stream it finds. The test makes no assertions: it passes
// as long as reading and deserializing completes without throwing.
public void RowIndex_ShouldMatchExpected()
{
    var orcFile = new ProtocolHelper("demo-12-zlib.orc");

    // Postscript, then the file footer it points at.
    var psLength = orcFile.GetPostscriptLength();
    var postScript = Serializer.Deserialize<PostScript>(orcFile.GetPostscriptStream(psLength));
    var compressedFooter = orcFile.GetFooterCompressedStream(psLength, postScript.FooterLength);
    var footer = Serializer.Deserialize<Footer>(
        OrcCompressedStream.GetDecompressingStream(compressedFooter, CompressionKind.Zlib));

    // Footer of the first stripe, which lists the stripe's streams.
    var stripe = footer.Stripes[0];
    var compressedStripeFooter = orcFile.GetStripeFooterCompressedStream(
        stripe.Offset, stripe.IndexLength, stripe.DataLength, stripe.FooterLength);
    var stripeFooter = Serializer.Deserialize<StripeFooter>(
        OrcCompressedStream.GetDecompressingStream(compressedStripeFooter, CompressionKind.Zlib));

    // Each stream's position is the stripe offset plus the lengths of all streams before it.
    var position = stripe.Offset;
    foreach (var entry in stripeFooter.Streams)
    {
        if (entry.Kind == StreamKind.RowIndex)
        {
            var compressedRowIndex = orcFile.GetRowIndexCompressedStream(position, entry.Length);
            var decompressedRowIndex = OrcCompressedStream.GetDecompressingStream(compressedRowIndex, CompressionKind.Zlib);

            // Result is intentionally unused; only successful deserialization is exercised.
            Serializer.Deserialize<RowIndex>(decompressedRowIndex);
        }

        position += entry.Length;
    }
}
/// <summary>
/// Reads and deserializes the file footer from the input stream.
/// </summary>
/// <param name="postScript">Postscript supplying the footer length and the compression kind.</param>
/// <param name="postScriptLength">Length of the postscript in bytes.</param>
/// <returns>The deserialized <see cref="Protocol.Footer"/>.</returns>
Protocol.Footer ReadFooter(Protocol.PostScript postScript, byte postScriptLength)
{
    var footerLength = (long)postScript.FooterLength;

    // Measured back from the end of the stream: one trailing byte, the postscript, then the footer.
    var footerStart = -1 - postScriptLength - footerLength;
    _inputStream.Seek(footerStart, SeekOrigin.End);

    var footerSegment = new StreamSegment(_inputStream, footerLength, true);
    var decompressed = OrcCompressedStream.GetDecompressingStream(footerSegment, postScript.Compression);
    return Serializer.Deserialize<Protocol.Footer>(decompressed);
}
/// <summary>
/// Positions the input stream at <paramref name="offset"/> and returns a decompressing
/// stream over the next <paramref name="length"/> bytes, using the configured compression kind.
/// </summary>
/// <param name="offset">Absolute position, from the start of the input stream, to seek to.</param>
/// <param name="length">Number of bytes the returned segment covers.</param>
/// <returns>A stream produced by <c>OrcCompressedStream.GetDecompressingStream</c> over that segment.</returns>
Stream GetStream(ulong offset, ulong length)
{
    //TODO move from using Streams to using MemoryMapped files or another data type that decouples the Stream Position from the Read call, allowing re-entrancy
    var start = (long)offset;
    _inputStream.Seek(start, SeekOrigin.Begin);

    var window = new StreamSegment(_inputStream, (long)length, true);
    var decompressed = OrcCompressedStream.GetDecompressingStream(window, _compressionKind);
    return decompressed;
}
/// <summary>
/// Reads and deserializes the metadata section from the input stream.
/// </summary>
/// <param name="postScript">Postscript supplying the footer length, metadata length and compression kind.</param>
/// <param name="postScriptLength">Length of the postscript in bytes.</param>
/// <returns>The deserialized <see cref="Metadata"/>.</returns>
private Metadata ReadMetadata(PostScript postScript, byte postScriptLength)
{
    var footerLength = (long)postScript.FooterLength;
    var metadataLength = (long)postScript.MetadataLength;

    // Measured back from the end of the stream: one trailing byte, the postscript,
    // the footer, and finally the metadata itself.
    var metadataStart = -1 - postScriptLength - footerLength - metadataLength;
    _inputStream.Seek(metadataStart, SeekOrigin.End);

    var metadataSegment = new StreamSegment(_inputStream, metadataLength, true);
    var decompressed = OrcCompressedStream.GetDecompressingStream(metadataSegment, postScript.Compression);
    return Serializer.Deserialize<Metadata>(decompressed);
}
// Reads the metadata section of demo-12-zlib.orc and checks that it holds statistics
// for exactly one stripe, with 10 column statistics entries in that stripe.
public void Metadata_ShouldMatchExpected()
{
    var orcFile = new ProtocolHelper("demo-12-zlib.orc");

    // The postscript supplies the lengths needed to locate the metadata section.
    var psLength = orcFile.GetPostscriptLength();
    var postScript = Serializer.Deserialize<PostScript>(orcFile.GetPostscriptStream(psLength));

    var compressedMetadata = orcFile.GetMetadataCompressedStream(
        psLength, postScript.FooterLength, postScript.MetadataLength);
    var metadata = Serializer.Deserialize<Metadata>(
        OrcCompressedStream.GetDecompressingStream(compressedMetadata, CompressionKind.Zlib));

    Assert.Single(metadata.StripeStats);
    Assert.Equal(10, metadata.StripeStats[0].ColStats.Count);
}
// Reads the footer of the first stripe in demo-12-zlib.orc and checks that it
// describes 10 columns and 27 streams.
void StripeFooter_ShouldMatchExpected()
{
    var orcFile = new ProtocolHelper("demo-12-zlib.orc");

    // Postscript, then the file footer it points at.
    var psLength = orcFile.GetPostscriptLength();
    var postScript = Serializer.Deserialize<PostScript>(orcFile.GetPostscriptStream(psLength));
    var compressedFooter = orcFile.GetFooterCompressedStream(psLength, postScript.FooterLength);
    var footer = Serializer.Deserialize<Footer>(
        OrcCompressedStream.GetDecompressingStream(compressedFooter, CompressionKind.Zlib));

    // The first stripe's details locate that stripe's own footer.
    var stripe = footer.Stripes[0];
    var compressedStripeFooter = orcFile.GetStripeFooterCompressedStream(
        stripe.Offset, stripe.IndexLength, stripe.DataLength, stripe.FooterLength);
    var stripeFooter = Serializer.Deserialize<StripeFooter>(
        OrcCompressedStream.GetDecompressingStream(compressedStripeFooter, CompressionKind.Zlib));

    Assert.Equal(10, stripeFooter.Columns.Count);
    Assert.Equal(27, stripeFooter.Streams.Count);
}
// Reads the file footer of demo-12-zlib.orc and checks its row count, content length,
// row-index stride, and the layout of its single stripe.
public void Footer_ShouldMatchExpected()
{
    var helper = new ProtocolHelper("demo-12-zlib.orc");

    // The postscript supplies the footer length needed to locate the footer.
    var postscriptLength = helper.GetPostscriptLength();
    var postscriptStream = helper.GetPostscriptStream(postscriptLength);
    var postScript = Serializer.Deserialize<PostScript>(postscriptStream);

    var footerStreamCompressed = helper.GetFooterCompressedStream(postscriptLength, postScript.FooterLength);
    var footerStream = OrcCompressedStream.GetDecompressingStream(footerStreamCompressed, CompressionKind.Zlib);
    var footer = Serializer.Deserialize<Footer>(footerStream);

    Assert.Equal(1920800ul, footer.NumberOfRows);

    // Assert.Single both checks that there is exactly one stripe and hands that stripe back,
    // giving a clearer failure than comparing Count with 1.
    var stripe = Assert.Single(footer.Stripes);

    Assert.Equal(45592ul, footer.ContentLength);
    Assert.Equal(10000u, footer.RowIndexStride);

    Assert.Equal(1920800ul, stripe.NumberOfRows);
    Assert.Equal(3ul, stripe.Offset);
    Assert.Equal(14035ul, stripe.IndexLength);
    Assert.Equal(31388ul, stripe.DataLength);
    Assert.Equal(166ul, stripe.FooterLength);
}
// Decodes the Data stream of every Int column in the first stripe of demo-12-zlib.orc
// and compares each decoded value with the value expected for that column and row position.
// Int Data streams must be DirectV2 encoded, and only columns 1, 5, 7, 8 and 9 are expected.
public void ReadIntData()
{
    var orcFile = new ProtocolHelper("demo-12-zlib.orc");

    // Postscript, then the file footer it points at.
    var psLength = orcFile.GetPostscriptLength();
    var postScript = Serializer.Deserialize<PostScript>(orcFile.GetPostscriptStream(psLength));
    var compressedFooter = orcFile.GetFooterCompressedStream(psLength, postScript.FooterLength);
    var footer = Serializer.Deserialize<Footer>(
        OrcCompressedStream.GetDecompressingStream(compressedFooter, CompressionKind.Zlib));

    // Footer of the first stripe, which lists the stripe's streams and column encodings.
    var stripe = footer.Stripes[0];
    var compressedStripeFooter = orcFile.GetStripeFooterCompressedStream(
        stripe.Offset, stripe.IndexLength, stripe.DataLength, stripe.FooterLength);
    var stripeFooter = Serializer.Deserialize<StripeFooter>(
        OrcCompressedStream.GetDecompressingStream(compressedStripeFooter, CompressionKind.Zlib));

    // Each stream's position is the stripe offset plus the lengths of all streams before it.
    var position = stripe.Offset;
    foreach (var entry in stripeFooter.Streams)
    {
        var columnType = footer.Types[(int)entry.Column];
        var columnEncoding = stripeFooter.Columns[(int)entry.Column];

        if (columnType.Kind == ColumnTypeKind.Int && entry.Kind == StreamKind.Data)
        {
            Assert.Equal(ColumnEncodingKind.DirectV2, columnEncoding.Kind);

            var compressedData = orcFile.GetDataCompressedStream(position, entry.Length);
            var decompressedData = OrcCompressedStream.GetDecompressingStream(compressedData, CompressionKind.Zlib);
            var decoder = new IntegerRunLengthEncodingV2Reader(decompressedData, true);
            var values = decoder.Read().ToArray();

            // The column check stays inside the loop so that an empty stream never
            // reaches the "Unexpected column" failure.
            for (int i = 0; i < values.Length; i++)
            {
                switch (entry.Column)
                {
                    case 1:
                        Assert.Equal(i + 1, values[i]);
                        break;
                    case 5:
                        Assert.Equal(((i / 70) * 500) % 10000 + 500, values[i]);
                        break;
                    case 7:
                        Assert.Equal((i / 5600) % 7, values[i]);
                        break;
                    case 8:
                        Assert.Equal((i / 39200) % 7, values[i]);
                        break;
                    case 9:
                        Assert.Equal(i / 274400, values[i]);
                        break;
                    default:
                        Assert.True(false, "Unexpected column");
                        break;
                }
            }
        }

        position += entry.Length;
    }
}