Example #1
0
        /// <summary>
        /// Opens "demo-12-zlib.orc", reads the postscript, footer and first stripe footer, then
        /// deserializes every stream of kind <c>RowIndex</c> in that stripe.
        /// The method contains no explicit assertions: it succeeds when nothing throws.
        /// </summary>
        public void RowIndex_ShouldMatchExpected()
        {
            var helper = new ProtocolHelper("demo-12-zlib.orc");

            // Postscript first: it carries the length needed to locate the footer.
            var postscriptLength = helper.GetPostscriptLength();
            var postScript = Serializer.Deserialize<PostScript>(helper.GetPostscriptStream(postscriptLength));

            // Footer: decompress with Zlib, then deserialize.
            var compressedFooter = helper.GetFooterCompressedStream(postscriptLength, postScript.FooterLength);
            var footer = Serializer.Deserialize<Footer>(
                OrcCompressedStream.GetDecompressingStream(compressedFooter, CompressionKind.Zlib));

            // Stripe footer of the first stripe lists the streams to walk.
            var stripe = footer.Stripes[0];
            var compressedStripeFooter = helper.GetStripeFooterCompressedStream(
                stripe.Offset, stripe.IndexLength, stripe.DataLength, stripe.FooterLength);
            var stripeFooter = Serializer.Deserialize<StripeFooter>(
                OrcCompressedStream.GetDecompressingStream(compressedStripeFooter, CompressionKind.Zlib));

            // Running position of the current stream, starting at the stripe's offset.
            var position = stripe.Offset;

            foreach (var stream in stripeFooter.Streams)
            {
                if (stream.Kind == StreamKind.RowIndex)
                {
                    var compressedIndex = helper.GetRowIndexCompressedStream(position, stream.Length);
                    var decompressedIndex = OrcCompressedStream.GetDecompressingStream(compressedIndex, CompressionKind.Zlib);

                    // The result is intentionally unused; a failure to deserialize throws.
                    var parsedIndex = Serializer.Deserialize<RowIndex>(decompressedIndex);
                }

                // Advance past this stream whether or not it was a row index.
                position += stream.Length;
            }
        }
Example #2
0
        /// <summary>
        /// Reads the footer from <c>_inputStream</c>: seeks backwards from the end of the stream,
        /// wraps the next <c>FooterLength</c> bytes in a <c>StreamSegment</c>, decompresses them
        /// using the postscript's compression kind and deserializes the result.
        /// </summary>
        /// <param name="postScript">Postscript supplying <c>FooterLength</c> and <c>Compression</c>.</param>
        /// <param name="postScriptLength">Length of the postscript in bytes.</param>
        /// <returns>The deserialized footer.</returns>
        Protocol.Footer ReadFooter(Protocol.PostScript postScript, byte postScriptLength)
        {
            var footerLength = (long)postScript.FooterLength;

            // Position relative to the end: one extra byte (presumably the trailing byte that
            // stores the postscript length - TODO confirm), then the postscript, then the footer.
            var footerStart = -1 - postScriptLength - footerLength;
            _inputStream.Seek(footerStart, SeekOrigin.End);

            // NOTE(review): the 'true' flag's meaning is defined by StreamSegment, which is not visible here.
            var footerSegment = new StreamSegment(_inputStream, footerLength, true);
            var decompressed = OrcCompressedStream.GetDecompressingStream(footerSegment, postScript.Compression);

            return Serializer.Deserialize<Protocol.Footer>(decompressed);
        }
Example #3
0
        /// <summary>
        /// Positions <c>_inputStream</c> at <paramref name="offset"/> (from the beginning) and returns
        /// a decompressing stream over the following <paramref name="length"/> bytes, using the
        /// instance's <c>_compressionKind</c>.
        /// </summary>
        /// <param name="offset">Absolute position in the input stream to seek to.</param>
        /// <param name="length">Number of bytes the wrapped segment covers.</param>
        /// <returns>The stream produced by <c>OrcCompressedStream.GetDecompressingStream</c>.</returns>
        Stream GetStream(ulong offset, ulong length)
        {
            //TODO move from using Streams to using MemoryMapped files or another data type that decouples the Stream Position from the Read call, allowing re-entrancy
            _inputStream.Seek((long)offset, SeekOrigin.Begin);

            // NOTE(review): the 'true' flag's meaning is defined by StreamSegment, which is not visible here.
            var byteCount = (long)length;
            return OrcCompressedStream.GetDecompressingStream(
                new StreamSegment(_inputStream, byteCount, true),
                _compressionKind);
        }
Example #4
0
        /// <summary>
        /// Reads the metadata section from <c>_inputStream</c>: seeks backwards from the end of the
        /// stream past the postscript, footer and metadata, wraps the next <c>MetadataLength</c> bytes
        /// in a <c>StreamSegment</c>, decompresses them with the postscript's compression kind and
        /// deserializes the result.
        /// </summary>
        /// <param name="postScript">Postscript supplying <c>FooterLength</c>, <c>MetadataLength</c> and <c>Compression</c>.</param>
        /// <param name="postScriptLength">Length of the postscript in bytes.</param>
        /// <returns>The deserialized metadata.</returns>
        private Metadata ReadMetadata(PostScript postScript, byte postScriptLength)
        {
            var footerLength = (long)postScript.FooterLength;
            var metadataLength = (long)postScript.MetadataLength;

            // Position relative to the end: one extra byte (presumably the trailing byte that
            // stores the postscript length - TODO confirm), the postscript, the footer, the metadata.
            var metadataStart = -1 - postScriptLength - footerLength - metadataLength;
            _inputStream.Seek(metadataStart, SeekOrigin.End);

            // NOTE(review): the 'true' flag's meaning is defined by StreamSegment, which is not visible here.
            var metadataSegment = new StreamSegment(_inputStream, metadataLength, true);
            var decompressed = OrcCompressedStream.GetDecompressingStream(metadataSegment, postScript.Compression);

            return Serializer.Deserialize<Metadata>(decompressed);
        }
Example #5
0
        /// <summary>
        /// Reads the metadata section of "demo-12-zlib.orc" and checks that it holds exactly one
        /// stripe-statistics entry with ten column statistics.
        /// </summary>
        public void Metadata_ShouldMatchExpected()
        {
            var helper = new ProtocolHelper("demo-12-zlib.orc");

            // The postscript provides the footer and metadata lengths used to locate the metadata.
            var postscriptLength = helper.GetPostscriptLength();
            var postScript = Serializer.Deserialize<PostScript>(helper.GetPostscriptStream(postscriptLength));

            var compressedMetadata = helper.GetMetadataCompressedStream(
                postscriptLength,
                postScript.FooterLength,
                postScript.MetadataLength);
            var metadata = Serializer.Deserialize<Metadata>(
                OrcCompressedStream.GetDecompressingStream(compressedMetadata, CompressionKind.Zlib));

            Assert.Single(metadata.StripeStats);
            Assert.Equal(10, metadata.StripeStats[0].ColStats.Count);
        }
Example #6
0
        /// <summary>
        /// Reads the footer of the first stripe in "demo-12-zlib.orc" and checks that it describes
        /// ten columns and twenty-seven streams.
        /// </summary>
        void StripeFooter_ShouldMatchExpected()
        {
            var helper = new ProtocolHelper("demo-12-zlib.orc");

            // Postscript -> file footer.
            var postscriptLength = helper.GetPostscriptLength();
            var postScript = Serializer.Deserialize<PostScript>(helper.GetPostscriptStream(postscriptLength));
            var compressedFooter = helper.GetFooterCompressedStream(postscriptLength, postScript.FooterLength);
            var footer = Serializer.Deserialize<Footer>(
                OrcCompressedStream.GetDecompressingStream(compressedFooter, CompressionKind.Zlib));

            // File footer -> footer of the first stripe.
            var stripe = footer.Stripes[0];
            var compressedStripeFooter = helper.GetStripeFooterCompressedStream(
                stripe.Offset, stripe.IndexLength, stripe.DataLength, stripe.FooterLength);
            var stripeFooter = Serializer.Deserialize<StripeFooter>(
                OrcCompressedStream.GetDecompressingStream(compressedStripeFooter, CompressionKind.Zlib));

            Assert.Equal(10, stripeFooter.Columns.Count);
            Assert.Equal(27, stripeFooter.Streams.Count);
        }
Example #7
0
        /// <summary>
        /// Reads the file footer of "demo-12-zlib.orc" and verifies its top-level values
        /// (row count, content length, row index stride) as well as the row count, offset and
        /// section lengths of its only stripe.
        /// </summary>
        public void Footer_ShouldMatchExpected()
        {
            var helper                 = new ProtocolHelper("demo-12-zlib.orc");
            var postscriptLength       = helper.GetPostscriptLength();
            var postscriptStream       = helper.GetPostscriptStream(postscriptLength);
            var postScript             = Serializer.Deserialize <PostScript>(postscriptStream);
            var footerLength           = postScript.FooterLength;
            var footerStreamCompressed = helper.GetFooterCompressedStream(postscriptLength, footerLength);
            var footerStream           = OrcCompressedStream.GetDecompressingStream(footerStreamCompressed, CompressionKind.Zlib);
            var footer                 = Serializer.Deserialize <Footer>(footerStream);

            Assert.Equal(1920800ul, footer.NumberOfRows);
            // Assert.Single states the intent directly and reports the collection contents on
            // failure, instead of comparing Count against a literal.
            Assert.Single(footer.Stripes);
            Assert.Equal(45592ul, footer.ContentLength);
            Assert.Equal(10000u, footer.RowIndexStride);

            // Exactly one stripe was asserted above, so index 0 is safe here.
            var stripe = footer.Stripes[0];
            Assert.Equal(1920800ul, stripe.NumberOfRows);
            Assert.Equal(3ul, stripe.Offset);
            Assert.Equal(14035ul, stripe.IndexLength);
            Assert.Equal(31388ul, stripe.DataLength);
            Assert.Equal(166ul, stripe.FooterLength);
        }
Example #8
0
        /// <summary>
        /// Walks the streams of the first stripe in "demo-12-zlib.orc" and, for every data stream
        /// belonging to an Int column, decodes it with <c>IntegerRunLengthEncodingV2Reader</c> and
        /// compares each value with the formula expected for that column id (1, 5, 7, 8 or 9).
        /// Any other Int column id fails the test with "Unexpected column".
        /// </summary>
        public void ReadIntData()
        {
            var helper = new ProtocolHelper("demo-12-zlib.orc");

            // Postscript -> file footer.
            var postscriptLength = helper.GetPostscriptLength();
            var postScript = Serializer.Deserialize<PostScript>(helper.GetPostscriptStream(postscriptLength));
            var compressedFooter = helper.GetFooterCompressedStream(postscriptLength, postScript.FooterLength);
            var footer = Serializer.Deserialize<Footer>(
                OrcCompressedStream.GetDecompressingStream(compressedFooter, CompressionKind.Zlib));

            // File footer -> footer of the first stripe.
            var stripe = footer.Stripes[0];
            var compressedStripeFooter = helper.GetStripeFooterCompressedStream(
                stripe.Offset, stripe.IndexLength, stripe.DataLength, stripe.FooterLength);
            var stripeFooter = Serializer.Deserialize<StripeFooter>(
                OrcCompressedStream.GetDecompressingStream(compressedStripeFooter, CompressionKind.Zlib));

            // Running position of the current stream, starting at the stripe's offset.
            var position = stripe.Offset;

            foreach (var stream in stripeFooter.Streams)
            {
                // Both lookups happen for every stream, exactly as the column id dictates.
                var footerColumn = footer.Types[(int)stream.Column];
                var stripeColumn = stripeFooter.Columns[(int)stream.Column];

                if (footerColumn.Kind == ColumnTypeKind.Int && stream.Kind == StreamKind.Data)
                {
                    Assert.Equal(ColumnEncodingKind.DirectV2, stripeColumn.Kind);

                    var compressedData = helper.GetDataCompressedStream(position, stream.Length);
                    var decompressedData = OrcCompressedStream.GetDecompressingStream(compressedData, CompressionKind.Zlib);
                    var values = new IntegerRunLengthEncodingV2Reader(decompressedData, true).Read().ToArray();

                    for (int i = 0; i < values.Length; i++)
                    {
                        int expected;
                        switch (stream.Column)
                        {
                            case 1:
                                expected = i + 1;
                                break;
                            case 5:
                                expected = ((i / 70) * 500) % 10000 + 500;
                                break;
                            case 7:
                                expected = (i / 5600) % 7;
                                break;
                            case 8:
                                expected = (i / 39200) % 7;
                                break;
                            case 9:
                                expected = (i / 274400);
                                break;
                            default:
                                Assert.True(false, "Unexpected column");
                                // Not reached at run time (the assertion above throws); needed so
                                // 'expected' is definitely assigned below.
                                continue;
                        }

                        Assert.Equal(expected, values[i]);
                    }
                }

                // Advance past this stream whether or not it was checked.
                position += stream.Length;
            }
        }