Esempio n. 1
0
 public void testUncompressed()
 {
     OutputCollector collect = new OutputCollector();
     OutStream @out = new OutStream("test", 100, null, collect);
     PositionCollector[] positions = new PositionCollector[1024];
     for (int i = 0; i < 1024; ++i)
     {
         positions[i] = new PositionCollector();
         @out.getPosition(positions[i]);
         @out.WriteByte((byte)i);
     }
     @out.Flush();
     Assert.Equal(1024, collect.buffer.size());
     for (int i = 0; i < 1024; ++i)
     {
         Assert.Equal((byte)i, collect.buffer.get(i));
     }
     ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
     collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
     inBuf.flip();
     #pragma warning disable 612
     InStream @in = InStream.create(null, "test", new ByteBuffer[] { inBuf },
         new long[] { 0 }, inBuf.remaining(), null, 100);
     #pragma warning restore 612
     Assert.Equal("uncompressed stream test position: 0 length: 1024" +
                  " range: 0 offset: 0 limit: 0",
                  @in.ToString());
     for (int i = 0; i < 1024; ++i)
     {
         int x = @in.ReadByte();
         Assert.Equal(i & 0xff, x);
     }
     for (int i = 1023; i >= 0; --i)
     {
         @in.seek(positions[i]);
         Assert.Equal(i & 0xff, @in.ReadByte());
     }
 }
Esempio n. 2
0
 private void runTest(int numBits)
 {
     long[] inp = new long[SIZE];
     for (int i = 0; i < SIZE; i++)
     {
         long val = 0;
         if (numBits <= 32)
         {
             if (numBits == 1)
             {
                 val = -1 * rand.Next(2);
             }
             else
             {
                 int max = (numBits == 32) ? Int32.MaxValue : (int)Math.Pow(2, numBits - 1);
                 val = rand.Next(max);
             }
         }
         else
         {
             val = nextLong(rand, (long)Math.Pow(2, numBits - 2));
         }
         if (val % 2 == 0)
         {
             val = -val;
         }
         inp[i] = val;
     }
     long[] deltaEncoded = deltaEncode(inp);
     long minInput = deltaEncoded.Min();
     long maxInput = deltaEncoded.Max();
     long rangeInput = maxInput - minInput;
     SerializationUtils utils = new SerializationUtils();
     int fixedWidth = utils.findClosestNumBits(rangeInput);
     TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
     OutStream output = new OutStream("test", SIZE, null, collect);
     utils.writeInts(deltaEncoded, 0, deltaEncoded.Length, fixedWidth, output);
     output.Flush();
     ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
     collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
     inBuf.flip();
     long[] buff = new long[SIZE];
     #pragma warning disable 612
     utils.readInts(buff, 0, SIZE, fixedWidth, InStream.create(null, "test", new ByteBuffer[] { inBuf },
         new long[] { 0 }, inBuf.remaining(), null, SIZE));
     #pragma warning restore 612
     for (int i = 0; i < SIZE; i++)
     {
         buff[i] = utils.zigzagDecode(buff[i]);
     }
     Assert.Equal(numBits, fixedWidth);
     Assert.Equal(inp, buff);
 }
Esempio n. 3
0
 Stream getStream()
 {
     if (rawWriter == null)
     {
         rawWriter = baseStream;
         rawWriter.Write(OrcFile.MAGIC, 0, OrcFile.MAGIC.Length);
         headerLength = rawWriter.Position;
         writer = new OutStream("metadata", bufferSize, codec, new DirectStream(rawWriter));
         protobufWriter = CodedOutputStream.CreateInstance(writer);
     }
     return rawWriter;
 }
Esempio n. 4
0
 /**
  * Create a tree writer.
  * @param columnId the column id of the column to write
  * @param inspector the object inspector to use
  * @param schema the row schema
  * @param streamFactory limited access to the Writer's data.
  * @param nullable can the value be null?
  * @
  */
 protected TreeWriter(
     int columnId,
     ObjectInspector inspector,
     TypeDescription schema,
     StreamFactory streamFactory,
     bool nullable)
 {
     this.streamFactory = streamFactory;
     this.isCompressed = streamFactory.isCompressed();
     this.id = columnId;
     this.inspector = inspector;
     if (nullable)
     {
         isPresentOutStream = streamFactory.createStream(id,
             OrcProto.Stream.Types.Kind.PRESENT);
         isPresent = new BitFieldWriter(isPresentOutStream, 1);
     }
     else
     {
         isPresent = null;
     }
     this.foundNulls = false;
     createBloomFilter = streamFactory.getBloomFilterColumns()[columnId];
     indexStatistics = ColumnStatisticsImpl.create(schema);
     stripeColStatistics = ColumnStatisticsImpl.create(schema);
     fileStatistics = ColumnStatisticsImpl.create(schema);
     childrenWriters = new TreeWriter[0];
     rowIndex = OrcProto.RowIndex.CreateBuilder();
     rowIndexEntry = OrcProto.RowIndexEntry.CreateBuilder();
     rowIndexPosition = new RowIndexPositionRecorder(rowIndexEntry);
     stripeStatsBuilders = new List<OrcProto.StripeStatistics.Builder>();
     if (streamFactory.buildIndex())
     {
         rowIndexStream = streamFactory.createStream(id, OrcProto.Stream.Types.Kind.ROW_INDEX);
     }
     else
     {
         rowIndexStream = null;
     }
     if (createBloomFilter)
     {
         bloomFilterEntry = OrcProto.BloomFilter.CreateBuilder();
         bloomFilterIndex = OrcProto.BloomFilterIndex.CreateBuilder();
         bloomFilterStream = streamFactory.createStream(id, OrcProto.Stream.Types.Kind.BLOOM_FILTER);
         bloomFilter = new BloomFilter(streamFactory.getRowIndexStride(), streamFactory.getBloomFilterFPP());
     }
     else
     {
         bloomFilterEntry = null;
         bloomFilterIndex = null;
         bloomFilterStream = null;
         bloomFilter = null;
     }
 }
Esempio n. 5
0
 public StringBaseTreeWriter(int columnId,
                  ObjectInspector inspector,
                  TypeDescription schema,
                  StreamFactory writer,
                  bool nullable)
     : base(columnId, inspector, schema, writer, nullable)
 {
     this.isDirectV2 = isNewWriteFormat(writer);
     stringOutput = writer.createStream(id,
         OrcProto.Stream.Types.Kind.DICTIONARY_DATA);
     lengthOutput = createIntegerWriter(writer.createStream(id,
         OrcProto.Stream.Types.Kind.LENGTH), false, isDirectV2, writer);
     rowOutput = createIntegerWriter(writer.createStream(id,
         OrcProto.Stream.Types.Kind.DATA), false, isDirectV2, writer);
     recordPosition(rowIndexPosition);
     rowIndexValueCount.Add(0L);
     buildIndex = writer.buildIndex();
     directStreamOutput = writer.createStream(id, OrcProto.Stream.Types.Kind.DATA);
     directLengthOutput = createIntegerWriter(writer.createStream(id,
         OrcProto.Stream.Types.Kind.LENGTH), false, isDirectV2, writer);
     OrcFile.WriterOptions options = writer.getOptions();
     dictionaryKeySizeThreshold = options.getDictionaryKeySizeThreshold();
     strideDictionaryCheck = options.getStrideDictionaryCheck();
     doneDictionaryCheck = false;
 }
Esempio n. 6
0
 public BufferedStream(string name, int bufferSize, CompressionCodec codec)
 {
     outStream = new OutStream(name, bufferSize, codec, this);
 }
Esempio n. 7
0
        public void testUncompressedDisjointBuffers()
        {
            OutputCollector collect = new OutputCollector();
            OutStream @out = new OutStream("test", 400, null, collect);
            PositionCollector[] positions = new PositionCollector[1024];
            DataOutput stream = new DataOutputStream(@out);
            for (int i = 0; i < 1024; ++i)
            {
                positions[i] = new PositionCollector();
                @out.getPosition(positions[i]);
                stream.writeInt(i);
            }
            @out.Flush();
            Assert.Equal("test", @out.ToString());
            Assert.Equal(4096, collect.buffer.size());
            ByteBuffer[] inBuf = new ByteBuffer[3];
            inBuf[0] = ByteBuffer.allocate(1100);
            inBuf[1] = ByteBuffer.allocate(2200);
            inBuf[2] = ByteBuffer.allocate(1100);
            collect.buffer.setByteBuffer(inBuf[0], 0, 1024);
            collect.buffer.setByteBuffer(inBuf[1], 1024, 2048);
            collect.buffer.setByteBuffer(inBuf[2], 3072, 1024);

            for (int i = 0; i < inBuf.Length; ++i)
            {
                inBuf[i].flip();
            }
            InStream @in = InStream.create(null, "test", inBuf,
                new long[] { 0, 1024, 3072 }, 4096, null, 400);
            Assert.Equal("uncompressed stream test position: 0 length: 4096" +
                         " range: 0 offset: 0 limit: 0",
                         @in.ToString());
            DataInputStream inStream = new DataInputStream(@in);
            for (int i = 0; i < 1024; ++i)
            {
                int x = inStream.readInt();
                Assert.Equal(i, x);
            }
            Assert.Equal(0, @in.available());
            for (int i = 1023; i >= 0; --i)
            {
                @in.seek(positions[i]);
                Assert.Equal(i, inStream.readInt());
            }

            @in = InStream.create(null, "test", new ByteBuffer[] { inBuf[1], inBuf[2] },
                new long[] { 1024, 3072 }, 4096, null, 400);
            inStream = new DataInputStream(@in);
            positions[256].reset();
            @in.seek(positions[256]);
            for (int i = 256; i < 1024; ++i)
            {
                Assert.Equal(i, inStream.readInt());
            }

            @in = InStream.create(null, "test", new ByteBuffer[] { inBuf[0], inBuf[2] },
                new long[] { 0, 3072 }, 4096, null, 400);
            inStream = new DataInputStream(@in);
            positions[768].reset();
            for (int i = 0; i < 256; ++i)
            {
                Assert.Equal(i, inStream.readInt());
            }
            @in.seek(positions[768]);
            for (int i = 768; i < 1024; ++i)
            {
                Assert.Equal(i, inStream.readInt());
            }
        }
Esempio n. 8
0
        public void testDisjointBuffers()
        {
            OutputCollector collect = new OutputCollector();
            CompressionCodec codec = new ZlibCodec();
            OutStream @out = new OutStream("test", 400, codec, collect);
            PositionCollector[] positions = new PositionCollector[1024];
            DataOutput stream = new DataOutputStream(@out);
            for (int i = 0; i < 1024; ++i)
            {
                positions[i] = new PositionCollector();
                @out.getPosition(positions[i]);
                stream.writeInt(i);
            }
            @out.Flush();
            Assert.Equal("test", @out.ToString());
            Assert.Equal(1674, collect.buffer.size());
            ByteBuffer[] inBuf = new ByteBuffer[3];
            inBuf[0] = ByteBuffer.allocate(500);
            inBuf[1] = ByteBuffer.allocate(1200);
            inBuf[2] = ByteBuffer.allocate(500);
            collect.buffer.setByteBuffer(inBuf[0], 0, 483);
            collect.buffer.setByteBuffer(inBuf[1], 483, 1625 - 483);
            collect.buffer.setByteBuffer(inBuf[2], 1625, 1674 - 1625);

            for (int i = 0; i < inBuf.Length; ++i)
            {
                inBuf[i].flip();
            }
            InStream @in = InStream.create(null, "test", inBuf,
                new long[] { 0, 483, 1625 }, 1674, codec, 400);
            Assert.Equal("compressed stream test position: 0 length: 1674 range: 0" +
                         " offset: 0 limit: 0 range 0 = 0 to 483;" +
                         "  range 1 = 483 to 1142;  range 2 = 1625 to 49",
                         @in.ToString());
            DataInputStream inStream = new DataInputStream(@in);
            for (int i = 0; i < 1024; ++i)
            {
                int x = inStream.readInt();
                Assert.Equal(i, x);
            }
            Assert.Equal(0, @in.available());
            for (int i = 1023; i >= 0; --i)
            {
                @in.seek(positions[i]);
                Assert.Equal(i, inStream.readInt());
            }

            @in = InStream.create(null, "test", new ByteBuffer[] { inBuf[1], inBuf[2] },
                new long[] { 483, 1625 }, 1674, codec, 400);
            inStream = new DataInputStream(@in);
            positions[303].reset();
            @in.seek(positions[303]);
            for (int i = 303; i < 1024; ++i)
            {
                Assert.Equal(i, inStream.readInt());
            }

            @in = InStream.create(null, "test", new ByteBuffer[] { inBuf[0], inBuf[2] },
                new long[] { 0, 1625 }, 1674, codec, 400);
            inStream = new DataInputStream(@in);
            positions[1001].reset();
            for (int i = 0; i < 300; ++i)
            {
                Assert.Equal(i, inStream.readInt());
            }
            @in.seek(positions[1001]);
            for (int i = 1001; i < 1024; ++i)
            {
                Assert.Equal(i, inStream.readInt());
            }
        }
Esempio n. 9
0
        public void testCorruptStream()
        {
            OutputCollector collect = new OutputCollector();
            CompressionCodec codec = new ZlibCodec();
            OutStream @out = new OutStream("test", 500, codec, collect);
            PositionCollector[] positions = new PositionCollector[1024];
            for (int i = 0; i < 1024; ++i)
            {
                positions[i] = new PositionCollector();
                @out.getPosition(positions[i]);
                @out.WriteByte((byte)i);
            }
            @out.Flush();

            // now try to read the stream with a buffer that is too small
            ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
            collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
            inBuf.flip();
            InStream @in = InStream.create(null, "test", new ByteBuffer[] { inBuf },
                new long[] { 0 }, inBuf.remaining(), codec, 100);
            byte[] contents = new byte[1024];
            try
            {
                @in.Read(contents, 0, contents.Length);
                fail();
            }
            catch (IllegalArgumentException iae)
            {
                // EXPECTED
            }

            // make a corrupted header
            inBuf.clear();
            inBuf.put((byte)32);
            inBuf.put((byte)0);
            inBuf.flip();
            @in = InStream.create(null, "test2", new ByteBuffer[] { inBuf }, new long[] { 0 },
                inBuf.remaining(), codec, 300);
            try
            {
                @in.ReadByte();
                fail();
            }
            catch (InvalidOperationException ise)
            {
                // EXPECTED
            }
        }
Esempio n. 10
0
 public void testCompressed()
 {
     OutputCollector collect = new OutputCollector();
     CompressionCodec codec = new ZlibCodec();
     OutStream @out = new OutStream("test", 300, codec, collect);
     PositionCollector[] positions = new PositionCollector[1024];
     for (int i = 0; i < 1024; ++i)
     {
         positions[i] = new PositionCollector();
         @out.getPosition(positions[i]);
         @out.WriteByte((byte)i);
     }
     @out.Flush();
     Assert.Equal("test", @out.ToString());
     Assert.Equal(961, collect.buffer.size());
     ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
     collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
     inBuf.flip();
     InStream @in = InStream.create(null, "test", new ByteBuffer[] { inBuf },
         new long[] { 0 }, inBuf.remaining(), codec, 300);
     Assert.Equal("compressed stream test position: 0 length: 961 range: 0" +
                  " offset: 0 limit: 0 range 0 = 0 to 961",
                  @in.ToString());
     for (int i = 0; i < 1024; ++i)
     {
         int x = @in.ReadByte();
         Assert.Equal(i & 0xff, x);
     }
     Assert.Equal(0, @in.available());
     for (int i = 1023; i >= 0; --i)
     {
         @in.seek(positions[i]);
         Assert.Equal(i & 0xff, @in.ReadByte());
     }
 }