Beispiel #1
0
        /// <summary>
        /// Writes 5120 ints that form ten descending runs (512 down to 1) using ORC version 0.12 with
        /// the COMPRESSION encoding strategy, then checks the DATA stream length printed by FileDump.
        /// </summary>
        public void testFixedDeltaOneDescending()
        {
            ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(typeof(int));

            // File.Create truncates whatever already exists at TestFilePath. File.OpenWrite would keep
            // stale trailing bytes if an earlier, longer file had been written to the same path.
            using (Stream file = File.Create(TestFilePath))
            using (Writer w = OrcFile.createWriter(TestFilePath, file, OrcFile.writerOptions(conf)
                                                   .compress(CompressionKind.NONE)
                                                   .inspector(inspector)
                                                   .rowIndexStride(0)
                                                   .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
                                                   .version(OrcFile.Version.V_0_12)))
            {
                for (int i = 0; i < 5120; ++i)
                {
                    w.addRow(512 - (i % 512));
                }
            }

            using (CaptureStdoutToMemory capture = new CaptureStdoutToMemory())
            {
                FileDump.Main(TestFilePath);

                // 10 runs of 512 elements. Each run has 2 bytes header, 2 byte base (base = 512, zigzag + varint)
                // and 1 byte delta (delta = 1). In total, 5 bytes per run.
                // Assert.Contains prints the captured dump when the expectation is not met.
                Assert.Contains("Stream: column 0 section DATA start: 3 length 50", capture.Text);
            }
        }
Beispiel #2
0
        /// <summary>
        /// Writes one large value (10000000) followed by 511 small pseudo-random values (seed 123)
        /// using ORC version 0.12 with the COMPRESSION encoding strategy, then checks the DATA stream
        /// length printed by FileDump.
        /// </summary>
        public void testPatchedBase()
        {
            ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(typeof(int));

            // File.Create truncates whatever already exists at TestFilePath. File.OpenWrite would keep
            // stale trailing bytes if an earlier, longer file had been written to the same path.
            using (Stream file = File.Create(TestFilePath))
            using (Writer w = OrcFile.createWriter(TestFilePath, file, OrcFile.writerOptions(conf)
                                                   .compress(CompressionKind.NONE)
                                                   .inspector(inspector)
                                                   .rowIndexStride(0)
                                                   .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
                                                   .version(OrcFile.Version.V_0_12)))
            {
                // Fixed seed keeps the generated sequence, and so the encoded length, repeatable.
                Random rand = new Random(123);
                w.addRow(10000000);
                for (int i = 0; i < 511; ++i)
                {
                    w.addRow(rand.Next(i + 1));
                }
            }

            using (CaptureStdoutToMemory capture = new CaptureStdoutToMemory())
            {
                FileDump.Main(TestFilePath);

                // use PATCHED_BASE encoding
                // Assert.Contains prints the captured dump when the expectation is not met.
                Assert.Contains("Stream: column 0 section DATA start: 3 length 583", capture.Text);
            }
        }
Beispiel #3
0
        /// <summary>
        /// Writes the sequence 0, 0, 1, 2, ..., 510 (512 values) using ORC version 0.12 with the
        /// COMPRESSION encoding strategy, then checks the DATA stream length printed by FileDump.
        /// </summary>
        public void testDeltaUnknownSign()
        {
            ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(typeof(int));

            // File.Create truncates whatever already exists at TestFilePath. File.OpenWrite would keep
            // stale trailing bytes if an earlier, longer file had been written to the same path.
            using (Stream file = File.Create(TestFilePath))
            using (Writer w = OrcFile.createWriter(TestFilePath, file, OrcFile.writerOptions(conf)
                                                   .compress(CompressionKind.NONE)
                                                   .inspector(inspector)
                                                   .rowIndexStride(0)
                                                   .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
                                                   .version(OrcFile.Version.V_0_12)))
            {
                // The leading 0 duplicates the first loop value, giving 0,0,1,2,...
                w.addRow(0);
                for (int i = 0; i < 511; ++i)
                {
                    w.addRow(i);
                }
            }

            using (CaptureStdoutToMemory capture = new CaptureStdoutToMemory())
            {
                FileDump.Main(TestFilePath);

                // monotonicity will be undetermined for this sequence 0,0,1,2,3,...510. Hence DIRECT encoding
                // will be used. 2 bytes for header and 640 bytes for data (512 values with fixed bit of 10 bits
                // each, 5120/8 = 640). Total bytes 642
                // Assert.Contains prints the captured dump when the expectation is not met.
                Assert.Contains("Stream: column 0 section DATA start: 3 length 642", capture.Text);
            }
        }
Beispiel #4
0
        /// <summary>
        /// Writes the value 10 five times using ORC version 0.12 with the COMPRESSION encoding
        /// strategy, then checks the DATA stream length printed by FileDump.
        /// </summary>
        public void testShortRepeat()
        {
            ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(typeof(int));

            // File.Create truncates whatever already exists at TestFilePath. File.OpenWrite would keep
            // stale trailing bytes if an earlier, longer file had been written to the same path.
            using (Stream file = File.Create(TestFilePath))
            using (Writer w = OrcFile.createWriter(TestFilePath, file, OrcFile.writerOptions(conf)
                                                   .compress(CompressionKind.NONE)
                                                   .inspector(inspector)
                                                   .rowIndexStride(0)
                                                   .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
                                                   .version(OrcFile.Version.V_0_12)))
            {
                for (int i = 0; i < 5; ++i)
                {
                    w.addRow(10);
                }
            }

            using (CaptureStdoutToMemory capture = new CaptureStdoutToMemory())
            {
                FileDump.Main(TestFilePath);

                // 1 byte header + 1 byte value
                // Assert.Contains prints the captured dump when the expectation is not met.
                Assert.Contains("Stream: column 0 section DATA start: 3 length 2", capture.Text);
            }
        }
Beispiel #5
0
        /// <summary>
        /// Writes two <c>AllTypesRecord</c> rows to an uncompressed ORC file, runs FileDump with the
        /// "-d" argument, and compares the two non-empty output lines with the expected JSON text.
        /// </summary>
        public void testDataDump()
        {
            // File.Create truncates whatever already exists at TestFilePath. File.OpenWrite would keep
            // stale trailing bytes if an earlier, longer file had been written to the same path.
            using (Stream file = File.Create(TestFilePath))
            {
                OrcFile.WriterOptions options = new OrcFile.WriterOptions(new Properties(), conf);
                options.inspector(ObjectInspectorFactory.getReflectionObjectInspector(typeof(AllTypesRecord)));
                options.stripeSize(100000);
                options.compress(CompressionKind.NONE);
                options.bufferSize(10000);
                options.rowIndexStride(1000);
                using (Writer writer = OrcFile.createWriter(TestFilePath, file, options))
                {
                    // The same dictionary instance is reused for both rows (cleared in between).
                    Dictionary<string, string> m = new Dictionary<string, string>(2);
                    m.Add("k1", "v1");
                    writer.addRow(new AllTypesRecord(
                        true,
                        (sbyte)10,
                        (short)100,
                        1000,
                        10000L,
                        4.0f,
                        20.0,
                        HiveDecimal.Parse("4.2222"),
                        new Timestamp(1416967764000L),
                        new Date(1416967764000L),
                        "string",
                        m,
                        new List<int> { 100, 200 },
                        new AllTypesRecord.Struct(10, "foo")));
                    m.Clear();
                    m.Add("k3", "v3");
                    writer.addRow(new AllTypesRecord(
                        false,
                        (sbyte)20,
                        (short)200,
                        2000,
                        20000L,
                        8.0f,
                        40.0,
                        HiveDecimal.Parse("2.2222"),
                        new Timestamp(1416967364000L),
                        new Date(1411967764000L),
                        "abcd",
                        m,
                        new List<int> { 200, 300 },
                        new AllTypesRecord.Struct(20, "bar")));
                }
            }

            string[] lines;
            using (CaptureStdoutToMemory capture = new CaptureStdoutToMemory())
            {
                FileDump.Main(TestFilePath, "-d");

                lines = capture.Text.Split(new[] { '\n' }, StringSplitOptions.RemoveEmptyEntries);
            }

            // One output line is expected per row written above.
            Assert.Equal(2, lines.Length);

            // NOTE(review): the expected "t"/"dt" values equal the epoch milliseconds above rendered at
            // UTC-8 (1416967764000 ms is 2014-11-26 02:09:24 UTC) - confirm whether the dump output
            // depends on the machine's local time zone.
            // Don't be fooled by the big space in the middle, this line is quite long
            Assert.Equal("{\"b\":true,\"bt\":10,\"s\":100,\"i\":1000,\"l\":10000,\"f\":4,\"d\":20,\"de\":\"4.2222\",\"t\":\"2014-11-25 18:09:24\",\"dt\":\"2014-11-25\",\"str\":\"string\",\"c\":\"hello                                                                                                                                                                                                                                                          \",\"vc\":\"hello\",\"m\":[{\"_key\":\"k1\",\"_value\":\"v1\"}],\"a\":[100,200],\"st\":{\"i\":10,\"s\":\"foo\"}}", lines[0]);
            Assert.Equal("{\"b\":false,\"bt\":20,\"s\":200,\"i\":2000,\"l\":20000,\"f\":8,\"d\":40,\"de\":\"2.2222\",\"t\":\"2014-11-25 18:02:44\",\"dt\":\"2014-09-28\",\"str\":\"abcd\",\"c\":\"world                                                                                                                                                                                                                                                          \",\"vc\":\"world\",\"m\":[{\"_key\":\"k3\",\"_value\":\"v3\"}],\"a\":[200,300],\"st\":{\"i\":20,\"s\":\"bar\"}}", lines[1]);
        }
        /// <summary>
        /// Writes two <c>AllTypesRecord</c> rows to an uncompressed ORC file, runs FileDump with the
        /// "-d" argument, and compares the two non-empty output lines with the expected JSON text.
        /// </summary>
        public void testDataDump()
        {
            // File.Create truncates whatever already exists at TestFilePath. File.OpenWrite would keep
            // stale trailing bytes if an earlier, longer file had been written to the same path.
            using (Stream file = File.Create(TestFilePath))
            {
                OrcFile.WriterOptions options = new OrcFile.WriterOptions(new Properties(), conf);
                options.inspector(ObjectInspectorFactory.getReflectionObjectInspector(typeof(AllTypesRecord)));
                options.stripeSize(100000);
                options.compress(CompressionKind.NONE);
                options.bufferSize(10000);
                options.rowIndexStride(1000);
                using (Writer writer = OrcFile.createWriter(TestFilePath, file, options))
                {
                    // The same dictionary instance is reused for both rows (cleared in between).
                    Dictionary<string, string> m = new Dictionary<string, string>(2);
                    m.Add("k1", "v1");
                    writer.addRow(new AllTypesRecord(
                        true,
                        (sbyte)10,
                        (short)100,
                        1000,
                        10000L,
                        4.0f,
                        20.0,
                        HiveDecimal.Parse("4.2222"),
                        new Timestamp(1416967764000L),
                        new Date(1416967764000L),
                        "string",
                        m,
                        new List<int> { 100, 200 },
                        new AllTypesRecord.Struct(10, "foo")));
                    m.Clear();
                    m.Add("k3", "v3");
                    writer.addRow(new AllTypesRecord(
                        false,
                        (sbyte)20,
                        (short)200,
                        2000,
                        20000L,
                        8.0f,
                        40.0,
                        HiveDecimal.Parse("2.2222"),
                        new Timestamp(1416967364000L),
                        new Date(1411967764000L),
                        "abcd",
                        m,
                        new List<int> { 200, 300 },
                        new AllTypesRecord.Struct(20, "bar")));
                }
            }

            string[] lines;
            using (CaptureStdoutToMemory capture = new CaptureStdoutToMemory())
            {
                FileDump.Main(TestFilePath, "-d");

                lines = capture.Text.Split(new[] { '\n' }, StringSplitOptions.RemoveEmptyEntries);
            }

            // One output line is expected per row written above.
            Assert.Equal(2, lines.Length);

            // NOTE(review): the expected "t"/"dt" values equal the epoch milliseconds above rendered at
            // UTC-8 (1416967764000 ms is 2014-11-26 02:09:24 UTC) - confirm whether the dump output
            // depends on the machine's local time zone.
            // Don't be fooled by the big space in the middle, this line is quite long
            Assert.Equal("{\"b\":true,\"bt\":10,\"s\":100,\"i\":1000,\"l\":10000,\"f\":4,\"d\":20,\"de\":\"4.2222\",\"t\":\"2014-11-25 18:09:24\",\"dt\":\"2014-11-25\",\"str\":\"string\",\"c\":\"hello                                                                                                                                                                                                                                                          \",\"vc\":\"hello\",\"m\":[{\"_key\":\"k1\",\"_value\":\"v1\"}],\"a\":[100,200],\"st\":{\"i\":10,\"s\":\"foo\"}}", lines[0]);
            Assert.Equal("{\"b\":false,\"bt\":20,\"s\":200,\"i\":2000,\"l\":20000,\"f\":8,\"d\":40,\"de\":\"2.2222\",\"t\":\"2014-11-25 18:02:44\",\"dt\":\"2014-09-28\",\"str\":\"abcd\",\"c\":\"world                                                                                                                                                                                                                                                          \",\"vc\":\"world\",\"m\":[{\"_key\":\"k3\",\"_value\":\"v3\"}],\"a\":[200,300],\"st\":{\"i\":20,\"s\":\"bar\"}}", lines[1]);
        }