Пример #1
0
        // Writes 21000 pseudo-random MyRecord rows (Random seeded with 1) to TestFilePath, runs
        // FileDump with "--rowindex=1,2,3" while stdout is captured into a file under workDir,
        // and then compares that capture line by line with "orc-file-dump.out".
        public void testDump()
        {
            // conf.set(HiveConf.ConfVars.HIVE_ORC_ENCODING_STRATEGY.varname, "COMPRESSION");

            // File.Create truncates an existing file. File.OpenWrite would keep any bytes of a
            // previous, longer file beyond the newly written data, leaving stale content at the end.
            using (Stream file = File.Create(TestFilePath))
            {
                OrcFile.WriterOptions options = new OrcFile.WriterOptions(new Properties(), conf);
                options.inspector(ObjectInspectorFactory.getReflectionObjectInspector(typeof(MyRecord)));
                options.stripeSize(100000);
                options.compress(CompressionKind.ZLIB);
                options.bufferSize(10000);
                options.rowIndexStride(1000);
                using (Writer writer = OrcFile.createWriter(TestFilePath, file, options))
                {
                    // Fixed seed keeps the generated rows identical from run to run.
                    Random r1 = new Random(1);
                    for (int i = 0; i < 21000; ++i)
                    {
                        writer.addRow(new MyRecord(r1.Next(), r1.NextLong(),
                                                   TestHelpers.words[r1.Next(TestHelpers.words.Length)]));
                    }
                }
            }

            string outputFilename = "orc-file-dump.out";
            string capturedPath = Path.Combine(workDir, outputFilename);

            // The capture object is only needed for its lifetime: stdout is redirected until disposal.
            using (new CaptureStdout(capturedPath))
            {
                FileDump.Main(new string[] { TestFilePath.ToString(), "--rowindex=1,2,3" });
            }

            TestHelpers.CompareFilesByLine(outputFilename, capturedPath);
        }
Пример #2
0
        // Round-trip check: writes the single row "hello" to `filename` and verifies that the
        // first row read back is a string equal to "hello".
        public void SimpleTest()
        {
            OrcFile.WriterOptions writerOptions = new OrcFile.WriterOptions(new Properties(), new Configuration());
            writerOptions.inspector(PrimitiveObjectInspectorFactory.writableStringObjectInspector);

            // Write phase: both the writer and the underlying stream are closed before reading.
            using (Stream output = File.Create(filename))
            {
                using (Writer orcWriter = OrcFile.createWriter(filename, output, writerOptions))
                {
                    orcWriter.addRow("hello");
                }
            }

            // Read phase: the reader receives a factory that opens the file for reading.
            Reader orcReader = OrcFile.createReader(() => File.OpenRead(filename), filename);

            using (RecordReader rows = orcReader.rows())
            {
                object firstRow = rows.next();
                Assert.True(firstRow is string);
                Assert.Equal("hello", firstRow);
            }
        }
Пример #3
0
        // Writes 21000 MyRecord rows in which every string value appears exactly twice in a row,
        // dumps the file with FileDump ("--rowindex=1,2,3") while capturing stdout under workDir,
        // and compares the capture line by line with "orc-file-dump-dictionary-threshold.out".
        public void testDictionaryThreshold()
        {
            // conf.set(HiveConf.ConfVars.HIVE_ORC_ENCODING_STRATEGY.varname, "COMPRESSION");
            // conf.setFloat(HiveConf.ConfVars.HIVE_ORC_DICTIONARY_KEY_SIZE_THRESHOLD.varname, 0.49f);

            // Work on a private copy: the loop below appends "-<i>" suffixes to the words, and
            // doing that on the shared TestHelpers.words array would change the input of every
            // other test that reads it, making results depend on test execution order.
            string[] words = (string[])TestHelpers.words.Clone();

            // File.Create truncates an existing file. File.OpenWrite would keep any bytes of a
            // previous, longer file beyond the newly written data, leaving stale content at the end.
            using (Stream file = File.Create(TestFilePath))
            {
                OrcFile.WriterOptions options = new OrcFile.WriterOptions(new Properties(), conf);
                options.inspector(ObjectInspectorFactory.getReflectionObjectInspector(typeof(MyRecord)));
                options.stripeSize(100000);
                options.compress(CompressionKind.ZLIB);
                options.bufferSize(10000);
                options.rowIndexStride(1000);
                using (Writer writer = OrcFile.createWriter(TestFilePath, file, options))
                {
                    // Fixed seed keeps the generated rows identical from run to run.
                    Random r1      = new Random(1);
                    int    nextInt = 0;
                    for (int i = 0; i < 21000; ++i)
                    {
                        // Write out the same string twice, this guarantees the fraction of rows with
                        // distinct strings is 0.5
                        if (i % 2 == 0)
                        {
                            nextInt = r1.Next(words.Length);
                            // Append the value of i to the word, this guarantees when an index or word is repeated
                            // the actual string is unique.
                            words[nextInt] += "-" + i;
                        }
                        writer.addRow(new MyRecord(r1.Next(), r1.NextLong(), words[nextInt]));
                    }
                }
            }

            string outputFilename = "orc-file-dump-dictionary-threshold.out";
            string capturedPath = Path.Combine(workDir, outputFilename);

            // The capture object is only needed for its lifetime: stdout is redirected until disposal.
            using (new CaptureStdout(capturedPath))
            {
                FileDump.Main(new string[] { TestFilePath.ToString(), "--rowindex=1,2,3" });
            }

            TestHelpers.CompareFilesByLine(outputFilename, capturedPath);
        }
Пример #4
0
        // Writes 21000 MyRecord rows (every 100th row has a null string field) with a bloom
        // filter requested for column "s", runs FileDump with "-j -p --rowindex=3" while
        // capturing stdout under workDir, and compares the capture line by line with
        // "orc-file-dump.json".
        public void testJsonDump()
        {
            ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(typeof(MyRecord));
            // conf.set(HiveConf.ConfVars.HIVE_ORC_ENCODING_STRATEGY.varname, "COMPRESSION");
            OrcFile.WriterOptions options = OrcFile.writerOptions(conf)
                                            .inspector(inspector)
                                            .stripeSize(100000)
                                            .compress(CompressionKind.ZLIB)
                                            .bufferSize(10000)
                                            .rowIndexStride(1000)
                                            .bloomFilterColumns("s");

            // File.Create truncates an existing file. File.OpenWrite would keep any bytes of a
            // previous, longer file beyond the newly written data, leaving stale content at the end.
            using (Stream file = File.Create(TestFilePath))
            {
                using (Writer writer = OrcFile.createWriter(TestFilePath, file, options))
                {
                    // Fixed seed keeps the generated rows identical from run to run.
                    Random r1 = new Random(1);
                    for (int i = 0; i < 21000; ++i)
                    {
                        if (i % 100 == 0)
                        {
                            // Every 100th row carries a null string value.
                            writer.addRow(new MyRecord(r1.Next(), r1.NextLong(), null));
                        }
                        else
                        {
                            writer.addRow(new MyRecord(r1.Next(), r1.NextLong(),
                                                       TestHelpers.words[r1.Next(TestHelpers.words.Length)]));
                        }
                    }
                }
            }

            const string outputFilename = "orc-file-dump.json";
            string capturedPath = Path.Combine(workDir, outputFilename);

            // The capture object is only needed for its lifetime: stdout is redirected until disposal.
            using (new CaptureStdout(capturedPath))
            {
                FileDump.Main(new string[] { TestFilePath.ToString(), "-j", "-p", "--rowindex=3" });
            }

            TestHelpers.CompareFilesByLine(outputFilename, capturedPath);
        }
Пример #5
0
        // Writes two AllTypesRecord rows to TestFilePath without compression, runs FileDump with
        // "-d" while capturing stdout in memory, and asserts that the output consists of exactly
        // two non-empty lines matching the expected JSON rendering of each row.
        public void testDataDump()
        {
            // File.Create truncates an existing file. File.OpenWrite would keep any bytes of a
            // previous, longer file beyond the newly written data, leaving stale content at the end;
            // this test writes only two rows, so a leftover larger file is a realistic hazard.
            using (Stream file = File.Create(TestFilePath))
            {
                OrcFile.WriterOptions options = new OrcFile.WriterOptions(new Properties(), conf);
                options.inspector(ObjectInspectorFactory.getReflectionObjectInspector(typeof(AllTypesRecord)));
                options.stripeSize(100000);
                options.compress(CompressionKind.NONE);
                options.bufferSize(10000);
                options.rowIndexStride(1000);
                using (Writer writer = OrcFile.createWriter(TestFilePath, file, options))
                {
                    Dictionary<string, string> m = new Dictionary<string, string>(2);
                    m.Add("k1", "v1");
                    writer.addRow(new AllTypesRecord(
                                      true,
                                      (sbyte)10,
                                      (short)100,
                                      1000,
                                      10000L,
                                      4.0f,
                                      20.0,
                                      HiveDecimal.Parse("4.2222"),
                                      new Timestamp(1416967764000L),
                                      new Date(1416967764000L),
                                      "string",
                                      m,
                                      new List<int> { 100, 200 },
                                      new AllTypesRecord.Struct(10, "foo")));

                    // The same dictionary instance is reused for the second row.
                    // NOTE(review): this relies on addRow not keeping a live reference to `m` - confirm.
                    m.Clear();
                    m.Add("k3", "v3");
                    writer.addRow(new AllTypesRecord(
                                      false,
                                      (sbyte)20,
                                      (short)200,
                                      2000,
                                      20000L,
                                      8.0f,
                                      40.0,
                                      HiveDecimal.Parse("2.2222"),
                                      new Timestamp(1416967364000L),
                                      new Date(1411967764000L),
                                      "abcd",
                                      m,
                                      new List<int> { 200, 300 },
                                      new AllTypesRecord.Struct(20, "bar")));
                }
            }

            string[] lines;
            using (CaptureStdoutToMemory capture = new CaptureStdoutToMemory())
            {
                FileDump.Main(TestFilePath, "-d");

                // Split on both CR and LF so that CRLF-terminated output does not leave a
                // trailing '\r' on each line; empty fragments are discarded.
                lines = capture.Text.Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
            }
            Assert.Equal(2, lines.Length);

            // Don't be fooled by the big space in the middle, this line is quite long
            Assert.Equal("{\"b\":true,\"bt\":10,\"s\":100,\"i\":1000,\"l\":10000,\"f\":4,\"d\":20,\"de\":\"4.2222\",\"t\":\"2014-11-25 18:09:24\",\"dt\":\"2014-11-25\",\"str\":\"string\",\"c\":\"hello                                                                                                                                                                                                                                                          \",\"vc\":\"hello\",\"m\":[{\"_key\":\"k1\",\"_value\":\"v1\"}],\"a\":[100,200],\"st\":{\"i\":10,\"s\":\"foo\"}}", lines[0]);
            Assert.Equal("{\"b\":false,\"bt\":20,\"s\":200,\"i\":2000,\"l\":20000,\"f\":8,\"d\":40,\"de\":\"2.2222\",\"t\":\"2014-11-25 18:02:44\",\"dt\":\"2014-09-28\",\"str\":\"abcd\",\"c\":\"world                                                                                                                                                                                                                                                          \",\"vc\":\"world\",\"m\":[{\"_key\":\"k3\",\"_value\":\"v3\"}],\"a\":[200,300],\"st\":{\"i\":20,\"s\":\"bar\"}}", lines[1]);
        }