예제 #1
0
        /// <summary>
        /// Writes 21,000 <c>MyRecord</c> rows to a ZLIB-compressed ORC file with a bloom filter
        /// configured on column "l" (false-positive probability 0.01), runs <c>FileDump</c>
        /// with <c>--rowindex=2</c> while stdout is captured, and hands the reference name
        /// "orc-file-dump-bloomfilter2.out" plus the captured file path to
        /// <c>TestHelpers.CompareFilesByLine</c>.
        /// </summary>
        public void testBloomFilter2()
        {
            // conf.set(HiveConf.ConfVars.HIVE_ORC_ENCODING_STRATEGY.varname, "COMPRESSION");

            // File.Create truncates an existing file. File.OpenWrite would leave stale bytes
            // behind the new data whenever a previous, larger file is still present at
            // TestFilePath, which corrupts the tail of the freshly written file.
            using (Stream file = File.Create(TestFilePath))
            {
                OrcFile.WriterOptions options = new OrcFile.WriterOptions(new Properties(), conf);
                options.inspector(ObjectInspectorFactory.getReflectionObjectInspector(typeof(MyRecord)));
                options.stripeSize(100000);
                options.compress(CompressionKind.ZLIB);
                options.bufferSize(10000);
                options.rowIndexStride(1000);
                options.bloomFilterColumns("l");
                options.bloomFilterFpp(0.01);
                using (Writer writer = OrcFile.createWriter(TestFilePath, file, options))
                {
                    // Fixed seed so the generated rows are the same on every run.
                    Random r1 = new Random(1);
                    for (int i = 0; i < 21000; ++i)
                    {
                        writer.addRow(new MyRecord(r1.Next(), r1.NextLong(),
                            TestHelpers.words[r1.Next(TestHelpers.words.Length)]));
                    }
                }
            }

            string outputFilename = "orc-file-dump-bloomfilter2.out";
            string capturedPath = Path.Combine(workDir, outputFilename);

            // The capture object is only needed for its lifetime (construct .. dispose),
            // so no local variable is kept for it.
            using (new CaptureStdout(capturedPath))
            {
                FileDump.Main(new string[] { TestFilePath.ToString(), "--rowindex=2" });
            }

            TestHelpers.CompareFilesByLine(outputFilename, capturedPath);
        }
예제 #2
0
        /// <summary>
        /// Writes 21,000 <c>MyRecord</c> rows to a ZLIB-compressed ORC file where every string
        /// value is written exactly twice in a row, runs <c>FileDump</c> with
        /// <c>--rowindex=1,2,3</c> while stdout is captured, and hands the reference name
        /// "orc-file-dump-dictionary-threshold.out" plus the captured file path to
        /// <c>TestHelpers.CompareFilesByLine</c>.
        /// </summary>
        public void testDictionaryThreshold()
        {
            // conf.set(HiveConf.ConfVars.HIVE_ORC_ENCODING_STRATEGY.varname, "COMPRESSION");
            // conf.setFloat(HiveConf.ConfVars.HIVE_ORC_DICTIONARY_KEY_SIZE_THRESHOLD.varname, 0.49f);

            // Work on a private copy of the word list. The loop below appends suffixes to the
            // entries; doing that on the shared TestHelpers.words array would change the input
            // of every other test that reads it and make results depend on test order.
            string[] words = (string[])TestHelpers.words.Clone();

            // File.Create truncates an existing file. File.OpenWrite would leave stale bytes
            // behind the new data whenever a previous, larger file is still present at
            // TestFilePath, which corrupts the tail of the freshly written file.
            using (Stream file = File.Create(TestFilePath))
            {
                OrcFile.WriterOptions options = new OrcFile.WriterOptions(new Properties(), conf);
                options.inspector(ObjectInspectorFactory.getReflectionObjectInspector(typeof(MyRecord)));
                options.stripeSize(100000);
                options.compress(CompressionKind.ZLIB);
                options.bufferSize(10000);
                options.rowIndexStride(1000);
                using (Writer writer = OrcFile.createWriter(TestFilePath, file, options))
                {
                    // Fixed seed so the generated rows are the same on every run.
                    Random r1 = new Random(1);
                    int nextInt = 0;
                    for (int i = 0; i < 21000; ++i)
                    {
                        // Write out the same string twice, this guarantees the fraction of rows with
                        // distinct strings is 0.5
                        if (i % 2 == 0)
                        {
                            nextInt = r1.Next(words.Length);
                            // Append the value of i to the word, this guarantees when an index or word is repeated
                            // the actual string is unique.
                            words[nextInt] += "-" + i;
                        }
                        writer.addRow(new MyRecord(r1.Next(), r1.NextLong(), words[nextInt]));
                    }
                }
            }

            string outputFilename = "orc-file-dump-dictionary-threshold.out";
            string capturedPath = Path.Combine(workDir, outputFilename);

            // The capture object is only needed for its lifetime (construct .. dispose),
            // so no local variable is kept for it.
            using (new CaptureStdout(capturedPath))
            {
                FileDump.Main(new string[] { TestFilePath.ToString(), "--rowindex=1,2,3" });
            }

            TestHelpers.CompareFilesByLine(outputFilename, capturedPath);
        }
예제 #3
0
        /// <summary>
        /// Writes two <c>AllTypesRecord</c> rows to an uncompressed ORC file, runs
        /// <c>FileDump</c> with <c>-d</c> while stdout is captured in memory, and asserts that
        /// the output consists of exactly two lines matching the expected JSON text.
        /// </summary>
        public void testDataDump()
        {
            // File.Create truncates an existing file. File.OpenWrite would leave stale bytes
            // behind the new data whenever a previous, larger file is still present at
            // TestFilePath, which corrupts the tail of this small two-row file.
            using (Stream file = File.Create(TestFilePath))
            {
                OrcFile.WriterOptions options = new OrcFile.WriterOptions(new Properties(), conf);
                options.inspector(ObjectInspectorFactory.getReflectionObjectInspector(typeof(AllTypesRecord)));
                options.stripeSize(100000);
                options.compress(CompressionKind.NONE);
                options.bufferSize(10000);
                options.rowIndexStride(1000);
                using (Writer writer = OrcFile.createWriter(TestFilePath, file, options))
                {
                    Dictionary<string, string> m = new Dictionary<string, string>(2);
                    m.Add("k1", "v1");
                    writer.addRow(new AllTypesRecord(
                        true,
                        (sbyte)10,
                        (short)100,
                        1000,
                        10000L,
                        4.0f,
                        20.0,
                        HiveDecimal.Parse("4.2222"),
                        new Timestamp(1416967764000L),
                        new Date(1416967764000L),
                        "string",
                        m,
                        new List<int> { 100, 200 },
                        new AllTypesRecord.Struct(10, "foo")));

                    // The same dictionary instance is reused for the second row; the first
                    // expected line below still requires the k1/v1 entry for row one.
                    m.Clear();
                    m.Add("k3", "v3");
                    writer.addRow(new AllTypesRecord(
                        false,
                        (sbyte)20,
                        (short)200,
                        2000,
                        20000L,
                        8.0f,
                        40.0,
                        HiveDecimal.Parse("2.2222"),
                        new Timestamp(1416967364000L),
                        new Date(1411967764000L),
                        "abcd",
                        m,
                        new List<int> { 200, 300 },
                        new AllTypesRecord.Struct(20, "bar")));
                }
            }

            string[] lines;
            using (CaptureStdoutToMemory capture = new CaptureStdoutToMemory())
            {
                FileDump.Main(TestFilePath, "-d");

                // Split on both CR and LF so a "\r\n" line terminator does not leave a
                // trailing '\r' on each line; empty fragments are dropped.
                lines = capture.Text.Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
            }
            Assert.Equal(2, lines.Length);

            // Don't be fooled by the big space in the middle, this line is quite long
            Assert.Equal("{\"b\":true,\"bt\":10,\"s\":100,\"i\":1000,\"l\":10000,\"f\":4,\"d\":20,\"de\":\"4.2222\",\"t\":\"2014-11-25 18:09:24\",\"dt\":\"2014-11-25\",\"str\":\"string\",\"c\":\"hello                                                                                                                                                                                                                                                          \",\"vc\":\"hello\",\"m\":[{\"_key\":\"k1\",\"_value\":\"v1\"}],\"a\":[100,200],\"st\":{\"i\":10,\"s\":\"foo\"}}", lines[0]);
            Assert.Equal("{\"b\":false,\"bt\":20,\"s\":200,\"i\":2000,\"l\":20000,\"f\":8,\"d\":40,\"de\":\"2.2222\",\"t\":\"2014-11-25 18:02:44\",\"dt\":\"2014-09-28\",\"str\":\"abcd\",\"c\":\"world                                                                                                                                                                                                                                                          \",\"vc\":\"world\",\"m\":[{\"_key\":\"k3\",\"_value\":\"v3\"}],\"a\":[200,300],\"st\":{\"i\":20,\"s\":\"bar\"}}", lines[1]);
        }