Beispiel #1
0
        /// <summary>
        /// Writes 21000 pseudo-random <c>MyRecord</c> rows to a ZLIB-compressed ORC file, runs
        /// <c>FileDump</c> on it with <c>--rowindex=1,2,3</c> while capturing stdout, and compares the
        /// captured output line by line against <c>orc-file-dump.out</c>.
        /// </summary>
        public void testDump()
        {
            // conf.set(HiveConf.ConfVars.HIVE_ORC_ENCODING_STRATEGY.varname, "COMPRESSION");

            // File.Create truncates an existing file. File.OpenWrite would keep the old length, so a
            // longer file left behind by an earlier run would leave stale bytes after the new data.
            using (Stream file = File.Create(TestFilePath))
            {
                OrcFile.WriterOptions options = new OrcFile.WriterOptions(new Properties(), conf);
                options.inspector(ObjectInspectorFactory.getReflectionObjectInspector(typeof(MyRecord)));
                options.stripeSize(100000);
                options.compress(CompressionKind.ZLIB);
                options.bufferSize(10000);
                options.rowIndexStride(1000);
                using (Writer writer = OrcFile.createWriter(TestFilePath, file, options))
                {
                    // Fixed seed so every run generates the same sequence of rows.
                    Random r1 = new Random(1);
                    for (int i = 0; i < 21000; ++i)
                    {
                        writer.addRow(new MyRecord(r1.Next(), r1.NextLong(),
                                                   TestHelpers.words[r1.Next(TestHelpers.words.Length)]));
                    }
                }
            }

            string outputFilename = "orc-file-dump.out";

            // The capture object is only held for the duration of the dump; it is handed the path of
            // the file under workDir that is compared below.
            using (CaptureStdout capture = new CaptureStdout(Path.Combine(workDir, outputFilename)))
            {
                FileDump.Main(new string[] { TestFilePath.ToString(), "--rowindex=1,2,3" });
            }

            TestHelpers.CompareFilesByLine(outputFilename, Path.Combine(workDir, outputFilename));
        }
        /// <summary>
        /// Writes 21000 pseudo-random <c>MyRecord</c> rows to a ZLIB-compressed ORC file with bloom
        /// filter columns set to <c>"S"</c>, runs <c>FileDump</c> on it with <c>--rowindex=3</c> while
        /// capturing stdout, and compares the captured output line by line against
        /// <c>orc-file-dump-bloomfilter.out</c>.
        /// </summary>
        public void testBloomFilter()
        {
            // conf.set(HiveConf.ConfVars.HIVE_ORC_ENCODING_STRATEGY.varname, "COMPRESSION");

            // File.Create truncates an existing file. File.OpenWrite would keep the old length, so a
            // longer file left behind by an earlier run would leave stale bytes after the new data.
            using (Stream file = File.Create(TestFilePath))
            {
                OrcFile.WriterOptions options = new OrcFile.WriterOptions(new Properties(), conf);
                options.inspector(ObjectInspectorFactory.getReflectionObjectInspector(typeof(MyRecord)));
                options.stripeSize(100000);
                options.compress(CompressionKind.ZLIB);
                options.bufferSize(10000);
                options.rowIndexStride(1000);
                options.bloomFilterColumns("S");
                using (Writer writer = OrcFile.createWriter(TestFilePath, file, options))
                {
                    // Fixed seed so every run generates the same sequence of rows.
                    Random r1 = new Random(1);
                    for (int i = 0; i < 21000; ++i)
                    {
                        writer.addRow(new MyRecord(r1.Next(), r1.NextLong(),
                            TestHelpers.words[r1.Next(TestHelpers.words.Length)]));
                    }
                }
            }

            string outputFilename = "orc-file-dump-bloomfilter.out";

            // The capture object is only held for the duration of the dump; it is handed the path of
            // the file under workDir that is compared below.
            using (CaptureStdout capture = new CaptureStdout(Path.Combine(workDir, outputFilename)))
            {
                FileDump.Main(new string[] { TestFilePath.ToString(), "--rowindex=3" });
            }

            TestHelpers.CompareFilesByLine(outputFilename, Path.Combine(workDir, outputFilename));
        }
Beispiel #3
0
        /// <summary>
        /// Writes 21000 <c>MyRecord</c> rows in which every string value appears exactly twice in a
        /// row (so half of the rows carry a distinct string), runs <c>FileDump</c> on the resulting
        /// ZLIB-compressed ORC file with <c>--rowindex=1,2,3</c> while capturing stdout, and compares
        /// the captured output line by line against <c>orc-file-dump-dictionary-threshold.out</c>.
        /// </summary>
        public void testDictionaryThreshold()
        {
            // conf.set(HiveConf.ConfVars.HIVE_ORC_ENCODING_STRATEGY.varname, "COMPRESSION");
            // conf.setFloat(HiveConf.ConfVars.HIVE_ORC_DICTIONARY_KEY_SIZE_THRESHOLD.varname, 0.49f);

            // Work on a private copy: the loop below appends suffixes to the words, and doing that on
            // the shared TestHelpers.words array would change the data seen by every later test.
            string[] words = (string[])TestHelpers.words.Clone();

            // File.Create truncates an existing file. File.OpenWrite would keep the old length, so a
            // longer file left behind by an earlier run would leave stale bytes after the new data.
            using (Stream file = File.Create(TestFilePath))
            {
                OrcFile.WriterOptions options = new OrcFile.WriterOptions(new Properties(), conf);
                options.inspector(ObjectInspectorFactory.getReflectionObjectInspector(typeof(MyRecord)));
                options.stripeSize(100000);
                options.compress(CompressionKind.ZLIB);
                options.bufferSize(10000);
                options.rowIndexStride(1000);
                using (Writer writer = OrcFile.createWriter(TestFilePath, file, options))
                {
                    // Fixed seed so every run generates the same sequence of rows.
                    Random r1      = new Random(1);
                    int    nextInt = 0;
                    for (int i = 0; i < 21000; ++i)
                    {
                        // Write out the same string twice, this guarantees the fraction of rows with
                        // distinct strings is 0.5
                        if (i % 2 == 0)
                        {
                            nextInt = r1.Next(words.Length);
                            // Append the value of i to the word, this guarantees when an index or word is repeated
                            // the actual string is unique.
                            words[nextInt] += "-" + i;
                        }
                        writer.addRow(new MyRecord(r1.Next(), r1.NextLong(), words[nextInt]));
                    }
                }
            }

            string outputFilename = "orc-file-dump-dictionary-threshold.out";

            // The capture object is only held for the duration of the dump; it is handed the path of
            // the file under workDir that is compared below.
            using (CaptureStdout capture = new CaptureStdout(Path.Combine(workDir, outputFilename)))
            {
                FileDump.Main(new string[] { TestFilePath.ToString(), "--rowindex=1,2,3" });
            }

            TestHelpers.CompareFilesByLine(outputFilename, Path.Combine(workDir, outputFilename));
        }
Beispiel #4
0
        /// <summary>
        /// Writes 21000 pseudo-random <c>MyRecord</c> rows (every 100th row has a null string) to a
        /// ZLIB-compressed ORC file with bloom filter columns set to <c>"s"</c>, runs <c>FileDump</c>
        /// on it with <c>-j -p --rowindex=3</c> while capturing stdout, and compares the captured
        /// output line by line against <c>orc-file-dump.json</c>.
        /// </summary>
        public void testJsonDump()
        {
            ObjectInspector inspector;

            inspector = ObjectInspectorFactory.getReflectionObjectInspector(typeof(MyRecord));
            // conf.set(HiveConf.ConfVars.HIVE_ORC_ENCODING_STRATEGY.varname, "COMPRESSION");
            OrcFile.WriterOptions options = OrcFile.writerOptions(conf)
                                            .inspector(inspector)
                                            .stripeSize(100000)
                                            .compress(CompressionKind.ZLIB)
                                            .bufferSize(10000)
                                            .rowIndexStride(1000)
                                            .bloomFilterColumns("s");

            // File.Create truncates an existing file. File.OpenWrite would keep the old length, so a
            // longer file left behind by an earlier run would leave stale bytes after the new data.
            using (Stream file = File.Create(TestFilePath))
                using (Writer writer = OrcFile.createWriter(TestFilePath, file, options))
                {
                    // Fixed seed so every run generates the same sequence of rows.
                    Random r1 = new Random(1);
                    for (int i = 0; i < 21000; ++i)
                    {
                        if (i % 100 == 0)
                        {
                            // Every 100th row carries a null string value.
                            writer.addRow(new MyRecord(r1.Next(), r1.NextLong(), null));
                        }
                        else
                        {
                            writer.addRow(new MyRecord(r1.Next(), r1.NextLong(),
                                                       TestHelpers.words[r1.Next(TestHelpers.words.Length)]));
                        }
                    }
                }

            const string outputFilename = "orc-file-dump.json";

            // The capture object is only held for the duration of the dump; it is handed the path of
            // the file under workDir that is compared below.
            using (CaptureStdout capture = new CaptureStdout(Path.Combine(workDir, outputFilename)))
            {
                FileDump.Main(new string[] { TestFilePath.ToString(), "-j", "-p", "--rowindex=3" });
            }

            TestHelpers.CompareFilesByLine(outputFilename, Path.Combine(workDir, outputFilename));
        }
        /// <summary>
        /// Writes 21000 pseudo-random <c>MyRecord</c> rows (every 100th row has a null string) to a
        /// ZLIB-compressed ORC file with bloom filter columns set to <c>"s"</c>, runs <c>FileDump</c>
        /// on it with <c>-j -p --rowindex=3</c> while capturing stdout, and compares the captured
        /// output line by line against <c>orc-file-dump.json</c>.
        /// </summary>
        public void testJsonDump()
        {
            ObjectInspector inspector;
            inspector = ObjectInspectorFactory.getReflectionObjectInspector(typeof(MyRecord));
            // conf.set(HiveConf.ConfVars.HIVE_ORC_ENCODING_STRATEGY.varname, "COMPRESSION");
            OrcFile.WriterOptions options = OrcFile.writerOptions(conf)
                .inspector(inspector)
                .stripeSize(100000)
                .compress(CompressionKind.ZLIB)
                .bufferSize(10000)
                .rowIndexStride(1000)
                .bloomFilterColumns("s");

            // File.Create truncates an existing file. File.OpenWrite would keep the old length, so a
            // longer file left behind by an earlier run would leave stale bytes after the new data.
            using (Stream file = File.Create(TestFilePath))
            using (Writer writer = OrcFile.createWriter(TestFilePath, file, options))
            {
                // Fixed seed so every run generates the same sequence of rows.
                Random r1 = new Random(1);
                for (int i = 0; i < 21000; ++i)
                {
                    if (i % 100 == 0)
                    {
                        // Every 100th row carries a null string value.
                        writer.addRow(new MyRecord(r1.Next(), r1.NextLong(), null));
                    }
                    else
                    {
                        writer.addRow(new MyRecord(r1.Next(), r1.NextLong(),
                            TestHelpers.words[r1.Next(TestHelpers.words.Length)]));
                    }
                }
            }

            const string outputFilename = "orc-file-dump.json";

            // The capture object is only held for the duration of the dump; it is handed the path of
            // the file under workDir that is compared below.
            using (CaptureStdout capture = new CaptureStdout(Path.Combine(workDir, outputFilename)))
            {
                FileDump.Main(new string[] { TestFilePath.ToString(), "-j", "-p", "--rowindex=3" });
            }

            TestHelpers.CompareFilesByLine(outputFilename, Path.Combine(workDir, outputFilename));
        }
        /// <summary>
        /// Writes 21000 <c>MyRecord</c> rows in which every string value appears exactly twice in a
        /// row (so half of the rows carry a distinct string), runs <c>FileDump</c> on the resulting
        /// ZLIB-compressed ORC file with <c>--rowindex=1,2,3</c> while capturing stdout, and compares
        /// the captured output line by line against <c>orc-file-dump-dictionary-threshold.out</c>.
        /// </summary>
        public void testDictionaryThreshold()
        {
            // conf.set(HiveConf.ConfVars.HIVE_ORC_ENCODING_STRATEGY.varname, "COMPRESSION");
            // conf.setFloat(HiveConf.ConfVars.HIVE_ORC_DICTIONARY_KEY_SIZE_THRESHOLD.varname, 0.49f);

            // Work on a private copy: the loop below appends suffixes to the words, and doing that on
            // the shared TestHelpers.words array would change the data seen by every later test.
            string[] words = (string[])TestHelpers.words.Clone();

            // File.Create truncates an existing file. File.OpenWrite would keep the old length, so a
            // longer file left behind by an earlier run would leave stale bytes after the new data.
            using (Stream file = File.Create(TestFilePath))
            {
                OrcFile.WriterOptions options = new OrcFile.WriterOptions(new Properties(), conf);
                options.inspector(ObjectInspectorFactory.getReflectionObjectInspector(typeof(MyRecord)));
                options.stripeSize(100000);
                options.compress(CompressionKind.ZLIB);
                options.bufferSize(10000);
                options.rowIndexStride(1000);
                using (Writer writer = OrcFile.createWriter(TestFilePath, file, options))
                {
                    // Fixed seed so every run generates the same sequence of rows.
                    Random r1 = new Random(1);
                    int nextInt = 0;
                    for (int i = 0; i < 21000; ++i)
                    {
                        // Write out the same string twice, this guarantees the fraction of rows with
                        // distinct strings is 0.5
                        if (i % 2 == 0)
                        {
                            nextInt = r1.Next(words.Length);
                            // Append the value of i to the word, this guarantees when an index or word is repeated
                            // the actual string is unique.
                            words[nextInt] += "-" + i;
                        }
                        writer.addRow(new MyRecord(r1.Next(), r1.NextLong(), words[nextInt]));
                    }
                }
            }

            string outputFilename = "orc-file-dump-dictionary-threshold.out";

            // The capture object is only held for the duration of the dump; it is handed the path of
            // the file under workDir that is compared below.
            using (CaptureStdout capture = new CaptureStdout(Path.Combine(workDir, outputFilename)))
            {
                FileDump.Main(new string[] { TestFilePath.ToString(), "--rowindex=1,2,3" });
            }

            TestHelpers.CompareFilesByLine(outputFilename, Path.Combine(workDir, outputFilename));
        }