/// <summary>
/// Writes 21000 <c>MyRecord</c> rows generated from a <see cref="Random"/> seeded with 1
/// to an ORC file at <c>TestFilePath</c>, runs <c>FileDump.Main</c> on that file with
/// <c>--rowindex=1,2,3</c>, and compares the captured output line by line with
/// <c>orc-file-dump.out</c>.
/// </summary>
public void testDump()
{
    // Disabled configuration call, kept for reference:
    // conf.set(HiveConf.ConfVars.HIVE_ORC_ENCODING_STRATEGY.varname, "COMPRESSION");

    // File.Create truncates an existing file. File.OpenWrite would keep any stale
    // trailing bytes left by an earlier, longer file at the same path.
    using (Stream file = File.Create(TestFilePath))
    {
        OrcFile.WriterOptions options = new OrcFile.WriterOptions(new Properties(), conf);
        options.inspector(ObjectInspectorFactory.getReflectionObjectInspector(typeof(MyRecord)));
        options.stripeSize(100000);
        options.compress(CompressionKind.ZLIB);
        options.bufferSize(10000);
        options.rowIndexStride(1000);

        using (Writer writer = OrcFile.createWriter(TestFilePath, file, options))
        {
            // Fixed seed so the generated rows are the same on every run.
            Random r1 = new Random(1);
            for (int i = 0; i < 21000; ++i)
            {
                // Argument order fixes the order in which values are drawn from r1.
                writer.addRow(new MyRecord(
                    r1.Next(),
                    r1.NextLong(),
                    TestHelpers.words[r1.Next(TestHelpers.words.Length)]));
            }
        }
    }

    string outputFilename = "orc-file-dump.out";
    string actualPath = Path.Combine(workDir, outputFilename);

    // NOTE(review): CaptureStdout presumably redirects console output to actualPath
    // until it is disposed - confirm against its definition.
    using (CaptureStdout capture = new CaptureStdout(actualPath))
    {
        FileDump.Main(new string[] { TestFilePath.ToString(), "--rowindex=1,2,3" });
    }

    // NOTE(review): the first argument is presumably the name of the expected-output
    // resource and the second the file just produced - confirm.
    TestHelpers.CompareFilesByLine(outputFilename, actualPath);
}
/// <summary>
/// Writes 21000 <c>MyRecord</c> rows generated from a <see cref="Random"/> seeded with 1
/// to an ORC file at <c>TestFilePath</c> with bloom filter columns set to <c>"S"</c>,
/// runs <c>FileDump.Main</c> on that file with <c>--rowindex=3</c>, and compares the
/// captured output line by line with <c>orc-file-dump-bloomfilter.out</c>.
/// </summary>
public void testBloomFilter()
{
    // Disabled configuration call, kept for reference:
    // conf.set(HiveConf.ConfVars.HIVE_ORC_ENCODING_STRATEGY.varname, "COMPRESSION");

    // File.Create truncates an existing file. File.OpenWrite would keep any stale
    // trailing bytes left by an earlier, longer file at the same path.
    using (Stream file = File.Create(TestFilePath))
    {
        OrcFile.WriterOptions options = new OrcFile.WriterOptions(new Properties(), conf);
        options.inspector(ObjectInspectorFactory.getReflectionObjectInspector(typeof(MyRecord)));
        options.stripeSize(100000);
        options.compress(CompressionKind.ZLIB);
        options.bufferSize(10000);
        options.rowIndexStride(1000);
        options.bloomFilterColumns("S");

        using (Writer writer = OrcFile.createWriter(TestFilePath, file, options))
        {
            // Fixed seed so the generated rows are the same on every run.
            Random r1 = new Random(1);
            for (int i = 0; i < 21000; ++i)
            {
                // Argument order fixes the order in which values are drawn from r1.
                writer.addRow(new MyRecord(
                    r1.Next(),
                    r1.NextLong(),
                    TestHelpers.words[r1.Next(TestHelpers.words.Length)]));
            }
        }
    }

    string outputFilename = "orc-file-dump-bloomfilter.out";
    string actualPath = Path.Combine(workDir, outputFilename);

    // NOTE(review): CaptureStdout presumably redirects console output to actualPath
    // until it is disposed - confirm against its definition.
    using (CaptureStdout capture = new CaptureStdout(actualPath))
    {
        FileDump.Main(new string[] { TestFilePath.ToString(), "--rowindex=3" });
    }

    // NOTE(review): the first argument is presumably the name of the expected-output
    // resource and the second the file just produced - confirm.
    TestHelpers.CompareFilesByLine(outputFilename, actualPath);
}
/// <summary>
/// Writes 21000 <c>MyRecord</c> rows to an ORC file at <c>TestFilePath</c>, where each
/// consecutive pair of rows shares one freshly made unique string, runs
/// <c>FileDump.Main</c> on that file with <c>--rowindex=1,2,3</c>, and compares the
/// captured output line by line with <c>orc-file-dump-dictionary-threshold.out</c>.
/// </summary>
public void testDictionaryThreshold()
{
    // Disabled configuration calls, kept for reference:
    // conf.set(HiveConf.ConfVars.HIVE_ORC_ENCODING_STRATEGY.varname, "COMPRESSION");
    // conf.setFloat(HiveConf.ConfVars.HIVE_ORC_DICTIONARY_KEY_SIZE_THRESHOLD.varname, 0.49f);

    // Work on a private copy of the word list. This test appends suffixes to the
    // entries, and doing that on the shared TestHelpers.words array would change the
    // input of every test that runs afterwards (and of this test when rerun).
    string[] words = (string[])TestHelpers.words.Clone();

    // File.Create truncates an existing file. File.OpenWrite would keep any stale
    // trailing bytes left by an earlier, longer file at the same path.
    using (Stream file = File.Create(TestFilePath))
    {
        OrcFile.WriterOptions options = new OrcFile.WriterOptions(new Properties(), conf);
        options.inspector(ObjectInspectorFactory.getReflectionObjectInspector(typeof(MyRecord)));
        options.stripeSize(100000);
        options.compress(CompressionKind.ZLIB);
        options.bufferSize(10000);
        options.rowIndexStride(1000);

        using (Writer writer = OrcFile.createWriter(TestFilePath, file, options))
        {
            // Fixed seed so the generated rows are the same on every run.
            Random r1 = new Random(1);
            int nextInt = 0;
            for (int i = 0; i < 21000; ++i)
            {
                // Write out the same string twice, this guarantees the fraction of rows with
                // distinct strings is 0.5
                if (i % 2 == 0)
                {
                    nextInt = r1.Next(words.Length);
                    // Append the value of i to the word, this guarantees when an index or word is
                    // repeated the actual string is unique.
                    words[nextInt] += "-" + i;
                }

                writer.addRow(new MyRecord(r1.Next(), r1.NextLong(), words[nextInt]));
            }
        }
    }

    string outputFilename = "orc-file-dump-dictionary-threshold.out";
    string actualPath = Path.Combine(workDir, outputFilename);

    // NOTE(review): CaptureStdout presumably redirects console output to actualPath
    // until it is disposed - confirm against its definition.
    using (CaptureStdout capture = new CaptureStdout(actualPath))
    {
        FileDump.Main(new string[] { TestFilePath.ToString(), "--rowindex=1,2,3" });
    }

    // NOTE(review): the first argument is presumably the name of the expected-output
    // resource and the second the file just produced - confirm.
    TestHelpers.CompareFilesByLine(outputFilename, actualPath);
}
/// <summary>
/// Writes 21000 <c>MyRecord</c> rows generated from a <see cref="Random"/> seeded with 1
/// to an ORC file at <c>TestFilePath</c>, passing <c>null</c> as the string field on
/// every 100th row, runs <c>FileDump.Main</c> on that file with
/// <c>-j -p --rowindex=3</c>, and compares the captured output line by line with
/// <c>orc-file-dump.json</c>.
/// </summary>
public void testJsonDump()
{
    ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(typeof(MyRecord));

    // Disabled configuration call, kept for reference:
    // conf.set(HiveConf.ConfVars.HIVE_ORC_ENCODING_STRATEGY.varname, "COMPRESSION");
    OrcFile.WriterOptions options = OrcFile.writerOptions(conf)
        .inspector(inspector)
        .stripeSize(100000)
        .compress(CompressionKind.ZLIB)
        .bufferSize(10000)
        .rowIndexStride(1000)
        .bloomFilterColumns("s");

    // File.Create truncates an existing file. File.OpenWrite would keep any stale
    // trailing bytes left by an earlier, longer file at the same path.
    using (Stream file = File.Create(TestFilePath))
    using (Writer writer = OrcFile.createWriter(TestFilePath, file, options))
    {
        // Fixed seed so the generated rows are the same on every run.
        Random r1 = new Random(1);
        for (int i = 0; i < 21000; ++i)
        {
            if (i % 100 == 0)
            {
                // Every 100th row gets a null string field.
                writer.addRow(new MyRecord(r1.Next(), r1.NextLong(), null));
            }
            else
            {
                // Argument order fixes the order in which values are drawn from r1.
                writer.addRow(new MyRecord(
                    r1.Next(),
                    r1.NextLong(),
                    TestHelpers.words[r1.Next(TestHelpers.words.Length)]));
            }
        }
    }

    const string outputFilename = "orc-file-dump.json";
    string actualPath = Path.Combine(workDir, outputFilename);

    // NOTE(review): CaptureStdout presumably redirects console output to actualPath
    // until it is disposed - confirm against its definition.
    using (CaptureStdout capture = new CaptureStdout(actualPath))
    {
        FileDump.Main(new string[] { TestFilePath.ToString(), "-j", "-p", "--rowindex=3" });
    }

    // NOTE(review): the first argument is presumably the name of the expected-output
    // resource and the second the file just produced - confirm.
    TestHelpers.CompareFilesByLine(outputFilename, actualPath);
}