// Writes 21000 rows built from a Random seeded with 1 into an ORC file that has a
// bloom filter on column "l" (false-positive probability 0.01), runs FileDump with
// "--rowindex=2" while stdout is captured into a file under workDir, and hands the
// expected-output name and the captured file to TestHelpers.CompareFilesByLine.
public void testBloomFilter2()
        {
            // conf.set(HiveConf.ConfVars.HIVE_ORC_ENCODING_STRATEGY.varname, "COMPRESSION");

            // File.Create truncates an existing file. File.OpenWrite would open it without
            // truncation, so bytes of an earlier, longer file could survive past the new end.
            using (Stream file = File.Create(TestFilePath))
            {
                OrcFile.WriterOptions options = new OrcFile.WriterOptions(new Properties(), conf);
                options.inspector(ObjectInspectorFactory.getReflectionObjectInspector(typeof(MyRecord)));
                options.stripeSize(100000);
                options.compress(CompressionKind.ZLIB);
                options.bufferSize(10000);
                options.rowIndexStride(1000);
                options.bloomFilterColumns("l");
                options.bloomFilterFpp(0.01);
                using (Writer writer = OrcFile.createWriter(TestFilePath, file, options))
                {
                    // Fixed seed keeps the generated rows identical from run to run.
                    Random r1 = new Random(1);
                    for (int i = 0; i < 21000; ++i)
                    {
                        writer.addRow(new MyRecord(r1.Next(), r1.NextLong(),
                            TestHelpers.words[r1.Next(TestHelpers.words.Length)]));
                    }
                }
            }

            string outputFilename = "orc-file-dump-bloomfilter2.out";
            using (CaptureStdout capture = new CaptureStdout(Path.Combine(workDir, outputFilename)))
            {
                FileDump.Main(new string[] { TestFilePath.ToString(), "--rowindex=2" });
            }

            TestHelpers.CompareFilesByLine(outputFilename, Path.Combine(workDir, outputFilename));
        }
// Example #2
// 0
        // Round trip of a single row: writes the string "hello" to an ORC file, reads the
        // first row back and checks that it is a string equal to "hello".
        public void SimpleTest()
        {
            OrcFile.WriterOptions writerOptions =
                new OrcFile.WriterOptions(new Properties(), new Configuration());
            writerOptions.inspector(PrimitiveObjectInspectorFactory.writableStringObjectInspector);

            // The writer is disposed before the stream it writes to.
            using (Stream output = File.Create(filename))
            {
                using (Writer orcWriter = OrcFile.createWriter(filename, output, writerOptions))
                {
                    orcWriter.addRow("hello");
                }
            }

            Reader orcReader = OrcFile.createReader(() => File.OpenRead(filename), filename);
            using (RecordReader rows = orcReader.rows())
            {
                object firstRow = rows.next();
                Assert.True(firstRow is string);
                Assert.Equal("hello", firstRow);
            }
        }
        /// <summary>
        /// Sets up the updater: resolves the output file name and file system, makes a
        /// best-effort attempt to write the ACID version marker, optionally opens the
        /// side file, configures the ORC writer options and creates the writer together
        /// with the reusable event struct.
        /// </summary>
        /// <param name="path">Directory the output file name is derived from; the
        /// ACID_FORMAT marker file is created directly under it.</param>
        /// <param name="options">Supplies bucket, file system, configuration,
        /// transaction id range, inspector and record-id column.</param>
        OrcRecordUpdater(Path path,
                         AcidOutputFormat.Options options)
        {
            this.options = options;
            this.bucket.set(options.getBucket());
            this.path = AcidUtils.createFilename(path, options);
            FileSystem fs = options.getFilesystem();

            // Fall back to the file system of the path when the options carry none.
            if (fs == null)
            {
                fs = path.getFileSystem(options.getConfiguration());
            }
            this.fs = fs;
            try
            {
                // overwrite == false: creation fails if the marker already exists.
                FSDataOutputStream strm = fs.create(new Path(path, ACID_FORMAT), false);
                try
                {
                    strm.writeInt(ORC_ACID_VERSION);
                }
                finally
                {
                    // Close even when writeInt throws so the stream is not leaked.
                    strm.close();
                }
            }
            catch (IOException ioe)
            {
                // Deliberately best effort: a failure here is only logged at debug level.
                if (LOG.isDebugEnabled())
                {
                    LOG.debug("Failed to create " + path + "/" + ACID_FORMAT + " with " +
                              ioe);
                }
            }
            // The side file is only opened when the transaction range spans more than one
            // id and a delta (not a base) is being written.
            if (options.getMinimumTransactionId() != options.getMaximumTransactionId() &&
                !options.isWritingBase())
            {
                flushLengths = fs.create(getSideFile(this.path), true, 8,
                                         options.getReporter());
            }
            else
            {
                flushLengths = null;
            }
            // Prefer writer options carried by OrcOptions; otherwise build them from the
            // configuration.
            OrcFile.WriterOptions writerOptions = null;
            if (options is OrcOptions)
            {
                writerOptions = ((OrcOptions)options).getOrcOptions();
            }
            if (writerOptions == null)
            {
                writerOptions = OrcFile.writerOptions( /* options.getTableProperties(), */
                    options.getConfiguration());
            }
            writerOptions.fileSystem(fs).callback(indexBuilder);
            // Deltas are written without block padding and with the delta-specific
            // buffer and stripe sizes.
            if (!options.isWritingBase())
            {
                writerOptions.blockPadding(false);
                writerOptions.bufferSize(DELTA_BUFFER_SIZE);
                writerOptions.stripeSize(DELTA_STRIPE_SIZE);
            }
            rowInspector = (StructObjectInspector)options.getInspector();
            writerOptions.inspector(createEventSchema(findRecId(options.getInspector(),
                                                                options.getRecordIdColumn())));
            this.writer = OrcFile.createWriter(this.path, writerOptions);
            // Bind the holder objects to the event struct fields once, up front.
            item        = new OrcStruct(FIELDS);
            item.setFieldValue(OPERATION, operation);
            item.setFieldValue(CURRENT_TRANSACTION, currentTransaction);
            item.setFieldValue(ORIGINAL_TRANSACTION, originalTransaction);
            item.setFieldValue(BUCKET, bucket);
            item.setFieldValue(ROW_ID, rowId);
        }
 /// <summary>
 /// Stores the given ORC writer options on this instance.
 /// </summary>
 /// <param name="opts">Writer options to remember.</param>
 /// <returns>This instance, so that calls can be chained.</returns>
 public OrcOptions orcOptions(OrcFile.WriterOptions opts)
 {
     this.orcOptions = opts;

     return this;
 }
// Example #5
// 0
        /// <summary>
        /// Builds a writer on top of <paramref name="stream"/>: copies the settings into
        /// fields, creates the codec, works out the buffer size and the per-column bloom
        /// filter flags, creates the tree writer, validates the row index stride and
        /// finally registers the writer with the memory manager.
        /// </summary>
        /// <exception cref="ArgumentException">The row index is enabled
        /// (<paramref name="rowIndexStride"/> greater than zero) but the stride is below
        /// MIN_ROW_INDEX_STRIDE.</exception>
        public WriterImpl(
            Stream stream,
            string path,
            OrcFile.WriterOptions options,
            ObjectInspector inspector,
            TypeDescription schema,
            long stripeSize,
            CompressionKind compress,
            int bufferSize,
            int rowIndexStride,
            MemoryManager memoryManager,
            bool addBlockPadding,
            OrcFile.Version version,
            OrcFile.WriterCallback callback,
            OrcFile.EncodingStrategy encodingStrategy,
            OrcFile.CompressionStrategy compressionStrategy,
            double paddingTolerance,
            long blockSizeValue,
            string bloomFilterColumnNames,
            double bloomFilterFpp)
        {
            // Output target; the stream factory is created right after the base stream is set.
            this.baseStream = stream;
            this.streamFactory = new StreamFactory(this);
            this.path = path;

            // Settings stored exactly as passed in.
            this.options = options;
            this.callback = callback;
            this.schema = schema;
            this.version = version;
            this.memoryManager = memoryManager;

            // Stripe and block layout; both stripe sizes start at the requested value.
            this.defaultStripeSize = stripeSize;
            this.adjustedStripeSize = stripeSize;
            this.blockSize = blockSizeValue;
            this.addBlockPadding = addBlockPadding;
            this.paddingTolerance = paddingTolerance;

            // Encoding and compression.
            this.encodingStrategy = encodingStrategy;
            this.compressionStrategy = compressionStrategy;
            this.compress = compress;

            // A row index is built only for a positive stride.
            this.rowIndexStride = rowIndexStride;
            this.buildIndex = rowIndexStride > 0;

            this.codec = createCodec(compress);

            int numColumns = schema.getMaximumId() + 1;
            this.bufferSize = getEstimatedBufferSize(defaultStripeSize, numColumns, bufferSize);

            // do not write bloom filters for ORC v11: the flag array stays all false
            this.bloomFilterColumns = version == OrcFile.Version.V_0_11
                ? new bool[numColumns]
                : OrcUtils.includeColumns(bloomFilterColumnNames, schema);
            this.bloomFilterFpp = bloomFilterFpp;

            this.treeWriter = createTreeWriter(inspector, schema, streamFactory, false);

            if (buildIndex && rowIndexStride < MIN_ROW_INDEX_STRIDE)
            {
                throw new ArgumentException("Row stride must be at least " +
                    MIN_ROW_INDEX_STRIDE);
            }

            // ensure that we are able to handle callbacks before we register ourselves
            memoryManager.addWriter(path, stripeSize, this);
        }
 /// <summary>
 /// Stores the given ORC writer options on this instance.
 /// </summary>
 /// <param name="opts">Writer options to remember.</param>
 /// <returns>This instance, so that calls can be chained.</returns>
 public OrcOptions orcOptions(OrcFile.WriterOptions opts)
 {
     this.orcOptions = opts;
     return this;
 }
        // Writes 21000 rows in which every string value appears exactly twice in a row,
        // runs FileDump with "--rowindex=1,2,3" while stdout is captured into a file under
        // workDir, and hands the expected-output name and the captured file to
        // TestHelpers.CompareFilesByLine.
        public void testDictionaryThreshold()
        {
            // conf.set(HiveConf.ConfVars.HIVE_ORC_ENCODING_STRATEGY.varname, "COMPRESSION");
            // conf.setFloat(HiveConf.ConfVars.HIVE_ORC_DICTIONARY_KEY_SIZE_THRESHOLD.varname, 0.49f);

            // Work on a private copy: the loop below rewrites entries in place, and doing
            // that on the shared TestHelpers.words array would change the input of every
            // test that runs afterwards.
            string[] words = (string[])TestHelpers.words.Clone();

            // File.Create truncates an existing file. File.OpenWrite would open it without
            // truncation, so bytes of an earlier, longer file could survive past the new end.
            using (Stream file = File.Create(TestFilePath))
            {
                OrcFile.WriterOptions options = new OrcFile.WriterOptions(new Properties(), conf);
                options.inspector(ObjectInspectorFactory.getReflectionObjectInspector(typeof(MyRecord)));
                options.stripeSize(100000);
                options.compress(CompressionKind.ZLIB);
                options.bufferSize(10000);
                options.rowIndexStride(1000);
                using (Writer writer = OrcFile.createWriter(TestFilePath, file, options))
                {
                    // Fixed seed keeps the generated rows identical from run to run.
                    Random r1 = new Random(1);
                    int nextInt = 0;
                    for (int i = 0; i < 21000; ++i)
                    {
                        // Write out the same string twice, this guarantees the fraction of rows with
                        // distinct strings is 0.5
                        if (i % 2 == 0)
                        {
                            nextInt = r1.Next(words.Length);
                            // Append the value of i to the word, this guarantees when an index or word is repeated
                            // the actual string is unique.
                            words[nextInt] += "-" + i;
                        }
                        writer.addRow(new MyRecord(r1.Next(), r1.NextLong(), words[nextInt]));
                    }
                }
            }

            string outputFilename = "orc-file-dump-dictionary-threshold.out";
            using (CaptureStdout capture = new CaptureStdout(Path.Combine(workDir, outputFilename)))
            {
                FileDump.Main(new string[] { TestFilePath.ToString(), "--rowindex=1,2,3" });
            }

            TestHelpers.CompareFilesByLine(outputFilename, Path.Combine(workDir, outputFilename));
        }
        // Writes two AllTypesRecord rows to an uncompressed ORC file, runs FileDump with
        // "-d" while stdout is captured in memory, and checks that exactly two lines were
        // printed and that each one equals the expected JSON text.
        public void testDataDump()
        {
            // File.Create truncates an existing file. File.OpenWrite would open it without
            // truncation, so bytes of an earlier, longer file could survive past the new end.
            using (Stream file = File.Create(TestFilePath))
            {
                OrcFile.WriterOptions options = new OrcFile.WriterOptions(new Properties(), conf);
                options.inspector(ObjectInspectorFactory.getReflectionObjectInspector(typeof(AllTypesRecord)));
                options.stripeSize(100000);
                options.compress(CompressionKind.NONE);
                options.bufferSize(10000);
                options.rowIndexStride(1000);
                using (Writer writer = OrcFile.createWriter(TestFilePath, file, options))
                {
                    // The same dictionary instance is reused for both rows; it is cleared
                    // and refilled between the two addRow calls.
                    Dictionary<string, string> m = new Dictionary<string, string>(2);
                    m.Add("k1", "v1");
                    writer.addRow(new AllTypesRecord(
                        true,
                        (sbyte)10,
                        (short)100,
                        1000,
                        10000L,
                        4.0f,
                        20.0,
                        HiveDecimal.Parse("4.2222"),
                        new Timestamp(1416967764000L),
                        new Date(1416967764000L),
                        "string",
                        m,
                        new List<int> { 100, 200 },
                        new AllTypesRecord.Struct(10, "foo")));
                    m.Clear();
                    m.Add("k3", "v3");
                    writer.addRow(new AllTypesRecord(
                        false,
                        (sbyte)20,
                        (short)200,
                        2000,
                        20000L,
                        8.0f,
                        40.0,
                        HiveDecimal.Parse("2.2222"),
                        new Timestamp(1416967364000L),
                        new Date(1411967764000L),
                        "abcd",
                        m,
                        new List<int> { 200, 300 },
                        new AllTypesRecord.Struct(20, "bar")));
                }
            }

            string[] lines;
            using (CaptureStdoutToMemory capture = new CaptureStdoutToMemory())
            {
                FileDump.Main(TestFilePath, "-d");

                // Split on both CR and LF so that output written with "\r\n" line endings
                // does not leave a trailing '\r' on every line.
                lines = capture.Text.Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
            }
            Assert.Equal(2, lines.Length);

            // NOTE(review): the expected JSON contains "c" and "vc" values that are not among
            // the constructor arguments visible above; presumably AllTypesRecord supplies
            // them itself - confirm.
            // Don't be fooled by the big space in the middle, this line is quite long
            Assert.Equal("{\"b\":true,\"bt\":10,\"s\":100,\"i\":1000,\"l\":10000,\"f\":4,\"d\":20,\"de\":\"4.2222\",\"t\":\"2014-11-25 18:09:24\",\"dt\":\"2014-11-25\",\"str\":\"string\",\"c\":\"hello                                                                                                                                                                                                                                                          \",\"vc\":\"hello\",\"m\":[{\"_key\":\"k1\",\"_value\":\"v1\"}],\"a\":[100,200],\"st\":{\"i\":10,\"s\":\"foo\"}}", lines[0]);
            Assert.Equal("{\"b\":false,\"bt\":20,\"s\":200,\"i\":2000,\"l\":20000,\"f\":8,\"d\":40,\"de\":\"2.2222\",\"t\":\"2014-11-25 18:02:44\",\"dt\":\"2014-09-28\",\"str\":\"abcd\",\"c\":\"world                                                                                                                                                                                                                                                          \",\"vc\":\"world\",\"m\":[{\"_key\":\"k3\",\"_value\":\"v3\"}],\"a\":[200,300],\"st\":{\"i\":20,\"s\":\"bar\"}}", lines[1]);
        }