/**
 * Verify that an over-subscribed memory pool scales every writer down.
 * 20 writers each request pool/4 (5x over-subscription), so after enough
 * rows trigger the periodic check each callback should have been told to
 * scale to 1/5 = 0.2.
 * Fix: removed the dead local `Configuration conf` — it was created but
 * never passed to anything.
 */
public void testCallback()
{
    MemoryManager mgr = new MemoryManager(configuredPoolSize);
    long pool = mgr.getTotalMemoryPool();
    LoggingCallback[] calls = new LoggingCallback[20];
    // Register 20 writers, each asking for a quarter of the pool.
    for (int i = 0; i < calls.Length; ++i)
    {
        calls[i] = new LoggingCallback();
        mgr.addWriter(i.ToString(), pool / 4, calls[i]);
    }
    // add enough rows to get the memory manager to check the limits
    for (int i = 0; i < 10000; ++i)
    {
        mgr.addedRow(1);
    }
    for (int call = 0; call < calls.Length; ++call)
    {
        // NOTE(review): 2 entries presumably means the limit check fired
        // twice over 10000 rows — confirm against MemoryManager's check
        // frequency.
        Assert.Equal(2, calls[call].LogLength);
        foreach (double argument in calls[call].Log)
        {
            Assert.Equal(0.2, argument, ERROR);
        }
    }
}
/**
 * Shared test fixture setup: a fresh configuration and a unique scratch
 * directory per test-class instance.
 */
public OrcTestBase()
{
    conf = new Configuration();
    // A GUID-named directory under the system temp path keeps concurrent
    // test runs from colliding.
    string scratchDir = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString());
    Directory.CreateDirectory(scratchDir);
    workDir = scratchDir;
    testClassName = GetType().Name;
}
/**
 * Create a set of write options based on a set of table properties and
 * configuration.
 * @param tableProperties the properties of the table
 * @param conf the configuration of the query
 * @return a WriterOptions object that can be modified
 */
public static WriterOptions writerOptions(Properties tableProperties, Configuration conf) =>
    new WriterOptions(tableProperties, conf);
/**
 * Get the lazily created, process-wide memory manager.
 * @param conf currently unused — the singleton is built once regardless
 * @return the shared MemoryManager instance
 */
private static MemoryManager getMemoryManager(Configuration conf) => memoryManager.Value;
/**
 * Create a set of reader options based on a configuration.
 * @param conf the configuration to use for values
 * @return a ReaderOptions object that can be modified
 */
public static ReaderOptions readerOptions(Configuration conf) => new ReaderOptions(conf);
/**
 * Create a set of writer options based on a configuration alone
 * (no table properties).
 * @param conf the configuration to use for values
 * @return a WriterOptions object that can be modified
 */
public static WriterOptions writerOptions(Configuration conf) => new WriterOptions(null, conf);
/**
 * Read this setting as a double, preferring table properties over the
 * configuration, and falling back to the compiled-in default.
 * Fix: parse with the invariant culture. The previous culture-sensitive
 * Double.Parse overload misreads values like "0.5" on locales that use
 * ',' as the decimal separator, and was inconsistent with the default
 * branch, which already uses CultureInfo.InvariantCulture.
 * @param tbl the table properties (may be null)
 * @param conf the configuration (may be null)
 * @return the configured value, or the default when unset
 */
public double getDouble(Properties tbl, Configuration conf)
{
    string value = lookupValue(tbl, conf);
    if (value != null)
    {
        return Double.Parse(value, CultureInfo.InvariantCulture);
    }
    return ((IConvertible)defaultValue).ToDouble(CultureInfo.InvariantCulture);
}
/**
 * Read the string value of this setting from the configuration
 * (no table properties consulted).
 */
public static string getString(this OrcConf orcConf, Configuration conf) =>
    OrcConfDetails.details[(int)orcConf].getString(null, conf);
/**
 * Read the double value of this setting from the configuration
 * (no table properties consulted).
 */
public static double getDouble(this OrcConf orcConf, Configuration conf) =>
    OrcConfDetails.details[(int)orcConf].getDouble(null, conf);
/**
 * Verify that the memory pool honors a configured fraction (0.9) of the
 * max heap, within a +/-0.001 tolerance band.
 * Fix: Assert.True was called with JUnit's (message, condition) argument
 * order; xUnit's signature is Assert.True(condition, userMessage), so the
 * bound checks were never evaluated as the condition. (If a project shim
 * provided the JUnit order this is behavior-neutral — the other asserts
 * in this file are xUnit-style, so the xUnit order is assumed.)
 */
public void testConfig()
{
    Configuration conf = new Configuration();
    conf.set("hive.exec.orc.memory.pool", "0.9");
    MemoryManager mgr = new MemoryManager(conf);
    long mem = ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getMax();
    System.Console.WriteLine("Memory = " + mem);
    long pool = mgr.getTotalMemoryPool();
    Assert.True(mem * 0.899 < pool, "Pool too small: " + pool);
    Assert.True(pool < mem * 0.901, "Pool too big: " + pool);
}
/**
 * Read the boolean value of this setting from the configuration
 * (no table properties consulted).
 */
public static bool getBoolean(this OrcConf orcConf, Configuration conf) =>
    OrcConfDetails.details[(int)orcConf].getBoolean(null, conf);
/**
 * Read the double value of this setting, letting table properties
 * override the configuration.
 */
public static double getDouble(this OrcConf orcConf, Properties tbl, Configuration conf) =>
    OrcConfDetails.details[(int)orcConf].getDouble(tbl, conf);
/**
 * Resolve the raw string value of this setting.
 * Precedence: table property, then the ORC attribute in the configuration,
 * then the legacy Hive name; null when none is set.
 */
private string lookupValue(Properties tbl, Configuration conf)
{
    // Table properties win over configuration values.
    string found = tbl?.getProperty(attribute);
    if (found != null || conf == null)
    {
        return found;
    }
    // Fall back to the ORC attribute name, then the legacy Hive name.
    return conf.get(attribute) ?? conf.get(hiveConfName);
}
/**
 * Read this setting as a string, preferring table properties over the
 * configuration, and falling back to the compiled-in default.
 */
public string getString(Properties tbl, Configuration conf)
{
    return lookupValue(tbl, conf) ?? (string)defaultValue;
}
/**
 * Read this setting as a long, preferring table properties over the
 * configuration, and falling back to the compiled-in default.
 * Fix: parse with the invariant culture so configured numbers are read
 * identically on every locale; this also matches the default branch,
 * which already uses CultureInfo.InvariantCulture.
 * @param tbl the table properties (may be null)
 * @param conf the configuration (may be null)
 * @return the configured value, or the default when unset
 */
public long getLong(Properties tbl, Configuration conf)
{
    string value = lookupValue(tbl, conf);
    if (value != null)
    {
        return Int64.Parse(value, CultureInfo.InvariantCulture);
    }
    return ((IConvertible)defaultValue).ToInt64(CultureInfo.InvariantCulture);
}
/**
 * Create reader options that pull all of their settings from the given
 * configuration.
 * @param conf the configuration to use when reading the file
 */
public ReaderOptions(Configuration conf) { this.conf = conf; }
/**
 * End-to-end seek test: writes 32K deterministic pseudo-random rows, reads
 * every row back via seekToRow in reverse order, then re-reads a byte range
 * restricted to two columns. A fixed seed (42) makes the generated data
 * reproducible, so expectations can be regenerated with createRandomRow.
 */
public void testZeroCopySeek()
{
    ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(typeof(BigRow));
    const int COUNT = 32768;
    long[] intValues = new long[COUNT];
    double[] doubleValues = new double[COUNT];
    string[] stringValues = new string[COUNT];
    byte[][] byteValues = new byte[COUNT][];
    string[] words = new string[128];
    using (Stream file = FileOpenWrite(TestFilePath))
    using (Writer writer = OrcFile.createWriter(TestFilePath, file, OrcFile.writerOptions(conf)
        .inspector(inspector)
        .stripeSize(200000)
        .bufferSize(65536)
        .rowIndexStride(1000)))
    {
        Random rand = new Random(42);
        // Build a small dictionary of hex words for the string columns.
        for (int i = 0; i < words.Length; ++i)
        {
            words[i] = Integer.toHexString(rand.Next());
        }
        // Long and string columns are written in identical pairs
        // (index 2i equals index 2i+1).
        for (int i = 0; i < COUNT / 2; ++i)
        {
            intValues[2 * i] = rand.NextLong();
            intValues[2 * i + 1] = intValues[2 * i];
            stringValues[2 * i] = words[rand.Next(words.Length)];
            stringValues[2 * i + 1] = stringValues[2 * i];
        }
        // Doubles and 20-byte blobs are unique per row.
        for (int i = 0; i < COUNT; ++i)
        {
            doubleValues[i] = rand.NextDouble();
            byte[] buf = new byte[20];
            rand.NextBytes(buf);
            byteValues[i] = buf;
        }
        for (int i = 0; i < COUNT; ++i)
        {
            writer.addRow(createRandomRow(intValues, doubleValues, stringValues, byteValues, words, i));
        }
    }
    Reader reader = OrcFile.createReader(TestFilePath, OrcFile.readerOptions(conf));
    Assert.Equal(COUNT, reader.getNumberOfRows());
    /* enable zero copy record reader */
#if false
    Configuration conf = new Configuration();
    HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVE_ORC_ZEROCOPY, true);
#endif
    using (RecordReader rows = reader.rows())
    {
        /* all tests are identical to the other seek() tests */
        // Seek backwards through every row and verify each field round-trips.
        for (int i = COUNT - 1; i >= 0; --i)
        {
            rows.seekToRow(i);
            OrcStruct row = (OrcStruct)rows.next();
            BigRow expected = createRandomRow(intValues, doubleValues, stringValues, byteValues, words, i);
            Assert.Equal(expected.boolean1, row.getFieldValue(0));
            Assert.Equal(expected.byte1, row.getFieldValue(1));
            Assert.Equal(expected.short1, row.getFieldValue(2));
            Assert.Equal(expected.int1, row.getFieldValue(3));
            Assert.Equal(expected.long1, row.getFieldValue(4));
            // Floating-point fields compare to 4 decimal places.
            Assert.Equal(expected.float1, (float)row.getFieldValue(5), 4);
            Assert.Equal(expected.double1, (double)row.getFieldValue(6), 4);
            Assert.Equal(expected.bytes1, row.getFieldValue(7));
            Assert.Equal(expected.string1, row.getFieldValue(8));
            List<InnerStruct> expectedList = expected.middle.list;
            List<object> actualList = (List<object>)((OrcStruct)row.getFieldValue(9)).getFieldValue(0);
            compareList(expectedList, actualList);
            compareList(expected.list, (List<object>)row.getFieldValue(10));
        }
    }
    IList<StripeInformation> stripes = reader.getStripes();
    // Compute the byte range covering stripe indexes 2..3 and the row number
    // of the last row in stripe index 2.
    long offsetOfStripe2 = 0;
    long offsetOfStripe4 = 0;
    long lastRowOfStripe2 = 0;
    for (int i = 0; i < 5; ++i)
    {
        StripeInformation stripe = stripes[i];
        if (i < 2)
        {
            lastRowOfStripe2 += stripe.getNumberOfRows();
        }
        else if (i == 2)
        {
            offsetOfStripe2 = stripe.getOffset();
            lastRowOfStripe2 += stripe.getNumberOfRows() - 1;
        }
        else if (i == 4)
        {
            offsetOfStripe4 = stripe.getOffset();
        }
    }
    bool[] columns = new bool[reader.getStatistics().Length];
    columns[5] = true; // long column
    columns[9] = true; // text column
    /* use zero copy record reader */
    using (RecordReader rows = reader.rowsOptions(new RecordReaderOptions()
        .range(offsetOfStripe2, offsetOfStripe4 - offsetOfStripe2)
        .include(columns)))
    {
        rows.seekToRow(lastRowOfStripe2);
        // Read two consecutive rows across the stripe boundary and check the
        // two included columns (long1, string1).
        for (int i = 0; i < 2; ++i)
        {
            OrcStruct row = (OrcStruct)rows.next();
            BigRow expected = createRandomRow(intValues, doubleValues, stringValues, byteValues, words, (int)(lastRowOfStripe2 + i));
            Assert.Equal(expected.long1, row.getFieldValue(4));
            Assert.Equal(expected.string1, row.getFieldValue(8));
        }
    }
}
/**
 * Build writer options from table properties and configuration, using the
 * OrcConf defaults for any value not explicitly set. Table properties take
 * precedence over configuration values.
 * @param tableProperties the properties of the table (may be null)
 * @param conf the configuration of the query
 */
public WriterOptions(Properties tableProperties, Configuration conf)
{
    configuration = conf;
    // All writers created from these options share the same memory manager.
    memoryManagerValue = getMemoryManager(conf);
    stripeSizeValue = OrcConf.STRIPE_SIZE.getLong(tableProperties, conf);
    blockSizeValue = OrcConf.BLOCK_SIZE.getLong(tableProperties, conf);
    rowIndexStrideValue = (int)OrcConf.ROW_INDEX_STRIDE.getLong(tableProperties, conf);
    bufferSizeValue = (int)OrcConf.BUFFER_SIZE.getLong(tableProperties, conf);
    blockPaddingValue = OrcConf.BLOCK_PADDING.getBoolean(tableProperties, conf);
    // Enum-valued settings are stored as strings; parse case-insensitively.
    compressValue = (CompressionKind)Enum.Parse(
        typeof(CompressionKind), OrcConf.COMPRESS.getString(tableProperties, conf), true);
    string versionName = OrcConf.WRITE_FORMAT.getString(tableProperties, conf);
    versionValue = VersionHelper.byName(versionName);
    string enString = OrcConf.ENCODING_STRATEGY.getString(tableProperties, conf);
    _encodingStrategy = (EncodingStrategy)Enum.Parse(typeof(EncodingStrategy), enString, true);
    string compString = OrcConf.COMPRESSION_STRATEGY.getString(tableProperties, conf);
    compressionStrategy = (CompressionStrategy)Enum.Parse(typeof(CompressionStrategy), compString, true);
    _paddingTolerance = OrcConf.BLOCK_PADDING_TOLERANCE.getDouble(tableProperties, conf);
    _bloomFilterColumns = OrcConf.BLOOM_FILTER_COLUMNS.getString(tableProperties, conf);
    _bloomFilterFpp = OrcConf.BLOOM_FILTER_FPP.getDouble(tableProperties, conf);
    // Stamp the writer with the local machine's time zone id.
    timeZone = TimeZoneInfo.Local.Id;
}
/**
 * Build a minimal in-memory ORC file (empty struct type, zero rows) served
 * through a mocked FileSystem, and verify that a reader can be created when
 * options.maxLength caps reads at the actual buffer length.
 */
public void testMaxLengthToReader()
{
    Configuration conf = new Configuration();
    OrcProto.Type rowType = OrcProto.Type.CreateBuilder()
        .SetKind(OrcProto.Type.Types.Kind.STRUCT).Build();
    OrcProto.Footer footer = OrcProto.Footer.CreateBuilder()
        .SetHeaderLength(0).SetContentLength(0).SetNumberOfRows(0)
        .SetRowIndexStride(0).AddTypes(rowType).Build();
    OrcProto.PostScript ps = OrcProto.PostScript.CreateBuilder()
        .SetCompression(OrcProto.CompressionKind.NONE)
        .SetFooterLength((ulong)footer.SerializedSize)
        .SetMagic("ORC").AddVersion(0).AddVersion(11).Build();
    // File layout: footer, postscript, then one trailing byte holding the
    // postscript's serialized size.
    DataOutputBuffer buffer = new DataOutputBuffer();
    footer.WriteTo(buffer);
    ps.WriteTo(buffer);
    buffer.write(ps.SerializedSize);
    FileSystem fs = Mockito.mock(typeof(FileSystem), settings);
    FSDataInputStream file = new FSDataInputStream(new BufferInStream(buffer.getData(), buffer.getLength()));
    string p = "/dir/file.orc";
    Mockito.when(fs.open(p)).thenReturn(file);
    OrcFile.ReaderOptions options = OrcFile.readerOptions(conf);
    options.filesystem(fs);
    // Cap reads at the real buffer length regardless of the reported status.
    options.maxLength(buffer.getLength());
    // NOTE(review): the mocked FileStatus reports a length of 10, which does
    // not match the buffer — presumably maxLength makes the status length
    // irrelevant; confirm against the FileStatus constructor's parameters.
    Mockito.when(fs.getFileStatus(p))
        .thenReturn(new FileStatus(10, false, 3, 3000, 0, p));
    Reader reader = OrcFile.createReader(p, options);
}
/**
 * Read the long value of this setting, letting table properties override
 * the configuration.
 */
public static long getLong(this OrcConf orcConf, Properties tbl, Configuration conf) =>
    OrcConfDetails.details[(int)orcConf].getLong(tbl, conf);
/**
 * Read this setting as a boolean, preferring table properties over the
 * configuration, and falling back to the compiled-in default.
 */
public bool getBoolean(Properties tbl, Configuration conf)
{
    string value = lookupValue(tbl, conf);
    return value == null ? (bool)defaultValue : Boolean.Parse(value);
}
/**
 * Test double for MemoryManager with a fixed pool size and scale rate.
 * @param conf unused by this subclass; kept for signature parity with the base
 * @param totalSpace the total memory pool reported by this manager
 * @param rate the fixed scale value (presumably handed to callbacks by an
 *             override elsewhere — confirm against the subclass's members)
 */
public MyMemoryManager(Configuration conf, long totalSpace, double rate) : base(totalSpace) { this.totalSpace = totalSpace; this.rate = rate; }
/**
 * Wrap a configuration in ORC-specific options, delegating all defaults to
 * the base class.
 * @param conf the configuration to read settings from
 */
public OrcOptions(Configuration conf) : base(conf) { }
/**
 * Create a reader that merge sorts the ACID events together.
 * @param conf the configuration
 * @param collapseEvents should the events on the same row be collapsed
 * @param reader the reader for the base file, or null when there is none
 * @param isOriginal is the base file a pre-acid file
 * @param bucket the bucket we are reading
 * @param validTxnList the list of valid transactions
 * @param options the options to read with
 * @param deltaDirectory the list of delta directories to include
 */
OrcRawRecordMerger(Configuration conf, bool collapseEvents, Reader reader, bool isOriginal, int bucket, ValidTxnList validTxnList, Reader.Options options, Path[] deltaDirectory)
{
    this.conf = conf;
    this.collapse = collapseEvents;
    this.offset = options.getOffset();
    this.length = options.getLength();
    this.validTxnList = validTxnList;
    // The row schema must be known up front to build the event schema below.
    TypeDescription typeDescr = OrcUtils.getDesiredRowTypeDescr(conf);
    if (typeDescr == null)
    {
        throw new IOException(ErrorMsg.SCHEMA_REQUIRED_TO_READ_ACID_TABLES.getErrorCodedMsg());
    }
    objectInspector = OrcRecordUpdater.createEventSchema
        (OrcStruct.createObjectInspector(0, OrcUtils.getOrcTypes(typeDescr)));
    // modify the options to reflect the event instead of the base row
    Reader.Options eventOptions = createEventOptions(options);
    if (reader == null)
    {
        // No base file: events come only from the deltas.
        baseReader = null;
    }
    else
    {
        // find the min/max based on the offset and length
        if (isOriginal)
        {
            discoverOriginalKeyBounds(reader, bucket, options);
        }
        else
        {
            discoverKeyBounds(reader, options);
        }
        LOG.info("min key = " + minKey + ", max key = " + maxKey);
        // use the min/max instead of the byte range
        ReaderPair pair;
        ReaderKey key = new ReaderKey();
        if (isOriginal)
        {
            // Pre-acid files are read to the end of the file; the discovered
            // min/max keys bound the rows instead of the byte range.
            options = options.clone();
            options.range(options.getOffset(), Long.MAX_VALUE);
            pair = new OriginalReaderPair(key, reader, bucket, minKey, maxKey, options);
        }
        else
        {
            pair = new ReaderPair(key, reader, bucket, minKey, maxKey, eventOptions, 0);
        }
        // if there is at least one record, put it in the map
        if (pair.nextRecord != null)
        {
            readers.put(key, pair);
        }
        baseReader = pair.recordReader;
    }
    // we always want to read all of the deltas
    eventOptions.range(0, Long.MAX_VALUE);
    if (deltaDirectory != null)
    {
        foreach (Path delta in deltaDirectory)
        {
            ReaderKey key = new ReaderKey();
            Path deltaFile = AcidUtils.createBucketFile(delta, bucket);
            AcidUtils.ParsedDelta deltaDir = AcidUtils.parsedDelta(delta);
            FileSystem fs = deltaFile.getFileSystem(conf);
            // A flush length of -1 marks the delta's bucket file as unusable;
            // it is also skipped when the bucket file does not exist.
            long length = getLastFlushLength(fs, deltaFile);
            if (length != -1 && fs.exists(deltaFile))
            {
                Reader deltaReader = OrcFile.createReader(deltaFile,
                    OrcFile.readerOptions(conf).maxLength(length));
                Reader.Options deltaEventOptions = null;
                if (eventOptions.getSearchArgument() != null)
                {
                    // Turn off the sarg before pushing it to delta. We never want to push a sarg to a delta as
                    // it can produce wrong results (if the latest valid version of the record is filtered out by
                    // the sarg) or ArrayOutOfBounds errors (when the sarg is applied to a delete record)
                    // unless the delta only has insert events
                    OrcRecordUpdater.AcidStats acidStats = OrcRecordUpdater.parseAcidStats(deltaReader);
                    if (acidStats.deletes > 0 || acidStats.updates > 0)
                    {
                        deltaEventOptions = eventOptions.clone().searchArgument(null, null);
                    }
                }
                ReaderPair deltaPair;
                deltaPair = new ReaderPair(key, deltaReader, bucket, minKey, maxKey,
                    deltaEventOptions != null ? deltaEventOptions : eventOptions, deltaDir.getStatementId());
                // if there is at least one record, put it in the map
                if (deltaPair.nextRecord != null)
                {
                    readers.put(key, deltaPair);
                }
            }
        }
    }
    // get the first record
    Map.Entry<ReaderKey, ReaderPair> entry = readers.pollFirstEntry();
    if (entry == null)
    {
        // No readers produced any records.
        columns = 0;
        primary = null;
    }
    else
    {
        primary = entry.getValue();
        if (readers.isEmpty())
        {
            secondaryKey = null;
        }
        else
        {
            secondaryKey = readers.firstKey();
        }
        // get the number of columns in the user's rows
        columns = primary.getColumns();
    }
}
/**
 * Dump the metadata of one or more ORC files as JSON: file-level info,
 * schema, stripe and file statistics, per-stripe streams and encodings,
 * and optionally row-group indexes and bloom filters for chosen columns.
 * @param files the ORC files to dump
 * @param conf the configuration used to open the readers
 * @param rowIndexCols column ids whose row indexes should be printed (may be null)
 * @param prettyPrint whether to pretty-print the JSON output
 * @param printTimeZone whether to include each stripe writer's time zone
 */
public static void printJsonMetaData(List<string> files, Configuration conf, List<int> rowIndexCols, bool prettyPrint, bool printTimeZone)
{
    JsonWriter writer = new JsonWriter();
    bool multiFile = files.Count > 1;
    // Multiple files are emitted as a JSON array of per-file objects.
    if (multiFile)
    {
        writer.array();
    }
    else
    {
        writer.newObject();
    }
    foreach (string filename in files)
    {
        if (multiFile)
        {
            writer.newObject();
        }
        writer.key("fileName").value(Path.GetFileName(filename));
        Reader reader = OrcFile.createReader(filename, OrcFile.readerOptions(conf));
        writer.key("fileVersion").value(OrcFile.VersionHelper.getName(reader.getFileVersion()));
        writer.key("writerVersion").value(reader.getWriterVersion().ToString());
        using (RecordReaderImpl rows = (RecordReaderImpl)reader.rows())
        {
            writer.key("numberOfRows").value(reader.getNumberOfRows());
            writer.key("compression").value(reader.getCompression().ToString());
            if (reader.getCompression() != CompressionKind.NONE)
            {
                writer.key("compressionBufferSize").value(reader.getCompressionSize());
            }
            writer.key("schemaString").value(reader.getObjectInspector().getTypeName());
            writer.key("schema").array();
            writeSchema(writer, reader.getTypes());
            writer.endArray();
            // Per-stripe column statistics.
            writer.key("stripeStatistics").array();
            List<StripeStatistics> stripeStatistics = reader.getStripeStatistics();
            for (int n = 0; n < stripeStatistics.Count; n++)
            {
                writer.newObject();
                writer.key("stripeNumber").value(n + 1);
                StripeStatistics ss = stripeStatistics[n];
                writer.key("columnStatistics").array();
                for (int i = 0; i < ss.getColumnStatistics().Length; i++)
                {
                    writer.newObject();
                    writer.key("columnId").value(i);
                    writeColumnStatistics(writer, ss.getColumnStatistics()[i]);
                    writer.endObject();
                }
                writer.endArray();
                writer.endObject();
            }
            writer.endArray();
            // Whole-file column statistics.
            ColumnStatistics[] stats = reader.getStatistics();
            int colCount = stats.Length;
            writer.key("fileStatistics").array();
            for (int i = 0; i < stats.Length; ++i)
            {
                writer.newObject();
                writer.key("columnId").value(i);
                writeColumnStatistics(writer, stats[i]);
                writer.endObject();
            }
            writer.endArray();
            // Per-stripe layout: streams, encodings, and optional indexes.
            writer.key("stripes").array();
            int stripeIx = -1;
            foreach (StripeInformation stripe in reader.getStripes())
            {
                ++stripeIx;
                long stripeStart = stripe.getOffset();
                OrcProto.StripeFooter footer = rows.readStripeFooter(stripe);
                writer.newObject(); // start of stripe information
                writer.key("stripeNumber").value(stripeIx + 1);
                writer.key("stripeInformation");
                writeStripeInformation(writer, stripe);
                if (printTimeZone)
                {
                    writer.key("writerTimezone").value(
                        footer.HasWriterTimezone ? footer.WriterTimezone : FileDump.UNKNOWN);
                }
                long sectionStart = stripeStart;
                writer.key("streams").array();
                foreach (OrcProto.Stream section in footer.StreamsList)
                {
                    writer.newObject();
                    string kind = section.HasKind ? section.Kind.ToString() : FileDump.UNKNOWN;
                    writer.key("columnId").value(section.Column);
                    writer.key("section").value(kind);
                    writer.key("startOffset").value(sectionStart);
                    writer.key("length").value(section.Length);
                    // Streams are laid out back to back inside the stripe.
                    sectionStart += (long)section.Length;
                    writer.endObject();
                }
                writer.endArray();
                writer.key("encodings").array();
                for (int i = 0; i < footer.ColumnsCount; ++i)
                {
                    writer.newObject();
                    OrcProto.ColumnEncoding encoding = footer.ColumnsList[i];
                    writer.key("columnId").value(i);
                    writer.key("kind").value(encoding.Kind.ToString());
                    if (encoding.Kind == OrcProto.ColumnEncoding.Types.Kind.DICTIONARY ||
                        encoding.Kind == OrcProto.ColumnEncoding.Types.Kind.DICTIONARY_V2)
                    {
                        writer.key("dictionarySize").value(encoding.DictionarySize);
                    }
                    writer.endObject();
                }
                writer.endArray();
                if (rowIndexCols != null && rowIndexCols.Count != 0)
                {
                    // include the columns that are specified, only if the columns are included, bloom filter
                    // will be read
                    bool[] sargColumns = new bool[colCount];
                    foreach (int colIdx in rowIndexCols)
                    {
                        sargColumns[colIdx] = true;
                    }
                    RecordReaderImpl.Index indices = rows.readRowIndex(stripeIx, null, sargColumns);
                    writer.key("indexes").array();
                    foreach (int col in rowIndexCols)
                    {
                        writer.newObject();
                        writer.key("columnId").value(col);
                        writeRowGroupIndexes(writer, col, indices.getRowGroupIndex());
                        writeBloomFilterIndexes(writer, col, indices.getBloomFilterIndex());
                        writer.endObject();
                    }
                    writer.endArray();
                }
                writer.endObject(); // end of stripe information
            }
            writer.endArray();
            long fileLen = new FileInfo(filename).Length;
            long paddedBytes = FileDump.getTotalPaddingSize(reader);
            // empty ORC file is ~45 bytes. Assumption here is file length always >0
            double percentPadding = ((double)paddedBytes / (double)fileLen) * 100;
            writer.key("fileLength").value(fileLen);
            writer.key("paddingLength").value(paddedBytes);
            writer.key("paddingRatio").value(percentPadding);
            // NOTE(review): rows is also disposed by the enclosing using block;
            // this explicit close() looks redundant — confirm it is idempotent.
            rows.close();
        }
        writer.endObject();
    }
    if (multiFile)
    {
        writer.endArray();
    }
    if (prettyPrint)
    {
#if false
        // Pretty-printing via JSONObject/JSONArray is disabled in this port.
        string prettyJson;
        if (multiFile)
        {
            JSONArray jsonArray = new JSONArray(writer.toString());
            prettyJson = jsonArray.toString(2);
        }
        else
        {
            JSONObject jsonObject = new JSONObject(writer.toString());
            prettyJson = jsonObject.toString(2);
        }
#else
        string prettyJson = writer.ToString();
#endif
        System.Console.WriteLine(prettyJson);
    }
    else
    {
        System.Console.WriteLine(writer.ToString());
    }
}