/**
 * Registers 20 writers that each ask for a quarter of the total pool, adds
 * 10000 rows one at a time, and then checks that every callback logged
 * exactly two notifications, each equal to 0.2 within ERROR.
 */
 public void testCallback()
 {
     Configuration conf = new Configuration();
     MemoryManager manager = new MemoryManager(configuredPoolSize);
     // Every writer requests the same share: one quarter of the pool.
     long perWriterAllocation = manager.getTotalMemoryPool() / 4;
     LoggingCallback[] calls = new LoggingCallback[20];
     int writerId = 0;
     while (writerId < calls.Length)
     {
         LoggingCallback callback = new LoggingCallback();
         calls[writerId] = callback;
         manager.addWriter(writerId.ToString(), perWriterAllocation, callback);
         writerId++;
     }

     // add enough rows to get the memory manager to check the limits
     for (int row = 0; row != 10000; row++)
     {
         manager.addedRow(1);
     }

     foreach (LoggingCallback callback in calls)
     {
         Assert.Equal(2, callback.LogLength);
         foreach (double scale in callback.Log)
         {
             Assert.Equal(0.2, scale, ERROR);
         }
     }
 }
 /**
  * Prepares the state shared by the tests: a fresh Configuration, a newly
  * created working directory with a GUID name under the system temp path,
  * and the runtime type name of the concrete test class.
  */
 public OrcTestBase()
 {
     conf = new Configuration();

     string tempRoot = Path.GetTempPath();
     string uniqueName = Guid.NewGuid().ToString();
     workDir = Path.Combine(tempRoot, uniqueName);
     Directory.CreateDirectory(workDir);

     testClassName = this.GetType().Name;
 }
Beispiel #3
0
 /**
  * Build writer options from table properties and a configuration.
  * Both arguments are handed unchanged to the WriterOptions constructor.
  * @param tableProperties the properties of the table
  * @param conf the configuration of the query
  * @return a new WriterOptions object that the caller can modify
  */
 public static WriterOptions writerOptions(Properties tableProperties, Configuration conf)
 {
     WriterOptions options = new WriterOptions(tableProperties, conf);
     return options;
 }
Beispiel #4
0
 /**
  * Return the memory manager held by the memoryManager member.
  * @param conf accepted for signature compatibility; it is not read here
  * @return the value of memoryManager.Value
  */
 private static MemoryManager getMemoryManager(Configuration conf)
 {
     MemoryManager manager = memoryManager.Value;
     return manager;
 }
Beispiel #5
0
 /**
  * Create a set of reader options backed by a configuration.
  * @param conf the configuration passed to the ReaderOptions constructor
  * @return a new ReaderOptions object
  */
 public static ReaderOptions readerOptions(Configuration conf)
 {
     ReaderOptions options = new ReaderOptions(conf);
     return options;
 }
Beispiel #6
0
 /**
  * Build writer options from a configuration alone.
  * No table properties are supplied: null is passed in their place.
  * @param conf the configuration to use for values
  * @return a new WriterOptions object that the caller can modify
  */
 public static WriterOptions writerOptions(Configuration conf)
 {
     WriterOptions options = new WriterOptions(null, conf);
     return options;
 }
Beispiel #7
0
 /**
  * Resolve this setting as a double.
  * When lookupValue finds a textual value it is parsed with the invariant
  * culture; otherwise defaultValue is converted, also with the invariant
  * culture, so both paths interpret numbers the same way on every machine.
  * @param tbl the table properties handed to lookupValue
  * @param conf the configuration handed to lookupValue
  * @return the configured value, or the default when nothing is configured
  */
 public double getDouble(Properties tbl, Configuration conf)
 {
     string value = lookupValue(tbl, conf);
     if (value != null)
     {
         // Configuration text is machine-readable. Parsing it with the current
         // thread culture can misread "0.9" where '.' is not the decimal separator.
         return Double.Parse(value, CultureInfo.InvariantCulture);
     }
     return ((IConvertible)defaultValue).ToDouble(CultureInfo.InvariantCulture);
 }
Beispiel #8
0
 /**
  * Read this setting as a string using only the configuration.
  * Delegates to the details entry indexed by the enum value, passing null
  * for the table properties.
  * @param orcConf the setting to read
  * @param conf the configuration to consult
  * @return whatever the details entry's getString returns
  */
 public static string getString(this OrcConf orcConf, Configuration conf)
 {
     int slot = (int)orcConf;
     return OrcConfDetails.details[slot].getString(null, conf);
 }
Beispiel #9
0
 /**
  * Read this setting as a double using only the configuration.
  * Delegates to the details entry indexed by the enum value, passing null
  * for the table properties.
  * @param orcConf the setting to read
  * @param conf the configuration to consult
  * @return whatever the details entry's getDouble returns
  */
 public static double getDouble(this OrcConf orcConf, Configuration conf)
 {
     int slot = (int)orcConf;
     return OrcConfDetails.details[slot].getDouble(null, conf);
 }
 /**
  * Sets "hive.exec.orc.memory.pool" to "0.9" and checks that the memory
  * manager's total pool lies strictly between 0.899 and 0.901 times the
  * maximum heap size reported by the memory bean.
  */
 public void testConfig()
 {
     Configuration conf = new Configuration();
     conf.set("hive.exec.orc.memory.pool", "0.9");
     MemoryManager mgr = new MemoryManager(conf);
     long mem = ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getMax();
     System.Console.WriteLine("Memory = " + mem);
     long pool = mgr.getTotalMemoryPool();
     // xUnit takes the condition first and the failure message second; the
     // Java (message, condition) order does not match any Assert.True overload.
     Assert.True(mem * 0.899 < pool, "Pool too small: " + pool);
     Assert.True(pool < mem * 0.901, "Pool too big: " + pool);
 }
Beispiel #11
0
 /**
  * Read this setting as a boolean using only the configuration.
  * Delegates to the details entry indexed by the enum value, passing null
  * for the table properties.
  * @param orcConf the setting to read
  * @param conf the configuration to consult
  * @return whatever the details entry's getBoolean returns
  */
 public static bool getBoolean(this OrcConf orcConf, Configuration conf)
 {
     int slot = (int)orcConf;
     return OrcConfDetails.details[slot].getBoolean(null, conf);
 }
Beispiel #12
0
 /**
  * Read this setting as a double from table properties and a configuration.
  * Delegates to the details entry indexed by the enum value.
  * @param orcConf the setting to read
  * @param tbl the table properties to forward
  * @param conf the configuration to forward
  * @return whatever the details entry's getDouble returns
  */
 public static double getDouble(this OrcConf orcConf, Properties tbl, Configuration conf)
 {
     int slot = (int)orcConf;
     return OrcConfDetails.details[slot].getDouble(tbl, conf);
 }
Beispiel #13
0
 /**
  * Find the textual value of this setting, trying sources in order:
  * the table property named by attribute, then the configuration entry
  * named by attribute, then the configuration entry named by hiveConfName.
  * @param tbl the table properties; skipped when null
  * @param conf the configuration; skipped when null
  * @return the first non-null value found, or null when there is none
  */
 private string lookupValue(Properties tbl, Configuration conf)
 {
     string fromTable = tbl == null ? null : tbl.getProperty(attribute);
     if (fromTable != null || conf == null)
     {
         // Either the table answered, or there is nowhere else to look.
         return fromTable;
     }

     // Prefer the ORC key; fall back to the Hive key.
     return conf.get(attribute) ?? conf.get(hiveConfName);
 }
Beispiel #14
0
 /**
  * Resolve this setting as a string.
  * @param tbl the table properties handed to lookupValue
  * @param conf the configuration handed to lookupValue
  * @return the value found by lookupValue, or defaultValue cast to a
  *         string when nothing is configured
  */
 public string getString(Properties tbl, Configuration conf)
 {
     string configured = lookupValue(tbl, conf);
     if (configured != null)
     {
         return configured;
     }
     return (String)defaultValue;
 }
Beispiel #15
0
 /**
  * Resolve this setting as a 64-bit integer.
  * When lookupValue finds a textual value it is parsed with the invariant
  * culture; otherwise defaultValue is converted, also with the invariant
  * culture, so both paths interpret numbers the same way on every machine.
  * @param tbl the table properties handed to lookupValue
  * @param conf the configuration handed to lookupValue
  * @return the configured value, or the default when nothing is configured
  */
 public long getLong(Properties tbl, Configuration conf)
 {
     string value = lookupValue(tbl, conf);
     if (value != null)
     {
         // Parse machine-readable configuration text independently of the
         // current thread culture (sign symbols differ between cultures).
         return Int64.Parse(value, CultureInfo.InvariantCulture);
     }
     return ((IConvertible)defaultValue).ToInt64(CultureInfo.InvariantCulture);
 }
Beispiel #16
0
 /**
  * Create reader options that remember the given configuration.
  * @param conf the configuration stored on this instance
  */
 public ReaderOptions(Configuration conf)
 {
     this.conf = conf;
 }
Beispiel #17
0
        /**
         * Writes COUNT rows to TestFilePath and verifies random-access reads.
         * First every row is read back from the last to the first with
         * seekToRow and compared field by field against a regenerated row.
         * Then a second reader is limited to the byte range between the
         * offsets of stripes[2] and stripes[4] with two columns included, and
         * the last row of stripes[2] plus the row after it are checked.
         * The expected values are reproducible because the Random is seeded
         * with a constant.
         */
        public void testZeroCopySeek()
        {
            ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(typeof(BigRow));

            const int COUNT = 32768;
            // Source arrays from which createRandomRow builds both the written
            // and the expected rows.
            long[] intValues = new long[COUNT];
            double[] doubleValues = new double[COUNT];
            string[] stringValues = new string[COUNT];
            byte[][] byteValues = new byte[COUNT][];
            string[] words = new string[128];

            using (Stream file = FileOpenWrite(TestFilePath))
            using (Writer writer = OrcFile.createWriter(TestFilePath, file, OrcFile.writerOptions(conf)
                .inspector(inspector)
                .stripeSize(200000)
                .bufferSize(65536)
                .rowIndexStride(1000)))
            {
                // Fixed seed: the draws below must stay in this order for the
                // data to be reproducible.
                Random rand = new Random(42);
                for (int i = 0; i < words.Length; ++i)
                {
                    words[i] = Integer.toHexString(rand.Next());
                }
                // Integer and string values are generated in pairs: element
                // 2*i+1 repeats element 2*i.
                for (int i = 0; i < COUNT / 2; ++i)
                {
                    intValues[2 * i] = rand.NextLong();
                    intValues[2 * i + 1] = intValues[2 * i];
                    stringValues[2 * i] = words[rand.Next(words.Length)];
                    stringValues[2 * i + 1] = stringValues[2 * i];
                }
                for (int i = 0; i < COUNT; ++i)
                {
                    doubleValues[i] = rand.NextDouble();
                    byte[] buf = new byte[20];
                    rand.NextBytes(buf);
                    byteValues[i] = buf;
                }
                for (int i = 0; i < COUNT; ++i)
                {
                    writer.addRow(createRandomRow(intValues, doubleValues, stringValues,
                        byteValues, words, i));
                }
            }

            Reader reader = OrcFile.createReader(TestFilePath, OrcFile.readerOptions(conf));
            Assert.Equal(COUNT, reader.getNumberOfRows());
            // NOTE: the configuration change below is compiled out, so this
            // test does not switch the zero copy setting on.
            /* enable zero copy record reader */
            #if false
            Configuration conf = new Configuration();
            HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVE_ORC_ZEROCOPY, true);
            #endif
            using (RecordReader rows = reader.rows())
            {
                /* all tests are identical to the other seek() tests */
                // Walk backwards so that every read requires a seek.
                for (int i = COUNT - 1; i >= 0; --i)
                {
                    rows.seekToRow(i);
                    OrcStruct row = (OrcStruct)rows.next();
                    BigRow expected = createRandomRow(intValues, doubleValues,
                        stringValues, byteValues, words, i);
                    Assert.Equal(expected.boolean1, row.getFieldValue(0));
                    Assert.Equal(expected.byte1, row.getFieldValue(1));
                    Assert.Equal(expected.short1, row.getFieldValue(2));
                    Assert.Equal(expected.int1, row.getFieldValue(3));
                    Assert.Equal(expected.long1, row.getFieldValue(4));
                    Assert.Equal(expected.float1, (float)row.getFieldValue(5), 4);
                    Assert.Equal(expected.double1, (double)row.getFieldValue(6), 4);
                    Assert.Equal(expected.bytes1, row.getFieldValue(7));
                    Assert.Equal(expected.string1, row.getFieldValue(8));
                    List<InnerStruct> expectedList = expected.middle.list;
                    List<object> actualList =
                        (List<object>)((OrcStruct)row.getFieldValue(9)).getFieldValue(0);
                    compareList(expectedList, actualList);
                    compareList(expected.list, (List<object>)row.getFieldValue(10));
                }
            }

            IList<StripeInformation> stripes = reader.getStripes();
            long offsetOfStripe2 = 0;
            long offsetOfStripe4 = 0;
            long lastRowOfStripe2 = 0;
            // Indexes stripes[0..4] directly, so the file must contain at
            // least five stripes.
            for (int i = 0; i < 5; ++i)
            {
                StripeInformation stripe = stripes[i];
                if (i < 2)
                {
                    // Rows in the stripes before stripes[2].
                    lastRowOfStripe2 += stripe.getNumberOfRows();
                }
                else if (i == 2)
                {
                    offsetOfStripe2 = stripe.getOffset();
                    // Minus one: index of the last row, not the row count.
                    lastRowOfStripe2 += stripe.getNumberOfRows() - 1;
                }
                else if (i == 4)
                {
                    offsetOfStripe4 = stripe.getOffset();
                }
            }
            bool[] columns = new bool[reader.getStatistics().Length];
            // NOTE(review): column ids 5 and 9 are read back below as fields
            // 4 and 8; presumably column 0 is the root struct - confirm.
            columns[5] = true; // long column
            columns[9] = true; // text column
            /* use zero copy record reader */
            using (RecordReader rows = reader.rowsOptions(new RecordReaderOptions()
                .range(offsetOfStripe2, offsetOfStripe4 - offsetOfStripe2)
                .include(columns)))
            {
                rows.seekToRow(lastRowOfStripe2);
                // Two reads: the last row of stripes[2] and the row after it.
                for (int i = 0; i < 2; ++i)
                {
                    OrcStruct row = (OrcStruct)rows.next();
                    BigRow expected = createRandomRow(intValues, doubleValues,
                                                      stringValues, byteValues, words,
                                                      (int)(lastRowOfStripe2 + i));

                    Assert.Equal(expected.long1, row.getFieldValue(4));
                    Assert.Equal(expected.string1, row.getFieldValue(8));
                }
            }
        }
Beispiel #18
0
            /**
             * Initialise every writer option from the OrcConf settings, each
             * resolved against the table properties and the configuration.
             * Also stores the configuration, obtains the memory manager via
             * getMemoryManager, and records the id of the local time zone.
             * @param tableProperties the table properties passed to each OrcConf lookup
             * @param conf the configuration passed to each OrcConf lookup
             */
            public WriterOptions(Properties tableProperties, Configuration conf)
            {
                configuration = conf;
                memoryManagerValue = getMemoryManager(conf);

                // Sizes, strides and padding.
                stripeSizeValue = OrcConf.STRIPE_SIZE.getLong(tableProperties, conf);
                blockSizeValue = OrcConf.BLOCK_SIZE.getLong(tableProperties, conf);
                long strideSetting = OrcConf.ROW_INDEX_STRIDE.getLong(tableProperties, conf);
                rowIndexStrideValue = (int)strideSetting;
                long bufferSetting = OrcConf.BUFFER_SIZE.getLong(tableProperties, conf);
                bufferSizeValue = (int)bufferSetting;
                blockPaddingValue = OrcConf.BLOCK_PADDING.getBoolean(tableProperties, conf);

                // Compression codec; the enum name is matched ignoring case.
                string compressionName = OrcConf.COMPRESS.getString(tableProperties, conf);
                compressValue = (CompressionKind)Enum.Parse(typeof(CompressionKind), compressionName, true);

                // File format version.
                string formatVersionName = OrcConf.WRITE_FORMAT.getString(tableProperties, conf);
                versionValue = VersionHelper.byName(formatVersionName);

                // Encoding and compression strategies, also matched ignoring case.
                string encodingName = OrcConf.ENCODING_STRATEGY.getString(tableProperties, conf);
                _encodingStrategy = (EncodingStrategy)Enum.Parse(typeof(EncodingStrategy), encodingName, true);
                string strategyName = OrcConf.COMPRESSION_STRATEGY.getString(tableProperties, conf);
                compressionStrategy = (CompressionStrategy)Enum.Parse(typeof(CompressionStrategy), strategyName, true);

                _paddingTolerance = OrcConf.BLOCK_PADDING_TOLERANCE.getDouble(tableProperties, conf);

                // Bloom filter settings.
                _bloomFilterColumns = OrcConf.BLOOM_FILTER_COLUMNS.getString(tableProperties, conf);
                _bloomFilterFpp = OrcConf.BLOOM_FILTER_FPP.getDouble(tableProperties, conf);

                timeZone = TimeZoneInfo.Local.Id;
            }
 /**
  * Serialises a footer holding a single STRUCT type, a postscript and the
  * postscript size into a buffer, serves that buffer through a mocked
  * FileSystem, and opens a reader whose maxLength is the buffer length.
  * There are no assertions; the test only requires that createReader
  * completes without throwing.
  */
 public void testMaxLengthToReader()
 {
     Configuration conf = new Configuration();

     // Build the file tail: footer, then postscript, then postscript size.
     OrcProto.Type structType = OrcProto.Type.CreateBuilder()
         .SetKind(OrcProto.Type.Types.Kind.STRUCT)
         .Build();
     OrcProto.Footer emptyFooter = OrcProto.Footer.CreateBuilder()
         .SetHeaderLength(0)
         .SetContentLength(0)
         .SetNumberOfRows(0)
         .SetRowIndexStride(0)
         .AddTypes(structType)
         .Build();
     OrcProto.PostScript postScript = OrcProto.PostScript.CreateBuilder()
         .SetCompression(OrcProto.CompressionKind.NONE)
         .SetFooterLength((ulong)emptyFooter.SerializedSize)
         .SetMagic("ORC")
         .AddVersion(0)
         .AddVersion(11)
         .Build();
     DataOutputBuffer tail = new DataOutputBuffer();
     emptyFooter.WriteTo(tail);
     postScript.WriteTo(tail);
     tail.write(postScript.SerializedSize);

     // Serve the buffer through a mocked file system.
     FileSystem mockFs = Mockito.mock(typeof(FileSystem), settings);
     BufferInStream tailStream = new BufferInStream(tail.getData(), tail.getLength());
     FSDataInputStream input = new FSDataInputStream(tailStream);
     string orcPath = "/dir/file.orc";
     Mockito.when(mockFs.open(orcPath)).thenReturn(input);

     OrcFile.ReaderOptions readerOpts = OrcFile.readerOptions(conf);
     readerOpts.filesystem(mockFs);
     readerOpts.maxLength(tail.getLength());
     FileStatus reportedStatus = new FileStatus(10, false, 3, 3000, 0, orcPath);
     Mockito.when(mockFs.getFileStatus(orcPath)).thenReturn(reportedStatus);

     Reader reader = OrcFile.createReader(orcPath, readerOpts);
 }
Beispiel #20
0
 /**
  * Read this setting as a long from table properties and a configuration.
  * Delegates to the details entry indexed by the enum value.
  * @param orcConf the setting to read
  * @param tbl the table properties to forward
  * @param conf the configuration to forward
  * @return whatever the details entry's getLong returns
  */
 public static long getLong(this OrcConf orcConf, Properties tbl, Configuration conf)
 {
     int slot = (int)orcConf;
     return OrcConfDetails.details[slot].getLong(tbl, conf);
 }
Beispiel #21
0
 /**
  * Resolve this setting as a boolean.
  * @param tbl the table properties handed to lookupValue
  * @param conf the configuration handed to lookupValue
  * @return the value found by lookupValue parsed with Boolean.Parse, or
  *         defaultValue cast to bool when nothing is configured
  */
 public bool getBoolean(Properties tbl, Configuration conf)
 {
     string configured = lookupValue(tbl, conf);
     if (configured == null)
     {
         return (bool)defaultValue;
     }
     return Boolean.Parse(configured);
 }
Beispiel #22
0
 /**
  * Create a memory manager with an explicit total space and rate.
  * @param conf not read by this constructor
  * @param totalSpace passed to the base constructor and stored on this instance
  * @param rate stored on this instance
  */
 public MyMemoryManager(Configuration conf, long totalSpace, double rate)
     : base(totalSpace)
 {
     this.totalSpace = totalSpace;
     this.rate = rate;
 }
 /**
  * Create options by forwarding the configuration to the base constructor.
  * @param conf the configuration passed to the base class
  */
 public OrcOptions(Configuration conf)
     : base(conf)
 {
 }
        /**
         * Create a reader that merge sorts the ACID events together.
         * A reader pair is built for the base file (when one is given) and for
         * each delta bucket file that exists; pairs that have a first record
         * are added to the readers map, and the first map entry becomes the
         * primary reader.
         * @param conf the configuration
         * @param collapseEvents should the events on the same row be collapsed
         * @param reader the reader for the base file, or null when there is no base
         * @param isOriginal is the base file a pre-acid file
         * @param bucket the bucket we are reading
         * @param validTxnList stored on this instance; not otherwise used here
         * @param options the options to read with
         * @param deltaDirectory the list of delta directories to include (may be null)
         * @throws IOException when no row type description can be obtained from conf
         */
        OrcRawRecordMerger(Configuration conf,
                           bool collapseEvents,
                           Reader reader,
                           bool isOriginal,
                           int bucket,
                           ValidTxnList validTxnList,
                           Reader.Options options,
                           Path[] deltaDirectory)
        {
            this.conf = conf;
            this.collapse = collapseEvents;
            this.offset = options.getOffset();
            this.length = options.getLength();
            this.validTxnList = validTxnList;
            // A schema is mandatory: fail early when none is configured.
            TypeDescription typeDescr = OrcUtils.getDesiredRowTypeDescr(conf);
            if (typeDescr == null) {
              throw new IOException(ErrorMsg.SCHEMA_REQUIRED_TO_READ_ACID_TABLES.getErrorCodedMsg());
            }

            objectInspector = OrcRecordUpdater.createEventSchema
            (OrcStruct.createObjectInspector(0, OrcUtils.getOrcTypes(typeDescr)));

            // modify the options to reflect the event instead of the base row
            Reader.Options eventOptions = createEventOptions(options);
            if (reader == null)
            {
                baseReader = null;
            }
            else
            {

                // find the min/max based on the offset and length
                if (isOriginal)
                {
                    discoverOriginalKeyBounds(reader, bucket, options);
                }
                else
                {
                    discoverKeyBounds(reader, options);
                }
                LOG.info("min key = " + minKey + ", max key = " + maxKey);
                // use the min/max instead of the byte range
                ReaderPair pair;
                ReaderKey key = new ReaderKey();
                if (isOriginal)
                {
                    // Work on a clone so the caller's options object is not
                    // modified by the range change.
                    options = options.clone();
                    options.range(options.getOffset(), Long.MAX_VALUE);
                    pair = new OriginalReaderPair(key, reader, bucket, minKey, maxKey,
                                                  options);
                }
                else
                {
                    pair = new ReaderPair(key, reader, bucket, minKey, maxKey,
                                          eventOptions, 0);
                }

                // if there is at least one record, put it in the map
                if (pair.nextRecord != null)
                {
                    readers.put(key, pair);
                }
                baseReader = pair.recordReader;
            }

            // we always want to read all of the deltas
            eventOptions.range(0, Long.MAX_VALUE);
            if (deltaDirectory != null)
            {
                foreach (Path delta in deltaDirectory)
                {
                    ReaderKey key = new ReaderKey();
                    Path deltaFile = AcidUtils.createBucketFile(delta, bucket);
                    AcidUtils.ParsedDelta deltaDir = AcidUtils.parsedDelta(delta);
                    FileSystem fs = deltaFile.getFileSystem(conf);
                    // This local hides the length member assigned at the top
                    // of the constructor.
                    long length = getLastFlushLength(fs, deltaFile);
                    // Deltas with a length of -1 or without a bucket file are skipped.
                    if (length != -1 && fs.exists(deltaFile))
                    {
                        Reader deltaReader = OrcFile.createReader(deltaFile,
                            OrcFile.readerOptions(conf).maxLength(length));
                        Reader.Options deltaEventOptions = null;
                        if (eventOptions.getSearchArgument() != null)
                        {
                            // Turn off the sarg before pushing it to delta.  We never want to push a sarg to a delta as
                            // it can produce wrong results (if the latest valid version of the record is filtered out by
                            // the sarg) or ArrayOutOfBounds errors (when the sarg is applied to a delete record)
                            // unless the delta only has insert events
                            OrcRecordUpdater.AcidStats acidStats = OrcRecordUpdater.parseAcidStats(deltaReader);
                            if (acidStats.deletes > 0 || acidStats.updates > 0)
                            {
                                deltaEventOptions = eventOptions.clone().searchArgument(null, null);
                            }
                        }
                        ReaderPair deltaPair;
                        // Use the sarg-free options when they were created
                        // above, otherwise the shared event options.
                        deltaPair = new ReaderPair(key, deltaReader, bucket, minKey,
                          maxKey, deltaEventOptions != null ? deltaEventOptions : eventOptions, deltaDir.getStatementId());
                        if (deltaPair.nextRecord != null)
                        {
                            readers.put(key, deltaPair);
                        }
                    }
                }
            }

            // get the first record
            Map.Entry<ReaderKey, ReaderPair> entry = readers.pollFirstEntry();
            if (entry == null)
            {
                // No reader produced a record.
                columns = 0;
                primary = null;
            }
            else
            {
                primary = entry.getValue();
                // The entry was polled out of the map, so whatever remains
                // supplies the secondary key.
                if (readers.isEmpty())
                {
                    secondaryKey = null;
                }
                else
                {
                    secondaryKey = readers.firstKey();
                }
                // get the number of columns in the user's rows
                columns = primary.getColumns();
            }
        }
        /**
         * Write the metadata of one or more ORC files to the console as JSON.
         * A single file yields one JSON object; several files yield an array
         * with one object per file. Each object carries the file name and
         * versions, row count, compression, schema, stripe and file
         * statistics, per-stripe details (streams, encodings and, when
         * requested, row indexes) and padding figures.
         * @param files the paths of the files to describe
         * @param conf the configuration used to create each reader
         * @param rowIndexCols column ids whose row indexes are written; nothing
         *        is written for indexes when this is null or empty
         * @param prettyPrint selects the pretty-print branch, which currently
         *        prints the same text as the plain branch
         * @param printTimeZone whether to write each stripe's writer time zone
         */
        public static void printJsonMetaData(List<string> files, Configuration conf,
            List<int> rowIndexCols, bool prettyPrint, bool printTimeZone)
        {
            JsonWriter writer = new JsonWriter();
            bool multiFile = files.Count > 1;
            // Top-level container: an array for several files, otherwise the
            // single file's own object.
            if (multiFile)
            {
                writer.array();
            }
            else
            {
                writer.newObject();
            }
            foreach (string filename in files)
            {
                if (multiFile)
                {
                    writer.newObject();
                }
                writer.key("fileName").value(Path.GetFileName(filename));
                Reader reader = OrcFile.createReader(filename, OrcFile.readerOptions(conf));
                writer.key("fileVersion").value(OrcFile.VersionHelper.getName(reader.getFileVersion()));
                writer.key("writerVersion").value(reader.getWriterVersion().ToString());
                using (RecordReaderImpl rows = (RecordReaderImpl)reader.rows())
                {
                    writer.key("numberOfRows").value(reader.getNumberOfRows());
                    writer.key("compression").value(reader.getCompression().ToString());
                    if (reader.getCompression() != CompressionKind.NONE)
                    {
                        writer.key("compressionBufferSize").value(reader.getCompressionSize());
                    }
                    writer.key("schemaString").value(reader.getObjectInspector().getTypeName());
                    writer.key("schema").array();
                    writeSchema(writer, reader.getTypes());
                    writer.endArray();

                    // Per-stripe column statistics; stripe numbers are 1-based.
                    writer.key("stripeStatistics").array();
                    List<StripeStatistics> stripeStatistics = reader.getStripeStatistics();
                    for (int n = 0; n < stripeStatistics.Count; n++)
                    {
                        writer.newObject();
                        writer.key("stripeNumber").value(n + 1);
                        StripeStatistics ss = stripeStatistics[n];
                        writer.key("columnStatistics").array();
                        for (int i = 0; i < ss.getColumnStatistics().Length; i++)
                        {
                            writer.newObject();
                            writer.key("columnId").value(i);
                            writeColumnStatistics(writer, ss.getColumnStatistics()[i]);
                            writer.endObject();
                        }
                        writer.endArray();
                        writer.endObject();
                    }
                    writer.endArray();

                    // File-level column statistics.
                    ColumnStatistics[] stats = reader.getStatistics();
                    int colCount = stats.Length;
                    writer.key("fileStatistics").array();
                    for (int i = 0; i < stats.Length; ++i)
                    {
                        writer.newObject();
                        writer.key("columnId").value(i);
                        writeColumnStatistics(writer, stats[i]);
                        writer.endObject();
                    }
                    writer.endArray();

                    writer.key("stripes").array();
                    // Zero-based stripe index; starts at -1 because it is
                    // incremented at the top of each iteration.
                    int stripeIx = -1;
                    foreach (StripeInformation stripe in reader.getStripes())
                    {
                        ++stripeIx;
                        long stripeStart = stripe.getOffset();
                        OrcProto.StripeFooter footer = rows.readStripeFooter(stripe);
                        writer.newObject(); // start of stripe information
                        writer.key("stripeNumber").value(stripeIx + 1);
                        writer.key("stripeInformation");
                        writeStripeInformation(writer, stripe);
                        if (printTimeZone)
                        {
                            writer.key("writerTimezone").value(
                                footer.HasWriterTimezone ? footer.WriterTimezone : FileDump.UNKNOWN);
                        }
                        // Running start offset: begins at the stripe offset and
                        // advances by each stream's length.
                        long sectionStart = stripeStart;

                        writer.key("streams").array();
                        foreach (OrcProto.Stream section in footer.StreamsList)
                        {
                            writer.newObject();
                            string kind = section.HasKind ? section.Kind.ToString() : FileDump.UNKNOWN;
                            writer.key("columnId").value(section.Column);
                            writer.key("section").value(kind);
                            writer.key("startOffset").value(sectionStart);
                            writer.key("length").value(section.Length);
                            sectionStart += (long)section.Length;
                            writer.endObject();
                        }
                        writer.endArray();

                        writer.key("encodings").array();
                        for (int i = 0; i < footer.ColumnsCount; ++i)
                        {
                            writer.newObject();
                            OrcProto.ColumnEncoding encoding = footer.ColumnsList[i];
                            writer.key("columnId").value(i);
                            writer.key("kind").value(encoding.Kind.ToString());
                            // The dictionary size is written only for the two
                            // dictionary encodings.
                            if (encoding.Kind == OrcProto.ColumnEncoding.Types.Kind.DICTIONARY ||
                                encoding.Kind == OrcProto.ColumnEncoding.Types.Kind.DICTIONARY_V2)
                            {
                                writer.key("dictionarySize").value(encoding.DictionarySize);
                            }
                            writer.endObject();
                        }
                        writer.endArray();

                        if (rowIndexCols != null && rowIndexCols.Count != 0)
                        {
                            // include the columns that are specified, only if the columns are included, bloom filter
                            // will be read
                            // NOTE(review): a column id outside [0, colCount) in
                            // rowIndexCols would index past sargColumns - confirm
                            // callers validate the ids.
                            bool[] sargColumns = new bool[colCount];
                            foreach (int colIdx in rowIndexCols)
                            {
                                sargColumns[colIdx] = true;
                            }
                            RecordReaderImpl.Index indices = rows.readRowIndex(stripeIx, null, sargColumns);
                            writer.key("indexes").array();
                            foreach (int col in rowIndexCols)
                            {
                                writer.newObject();
                                writer.key("columnId").value(col);
                                writeRowGroupIndexes(writer, col, indices.getRowGroupIndex());
                                writeBloomFilterIndexes(writer, col, indices.getBloomFilterIndex());
                                writer.endObject();
                            }
                            writer.endArray();
                        }
                        writer.endObject(); // end of stripe information
                    }
                    writer.endArray();

                    long fileLen = new FileInfo(filename).Length;
                    long paddedBytes = FileDump.getTotalPaddingSize(reader);
                    // empty ORC file is ~45 bytes. Assumption here is file length always >0
                    double percentPadding = ((double)paddedBytes / (double)fileLen) * 100;
                    writer.key("fileLength").value(fileLen);
                    writer.key("paddingLength").value(paddedBytes);
                    writer.key("paddingRatio").value(percentPadding);
                    // NOTE(review): rows is also released by the enclosing
                    // using statement; this explicit close looks redundant -
                    // confirm that closing twice is harmless.
                    rows.close();
                }

                // Closes the per-file object (or the only object when there is
                // a single file).
                writer.endObject();
            }
            if (multiFile)
            {
                writer.endArray();
            }

            // The JSON re-formatting code is compiled out, so both branches
            // below print the writer's text unchanged.
            if (prettyPrint)
            {
            #if false
                string prettyJson;
                if (multiFile)
                {
                    JSONArray jsonArray = new JSONArray(writer.toString());
                    prettyJson = jsonArray.toString(2);
                }
                else
                {
                    JSONObject jsonObject = new JSONObject(writer.toString());
                    prettyJson = jsonObject.toString(2);
                }
            #else
                string prettyJson = writer.ToString();
            #endif
                System.Console.WriteLine(prettyJson);
            }
            else
            {
                System.Console.WriteLine(writer.ToString());
            }
        }