// Builds an AND expression and a NOT(OR) expression with the search-argument
// builder and checks the string form of each resulting SearchArgument.
public void testBuilderComplexTypes2()
{
    const string expectedAnd =
        "leaf-0 = (LESS_THAN x 2005-03-12), " +
        "leaf-1 = (LESS_THAN_EQUALS y hi), " +
        "leaf-2 = (EQUALS z 1.0), " +
        "expr = (and leaf-0 leaf-1 leaf-2)";
    const string expectedNotOr =
        "leaf-0 = (IS_NULL x), " +
        "leaf-1 = (BETWEEN y 10 20.0), " +
        "leaf-2 = (IN z 1 2 3), " +
        "leaf-3 = (NULL_SAFE_EQUALS a stinger), " +
        "expr = (and (not leaf-0) (not leaf-1) (not leaf-2) (not leaf-3))";

    // Three leaves joined by a single AND.
    SearchArgument conjunction = SearchArgumentFactory.newBuilder()
        .startAnd()
        .lessThan("x", PredicateLeaf.Type.DATE, Date.Parse("2005-3-12"))
        .lessThanEquals("y", PredicateLeaf.Type.STRING, "hi")
        .equals("z", PredicateLeaf.Type.DECIMAL, HiveDecimal.Parse("1.0"))
        .end()
        .build();
    Assert.Equal(expectedAnd, conjunction.ToString());

    // Four leaves inside NOT(OR(...)); the expected text shows the negation
    // pushed down onto each leaf.
    SearchArgument negatedDisjunction = SearchArgumentFactory.newBuilder()
        .startNot()
        .startOr()
        .isNull("x", PredicateLeaf.Type.LONG)
        .between("y", PredicateLeaf.Type.DECIMAL, HiveDecimal.Parse("10"), HiveDecimal.Parse("20.0"))
        .@in("z", PredicateLeaf.Type.LONG, 1L, 2L, 3L)
        .nullSafeEquals("a", PredicateLeaf.Type.STRING, "stinger")
        .end()
        .end()
        .build();
    Assert.Equal(expectedNotOr, negatedDisjunction.ToString());
}
// Adds 25000 rows to the writer. Rows 0, 5000, 10000, 15000 and 20000 carry
// distinct values; every other row is the same "zebra" filler row.
private void writeData(Writer writer)
{
    for (int rowIndex = 0; rowIndex < 25000; rowIndex++)
    {
        AllTypesRow row;
        switch (rowIndex)
        {
            case 0:
                row = new AllTypesRow(2L, "foo", 0.8, HiveDecimal.Parse("1.2"), new Timestamp(0));
                break;
            case 5000:
                row = new AllTypesRow(13L, "bar", 80.0, HiveDecimal.Parse("2.2"), new Timestamp(5000));
                break;
            case 10000:
                row = new AllTypesRow(29L, "cat", 8.0, HiveDecimal.Parse("3.3"), new Timestamp(10000));
                break;
            case 15000:
                row = new AllTypesRow(70L, "dog", 1.8, HiveDecimal.Parse("4.4"), new Timestamp(15000));
                break;
            case 20000:
                row = new AllTypesRow(5L, "eat", 0.8, HiveDecimal.Parse("5.5"), new Timestamp(20000));
                break;
            default:
                row = new AllTypesRow(100L, "zebra", 8.0, HiveDecimal.Parse("0.0"), new Timestamp(250000));
                break;
        }
        writer.addRow(row);
    }
}
// Resets the base statistics, clears the tracked minimum and maximum, and
// sets the running sum back to HiveDecimal.Zero.
public override void reset()
{
    base.reset();
    sum = HiveDecimal.Zero;
    minimum = maximum = null;
}
// Stores the five column values of one row in the corresponding fields.
public AllTypesRow(long uid, string s1, double d1, HiveDecimal @decimal, Timestamp ts)
{
    userid = uid;
    string1 = s1;
    subtype = d1;
    decimal1 = @decimal;
    // "ts" names both the parameter and the field, so qualify the field.
    this.ts = ts;
}
// Copies each argument into the field of the same name. Every parameter
// shadows its field, so all assignments are qualified with "this".
public MyRecord(bool? bo, sbyte? by, int? i, long? l, short? s, double? d, string k, Timestamp? t, Date? dt, HiveDecimal hd)
{
    // boolean and integral columns
    this.bo = bo;
    this.by = by;
    this.s = s;
    this.i = i;
    this.l = l;

    // floating point and string columns
    this.d = d;
    this.k = k;

    // temporal and decimal columns
    this.t = t;
    this.dt = dt;
    this.hd = hd;
}
// Copies each argument into the field of the same name. Every parameter
// shadows its field, so all assignments are qualified with "this".
public AllTypesRecord(bool b, sbyte bt, short s, int i, long l, float f, double d, HiveDecimal de, Timestamp t, Date dt, string str, Dictionary<string, string> m, List<int> a, Struct st)
{
    // primitive columns
    this.b = b;
    this.bt = bt;
    this.s = s;
    this.i = i;
    this.l = l;
    this.f = f;
    this.d = d;

    // decimal, temporal and string columns
    this.de = de;
    this.t = t;
    this.dt = dt;
    this.str = str;

    // compound columns
    this.m = m;
    this.a = a;
    this.st = st;
}
// Folds one decimal value into the statistics: the first value seeds both
// minimum and maximum, later values widen whichever bound they exceed, and
// the value is added to the sum as long as the sum is still non-null.
protected internal override void updateDecimal(HiveDecimal value)
{
    if (minimum == null)
    {
        minimum = maximum = value;
    }
    else
    {
        if (minimum.CompareTo(value) > 0)
        {
            minimum = value;
        }
        else if (maximum.CompareTo(value) < 0)
        {
            maximum = value;
        }
    }

    if (sum != null)
    {
        sum = sum + value;
    }
}
// Stores each argument in its matching field. No parameter shares a name
// with a field, so the assignments are written unqualified.
public BigRow(bool b1, sbyte b2, short s1, int i1, long l1, float f1, double d1, byte[] b3, string s2, MiddleStruct m1, List<InnerStruct> l2, Dictionary<string, InnerStruct> m2, Timestamp ts1, HiveDecimal dec1)
{
    // primitive columns
    boolean1 = b1;
    byte1 = b2;
    short1 = s1;
    int1 = i1;
    long1 = l1;
    float1 = f1;
    double1 = d1;

    // binary and string columns
    bytes1 = b3;
    string1 = s2;

    // compound columns
    middle = m1;
    list = l2;
    map = m2;

    // timestamp and decimal columns
    ts = ts1;
    decimal1 = dec1;
}
// Rebuilds decimal statistics from their protobuf form. Maximum and minimum
// are parsed only when present; the sum is parsed when present and set to
// null otherwise.
public DecimalStatisticsImpl(OrcProto.ColumnStatistics stats)
    : base(stats)
{
    OrcProto.DecimalStatistics proto = stats.DecimalStatistics;

    if (proto.HasMaximum)
    {
        maximum = HiveDecimal.Parse(proto.Maximum);
    }

    if (proto.HasMinimum)
    {
        minimum = HiveDecimal.Parse(proto.Minimum);
    }

    sum = proto.HasSum ? HiveDecimal.Parse(proto.Sum) : null;
}
// Merges another statistics object into this one.
//  - If the other object is not decimal statistics, throws ArgumentException
//    when this object already has stats and a minimum; otherwise nothing
//    decimal-specific is merged.
//  - If this object has no minimum yet, it adopts the other's minimum,
//    maximum and sum.
//  - Otherwise, when the other has a minimum, the bounds are widened and the
//    sums are added; the sum becomes null if either side's sum is null.
// The base class merge always runs last.
public override void merge(ColumnStatisticsImpl other)
{
    DecimalStatisticsImpl that = other as DecimalStatisticsImpl;

    if (that == null)
    {
        if (isStatsExists() && minimum != null)
        {
            throw new ArgumentException("Incompatible merging of decimal column statistics");
        }
    }
    else if (minimum == null)
    {
        minimum = that.minimum;
        maximum = that.maximum;
        sum = that.sum;
    }
    else if (that.minimum != null)
    {
        if (minimum.CompareTo(that.minimum) > 0)
        {
            minimum = that.minimum;
        }
        if (maximum.CompareTo(that.maximum) < 0)
        {
            maximum = that.maximum;
        }

        if (sum != null && that.sum != null)
        {
            sum += that.sum;
        }
        else
        {
            sum = null;
        }
    }

    base.merge(other);
}
// Merges one set of decimal statistics into another twice and checks the
// resulting minimum and maximum after each merge.
public void testDecimalMerge()
{
    TypeDescription decimalType = TypeDescription.createDecimal()
        .withPrecision(38)
        .withScale(16);

    ColumnStatisticsImpl target = ColumnStatisticsImpl.create(decimalType);
    ColumnStatisticsImpl incoming = ColumnStatisticsImpl.create(decimalType);

    // target covers [10, 100], incoming covers [1, 1000].
    target.updateDecimal(HiveDecimal.create(10));
    target.updateDecimal(HiveDecimal.create(100));
    incoming.updateDecimal(HiveDecimal.create(1));
    incoming.updateDecimal(HiveDecimal.create(1000));

    target.merge(incoming);
    DecimalColumnStatistics merged = (DecimalColumnStatistics)target;
    Assert.Equal(1, merged.getMinimum().longValue());
    Assert.Equal(1000, merged.getMaximum().longValue());

    // After a reset, target covers [-10, 10000], which already contains the
    // incoming range.
    target.reset();
    target.updateDecimal(HiveDecimal.create(-10));
    target.updateDecimal(HiveDecimal.create(10000));

    target.merge(incoming);
    Assert.Equal(-10, merged.getMinimum().longValue());
    Assert.Equal(10000, merged.getMaximum().longValue());
}
// Writes 21000 MyRecord rows to the test file with ZLIB compression, then
// verifies the file through checkVectorizedReader. Rows whose index is a
// multiple of 7 carry nulls in every column except "i" and "l".
public void createFile()
{
    ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(typeof(MyRecord));

    using (Stream file = File.OpenWrite(TestFilePath))
    using (Writer writer = OrcFile.createWriter(
        TestFilePath,
        file,
        OrcFile.writerOptions(conf)
            .stripeSize(100000)
            .compress(CompressionKind.ZLIB)
            .inspector(inspector)
            .bufferSize(10000)
            .rowIndexStride(10000)))
    {
        // Fixed seed so the chosen words are reproducible.
        Random wordPicker = new Random(1);
        string[] words = TestHelpers.words;
        string[] dates = new string[] { "1991-02-28", "1970-01-31", "1950-04-23" };
        string[] decimalStrings = new string[]
        {
            "234.443", "10001000", "0.3333367", "67788798.0",
            "-234.443", "-10001000", "-0.3333367", "-67788798.0", "0"
        };

        for (int i = 0; i < 21000; ++i)
        {
            MyRecord record;
            if ((i % 7) == 0)
            {
                record = new MyRecord(null, null, i, (long)200, null, null, null, null, null, null);
            }
            else
            {
                record = new MyRecord(
                    ((i % 3) == 0),
                    (sbyte)(i % 5),
                    i,
                    (long)200,
                    (short)(300 + i),
                    (double)(400 + i),
                    words[wordPicker.Next(words.Length)],
                    new Timestamp(DateTime.Now),
                    Date.Parse(dates[i % 3]),
                    HiveDecimal.Parse(decimalStrings[i % decimalStrings.Length]));
            }
            writer.addRow(record);
        }
    }

    checkVectorizedReader();
}
// Reads the test file with the vectorized reader and the row-by-row reader in
// lock step. For every row of every batch it asserts that each column value
// (or null flag) from the vectorized batch matches the row reader, then
// asserts the expected isRepeating / noNulls flags of the ten columns for the
// batch. Finally asserts that the row reader has no rows left.
private void checkVectorizedReader()
{
    Reader vreader = OrcFile.createReader(TestFilePath, OrcFile.readerOptions(conf));
    Reader reader = OrcFile.createReader(TestFilePath, OrcFile.readerOptions(conf));
    using (RecordReaderImpl vrr = (RecordReaderImpl)vreader.rows())
    using (RecordReaderImpl rr = (RecordReaderImpl)reader.rows())
    {
        VectorizedRowBatch batch = null;

        // Check the vectorized ORC reader against the ORC row reader.
        while (vrr.hasNext())
        {
            batch = vrr.nextBatch(batch);
            for (int i = 0; i < batch.size; i++)
            {
                OrcStruct row = (OrcStruct)rr.next();
                for (int j = 0; j < batch.cols.Length; j++)
                {
                    object a = (row.getFieldValue(j));
                    ColumnVector cv = batch.cols[j];

                    // If the value is repeating, only slot 0 is meaningful.
                    int rowId = cv.isRepeating ? 0 : i;

                    if (a == null)
                    {
                        // Make sure the null flag agrees.
                        Assert.True(!cv.noNulls && cv.isNull[rowId]);
                    }
                    else if (a is bool)
                    {
                        // bool values are stored as 1's and 0's, so convert and compare.
                        long temp = (bool)a ? 1 : 0;
                        long b = ((LongColumnVector)cv).vector[rowId];
                        Assert.Equal(temp.ToString(), b.ToString());
                    }
                    else if (a is Timestamp)
                    {
                        // Timestamps are stored as long, so convert and compare.
                        // Timestamp.getTime() is overridden and is
                        //   long time = super.getTime();
                        //   return (time + (nanos / 1000000));
                        Timestamp t = (Timestamp)a;
                        long timeInNanoSec = (t.Milliseconds * 1000000) + (t.getNanos() % 1000000);
                        long b = ((LongColumnVector)cv).vector[rowId];
                        Assert.Equal(timeInNanoSec.ToString(), b.ToString());
                    }
                    else if (a is Date)
                    {
                        // Dates are stored as long, so compare the day counts.
                        Date adt = (Date)a;
                        long b = ((LongColumnVector)cv).vector[rowId];
                        Assert.Equal(adt.Days, (int)b);
                    }
                    else if (a is HiveDecimal)
                    {
                        // Index with rowId like every other branch: when the
                        // column is repeating only slot 0 is meaningful, so
                        // indexing with i would read the wrong slot.
                        HiveDecimal dec = (HiveDecimal)a;
                        HiveDecimal b = ((DecimalColumnVector)cv).vector[rowId];
                        Assert.Equal(dec, b);
                    }
                    else if (a is double)
                    {
                        double b = ((DoubleColumnVector)cv).vector[rowId];
                        Assert.Equal(a.ToString(), b.ToString());
                    }
                    else if (a is string)
                    {
                        BytesColumnVector bcv = (BytesColumnVector)cv;
                        string b = Encoding.UTF8.GetString(bcv.vector[rowId], bcv.start[rowId], bcv.length[rowId]);
                        Assert.Equal((string)a, b);
                    }
                    else if (a is int || a is long || a is sbyte || a is short)
                    {
                        Assert.Equal(a.ToString(), ((LongColumnVector)cv).vector[rowId].ToString());
                    }
                    else
                    {
                        // Unexpected value type.
                        Assert.True(false);
                    }
                }
            }

            // Check repeating: only column 3 is expected to repeat.
            Assert.Equal(false, batch.cols[0].isRepeating);
            Assert.Equal(false, batch.cols[1].isRepeating);
            Assert.Equal(false, batch.cols[2].isRepeating);
            Assert.Equal(true, batch.cols[3].isRepeating);
            Assert.Equal(false, batch.cols[4].isRepeating);
            Assert.Equal(false, batch.cols[5].isRepeating);
            Assert.Equal(false, batch.cols[6].isRepeating);
            Assert.Equal(false, batch.cols[7].isRepeating);
            Assert.Equal(false, batch.cols[8].isRepeating);
            Assert.Equal(false, batch.cols[9].isRepeating);

            // Check non null: only columns 2 and 3 are expected to have no nulls.
            Assert.Equal(false, batch.cols[0].noNulls);
            Assert.Equal(false, batch.cols[1].noNulls);
            Assert.Equal(true, batch.cols[2].noNulls);
            Assert.Equal(true, batch.cols[3].noNulls);
            Assert.Equal(false, batch.cols[4].noNulls);
            Assert.Equal(false, batch.cols[5].noNulls);
            Assert.Equal(false, batch.cols[6].noNulls);
            Assert.Equal(false, batch.cols[7].noNulls);
            Assert.Equal(false, batch.cols[8].noNulls);
            Assert.Equal(false, batch.cols[9].noNulls);
        }

        Assert.Equal(false, rr.hasNext());
    }
}
// Base implementation: always throws NotSupportedException unless a subclass
// overrides it.
protected internal virtual void updateDecimal(HiveDecimal value)
{
    const string message = "Can't update decimal";
    throw new NotSupportedException(message);
}
// Writes two BigRow rows with a writer configured for OrcFile.Version.V_0_11,
// then checks the row count and raw data size reported by the writer, the same
// totals plus per-column raw data sizes reported by a fresh reader, and a
// selection of column statistics (boolean, integer, double, binary, string).
public void testOrcSerDeStatsComplexOldFormat() { ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(typeof(BigRow)); long rawDataSize; using (Stream file = File.OpenWrite(TestFilePath)) using (Writer writer = OrcFile.createWriter(TestFilePath, file, OrcFile.writerOptions(conf) .inspector(inspector) .stripeSize(100000) .version(OrcFile.Version.V_0_11) .bufferSize(10000))) { // 1 + 2 + 4 + 8 + 4 + 8 + 5 + 2 + 4 + 3 + 4 + 4 + 4 + 4 + 4 + 3 = 64 writer.addRow(new BigRow(false, (sbyte)1, (short)1024, 65536, Int64.MaxValue, (float)1.0, -15.0, bytes(0, 1, 2, 3, 4), "hi", new MiddleStruct(inner(1, "bye"), inner(2, "sigh")), list(inner(3, "good"), inner(4, "bad")), map(), Timestamp.Parse("2000-03-12 15:00:00"), HiveDecimal.Parse( "12345678.6547456"))); // 1 + 2 + 4 + 8 + 4 + 8 + 3 + 4 + 3 + 4 + 4 + 4 + 3 + 4 + 2 + 4 + 3 + 5 + 4 + 5 + 7 + 4 + 7 = // 97 writer.addRow(new BigRow(true, (sbyte)100, (short)2048, 65536, Int64.MaxValue, (float)2.0, -5.0, bytes(), "bye", new MiddleStruct(inner(1, "bye"), inner(2, "sigh")), list(inner(100000000, "cat"), inner(-100000, "in"), inner(1234, "hat")), map(inner(5, "chani"), inner(1, "mauddib")), Timestamp.Parse("2000-03-11 15:00:00"), HiveDecimal.Parse("12345678.6547452"))); writer.close(); long rowCount = writer.getNumberOfRows(); rawDataSize = writer.getRawDataSize(); Assert.Equal(2, rowCount); Assert.Equal(1740, rawDataSize); } Reader reader = OrcFile.createReader(TestFilePath, OrcFile.readerOptions(conf)); Assert.Equal(2, reader.getNumberOfRows()); Assert.Equal(1740, reader.getRawDataSize()); Assert.Equal(8, reader.getRawDataSizeOfColumns(Lists.newArrayList("boolean1"))); Assert.Equal(8, reader.getRawDataSizeOfColumns(Lists.newArrayList("byte1"))); Assert.Equal(8, reader.getRawDataSizeOfColumns(Lists.newArrayList("short1"))); Assert.Equal(8, reader.getRawDataSizeOfColumns(Lists.newArrayList("int1"))); Assert.Equal(16, reader.getRawDataSizeOfColumns(Lists.newArrayList("long1"))); Assert.Equal(8, 
reader.getRawDataSizeOfColumns(Lists.newArrayList("float1"))); Assert.Equal(16, reader.getRawDataSizeOfColumns(Lists.newArrayList("double1"))); Assert.Equal(5, reader.getRawDataSizeOfColumns(Lists.newArrayList("bytes1"))); Assert.Equal(172, reader.getRawDataSizeOfColumns(Lists.newArrayList("string1"))); Assert.Equal(455, reader.getRawDataSizeOfColumns(Lists.newArrayList("list"))); Assert.Equal(368, reader.getRawDataSizeOfColumns(Lists.newArrayList("map"))); Assert.Equal(364, reader.getRawDataSizeOfColumns(Lists.newArrayList("middle"))); Assert.Equal(80, reader.getRawDataSizeOfColumns(Lists.newArrayList("ts"))); Assert.Equal(224, reader.getRawDataSizeOfColumns(Lists.newArrayList("decimal1"))); Assert.Equal(88, reader.getRawDataSizeOfColumns(Lists.newArrayList("ts", "int1"))); Assert.Equal(1195, reader.getRawDataSizeOfColumns(Lists.newArrayList("middle", "list", "map", "float1"))); Assert.Equal(185, reader.getRawDataSizeOfColumns(Lists.newArrayList("bytes1", "byte1", "string1"))); Assert.Equal(rawDataSize, reader.getRawDataSizeOfColumns(Lists.newArrayList("boolean1", "byte1", "short1", "int1", "long1", "float1", "double1", "bytes1", "string1", "list", "map", "middle", "ts", "decimal1"))); // check the stats ColumnStatistics[] stats = reader.getStatistics(); Assert.Equal(2, stats[1].getNumberOfValues()); Assert.Equal(1, ((BooleanColumnStatistics)stats[1]).getFalseCount()); Assert.Equal(1, ((BooleanColumnStatistics)stats[1]).getTrueCount()); Assert.Equal("count: 2 hasNull: False true: 1", stats[1].ToString()); Assert.Equal(2048, ((IntegerColumnStatistics)stats[3]).getMaximum()); Assert.Equal(1024, ((IntegerColumnStatistics)stats[3]).getMinimum()); Assert.Equal(true, ((IntegerColumnStatistics)stats[3]).isSumDefined()); Assert.Equal(3072, ((IntegerColumnStatistics)stats[3]).getSum()); Assert.Equal("count: 2 hasNull: False min: 1024 max: 2048 sum: 3072", stats[3].ToString()); Assert.Equal(Int64.MaxValue, ((IntegerColumnStatistics)stats[5]).getMaximum()); 
Assert.Equal(Int64.MaxValue, ((IntegerColumnStatistics)stats[5]).getMinimum()); Assert.Equal(false, ((IntegerColumnStatistics)stats[5]).isSumDefined()); Assert.Equal("count: 2 hasNull: False min: 9223372036854775807 max: 9223372036854775807", stats[5].ToString()); Assert.Equal(-15.0, ((DoubleColumnStatistics)stats[7]).getMinimum()); Assert.Equal(-5.0, ((DoubleColumnStatistics)stats[7]).getMaximum()); Assert.Equal(-20.0, ((DoubleColumnStatistics)stats[7]).getSum(), 5); Assert.Equal("count: 2 hasNull: False min: -15 max: -5 sum: -20", stats[7].ToString()); Assert.Equal(5, ((BinaryColumnStatistics)stats[8]).getSum()); Assert.Equal("count: 2 hasNull: False sum: 5", stats[8].ToString()); Assert.Equal("bye", ((StringColumnStatistics)stats[9]).getMinimum()); Assert.Equal("hi", ((StringColumnStatistics)stats[9]).getMaximum()); Assert.Equal(5, ((StringColumnStatistics)stats[9]).getSum()); Assert.Equal("count: 2 hasNull: False min: bye max: hi sum: 5", stats[9].ToString()); }
// Writes one row into a three-column batch:
//   cols[0] - long vector, receives ts.Value.Nanoseconds;
//   cols[1] - union vector; tag 0 stores i in the long field, tag 1 stores s
//             in the bytes field, any other tag throws ArgumentException;
//   cols[2] - decimal vector, receives dec.
// A null ts, tag or dec marks that column's slot as null; a null i or s marks
// the selected union field's slot as null.
private static void setUnion(VectorizedRowBatch batch, int rowId, Timestamp? ts, int? tag, int? i, string s, HiveDecimal dec)
{
    UnionColumnVector union = (UnionColumnVector)batch.cols[1];

    // Column 0: timestamp.
    if (ts.HasValue)
    {
        ((LongColumnVector)batch.cols[0]).vector[rowId] = ts.Value.Nanoseconds;
    }
    else
    {
        batch.cols[0].isNull[rowId] = true;
        batch.cols[0].noNulls = false;
    }

    // Column 1: union.
    if (!tag.HasValue)
    {
        batch.cols[1].isNull[rowId] = true;
        batch.cols[1].noNulls = false;
    }
    else
    {
        int tagValue = tag.Value;
        union.tags[rowId] = tagValue;
        switch (tagValue)
        {
            case 0:
                if (i.HasValue)
                {
                    ((LongColumnVector)union.fields[tagValue]).vector[rowId] = i.Value;
                }
                else
                {
                    union.fields[tagValue].isNull[rowId] = true;
                    union.fields[tagValue].noNulls = false;
                }
                break;

            case 1:
                if (s != null)
                {
                    ((BytesColumnVector)union.fields[tagValue]).setVal(rowId, s.getBytes());
                }
                else
                {
                    union.fields[tagValue].isNull[rowId] = true;
                    union.fields[tagValue].noNulls = false;
                }
                break;

            default:
                throw new ArgumentException("Bad tag " + tag);
        }
    }

    // Column 2: decimal.
    if (dec == null)
    {
        batch.cols[2].isNull[rowId] = true;
        batch.cols[2].noNulls = false;
    }
    else
    {
        ((DecimalColumnVector)batch.cols[2]).vector[rowId] = dec;
    }
}
// Makes the vector stand for a single repeated value: flags it as repeating
// with no nulls and stores the value in slot 0.
public void fill(HiveDecimal value)
{
    isRepeating = true;
    noNulls = true;
    vector[0] = value;
}
// Writes two AllTypesRecord rows with no compression, runs FileDump.Main with
// "-d" while capturing stdout, and asserts that exactly two non-empty lines
// are printed and that each equals the expected JSON text.
// NOTE(review): the expected JSON includes "c" and "vc" keys, but the
// AllTypesRecord constructor calls here pass no matching values - confirm
// against the AllTypesRecord definition.
// NOTE(review): the existing comment mentions a big space in the expected
// text; verify that the padding inside the "c" values is intact.
public void testDataDump() { using (Stream file = File.OpenWrite(TestFilePath)) { OrcFile.WriterOptions options = new OrcFile.WriterOptions(new Properties(), conf); options.inspector(ObjectInspectorFactory.getReflectionObjectInspector(typeof(AllTypesRecord))); options.stripeSize(100000); options.compress(CompressionKind.NONE); options.bufferSize(10000); options.rowIndexStride(1000); using (Writer writer = OrcFile.createWriter(TestFilePath, file, options)) { Dictionary <string, string> m = new Dictionary <string, string>(2); m.Add("k1", "v1"); writer.addRow(new AllTypesRecord( true, (sbyte)10, (short)100, 1000, 10000L, 4.0f, 20.0, HiveDecimal.Parse("4.2222"), new Timestamp(1416967764000L), new Date(1416967764000L), "string", m, new List <int> { 100, 200 }, new AllTypesRecord.Struct(10, "foo"))); m.Clear(); m.Add("k3", "v3"); writer.addRow(new AllTypesRecord( false, (sbyte)20, (short)200, 2000, 20000L, 8.0f, 40.0, HiveDecimal.Parse("2.2222"), new Timestamp(1416967364000L), new Date(1411967764000L), "abcd", m, new List <int> { 200, 300 }, new AllTypesRecord.Struct(20, "bar"))); } } string[] lines; using (CaptureStdoutToMemory capture = new CaptureStdoutToMemory()) { FileDump.Main(TestFilePath, "-d"); lines = capture.Text.Split(new[] { '\n' }, StringSplitOptions.RemoveEmptyEntries); } Assert.Equal(2, lines.Length); // Don't be fooled by the big space in the middle, this line is quite long Assert.Equal("{\"b\":true,\"bt\":10,\"s\":100,\"i\":1000,\"l\":10000,\"f\":4,\"d\":20,\"de\":\"4.2222\",\"t\":\"2014-11-25 18:09:24\",\"dt\":\"2014-11-25\",\"str\":\"string\",\"c\":\"hello \",\"vc\":\"hello\",\"m\":[{\"_key\":\"k1\",\"_value\":\"v1\"}],\"a\":[100,200],\"st\":{\"i\":10,\"s\":\"foo\"}}", lines[0]); Assert.Equal("{\"b\":false,\"bt\":20,\"s\":200,\"i\":2000,\"l\":20000,\"f\":8,\"d\":40,\"de\":\"2.2222\",\"t\":\"2014-11-25 18:02:44\",\"dt\":\"2014-09-28\",\"str\":\"abcd\",\"c\":\"world 
\",\"vc\":\"world\",\"m\":[{\"_key\":\"k3\",\"_value\":\"v3\"}],\"a\":[200,300],\"st\":{\"i\":20,\"s\":\"bar\"}}", lines[1]); }
/**
 * Derives every column value of the row from a single number.
 *
 * The date and timestamp columns both use {@code x} days expressed in
 * milliseconds, reduced by the offset that LOCAL_TIMEZONE reports for that
 * instant.
 *
 * @param x the seed value for all columns
 */
BigRow(long x) {
    // boolean, numeric and string columns
    booleanValue = x % 2 == 0;
    byteValue = (byte) x;
    shortValue = (short) x;
    intValue = (int) x;
    longValue = x;
    floatValue = x;
    doubleValue = x;
    stringValue = Long.toHexString(x);
    decimalValue = HiveDecimal.create(x);

    // temporal columns
    final long dayMillis = x * MILLIS_IN_DAY;
    final long shiftedMillis = dayMillis - LOCAL_TIMEZONE.getOffset(dayMillis);
    dateValue = new Date(shiftedMillis);
    timestampValue = new Timestamp(shiftedMillis);
}
// Stores a decimal at the given slot after passing it through
// HiveDecimal.enforcePrecisionScale with this vector's precision and scale.
// When that call returns null, the slot is marked null instead.
public void set(int elementNum, HiveDecimal hiveDec)
{
    HiveDecimal enforced = HiveDecimal.enforcePrecisionScale(hiveDec, precision, scale);
    if (enforced != null)
    {
        vector[elementNum] = enforced;
        return;
    }

    noNulls = false;
    isNull[elementNum] = true;
}
// Stores each argument in its matching field. No parameter shares a name
// with a field, so the assignments are written unqualified.
public BigRow(bool b1, sbyte b2, short s1, int i1, long l1, float f1, double d1, byte[] b3, string s2, MiddleStruct m1, List<InnerStruct> l2, Dictionary<string, InnerStruct> m2, Timestamp ts1, HiveDecimal dec1)
{
    // primitive columns
    boolean1 = b1;
    byte1 = b2;
    short1 = s1;
    int1 = i1;
    long1 = l1;
    float1 = f1;
    double1 = d1;

    // binary and string columns
    bytes1 = b3;
    string1 = s2;

    // compound columns
    middle = m1;
    list = l2;
    map = m2;

    // timestamp and decimal columns
    ts = ts1;
    decimal1 = dec1;
}
// Copies each argument into the field of the same name. Every parameter
// shadows its field, so all assignments are qualified with "this".
public MyRecord(bool? bo, sbyte? by, int? i, long? l, short? s, double? d, string k, Timestamp? t, Date? dt, HiveDecimal hd)
{
    // boolean and integral columns
    this.bo = bo;
    this.by = by;
    this.s = s;
    this.i = i;
    this.l = l;

    // floating point and string columns
    this.d = d;
    this.k = k;

    // temporal and decimal columns
    this.t = t;
    this.dt = dt;
    this.hd = hd;
}
// Copies each argument into the field of the same name. Every parameter
// shadows its field, so all assignments are qualified with "this".
public AllTypesRecord(bool b, sbyte bt, short s, int i, long l, float f, double d, HiveDecimal de, Timestamp t, Date dt, string str, Dictionary<string, string> m, List<int> a, Struct st)
{
    // primitive columns
    this.b = b;
    this.bt = bt;
    this.s = s;
    this.i = i;
    this.l = l;
    this.f = f;
    this.d = d;

    // decimal, temporal and string columns
    this.de = de;
    this.t = t;
    this.dt = dt;
    this.str = str;

    // compound columns
    this.m = m;
    this.a = a;
    this.st = st;
}