/// <summary>
/// Writes the row-group index entries of one column as a JSON array under
/// the key "rowGroupIndexes". Nothing is written when the index array is
/// null, when <paramref name="col"/> is past its end, or when the column has
/// no index.
/// </summary>
/// <param name="writer">JSON writer receiving the output.</param>
/// <param name="col">Column id used to select an element of <paramref name="rowGroupIndex"/>.</param>
/// <param name="rowGroupIndex">Per-column row indexes; may be null.</param>
private static void writeRowGroupIndexes(JsonWriter writer, int col, OrcProto.RowIndex[] rowGroupIndex)
{
    OrcProto.RowIndex index;
    if (rowGroupIndex == null || (col >= rowGroupIndex.Length) || ((index = rowGroupIndex[col]) == null))
    {
        return;
    }

    writer.key("rowGroupIndexes").array();
    for (int entryIx = 0; entryIx < index.EntryCount; ++entryIx)
    {
        writer.newObject();
        writer.key("entryId").value(entryIx);

        OrcProto.RowIndexEntry entry = index.EntryList[entryIx];
        if (entry == null)
        {
            // Close the object opened above before skipping this entry;
            // otherwise the emitted JSON is left with an unbalanced object.
            writer.endObject();
            continue;
        }

        OrcProto.ColumnStatistics colStats = entry.Statistics;
        writeColumnStatistics(writer, ColumnStatisticsImpl.deserialize(colStats));

        writer.key("positions").array();
        for (int posIx = 0; posIx < entry.PositionsCount; ++posIx)
        {
            writer.value(entry.PositionsList[posIx]);
        }
        writer.endArray();

        writer.endObject();
    }
    writer.endArray();
}
/// <summary>
/// Builds binary column statistics from their serialized form. The sum is
/// copied only when the serialized binary statistics carry one.
/// </summary>
/// <param name="stats">Serialized column statistics to read from.</param>
public BinaryStatisticsImpl(OrcProto.ColumnStatistics stats)
    : base(stats)
{
    OrcProto.BinaryStatistics binaryStats = stats.BinaryStatistics;
    if (!binaryStats.HasSum)
    {
        return;
    }

    sum = binaryStats.Sum;
}
/// <summary>
/// Estimates the raw data size of one column from its serialized statistics,
/// based on the column's type kind and its number of values.
/// </summary>
/// <param name="colIdx">Index of the column in both <paramref name="types"/> and <paramref name="stats"/>.</param>
/// <param name="types">Type descriptors, indexed by column.</param>
/// <param name="stats">Column statistics, indexed by column.</param>
/// <returns>The estimated size, or 0 for a type kind that is not handled here.</returns>
private static long getRawDataSizeOfColumn(int colIdx, IList<OrcProto.Type> types, IList<OrcProto.ColumnStatistics> stats)
{
    OrcProto.ColumnStatistics columnStats = stats[colIdx];
    long valueCount = (long)columnStats.NumberOfValues;
    OrcProto.Type columnType = types[colIdx];

    switch (columnType.Kind)
    {
        case OrcProto.Type.Types.Kind.BINARY:
            // Old ORC files carry no binary statistics; no presence check is
            // made here because protocol buffers takes care of it.
            return columnStats.BinaryStatistics.Sum;

        case OrcProto.Type.Types.Kind.STRING:
        case OrcProto.Type.Types.Kind.CHAR:
        case OrcProto.Type.Types.Kind.VARCHAR:
        {
            // Old ORC files carry no string sum; no presence check is made
            // here because protocol buffers takes care of it. The size is
            // derived from the average string length via the data model.
            // A zero value count is replaced by 1 so the division is defined.
            if (valueCount == 0)
            {
                valueCount = 1;
            }
            int averageLength = (int)(columnStats.StringStatistics.Sum / valueCount);
            return valueCount * JavaDataModel.lengthForStringOfLength(averageLength);
        }

        case OrcProto.Type.Types.Kind.TIMESTAMP:
            return valueCount * JavaDataModel.lengthOfTimestamp();

        case OrcProto.Type.Types.Kind.DATE:
            return valueCount * JavaDataModel.lengthOfDate();

        case OrcProto.Type.Types.Kind.DECIMAL:
            return valueCount * JavaDataModel.lengthOfDecimal();

        case OrcProto.Type.Types.Kind.DOUBLE:
        case OrcProto.Type.Types.Kind.LONG:
            return valueCount * JavaDataModel.Eight;

        case OrcProto.Type.Types.Kind.FLOAT:
        case OrcProto.Type.Types.Kind.INT:
        case OrcProto.Type.Types.Kind.SHORT:
        case OrcProto.Type.Types.Kind.BOOLEAN:
        case OrcProto.Type.Types.Kind.BYTE:
            return valueCount * JavaDataModel.Four;

        default:
            LOG.debug("Unknown primitive category: " + columnType.Kind);
            return 0;
    }
}
/// <summary>
/// Builds timestamp column statistics from their serialized form. Maximum
/// and minimum are each set only when present in the serialized statistics.
/// </summary>
/// <param name="stats">Serialized column statistics to read from.</param>
public TimestampStatisticsImpl(OrcProto.ColumnStatistics stats)
    : base(stats)
{
    OrcProto.TimestampStatistics tsStats = stats.TimestampStatistics;

    // min/max are serialized/deserialized as integers (milliseconds since epoch)
    if (tsStats.HasMaximum)
    {
        maximum = new Timestamp(tsStats.Maximum);
    }

    if (tsStats.HasMinimum)
    {
        minimum = new Timestamp(tsStats.Minimum);
    }
}
/// <summary>
/// Builds date column statistics from their serialized form. Maximum and
/// minimum are each set only when present in the serialized statistics.
/// </summary>
/// <param name="stats">Serialized column statistics to read from.</param>
public DateStatisticsImpl(OrcProto.ColumnStatistics stats)
    : base(stats)
{
    OrcProto.DateStatistics serializedDates = stats.DateStatistics;

    // min/max are serialized/deserialized as integers (days since epoch)
    if (serializedDates.HasMaximum)
    {
        maximum = new Date(serializedDates.Maximum);
    }

    if (serializedDates.HasMinimum)
    {
        minimum = new Date(serializedDates.Minimum);
    }
}
/// <summary>
/// Initializes the common statistics fields from their serialized form.
/// </summary>
/// <param name="stats">Serialized column statistics to read from.</param>
protected ColumnStatisticsImpl(OrcProto.ColumnStatistics stats)
{
    if (stats.HasNumberOfValues)
    {
        count = stats.NumberOfValues;
    }

    // When the serialized statistics do not record the has-null flag,
    // the column is treated as possibly containing nulls.
    _hasNull = stats.HasHasNull ? stats.HasNull : true;
}
/// <summary>
/// Builds string column statistics from their serialized form. Maximum,
/// minimum and sum are each copied only when present.
/// </summary>
/// <param name="stats">Serialized column statistics to read from.</param>
public StringStatisticsImpl(OrcProto.ColumnStatistics stats)
    : base(stats)
{
    OrcProto.StringStatistics stringStats = stats.StringStatistics;

    if (stringStats.HasMaximum)
    {
        maximum = stringStats.Maximum;
    }

    if (stringStats.HasMinimum)
    {
        minimum = stringStats.Minimum;
    }

    if (stringStats.HasSum)
    {
        sum = stringStats.Sum;
    }
}
/// <summary>
/// Builds double column statistics from their serialized form. Minimum,
/// maximum and sum are each copied only when present; a present minimum
/// also sets the hasMinimum flag.
/// </summary>
/// <param name="stats">Serialized column statistics to read from.</param>
public DoubleStatisticsImpl(OrcProto.ColumnStatistics stats)
    : base(stats)
{
    OrcProto.DoubleStatistics doubleStats = stats.DoubleStatistics;

    if (doubleStats.HasMinimum)
    {
        hasMinimum = true;
        minimum = doubleStats.Minimum;
    }

    if (doubleStats.HasMaximum)
    {
        maximum = doubleStats.Maximum;
    }

    if (doubleStats.HasSum)
    {
        sum = doubleStats.Sum;
    }
}
/// <summary>
/// Builds decimal column statistics from their serialized form. Maximum and
/// minimum are parsed only when present; the sum is parsed when present and
/// set to null otherwise.
/// </summary>
/// <param name="stats">Serialized column statistics to read from.</param>
public DecimalStatisticsImpl(OrcProto.ColumnStatistics stats)
    : base(stats)
{
    OrcProto.DecimalStatistics decimalStats = stats.DecimalStatistics;

    if (decimalStats.HasMaximum)
    {
        maximum = HiveDecimal.Parse(decimalStats.Maximum);
    }

    if (decimalStats.HasMinimum)
    {
        minimum = HiveDecimal.Parse(decimalStats.Minimum);
    }

    if (!decimalStats.HasSum)
    {
        sum = null;
    }
    else
    {
        sum = HiveDecimal.Parse(decimalStats.Sum);
    }
}
/// <summary>
/// Creates the statistics object matching whichever type-specific
/// statistics are present in the serialized form. The checks run in a fixed
/// order and the first match wins; when none is present, a plain
/// <see cref="ColumnStatisticsImpl"/> is returned.
/// </summary>
/// <param name="stats">Serialized column statistics to read from.</param>
/// <returns>A statistics instance built from <paramref name="stats"/>.</returns>
public static ColumnStatisticsImpl deserialize(OrcProto.ColumnStatistics stats)
{
    if (stats.HasBucketStatistics)
    {
        return new BooleanStatisticsImpl(stats);
    }
    if (stats.HasIntStatistics)
    {
        return new IntegerStatisticsImpl(stats);
    }
    if (stats.HasDoubleStatistics)
    {
        return new DoubleStatisticsImpl(stats);
    }
    if (stats.HasStringStatistics)
    {
        return new StringStatisticsImpl(stats);
    }
    if (stats.HasDecimalStatistics)
    {
        return new DecimalStatisticsImpl(stats);
    }
    if (stats.HasDateStatistics)
    {
        return new DateStatisticsImpl(stats);
    }
    if (stats.HasTimestampStatistics)
    {
        return new TimestampStatisticsImpl(stats);
    }
    if (stats.HasBinaryStatistics)
    {
        return new BinaryStatisticsImpl(stats);
    }

    // No type-specific statistics present: fall back to the generic form.
    return new ColumnStatisticsImpl(stats);
}
/// <summary>
/// Builds integer column statistics from their serialized form. Minimum and
/// maximum are copied only when present; a present minimum also sets the
/// hasMinimum flag. A missing sum sets the overflow flag.
/// </summary>
/// <param name="stats">Serialized column statistics to read from.</param>
public IntegerStatisticsImpl(OrcProto.ColumnStatistics stats)
    : base(stats)
{
    OrcProto.IntegerStatistics integerStats = stats.IntStatistics;

    if (integerStats.HasMinimum)
    {
        hasMinimum = true;
        minimum = integerStats.Minimum;
    }

    if (integerStats.HasMaximum)
    {
        maximum = integerStats.Maximum;
    }

    if (!integerStats.HasSum)
    {
        // No sum in the serialized statistics is recorded as an overflow.
        overflow = true;
    }
    else
    {
        sum = integerStats.Sum;
    }
}
/// <summary>
/// Builds boolean column statistics from their serialized form, taking the
/// true-count from the first element of the bucket statistics' count list.
/// </summary>
/// <param name="stats">Serialized column statistics to read from.</param>
public BooleanStatisticsImpl(OrcProto.ColumnStatistics stats)
    : base(stats)
{
    // NOTE(review): indexes element 0 without checking that the count list
    // is non-empty — confirm callers only reach this with populated buckets.
    var bucketStats = stats.BucketStatistics;
    trueCount = bucketStats.CountList[0];
}