Exemplo n.º 1
0
        /// <summary>
        /// Writes the row-group index entries of a single column as a JSON array stored
        /// under the key "rowGroupIndexes".
        /// </summary>
        /// <remarks>
        /// Nothing is written when <paramref name="rowGroupIndex"/> is null, when
        /// <paramref name="col"/> is at or beyond the end of the array, or when the slot
        /// for the column is null.
        /// </remarks>
        /// <param name="writer">JSON writer that receives the output.</param>
        /// <param name="col">Index of the column inside <paramref name="rowGroupIndex"/>.</param>
        /// <param name="rowGroupIndex">Per-column row indexes; may be null.</param>
        private static void writeRowGroupIndexes(JsonWriter writer, int col,
                                                 OrcProto.RowIndex[] rowGroupIndex)
        {
            OrcProto.RowIndex index;
            if (rowGroupIndex == null || (col >= rowGroupIndex.Length) ||
                ((index = rowGroupIndex[col]) == null))
            {
                return;
            }

            writer.key("rowGroupIndexes").array();
            for (int entryIx = 0; entryIx < index.EntryCount; ++entryIx)
            {
                writer.newObject();
                writer.key("entryId").value(entryIx);
                OrcProto.RowIndexEntry entry = index.EntryList[entryIx];
                if (entry != null)
                {
                    OrcProto.ColumnStatistics colStats = entry.Statistics;
                    writeColumnStatistics(writer, ColumnStatisticsImpl.deserialize(colStats));
                    writer.key("positions").array();
                    for (int posIx = 0; posIx < entry.PositionsCount; ++posIx)
                    {
                        writer.value(entry.PositionsList[posIx]);
                    }
                    writer.endArray();
                }
                // The object opened above must be closed on every iteration. Skipping this
                // for a null entry would leave the JSON output unbalanced.
                writer.endObject();
            }
            writer.endArray();
        }
 /// <summary>
 /// Builds binary column statistics from their serialized form. The base constructor
 /// receives the same message; the sum is copied only when the message carries one.
 /// </summary>
 /// <param name="stats">Serialized column statistics.</param>
 public BinaryStatisticsImpl(OrcProto.ColumnStatistics stats)
     : base(stats)
 {
     OrcProto.BinaryStatistics binaryStats = stats.BinaryStatistics;
     if (!binaryStats.HasSum)
     {
         // No sum recorded: keep whatever value the field already has.
         return;
     }
     sum = binaryStats.Sum;
 }
Exemplo n.º 3
0
        /// <summary>
        /// Computes the raw data size of one column from its type kind and its column
        /// statistics.
        /// </summary>
        /// <param name="colIdx">Index of the column in both lists.</param>
        /// <param name="types">Type descriptors, indexed by column.</param>
        /// <param name="stats">Column statistics, indexed by column.</param>
        /// <returns>
        /// The computed size, or 0 for a type kind that is not handled here (a debug
        /// message is logged in that case).
        /// </returns>
        private static long getRawDataSizeOfColumn(int colIdx, IList <OrcProto.Type> types,
                                                   IList <OrcProto.ColumnStatistics> stats)
        {
            OrcProto.ColumnStatistics columnStats = stats[colIdx];
            long valueCount = (long)columnStats.NumberOfValues;

            OrcProto.Type columnType = types[colIdx];

            switch (columnType.Kind)
            {
            case OrcProto.Type.Types.Kind.BINARY:
                // The old ORC format has no binary statistics; no presence check is
                // needed because protocol buffers takes care of the missing message.
                return colStatSum(columnStats);

            case OrcProto.Type.Types.Kind.STRING:
            case OrcProto.Type.Types.Kind.CHAR:
            case OrcProto.Type.Types.Kind.VARCHAR:
            {
                // The old ORC format has no sum in string statistics; protocol buffers
                // takes care of the missing field. ORC strings are deserialized to
                // in-memory strings, so the data model's string size is used.
                if (valueCount == 0)
                {
                    // Avoid dividing by zero for an empty column.
                    valueCount = 1;
                }
                int averageLength = (int)(columnStats.StringStatistics.Sum / valueCount);
                return valueCount * JavaDataModel.lengthForStringOfLength(averageLength);
            }

            case OrcProto.Type.Types.Kind.TIMESTAMP:
                return valueCount * JavaDataModel.lengthOfTimestamp();

            case OrcProto.Type.Types.Kind.DATE:
                return valueCount * JavaDataModel.lengthOfDate();

            case OrcProto.Type.Types.Kind.DECIMAL:
                return valueCount * JavaDataModel.lengthOfDecimal();

            case OrcProto.Type.Types.Kind.DOUBLE:
            case OrcProto.Type.Types.Kind.LONG:
                return valueCount * JavaDataModel.Eight;

            case OrcProto.Type.Types.Kind.FLOAT:
            case OrcProto.Type.Types.Kind.INT:
            case OrcProto.Type.Types.Kind.SHORT:
            case OrcProto.Type.Types.Kind.BOOLEAN:
            case OrcProto.Type.Types.Kind.BYTE:
                return valueCount * JavaDataModel.Four;

            default:
                LOG.debug("Unknown primitive category: " + columnType.Kind);
                return 0;
            }
        }

        /// <summary>Reads the sum stored in the binary statistics of a column.</summary>
        private static long colStatSum(OrcProto.ColumnStatistics columnStats)
        {
            return columnStats.BinaryStatistics.Sum;
        }
 /// <summary>
 /// Builds timestamp column statistics from their serialized form. Minimum and
 /// maximum are each set only when present in the message.
 /// </summary>
 /// <param name="stats">Serialized column statistics.</param>
 public TimestampStatisticsImpl(OrcProto.ColumnStatistics stats)
     : base(stats)
 {
     // Per the original note: min/max are serialized as integers holding
     // milliseconds since the epoch.
     OrcProto.TimestampStatistics tsStats = stats.TimestampStatistics;

     if (tsStats.HasMaximum)
     {
         maximum = new Timestamp(tsStats.Maximum);
     }

     if (tsStats.HasMinimum)
     {
         minimum = new Timestamp(tsStats.Minimum);
     }
 }
 /// <summary>
 /// Builds date column statistics from their serialized form. Minimum and maximum
 /// are each set only when present in the message.
 /// </summary>
 /// <param name="stats">Serialized column statistics.</param>
 public DateStatisticsImpl(OrcProto.ColumnStatistics stats)
     : base(stats)
 {
     // Per the original note: min/max are serialized as integers holding days
     // since the epoch.
     OrcProto.DateStatistics serializedDates = stats.DateStatistics;

     if (serializedDates.HasMaximum)
     {
         maximum = new Date(serializedDates.Maximum);
     }

     if (serializedDates.HasMinimum)
     {
         minimum = new Date(serializedDates.Minimum);
     }
 }
        /// <summary>
        /// Initializes the common statistics fields from their serialized form.
        /// </summary>
        /// <param name="stats">Serialized column statistics.</param>
        protected ColumnStatisticsImpl(OrcProto.ColumnStatistics stats)
        {
            if (stats.HasNumberOfValues)
            {
                count = stats.NumberOfValues;
            }

            // When the message carries no has-null flag, the column is treated as
            // possibly containing nulls.
            _hasNull = !stats.HasHasNull || stats.HasNull;
        }
 /// <summary>
 /// Builds string column statistics from their serialized form. Maximum, minimum
 /// and sum are each copied only when present in the message.
 /// </summary>
 /// <param name="stats">Serialized column statistics.</param>
 public StringStatisticsImpl(OrcProto.ColumnStatistics stats)
     : base(stats)
 {
     OrcProto.StringStatistics stringStats = stats.StringStatistics;

     if (stringStats.HasMaximum) { maximum = stringStats.Maximum; }
     if (stringStats.HasMinimum) { minimum = stringStats.Minimum; }
     if (stringStats.HasSum)     { sum     = stringStats.Sum; }
 }
 /// <summary>
 /// Builds double column statistics from their serialized form. Minimum, maximum
 /// and sum are each copied only when present in the message.
 /// </summary>
 /// <param name="stats">Serialized column statistics.</param>
 public DoubleStatisticsImpl(OrcProto.ColumnStatistics stats)
     : base(stats)
 {
     OrcProto.DoubleStatistics doubleStats = stats.DoubleStatistics;

     if (doubleStats.HasMinimum)
     {
         // Presence of the minimum is tracked by its own flag.
         minimum    = doubleStats.Minimum;
         hasMinimum = true;
     }

     if (doubleStats.HasMaximum) { maximum = doubleStats.Maximum; }
     if (doubleStats.HasSum)     { sum     = doubleStats.Sum; }
 }
 /// <summary>
 /// Builds decimal column statistics from their serialized form. Values are parsed
 /// with <c>HiveDecimal.Parse</c>; the sum is explicitly reset to null when the
 /// message does not carry one.
 /// </summary>
 /// <param name="stats">Serialized column statistics.</param>
 public DecimalStatisticsImpl(OrcProto.ColumnStatistics stats)
     : base(stats)
 {
     OrcProto.DecimalStatistics decimalStats = stats.DecimalStatistics;

     if (decimalStats.HasMaximum)
     {
         maximum = HiveDecimal.Parse(decimalStats.Maximum);
     }

     if (decimalStats.HasMinimum)
     {
         minimum = HiveDecimal.Parse(decimalStats.Minimum);
     }

     if (!decimalStats.HasSum)
     {
         sum = null;
     }
     else
     {
         sum = HiveDecimal.Parse(decimalStats.Sum);
     }
 }
 /// <summary>
 /// Creates the statistics object that matches the kind of statistics present in
 /// the serialized message.
 /// </summary>
 /// <remarks>
 /// The checks run in a fixed order (bucket, int, double, string, decimal, date,
 /// timestamp, binary) and the first one present wins. A message with none of
 /// them yields a plain <c>ColumnStatisticsImpl</c>.
 /// </remarks>
 /// <param name="stats">Serialized column statistics.</param>
 /// <returns>A new statistics instance built from <paramref name="stats"/>.</returns>
 public static ColumnStatisticsImpl deserialize(OrcProto.ColumnStatistics stats)
 {
     if (stats.HasBucketStatistics)
     {
         return new BooleanStatisticsImpl(stats);
     }
     if (stats.HasIntStatistics)
     {
         return new IntegerStatisticsImpl(stats);
     }
     if (stats.HasDoubleStatistics)
     {
         return new DoubleStatisticsImpl(stats);
     }
     if (stats.HasStringStatistics)
     {
         return new StringStatisticsImpl(stats);
     }
     if (stats.HasDecimalStatistics)
     {
         return new DecimalStatisticsImpl(stats);
     }
     if (stats.HasDateStatistics)
     {
         return new DateStatisticsImpl(stats);
     }
     if (stats.HasTimestampStatistics)
     {
         return new TimestampStatisticsImpl(stats);
     }
     if (stats.HasBinaryStatistics)
     {
         return new BinaryStatisticsImpl(stats);
     }

     // No type-specific statistics present: fall back to the generic implementation.
     return new ColumnStatisticsImpl(stats);
 }
 /// <summary>
 /// Builds integer column statistics from their serialized form. Minimum and
 /// maximum are copied when present; a missing sum marks the statistics as
 /// overflowed.
 /// </summary>
 /// <param name="stats">Serialized column statistics.</param>
 public IntegerStatisticsImpl(OrcProto.ColumnStatistics stats)
     : base(stats)
 {
     OrcProto.IntegerStatistics integerStats = stats.IntStatistics;

     if (integerStats.HasMinimum)
     {
         // Presence of the minimum is tracked by its own flag.
         minimum    = integerStats.Minimum;
         hasMinimum = true;
     }

     if (integerStats.HasMaximum)
     {
         maximum = integerStats.Maximum;
     }

     if (!integerStats.HasSum)
     {
         // An absent sum is recorded as an overflow.
         overflow = true;
     }
     else
     {
         sum = integerStats.Sum;
     }
 }
 /// <summary>
 /// Builds boolean column statistics from their serialized form. The true-count is
 /// taken from the first element of the bucket statistics' count list.
 /// </summary>
 /// <param name="stats">Serialized column statistics.</param>
 public BooleanStatisticsImpl(OrcProto.ColumnStatistics stats)
     : base(stats)
 {
     // NOTE(review): indexing element 0 assumes the count list is non-empty — confirm
     // that callers only pass messages with at least one bucket.
     var bucketStats = stats.BucketStatistics;
     trueCount = bucketStats.CountList[0];
 }