/// <summary>
/// Estimates the raw (in-memory) data size of the column handled by
/// <paramref name="child"/>, using its file-level statistics and the
/// per-type sizes supplied by JavaDataModel.
/// </summary>
/// <param name="child">Writer whose file statistics (and, for compound types, child writers) are read.</param>
/// <param name="schema">Type description of the column; its category selects the sizing rule.</param>
/// <returns>
/// The estimated size. Compound categories return the sum over their
/// children; an unrecognized category is logged at debug level and yields 0.
/// </returns>
private long getRawDataSize(TreeWriter child, TypeDescription schema)
{
    long valueCount = child.fileStatistics.getNumberOfValues();

    switch (schema.getCategory())
    {
        case Category.BOOLEAN:
        case Category.BYTE:
        case Category.SHORT:
        case Category.INT:
        case Category.FLOAT:
        {
            return valueCount * JavaDataModel.Four;
        }

        case Category.LONG:
        case Category.DOUBLE:
        {
            return valueCount * JavaDataModel.Eight;
        }

        case Category.STRING:
        case Category.VARCHAR:
        case Category.CHAR:
        {
            // ORC strings are converted to java Strings, so JavaDataModel is
            // used to size them from the average string length.
            StringColumnStatistics stringStats = (StringColumnStatistics)child.fileStatistics;

            // A count of zero is treated as one to avoid dividing by zero; as a
            // consequence an empty column is sized as one zero-length string.
            long effectiveCount = valueCount == 0 ? 1 : valueCount;
            int averageLength = (int)(stringStats.getSum() / effectiveCount);
            return effectiveCount * JavaDataModel.lengthForStringOfLength(averageLength);
        }

        case Category.DECIMAL:
        {
            return valueCount * JavaDataModel.lengthOfDecimal();
        }

        case Category.DATE:
        {
            return valueCount * JavaDataModel.lengthOfDate();
        }

        case Category.BINARY:
        {
            // The statistics' sum is used directly as the total blob length.
            BinaryColumnStatistics binaryStats = (BinaryColumnStatistics)child.fileStatistics;
            return binaryStats.getSum();
        }

        case Category.TIMESTAMP:
        {
            return valueCount * JavaDataModel.lengthOfTimestamp();
        }

        case Category.LIST:
        case Category.MAP:
        case Category.UNION:
        case Category.STRUCT:
        {
            // Compound types: add up the estimates of every child writer,
            // pairing each writer with the child type at the same position.
            TreeWriter[] nestedWriters = child.getChildrenWriters();
            IList<TypeDescription> nestedTypes = schema.getChildren();

            long subtotal = 0;
            int position = 0;
            foreach (TreeWriter nestedWriter in nestedWriters)
            {
                subtotal += getRawDataSize(nestedWriter, nestedTypes[position]);
                position++;
            }
            return subtotal;
        }

        default:
        {
            LOG.debug("Unknown object inspector category.");
            return 0;
        }
    }
}
/// <summary>
/// Adds the serialized file statistics of <paramref name="writer"/> to the
/// footer builder, then does the same for each of its child writers in
/// order (depth-first, parent before children).
/// </summary>
/// <param name="builder">Footer builder that receives one statistics entry per writer.</param>
/// <param name="writer">Root of the writer subtree to process.</param>
private void writeFileStatistics(OrcProto.Footer.Builder builder, TreeWriter writer)
{
    // The parent's entry is added before any of its descendants.
    var serializedStats = writer.fileStatistics.serialize();
    builder.AddStatistics(serializedStats);

    TreeWriter[] children = writer.getChildrenWriters();
    for (int i = 0; i < children.Length; i++)
    {
        writeFileStatistics(builder, children[i]);
    }
}
/// <summary>
/// Flushes the stripe-level column statistics of <paramref name="treeWriter"/>
/// and of all its descendants: each writer's stripe statistics are merged
/// into its file statistics, added to the builder, and then reset.
/// Writers are visited depth-first, parent before children.
/// </summary>
/// <param name="builder">Stripe statistics builder that receives one column entry per writer.</param>
/// <param name="treeWriter">Root of the writer subtree to process.</param>
private void writeStripeStatistics(OrcProto.StripeStatistics.Builder builder, TreeWriter treeWriter)
{
    var stripeStats = treeWriter.stripeColStatistics;

    // Fold this stripe into the file-level totals before the stripe
    // statistics are cleared below.
    treeWriter.fileStatistics.merge(stripeStats);

    var builtColumnStats = stripeStats.serialize().Build();
    builder.AddColStats(builtColumnStats);

    // Cleared only after the statistics have been merged and serialized.
    stripeStats.reset();

    TreeWriter[] children = treeWriter.getChildrenWriters();
    for (int i = 0; i < children.Length; i++)
    {
        writeStripeStatistics(builder, children[i]);
    }
}