/**
 * Copies one list entry from the input vector into this vector. The entry's
 * child values are appended after the child values already held here.
 *
 * @param outElementNum index in this vector that receives the entry
 * @param inputElementNum index of the entry to read; element 0 is read
 *        instead when the input vector is repeating
 * @param inputVector the source vector, cast to ListColumnVector
 */
public override void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector)
{
    ListColumnVector source = (ListColumnVector)inputVector;
    int srcIndex = source.isRepeating ? 0 : inputElementNum;

    // A null source entry only flips the null bookkeeping; no child data is copied.
    if (!source.noNulls && source.isNull[srcIndex])
    {
        isNull[outElementNum] = true;
        noNulls = false;
        return;
    }

    isNull[outElementNum] = false;

    // New child values start where the existing child data currently ends.
    int destStart = childCount;
    int count = (int)source.lengths[srcIndex];
    int srcStart = (int)source.offsets[srcIndex];

    offsets[outElementNum] = destStart;
    childCount += count;
    lengths[outElementNum] = count;
    child.ensureSize(childCount, true);

    for (int k = 0; k < count; ++k)
    {
        child.setElement(destStart + k, srcStart + k, source.child);
    }
}
/**
 * Creates a map column vector that stores its entries in the supplied
 * key and value child vectors.
 *
 * @param len vector length, forwarded to the base constructor; defaults to
 *        VectorizedRowBatch.DEFAULT_SIZE
 * @param keys the child vector holding the keys (null when omitted)
 * @param values the child vector holding the values (null when omitted)
 */
public MapColumnVector(
    int len = VectorizedRowBatch.DEFAULT_SIZE,
    ColumnVector keys = null,
    ColumnVector values = null)
    : base(len)
{
    this.keys = keys;
    this.values = values;
}
/**
 * Copies one union value from the input vector into this vector: the tag is
 * copied, then the value is copied into the field selected by that tag.
 *
 * @param outElementNum index in this vector that receives the value
 * @param inputElementNum index of the value to read; element 0 is read
 *        instead when the input vector is repeating
 * @param inputVector the source vector, cast to UnionColumnVector when the
 *        source element is not null
 */
public override void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector)
{
    int srcIndex = inputVector.isRepeating ? 0 : inputElementNum;

    bool sourceIsNull = !inputVector.noNulls && inputVector.isNull[srcIndex];
    if (sourceIsNull)
    {
        noNulls = false;
        isNull[outElementNum] = true;
        return;
    }

    isNull[outElementNum] = false;
    UnionColumnVector source = (UnionColumnVector)inputVector;
    tags[outElementNum] = source.tags[srcIndex];

    // The freshly stored tag picks the field on both the output and the input side.
    var tag = tags[outElementNum];
    fields[tag].setElement(outElementNum, srcIndex, source.fields[tag]);
}
/**
 * Writes a run of byte-string values, truncating each one to at most
 * maxLength bytes. Values go either to the dictionary (recording the
 * dictionary id per row) or directly to the data and length streams.
 *
 * @param vector the vector to write from, cast to BytesColumnVector
 * @param offset the first value from the vector to write
 * @param length the number of values from the vector to write
 */
internal override void writeBatch(ColumnVector vector, int offset, int length)
{
    base.writeBatch(vector, offset, length);
    BytesColumnVector bytes = (BytesColumnVector)vector;

    if (vector.isRepeating)
    {
        // A repeating null contributes no data; the base call handled the null bookkeeping.
        if (!vector.noNulls && vector.isNull[0])
        {
            return;
        }

        int repeatedLength = Math.Min(bytes.length[0], maxLength);
        byte[] repeatedData = bytes.vector[0];
        int repeatedStart = bytes.start[0];

        if (useDictionaryEncoding)
        {
            // One dictionary lookup, then the same id for every row.
            int id = dictionary.add(repeatedData, repeatedStart, repeatedLength);
            for (int n = 0; n < length; ++n)
            {
                rows.add(id);
            }
        }
        else
        {
            for (int n = 0; n < length; ++n)
            {
                directStreamOutput.Write(repeatedData, repeatedStart, repeatedLength);
                directLengthOutput.write(repeatedLength);
            }
        }

        indexStatistics.updateString(repeatedData, repeatedStart, repeatedLength, length);
        if (createBloomFilter)
        {
            bloomFilter.addBytes(repeatedData, repeatedStart, repeatedLength);
        }
        return;
    }

    for (int i = 0; i < length; ++i)
    {
        int row = offset + i;
        if (!bytes.noNulls && bytes.isNull[row])
        {
            continue;
        }

        int rowLength = Math.Min(bytes.length[row], maxLength);
        byte[] rowData = bytes.vector[row];
        int rowStart = bytes.start[row];

        if (useDictionaryEncoding)
        {
            rows.add(dictionary.add(rowData, rowStart, rowLength));
        }
        else
        {
            directStreamOutput.Write(rowData, rowStart, rowLength);
            directLengthOutput.write(rowLength);
        }

        indexStatistics.updateString(rowData, rowStart, rowLength, 1);
        if (createBloomFilter)
        {
            bloomFilter.addBytes(rowData, rowStart, rowLength);
        }
    }
}
/**
 * Copies one struct value from the input vector into this vector by copying
 * the same element of every field.
 *
 * @param outElementNum index in this vector that receives the value
 * @param inputElementNum index of the value to read; element 0 is read
 *        instead when the input vector is repeating
 * @param inputVector the source vector, cast to StructColumnVector when the
 *        source element is not null
 */
public override void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector)
{
    int srcIndex = inputVector.isRepeating ? 0 : inputElementNum;

    if (!inputVector.noNulls && inputVector.isNull[srcIndex])
    {
        noNulls = false;
        isNull[outElementNum] = true;
        return;
    }

    isNull[outElementNum] = false;
    ColumnVector[] sourceFields = ((StructColumnVector)inputVector).fields;

    // The loop is bounded by the input's field count; field f of the input feeds field f here.
    for (int f = 0; f < sourceFields.Length; ++f)
    {
        ColumnVector target = fields[f];
        target.setElement(outElementNum, srcIndex, sourceFields[f]);
    }
}
/**
 * Records the null information for a run of values: writes one entry per
 * row to the present stream (when there is one) and updates the value count
 * and null flag in the index statistics.
 *
 * @param vector the vector to write from
 * @param offset the first value from the vector to write
 * @param length the number of values from the vector to write
 * @throws IOException per the original documentation; presumably raised by
 *         the present-stream writes (not verifiable from this block)
 */
internal virtual void writeBatch(ColumnVector vector, int offset, int length)
{
    // Case 1: the vector declares it has no nulls, so every row is present.
    if (vector.noNulls)
    {
        indexStatistics.increment(length);
        if (isPresent != null)
        {
            for (int n = 0; n < length; ++n)
            {
                isPresent.write(1);
            }
        }
        return;
    }

    // Case 2: one repeated value, so a single null flag describes all rows.
    if (vector.isRepeating)
    {
        bool repeatedIsNull = vector.isNull[0];
        if (isPresent != null)
        {
            int presentBit = repeatedIsNull ? 0 : 1;
            for (int n = 0; n < length; ++n)
            {
                isPresent.write(presentBit);
            }
        }

        if (repeatedIsNull)
        {
            foundNulls = true;
            indexStatistics.setNull();
        }
        else
        {
            indexStatistics.increment(length);
        }
        return;
    }

    // Case 3: inspect every row, counting the non-null ones as we go.
    int presentRows = 0;
    for (int n = 0; n < length; ++n)
    {
        bool rowIsNull = vector.isNull[offset + n];
        if (!rowIsNull)
        {
            ++presentRows;
        }
        if (isPresent != null)
        {
            isPresent.write(rowIsNull ? 0 : 1);
        }
    }

    indexStatistics.increment(presentRows);
    if (presentRows != length)
    {
        foundNulls = true;
        indexStatistics.setNull();
    }
}
/**
 * Writes a run of union values: one tag per non-null row to the tag stream,
 * and the values themselves through the child writer selected by each tag.
 *
 * For a non-repeating vector the rows are forwarded to the child writers in
 * runs of consecutive non-null rows that share the same tag.
 *
 * @param vector the vector to write from, cast to UnionColumnVector
 * @param offset the first value from the vector to write
 * @param length the number of values from the vector to write
 */
internal override void writeBatch(ColumnVector vector, int offset, int length)
{
    base.writeBatch(vector, offset, length);
    UnionColumnVector vec = (UnionColumnVector)vector;
    if (vector.isRepeating)
    {
        if (vector.noNulls || !vector.isNull[0])
        {
            byte tag = (byte)vec.tags[0];
            for (int i = 0; i < length; ++i)
            {
                tags.write(tag);
            }
            if (createBloomFilter)
            {
                bloomFilter.addLong(tag);
            }
            childrenWriters[tag].writeBatch(vec.fields[tag], offset, length);
        }
    }
    else
    {
        // write the records in runs of the same tag
        byte prevTag = 0;
        int currentRun = 0;
        bool started = false;
        for (int i = 0; i < length; ++i)
        {
            // Honor noNulls before looking at isNull, exactly as the base
            // writer does when it fills the present stream. Otherwise a stale
            // isNull flag would drop a row that was already marked present.
            if (vec.noNulls || !vec.isNull[i + offset])
            {
                byte tag = (byte)vec.tags[offset + i];
                tags.write(tag);
                // Keep the bloom filter in step with the repeating branch,
                // which records the tag it writes.
                if (createBloomFilter)
                {
                    bloomFilter.addLong(tag);
                }
                if (!started)
                {
                    started = true;
                    currentRun = i;
                    prevTag = tag;
                }
                else if (tag != prevTag)
                {
                    // The tag changed: flush the finished run, then start a new one here.
                    childrenWriters[prevTag].writeBatch(vec.fields[prevTag], offset + currentRun, i - currentRun);
                    currentRun = i;
                    prevTag = tag;
                }
            }
            else if (started)
            {
                // A null row ends the current run.
                started = false;
                childrenWriters[prevTag].writeBatch(vec.fields[prevTag], offset + currentRun, i - currentRun);
            }
        }
        // Flush the run that is still open at the end of the batch.
        if (started)
        {
            childrenWriters[prevTag].writeBatch(vec.fields[prevTag], offset + currentRun, length - currentRun);
        }
    }
}
/**
 * Copies one decimal value from the input vector into this vector. The
 * output element is marked null when the input element is flagged null or
 * when the stored decimal itself is null.
 *
 * @param outElementNum index in this vector that receives the value
 * @param inputElementNum index of the value to read; element 0 is read
 *        instead when the input vector is repeating
 * @param inputVector the source vector, cast to DecimalColumnVector when
 *        the source element is not flagged null
 */
public override void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector)
{
    int srcIndex = inputVector.isRepeating ? 0 : inputElementNum;

    if (!inputVector.noNulls && inputVector.isNull[srcIndex])
    {
        isNull[outElementNum] = true;
        noNulls = false;
        return;
    }

    HiveDecimal value = ((DecimalColumnVector)inputVector).vector[srcIndex];

    // A missing decimal is treated the same way as an explicit null flag.
    if (value == null)
    {
        isNull[outElementNum] = true;
        noNulls = false;
        return;
    }

    isNull[outElementNum] = false;
    vector[outElementNum] = value;
}
/**
 * Writes a run of timestamp values. Each non-null value is split into a
 * seconds part (relative to base_timestamp) written to the seconds stream
 * and a nanosecond part, passed through formatNanos, written to the nanos
 * stream. The statistics and the optional bloom filter receive the value
 * divided by MILLIS_PER_NANO.
 *
 * Both branches normalize a negative nanosecond remainder into the
 * non-negative range by adding NANOS_PER_SECOND, so a value is encoded the
 * same way whether it arrives in a repeating or a non-repeating vector.
 *
 * @param vector the vector to write from, cast to LongColumnVector
 * @param offset the first value from the vector to write
 * @param length the number of values from the vector to write
 */
internal override void writeBatch(ColumnVector vector, int offset, int length)
{
    base.writeBatch(vector, offset, length);
    LongColumnVector vec = (LongColumnVector)vector;
    if (vector.isRepeating)
    {
        if (vector.noNulls || !vector.isNull[0])
        {
            long value = vec.vector[0];
            long valueMillis = value / MILLIS_PER_NANO;
            indexStatistics.updateTimestamp(valueMillis);
            if (createBloomFilter)
            {
                bloomFilter.addLong(valueMillis);
            }
            long secs = value / NANOS_PER_SECOND - base_timestamp;
            int valueNanos = (int)(value % NANOS_PER_SECOND);
            // The remainder of a negative value is negative. Shift it into
            // [0, NANOS_PER_SECOND) as the non-repeating branch does, so that
            // formatNanos never sees a negative nanosecond count.
            if (valueNanos < 0)
            {
                valueNanos += NANOS_PER_SECOND;
            }
            long nano = formatNanos(valueNanos);
            for (int i = 0; i < length; ++i)
            {
                seconds.write(secs);
                nanos.write(nano);
            }
        }
    }
    else
    {
        for (int i = 0; i < length; ++i)
        {
            if (vec.noNulls || !vec.isNull[i + offset])
            {
                long value = vec.vector[i + offset];
                long valueMillis = value / MILLIS_PER_NANO;
                long valueSecs = value / NANOS_PER_SECOND - base_timestamp;
                int valueNanos = (int)(value % NANOS_PER_SECOND);
                if (valueNanos < 0)
                {
                    valueNanos += NANOS_PER_SECOND;
                }
                seconds.write(valueSecs);
                nanos.write(formatNanos(valueNanos));
                indexStatistics.updateTimestamp(valueMillis);
                if (createBloomFilter)
                {
                    bloomFilter.addLong(valueMillis);
                }
            }
        }
    }
}
/**
 * Writes a run of integer values to the value writer, updating the integer
 * statistics and the optional bloom filter for every non-null value.
 *
 * @param vector the vector to write from, cast to LongColumnVector
 * @param offset the first value from the vector to write
 * @param length the number of values from the vector to write
 */
internal override void writeBatch(ColumnVector vector, int offset, int length)
{
    base.writeBatch(vector, offset, length);
    LongColumnVector longs = (LongColumnVector)vector;

    if (vector.isRepeating)
    {
        if (!vector.noNulls && vector.isNull[0])
        {
            return;
        }

        // Statistics take the repetition count directly; the value itself is written once per row.
        long repeated = longs.vector[0];
        indexStatistics.updateInteger(repeated, length);
        if (createBloomFilter)
        {
            bloomFilter.addLong(repeated);
        }
        for (int n = 0; n < length; ++n)
        {
            writer.write(repeated);
        }
        return;
    }

    for (int n = 0; n < length; ++n)
    {
        int row = offset + n;
        if (!longs.noNulls && longs.isNull[row])
        {
            continue;
        }

        long current = longs.vector[row];
        writer.write(current);
        indexStatistics.updateInteger(current, 1);
        if (createBloomFilter)
        {
            bloomFilter.addLong(current);
        }
    }
}
/**
 * Creates a list column vector whose elements are stored in the supplied
 * child vector.
 *
 * @param len vector length, forwarded to the base constructor; defaults to
 *        VectorizedRowBatch.DEFAULT_SIZE
 * @param child the child vector holding the list elements (null when omitted)
 */
public ListColumnVector(
    int len = VectorizedRowBatch.DEFAULT_SIZE,
    ColumnVector child = null)
    : base(len)
{
    this.child = child;
}
/**
 * Writes a run of decimal values: the unscaled value goes to the value
 * stream as a big integer and the scale goes to the scale stream. The
 * decimal statistics and the optional bloom filter (keyed on the decimal's
 * string form) are updated for non-null values.
 *
 * @param vector the vector to write from, cast to DecimalColumnVector
 * @param offset the first value from the vector to write
 * @param length the number of values from the vector to write
 */
internal override void writeBatch(ColumnVector vector, int offset, int length)
{
    base.writeBatch(vector, offset, length);
    DecimalColumnVector decimals = (DecimalColumnVector)vector;

    if (vector.isRepeating)
    {
        if (!vector.noNulls && vector.isNull[0])
        {
            return;
        }

        HiveDecimal repeated = decimals.vector[0];
        // NOTE(review): the statistics see the repeated value once, not
        // `length` times - confirm this is intended for sum-style statistics.
        indexStatistics.updateDecimal(repeated);
        if (createBloomFilter)
        {
            bloomFilter.addString(repeated.ToString());
        }
        for (int n = 0; n < length; ++n)
        {
            SerializationUtils.writeBigInteger(valueStream, repeated.unscaledValue());
            scaleStream.write(repeated.scale());
        }
        return;
    }

    for (int n = 0; n < length; ++n)
    {
        int row = offset + n;
        if (!decimals.noNulls && decimals.isNull[row])
        {
            continue;
        }

        HiveDecimal current = decimals.vector[row];
        SerializationUtils.writeBigInteger(valueStream, current.unscaledValue());
        scaleStream.write(current.scale());
        indexStatistics.updateDecimal(current);
        if (createBloomFilter)
        {
            bloomFilter.addString(current.ToString());
        }
    }
}
/**
 * Writes a run of floating-point values, narrowing each one to float before
 * it is written to the stream, added to the statistics, or added to the
 * optional bloom filter.
 *
 * @param vector the vector to write from, cast to DoubleColumnVector
 * @param offset the first value from the vector to write
 * @param length the number of values from the vector to write
 */
internal override void writeBatch(ColumnVector vector, int offset, int length)
{
    base.writeBatch(vector, offset, length);
    DoubleColumnVector doubles = (DoubleColumnVector)vector;

    if (vector.isRepeating)
    {
        if (!vector.noNulls && vector.isNull[0])
        {
            return;
        }

        float repeated = (float)doubles.vector[0];
        // NOTE(review): the statistics see the repeated value once, not
        // `length` times - confirm this is intended for sum-style statistics.
        indexStatistics.updateDouble(repeated);
        if (createBloomFilter)
        {
            bloomFilter.addDouble(repeated);
        }
        for (int n = 0; n < length; ++n)
        {
            utils.writeFloat(stream, repeated);
        }
        return;
    }

    for (int n = 0; n < length; ++n)
    {
        int row = offset + n;
        if (!doubles.noNulls && doubles.isNull[row])
        {
            continue;
        }

        float current = (float)doubles.vector[row];
        utils.writeFloat(stream, current);
        indexStatistics.updateDouble(current);
        if (createBloomFilter)
        {
            bloomFilter.addDouble(current);
        }
    }
}
/**
 * Writes a run of fixed-width byte-string values. Every value is written
 * with exactly itemLength bytes: a value at least that long is written from
 * its own buffer using only its first itemLength bytes, while a shorter
 * value is copied into the shared padding buffer and filled out with spaces.
 *
 * @param vector the vector to write from, cast to BytesColumnVector
 * @param offset the first value from the vector to write
 * @param length the number of values from the vector to write
 */
internal override void writeBatch(ColumnVector vector, int offset, int length)
{
    base.writeBatch(vector, offset, length);
    BytesColumnVector bytes = (BytesColumnVector)vector;

    if (vector.isRepeating)
    {
        if (!vector.noNulls && vector.isNull[0])
        {
            return;
        }

        byte[] buffer;
        int bufferStart;
        if (bytes.length[0] < itemLength)
        {
            // Too short: stage the bytes in the padding buffer and space-fill the tail.
            buffer = padding;
            bufferStart = 0;
            Array.Copy(bytes.vector[0], bytes.start[0], buffer, 0, bytes.length[0]);
            Arrays.fill(buffer, bytes.length[0], itemLength, (byte)' ');
        }
        else
        {
            buffer = bytes.vector[0];
            bufferStart = bytes.start[0];
        }

        if (useDictionaryEncoding)
        {
            int id = dictionary.add(buffer, bufferStart, itemLength);
            for (int n = 0; n < length; ++n)
            {
                rows.add(id);
            }
        }
        else
        {
            for (int n = 0; n < length; ++n)
            {
                directStreamOutput.Write(buffer, bufferStart, itemLength);
                directLengthOutput.write(itemLength);
            }
        }

        indexStatistics.updateString(buffer, bufferStart, itemLength, length);
        if (createBloomFilter)
        {
            bloomFilter.addBytes(buffer, bufferStart, itemLength);
        }
        return;
    }

    for (int n = 0; n < length; ++n)
    {
        int row = offset + n;
        if (!bytes.noNulls && bytes.isNull[row])
        {
            continue;
        }

        byte[] buffer;
        int bufferStart;
        if (bytes.length[row] < itemLength)
        {
            // It is the wrong length, so copy it and pad with spaces.
            buffer = padding;
            bufferStart = 0;
            Array.Copy(bytes.vector[row], bytes.start[row], buffer, 0, bytes.length[row]);
            Arrays.fill(buffer, bytes.length[row], itemLength, (byte)' ');
        }
        else
        {
            buffer = bytes.vector[row];
            bufferStart = bytes.start[row];
        }

        if (useDictionaryEncoding)
        {
            rows.add(dictionary.add(buffer, bufferStart, itemLength));
        }
        else
        {
            directStreamOutput.Write(buffer, bufferStart, itemLength);
            directLengthOutput.write(itemLength);
        }

        indexStatistics.updateString(buffer, bufferStart, itemLength, 1);
        if (createBloomFilter)
        {
            bloomFilter.addBytes(buffer, bufferStart, itemLength);
        }
    }
}
/**
 * Writes a run of boolean values. A stored long of 0 is written as 0 and
 * any other stored value is written as 1; the boolean statistics are
 * updated for every non-null value.
 *
 * @param vector the vector to write from, cast to LongColumnVector
 * @param offset the first value from the vector to write
 * @param length the number of values from the vector to write
 */
internal override void writeBatch(ColumnVector vector, int offset, int length)
{
    base.writeBatch(vector, offset, length);
    LongColumnVector longs = (LongColumnVector)vector;

    if (vector.isRepeating)
    {
        if (!vector.noNulls && vector.isNull[0])
        {
            return;
        }

        bool repeatedFlag = longs.vector[0] != 0;
        int repeatedBit = repeatedFlag ? 1 : 0;
        indexStatistics.updateBoolean(repeatedFlag, length);
        for (int n = 0; n < length; ++n)
        {
            writer.write(repeatedBit);
        }
        return;
    }

    for (int n = 0; n < length; ++n)
    {
        int row = offset + n;
        if (!longs.noNulls && longs.isNull[row])
        {
            continue;
        }

        bool flag = longs.vector[row] != 0;
        int bit = flag ? 1 : 0;
        writer.write(bit);
        indexStatistics.updateBoolean(flag, 1);
    }
}
/**
 * Writes a run of binary values: the bytes go to the data stream and each
 * value's byte count goes to the length writer. The binary statistics and
 * the optional bloom filter are updated for non-null values.
 *
 * @param vector the vector to write from, cast to BytesColumnVector
 * @param offset the first value from the vector to write
 * @param length the number of values from the vector to write
 */
internal override void writeBatch(ColumnVector vector, int offset, int length)
{
    base.writeBatch(vector, offset, length);
    BytesColumnVector bytes = (BytesColumnVector)vector;

    if (vector.isRepeating)
    {
        if (!vector.noNulls && vector.isNull[0])
        {
            return;
        }

        byte[] repeatedData = bytes.vector[0];
        int repeatedStart = bytes.start[0];
        int repeatedSize = bytes.length[0];

        for (int n = 0; n < length; ++n)
        {
            stream.Write(repeatedData, repeatedStart, repeatedSize);
            // `this.length` is the length writer; the bare name is the row-count parameter.
            this.length.write(repeatedSize);
        }
        indexStatistics.updateBinary(repeatedData, repeatedStart, repeatedSize, length);
        if (createBloomFilter)
        {
            bloomFilter.addBytes(repeatedData, repeatedStart, repeatedSize);
        }
        return;
    }

    for (int n = 0; n < length; ++n)
    {
        int row = offset + n;
        if (!bytes.noNulls && bytes.isNull[row])
        {
            continue;
        }

        byte[] rowData = bytes.vector[row];
        int rowStart = bytes.start[row];
        int rowSize = bytes.length[row];

        stream.Write(rowData, rowStart, rowSize);
        this.length.write(rowSize);
        indexStatistics.updateBinary(rowData, rowStart, rowSize, 1);
        if (createBloomFilter)
        {
            bloomFilter.addBytes(rowData, rowStart, rowSize);
        }
    }
}
/**
 * Copies one byte-string value from the input vector into this vector by
 * handing the source buffer, start and length to setVal.
 *
 * @param outElementNum index in this vector that receives the value
 * @param inputElementNum index of the value to read; element 0 is read
 *        instead when the input vector is repeating
 * @param inputVector the source vector, cast to BytesColumnVector when the
 *        source element is not null
 */
public override void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector)
{
    int srcIndex = inputVector.isRepeating ? 0 : inputElementNum;

    if (!inputVector.noNulls && inputVector.isNull[srcIndex])
    {
        isNull[outElementNum] = true;
        noNulls = false;
        return;
    }

    isNull[outElementNum] = false;
    BytesColumnVector source = (BytesColumnVector)inputVector;
    setVal(
        outElementNum,
        source.vector[srcIndex],
        source.start[srcIndex],
        source.length[srcIndex]);
}
/**
 * Copies one double value from the input vector into this vector.
 *
 * @param outElementNum index in this vector that receives the value
 * @param inputElementNum index of the value to read; element 0 is read
 *        instead when the input vector is repeating
 * @param inputVector the source vector, cast to DoubleColumnVector when the
 *        source element is not null
 */
public override void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector)
{
    int srcIndex = inputVector.isRepeating ? 0 : inputElementNum;

    if (!inputVector.noNulls && inputVector.isNull[srcIndex])
    {
        isNull[outElementNum] = true;
        noNulls = false;
        return;
    }

    isNull[outElementNum] = false;
    DoubleColumnVector source = (DoubleColumnVector)inputVector;
    vector[outElementNum] = source.vector[srcIndex];
}
/**
 * Writes a run of map values: the entry count of every non-null row goes to
 * the lengths writer, and the keys and values are forwarded to child
 * writers 0 and 1 respectively.
 *
 * For a non-repeating vector, rows whose child ranges are contiguous are
 * merged so that each child writer receives one call per contiguous range.
 *
 * @param vector the vector to write from, cast to MapColumnVector
 * @param offset the first value from the vector to write
 * @param length the number of values from the vector to write
 */
internal override void writeBatch(ColumnVector vector, int offset, int length)
{
    base.writeBatch(vector, offset, length);
    MapColumnVector vec = (MapColumnVector)vector;
    if (vector.isRepeating)
    {
        if (vector.noNulls || !vector.isNull[0])
        {
            int childOffset = (int)vec.offsets[0];
            int childLength = (int)vec.lengths[0];
            // The same child range is written once for every repeated row.
            for (int i = 0; i < length; ++i)
            {
                lengths.write(childLength);
                childrenWriters[0].writeBatch(vec.keys, childOffset, childLength);
                childrenWriters[1].writeBatch(vec.values, childOffset, childLength);
            }
            if (createBloomFilter)
            {
                bloomFilter.addLong(childLength);
            }
        }
    }
    else
    {
        // write the elements in runs
        int currentOffset = 0;
        int currentLength = 0;
        for (int i = 0; i < length; ++i)
        {
            // Honor noNulls before looking at isNull, exactly as the base
            // writer does when it fills the present stream. Otherwise a stale
            // isNull flag would drop a row that was already marked present.
            if (vec.noNulls || !vec.isNull[i + offset])
            {
                int nextLength = (int)vec.lengths[offset + i];
                int nextOffset = (int)vec.offsets[offset + i];
                lengths.write(nextLength);
                // Keep the bloom filter in step with the repeating branch,
                // which records the length it writes.
                if (createBloomFilter)
                {
                    bloomFilter.addLong(nextLength);
                }
                if (currentLength == 0)
                {
                    // Nothing pending: this row starts a new range.
                    currentOffset = nextOffset;
                    currentLength = nextLength;
                }
                else if (currentOffset + currentLength != nextOffset)
                {
                    // Not contiguous with the pending range: flush it and start over.
                    childrenWriters[0].writeBatch(vec.keys, currentOffset, currentLength);
                    childrenWriters[1].writeBatch(vec.values, currentOffset, currentLength);
                    currentOffset = nextOffset;
                    currentLength = nextLength;
                }
                else
                {
                    // Contiguous: extend the pending range.
                    currentLength += nextLength;
                }
            }
        }
        // Flush whatever range is still pending at the end of the batch.
        if (currentLength != 0)
        {
            childrenWriters[0].writeBatch(vec.keys, currentOffset, currentLength);
            childrenWriters[1].writeBatch(vec.values, currentOffset, currentLength);
        }
    }
}
/**
 * Builds a new column vector matching this type's category, recursively
 * creating child vectors for struct, union, list and map types.
 *
 * @return a freshly allocated column vector for this type
 * @throws ArgumentException when the category is not one of the handled values
 */
private ColumnVector createColumn()
{
    switch (category)
    {
        // Integer-like, timestamp and date categories share the long-backed vector.
        case Category.BOOLEAN:
        case Category.BYTE:
        case Category.SHORT:
        case Category.INT:
        case Category.LONG:
        case Category.TIMESTAMP:
        case Category.DATE:
            return new LongColumnVector();

        case Category.FLOAT:
        case Category.DOUBLE:
            return new DoubleColumnVector();

        case Category.DECIMAL:
            return new DecimalColumnVector(precision, scale);

        case Category.STRING:
        case Category.BINARY:
        case Category.CHAR:
        case Category.VARCHAR:
            return new BytesColumnVector();

        // Struct and union both need one child vector per child type.
        case Category.STRUCT:
        case Category.UNION:
            {
                ColumnVector[] childVectors = new ColumnVector[children.Count];
                for (int c = 0; c < childVectors.Length; ++c)
                {
                    childVectors[c] = children[c].createColumn();
                }
                if (category == Category.STRUCT)
                {
                    return new StructColumnVector(VectorizedRowBatch.DEFAULT_SIZE, childVectors);
                }
                return new UnionColumnVector(VectorizedRowBatch.DEFAULT_SIZE, childVectors);
            }

        case Category.LIST:
            {
                ColumnVector elementVector = children[0].createColumn();
                return new ListColumnVector(VectorizedRowBatch.DEFAULT_SIZE, elementVector);
            }

        case Category.MAP:
            {
                ColumnVector keyVector = children[0].createColumn();
                ColumnVector valueVector = children[1].createColumn();
                return new MapColumnVector(VectorizedRowBatch.DEFAULT_SIZE, keyVector, valueVector);
            }

        default:
            throw new ArgumentException("Unknown type " + category);
    }
}
/**
 * Writes a run of struct values by forwarding the non-null rows to
 * writeFields. When the vector contains nulls, the rows are forwarded in
 * runs of consecutive non-null rows so that null rows are skipped.
 *
 * @param vector the vector to write from, cast to StructColumnVector
 * @param offset the first value from the vector to write
 * @param length the number of values from the vector to write
 */
internal override void writeBatch(ColumnVector vector, int offset, int length)
{
    base.writeBatch(vector, offset, length);
    StructColumnVector structs = (StructColumnVector)vector;

    if (vector.isRepeating)
    {
        // A repeating non-null struct forwards the whole range; a repeating null forwards nothing.
        if (vector.noNulls || !vector.isNull[0])
        {
            writeFields(structs, childrenWriters, offset, length);
        }
        return;
    }

    if (vector.noNulls)
    {
        writeFields(structs, childrenWriters, offset, length);
        return;
    }

    // write the records in runs; runStart < 0 means no run is currently open
    int runStart = -1;
    for (int i = 0; i < length; ++i)
    {
        bool rowIsNull = structs.isNull[i + offset];
        if (!rowIsNull)
        {
            if (runStart < 0)
            {
                runStart = i;
            }
        }
        else if (runStart >= 0)
        {
            // A null row closes the open run.
            writeFields(structs, childrenWriters, offset + runStart, i - runStart);
            runStart = -1;
        }
    }

    // Flush the run that is still open at the end of the batch.
    if (runStart >= 0)
    {
        writeFields(structs, childrenWriters, offset + runStart, length - runStart);
    }
}
/**
 * Set the element in this column vector from the given input vector.
 * This method can assume that the output does not have isRepeating set.
 *
 * @param outElementNum index of the element in this (output) vector to set
 * @param inputElementNum index of the element in the input vector to copy
 * @param inputVector the vector to copy the element from
 */
public abstract void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector);