/// <summary>
/// Builds the schema of a projection over <paramref name="schema"/>.
/// Columns are selected by name when <paramref name="projectedColumnNames"/>
/// is given, otherwise by index when <paramref name="projectedColumnIndexes"/>
/// is given, otherwise every column of the table schema is used.
/// </summary>
/// <param name="schema">The table schema the projection is resolved against.</param>
/// <param name="projectedColumnNames">Names of the columns to project, or null.</param>
/// <param name="projectedColumnIndexes">Indexes of the columns to project, or null.</param>
/// <param name="includeDeletedColumn">
/// When true, a generated is-deleted column is appended as the last column.
/// </param>
/// <returns>
/// A new schema holding the projected columns; its is-deleted index is the
/// position of the appended column, or -1 when none was appended.
/// </returns>
private static KuduSchema GenerateProjectionSchema(
    KuduSchema schema,
    List<string>? projectedColumnNames,
    List<int>? projectedColumnIndexes,
    bool includeDeletedColumn)
{
    // Pick the initial capacity from whichever projection source is in use.
    int capacity;
    if (projectedColumnNames is not null)
    {
        capacity = projectedColumnNames.Count;
    }
    else if (projectedColumnIndexes is not null)
    {
        capacity = projectedColumnIndexes.Count;
    }
    else
    {
        capacity = schema.Columns.Count;
    }

    if (includeDeletedColumn)
    {
        // Leave room for the trailing is-deleted column.
        capacity++;
    }

    var projection = new List<ColumnSchema>(capacity);

    // Resolve the requested columns against the table schema.
    // With neither names nor indexes supplied, all columns are scanned.
    if (projectedColumnNames is not null)
    {
        foreach (string name in projectedColumnNames)
        {
            projection.Add(schema.GetColumn(name));
        }
    }
    else if (projectedColumnIndexes is not null)
    {
        foreach (int index in projectedColumnIndexes)
        {
            projection.Add(schema.GetColumn(index));
        }
    }
    else
    {
        projection.AddRange(schema.Columns);
    }

    if (!includeDeletedColumn)
    {
        return new KuduSchema(projection, -1);
    }

    projection.Add(GenerateIsDeletedColumn(schema));
    return new KuduSchema(projection, projection.Count - 1);
}
/// <summary>
/// Converts every column of <paramref name="schema"/> to its protobuf form,
/// flagging the column found at the schema's is-deleted index (if the schema
/// has one) as the is-deleted column.
/// </summary>
/// <param name="schema">The schema whose columns are converted.</param>
/// <returns>One <c>ColumnSchemaPB</c> per schema column, in schema order.</returns>
private static List<ColumnSchemaPB> ToColumnSchemaPbs(KuduSchema schema)
{
    var sourceColumns = schema.Columns;

    // Stays null when the schema carries no is-deleted column, in which case
    // no column compares equal to it below.
    ColumnSchema? isDeletedColumn = null;
    if (schema.HasIsDeleted)
    {
        isDeletedColumn = schema.GetColumn(schema.IsDeletedIndex);
    }

    var result = new List<ColumnSchemaPB>(sourceColumns.Count);

    foreach (var column in sourceColumns)
    {
        result.Add(ToColumnSchemaPb(column, column == isDeletedColumn));
    }

    return result;
}
/// <summary>
/// Determines the indexes (within <paramref name="schema"/>) of the columns
/// projected by a scan token.
/// </summary>
/// <param name="message">The scan token to read the projection from.</param>
/// <param name="schema">The schema the projected columns are resolved against.</param>
/// <returns>
/// The token's own index list when it is non-empty; otherwise the indexes
/// resolved from the token's projected column descriptions.
/// </returns>
/// <exception cref="Exception">
/// A projected column's type or nullability differs from the matching column
/// in <paramref name="schema"/>.
/// </exception>
private static IReadOnlyList<int> ComputeProjectedColumnIndexes(
    ScanTokenPB message,
    KuduSchema schema)
{
    // Explicit indexes in the token take precedence over column descriptions.
    if (message.ProjectedColumnIdx.Count != 0)
    {
        return message.ProjectedColumnIdx;
    }

    var tokenColumns = message.ProjectedColumns;
    var indexes = new List<int>(tokenColumns.Count);

    foreach (var tokenColumn in tokenColumns)
    {
        // Look the column up by id when both sides carry ids, else by name.
        int index;
        if (tokenColumn.HasId && schema.HasColumnIds)
        {
            index = schema.GetColumnIndex((int)tokenColumn.Id);
        }
        else
        {
            index = schema.GetColumnIndex(tokenColumn.Name);
        }

        var tableColumn = schema.GetColumn(index);

        if (tokenColumn.Type != (DataType)tableColumn.Type)
        {
            throw new Exception(
                $"Invalid type {tokenColumn.Type} for column '{tokenColumn.Name}' in scan token, expected: {tableColumn.Type}");
        }

        if (tokenColumn.IsNullable != tableColumn.IsNullable)
        {
            var expectedNullability = tableColumn.IsNullable ? "NULLABLE" : "NOT NULL";
            throw new Exception(
                $"Invalid nullability for column '{tokenColumn.Name}' in scan token, expected: {expectedNullability}");
        }

        indexes.Add(index);
    }

    return indexes;
}
/// <summary>
/// Creates a <c>ResultSet</c> over columnar row data by locating and
/// validating each column's sidecars inside the message buffer.
/// </summary>
/// <param name="message">The message whose buffer holds the sidecars.</param>
/// <param name="schema">The schema the columnar data must match.</param>
/// <param name="data">The columnar row block describing the sidecars.</param>
/// <returns>
/// An empty result set when the block has no columns or no rows; otherwise
/// a result set that takes over the message's memory.
/// </returns>
private static ResultSet CreateResultSet(
    KuduMessage message,
    KuduSchema schema,
    ColumnarRowBlockPB data)
{
    var blockColumns = data.Columns;
    var columnCount = blockColumns.Count;
    var expectedColumnCount = schema.Columns.Count;

    if (columnCount != expectedColumnCount)
    {
        ThrowColumnCountMismatchException(expectedColumnCount, columnCount);
    }

    if (columnCount == 0 || data.NumRows == 0)
    {
        // Empty projection, usually used for quick row counting.
        return CreateEmptyResultSet(schema, data.NumRows);
    }

    var numRows = checked((int)data.NumRows);
    var bufferLength = message.Buffer.Length;
    var nonNullBitmapLength = KuduEncoder.BitsToBytes(numRows);

    var dataSidecarOffsets = new SidecarOffset[columnCount];
    var varlenDataSidecarOffsets = new SidecarOffset[columnCount];
    var nonNullBitmapSidecarOffsets = new SidecarOffset[columnCount];

    for (int columnIndex = 0; columnIndex < columnCount; columnIndex++)
    {
        var blockColumn = blockColumns[columnIndex];
        var columnSchema = schema.GetColumn(columnIndex);

        // Every column must come with a data sidecar of the expected size.
        if (!blockColumn.HasDataSidecar)
        {
            ThrowMissingDataSidecarException(columnSchema);
        }
        else
        {
            var dataOffset = message.GetSidecarOffset(blockColumn.DataSidecar);
            var expectedLength = GetColumnDataSize(columnSchema, numRows);
            ValidateSidecar(dataOffset, expectedLength, bufferLength);
            dataSidecarOffsets[columnIndex] = dataOffset;
        }

        if (blockColumn.HasVarlenDataSidecar)
        {
            varlenDataSidecarOffsets[columnIndex] =
                message.GetSidecarOffset(blockColumn.VarlenDataSidecar);
        }

        // A start of -1 marks a column that has no non-null bitmap.
        if (!blockColumn.HasNonNullBitmapSidecar)
        {
            nonNullBitmapSidecarOffsets[columnIndex] = new SidecarOffset(-1, 0);
        }
        else
        {
            var bitmapOffset = message.GetSidecarOffset(blockColumn.NonNullBitmapSidecar);
            ValidateSidecar(bitmapOffset, nonNullBitmapLength, bufferLength);
            nonNullBitmapSidecarOffsets[columnIndex] = bitmapOffset;
        }
    }

    var buffer = message.TakeMemory();

    return new ResultSet(
        buffer,
        schema,
        data.NumRows,
        dataSidecarOffsets,
        varlenDataSidecarOffsets,
        nonNullBitmapSidecarOffsets);
}
// Used to convert the rowwise data to the newer columnar format,
// to avoid virtual calls on ResultSet.
// This is only used if the Kudu server is 1.11 or older.
/// <summary>
/// Copies a rowwise row block into a newly allocated buffer laid out in the
/// columnar format and wraps it in a <c>ResultSet</c>.
/// </summary>
/// <remarks>
/// Buffer layout: for each column its data sidecar, directly followed by its
/// non-null bitmap when the column is nullable; the variable-length data of
/// all columns is placed after the last column. Variable-length columns store
/// <c>numRows + 1</c> 4-byte offsets into their variable-length data.
/// </remarks>
/// <param name="message">The message the row and indirect data are read from.</param>
/// <param name="schema">The schema describing the rows.</param>
/// <param name="rowPb">The rowwise row block to convert.</param>
/// <returns>A result set over the converted, columnar copy of the data.</returns>
public static ResultSet Convert(
    KuduMessage message,
    KuduSchema schema,
    RowwiseRowBlockPB rowPb)
{
    var numColumns = schema.Columns.Count;
    int rowSize = schema.RowSize;
    int numRows = rowPb.NumRows;

    var rowData = GetRowData(message, rowPb);
    var indirectData = GetIndirectData(message, rowPb);

    var dataSidecarOffsets = new SidecarOffset[numColumns];
    var varlenDataSidecarOffsets = new SidecarOffset[numColumns];
    var nonNullBitmapSidecarOffsets = new SidecarOffset[numColumns];
    int nonNullBitmapSize = KuduEncoder.BitsToBytes(numRows);

    // Within a row the null bitmap sits right after the last column's cell,
    // i.e. at the sum of all rowwise column sizes. A running sum (rather than
    // a per-column offset array) also keeps a zero-column schema from
    // indexing into an empty array.
    int nonNullBitmapOffset = 0;

    // Lay out the data sidecar and optional non-null bitmap of each column.
    int offset = 0;
    for (int i = 0; i < numColumns; i++)
    {
        var column = schema.GetColumn(i);
        nonNullBitmapOffset += column.Size;

        // Variable-length columns hold numRows + 1 offsets of 4 bytes each.
        // checked: fail loudly instead of wrapping on oversized blocks.
        var dataSize = checked(column.IsFixedSize
            ? column.Size * numRows
            : (4 * numRows) + 4);

        dataSidecarOffsets[i] = new SidecarOffset(offset, dataSize);
        offset = checked(offset + dataSize);

        if (column.IsNullable)
        {
            nonNullBitmapSidecarOffsets[i] = new SidecarOffset(offset, nonNullBitmapSize);
            offset = checked(offset + nonNullBitmapSize);
        }
        else
        {
            // A start of -1 marks a column without a non-null bitmap.
            nonNullBitmapSidecarOffsets[i] = new SidecarOffset(-1, 0);
        }
    }

    var buffer = new ArrayPoolBuffer<byte>(checked(offset + indirectData.Length));
    var data = buffer.Buffer;
    // Zero the buffer so bitmap bits and skipped (null) cells start cleared.
    data.AsSpan().Clear();

    // Variable-length data is appended after all the per-column sidecars.
    var varlenData = data.AsSpan(offset);
    int currentDataOffset = 0;

    for (int columnIndex = 0; columnIndex < numColumns; columnIndex++)
    {
        var column = schema.GetColumn(columnIndex);
        var isFixedSize = column.IsFixedSize;
        var columnarSize = isFixedSize ? column.Size : 4;
        var rowwiseSize = column.Size;
        var dataOffset = dataSidecarOffsets[columnIndex];
        var nonNullOffset = nonNullBitmapSidecarOffsets[columnIndex].Start;
        var dataOutput = data.AsSpan(dataOffset.Start, dataOffset.Length);
        int currentVarlenOffset = 0;

        for (int rowIndex = 0; rowIndex < numRows; rowIndex++)
        {
            var rowSlice = rowData.Slice(rowSize * rowIndex, rowSize);
            bool isSet = true;

            if (nonNullOffset >= 0)
            {
                // The rowwise bitmap flags null cells; the columnar bitmap
                // flags non-null cells, hence the inversion.
                isSet = !rowSlice.GetBit(nonNullBitmapOffset, columnIndex);

                if (isSet)
                {
                    data.SetBit(nonNullOffset, rowIndex);
                }
            }

            if (isFixedSize)
            {
                if (isSet)
                {
                    var rawData = rowSlice.Slice(currentDataOffset, columnarSize);
                    rawData.CopyTo(dataOutput);
                }
            }
            else
            {
                // Always record the start offset, even for null cells, so the
                // offsets stay monotonic. Otherwise a null cell would keep
                // offset 0 and a length derived from consecutive offsets
                // would be wrong for the preceding value.
                // NOTE(review): assumes readers compute lengths as
                // offsets[i + 1] - offsets[i], as the trailing offset written
                // below suggests — confirm against ResultSet.
                KuduEncoder.EncodeInt32(dataOutput, currentVarlenOffset);

                if (isSet)
                {
                    // Rowwise cell: 8-byte offset then 8-byte length, both
                    // relative to the indirect data.
                    var offsetData = rowSlice.Slice(currentDataOffset, 8);
                    var lengthData = rowSlice.Slice(currentDataOffset + 8, 8);
                    int start = (int)KuduEncoder.DecodeInt64(offsetData);
                    int length = (int)KuduEncoder.DecodeInt64(lengthData);

                    var indirectSlice = indirectData.Slice(start, length);
                    indirectSlice.CopyTo(varlenData);
                    varlenData = varlenData.Slice(length);

                    currentVarlenOffset += length;
                }
            }

            dataOutput = dataOutput.Slice(columnarSize);
        }

        currentDataOffset += rowwiseSize;

        if (!isFixedSize)
        {
            // Trailing (numRows + 1)-th offset: the end of the last value.
            KuduEncoder.EncodeInt32(dataOutput, currentVarlenOffset);

            varlenDataSidecarOffsets[columnIndex] = new SidecarOffset(offset, currentVarlenOffset);
            offset += currentVarlenOffset;
        }
    }

    return new ResultSet(
        buffer,
        schema,
        numRows,
        dataSidecarOffsets,
        varlenDataSidecarOffsets,
        nonNullBitmapSidecarOffsets);
}