/// <summary>
/// <para>
/// Adds a range partition to the table, delimited by a lower bound row and
/// an upper bound row.
/// </para>
///
/// <para>
/// An empty bound row leaves that end of the range unbounded. When a range
/// column has no value set, the logical minimum value of that column's type
/// is used instead.
/// </para>
///
/// <para>
/// Several range bounds may be added as long as they do not overlap. Every
/// split row must fall inside one of the range bounds, and the lower bound
/// must be less than or equal to the upper bound.
/// </para>
///
/// <para>
/// When no range partition is added, the table's range is unbounded.
/// </para>
/// </summary>
/// <param name="configure">
/// Delegate that receives the lower bound row and the upper bound row
/// (in that order) and fills them in.
/// </param>
/// <param name="lowerBoundType">The type of the lower bound.</param>
/// <param name="upperBoundType">The type of the upper bound.</param>
/// <returns>This builder, to allow call chaining.</returns>
public TableBuilder AddRangePartition(
    Action<PartialRowOperation, PartialRowOperation> configure,
    RangePartitionBound lowerBoundType,
    RangePartitionBound upperBoundType)
{
    // Inclusive-lower / exclusive-upper are the "plain" operations; the
    // other combinations use the dedicated exclusive/inclusive variants.
    RowOperation lowerOperation;
    if (lowerBoundType == RangePartitionBound.Inclusive)
    {
        lowerOperation = RowOperation.RangeLowerBound;
    }
    else
    {
        lowerOperation = RowOperation.ExclusiveRangeLowerBound;
    }

    RowOperation upperOperation;
    if (upperBoundType == RangePartitionBound.Exclusive)
    {
        upperOperation = RowOperation.RangeUpperBound;
    }
    else
    {
        upperOperation = RowOperation.InclusiveRangeUpperBound;
    }

    // TODO: Rework this
    // The bound rows need a KuduSchema, which is rebuilt here from the
    // columns currently present in the create-table request.
    var requestColumns = _createTableRequest.Schema.Columns;
    var schemaColumns = new List<ColumnSchema>(requestColumns.Count);
    foreach (var columnPb in requestColumns)
    {
        schemaColumns.Add(ColumnSchema.FromProtobuf(columnPb));
    }

    var rowSchema = new KuduSchema(schemaColumns);
    var lower = new PartialRowOperation(rowSchema, lowerOperation);
    var upper = new PartialRowOperation(rowSchema, upperOperation);

    configure(lower, upper);

    // Bounds are stored as a (lower, upper) pair, in that order.
    _splitRowsRangeBounds.Add(lower);
    _splitRowsRangeBounds.Add(upper);

    return this;
}
/// <summary>
/// Creates a column schema from the supplied settings. In addition to
/// storing each argument in the property of the same name, the
/// type-dependent properties (<c>Size</c>, <c>IsSigned</c> and
/// <c>IsFixedSize</c>) are computed from <paramref name="type"/>.
/// </summary>
/// <param name="name">The column name.</param>
/// <param name="type">The column's Kudu data type.</param>
/// <param name="isKey">Stored in <c>IsKey</c>; defaults to false.</param>
/// <param name="isNullable">Stored in <c>IsNullable</c>; defaults to false.</param>
/// <param name="defaultValue">Stored in <c>DefaultValue</c>; may be null.</param>
/// <param name="desiredBlockSize">Stored in <c>DesiredBlockSize</c>; defaults to 0.</param>
/// <param name="encoding">Stored in <c>Encoding</c>; defaults to auto encoding.</param>
/// <param name="compression">Stored in <c>Compression</c>; defaults to the default compression.</param>
/// <param name="typeAttributes">Stored in <c>TypeAttributes</c>; may be null.</param>
/// <param name="comment">Stored in <c>Comment</c>; may be null.</param>
public ColumnSchema(
    string name,
    KuduType type,
    bool isKey = false,
    bool isNullable = false,
    object? defaultValue = null,
    int desiredBlockSize = 0,
    EncodingType encoding = EncodingType.AutoEncoding,
    CompressionType compression = CompressionType.DefaultCompression,
    ColumnTypeAttributes? typeAttributes = null,
    string? comment = null)
{
    // Identity and type.
    Name = name;
    Type = type;
    TypeAttributes = typeAttributes;
    Comment = comment;

    // Column constraints.
    IsKey = isKey;
    IsNullable = isNullable;
    DefaultValue = defaultValue;

    // Storage settings.
    DesiredBlockSize = desiredBlockSize;
    Encoding = encoding;
    Compression = compression;

    // Values derived purely from the data type.
    Size = KuduSchema.GetTypeSize(type);
    IsSigned = KuduSchema.IsSigned(type);
    IsFixedSize = IsTypeFixedSize(type);
}
/// <summary>
/// Creates a partition schema and determines whether it is a "simple"
/// range partitioning: no hash bucketing, and the range columns are exactly
/// the primary key columns in primary key order.
/// </summary>
/// <param name="rangeSchema">The range component of the partition schema.</param>
/// <param name="hashBucketSchemas">The hash bucket components; may be empty.</param>
/// <param name="schema">The table schema the column ids are resolved against.</param>
public PartitionSchema(
    RangeSchema rangeSchema,
    List<HashBucketSchema> hashBucketSchemas,
    KuduSchema schema)
{
    RangeSchema = rangeSchema;
    HashBucketSchemas = hashBucketSchemas;

    bool simple = false;

    if (hashBucketSchemas.Count == 0 &&
        rangeSchema.ColumnIds.Count == schema.PrimaryKeyColumnCount)
    {
        // Candidate for simple range partitioning; now verify that the
        // n-th range column is also the n-th column of the schema.
        simple = true;
        int expectedIndex = 0;

        foreach (int columnId in rangeSchema.ColumnIds)
        {
            if (schema.GetColumnIndex(columnId) != expectedIndex)
            {
                simple = false;
                break;
            }

            expectedIndex++;
        }
    }

    IsSimpleRangePartitioning = simple;
}
/// <summary>
/// Builds an empty <see cref="PartialRow"/> over a schema that contains one
/// non-key column for each of the listed Kudu types.
/// </summary>
/// <param name="isNullable">Nullability applied to every column.</param>
private PartialRow GetPartialRowWithAllTypes(bool isNullable = false)
{
    var allTypeColumns = new List<ColumnSchema>
    {
        new ColumnSchema("int8", KuduType.Int8, isKey: false, isNullable: isNullable),
        new ColumnSchema("int16", KuduType.Int16, isKey: false, isNullable: isNullable),
        new ColumnSchema("int32", KuduType.Int32, isKey: false, isNullable: isNullable),
        new ColumnSchema("int64", KuduType.Int64, isKey: false, isNullable: isNullable),
        new ColumnSchema("string", KuduType.String, isKey: false, isNullable: isNullable),
        new ColumnSchema("bool", KuduType.Bool, isKey: false, isNullable: isNullable),
        new ColumnSchema("float", KuduType.Float, isKey: false, isNullable: isNullable),
        new ColumnSchema("double", KuduType.Double, isKey: false, isNullable: isNullable),
        new ColumnSchema("binary", KuduType.Binary, isKey: false, isNullable: isNullable),
        new ColumnSchema("timestamp", KuduType.UnixtimeMicros, isKey: false, isNullable: isNullable),
        new ColumnSchema("date", KuduType.Date, isKey: false, isNullable: isNullable),

        // Decimal columns carry type attributes (5, 3, null).
        new ColumnSchema(
            "decimal32", KuduType.Decimal32, isKey: false, isNullable: isNullable,
            typeAttributes: new ColumnTypeAttributes(5, 3, null)),
        new ColumnSchema(
            "decimal64", KuduType.Decimal64, isKey: false, isNullable: isNullable,
            typeAttributes: new ColumnTypeAttributes(5, 3, null)),
        new ColumnSchema(
            "decimal128", KuduType.Decimal128, isKey: false, isNullable: isNullable,
            typeAttributes: new ColumnTypeAttributes(5, 3, null)),

        // The varchar column carries type attributes (null, null, 10).
        new ColumnSchema(
            "varchar", KuduType.Varchar, isKey: false, isNullable: isNullable,
            typeAttributes: new ColumnTypeAttributes(null, null, 10))
    };

    var allTypesSchema = new KuduSchema(allTypeColumns);
    return new PartialRow(allTypesSchema);
}
/// <summary>
/// Creates a scan enumerator from the fully resolved scan settings.
/// Null primary key bounds are replaced with empty arrays. If the partition
/// pruner has nothing left to scan the enumerator starts out closed, and for
/// <see cref="ReadMode.ReadYourWrites"/> scans the client's last propagated
/// timestamp is captured up front.
/// </summary>
public KuduScanEnumerator(
    ILogger logger,
    KuduClient client,
    KuduTable table,
    List<ColumnSchemaPB> projectedColumnsPb,
    KuduSchema projectionSchema,
    OrderMode orderMode,
    ReadMode readMode,
    ReplicaSelection replicaSelection,
    bool isFaultTolerant,
    Dictionary<string, KuduPredicate> predicates,
    long limit,
    bool cacheBlocks,
    byte[] startPrimaryKey,
    byte[] endPrimaryKey,
    long startTimestamp,
    long htTimestamp,
    int batchSizeBytes,
    PartitionPruner partitionPruner,
    CancellationToken cancellationToken)
{
    // Collaborators.
    _logger = logger;
    _client = client;
    _table = table;
    _partitionPruner = partitionPruner;
    _cancellationToken = cancellationToken;

    // Projection and predicates.
    _columns = projectedColumnsPb;
    _schema = projectionSchema;
    _predicates = predicates;

    // Scan behavior.
    _orderMode = orderMode;
    _readMode = readMode;
    _replicaSelection = replicaSelection;
    _isFaultTolerant = isFaultTolerant;
    _limit = limit;
    _cacheBlocks = cacheBlocks;
    _batchSizeBytes = batchSizeBytes;

    // Key bounds; a missing bound is represented by an empty array.
    _startPrimaryKey = startPrimaryKey ?? Array.Empty<byte>();
    _endPrimaryKey = endPrimaryKey ?? Array.Empty<byte>();

    // Timestamps.
    _startTimestamp = startTimestamp;
    SnapshotTimestamp = htTimestamp;

    // Initial per-scan state.
    _scannerId = ByteString.Empty;
    _lastPrimaryKey = ByteString.Empty;
    ResourceMetrics = new ResourceMetrics();

    // When the pruner has already eliminated every partition, the scan can
    // finish immediately without contacting any tablet servers.
    if (!partitionPruner.HasMorePartitionKeyRanges)
    {
        _closed = true;
    }

    // In READ_YOUR_WRITES mode the latest observed timestamp is captured
    // once and then used as the propagated timestamp for the whole scan,
    // which avoids unnecessary waiting.
    if (readMode == ReadMode.ReadYourWrites)
    {
        _lowerBoundPropagationTimestamp = client.LastPropagatedTimestamp;
    }
}
/// <summary>
/// Creates a table description from a GetTableSchema response: the table
/// schema, a variant of the schema produced by
/// <c>CreateWithNoColumnIds</c>, the partition schema, the table id
/// (decoded as UTF-8) and the extra configuration entries.
/// </summary>
/// <param name="schemaPb">The GetTableSchema response for the table.</param>
public KuduTable(GetTableSchemaResponsePB schemaPb)
{
    var tableSchema = new KuduSchema(schemaPb.Schema);
    Schema = tableSchema;

    SchemaPbNoIds = CreateWithNoColumnIds(schemaPb);
    SchemaPb = schemaPb;

    // The partition schema is resolved against the table schema built above.
    var partitionSchemaPb = schemaPb.PartitionSchema;
    PartitionSchema = ProtobufHelper.CreatePartitionSchema(
        partitionSchemaPb, tableSchema);

    TableId = schemaPb.TableId.ToStringUtf8();
    ExtraConfig = schemaPb.ExtraConfigs;
}
/// <summary>
/// Creates a result set that reports <paramref name="numRows"/> rows but
/// carries no buffer and no sidecar offsets.
/// </summary>
/// <param name="schema">The projection schema of the scan.</param>
/// <param name="numRows">The number of rows to report.</param>
private static ResultSet CreateEmptyResultSet(KuduSchema schema, long numRows)
{
    // The same empty array serves as data, varlen and non-null bitmap offsets.
    var noSidecars = Array.Empty<SidecarOffset>();

    return new ResultSet(
        null,
        schema,
        numRows,
        noSidecars,
        noSidecars,
        noSidecars);
}
/// <summary>
/// Test fixture setup: starts a mini cluster, creates a range-partitioned
/// table with three split rows, inserts nine rows, records the client's
/// propagated timestamp and finally reopens the table through a fresh
/// client.
/// </summary>
public async Task InitializeAsync()
{
    _harness = await new MiniKuduClusterBuilder().BuildHarnessAsync();
    _client = _harness.CreateClient();
    await using var session = _client.NewSession();

    // Create a 4-tablets table for scanning.
    var tableBuilder = new TableBuilder(_tableName)
        .AddColumn("key1", KuduType.String, opt => opt.Key(true))
        .AddColumn("key2", KuduType.String, opt => opt.Key(true))
        .AddColumn("val", KuduType.String)
        .SetRangePartitionColumns("key1", "key2");

    // Split rows at ('1', ''), ('2', '') and ('3', '').
    for (int splitIndex = 1; splitIndex <= 3; splitIndex++)
    {
        string splitKey = splitIndex.ToString();

        tableBuilder.AddSplitRow(row =>
        {
            row.SetString("key1", splitKey);
            row.SetString("key2", "");
        });
    }

    var createdTable = await _client.CreateTableAsync(tableBuilder);

    // The data layout ends up like this:
    // tablet '', '1': no rows
    // tablet '1', '2': '111', '122', '133'
    // tablet '2', '3': '211', '222', '233'
    // tablet '3', '': '311', '322', '333'
    var keyValues = new[] { "1", "2", "3" };

    foreach (var firstKey in keyValues)
    {
        foreach (var secondKey in keyValues)
        {
            var row = createdTable.NewInsert();
            row.SetString(0, firstKey);
            row.SetString(1, secondKey);
            row.SetString(2, secondKey);

            // Each row is flushed individually.
            await session.EnqueueAsync(row);
            await session.FlushAsync();
        }
    }

    _beforeWriteTimestamp = _client.LastPropagatedTimestamp;

    // Reset the client in order to clear the propagated timestamp.
    _newClient = _harness.CreateClient();

    // Reopen the table using the new client.
    _table = await _newClient.OpenTableAsync(_tableName);
    _schema = _table.Schema;
}
/// <summary>
/// Copy constructor. The schema reference is shared with
/// <paramref name="row"/>, while the row allocation and every
/// variable-length entry are duplicated through <c>CloneArray</c>.
/// </summary>
/// <param name="row">The partial row to copy.</param>
internal PartialRow(PartialRow row)
{
    Schema = row.Schema;
    _rowAlloc = CloneArray(row._rowAlloc);
    _headerSize = row._headerSize;
    _nullOffset = row._nullOffset;

    var sourceVarLength = row._varLengthData;
    var copiedVarLength = new byte[sourceVarLength.Length][];
    _varLengthData = copiedVarLength;

    int index = 0;
    foreach (var entry in sourceVarLength)
    {
        copiedVarLength[index] = CloneArray(entry);
        index++;
    }
}
/// <summary>
/// Creates a result set from a scan response, using the columnar data when
/// the response carries it and the rowwise data otherwise.
/// </summary>
/// <param name="scanSchema">The projection schema of the scan.</param>
/// <param name="scanResponse">The scan response to read row data from.</param>
/// <param name="message">The message passed on to the result set factory.</param>
public static ResultSet Create(
    KuduSchema scanSchema,
    ScanResponsePB scanResponse,
    KuduMessage message)
{
    var columnarData = scanResponse.ColumnarData;

    return columnarData is null
        ? CreateResultSet(message, scanSchema, scanResponse.Data)
        : CreateResultSet(message, scanSchema, columnarData);
}
/// <summary>
/// Builds the projection schema for a scan. Column names take precedence
/// over column indexes; when neither is supplied every column of
/// <paramref name="schema"/> is projected. Optionally appends the virtual
/// IS_DELETED column as the last column.
/// </summary>
/// <param name="schema">The table schema.</param>
/// <param name="projectedColumnNames">Names of the columns to project, or null.</param>
/// <param name="projectedColumnIndexes">Indexes of the columns to project, or null.</param>
/// <param name="includeDeletedColumn">Whether to append the IS_DELETED column.</param>
private static KuduSchema GenerateProjectionSchema(
    KuduSchema schema,
    List<string>? projectedColumnNames,
    List<int>? projectedColumnIndexes,
    bool includeDeletedColumn)
{
    // Reserve one extra slot when the IS_DELETED column will be appended.
    int extraSlots = includeDeletedColumn ? 1 : 0;
    List<ColumnSchema> projection;

    // Map the requested columns to actual columns in the table schema.
    if (projectedColumnNames is not null)
    {
        projection = new List<ColumnSchema>(projectedColumnNames.Count + extraSlots);
        foreach (string name in projectedColumnNames)
        {
            projection.Add(schema.GetColumn(name));
        }
    }
    else if (projectedColumnIndexes is not null)
    {
        projection = new List<ColumnSchema>(projectedColumnIndexes.Count + extraSlots);
        foreach (int index in projectedColumnIndexes)
        {
            projection.Add(schema.GetColumn(index));
        }
    }
    else
    {
        // Nothing was specified, so all columns are scanned.
        projection = new List<ColumnSchema>(schema.Columns.Count + extraSlots);
        projection.AddRange(schema.Columns);
    }

    if (!includeDeletedColumn)
    {
        // -1 signals that the projection has no IS_DELETED column.
        return new KuduSchema(projection, -1);
    }

    projection.Add(GenerateIsDeletedColumn(schema));
    return new KuduSchema(projection, projection.Count - 1);
}
/// <summary>
/// Adds a range partition split. The split row must fall inside a range
/// partition, and splits that partition into two contiguous range
/// partitions.
/// </summary>
/// <param name="configure">A delegate that fills in the split row.</param>
/// <returns>This builder, to allow call chaining.</returns>
public TableBuilder AddSplitRow(Action<PartialRowOperation> configure)
{
    // TODO: Rework this
    // The split row needs a KuduSchema, which is rebuilt here from the
    // columns currently present in the create-table request.
    var requestColumns = _createTableRequest.Schema.Columns;
    var schemaColumns = new List<ColumnSchema>(requestColumns.Count);
    foreach (var columnPb in requestColumns)
    {
        schemaColumns.Add(ColumnSchema.FromProtobuf(columnPb));
    }

    var row = new PartialRowOperation(
        new KuduSchema(schemaColumns),
        RowOperation.SplitRow);

    configure(row);
    _splitRowsRangeBounds.Add(row);

    return this;
}
/// <summary>
/// Creates the <see cref="ColumnSchema"/> for the virtual IS_DELETED column:
/// a non-key, non-nullable bool column defaulting to false. Underscores are
/// appended to the name until it does not collide with an existing column.
/// </summary>
/// <param name="schema">The table schema.</param>
private static ColumnSchema GenerateIsDeletedColumn(KuduSchema schema)
{
    string name = "is_deleted";

    // Keep extending the name for as long as the table already uses it.
    while (schema.HasColumn(name))
    {
        name = name + "_";
    }

    var isDeletedColumn = new ColumnSchema(
        name,
        KuduType.Bool,
        isKey: false,
        isNullable: false,
        defaultValue: false);

    return isDeletedColumn;
}
/// <summary>
/// Creates a result set from rowwise row data. A missing block yields an
/// empty result set with zero rows; a block without a rows sidecar, an
/// empty projection or a zero row count yields an empty result set that
/// still reports the block's row count. Anything else is converted by
/// <c>RowwiseResultSetConverter</c>.
/// </summary>
/// <param name="message">The message holding the sidecar data.</param>
/// <param name="schema">The projection schema of the scan.</param>
/// <param name="data">The rowwise row block; may be null.</param>
private static ResultSet CreateResultSet(
    KuduMessage message,
    KuduSchema schema,
    RowwiseRowBlockPB data)
{
    if (data is null)
    {
        return CreateEmptyResultSet(schema, 0);
    }

    bool hasRowData =
        data.HasRowsSidecar &&
        schema.Columns.Count != 0 &&
        data.NumRows != 0;

    if (hasRowData)
    {
        return RowwiseResultSetConverter.Convert(message, schema, data);
    }

    // Empty projection, usually used for quick row counting.
    return CreateEmptyResultSet(schema, data.NumRows);
}
/// <summary>
/// Converts every column of <paramref name="schema"/> to its protobuf form,
/// flagging the column that sits at the schema's IS_DELETED index (if the
/// schema has one).
/// </summary>
/// <param name="schema">The schema whose columns are converted.</param>
private static List<ColumnSchemaPB> ToColumnSchemaPbs(KuduSchema schema)
{
    // Null when the schema has no IS_DELETED column, in which case no
    // column compares equal to it below.
    var isDeletedColumn = !schema.HasIsDeleted
        ? null
        : schema.GetColumn(schema.IsDeletedIndex);

    var sourceColumns = schema.Columns;
    var result = new List<ColumnSchemaPB>(sourceColumns.Count);

    foreach (var column in sourceColumns)
    {
        result.Add(ToColumnSchemaPb(column, column == isDeletedColumn));
    }

    return result;
}
/// <summary>
/// Creates an empty partial row for <paramref name="schema"/>. The row
/// allocation starts with a header made of a column bitmap and, when the
/// schema has nullable columns, a null bitmap of the same size; it is
/// followed by <c>schema.RowAllocSize</c> bytes.
/// </summary>
/// <param name="schema">The schema the row belongs to.</param>
public PartialRow(KuduSchema schema)
{
    Schema = schema;

    var bitmapBytes = KuduEncoder.BitsToBytes(schema.Columns.Count);
    var totalHeaderBytes = bitmapBytes;

    if (schema.HasNullableColumns)
    {
        // The null bitmap sits right after the column bitmap and has the
        // same size. Bits for non-nullable columns are ignored.
        _nullOffset = bitmapBytes;
        totalHeaderBytes = bitmapBytes + bitmapBytes;
    }

    _headerSize = totalHeaderBytes;
    _rowAlloc = new byte[totalHeaderBytes + schema.RowAllocSize];
    _varLengthData = new byte[schema.VarLengthColumnCount][];
}
/// <summary>
/// Resolves the projected columns of a scan token to column indexes in
/// <paramref name="schema"/>. Indexes stored directly in the token are
/// returned as-is; otherwise each projected column is looked up by id (when
/// both the token column and the schema have ids) or by name, and its type
/// and nullability are checked against the schema.
/// </summary>
/// <param name="message">The scan token.</param>
/// <param name="schema">The table schema to resolve columns against.</param>
/// <exception cref="Exception">
/// A projected column's type or nullability differs from the schema.
/// </exception>
private static IReadOnlyList<int> ComputeProjectedColumnIndexes(
    ScanTokenPB message, KuduSchema schema)
{
    if (message.ProjectedColumnIdx.Count != 0)
    {
        return message.ProjectedColumnIdx;
    }

    var projectedColumns = message.ProjectedColumns;
    var indexes = new List<int>(projectedColumns.Count);

    foreach (var tokenColumn in projectedColumns)
    {
        // Prefer the column id when both sides know about ids.
        int index;
        if (tokenColumn.HasId && schema.HasColumnIds)
        {
            index = schema.GetColumnIndex((int)tokenColumn.Id);
        }
        else
        {
            index = schema.GetColumnIndex(tokenColumn.Name);
        }

        var tableColumn = schema.GetColumn(index);

        bool typeMatches = tokenColumn.Type == (DataType)tableColumn.Type;
        if (!typeMatches)
        {
            throw new Exception($"Invalid type {tokenColumn.Type} " +
                $"for column '{tokenColumn.Name}' in scan token, " +
                $"expected: {tableColumn.Type}");
        }

        bool nullabilityMatches = tokenColumn.IsNullable == tableColumn.IsNullable;
        if (!nullabilityMatches)
        {
            throw new Exception($"Invalid nullability for column '{tokenColumn.Name}' " +
                $"in scan token, expected: {(tableColumn.IsNullable ? "NULLABLE" : "NOT NULL")}");
        }

        indexes.Add(index);
    }

    return indexes;
}
/// <summary>
/// Creates a result set from columnar row data. The block must contain one
/// column per schema column. For every column the data sidecar (required),
/// the varlen data sidecar (optional) and the non-null bitmap sidecar
/// (optional) are located in the message; the data and bitmap sidecars are
/// validated against their expected length and the message buffer length.
/// </summary>
/// <param name="message">The message holding the sidecars.</param>
/// <param name="schema">The projection schema of the scan.</param>
/// <param name="data">The columnar row block.</param>
private static ResultSet CreateResultSet(
    KuduMessage message,
    KuduSchema schema,
    ColumnarRowBlockPB data)
{
    var blockColumns = data.Columns;
    var columnCount = blockColumns.Count;
    var expectedColumnCount = schema.Columns.Count;

    if (columnCount != expectedColumnCount)
    {
        ThrowColumnCountMismatchException(expectedColumnCount, columnCount);
    }

    if (columnCount == 0 || data.NumRows == 0)
    {
        // Empty projection, usually used for quick row counting.
        return CreateEmptyResultSet(schema, data.NumRows);
    }

    // Row counts are handled as int below; overflow is an error.
    var rowCount = checked((int)data.NumRows);
    var messageLength = message.Buffer.Length;
    var bitmapLength = KuduEncoder.BitsToBytes(rowCount);

    var dataOffsets = new SidecarOffset[columnCount];
    var varlenOffsets = new SidecarOffset[columnCount];
    var bitmapOffsets = new SidecarOffset[columnCount];

    for (int columnIndex = 0; columnIndex < columnCount; columnIndex++)
    {
        var blockColumn = blockColumns[columnIndex];
        var schemaColumn = schema.GetColumn(columnIndex);

        // Data sidecar: mandatory for every column.
        if (!blockColumn.HasDataSidecar)
        {
            ThrowMissingDataSidecarException(schemaColumn);
        }
        else
        {
            var dataSidecar = message.GetSidecarOffset(blockColumn.DataSidecar);
            var expectedLength = GetColumnDataSize(schemaColumn, rowCount);
            ValidateSidecar(dataSidecar, expectedLength, messageLength);
            dataOffsets[columnIndex] = dataSidecar;
        }

        // Varlen data sidecar: recorded when present, not validated here.
        if (blockColumn.HasVarlenDataSidecar)
        {
            varlenOffsets[columnIndex] =
                message.GetSidecarOffset(blockColumn.VarlenDataSidecar);
        }

        // Non-null bitmap sidecar: a (-1, 0) offset marks "no bitmap".
        if (!blockColumn.HasNonNullBitmapSidecar)
        {
            bitmapOffsets[columnIndex] = new SidecarOffset(-1, 0);
        }
        else
        {
            var bitmapSidecar = message.GetSidecarOffset(blockColumn.NonNullBitmapSidecar);
            ValidateSidecar(bitmapSidecar, bitmapLength, messageLength);
            bitmapOffsets[columnIndex] = bitmapSidecar;
        }
    }

    // Take the memory only after all sidecars were located and validated.
    var memory = message.TakeMemory();

    return new ResultSet(
        memory,
        schema,
        data.NumRows,
        dataOffsets,
        varlenOffsets,
        bitmapOffsets);
}
/// <summary>
/// Converts rowwise data to the newer columnar format, to avoid virtual
/// calls on ResultSet. This is only used if the Kudu server is 1.11 or
/// older.
/// </summary>
/// <remarks>
/// The output buffer is laid out as, for each column in order, its data
/// (fixed-width cells, or <c>numRows + 1</c> 4-byte offsets for
/// variable-length columns) followed by its non-null bitmap when the column
/// is nullable. The variable-length payloads of all columns follow at the
/// end of the buffer.
/// </remarks>
/// <param name="message">The message the row and indirect data are read from.</param>
/// <param name="schema">The projection schema of the scan.</param>
/// <param name="rowPb">The rowwise row block to convert.</param>
/// <returns>A result set backed by the newly built columnar buffer.</returns>
public static ResultSet Convert(
    KuduMessage message,
    KuduSchema schema,
    RowwiseRowBlockPB rowPb)
{
    var numColumns = schema.Columns.Count;

    // Within a rowwise row the null bitmap is addressed at the byte offset
    // that follows all column cells, i.e. the sum of the column sizes. It is
    // only consulted for nullable columns. (Computed as a running sum rather
    // than through a per-column offsets array, of which only the last entry
    // was ever needed; this also avoids indexing an empty array when the
    // schema has no columns.)
    int nonNullBitmapOffset = 0;
    for (int i = 0; i < numColumns; i++)
    {
        nonNullBitmapOffset += schema.GetColumn(i).Size;
    }

    var rowData = GetRowData(message, rowPb);
    var indirectData = GetIndirectData(message, rowPb);
    int rowSize = schema.RowSize;
    int numRows = rowPb.NumRows;

    var dataSidecarOffsets = new SidecarOffset[numColumns];
    var varlenDataSidecarOffsets = new SidecarOffset[numColumns];
    var nonNullBitmapSidecarOffsets = new SidecarOffset[numColumns];
    int nonNullBitmapSize = KuduEncoder.BitsToBytes(numRows);
    int offset = 0;

    // First pass: lay out the data and non-null bitmap regions per column.
    for (int i = 0; i < numColumns; i++)
    {
        var column = schema.GetColumn(i);

        // Variable-length columns store one 4-byte offset per row plus a
        // trailing end offset.
        var dataSize = column.IsFixedSize
            ? column.Size * numRows
            : (4 * numRows) + 4;

        dataSidecarOffsets[i] = new SidecarOffset(offset, dataSize);
        offset += dataSize;

        if (column.IsNullable)
        {
            nonNullBitmapSidecarOffsets[i] = new SidecarOffset(offset, nonNullBitmapSize);
            offset += nonNullBitmapSize;
        }
        else
        {
            // A (-1, 0) offset marks "no bitmap" for this column.
            nonNullBitmapSidecarOffsets[i] = new SidecarOffset(-1, 0);
        }
    }

    // Variable-length payloads are appended after all fixed regions.
    var buffer = new ArrayPoolBuffer<byte>(offset + indirectData.Length);
    var data = buffer.Buffer;
    data.AsSpan().Clear();
    var varlenData = data.AsSpan(offset);

    int currentDataOffset = 0;
    int currentVarlenOffset = 0;

    // Second pass: copy the cells column by column.
    for (int columnIndex = 0; columnIndex < numColumns; columnIndex++)
    {
        var column = schema.GetColumn(columnIndex);
        var isFixedSize = column.IsFixedSize;
        var columnarSize = isFixedSize ? column.Size : 4;
        var rowwiseSize = column.Size;
        var dataOffset = dataSidecarOffsets[columnIndex];
        var nonNullOffset = nonNullBitmapSidecarOffsets[columnIndex].Start;
        var dataOutput = data.AsSpan(dataOffset.Start, dataOffset.Length);

        for (int rowIndex = 0; rowIndex < numRows; rowIndex++)
        {
            bool isSet = true;
            var rowSlice = rowData.Slice(rowSize * rowIndex, rowSize);

            if (nonNullOffset > 0)
            {
                // The rowwise bitmap marks nulls, the columnar one marks
                // non-nulls, hence the inversion.
                isSet = !rowSlice.GetBit(nonNullBitmapOffset, columnIndex);

                if (isSet)
                {
                    data.SetBit(nonNullOffset, rowIndex);
                }
            }

            if (isFixedSize)
            {
                if (isSet)
                {
                    var rawData = rowSlice.Slice(currentDataOffset, columnarSize);
                    rawData.CopyTo(dataOutput);
                }
            }
            else
            {
                // Record the start offset for every cell, including null
                // ones. A null cell is thereby a zero-length entry and the
                // offsets stay monotonic; leaving the slot at 0 would make
                // "next offset - this offset" negative for a non-null cell
                // that is followed by a null cell.
                KuduEncoder.EncodeInt32(dataOutput, currentVarlenOffset);

                if (isSet)
                {
                    // A rowwise varlen cell is an 8-byte offset into the
                    // indirect data followed by an 8-byte length.
                    var offsetData = rowSlice.Slice(currentDataOffset, 8);
                    var lengthData = rowSlice.Slice(currentDataOffset + 8, 8);
                    int start = (int)KuduEncoder.DecodeInt64(offsetData);
                    int length = (int)KuduEncoder.DecodeInt64(lengthData);

                    var indirectSlice = indirectData.Slice(start, length);
                    indirectSlice.CopyTo(varlenData);
                    varlenData = varlenData.Slice(length);

                    currentVarlenOffset += length;
                }
            }

            dataOutput = dataOutput.Slice(columnarSize);
        }

        currentDataOffset += rowwiseSize;

        if (!isFixedSize)
        {
            // Trailing end offset, then publish where this column's
            // variable-length payload lives in the buffer.
            KuduEncoder.EncodeInt32(dataOutput, currentVarlenOffset);
            varlenDataSidecarOffsets[columnIndex] = new SidecarOffset(offset, currentVarlenOffset);

            offset += currentVarlenOffset;
            currentVarlenOffset = 0;
        }
    }

    return new ResultSet(
        buffer,
        schema,
        numRows,
        dataSidecarOffsets,
        varlenDataSidecarOffsets,
        nonNullBitmapSidecarOffsets);
}
/// <summary>
/// Exercises altering range partitions: dropping the initial partition,
/// adding one, replacing it with a different and then an identical range,
/// combining a partition change with a rename and a column addition, and
/// finally dropping all partitions together with a column.
/// </summary>
public async Task TestAlterRangePartitioning()
{
    KuduTable table = await CreateTableAsync();
    KuduSchema schema = table.Schema;

    // Insert some rows, and then drop the partition and ensure that the table is empty.
    await InsertRowsAsync(table, 0, 100);
    var rowCount = await ClientTestUtil.CountRowsAsync(_client, table);
    Assert.Equal(100, rowCount);

    var dropUnbounded = new AlterTableBuilder(table)
        .DropRangePartition((lowerBound, upperBound) => { });
    await _client.AlterTableAsync(dropUnbounded);
    rowCount = await ClientTestUtil.CountRowsAsync(_client, table);
    Assert.Equal(0, rowCount);

    // Add new range partition and insert rows.
    var addFirstRange = new AlterTableBuilder(table)
        .AddRangePartition((lowerBound, upperBound) =>
        {
            lowerBound.SetInt32("c0", 0);
            upperBound.SetInt32("c0", 100);
        });
    await _client.AlterTableAsync(addFirstRange);
    await InsertRowsAsync(table, 0, 100);
    rowCount = await ClientTestUtil.CountRowsAsync(_client, table);
    Assert.Equal(100, rowCount);

    // Replace the range partition with a different one.
    var replaceWithDifferent = new AlterTableBuilder(table)
        .DropRangePartition((lowerBound, upperBound) =>
        {
            lowerBound.SetInt32("c0", 0);
            upperBound.SetInt32("c0", 100);
        })
        .AddRangePartition((lowerBound, upperBound) =>
        {
            lowerBound.SetInt32("c0", 50);
            upperBound.SetInt32("c0", 150);
        });
    await _client.AlterTableAsync(replaceWithDifferent);
    rowCount = await ClientTestUtil.CountRowsAsync(_client, table);
    Assert.Equal(0, rowCount);
    await InsertRowsAsync(table, 50, 125);
    rowCount = await ClientTestUtil.CountRowsAsync(_client, table);
    Assert.Equal(75, rowCount);

    // Replace the range partition with the same one.
    var replaceWithSame = new AlterTableBuilder(table)
        .DropRangePartition((lowerBound, upperBound) =>
        {
            lowerBound.SetInt32("c0", 50);
            upperBound.SetInt32("c0", 150);
        })
        .AddRangePartition((lowerBound, upperBound) =>
        {
            lowerBound.SetInt32("c0", 50);
            upperBound.SetInt32("c0", 150);
        });
    await _client.AlterTableAsync(replaceWithSame);
    rowCount = await ClientTestUtil.CountRowsAsync(_client, table);
    Assert.Equal(0, rowCount);
    await InsertRowsAsync(table, 50, 125);
    rowCount = await ClientTestUtil.CountRowsAsync(_client, table);
    Assert.Equal(75, rowCount);

    // Alter table partitioning + alter table schema
    var newTableName = $"{_tableName}-renamed";
    var addRangeRenameAndAddColumn = new AlterTableBuilder(table)
        .AddRangePartition((lowerBound, upperBound) =>
        {
            lowerBound.SetInt32("c0", 200);
            upperBound.SetInt32("c0", 300);
        })
        .RenameTable(newTableName)
        .AddColumn("c2", KuduType.Int32);
    await _client.AlterTableAsync(addRangeRenameAndAddColumn);

    await InsertRowsAsync(table, 200, 300);
    rowCount = await ClientTestUtil.CountRowsAsync(_client, table);
    Assert.Equal(175, rowCount);
    var renamedTable = await _client.OpenTableAsync(newTableName);
    Assert.Equal(3, renamedTable.Schema.Columns.Count);

    // Drop all range partitions + alter table schema. This also serves to test
    // specifying range bounds with a subset schema (since a column was
    // previously added).
    var dropEverything = new AlterTableBuilder(table)
        .DropRangePartition((lowerBound, upperBound) =>
        {
            lowerBound.SetInt32("c0", 200);
            upperBound.SetInt32("c0", 300);
        })
        .DropRangePartition((lowerBound, upperBound) =>
        {
            lowerBound.SetInt32("c0", 50);
            upperBound.SetInt32("c0", 150);
        })
        .DropColumn("c2");
    await _client.AlterTableAsync(dropEverything);

    rowCount = await ClientTestUtil.CountRowsAsync(_client, table);
    Assert.Equal(0, rowCount);
    var reopenedTable = await _client.OpenTableAsync(newTableName);
    Assert.Equal(2, reopenedTable.Schema.Columns.Count);
}
/// <summary>
/// Creates a partial row for <paramref name="schema"/> that is tagged with
/// the given row operation.
/// </summary>
/// <param name="schema">The schema passed to the base partial row.</param>
/// <param name="operation">The operation stored in <c>Operation</c>.</param>
public PartialRowOperation(KuduSchema schema, RowOperation operation)
    : base(schema) => Operation = operation;