/// <summary>
/// Scans all of the supplied scan tokens concurrently and returns the total
/// number of rows they produce.
/// </summary>
/// <param name="client">Client used to rebuild a scanner from each serialized token.</param>
/// <param name="tokens">The scan tokens whose rows should be counted.</param>
/// <returns>The sum of the row counts reported by each token's scanner.</returns>
private static async Task<long> CountScanTokenRowsAsync(
    KuduClient client, List<KuduScanToken> tokens)
{
    // Counts the rows of a single token. The token is serialized first so the
    // scanner is rebuilt from the token's byte representation.
    async Task<long> CountTokenRowsAsync(KuduScanToken token)
    {
        var serializedToken = token.Serialize();
        var builder = await client.NewScanBuilderFromTokenAsync(serializedToken);

        return await builder.Build().CountAsync();
    }

    // Start one thread-pool task per token; ConvertAll is eager, so every scan
    // has been queued before we begin awaiting any of them.
    var pendingCounts = tokens.ConvertAll(
        token => Task.Run(() => CountTokenRowsAsync(token)));

    var counts = await Task.WhenAll(pendingCounts);

    return counts.Sum();
}
/// <summary>
/// Verifies that scan tokens built before a schema change are rejected once the
/// column they reference has been dropped, and that a token built without table
/// metadata is still rejected after a column with the same name is added back.
/// </summary>
public async Task TestScanTokensConcurrentAlterTable()
{
    var tableBuilder = new TableBuilder(_tableName)
        .SetNumReplicas(1)
        .AddColumn("key", KuduType.Int64, opt => opt.Key(true))
        .AddColumn("a", KuduType.Int64);

    var table = await _client.CreateTableAsync(tableBuilder);

    // Build the same scan twice: once without and once with table metadata
    // included in the token.
    var scanTokenBuilder = _client.NewScanTokenBuilder(table);
    var plainTokens = await scanTokenBuilder.IncludeTableMetadata(false).BuildAsync();
    var metadataTokens = await scanTokenBuilder.IncludeTableMetadata(true).BuildAsync();

    var plainToken = Assert.Single(plainTokens);
    var metadataToken = Assert.Single(metadataTokens);

    // Rebuilding a scanner from the metadata-free token is expected to fail
    // with a KeyNotFoundException; this is checked after each schema change.
    Task AssertPlainTokenRejectedAsync() =>
        Assert.ThrowsAsync<KeyNotFoundException>(
            async () => await _client.NewScanBuilderFromTokenAsync(plainToken));

    // Drop a column
    var dropColumn = new AlterTableBuilder(table).DropColumn("a");
    await _client.AlterTableAsync(dropColumn);

    await AssertPlainTokenRejectedAsync();

    // The token that includes table metadata fails either while the scanner is
    // rebuilt or while it is scanned, with a message naming the missing column.
    var scanFailure = await Assert.ThrowsAsync<NonRecoverableException>(async () =>
    {
        var staleScanBuilder = await _client.NewScanBuilderFromTokenAsync(metadataToken);
        await staleScanBuilder.Build().CountAsync();
    });

    Assert.Contains(
        "Some columns are not present in the current schema: a",
        scanFailure.Message);

    // Add a column with the same name, type, and nullability. It will have a
    // different id -- it's a different column -- so the scan token will fail.
    var addColumn = new AlterTableBuilder(table)
        .AddColumn("a", KuduType.Int64, opt => opt.DefaultValue(0L));
    await _client.AlterTableAsync(addColumn);

    // Re-open the table after the alter; the returned instance is not read
    // again in this test.
    table = await _client.OpenTableAsync(_tableName);

    await AssertPlainTokenRejectedAsync();
}
/// <summary>
/// Scans every tablet through fault-tolerant scan tokens, restarting the tablet
/// server that is serving the first token part-way through its scan, and checks
/// that rows still arrive in strictly increasing key order and that the total
/// row count matches the expected number of rows.
/// </summary>
public async Task TestFaultTolerantScannerRestartAfterSecondScanRequest()
{
    // The table is split into several tablets (TABLET_COUNT, 3 by default).
    // We verify row ordering and that no rows are duplicated or lost.
    // This scenario needs at least two scan requests per tablet, so the batch
    // size is set to the minimum of 1 byte.
    var tokenBuilder = _client.NewScanTokenBuilder(_table)
        .SetBatchSizeBytes(1)
        .SetFaultTolerant(true)
        .SetProjectedColumns(0);

    var tokens = await tokenBuilder.BuildAsync();
    Assert.Equal(_numTablets, tokens.Count);

    // Only the first token's scan gets a tablet server restart injected.
    var tabletScannerTasks = tokens
        .Select((token, i) => ScanTokenAsync(token, i == 0));

    var results = await Task.WhenAll(tabletScannerTasks);
    var totalRowCount = results.Sum();

    Assert.Equal(_numRows, totalRowCount);

    // Scans a single token to completion and returns the number of rows read.
    // When enableFaultInjection is true, the tablet server behind the scan is
    // restarted once, after more than half of an even per-tablet share of the
    // rows has been read.
    async Task<int> ScanTokenAsync(KuduScanToken token, bool enableFaultInjection)
    {
        int rowCount = 0;
        int previousRow = int.MinValue;

        // Starts out "already injected" when fault injection is disabled, so
        // the restart branch below is never taken for those scans.
        bool faultInjected = !enableFaultInjection;
        int faultInjectionLowBound = (_numRows / _numTablets / 2);

        bool firstScanRequest = true;
        long firstScannedMetric = 0;
        long firstPropagatedTimestamp = 0;
        long lastScannedMetric = 0;
        long lastPropagatedTimestamp = 0;

        var scanBuilder = await _client.NewScanBuilderFromTokenAsync(token);
        var scanner = scanBuilder.Build();

        await using var scanEnumerator = scanner.GetAsyncEnumerator();

        while (await scanEnumerator.MoveNextAsync())
        {
            foreach (var row in scanEnumerator.Current)
            {
                int key = row.GetInt32(0);

                // Keys must be strictly increasing; anything else means the
                // scan returned a duplicate or out-of-order row.
                Assert.True(
                    previousRow < key,
                    $"Impossible results, previousKey: {previousRow} >= currentKey: {key}");

                if (!faultInjected && rowCount > faultInjectionLowBound)
                {
                    await _harness.RestartTabletServerAsync(scanEnumerator.Tablet);
                    faultInjected = true;
                }
                else
                {
                    // Capture the metric and timestamp seen on the first
                    // observed row, then keep overwriting the "last" values so
                    // they can be compared once the scan has finished.
                    if (firstScanRequest)
                    {
                        firstScannedMetric = scanEnumerator.ResourceMetrics.TotalDurationNanos;
                        firstPropagatedTimestamp = _client.LastPropagatedTimestamp;
                        firstScanRequest = false;
                    }

                    lastScannedMetric = scanEnumerator.ResourceMetrics.TotalDurationNanos;
                    lastPropagatedTimestamp = _client.LastPropagatedTimestamp;
                }

                previousRow = key;
                rowCount++;
            }
        }

        // Both the scan's duration metric and the client's propagated
        // timestamp must have moved between the first and last observation.
        Assert.NotEqual(lastScannedMetric, firstScannedMetric);
        Assert.True(
            lastPropagatedTimestamp > firstPropagatedTimestamp,
            $"Expected {lastPropagatedTimestamp} > {firstPropagatedTimestamp}");

        return rowCount;
    }
}