/// <summary>
/// Determines the primary-key position from which the full-load export of
/// <paramref name="tableSchema"/> should begin. If an offset was previously
/// stored for this execution, the export resumes from it; otherwise the first
/// batch is read, written to Redshift, and its last row key is stored as the
/// new offset.
/// </summary>
/// <param name="executionId">Identifier of the current export execution.</param>
/// <param name="tableSchema">Schema of the table being exported.</param>
/// <param name="batchSize">Maximum number of rows to read in the first batch.</param>
/// <returns>
/// The key to continue reading from, or null when the table has no rows or the
/// Redshift write was aborted.
/// </returns>
private async Task<PrimaryKeyValue> SetStartingPosition(string executionId, TableSchema tableSchema, int batchSize)
{
    var storedOffset = await _cdcReaderClient.GetLastFullLoadOffsetAsync(executionId, tableSchema.TableName);

    // A stored offset means a previous run already exported part of this table.
    if (storedOffset.Result != Result.NoStoredState)
    {
        Console.WriteLine($"Table {tableSchema.TableName} - Starting from stored offset");
        return storedOffset.State;
    }

    Console.WriteLine($"Table {tableSchema.TableName} - No previous stored offset. Starting from first row");
    var firstBatch = await _cdcReaderClient.GetFirstBatchAsync(tableSchema, batchSize);

    if (!firstBatch.Records.Any())
    {
        Console.WriteLine($"Table {tableSchema.TableName} - No data to export");
        return null;
    }

    var startingKey = firstBatch.LastRowKey;
    long rowCounter = 0;

    // Item1 = success flag, Item2 = updated row counter.
    var writeResult = await WriteToRedshiftAsync(firstBatch, rowCounter);
    if (!writeResult.Item1)
    {
        Console.WriteLine($"Table {tableSchema.TableName} - Export aborted");
        return null;
    }

    // Persist the offset only after the batch landed in Redshift, so a
    // restart never skips rows that were not written.
    await _cdcReaderClient.StoreFullLoadOffsetAsync(executionId, tableSchema.TableName, startingKey);
    rowCounter = writeResult.Item2;
    Console.WriteLine($"Table {tableSchema.TableName} - Written first batch to Redshift");

    return startingKey;
}
/// <summary>
/// Streams every row of the given table to a Kafka topic, resuming from a
/// stored full-load offset when one exists for this execution. After each
/// batch the last published row key is persisted so an interrupted run can
/// resume, and progress is logged every <paramref name="printPercentProgressMod"/> percent.
/// </summary>
/// <param name="token">Cancellation token checked between batches.</param>
/// <param name="executionId">Identifier of the current export execution.</param>
/// <param name="tableSchema">Schema of the table being exported.</param>
/// <param name="serializationMode">Serialization mode passed to the producer factory.</param>
/// <param name="sendWithKey">Whether messages are produced with a key.</param>
/// <param name="batchSize">Maximum rows per batch read from the source.</param>
/// <param name="printPercentProgressMod">Progress is printed at multiples of this percentage.</param>
private async Task StreamTableAsync(CancellationToken token, string executionId, TableSchema tableSchema, SerializationMode serializationMode, bool sendWithKey, int batchSize, int printPercentProgressMod)
{
    string topicName = _kafkaTopicPrefix + tableSchema.TableName.ToLower();
    var rowCount = await _cdcReaderClient.GetRowCountAsync(tableSchema);
    Console.WriteLine($"Table {tableSchema.Schema}.{tableSchema.TableName} has {rowCount} rows to export");

    int progress = 0;
    using (var producer = ProducerFactory.GetProducer(topicName, tableSchema, serializationMode, sendWithKey, _kafkaBootstrapServers, _schemaRegistryUrl))
    {
        long ctr = 0;
        PrimaryKeyValue lastRetrievedKey = null;
        var existingOffset = await _cdcReaderClient.GetLastFullLoadOffsetAsync(executionId, tableSchema.TableName);
        if (existingOffset.Result == CdcReader.State.Result.NoStoredState)
        {
            Console.WriteLine($"Table {tableSchema.TableName} - No previous stored offset. Starting from first row");
            var firstBatch = await _cdcReaderClient.GetFirstBatchAsync(tableSchema, batchSize);
            ctr = await PublishAsync(producer, token, firstBatch, ctr);
            lastRetrievedKey = firstBatch.LastRowKey;
            // NOTE(review): when the table is empty, LastRowKey is presumably null and is
            // still stored here — confirm StoreFullLoadOffsetAsync tolerates a null key.
            await _cdcReaderClient.StoreFullLoadOffsetAsync(executionId, tableSchema.TableName, firstBatch.LastRowKey);
        }
        else
        {
            // BUGFIX: this branch previously logged "No data to export" (copy/paste
            // error) even though it resumes from a stored offset. The message now
            // matches the one used by SetStartingPosition for the same condition.
            Console.WriteLine($"Table {tableSchema.TableName} - Starting from stored offset");
            lastRetrievedKey = existingOffset.State;
        }

        bool finished = false;
        while (!token.IsCancellationRequested && !finished)
        {
            // (Removed an unused List<RowChange> that was allocated on every iteration.)
            var batch = await _cdcReaderClient.GetBatchAsync(tableSchema, lastRetrievedKey, batchSize);
            ctr = await PublishAsync(producer, token, batch, ctr);

            int latestProgress = (int)(((double)ctr / (double)rowCount) * 100);
            if (progress != latestProgress && latestProgress % printPercentProgressMod == 0)
            {
                Console.WriteLine($"Table {tableSchema.Schema}.{tableSchema.TableName} - Progress at {latestProgress}% ({ctr} records)");
            }
            progress = latestProgress;

            // Persist the resume point after each published batch.
            lastRetrievedKey = batch.LastRowKey;
            await _cdcReaderClient.StoreFullLoadOffsetAsync(executionId, tableSchema.TableName, lastRetrievedKey);

            // An empty or short batch means the end of the table was reached.
            if (!batch.Records.Any() || batch.Records.Count < batchSize)
            {
                finished = true;
            }
        }

        if (token.IsCancellationRequested)
        {
            Console.WriteLine($"Table {tableSchema.Schema}.{tableSchema.TableName} - cancelled at progress at {progress}% ({ctr} records)");
        }
        else
        {
            Console.WriteLine($"Table {tableSchema.Schema}.{tableSchema.TableName} - complete ({ctr} records)");
        }
    }
}