예제 #1
0
        /// <summary>
        /// Determines the primary key from which the full-load export of a table should continue.
        /// </summary>
        /// <remarks>
        /// When the offset lookup reports anything other than <c>Result.NoStoredState</c>, the stored
        /// state is returned unchanged. Otherwise the first batch is fetched, written via
        /// <c>WriteToRedshiftAsync</c>, and its last row key is stored as the new offset and returned.
        /// </remarks>
        /// <param name="executionId">Identifier passed to the offset lookup and offset store calls.</param>
        /// <param name="tableSchema">Schema of the table being exported.</param>
        /// <param name="batchSize">Batch size passed to the first-batch read.</param>
        /// <returns>
        /// The key to resume from, or <c>null</c> when the first batch has no records or the
        /// write reports failure.
        /// </returns>
        private async Task<PrimaryKeyValue> SetStartingPosition(string executionId, TableSchema tableSchema, int batchSize)
        {
            var storedOffset = await _cdcReaderClient.GetLastFullLoadOffsetAsync(executionId, tableSchema.TableName);

            // A previously stored offset wins: resume from it without touching any data.
            if (storedOffset.Result != Result.NoStoredState)
            {
                Console.WriteLine($"Table {tableSchema.TableName} - Starting from stored offset");
                return storedOffset.State;
            }

            Console.WriteLine($"Table {tableSchema.TableName} - No previous stored offset. Starting from first row");
            var initialBatch = await _cdcReaderClient.GetFirstBatchAsync(tableSchema, batchSize);

            if (!initialBatch.Records.Any())
            {
                Console.WriteLine($"Table {tableSchema.TableName} - No data to export");
                return null;
            }

            // Capture the key before the write, then write starting from a record counter of zero.
            PrimaryKeyValue resumeKey     = initialBatch.LastRowKey;
            long            writtenBefore = 0;
            var             writeOutcome  = await WriteToRedshiftAsync(initialBatch, writtenBefore);

            // Item1 is the success flag; the offset is only persisted after a successful write.
            if (!writeOutcome.Item1)
            {
                Console.WriteLine($"Table {tableSchema.TableName} - Export aborted");
                return null;
            }

            await _cdcReaderClient.StoreFullLoadOffsetAsync(executionId, tableSchema.TableName, resumeKey);
            Console.WriteLine($"Table {tableSchema.TableName} - Written first batch to Redshift");

            return resumeKey;
        }
예제 #2
0
        /// <summary>
        /// Exports a table in batches, publishing each batch through a producer created for the
        /// table's topic and storing the last exported key after every published batch.
        /// </summary>
        /// <remarks>
        /// If an offset is already stored for the execution and table, the export resumes from it;
        /// otherwise it starts with the first batch. The loop ends when a batch is empty or smaller
        /// than <paramref name="batchSize"/>, or when cancellation is requested.
        /// The record counter starts at zero on every call, so after a resume the reported
        /// percentage covers only the records published by this call.
        /// </remarks>
        /// <param name="token">Cancellation token checked between batches and passed to publishing.</param>
        /// <param name="executionId">Identifier passed to the offset lookup and offset store calls.</param>
        /// <param name="tableSchema">Schema of the table being exported.</param>
        /// <param name="serializationMode">Serialization mode passed to the producer factory.</param>
        /// <param name="sendWithKey">Flag passed to the producer factory.</param>
        /// <param name="batchSize">Number of rows requested per batch.</param>
        /// <param name="printPercentProgressMod">
        /// Progress is printed only when the percentage changed and is a multiple of this value.
        /// It is used as a modulo divisor, so it must be non-zero.
        /// </param>
        private async Task StreamTableAsync(CancellationToken token,
                                            string executionId,
                                            TableSchema tableSchema,
                                            SerializationMode serializationMode,
                                            bool sendWithKey,
                                            int batchSize,
                                            int printPercentProgressMod)
        {
            // Invariant lower-casing keeps the topic name independent of the current culture.
            string topicName = _kafkaTopicPrefix + tableSchema.TableName.ToLowerInvariant();
            var    rowCount  = await _cdcReaderClient.GetRowCountAsync(tableSchema);

            Console.WriteLine($"Table {tableSchema.Schema}.{tableSchema.TableName} has {rowCount} rows to export");
            int    progress  = 0;
            double totalRows = (double)rowCount;

            using (var producer = ProducerFactory.GetProducer(topicName, tableSchema, serializationMode, sendWithKey, _kafkaBootstrapServers, _schemaRegistryUrl))
            {
                long            ctr = 0;
                PrimaryKeyValue lastRetrievedKey;
                var             existingOffset = await _cdcReaderClient.GetLastFullLoadOffsetAsync(executionId, tableSchema.TableName);

                if (existingOffset.Result == CdcReader.State.Result.NoStoredState)
                {
                    Console.WriteLine($"Table {tableSchema.TableName} - No previous stored offset. Starting from first row");
                    var firstBatch = await _cdcReaderClient.GetFirstBatchAsync(tableSchema, batchSize);

                    // An empty first batch means there is nothing to publish and no key worth storing.
                    if (!firstBatch.Records.Any())
                    {
                        Console.WriteLine($"Table {tableSchema.TableName} - No data to export");
                        return;
                    }

                    ctr = await PublishAsync(producer, token, firstBatch, ctr);

                    lastRetrievedKey = firstBatch.LastRowKey;
                    await _cdcReaderClient.StoreFullLoadOffsetAsync(executionId, tableSchema.TableName, lastRetrievedKey);
                }
                else
                {
                    Console.WriteLine($"Table {tableSchema.TableName} - Starting from stored offset");
                    lastRetrievedKey = existingOffset.State;
                }

                bool finished = false;

                while (!token.IsCancellationRequested && !finished)
                {
                    var batch = await _cdcReaderClient.GetBatchAsync(tableSchema, lastRetrievedKey, batchSize);

                    // Stop on an empty batch without storing its key, so the previously stored
                    // offset (the last key that was actually exported) stays intact.
                    if (!batch.Records.Any())
                    {
                        break;
                    }

                    ctr = await PublishAsync(producer, token, batch, ctr);

                    // Guard against a zero row count, which would otherwise yield NaN/Infinity here.
                    int latestProgress = totalRows > 0 ? (int)(((double)ctr / totalRows) * 100) : 0;
                    if (progress != latestProgress && latestProgress % printPercentProgressMod == 0)
                    {
                        Console.WriteLine($"Table {tableSchema.Schema}.{tableSchema.TableName} - Progress at {latestProgress}% ({ctr} records)");
                    }

                    progress         = latestProgress;
                    lastRetrievedKey = batch.LastRowKey;
                    await _cdcReaderClient.StoreFullLoadOffsetAsync(executionId, tableSchema.TableName, lastRetrievedKey);

                    // A short batch means the end of the table has been reached.
                    finished = batch.Records.Count < batchSize;
                }

                if (token.IsCancellationRequested)
                {
                    Console.WriteLine($"Table {tableSchema.Schema}.{tableSchema.TableName} - cancelled at progress at {progress}% ({ctr} records)");
                }
                else
                {
                    Console.WriteLine($"Table {tableSchema.Schema}.{tableSchema.TableName} - complete ({ctr} records)");
                }
            }
        }