Exemplo n.º 1
0
        /// <summary>
        /// Publishes the rows of a table to a Kafka topic in batches, storing the key of the
        /// last published row after each batch so that a later run with the same
        /// <paramref name="executionId"/> can continue from that key.
        /// </summary>
        /// <param name="token">Checked before each batch; when cancelled the loop stops after the current batch.</param>
        /// <param name="executionId">Identifier under which the full-load offset is read and stored.</param>
        /// <param name="tableSchema">Schema of the table to export; its name is also used to build the topic name.</param>
        /// <param name="serializationMode">Passed through to the producer factory.</param>
        /// <param name="sendWithKey">Passed through to the producer factory.</param>
        /// <param name="batchSize">Number of rows requested per batch; a batch smaller than this ends the export.</param>
        /// <param name="printPercentProgressMod">
        /// Progress is printed when the percentage changes and is a multiple of this value.
        /// A value of 0 prints on every percentage change instead of throwing.
        /// </param>
        private async Task StreamTableAsync(CancellationToken token,
                                            string executionId,
                                            TableSchema tableSchema,
                                            SerializationMode serializationMode,
                                            bool sendWithKey,
                                            int batchSize,
                                            int printPercentProgressMod)
        {
            // Invariant casing keeps the topic name independent of the host's current culture.
            string topicName = _kafkaTopicPrefix + tableSchema.TableName.ToLowerInvariant();
            var    rowCount  = await _cdcReaderClient.GetRowCountAsync(tableSchema);

            Console.WriteLine($"Table {tableSchema.Schema}.{tableSchema.TableName} has {rowCount} rows to export");
            int progress = 0;

            using (var producer = ProducerFactory.GetProducer(topicName, tableSchema, serializationMode, sendWithKey, _kafkaBootstrapServers, _schemaRegistryUrl))
            {
                long            ctr              = 0;
                bool            finished         = false;
                PrimaryKeyValue lastRetrievedKey = null;
                var             existingOffset   = await _cdcReaderClient.GetLastFullLoadOffsetAsync(executionId, tableSchema.TableName);

                if (existingOffset.Result == CdcReader.State.Result.NoStoredState)
                {
                    Console.WriteLine($"Table {tableSchema.TableName} - No previous stored offset. Starting from first row");
                    var firstBatch = await _cdcReaderClient.GetFirstBatchAsync(tableSchema, batchSize);

                    ctr = await PublishAsync(producer, token, firstBatch, ctr);

                    bool firstBatchHasRecords = firstBatch.Records.Any();
                    if (firstBatchHasRecords)
                    {
                        // Only persist an offset that points at a row that was actually published.
                        lastRetrievedKey = firstBatch.LastRowKey;
                        await _cdcReaderClient.StoreFullLoadOffsetAsync(executionId, tableSchema.TableName, lastRetrievedKey);
                    }

                    // Same termination rule as the loop below: a short (or empty) batch means
                    // there is nothing further to fetch.
                    finished = !firstBatchHasRecords || firstBatch.Records.Count < batchSize;
                }
                else
                {
                    // A stored offset exists, so the export continues after that key.
                    Console.WriteLine($"Table {tableSchema.TableName} - Resuming from previously stored offset");
                    lastRetrievedKey = existingOffset.State;
                }

                while (!token.IsCancellationRequested && !finished)
                {
                    var batch = await _cdcReaderClient.GetBatchAsync(tableSchema, lastRetrievedKey, batchSize);

                    ctr = await PublishAsync(producer, token, batch, ctr);

                    // Guard against an empty table (division by zero yields NaN) and a zero modulus.
                    int  latestProgress  = rowCount > 0 ? (int)(((double)ctr / (double)rowCount) * 100) : 0;
                    bool atReportingStep = printPercentProgressMod == 0 || latestProgress % printPercentProgressMod == 0;
                    if (progress != latestProgress && atReportingStep)
                    {
                        Console.WriteLine($"Table {tableSchema.Schema}.{tableSchema.TableName} - Progress at {latestProgress}% ({ctr} records)");
                    }

                    progress = latestProgress;

                    bool hasRecords = batch.Records.Any();
                    if (hasRecords)
                    {
                        // Keep the previously stored key when the batch is empty, so the stored
                        // offset always refers to the last row that was really published.
                        lastRetrievedKey = batch.LastRowKey;
                        await _cdcReaderClient.StoreFullLoadOffsetAsync(executionId, tableSchema.TableName, lastRetrievedKey);
                    }

                    if (!hasRecords || batch.Records.Count < batchSize)
                    {
                        finished = true;
                    }
                }

                if (token.IsCancellationRequested)
                {
                    Console.WriteLine($"Table {tableSchema.Schema}.{tableSchema.TableName} - cancelled at progress at {progress}% ({ctr} records)");
                }
                else
                {
                    Console.WriteLine($"Table {tableSchema.Schema}.{tableSchema.TableName} - complete ({ctr} records)");
                }
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Reads the rows of a table in batches and writes each batch via
        /// <c>WriteToRedshiftAsync</c>, storing the key of the last written row after each
        /// successful non-empty batch. A failed write is retried after a 10 second wait.
        /// </summary>
        /// <param name="token">Checked before each batch; when cancelled the loop stops.</param>
        /// <param name="executionId">Identifier under which the full-load offset is stored.</param>
        /// <param name="tableSchema">Schema of the table to export.</param>
        /// <param name="batchSize">Number of rows requested per batch; a batch smaller than this ends the export.</param>
        /// <param name="printPercentProgressMod">
        /// Progress is printed when the percentage changes and is a multiple of this value.
        /// A value of 0 prints on every percentage change instead of throwing.
        /// </param>
        private async Task ExportTableAsync(CancellationToken token,
                                            string executionId,
                                            TableSchema tableSchema,
                                            int batchSize,
                                            int printPercentProgressMod)
        {
            var rowCount = await _cdcReaderClient.GetRowCountAsync(tableSchema);

            Console.WriteLine($"Table {tableSchema.TableName} - {rowCount} rows to export");
            int progress = 0;

            PrimaryKeyValue lastRetrievedKey = await SetStartingPosition(executionId, tableSchema, batchSize);

            // NOTE(review): the counter starts at batchSize, presumably because SetStartingPosition
            // has already exported one full batch - confirm; if it resumed from a stored offset or
            // wrote a short batch, the reported record count will be off.
            long ctr      = batchSize;
            bool finished = false;

            while (!token.IsCancellationRequested && !finished)
            {
                var batch = await _cdcReaderClient.GetBatchAsync(tableSchema, lastRetrievedKey, batchSize);

                var result = await WriteToRedshiftAsync(batch, ctr);

                if (result.Item1)
                {
                    ctr = result.Item2;

                    // Guard against an empty table (division by zero yields NaN) and a zero modulus.
                    int  latestProgress  = rowCount > 0 ? (int)(((double)ctr / (double)rowCount) * 100) : 0;
                    bool atReportingStep = printPercentProgressMod == 0 || latestProgress % printPercentProgressMod == 0;
                    if (progress != latestProgress && atReportingStep)
                    {
                        Console.WriteLine($"Table {tableSchema.TableName} - Progress at {latestProgress}% ({ctr} records)");
                    }

                    progress = latestProgress;

                    bool hasRecords = batch.Records.Any();
                    if (hasRecords)
                    {
                        // Advance and persist the offset only when rows were actually written.
                        lastRetrievedKey = batch.LastRowKey;
                        await _cdcReaderClient.StoreFullLoadOffsetAsync(executionId, tableSchema.TableName, lastRetrievedKey);
                    }

                    if (!hasRecords || batch.Records.Count < batchSize)
                    {
                        finished = true;
                    }
                }
                else
                {
                    // lastRetrievedKey is unchanged, so the next iteration fetches the same batch again.
                    Console.WriteLine($"Table {tableSchema.TableName} - Failed to upload to Redshift. Will try again in 10 seconds.");
                    await WaitForSeconds(token, 10);
                }
            }

            if (token.IsCancellationRequested)
            {
                Console.WriteLine($"Table {tableSchema.Schema}.{tableSchema.TableName} - cancelled at progress at {progress}% ({ctr} records)");
            }
            else
            {
                Console.WriteLine($"Table {tableSchema.Schema}.{tableSchema.TableName} - complete ({ctr} records)");
            }
        }