Exemple #1
0
        /// <inheritdoc />
        public Task<Result<CheckpointState>> GetCheckpointStateAsync(OperationContext context)
        {
            return context.PerformOperationAsync(
                Tracer,
                async () =>
                {
                    // One Redis batch yields both the known checkpoints and the start cursor; a failed result throws.
                    var (knownCheckpoints, cursorTime) = await ExecuteRedisFallbackAsync(
                            context,
                            async redisDb => Result.Success(
                                await redisDb.ExecuteBatchAsync(
                                    context,
                                    batch => batch.GetCheckpointsInfoAsync(_checkpointsKey, _clock.UtcNow),
                                    RedisOperation.GetCheckpoint)))
                        .ThrowIfFailureAsync();

                    // The role is refreshed (without releasing it) on every call and cached in _role.
                    var roleUpdate = await UpdateRoleAsync(context, release: false);
                    if (!roleUpdate.Succeeded)
                    {
                        return new ErrorResult(roleUpdate).AsResult<Result<CheckpointState>>();
                    }

                    _role = roleUpdate.Value;

                    // The checkpoint with the greatest creation time wins.
                    var latest = knownCheckpoints.MaxByOrDefault(c => c.CheckpointCreationTime);
                    if (latest != null)
                    {
                        Tracer.Debug(context, $"Getting checkpoint state: Found checkpoint '{latest}'");

                        return Result.Success(
                            new CheckpointState(
                                _role.Value,
                                new EventSequencePoint(latest.SequenceNumber),
                                latest.CheckpointId,
                                latest.CheckpointCreationTime));
                    }

                    Tracer.Debug(context, $"Getting checkpoint state: Can't find a checkpoint: Start cursor time: {cursorTime}");

                    // Add slack for start cursor to account for clock skew between event hub and redis
                    return CheckpointState.CreateUnavailable(
                        _role.Value,
                        cursorTime - _configuration.EventStore.NewEpochEventStartCursorDelay);
                },
                Counters[GlobalStoreCounters.GetCheckpointState]);
        }
Exemple #2
0
        /// <summary>
        /// Writes <paramref name="checkpointState"/> to storage under a freshly generated blob name,
        /// after adding this machine's location to the state's consumer list.
        /// </summary>
        /// <param name="context">Operation context used for tracing and passed to the storage write.</param>
        /// <param name="checkpointState">State to persist; its <c>Consumers</c> collection is mutated by this call.</param>
        /// <returns>The result of the storage write, bounded by <c>RegisterCheckpointTimeout</c>.</returns>
        public Task<BoolResult> RegisterCheckpointAsync(OperationContext context, CheckpointState checkpointState)
        {
            TriggerGarbageCollection(context);

            // Rendered once, before the lambda below touches Consumers; reused for both the start and end trace messages.
            var stateDescription = checkpointState.ToString();

            return context.PerformOperationWithTimeoutAsync(
                Tracer,
                context =>
                {
                    var targetBlob = GenerateBlobName();
                    checkpointState.Consumers.TryAdd(_primaryMachineLocation);

                    if (!_configuration.WriteLegacyFormat)
                    {
                        return _storage.WriteAsync(context, targetBlob, checkpointState);
                    }

                    // Legacy format: the state is serialized to JSON text here and the text is written.
                    return _storage.WriteAsync(context, targetBlob, JsonSerializer.Serialize(checkpointState, _jsonSerializerOptions));
                },
                traceOperationStarted: false,
                extraStartMessage: stateDescription,
                extraEndMessage: _ => stateDescription,
                timeout: _configuration.RegisterCheckpointTimeout);
        }
        /// <summary>
        /// Initializes a new instance of the BeginCheckpointLogRecord class whose fields all carry
        /// their "invalid" (or zero) markers.
        /// </summary>
        /// <remarks>Only used for generating invalid BeginCheckpointLogRecord.</remarks>
        private BeginCheckpointLogRecord()
        {
            var invalidLsn = LogicalSequenceNumber.InvalidLsn;

            // Checkpoint description.
            this.IsFirstCheckpointOnFullCopy = false;
            this.progressVector = null;
            this.checkpointState = CheckpointState.Invalid;
            this.lastStableLsn = invalidLsn;
            this.epoch = LogicalSequenceNumber.InvalidEpoch;

            // Earliest pending transaction.
            this.earliestPendingTransactionOffset = LogicalLogRecord.InvalidLogicalRecordOffset;
            this.earliestPendingTransaction = BeginTransactionOperationLogRecord.InvalidBeginTransactionLogRecord;
            this.earliestPendingTransactionInvalidated = 0;

            // Backup information: both epoch components are built from the invalid LSN value.
            this.highestBackedUpEpoch = new Epoch(invalidLsn.LSN, invalidLsn.LSN);
            this.highestBackedUpLsn = invalidLsn;

            // Uint.MaxValue is used to indicate invalid. 4,294,967,295 log records, 4.294967295 TB.
            this.backupLogRecordCount = uint.MaxValue;
            this.backupLogSize = uint.MaxValue;

            // Periodic checkpoint / truncation timestamps start at zero ticks.
            this.lastPeriodicCheckpointTimeTicks = 0;
            this.lastPeriodicTruncationTimeTicks = 0;
        }
        /// <summary>
        /// Initializes a new instance of the BeginCheckpointLogRecord class from a record type, position and LSN,
        /// leaving every checkpoint-specific field at its "invalid" (or zero) marker.
        /// </summary>
        /// <param name="recordType">Must be <c>LogRecordType.BeginCheckpoint</c>; asserted below.</param>
        /// <param name="recordPosition">Forwarded to the base constructor.</param>
        /// <param name="lsn">Forwarded to the base constructor.</param>
        internal BeginCheckpointLogRecord(LogRecordType recordType, ulong recordPosition, long lsn)
            : base(recordType, recordPosition, lsn)
        {
            Utility.Assert(
                recordType == LogRecordType.BeginCheckpoint,
                "Record type is expected to be begin checkpoint  but the record type is : {0}",
                recordType);

            var invalidLsn = LogicalSequenceNumber.InvalidLsn;

            // Checkpoint description.
            this.IsFirstCheckpointOnFullCopy = false;
            this.progressVector = null;
            this.checkpointState = CheckpointState.Invalid;
            this.lastStableLsn = invalidLsn;
            this.epoch = LogicalSequenceNumber.InvalidEpoch;

            // Earliest pending transaction.
            this.earliestPendingTransactionOffset = LogicalLogRecord.InvalidLogicalRecordOffset;
            this.earliestPendingTransaction = BeginTransactionOperationLogRecord.InvalidBeginTransactionLogRecord;
            this.earliestPendingTransactionInvalidated = 0;

            // Backup information: both epoch components are built from the invalid LSN value.
            this.backupId = BackupLogRecord.InvalidBackupId;
            this.highestBackedUpEpoch = new Epoch(invalidLsn.LSN, invalidLsn.LSN);
            this.highestBackedUpLsn = invalidLsn;

            // Uint.MaxValue is used to indicate invalid. 4,294,967,295 log records, 4.294967295 TB.
            this.backupLogRecordCount = uint.MaxValue;
            this.backupLogSize = uint.MaxValue;

            // Periodic checkpoint / truncation timestamps start at zero ticks.
            this.lastPeriodicCheckpointTimeTicks = 0;
            this.lastPeriodicTruncationTimeTicks = 0;
        }
        /// <summary>
        /// Checks that a state serialized with <c>JsonSerializer</c> can be read back through
        /// <c>JsonUtilities.JsonDeserialize</c> with its checkpoint id and producer intact.
        /// </summary>
        public void CanBackwardCompatJsonSerializeCheckpointState()
        {
            var original = new CheckpointState(
                new EventSequencePoint(42),
                checkpointId: "TestCheckpointId",
                producer: new MachineLocation("This is a machine loc"));

            var json = JsonSerializer.Serialize(original);
            var roundTripped = JsonUtilities.JsonDeserialize<CheckpointState>(json);

            Assert.Equal(original.CheckpointId, roundTripped.CheckpointId);
            Assert.Equal(original.Producer, roundTripped.Producer);
        }
Exemple #6
0
        /// <summary>
        /// Fails fast with a formatted message when <paramref name="condition"/> is false; does nothing otherwise.
        /// </summary>
        /// <param name="condition">The condition that is expected to hold.</param>
        /// <param name="format">Composite format string for the failure message.</param>
        /// <param name="param1">Value substituted into <paramref name="format"/> using the invariant culture.</param>
        internal static void Assert(bool condition, string format, CheckpointState param1)
        {
            if (condition)
            {
                return;
            }

            FailFast(string.Format(System.Globalization.CultureInfo.InvariantCulture, format, param1));

            // AMW - Force break into debugger for ease of debugging
            Debugger.Break();
        }
Exemple #7
0
 /// <summary>
 /// Best-effort persistence of the latest checkpoint as a single "id,time" line in the last-checkpoint file.
 /// </summary>
 /// <remarks>Any exception is traced as a warning and swallowed; the caller is never interrupted.</remarks>
 private void WriteLatestCheckpointToFile(OperationContext context, CheckpointState checkpointState)
 {
     try
     {
         var contents = $"{checkpointState.CheckpointId},{checkpointState.CheckpointTime}";
         _fileSystem.WriteAllText(_lastCheckpointFile, contents);
     }
     catch (Exception writeError)
     {
         _tracer.Warning(context, $"Failed to write latest checkpoint state to disk: {writeError}");
     }
 }
Exemple #8
0
        /// <summary>
        /// Returns the first checkpoint state that can be read from storage, walking the blobs in the order
        /// produced by <c>ListBlobsRecentFirstAsync</c>.
        /// </summary>
        /// <remarks>
        /// A blob that fails to read is traced as an error and skipped. If no blob can be read, or the operation
        /// is cancelled while reading, an "unavailable" state is returned whose start cursor is the current clock
        /// time minus <c>NewEpochEventStartCursorDelay</c>. The whole operation is bounded by
        /// <c>CheckpointStateTimeout</c>.
        /// </remarks>
        /// <param name="context">Operation context used for tracing and cancellation.</param>
        /// <returns>The checkpoint state that was read, or an "unavailable" state.</returns>
        public Task<Result<CheckpointState>> GetCheckpointStateAsync(OperationContext context)
        {
            // NOTE: this function is naturally retried by the heartbeat mechanisms in LLS
            return context.PerformOperationWithTimeoutAsync(
                Tracer,
                async context =>
                {
                    var blobs = ListBlobsRecentFirstAsync(context);

                    await foreach (var blob in blobs)
                    {
                        try
                        {
                            var checkpointState = await _storage.ReadAsync<CheckpointState>(context, blob).ThrowIfFailureAsync();
                            checkpointState.FileName = blob;

                            // Every consumer recorded in the checkpoint is registered in _pushLocations with the configured expiry.
                            foreach (var consumer in checkpointState.Consumers)
                            {
                                _pushLocations.Add(consumer, _configuration.PushCheckpointCandidateExpiry);
                            }

                            return Result.Success(checkpointState);
                        }
                        catch (OperationCanceledException) when (context.Token.IsCancellationRequested)
                        {
                            // We hit timeout or a proper cancellation.
                            // Breaking from the loop instead of tracing error for each iteration.
                            // OperationCanceledException is the base of TaskCanceledException, so this also covers
                            // cancellations surfaced via CancellationToken.ThrowIfCancellationRequested.
                            break;
                        }
                        catch (Exception e)
                        {
                            Tracer.Error(context, e, $"Failed to obtain {nameof(CheckpointState)} from blob `{blob.Name}`. Skipping.");
                            continue;
                        }
                    }

                    // Add slack for start cursor to account for clock skew
                    return CheckpointState.CreateUnavailable(_clock.UtcNow - _configuration.NewEpochEventStartCursorDelay);
                },
                extraEndMessage: result =>
                {
                    if (!result.Succeeded)
                    {
                        return string.Empty;
                    }

                    var checkpointState = result.Value;
                    return $"CheckpointId=[{checkpointState.CheckpointId}] SequencePoint=[{checkpointState.StartSequencePoint}]";
                },
                timeout: _configuration.CheckpointStateTimeout);
        }
        /// <summary>
        /// Round-trips three checkpoint states through JSON: sequence point only, with a checkpoint id,
        /// and with both a checkpoint id and a producer.
        /// </summary>
        public void CanJsonSerializeCheckpointState()
        {
            // Sequence point only.
            TestJsonRoundtrip(new CheckpointState(new EventSequencePoint(42)));

            // Sequence point and checkpoint id.
            TestJsonRoundtrip(new CheckpointState(new EventSequencePoint(42), checkpointId: "TestCheckpointId"));

            // Sequence point, checkpoint id and producer.
            TestJsonRoundtrip(
                new CheckpointState(
                    new EventSequencePoint(42),
                    checkpointId: "TestCheckpointId",
                    producer: new MachineLocation("This is a machine loc")));
        }
        /// <inheritdoc />
        public Task<Result<CheckpointState>> GetCheckpointStateAsync(OperationContext context)
        {
            return context.PerformOperationAsync(
                Tracer,
                async () =>
                {
                    // Known checkpoints and the start cursor come from the checkpoints hash; a failed result throws.
                    var (knownCheckpoints, cursorTime) = await _checkpointsKey.UseNonConcurrentReplicatedHashAsync(
                            context,
                            Configuration.RetryWindow,
                            RedisOperation.GetCheckpoint,
                            (batch, key) => batch.GetCheckpointsInfoAsync(key, _clock.UtcNow),
                            timeout: Configuration.ClusterRedisOperationTimeout)
                        .ThrowIfFailureAsync();

                    // The role is refreshed (without releasing it) on every call and cached in _role.
                    var roleUpdate = await UpdateRoleAsync(context, release: false);
                    if (!roleUpdate.Succeeded)
                    {
                        return new ErrorResult(roleUpdate).AsResult<Result<CheckpointState>>();
                    }

                    _role = roleUpdate.Value;

                    // The master's machine name is read from the "M#1.MachineName" field of the master lease hash.
                    var masterMachineName = await _masterLeaseKey.UseNonConcurrentReplicatedHashAsync(
                            context,
                            Configuration.RetryWindow,
                            RedisOperation.GetCheckpoint,
                            (batch, key) => batch.AddOperation("GetRole", b => b.HashGetAsync(key, "M#1.MachineName")),
                            timeout: Configuration.ClusterRedisOperationTimeout)
                        .ThrowIfFailureAsync();

                    // A null hash value maps to the default MachineLocation.
                    var masterLocation = masterMachineName.IsNull
                        ? default(MachineLocation)
                        : new MachineLocation((string)masterMachineName);

                    // The checkpoint with the greatest creation time wins.
                    var latest = knownCheckpoints.MaxByOrDefault(c => c.CheckpointCreationTime);
                    if (latest != null)
                    {
                        Tracer.Debug(context, $"Getting checkpoint state: Found checkpoint '{latest}'");

                        return Result.Success(
                            new CheckpointState(
                                _role.Value,
                                new EventSequencePoint(latest.SequenceNumber),
                                latest.CheckpointId,
                                latest.CheckpointCreationTime,
                                new MachineLocation(latest.MachineName),
                                masterLocation));
                    }

                    Tracer.Debug(context, $"Getting checkpoint state: Can't find a checkpoint: Start cursor time: {cursorTime}");

                    // Add slack for start cursor to account for clock skew between event hub and redis
                    return CheckpointState.CreateUnavailable(
                        _role.Value,
                        cursorTime - Configuration.EventStore.NewEpochEventStartCursorDelay,
                        masterLocation);
                },
                Counters[GlobalStoreCounters.GetCheckpointState]);
        }
        /// <summary>
        /// Advances the periodic checkpoint/truncation state to <c>CheckpointCompleted</c> when a relevant
        /// checkpoint finished successfully; otherwise leaves the state untouched.
        /// </summary>
        /// <param name="err">Exception reported for the checkpoint, or null when there was none.</param>
        /// <param name="state">Final state of the checkpoint; only <c>CheckpointState.Completed</c> counts as success.</param>
        /// <param name="isRecoveredCheckpoint">When true the checkpoint is considered regardless of the current periodic state.</param>
        public void OnCheckpointCompleted(Exception err, CheckpointState state, bool isRecoveredCheckpoint)
        {
            // A checkpoint is relevant when it is a recovered one, or when the periodic process had started it.
            // Anything else is a regular checkpoint that was not initiated by config.
            bool isRelevant = isRecoveredCheckpoint
                || periodicCheckpointTruncationState == PeriodicCheckpointTruncationState.CheckpointStarted;
            if (!isRelevant)
            {
                return;
            }

            // NOTE(review): the previous comment here said a failed checkpoint resets the periodic process to
            // 'Ready', but no assignment happens on failure in this method - confirm where that reset is done.
            bool completedSuccessfully = err == null && state == CheckpointState.Completed;
            if (completedSuccessfully)
            {
                periodicCheckpointTruncationState = PeriodicCheckpointTruncationState.CheckpointCompleted;
            }
        }
        /// <summary>
        /// Manual test: forces the first worker to restore a hard-coded checkpoint and then runs reconciliation,
        /// printing the reconcile result. The storage connection string is empty as written and must be
        /// filled in by hand before the test can talk to real storage.
        /// </summary>
        public Task ReconciliationOverRealStorage()
        {
            var checkpointsKey = Guid.NewGuid().ToString();
            // Copy and paste a real connection string here.
            var storageConnectionString = string.Empty;
            // Consider updating this directory if you want to keep data between invocations.
            var workingDirectory = TestRootDirectoryPath;
            // NOTE(review): 'configuration' and 'blobStoreConfiguration' are not referenced again in this method;
            // presumably left over from an earlier version - confirm before removing, the constructors may validate.
            var configuration    = new LocalDiskCentralStoreConfiguration(
                workingDirectory,
                checkpointsKey);
            var blobStoreConfiguration = new BlobCentralStoreConfiguration(
                credentials: new AzureBlobStorageCredentials(storageConnectionString),
                containerName: "checkpoints",
                checkpointsKey: checkpointsKey);
            // A default-constructed location is used as both trailing location arguments of the checkpoint state below.
            var producerMachineLocation = new MachineLocation();

            // Reconcile once, and expose the connection string to the host as the "StorageName" secret.
            ConfigureWithOneMaster(s =>
            {
                s.ReconcileMode          = ReconciliationMode.Once.ToString();
                s.AzureStorageSecretName = Host.StoreSecret("StorageName", storageConnectionString);
            });

            return(RunTestAsync(
                       new Context(Logger),
                       2,
                       async context =>
            {
                // NOTE(review): 'master', 'workerId' and 'workerSession' are not used below.
                var master = context.GetMaster();
                var worker = context.GetFirstWorker();
                var workerId = worker.LocalLocationStore.ClusterState.PrimaryMachineId;

                var workerSession = context.Sessions[context.GetFirstWorkerIndex()];

                // Hard-coded checkpoint: worker role, a fixed sequence point and a fixed incremental checkpoint id.
                var checkpointState = new CheckpointState(
                    Role.Worker,
                    EventSequencePoint.Parse("24382354"),
                    "MD5:8C4856EA13F6AD59B65D8F6781D2A2F9||DCS||incrementalCheckpoints/24382354.10a0ca0f-d63f-4992-a088-f67bd00abd8a.checkpointInfo.txt|Incremental",
                    DateTime.Now,
                    producerMachineLocation,
                    producerMachineLocation);
                // Process the state inline on the worker with forceRestore so that it restores the checkpoint.
                await worker.LocalLocationStore.ProcessStateAsync(new OperationContext(context), checkpointState, inline: true, forceRestore: true).ShouldBeSuccess();
                var reconcileResult = await worker.ReconcileAsync(context).ShouldBeSuccess();
                Output.WriteLine($"Reconcile result: {reconcileResult}");
            }));
        }
Exemple #13
0
        /// <inheritdoc />
        public Task<Result<CheckpointState>> GetCheckpointStateAsync(OperationContext context)
        {
            return context.PerformOperationAsync(
                Tracer,
                async () =>
                {
                    // Known checkpoints and the start cursor come from the checkpoints hash; a failed result throws.
                    var (knownCheckpoints, cursorTime) = await _checkpointsKey.UseReplicatedHashAsync(
                            context,
                            _configuration.RetryWindow,
                            RedisOperation.GetCheckpoint,
                            (batch, key) => batch.GetCheckpointsInfoAsync(key, _clock.UtcNow))
                        .ThrowIfFailureAsync();

                    // The role is refreshed (without releasing it) on every call and cached in _role.
                    var roleUpdate = await UpdateRoleAsync(context, release: false);
                    if (!roleUpdate.Succeeded)
                    {
                        return new ErrorResult(roleUpdate).AsResult<Result<CheckpointState>>();
                    }

                    _role = roleUpdate.Value;

                    // The checkpoint with the greatest creation time wins.
                    var latest = knownCheckpoints.MaxByOrDefault(c => c.CheckpointCreationTime);
                    if (latest != null)
                    {
                        Tracer.Debug(context, $"Getting checkpoint state: Found checkpoint '{latest}'");

                        return Result.Success(
                            new CheckpointState(
                                _role.Value,
                                new EventSequencePoint(latest.SequenceNumber),
                                latest.CheckpointId,
                                latest.CheckpointCreationTime,
                                new MachineLocation(latest.MachineName)));
                    }

                    Tracer.Debug(context, $"Getting checkpoint state: Can't find a checkpoint: Start cursor time: {cursorTime}");

                    // Add slack for start cursor to account for clock skew between event hub and redis
                    return CheckpointState.CreateUnavailable(
                        _role.Value,
                        cursorTime - _configuration.EventStore.NewEpochEventStartCursorDelay);
                },
                Counters[GlobalStoreCounters.GetCheckpointState],
                // Using a timeout to make sure the operation finishes: this is important because we want for heartbeat operations
                // that call this method to keep running to avoid stale checkpoints.
                timeout: Configuration.GetCheckpointStateTimeout);
        }
        /// <summary>
        /// Initializes a new instance of the BeginCheckpointLogRecord class describing a completed checkpoint
        /// at the zero LSN and zero epoch, with no backup taken and no pending transaction.
        /// </summary>
        /// <param name="dummy">Used to indicate that this is not an Invalid BeginCheckpointLogRecord.</param>
        private BeginCheckpointLogRecord(bool dummy)
            : base(LogRecordType.BeginCheckpoint, LogicalSequenceNumber.ZeroLsn, null)
        {
            var zeroEpoch = LogicalSequenceNumber.ZeroEpoch;
            var zeroLsn = LogicalSequenceNumber.ZeroLsn;

            // Checkpoint description: already completed, zero progress vector.
            this.IsFirstCheckpointOnFullCopy = false;
            this.progressVector = ProgressVector.Clone(ProgressVector.ZeroProgressVector, 0, zeroEpoch, zeroEpoch);
            this.checkpointState = CheckpointState.Completed;
            this.lastStableLsn = zeroLsn;
            this.epoch = zeroEpoch;

            // No pending transaction.
            this.earliestPendingTransactionOffset = 0;
            this.earliestPendingTransaction = null;
            this.earliestPendingTransactionInvalidated = 0;

            // Indicates that a full backup has not been taken yet.
            this.highestBackedUpEpoch = zeroEpoch;
            this.highestBackedUpLsn = zeroLsn;

            // Indicates that the current backup stream has zero logs and hence 0 KB size.
            this.backupLogRecordCount = 0U;
            this.backupLogSize = 0U;

            // Both periodic timestamps start from the same local-time reading.
            var nowTicks = DateTime.Now.Ticks;
            this.lastPeriodicCheckpointTimeTicks = nowTicks;
            this.lastPeriodicTruncationTimeTicks = nowTicks;
        }
        /// <summary>
        /// Initializes a new instance of the BeginCheckpointLogRecord class.
        /// </summary>
        /// <remarks>Called when the replicator decides to checkpoint.</remarks>
        /// <param name="isFirstCheckpointOnFullCopy">Stored as-is in <c>IsFirstCheckpointOnFullCopy</c>.</param>
        /// <param name="progressVector">Source vector; a clone of it is stored, not the instance itself.</param>
        /// <param name="earliestPendingTransaction">Earliest pending transaction, or null when there is none.</param>
        /// <param name="headEpoch">Passed to <c>ProgressVector.Clone</c> together with the highest backed-up epoch.</param>
        /// <param name="epoch">Used as the record epoch only when <paramref name="earliestPendingTransaction"/> is null.</param>
        /// <param name="lsn">Forwarded to the base constructor.</param>
        /// <param name="lastLinkedPhysicalRecord">Forwarded to the base constructor.</param>
        /// <param name="lastCompletedBackupLogRecord">Source of the backup fields copied into this record.</param>
        /// <param name="progressVectorMaxEntries">Passed to <c>ProgressVector.Clone</c>.</param>
        /// <param name="periodicCheckpointTimeTicks">Stored as the last periodic checkpoint time.</param>
        /// <param name="periodicTruncationTimeTicks">Stored as the last periodic truncation time.</param>
        internal BeginCheckpointLogRecord(
            bool isFirstCheckpointOnFullCopy,
            ProgressVector progressVector,
            BeginTransactionOperationLogRecord earliestPendingTransaction,
            Epoch headEpoch,
            Epoch epoch,
            LogicalSequenceNumber lsn,
            PhysicalLogRecord lastLinkedPhysicalRecord,
            BackupLogRecord lastCompletedBackupLogRecord,
            uint progressVectorMaxEntries,
            long periodicCheckpointTimeTicks,
            long periodicTruncationTimeTicks)
            : base(LogRecordType.BeginCheckpoint, lsn, lastLinkedPhysicalRecord)
        {
            var lastBackup = lastCompletedBackupLogRecord;

            this.IsFirstCheckpointOnFullCopy = isFirstCheckpointOnFullCopy;
            this.progressVector = ProgressVector.Clone(
                progressVector,
                progressVectorMaxEntries,
                lastBackup.HighestBackedUpEpoch,
                headEpoch);

            // The offset, checkpoint state and stable LSN start out as invalid markers in this constructor.
            this.earliestPendingTransactionOffset = LogicalLogRecord.InvalidLogicalRecordOffset;
            this.earliestPendingTransaction = earliestPendingTransaction;
            this.checkpointState = CheckpointState.Invalid;
            this.lastStableLsn = LogicalSequenceNumber.InvalidLsn;

            // The epoch of the earliest pending transaction, when there is one, takes precedence over the supplied epoch.
            this.epoch = (earliestPendingTransaction != null)
                ? earliestPendingTransaction.RecordEpoch
                : epoch;

            // Initialize backup log record fields.
            this.highestBackedUpEpoch = lastBackup.HighestBackedUpEpoch;
            this.highestBackedUpLsn = lastBackup.HighestBackedUpLsn;
            this.backupLogRecordCount = lastBackup.BackupLogRecordCount;
            this.backupLogSize = lastBackup.BackupLogSizeInKB;

            this.earliestPendingTransactionInvalidated = 0;

            this.lastPeriodicCheckpointTimeTicks = periodicCheckpointTimeTicks;
            this.lastPeriodicTruncationTimeTicks = periodicTruncationTimeTicks;

            // Runs last, after every field above has been assigned.
            this.UpdateApproximateDiskSize();
        }
Exemple #16
0
        public async Task <BoolResult> OnChangeCheckpointAsync(OperationContext context, CheckpointState initialState, CheckpointManifest manifest)
        {
            _activeCheckpointInfo = _activeCheckpointInfo with
            {
                Manifest = manifest
            };

            if (initialState.FileName == null)
            {
                return(BoolResult.Success);
            }

            return(await context.PerformOperationAsync(
                       Tracer,
                       async() =>
            {
                var(state, index) = await _storage.ReadModifyWriteAsync <CheckpointState, int>(context, initialState.FileName.Value, state =>
                {
                    var index = state.Consumers.Count;
                    var updated = state.Consumers.TryAdd(_primaryMachineLocation);
                    if (!updated)
                    {
                        index = state.Consumers.IndexOf(_primaryMachineLocation);
                    }
                    return (state, index, updated);
                },
                                                                                               defaultValue: () => initialState).ThrowIfFailureAsync();

                var locations = GetCandidateLocations(state, index);
                _activeCheckpointInfo = new CheckpointInfoSnapshot(manifest, locations);
                return Result.Success(index);
            },
Exemple #17
0
        /// <summary>
        /// Restores the checkpoint for a given checkpoint id.
        /// </summary>
        /// <remarks>
        /// The checkpoint file is fetched from storage into the staging directory, expanded (incrementally or as a
        /// full checkpoint, depending on the id suffix), loaded into the database, and finally recorded as the
        /// latest checkpoint. Any failure along the way is thrown inside the operation.
        /// </remarks>
        /// <param name="context">Operation context; a nested context is created from it for this operation.</param>
        /// <param name="checkpointState">State whose <c>CheckpointId</c> identifies the checkpoint to restore.</param>
        public Task <BoolResult> RestoreCheckpointAsync(OperationContext context, CheckpointState checkpointState)
        {
            context = context.CreateNested();
            // Captured by the lambda below, which may strip the incremental suffix from it. The start message at the
            // bottom is formatted before the lambda runs, so it always shows the id as passed in.
            var checkpointId = checkpointState.CheckpointId;

            return(context.PerformOperationAsync(
                       _tracer,
                       async() =>
            {
                // Stays false unless every step below succeeds; consumed by the finally block.
                bool successfullyUpdatedIncrementalState = false;
                try
                {
                    // Full checkpoints use a ".zip" staging file; ids ending in the incremental suffix use ".txt".
                    bool isIncrementalCheckpoint = false;
                    var checkpointFileExtension = ".zip";
                    if (checkpointId.EndsWith(IncrementalCheckpointIdSuffix, StringComparison.OrdinalIgnoreCase))
                    {
                        isIncrementalCheckpoint = true;
                        checkpointFileExtension = ".txt";
                        // Remove the suffix to get the real checkpoint id used with central storage
                        checkpointId = checkpointId.Substring(0, checkpointId.Length - IncrementalCheckpointIdSuffix.Length);
                    }

                    var checkpointFile = _checkpointStagingDirectory / $"chkpt{checkpointFileExtension}";
                    var extractedCheckpointDirectory = _checkpointStagingDirectory / "chkpt";

                    // Clear out the contents of the staging directory and of the extraction directory inside it.
                    FileUtilities.DeleteDirectoryContents(_checkpointStagingDirectory.ToString());
                    FileUtilities.DeleteDirectoryContents(extractedCheckpointDirectory.ToString());

                    // Creating a working temporary folder
                    using (new DisposableDirectory(_fileSystem, _checkpointStagingDirectory))
                    {
                        // Getting the checkpoint from the central store
                        await _storage.TryGetFileAsync(context, checkpointId, checkpointFile).ThrowIfFailure();

                        if (isIncrementalCheckpoint)
                        {
                            var incrementalRestoreResult = await RestoreCheckpointIncrementalAsync(context, checkpointFile, extractedCheckpointDirectory);
                            incrementalRestoreResult.ThrowIfFailure();
                        }
                        else
                        {
                            RestoreFullCheckpoint(checkpointFile, extractedCheckpointDirectory);
                        }

                        // Restoring the checkpoint
                        _database.RestoreCheckpoint(context, extractedCheckpointDirectory).ThrowIfFailure();

                        // Save latest checkpoint info to file in case we get restarted and want to know about the previous checkpoint.
                        WriteLatestCheckpoint(context, checkpointState);

                        successfullyUpdatedIncrementalState = true;
                        return BoolResult.Success;
                    }
                }
                finally
                {
                    // Runs on success and on failure; the flag tells the helper whether the restore completed.
                    ClearIncrementalCheckpointStateIfNeeded(context, successfullyUpdatedIncrementalState);
                }
            },
                       extraStartMessage: $"CheckpointId=[{checkpointId}]",
                       extraEndMessage: _ => $"CheckpointId=[{checkpointId}]"));
        }
        /// <summary>
        /// Registers the checkpoint with both the primary and the fallback store concurrently.
        /// </summary>
        /// <param name="context">Operation context passed to both stores.</param>
        /// <param name="checkpointState">The checkpoint state to register.</param>
        /// <returns>
        /// Success whenever the primary registration succeeded, regardless of the fallback outcome;
        /// otherwise a failure built from both results.
        /// </returns>
        public async Task<BoolResult> RegisterCheckpointAsync(OperationContext context, CheckpointState checkpointState)
        {
            // Both registrations are started before either is awaited.
            var registrations = new[]
            {
                _primary.RegisterCheckpointAsync(context, checkpointState),
                _fallback.RegisterCheckpointAsync(context, checkpointState),
            };

            // Results come back in the same order as the tasks: primary first, fallback second.
            var outcomes = await Task.WhenAll(registrations);
            var primaryOutcome = outcomes[0];
            var fallbackOutcome = outcomes[1];

            return primaryOutcome.Succeeded
                ? BoolResult.Success
                : new BoolResult(primaryOutcome & fallbackOutcome, $"Failed to register checkpoint `{checkpointState}` to both primary and fallback");
        }