/// <inheritdoc />
public Task<Result<CheckpointState>> GetCheckpointStateAsync(OperationContext context)
{
    return context.PerformOperationAsync(
        Tracer,
        async () =>
        {
            // Read the registered checkpoints and the start cursor in a single batch;
            // a failed result is turned into an exception by ThrowIfFailureAsync.
            var (checkpoints, startCursor) = await ExecuteRedisFallbackAsync(
                    context,
                    async redisDb =>
                    {
                        var checkpointsInfo = await redisDb.ExecuteBatchAsync(
                            context,
                            batch => batch.GetCheckpointsInfoAsync(_checkpointsKey, _clock.UtcNow),
                            RedisOperation.GetCheckpoint);
                        return Result.Success(checkpointsInfo);
                    })
                .ThrowIfFailureAsync();

            // Refresh the role (without releasing it) and cache it; a role failure fails the whole operation.
            var roleUpdate = await UpdateRoleAsync(context, release: false);
            if (!roleUpdate.Succeeded)
            {
                return new ErrorResult(roleUpdate).AsResult<Result<CheckpointState>>();
            }

            _role = roleUpdate.Value;

            // The checkpoint with the greatest creation time wins.
            var latestCheckpoint = checkpoints.MaxByOrDefault(c => c.CheckpointCreationTime);
            if (latestCheckpoint != null)
            {
                Tracer.Debug(context, $"Getting checkpoint state: Found checkpoint '{latestCheckpoint}'");
                return Result.Success(
                    new CheckpointState(
                        _role.Value,
                        new EventSequencePoint(latestCheckpoint.SequenceNumber),
                        latestCheckpoint.CheckpointId,
                        latestCheckpoint.CheckpointCreationTime));
            }

            Tracer.Debug(context, $"Getting checkpoint state: Can't find a checkpoint: Start cursor time: {startCursor}");

            // Add slack for start cursor to account for clock skew between event hub and redis
            var epochStartCursor = startCursor - _configuration.EventStore.NewEpochEventStartCursorDelay;
            return CheckpointState.CreateUnavailable(_role.Value, epochStartCursor);
        },
        Counters[GlobalStoreCounters.GetCheckpointState]);
}
/// <summary>
/// Triggers garbage collection, then writes <paramref name="checkpointState"/> to storage under a freshly
/// generated blob name, bounded by the configured register-checkpoint timeout.
/// </summary>
/// <param name="context">Operation context used for tracing and for the storage call.</param>
/// <param name="checkpointState">State to persist; this machine is added to its consumers before writing.</param>
/// <returns>The result of the storage write.</returns>
public Task<BoolResult> RegisterCheckpointAsync(OperationContext context, CheckpointState checkpointState)
{
    TriggerGarbageCollection(context);

    // Rendered once, before the consumer list is modified below; reused for both start and end messages.
    var msg = checkpointState.ToString();

    return context.PerformOperationWithTimeoutAsync(
        Tracer,
        nestedContext =>
        {
            var blobName = GenerateBlobName();
            checkpointState.Consumers.TryAdd(_primaryMachineLocation);

            // Legacy format stores the JSON text; otherwise the state object itself is handed to storage.
            return _configuration.WriteLegacyFormat
                ? _storage.WriteAsync(nestedContext, blobName, JsonSerializer.Serialize(checkpointState, _jsonSerializerOptions))
                : _storage.WriteAsync(nestedContext, blobName, checkpointState);
        },
        traceOperationStarted: false,
        extraStartMessage: msg,
        extraEndMessage: _ => msg,
        timeout: _configuration.RegisterCheckpointTimeout);
}
/// <summary>
/// Initializes a new instance of the BeginCheckpointLogRecord class.
/// </summary>
/// <remarks>Only used for generating invalid BeginCheckpointLogRecord.</remarks>
private BeginCheckpointLogRecord()
{
    // Checkpoint identity: everything is marked invalid / absent.
    this.checkpointState = CheckpointState.Invalid;
    this.IsFirstCheckpointOnFullCopy = false;
    this.progressVector = null;
    this.epoch = LogicalSequenceNumber.InvalidEpoch;
    this.lastStableLsn = LogicalSequenceNumber.InvalidLsn;

    // Earliest pending transaction: invalid placeholders.
    this.earliestPendingTransaction = BeginTransactionOperationLogRecord.InvalidBeginTransactionLogRecord;
    this.earliestPendingTransactionOffset = LogicalLogRecord.InvalidLogicalRecordOffset;
    this.earliestPendingTransactionInvalidated = 0;

    // Initializes the backup information to invalid.
    // NOTE(review): unlike the (recordType, recordPosition, lsn) constructor, backupId is not assigned here;
    // confirm its default value is the intended invalid id.
    var invalidLsnValue = LogicalSequenceNumber.InvalidLsn.LSN;
    this.highestBackedUpEpoch = new Epoch(invalidLsnValue, invalidLsnValue);
    this.highestBackedUpLsn = LogicalSequenceNumber.InvalidLsn;

    // Uint.MaxValue is used to indicate invalid. 4,294,967,295 log records, 4.294967295 TB.
    this.backupLogRecordCount = uint.MaxValue;
    this.backupLogSize = uint.MaxValue;

    // Periodic checkpoint / truncation timestamps start at zero ticks.
    this.lastPeriodicCheckpointTimeTicks = 0;
    this.lastPeriodicTruncationTimeTicks = 0;
}
/// <summary>
/// Initializes a new instance of the BeginCheckpointLogRecord class for the given record type, position and LSN,
/// with every checkpoint-specific field set to its invalid or zero value.
/// </summary>
/// <param name="recordType">Must be <c>LogRecordType.BeginCheckpoint</c>; asserted on entry.</param>
/// <param name="recordPosition">Record position forwarded to the base constructor.</param>
/// <param name="lsn">Logical sequence number forwarded to the base constructor.</param>
internal BeginCheckpointLogRecord(LogRecordType recordType, ulong recordPosition, long lsn)
    : base(recordType, recordPosition, lsn)
{
    Utility.Assert(
        recordType == LogRecordType.BeginCheckpoint,
        "Record type is expected to be begin checkpoint but the record type is : {0}",
        recordType);

    // Checkpoint identity: everything is marked invalid / absent.
    this.checkpointState = CheckpointState.Invalid;
    this.IsFirstCheckpointOnFullCopy = false;
    this.progressVector = null;
    this.epoch = LogicalSequenceNumber.InvalidEpoch;
    this.lastStableLsn = LogicalSequenceNumber.InvalidLsn;

    // Earliest pending transaction: invalid placeholders.
    this.earliestPendingTransaction = BeginTransactionOperationLogRecord.InvalidBeginTransactionLogRecord;
    this.earliestPendingTransactionOffset = LogicalLogRecord.InvalidLogicalRecordOffset;
    this.earliestPendingTransactionInvalidated = 0;

    // Initializes the backup information to invalid.
    var invalidLsnValue = LogicalSequenceNumber.InvalidLsn.LSN;
    this.backupId = BackupLogRecord.InvalidBackupId;
    this.highestBackedUpEpoch = new Epoch(invalidLsnValue, invalidLsnValue);
    this.highestBackedUpLsn = LogicalSequenceNumber.InvalidLsn;

    // Uint.MaxValue is used to indicate invalid. 4,294,967,295 log records, 4.294967295 TB.
    this.backupLogRecordCount = uint.MaxValue;
    this.backupLogSize = uint.MaxValue;

    // Periodic checkpoint / truncation timestamps start at zero ticks.
    this.lastPeriodicCheckpointTimeTicks = 0;
    this.lastPeriodicTruncationTimeTicks = 0;
}
/// <summary>
/// Serializes a checkpoint state with <c>JsonSerializer</c> and checks that <c>JsonUtilities.JsonDeserialize</c>
/// reads back the same checkpoint id and producer.
/// </summary>
public void CanBackwardCompatJsonSerializeCheckpointState()
{
    var sequencePoint = new EventSequencePoint(42);
    var producer = new MachineLocation("This is a machine loc");
    var expected = new CheckpointState(sequencePoint, checkpointId: "TestCheckpointId", producer: producer);

    var json = JsonSerializer.Serialize(expected);
    var actual = JsonUtilities.JsonDeserialize<CheckpointState>(json);

    Assert.Equal(expected.CheckpointId, actual.CheckpointId);
    Assert.Equal(expected.Producer, actual.Producer);
}
/// <summary>
/// Fails fast with a formatted message when <paramref name="condition"/> is false; does nothing otherwise.
/// </summary>
/// <param name="condition">Condition expected to hold.</param>
/// <param name="format">Composite format string for the failure message.</param>
/// <param name="param1">Checkpoint state substituted into <paramref name="format"/>.</param>
internal static void Assert(bool condition, string format, CheckpointState param1)
{
    if (condition)
    {
        return;
    }

    // The message is formatted only on the failure path, using the invariant culture.
    var failFastMessage = string.Format(System.Globalization.CultureInfo.InvariantCulture, format, param1);
    FailFast(failFastMessage);

    // AMW - Force break into debugger for ease of debugging
    Debugger.Break();
}
/// <summary>
/// Best-effort write of "&lt;checkpoint id&gt;,&lt;checkpoint time&gt;" to the last-checkpoint file.
/// Any exception is traced as a warning and not propagated.
/// </summary>
/// <param name="context">Operation context used when tracing a failure.</param>
/// <param name="checkpointState">State whose id and time are recorded.</param>
private void WriteLatestCheckpointToFile(OperationContext context, CheckpointState checkpointState)
{
    try
    {
        var contents = $"{checkpointState.CheckpointId},{checkpointState.CheckpointTime}";
        _fileSystem.WriteAllText(_lastCheckpointFile, contents);
    }
    catch (Exception e)
    {
        // Deliberately swallowed: a failure here only produces a warning trace.
        _tracer.Warning(context, $"Failed to write latest checkpoint state to disk: {e}");
    }
}
/// <summary>
/// Returns the first checkpoint state that can be read from the blobs yielded by
/// <c>ListBlobsRecentFirstAsync</c>; when none can be read, returns an unavailable state.
/// </summary>
/// <param name="context">Operation context used for tracing, cancellation and storage calls.</param>
public Task<Result<CheckpointState>> GetCheckpointStateAsync(OperationContext context)
{
    // NOTE: this function is naturally retried by the heartbeat mechanisms in LLS
    return context.PerformOperationWithTimeoutAsync(
        Tracer,
        async nestedContext =>
        {
            await foreach (var blob in ListBlobsRecentFirstAsync(nestedContext))
            {
                try
                {
                    var state = await _storage.ReadAsync<CheckpointState>(nestedContext, blob).ThrowIfFailureAsync();
                    state.FileName = blob;

                    // Every consumer recorded in the checkpoint is registered as a push location with an expiry.
                    foreach (var consumer in state.Consumers)
                    {
                        _pushLocations.Add(consumer, _configuration.PushCheckpointCandidateExpiry);
                    }

                    return Result.Success(state);
                }
                catch (TaskCanceledException) when (nestedContext.Token.IsCancellationRequested)
                {
                    // We hit timeout or a proper cancellation.
                    // Breaking from the loop instead of tracing error for each iteration.
                    break;
                }
                catch (Exception e)
                {
                    // Any other failure is traced and the next blob is tried.
                    Tracer.Error(nestedContext, e, $"Failed to obtain {nameof(CheckpointState)} from blob `{blob.Name}`. Skipping.");
                }
            }

            // Add slack for start cursor to account for clock skew
            return CheckpointState.CreateUnavailable(_clock.UtcNow - _configuration.NewEpochEventStartCursorDelay);
        },
        extraEndMessage: result => result.Succeeded
            ? $"CheckpointId=[{result.Value.CheckpointId}] SequencePoint=[{result.Value.StartSequencePoint}]"
            : string.Empty,
        timeout: _configuration.CheckpointStateTimeout);
}
/// <summary>
/// Runs <c>TestJsonRoundtrip</c> over checkpoint states built with progressively more constructor arguments:
/// sequence point only, plus checkpoint id, plus producer.
/// </summary>
public void CanJsonSerializeCheckpointState()
{
    var samples = new[]
    {
        new CheckpointState(new EventSequencePoint(42)),
        new CheckpointState(new EventSequencePoint(42), checkpointId: "TestCheckpointId"),
        new CheckpointState(
            new EventSequencePoint(42),
            checkpointId: "TestCheckpointId",
            producer: new MachineLocation("This is a machine loc")),
    };

    foreach (var sample in samples)
    {
        TestJsonRoundtrip(sample);
    }
}
/// <inheritdoc />
public Task<Result<CheckpointState>> GetCheckpointStateAsync(OperationContext context)
{
    return context.PerformOperationAsync(
        Tracer,
        async () =>
        {
            // Step 1: read the registered checkpoints and the start cursor; failures are rethrown.
            var (checkpoints, startCursor) = await _checkpointsKey.UseNonConcurrentReplicatedHashAsync(
                    context,
                    Configuration.RetryWindow,
                    RedisOperation.GetCheckpoint,
                    (batch, key) => batch.GetCheckpointsInfoAsync(key, _clock.UtcNow),
                    timeout: Configuration.ClusterRedisOperationTimeout)
                .ThrowIfFailureAsync();

            // Step 2: refresh the role (without releasing it); a role failure fails the whole operation.
            var roleUpdate = await UpdateRoleAsync(context, release: false);
            if (!roleUpdate.Succeeded)
            {
                return new ErrorResult(roleUpdate).AsResult<Result<CheckpointState>>();
            }

            _role = roleUpdate.Value;

            // Step 3: read the machine name stored in the master lease hash; a null value maps to the default location.
            var masterName = await _masterLeaseKey.UseNonConcurrentReplicatedHashAsync(
                    context,
                    Configuration.RetryWindow,
                    RedisOperation.GetCheckpoint,
                    (batch, key) => batch.AddOperation("GetRole", b => b.HashGetAsync(key, "M#1.MachineName")),
                    timeout: Configuration.ClusterRedisOperationTimeout)
                .ThrowIfFailureAsync();

            MachineLocation masterLocation;
            if (masterName.IsNull)
            {
                masterLocation = default(MachineLocation);
            }
            else
            {
                masterLocation = new MachineLocation((string)masterName);
            }

            // Step 4: the checkpoint with the greatest creation time wins.
            var latestCheckpoint = checkpoints.MaxByOrDefault(c => c.CheckpointCreationTime);
            if (latestCheckpoint != null)
            {
                Tracer.Debug(context, $"Getting checkpoint state: Found checkpoint '{latestCheckpoint}'");
                return Result.Success(
                    new CheckpointState(
                        _role.Value,
                        new EventSequencePoint(latestCheckpoint.SequenceNumber),
                        latestCheckpoint.CheckpointId,
                        latestCheckpoint.CheckpointCreationTime,
                        new MachineLocation(latestCheckpoint.MachineName),
                        masterLocation));
            }

            Tracer.Debug(context, $"Getting checkpoint state: Can't find a checkpoint: Start cursor time: {startCursor}");

            // Add slack for start cursor to account for clock skew between event hub and redis
            var epochStartCursor = startCursor - Configuration.EventStore.NewEpochEventStartCursorDelay;
            return CheckpointState.CreateUnavailable(_role.Value, epochStartCursor, masterLocation);
        },
        Counters[GlobalStoreCounters.GetCheckpointState]);
}
/// <summary>
/// Advances the periodic checkpoint/truncation state once a checkpoint has finished.
/// </summary>
/// <param name="err">Exception reported for the checkpoint, or null when there was none.</param>
/// <param name="state">Final state reported for the checkpoint.</param>
/// <param name="isRecoveredCheckpoint">
/// When true, the completion is processed even if the periodic state is not <c>CheckpointStarted</c>.
/// </param>
public void OnCheckpointCompleted(Exception err, CheckpointState state, bool isRecoveredCheckpoint)
{
    if (!isRecoveredCheckpoint && periodicCheckpointTruncationState != PeriodicCheckpointTruncationState.CheckpointStarted)
    {
        // Checkpoint not initiated by config
        // Indicates regular checkpoint
        return;
    }

    if (err != null || state != CheckpointState.Completed)
    {
        // Checkpoint failed to complete successfully, reset periodic process to 'Ready'.
        // Without this assignment the state would remain at its previous value (e.g. CheckpointStarted)
        // after a failed checkpoint.
        periodicCheckpointTruncationState = PeriodicCheckpointTruncationState.Ready;
        return;
    }

    periodicCheckpointTruncationState = PeriodicCheckpointTruncationState.CheckpointCompleted;
}
/// <summary>
/// Manual test: forces a worker to process a hard-coded checkpoint state and then runs reconciliation on it.
/// Requires a real storage connection string to be pasted in below.
/// </summary>
public Task ReconciliationOverRealStorage()
{
    var checkpointsKey = Guid.NewGuid().ToString();

    // Copy and paste a real connection string here.
    var storageConnectionString = string.Empty;

    // Consider updating this directory if you want to keep data between invocations.
    var workingDirectory = TestRootDirectoryPath;

    // The two configuration objects are constructed but not referenced again in this method.
    var configuration = new LocalDiskCentralStoreConfiguration(workingDirectory, checkpointsKey);
    var credentials = new AzureBlobStorageCredentials(storageConnectionString);
    var blobStoreConfiguration = new BlobCentralStoreConfiguration(
        credentials: credentials,
        containerName: "checkpoints",
        checkpointsKey: checkpointsKey);

    var producerMachineLocation = new MachineLocation();

    ConfigureWithOneMaster(
        settings =>
        {
            settings.ReconcileMode = ReconciliationMode.Once.ToString();
            settings.AzureStorageSecretName = Host.StoreSecret("StorageName", storageConnectionString);
        });

    return RunTestAsync(
        new Context(Logger),
        2,
        async context =>
        {
            var master = context.GetMaster();
            var worker = context.GetFirstWorker();
            var workerId = worker.LocalLocationStore.ClusterState.PrimaryMachineId;
            var workerSession = context.Sessions[context.GetFirstWorkerIndex()];

            var sequencePoint = EventSequencePoint.Parse("24382354");
            var checkpointState = new CheckpointState(
                Role.Worker,
                sequencePoint,
                "MD5:8C4856EA13F6AD59B65D8F6781D2A2F9||DCS||incrementalCheckpoints/24382354.10a0ca0f-d63f-4992-a088-f67bd00abd8a.checkpointInfo.txt|Incremental",
                DateTime.Now,
                producerMachineLocation,
                producerMachineLocation);

            // Next heartbeat workers to restore checkpoint
            var operationContext = new OperationContext(context);
            await worker.LocalLocationStore
                .ProcessStateAsync(operationContext, checkpointState, inline: true, forceRestore: true)
                .ShouldBeSuccess();

            var reconcileResult = await worker.ReconcileAsync(context).ShouldBeSuccess();
            Output.WriteLine($"Reconcile result: {reconcileResult}");
        });
}
/// <inheritdoc />
public Task<Result<CheckpointState>> GetCheckpointStateAsync(OperationContext context)
{
    return context.PerformOperationAsync(
        Tracer,
        async () =>
        {
            // Read the registered checkpoints and the start cursor; a failed result is rethrown.
            var (checkpoints, startCursor) = await _checkpointsKey.UseReplicatedHashAsync(
                    context,
                    _configuration.RetryWindow,
                    RedisOperation.GetCheckpoint,
                    (batch, key) => batch.GetCheckpointsInfoAsync(key, _clock.UtcNow))
                .ThrowIfFailureAsync();

            // Refresh the role (without releasing it) and cache it; a role failure fails the whole operation.
            var roleUpdate = await UpdateRoleAsync(context, release: false);
            if (!roleUpdate.Succeeded)
            {
                return new ErrorResult(roleUpdate).AsResult<Result<CheckpointState>>();
            }

            _role = roleUpdate.Value;

            // The checkpoint with the greatest creation time wins.
            var latestCheckpoint = checkpoints.MaxByOrDefault(c => c.CheckpointCreationTime);
            if (latestCheckpoint != null)
            {
                Tracer.Debug(context, $"Getting checkpoint state: Found checkpoint '{latestCheckpoint}'");
                return Result.Success(
                    new CheckpointState(
                        _role.Value,
                        new EventSequencePoint(latestCheckpoint.SequenceNumber),
                        latestCheckpoint.CheckpointId,
                        latestCheckpoint.CheckpointCreationTime,
                        new MachineLocation(latestCheckpoint.MachineName)));
            }

            Tracer.Debug(context, $"Getting checkpoint state: Can't find a checkpoint: Start cursor time: {startCursor}");

            // Add slack for start cursor to account for clock skew between event hub and redis
            var epochStartCursor = startCursor - _configuration.EventStore.NewEpochEventStartCursorDelay;
            return CheckpointState.CreateUnavailable(_role.Value, epochStartCursor);
        },
        Counters[GlobalStoreCounters.GetCheckpointState],
        // Using a timeout to make sure the operation finishes: this is important because we want for heartbeat operations
        // that call this method to keep running to avoid stale checkpoints.
        timeout: Configuration.GetCheckpointStateTimeout);
}
/// <summary>
/// Initializes a new instance of the BeginCheckpointLogRecord class.
/// </summary>
/// <param name="dummy">Used to indicate that this is not an Invalid BeginCheckpointLogRecord.</param>
private BeginCheckpointLogRecord(bool dummy)
    : base(LogRecordType.BeginCheckpoint, LogicalSequenceNumber.ZeroLsn, null)
{
    // Checkpoint identity: a completed checkpoint at the zero epoch / zero LSN.
    this.checkpointState = CheckpointState.Completed;
    this.IsFirstCheckpointOnFullCopy = false;
    this.epoch = LogicalSequenceNumber.ZeroEpoch;
    this.lastStableLsn = LogicalSequenceNumber.ZeroLsn;
    this.progressVector = ProgressVector.Clone(
        ProgressVector.ZeroProgressVector,
        0,
        LogicalSequenceNumber.ZeroEpoch,
        LogicalSequenceNumber.ZeroEpoch);

    // No pending transaction.
    this.earliestPendingTransaction = null;
    this.earliestPendingTransactionOffset = 0;
    this.earliestPendingTransactionInvalidated = 0;

    // Indicates that a full backup has not been taken yet.
    this.highestBackedUpEpoch = LogicalSequenceNumber.ZeroEpoch;
    this.highestBackedUpLsn = LogicalSequenceNumber.ZeroLsn;

    // Indicates that the current backup stream has zero logs and hence 0 KB size.
    this.backupLogRecordCount = 0u;
    this.backupLogSize = 0u;

    // Both periodic timestamps are set to the same local-time tick count taken at construction.
    var nowTicks = DateTime.Now.Ticks;
    this.lastPeriodicCheckpointTimeTicks = nowTicks;
    this.lastPeriodicTruncationTimeTicks = nowTicks;
}
/// <summary>
/// Initializes a new instance of the BeginCheckpointLogRecord class.
/// </summary>
/// <remarks>Called when the replicator decides to checkpoint.</remarks>
internal BeginCheckpointLogRecord(
    bool isFirstCheckpointOnFullCopy,
    ProgressVector progressVector,
    BeginTransactionOperationLogRecord earliestPendingTransaction,
    Epoch headEpoch,
    Epoch epoch,
    LogicalSequenceNumber lsn,
    PhysicalLogRecord lastLinkedPhysicalRecord,
    BackupLogRecord lastCompletedBackupLogRecord,
    uint progressVectorMaxEntries,
    long periodicCheckpointTimeTicks,
    long periodicTruncationTimeTicks)
    : base(LogRecordType.BeginCheckpoint, lsn, lastLinkedPhysicalRecord)
{
    this.IsFirstCheckpointOnFullCopy = isFirstCheckpointOnFullCopy;

    // The progress vector is cloned using the max-entries limit, the last backup's epoch and the head epoch.
    var backedUpEpoch = lastCompletedBackupLogRecord.HighestBackedUpEpoch;
    this.progressVector = ProgressVector.Clone(progressVector, progressVectorMaxEntries, backedUpEpoch, headEpoch);

    // Checkpoint state and stable LSN start out invalid.
    this.checkpointState = CheckpointState.Invalid;
    this.lastStableLsn = LogicalSequenceNumber.InvalidLsn;

    // Pending-transaction tracking; the record epoch comes from the earliest pending transaction when there is one.
    this.earliestPendingTransaction = earliestPendingTransaction;
    this.earliestPendingTransactionOffset = LogicalLogRecord.InvalidLogicalRecordOffset;
    this.earliestPendingTransactionInvalidated = 0;
    if (earliestPendingTransaction == null)
    {
        this.epoch = epoch;
    }
    else
    {
        this.epoch = earliestPendingTransaction.RecordEpoch;
    }

    // Initialize backup log record fields.
    this.highestBackedUpEpoch = lastCompletedBackupLogRecord.HighestBackedUpEpoch;
    this.highestBackedUpLsn = lastCompletedBackupLogRecord.HighestBackedUpLsn;
    this.backupLogRecordCount = lastCompletedBackupLogRecord.BackupLogRecordCount;
    this.backupLogSize = lastCompletedBackupLogRecord.BackupLogSizeInKB;

    this.lastPeriodicCheckpointTimeTicks = periodicCheckpointTimeTicks;
    this.lastPeriodicTruncationTimeTicks = periodicTruncationTimeTicks;

    // Kept last so it runs after every field above has been assigned.
    this.UpdateApproximateDiskSize();
}
public async Task <BoolResult> OnChangeCheckpointAsync(OperationContext context, CheckpointState initialState, CheckpointManifest manifest) { _activeCheckpointInfo = _activeCheckpointInfo with { Manifest = manifest }; if (initialState.FileName == null) { return(BoolResult.Success); } return(await context.PerformOperationAsync( Tracer, async() => { var(state, index) = await _storage.ReadModifyWriteAsync <CheckpointState, int>(context, initialState.FileName.Value, state => { var index = state.Consumers.Count; var updated = state.Consumers.TryAdd(_primaryMachineLocation); if (!updated) { index = state.Consumers.IndexOf(_primaryMachineLocation); } return (state, index, updated); }, defaultValue: () => initialState).ThrowIfFailureAsync(); var locations = GetCandidateLocations(state, index); _activeCheckpointInfo = new CheckpointInfoSnapshot(manifest, locations); return Result.Success(index); },
/// <summary>
/// Restores the checkpoint for a given checkpoint id.
/// </summary>
public Task<BoolResult> RestoreCheckpointAsync(OperationContext context, CheckpointState checkpointState)
{
    context = context.CreateNested();
    var checkpointId = checkpointState.CheckpointId;

    return context.PerformOperationAsync(
        _tracer,
        async () =>
        {
            var incrementalStateUpdated = false;
            try
            {
                // Incremental checkpoints are recognised by their id suffix and are staged as .txt; full ones as .zip.
                var isIncremental = checkpointId.EndsWith(IncrementalCheckpointIdSuffix, StringComparison.OrdinalIgnoreCase);
                var stagedFileExtension = isIncremental ? ".txt" : ".zip";
                if (isIncremental)
                {
                    // Remove the suffix to get the real checkpoint id used with central storage
                    checkpointId = checkpointId.Substring(0, checkpointId.Length - IncrementalCheckpointIdSuffix.Length);
                }

                var stagedCheckpointFile = _checkpointStagingDirectory / $"chkpt{stagedFileExtension}";
                var extractedDirectory = _checkpointStagingDirectory / "chkpt";

                FileUtilities.DeleteDirectoryContents(_checkpointStagingDirectory.ToString());
                FileUtilities.DeleteDirectoryContents(extractedDirectory.ToString());

                // Creating a working temporary folder
                using (new DisposableDirectory(_fileSystem, _checkpointStagingDirectory))
                {
                    // Getting the checkpoint from the central store
                    await _storage.TryGetFileAsync(context, checkpointId, stagedCheckpointFile).ThrowIfFailure();

                    if (isIncremental)
                    {
                        (await RestoreCheckpointIncrementalAsync(context, stagedCheckpointFile, extractedDirectory)).ThrowIfFailure();
                    }
                    else
                    {
                        RestoreFullCheckpoint(stagedCheckpointFile, extractedDirectory);
                    }

                    // Restoring the checkpoint
                    _database.RestoreCheckpoint(context, extractedDirectory).ThrowIfFailure();

                    // Save latest checkpoint info to file in case we get restarted and want to know about the previous checkpoint.
                    WriteLatestCheckpoint(context, checkpointState);

                    incrementalStateUpdated = true;
                    return BoolResult.Success;
                }
            }
            finally
            {
                // Runs on success and on failure; the flag tells the helper whether the restore completed.
                ClearIncrementalCheckpointStateIfNeeded(context, incrementalStateUpdated);
            }
        },
        extraStartMessage: $"CheckpointId=[{checkpointId}]",
        extraEndMessage: _ => $"CheckpointId=[{checkpointId}]");
}
/// <summary>
/// Registers the checkpoint with both the primary and the fallback registry concurrently.
/// Succeeds when the primary registration succeeds; otherwise returns an error combining both results.
/// </summary>
/// <param name="context">Operation context forwarded to both registries.</param>
/// <param name="checkpointState">Checkpoint state to register.</param>
public async Task<BoolResult> RegisterCheckpointAsync(OperationContext context, CheckpointState checkpointState)
{
    // Both registrations are started before either is awaited.
    var primaryTask = _primary.RegisterCheckpointAsync(context, checkpointState);
    var fallbackTask = _fallback.RegisterCheckpointAsync(context, checkpointState);

    var results = await Task.WhenAll(primaryTask, fallbackTask);
    var primaryResult = results[0];
    var fallbackResult = results[1];

    // NOTE(review): this path is taken whenever the primary fails, even if the fallback succeeded,
    // while the message below says "both" — confirm whether a fallback success should count as success.
    if (!primaryResult.Succeeded)
    {
        return new BoolResult(
            primaryResult & fallbackResult,
            $"Failed to register checkpoint `{checkpointState}` to both primary and fallback");
    }

    return BoolResult.Success;
}