/// <summary>
/// Checks the status of the current recovery manager
/// </summary>
/// <remarks>
/// The method must:
/// - Check whether current recovery is running and has expired
/// - Whether there is an alive violation
/// - Whether the user is behind with processing
/// - Request a new recovery when the status is NotStarted or Error and the last system alive is
///   older than 60 seconds (but newer than the connection-down timestamp)
/// - Restart a running recovery when no recovery message was recorded for more than 300 seconds
/// The method should not:
/// - Update the processing delay - this is done on a message from a user's session
/// - Complete non timed-out recovery - this is done on the snapshot_complete from user's session
/// Exceptions thrown while evaluating the checks are logged and not rethrown.
/// </remarks>
public void CheckStatus()
{
    // Thresholds (in seconds) after which missing traffic triggers a new recovery request
    const int MaxSecondsWithoutSystemAlive = 60;
    const int MaxSecondsWithoutRecoveryMessage = 300;

    // if the producer is disabled in SDK or not available for current user - nothing to do
    if (!Producer.IsAvailable || Producer.IsDisabled)
    {
        return;
    }

    // recovery must not be done (replay server)
    if (_producer.IgnoreRecovery)
    {
        return;
    }

    // multiple class fields can be accessed from multiple threads (messages from user session(s), system session, here, ...)
    lock (_syncLock)
    {
        // stays null unless one of the checks below decides on a status change
        ProducerRecoveryStatus? newStatus = null;

        try
        {
            // check whether the user is falling behind with processing
            if (Status == ProducerRecoveryStatus.Completed && _timestampTracker.IsBehind)
            {
                newStatus = ProducerRecoveryStatus.Delayed;
            }

            // check whether the user was behind with processing but is no longer
            if (Status == ProducerRecoveryStatus.Delayed && !_timestampTracker.IsBehind)
            {
                newStatus = ProducerRecoveryStatus.Completed;
            }

            // Check whether there is an alive violation during normal processing
            if ((Status == ProducerRecoveryStatus.Completed || Status == ProducerRecoveryStatus.Delayed)
                && _timestampTracker.IsAliveViolated)
            {
                ExecutionLog.LogWarning($"Producer id={Producer.Id}: alive violation detected. Recovery will be done on next system alive.");
                newStatus = ProducerRecoveryStatus.Error;
                //TODO: do we need new recovery here - or just Delayed status
            }

            // Check whether there is an alive violation during recovery
            if (Status == ProducerRecoveryStatus.Started && _timestampTracker.IsAliveViolated)
            {
                Debug.Assert(_recoveryOperation.IsRunning);
                ExecutionLog.LogWarning($"Producer id={Producer.Id}: alive violation detected during recovery. Additional recovery from {_timestampTracker.SystemAliveTimestamp} will be done once the current is completed.");
                _recoveryOperation.Interrupt(SdkInfo.FromEpochTime(_timestampTracker.SystemAliveTimestamp));
            }

            // The status and the recovery operation must agree on whether a recovery is in progress
            if ((Status == ProducerRecoveryStatus.Started && !_recoveryOperation.IsRunning)
                || (Status != ProducerRecoveryStatus.Started && _recoveryOperation.IsRunning))
            {
                ExecutionLog.LogWarning($"Producer id={Producer.Id}: internal recovery status problem ({Status}-{_recoveryOperation.IsRunning}). Recovery will be done on next system alive.");
                newStatus = ProducerRecoveryStatus.Error;
            }

            // Check whether the recovery is running and has timed-out
            if (Status == ProducerRecoveryStatus.Started && _recoveryOperation.HasTimedOut())
            {
                Debug.Assert(_recoveryOperation.IsRunning);
                _recoveryOperation.CompleteTimedOut();
                ExecutionLog.LogWarning($"Producer id={Producer.Id}: recovery timeout. New recovery from {_timestampTracker.SystemAliveTimestamp} will be done.");
                newStatus = ProducerRecoveryStatus.Error;
            }

            // check if any message arrived for this producer in the last X seconds; if not, start recovery
            if ((Status == ProducerRecoveryStatus.NotStarted || Status == ProducerRecoveryStatus.Error)
                && newStatus != ProducerRecoveryStatus.Started)
            {
                // read the last system alive time once so both comparisons and the log line use the same value
                var lastSystemAlive = SdkInfo.FromEpochTime(_timestampTracker.SystemAliveTimestamp);
                if (DateTime.Now - lastSystemAlive > TimeSpan.FromSeconds(MaxSecondsWithoutSystemAlive)
                    && lastSystemAlive > _connectionDownTimestamp)
                {
                    ExecutionLog.LogWarning($"Producer id={Producer.Id}: no alive messages arrived since {lastSystemAlive}. New recovery will be done.");
                    var recoveryStarted = StartRecovery();
                    if (recoveryStarted.HasValue && recoveryStarted.Value)
                    {
                        newStatus = ProducerRecoveryStatus.Started;
                    }
                }
            }

            // recovery is called and we check if any recovery message arrived in last X time; or restart recovery
            if (Status == ProducerRecoveryStatus.Started
                && _recoveryOperation.IsRunning
                && DateTime.Now - _lastRecoveryMessage > TimeSpan.FromSeconds(MaxSecondsWithoutRecoveryMessage))
            {
                ExecutionLog.LogWarning($"Producer id={Producer.Id}: no recovery message arrived since {_lastRecoveryMessage}. New recovery will be done.");
                _recoveryOperation.Interrupt(SdkInfo.FromEpochTime(_timestampTracker.SystemAliveTimestamp));
                _recoveryOperation.Reset();
                var recoveryStarted = StartRecovery();
                if (recoveryStarted.HasValue && recoveryStarted.Value)
                {
                    newStatus = ProducerRecoveryStatus.Started;
                }
            }

            // logs the status as it was on entry to the checks; any change is applied after the try block
            ExecutionLog.LogInformation($"Status check: Producer={_producer}({Enum.GetName(typeof(ProducerRecoveryStatus), Status)}), Timing Info={_timestampTracker}");
        }
        catch (Exception ex)
        {
            ExecutionLog.LogError(ex, $"An unexpected exception occurred while checking status. Producer={_producer.Id}. Status={Status}, IsRunning={_recoveryOperation.IsRunning}");
        }

        // apply the change (if any) even when one of the checks threw part-way through
        if (newStatus != null && newStatus.Value != Status)
        {
            SetStatusAndRaiseEvent(null, newStatus.Value);
        }
    }
}
/// <summary>
/// Checks the status of the current recovery manager
/// </summary>
/// <remarks>
/// The method must:
/// - Check whether current recovery is running and has expired
/// - Whether there is an alive violation
/// - Whether the user is behind with processing
/// The method should not:
/// - Update the processing delay - this is done on a message from a user's session
/// - Start the recovery - this is done on alive from the system session
/// - Complete non timed-out recovery - this is done on the snapshot_complete from user's session
/// Exceptions thrown while evaluating the checks are logged and not rethrown.
/// </remarks>
public void CheckStatus()
{
    // if the producer is disabled in SDK or not available for current user - nothing to do
    if (!Producer.IsAvailable || Producer.IsDisabled)
    {
        return;
    }

    // recovery must not be done (replay server)
    if (_producer.IgnoreRecovery)
    {
        return;
    }

    // multiple class fields can be accessed from multiple threads (messages from user session(s), system session, here, ...)
    lock (_syncLock)
    {
        // starts as the current status; only a differing value is applied at the end
        var newStatus = Status;

        try
        {
            // check whether the user is falling behind with processing
            if (Status == ProducerRecoveryStatus.Completed && _timestampTracker.IsBehind)
            {
                newStatus = ProducerRecoveryStatus.Delayed;
            }

            // check whether the user was behind with processing but is no longer
            if (Status == ProducerRecoveryStatus.Delayed && !_timestampTracker.IsBehind)
            {
                newStatus = ProducerRecoveryStatus.Completed;
            }

            // Check whether there is an alive violation during normal processing
            if ((Status == ProducerRecoveryStatus.Completed || Status == ProducerRecoveryStatus.Delayed)
                && _timestampTracker.IsAliveViolated)
            {
                ExecutionLog.Warn($"Producer id={Producer.Id}: alive violation detected. Recovery will be done on next system alive.");
                newStatus = ProducerRecoveryStatus.Error;
            }

            // Check whether there is an alive violation during recovery
            if (Status == ProducerRecoveryStatus.Started && _timestampTracker.IsAliveViolated)
            {
                Debug.Assert(_recoveryOperation.IsRunning);
                ExecutionLog.Warn($"Producer id={Producer.Id}: alive violation detected during recovery. Additional recovery from {_timestampTracker.SystemAliveTimestamp} will be done once the current is completed.");
                _recoveryOperation.Interrupt(SdkInfo.FromEpochTime(_timestampTracker.SystemAliveTimestamp));
            }

            // The status and the recovery operation must agree on whether a recovery is in progress
            if ((Status == ProducerRecoveryStatus.Started && !_recoveryOperation.IsRunning)
                || (Status != ProducerRecoveryStatus.Started && _recoveryOperation.IsRunning))
            {
                ExecutionLog.Warn($"Producer id={Producer.Id}: internal recovery status problem ({Status}-{_recoveryOperation.IsRunning}). Recovery will be done on next system alive.");
                newStatus = ProducerRecoveryStatus.Error;
            }

            // Check whether the recovery is running and has timed-out
            if (Status == ProducerRecoveryStatus.Started && _recoveryOperation.HasTimedOut())
            {
                Debug.Assert(_recoveryOperation.IsRunning);
                _recoveryOperation.CompleteTimedOut();
                ExecutionLog.Warn($"Producer id={Producer.Id}: recovery timeout. New recovery from {_timestampTracker.SystemAliveTimestamp} will be done.");
                newStatus = ProducerRecoveryStatus.Error;
            }

            // logs the status as it was on entry to the checks; any change is applied after the try block
            ExecutionLog.Info($"Status check: Producer={_producer}({Enum.GetName(typeof(ProducerRecoveryStatus), Status)}), Timing Info={_timestampTracker}");
        }
        catch (Exception ex)
        {
            ExecutionLog.Error($"An unexpected exception occurred while checking status. Producer={_producer.Id}. Status={Status}, IsRunning={_recoveryOperation.IsRunning}", ex);
        }

        // apply the change (if any) even when one of the checks threw part-way through
        if (newStatus != Status)
        {
            SetStatusAndRaiseEvent(null, newStatus);
        }
    }
}