Exemple #1
0
        /// <summary>
        /// Runs the periodic status check of the recovery manager for the associated producer
        /// </summary>
        /// <remarks>
        /// While holding the internal lock the method checks, in this order:
        /// - Whether the user started or stopped being behind with processing (Completed / Delayed)
        /// - Whether there is an alive violation, either during normal processing or during a running recovery
        /// - Whether the status and the running flag of the recovery operation contradict each other
        /// - Whether the running recovery has timed out
        /// - Whether the status is NotStarted or Error and the system alive timestamp is older than 60 seconds
        ///   (but newer than the connection-down timestamp); if so a recovery is started
        /// - Whether a recovery is running and no recovery message was recorded for more than 300 seconds;
        ///   if so the recovery is interrupted, reset and started again
        /// The method does not:
        /// - Update the processing delay - this is done on a message from a user's session
        /// - Complete non timed-out recovery - this is done on the snapshot_complete from user's session
        /// A resulting status change is applied once, after all checks, through SetStatusAndRaiseEvent.
        /// Exceptions thrown by the checks are logged and not propagated.
        /// </remarks>
        public void CheckStatus()
        {
            // nothing to do when the producer is not available for current user, is disabled in SDK,
            // or recovery must not be done for it (replay server)
            if (!Producer.IsAvailable || Producer.IsDisabled || _producer.IgnoreRecovery)
            {
                return;
            }

            // class fields are touched from several threads (user session(s), system session, this check, ...)
            lock (_syncLock)
            {
                // stays null unless one of the checks below decides on a status change
                ProducerRecoveryStatus? pendingStatus = null;

                try
                {
                    // processing delay: Completed -> Delayed when falling behind, Delayed -> Completed when caught up
                    if (Status == ProducerRecoveryStatus.Completed)
                    {
                        if (_timestampTracker.IsBehind)
                        {
                            pendingStatus = ProducerRecoveryStatus.Delayed;
                        }
                    }
                    else if (Status == ProducerRecoveryStatus.Delayed)
                    {
                        if (!_timestampTracker.IsBehind)
                        {
                            pendingStatus = ProducerRecoveryStatus.Completed;
                        }
                    }

                    // alive violation: handled differently while a recovery is in progress and during normal processing
                    if (Status == ProducerRecoveryStatus.Started)
                    {
                        if (_timestampTracker.IsAliveViolated)
                        {
                            Debug.Assert(_recoveryOperation.IsRunning);
                            ExecutionLog.LogWarning($"Producer id={Producer.Id}: alive violation detected during recovery. Additional recovery from {_timestampTracker.SystemAliveTimestamp} will be done once the current is completed.");
                            _recoveryOperation.Interrupt(SdkInfo.FromEpochTime(_timestampTracker.SystemAliveTimestamp));
                        }
                    }
                    else if ((Status == ProducerRecoveryStatus.Completed || Status == ProducerRecoveryStatus.Delayed) && _timestampTracker.IsAliveViolated)
                    {
                        ExecutionLog.LogWarning($"Producer id={Producer.Id}: alive violation detected. Recovery will be done on next system alive.");
                        pendingStatus = ProducerRecoveryStatus.Error;
                        //TODO: do we need new recovery here - or just Delayed status
                    }

                    // the status must be Started exactly when the recovery operation reports it is running
                    if ((Status == ProducerRecoveryStatus.Started) != _recoveryOperation.IsRunning)
                    {
                        ExecutionLog.LogWarning($"Producer id={Producer.Id}: internal recovery status problem ({Status}-{_recoveryOperation.IsRunning}). Recovery will be done on next system alive.");
                        pendingStatus = ProducerRecoveryStatus.Error;
                    }

                    // a recovery which has timed-out is completed as timed-out and the producer is put into Error
                    if (Status == ProducerRecoveryStatus.Started && _recoveryOperation.HasTimedOut())
                    {
                        Debug.Assert(_recoveryOperation.IsRunning);
                        _recoveryOperation.CompleteTimedOut();
                        ExecutionLog.LogWarning($"Producer id={Producer.Id}: recovery timeout. New recovery from {_timestampTracker.SystemAliveTimestamp} will be done.");
                        pendingStatus = ProducerRecoveryStatus.Error;
                    }

                    // no recovery in progress: start one when the last system alive is older than 60 seconds
                    // but still newer than the moment the connection went down
                    if (Status == ProducerRecoveryStatus.NotStarted || Status == ProducerRecoveryStatus.Error)
                    {
                        if (pendingStatus != ProducerRecoveryStatus.Started &&
                            DateTime.Now - SdkInfo.FromEpochTime(_timestampTracker.SystemAliveTimestamp) > TimeSpan.FromSeconds(60) &&
                            SdkInfo.FromEpochTime(_timestampTracker.SystemAliveTimestamp) > _connectionDownTimestamp)
                        {
                            ExecutionLog.LogWarning($"Producer id={Producer.Id}: no alive messages arrived since {SdkInfo.FromEpochTime(_timestampTracker.SystemAliveTimestamp)}. New recovery will be done.");
                            var started = StartRecovery();
                            if (started.HasValue && started.Value)
                            {
                                pendingStatus = ProducerRecoveryStatus.Started;
                            }
                        }
                    }

                    // recovery in progress: restart it when no recovery message was recorded for more than 300 seconds
                    if (Status == ProducerRecoveryStatus.Started && _recoveryOperation.IsRunning)
                    {
                        if (DateTime.Now - _lastRecoveryMessage > TimeSpan.FromSeconds(300))
                        {
                            ExecutionLog.LogWarning($"Producer id={Producer.Id}: no recovery message arrived since {_lastRecoveryMessage}. New recovery will be done.");
                            _recoveryOperation.Interrupt(SdkInfo.FromEpochTime(_timestampTracker.SystemAliveTimestamp));
                            _recoveryOperation.Reset();
                            // NOTE(review): when the restart does not succeed the status is left unchanged here - confirm this is intended
                            var restarted = StartRecovery();
                            if (restarted.HasValue && restarted.Value)
                            {
                                pendingStatus = ProducerRecoveryStatus.Started;
                            }
                        }
                    }

                    // the log shows the status as it was during the checks, the change (if any) is applied below
                    ExecutionLog.LogInformation($"Status check: Producer={_producer}({Enum.GetName(typeof(ProducerRecoveryStatus), Status)}), Timing Info={_timestampTracker}");
                }
                catch (Exception ex)
                {
                    ExecutionLog.LogError(ex, $"An unexpected exception occurred while checking status. Producer={_producer.Id}. Status={Status}, IsRunning={_recoveryOperation.IsRunning}");
                }

                // applied even when an exception interrupted the checks after a decision was already made
                if (pendingStatus.HasValue && pendingStatus.Value != Status)
                {
                    SetStatusAndRaiseEvent(null, pendingStatus.Value);
                }
            }
        }
Exemple #2
0
        /// <summary>
        /// Runs the periodic status check of the recovery manager for the associated producer
        /// </summary>
        /// <remarks>
        /// While holding the internal lock the method checks, in this order:
        /// - Whether the user started or stopped being behind with processing (Completed / Delayed)
        /// - Whether there is an alive violation, either during normal processing or during a running recovery
        /// - Whether the status and the running flag of the recovery operation contradict each other
        /// - Whether the running recovery has timed out
        /// The method does not:
        /// - Update the processing delay - this is done on a message from a user's session
        /// - Start the recovery - this is done on alive from the system session
        /// - Complete non timed-out recovery - this is done on the snapshot_complete from user's session
        /// A resulting status change is applied once, after all checks, through SetStatusAndRaiseEvent.
        /// Exceptions thrown by the checks are logged and not propagated.
        /// </remarks>
        public void CheckStatus()
        {
            // nothing to do when the producer is not available for current user, is disabled in SDK,
            // or recovery must not be done for it (replay server)
            if (!Producer.IsAvailable || Producer.IsDisabled || _producer.IgnoreRecovery)
            {
                return;
            }

            // class fields are touched from several threads (user session(s), system session, this check, ...)
            lock (_syncLock)
            {
                // starts as the current status; differs from it only when a check decides on a change
                var resultingStatus = Status;

                try
                {
                    // processing delay: Completed -> Delayed when falling behind, Delayed -> Completed when caught up
                    if (Status == ProducerRecoveryStatus.Completed)
                    {
                        if (_timestampTracker.IsBehind)
                        {
                            resultingStatus = ProducerRecoveryStatus.Delayed;
                        }
                    }
                    else if (Status == ProducerRecoveryStatus.Delayed)
                    {
                        if (!_timestampTracker.IsBehind)
                        {
                            resultingStatus = ProducerRecoveryStatus.Completed;
                        }
                    }

                    // alive violation: handled differently while a recovery is in progress and during normal processing
                    if (Status == ProducerRecoveryStatus.Started)
                    {
                        if (_timestampTracker.IsAliveViolated)
                        {
                            Debug.Assert(_recoveryOperation.IsRunning);
                            ExecutionLog.Warn($"Producer id={Producer.Id}: alive violation detected during recovery. Additional recovery from {_timestampTracker.SystemAliveTimestamp} will be done once the current is completed.");
                            _recoveryOperation.Interrupt(SdkInfo.FromEpochTime(_timestampTracker.SystemAliveTimestamp));
                        }
                    }
                    else if ((Status == ProducerRecoveryStatus.Completed || Status == ProducerRecoveryStatus.Delayed) && _timestampTracker.IsAliveViolated)
                    {
                        ExecutionLog.Warn($"Producer id={Producer.Id}: alive violation detected. Recovery will be done on next system alive.");
                        resultingStatus = ProducerRecoveryStatus.Error;
                    }

                    // the status must be Started exactly when the recovery operation reports it is running
                    if ((Status == ProducerRecoveryStatus.Started) != _recoveryOperation.IsRunning)
                    {
                        ExecutionLog.Warn($"Producer id={Producer.Id}: internal recovery status problem ({Status}-{_recoveryOperation.IsRunning}). Recovery will be done on next system alive.");
                        resultingStatus = ProducerRecoveryStatus.Error;
                    }

                    // a recovery which has timed-out is completed as timed-out and the producer is put into Error
                    if (Status == ProducerRecoveryStatus.Started && _recoveryOperation.HasTimedOut())
                    {
                        Debug.Assert(_recoveryOperation.IsRunning);
                        _recoveryOperation.CompleteTimedOut();
                        ExecutionLog.Warn($"Producer id={Producer.Id}: recovery timeout. New recovery from {_timestampTracker.SystemAliveTimestamp} will be done.");
                        resultingStatus = ProducerRecoveryStatus.Error;
                    }

                    // the log shows the status as it was during the checks, the change (if any) is applied below
                    ExecutionLog.Info($"Status check: Producer={_producer}({Enum.GetName(typeof(ProducerRecoveryStatus), Status)}), Timing Info={_timestampTracker}");
                }
                catch (Exception ex)
                {
                    ExecutionLog.Error($"An unexpected exception occurred while checking status. Producer={_producer.Id}. Status={Status}, IsRunning={_recoveryOperation.IsRunning}", ex);
                }

                // applied even when an exception interrupted the checks after a decision was already made
                if (resultingStatus != Status)
                {
                    SetStatusAndRaiseEvent(null, resultingStatus);
                }
            }
        }