/// <summary> /// Updates the timing info in the provided dictionary if the latency on the existing entry is bigger than on the new one /// </summary> /// <param name="dictionary">The <see cref="IReadOnlyDictionary{TKey,TValue}"/> to modify</param> /// <param name="interest">The <see cref="MessageInterest"/> associated with the session which received the message</param> /// <param name="message">The received <see cref="FeedMessage"/></param> private static void UpdateTimingInfoIfLatencyLower(IReadOnlyDictionary <MessageInterest, MessageTimingInfo> dictionary, MessageInterest interest, FeedMessage message) { Guard.Argument(dictionary, nameof(dictionary)).NotNull(); Guard.Argument(interest, nameof(interest)).NotNull(); Guard.Argument(message, nameof(message)).NotNull(); if (dictionary.TryGetValue(interest, out var timingInfo)) { var messageDateTime = SdkInfo.FromEpochTime(message.GeneratedAt); var newLatency = TimeProviderAccessor.Current.Now <= messageDateTime ? TimeSpan.Zero : TimeProviderAccessor.Current.Now - messageDateTime; if (timingInfo.Latency > newLatency) { timingInfo.Update(message.GeneratedAt); } } else { if (!(message is alive)) { ExecutionLog.LogError($"Message timing info for message type:{message.GetType().Name}, interest:{interest.Name} does not exist."); } } }
public void correct_timestamp_is_used_when_recovery_is_interrupted_do_to_unsubscribed_alive() { var recoveryOperationMock = new Mock <IRecoveryOperation>(); recoveryOperationMock.Setup(x => x.Start()).Returns(true); _producerRecoveryManager = new ProducerRecoveryManager(_producer, recoveryOperationMock.Object, _timestampTracker); //start the recovery _producerRecoveryManager.ProcessSystemMessage(_messageBuilder.BuildAlive()); Assert.AreEqual(_producerRecoveryManager.Status, ProducerRecoveryStatus.Started); //make sure that after the the recoveryOperationMock IsRunning returns true recoveryOperationMock.Setup(x => x.IsRunning).Returns(true); //let's go over few normal cycles without user messages _producerRecoveryManager.CheckStatus(); _producerRecoveryManager.ProcessSystemMessage(_messageBuilder.BuildAlive()); _timeProvider.AddSeconds(10); _producerRecoveryManager.CheckStatus(); var lastAlive = _messageBuilder.BuildAlive(); _producerRecoveryManager.ProcessSystemMessage(lastAlive); // let's feed the recovery manager with unsubscribed alive _timeProvider.AddSeconds(10); var unsubscribedAlive = _messageBuilder.BuildAlive(); unsubscribedAlive.subscribed = 0; _producerRecoveryManager.ProcessSystemMessage(unsubscribedAlive); recoveryOperationMock.Verify(x => x.Interrupt(SdkInfo.FromEpochTime(lastAlive.timestamp)), Times.Once); Assert.AreEqual(ProducerRecoveryStatus.Started, _producerRecoveryManager.Status); }
/// <summary> /// Updates the timing info in the provided dictionary if the latency on the existing entry is bigger than on the new one /// </summary> /// <param name="dictionary">The <see cref="IReadOnlyDictionary{TKey,TValue}"/> to modify</param> /// <param name="interest">The <see cref="MessageInterest"/> associated with the session which received the message</param> /// <param name="message">The received <see cref="FeedMessage"/></param> private static void UpdateTimingInfoIfLatencyLower(IReadOnlyDictionary <MessageInterest, MessageTimingInfo> dictionary, MessageInterest interest, FeedMessage message) { Contract.Requires(dictionary != null); Contract.Requires(interest != null); Contract.Requires(message != null); MessageTimingInfo timingInfo; if (dictionary.TryGetValue(interest, out timingInfo)) { var messageDateTime = SdkInfo.FromEpochTime(message.GeneratedAt); var newLatency = TimeProviderAccessor.Current.Now <= messageDateTime ? TimeSpan.Zero : TimeProviderAccessor.Current.Now - messageDateTime; if (timingInfo.Latency > newLatency) { timingInfo.Update(message.GeneratedAt); } } else { if (!(message is alive)) { ExecutionLog.Error($"Message timing info for message type:{message.GetType().Name}, interest:{interest.Name} does not exist."); } } }
/// <summary> /// Updates the information held by the current instance /// </summary> /// <param name="generatedAt">Epoch time of the message generation</param> public void Update(long generatedAt) { var currentTime = TimeProviderAccessor.Current.Now; GeneratedAt = SdkInfo.FromEpochTime(generatedAt); Latency = GeneratedAt >= currentTime ? TimeSpan.Zero : currentTime - GeneratedAt; }
/// <summary> /// Processes <see cref="IMessage" /> received on the user's session(s) /// </summary> /// <remarks> /// This method does: /// - collect timing info on odds_change(s), bet_stop(s) and alive(s) /// - attempt to complete running recoveries with snapshot_complete(s) /// This method does not: /// - determine if the user is behind (or not) with message processing - this is done in CheckStatus(..) method /// - attempt to determine whether the recovery has timed-out - this is done in CheckStatus(..) method /// - determine weather is alive violated. This should be only done based on system alive(s) and is done in ProcessSystemMessage(...) method /// - start recoveries - This should only be done based on system alive(s) and is done in ProcessSystemMessage(...) method /// </remarks> /// <param name="message">The <see cref="IMessage" /> message to be processed</param> /// <param name="interest">The <see cref="MessageInterest"/> describing the session from which the message originates </param> /// <exception cref="ArgumentException">The Producer.Id of the message and the Producer associated with this manager do not match</exception> public void ProcessUserMessage(FeedMessage message, MessageInterest interest) { Guard.Argument(message, nameof(message)).NotNull(); Guard.Argument(interest, nameof(interest)).NotNull(); if (message.ProducerId != Producer.Id) { ExecutionLog.LogError($"The producer ({message.ProducerId}) of the message and the Producer {Producer.Name} ({Producer.Id}) associated with this manager do not match."); return; } if (!Producer.IsAvailable || Producer.IsDisabled) { ExecutionLog.LogDebug($"{Producer.Name} Producer is not available or user disabled, however we still receive {message.GetType()} message. Can be ignored."); return; } lock (_syncLock) { ProducerRecoveryStatus?newStatus = null; try { if (message is odds_change || message is bet_stop) { _timestampTracker.ProcessUserMessage(interest, message); } var alive = message as alive; if (alive != null && alive.subscribed != 0) { // the alive message from user's session should only be used for tracking of processing delay _timestampTracker.ProcessUserMessage(interest, message); if (Status == ProducerRecoveryStatus.Completed || Status == ProducerRecoveryStatus.Delayed) { _producer.SetLastTimestampBeforeDisconnect(SdkInfo.FromEpochTime(_timestampTracker.OldestUserAliveTimestamp)); } } var snapshotComplete = message as snapshot_complete; if (snapshotComplete != null) { newStatus = ProcessSnapshotCompleteMessage(snapshotComplete, interest); } } catch (Exception ex) { ExecutionLog.LogError($"An unexpected exception occurred while processing user message. Producer={_producer.Id}, Session={interest.Name}, Message={message}, Exception:", ex); } if (newStatus != null && newStatus.Value != Status) { SetStatusAndRaiseEvent(null, newStatus.Value); } } }
/// <summary> /// Processes the <see cref="snapshot_complete"/> message. Only snapshot_complete(s) from user's sessions should be processed /// </summary> /// <param name="snapshotCompleted">The <see cref="snapshot_complete"/> message received on the system user's session</param> /// <param name="interest">The <see cref="MessageInterest"/> associated with the session which received the message</param> /// <returns>A <see cref="ProducerRecoveryStatus"/> specifying the new status of the manager or null reference if no change is needed</returns> private ProducerRecoveryStatus?ProcessSnapshotCompleteMessage(snapshot_complete snapshotCompleted, MessageInterest interest) { URN eventId; if (_producer.EventRecoveries.TryRemove(snapshotCompleted.request_id, out eventId)) { ExecutionLog.Info($"Recovery with requestId={snapshotCompleted.request_id} for producer={Producer.Id}, eventId={eventId} completed."); EventRecoveryCompleted?.Invoke(this, new EventRecoveryCompletedEventArgs(snapshotCompleted.request_id, eventId)); return(null); } //The snapshot message not for us if (!_recoveryOperation.IsRunning || !_recoveryOperation.RequestId.HasValue || _recoveryOperation.RequestId.Value != snapshotCompleted.RequestId) { return(null); } //Debug.Assert(Status == ProducerRecoveryStatus.Started); RecoveryResult recoveryResult; ExecutionLog.Debug($"SnapshotComplete[{"requestId" + snapshotCompleted.request_id}] for producer=[{"id=" + Producer.Id}] on session {interest.Name} received"); if (!_recoveryOperation.TryComplete(interest, out recoveryResult)) { //The recovery is not complete, nothing to do. ExecutionLog.Debug($"Recovery with requestId={snapshotCompleted.request_id} for producer={Producer.Id} is not yet completed. Waiting for snapshots from other sessions"); return(null); } // The recovery operation completed. Check the result and act accordingly if (recoveryResult.Success) { // the recovery was interrupted if (recoveryResult.InterruptedAt.HasValue) { ExecutionLog.Warn($"Recovery with requestId={snapshotCompleted.request_id} for producer={Producer.Id} completed with interruption at:{recoveryResult.InterruptedAt.Value}"); _producer.SetLastTimestampBeforeDisconnect(recoveryResult.InterruptedAt.Value); return(ProducerRecoveryStatus.Error); } // the recovery was not interrupted var recoveryDuration = TimeProviderAccessor.Current.Now - recoveryResult.StartTime; ExecutionLog.Info($"Recovery with requestId={snapshotCompleted.request_id} for producer={Producer.Id} completed in {recoveryDuration.TotalSeconds} sec."); _producer.SetLastTimestampBeforeDisconnect(SdkInfo.FromEpochTime(snapshotCompleted.timestamp)); return(_timestampTracker.IsBehind ? ProducerRecoveryStatus.Delayed : ProducerRecoveryStatus.Completed); } // The recovery operation timed-out var timeOutDuration = TimeProviderAccessor.Current.Now - recoveryResult.StartTime; ExecutionLog.Warn($"Recovery with requestId={snapshotCompleted.RequestId} timed out after:{timeOutDuration}"); return(ProducerRecoveryStatus.Error); }
/// <summary> /// Handles the <see cref="IFeedMessageProcessor.MessageProcessed"/> event /// </summary> /// <param name="sender">A <see cref="object"/> representation of the instance raising the event</param> /// <param name="e">A <see cref="FeedMessageReceivedEventArgs"/> instance containing event information</param> private void OnMessageProcessed(object sender, FeedMessageReceivedEventArgs e) { var processingTook = string.Empty; var sdkProcessingTime = DateTime.Now - SdkInfo.FromEpochTime(e.Message.ReceivedAt); if (sdkProcessingTime.TotalMilliseconds > 500) { processingTook = $" Sdk processing took {(int)sdkProcessingTime.TotalMilliseconds}ms"; } ExecutionLog.LogDebug($"Dispatching {e.Message.GetType().Name} for {e.Message.EventId} ({e.Message.GeneratedAt}).{processingTook}"); var dispatcher = SelectDispatcher(e.Message); dispatcher.Dispatch(e.Message, e.RawMessage); }
/// <summary> /// Checks the status of the current recovery manager /// </summary> /// <remarks> /// The method must: /// - Check whether current recovery is running and has expired /// - Whether there is an alive violation /// - Whether the user is behind with processing /// The method should not: /// - Update the processing delay - this is done on a message from a user's session /// - Start the recovery - this is done on alive from the system session /// - Complete non timed-out recovery - this is done on the snapshot_complete from user's session /// </remarks> public void CheckStatus() { //if the producer is disabled in SDK or not available for current user - nothing to do if (!Producer.IsAvailable || Producer.IsDisabled) { return; } // recovery must not be done (replay server) if (_producer.IgnoreRecovery) { return; } // multiple class fields can be accessed from multiple threads (messages from user session(s), system session, here, ...) lock (_syncLock) { ProducerRecoveryStatus?newStatus = null; try { // check whether the user is falling behind with processing if (Status == ProducerRecoveryStatus.Completed && _timestampTracker.IsBehind) { newStatus = ProducerRecoveryStatus.Delayed; } // check whether the user was behind with processing but is no longer if (Status == ProducerRecoveryStatus.Delayed && !_timestampTracker.IsBehind) { newStatus = ProducerRecoveryStatus.Completed; } // Check whether there is an alive violation during normal processing if ((Status == ProducerRecoveryStatus.Completed || Status == ProducerRecoveryStatus.Delayed) && _timestampTracker.IsAliveViolated) { ExecutionLog.LogWarning($"Producer id={Producer.Id}: alive violation detected. Recovery will be done on next system alive."); newStatus = ProducerRecoveryStatus.Error; //TODO: do we need new recovery here - or just Delayed status } // Check whether there is an alive violation during recovery if (Status == ProducerRecoveryStatus.Started && _timestampTracker.IsAliveViolated) { Debug.Assert(_recoveryOperation.IsRunning); ExecutionLog.LogWarning($"Producer id={Producer.Id}: alive violation detected during recovery. Additional recovery from {_timestampTracker.SystemAliveTimestamp} will be done once the current is completed."); _recoveryOperation.Interrupt(SdkInfo.FromEpochTime(_timestampTracker.SystemAliveTimestamp)); } if (Status == ProducerRecoveryStatus.Started && !_recoveryOperation.IsRunning || Status != ProducerRecoveryStatus.Started && _recoveryOperation.IsRunning) { ExecutionLog.LogWarning($"Producer id={Producer.Id}: internal recovery status problem ({Status}-{_recoveryOperation.IsRunning}). Recovery will be done on next system alive."); newStatus = ProducerRecoveryStatus.Error; } // Check whether the recovery is running and has timed-out if (Status == ProducerRecoveryStatus.Started && _recoveryOperation.HasTimedOut()) { Debug.Assert(_recoveryOperation.IsRunning); _recoveryOperation.CompleteTimedOut(); ExecutionLog.LogWarning($"Producer id={Producer.Id}: recovery timeout. New recovery from {_timestampTracker.SystemAliveTimestamp} will be done."); newStatus = ProducerRecoveryStatus.Error; } // check if any message arrived for this producer in the last X seconds; if not, start recovery if ((Status == ProducerRecoveryStatus.NotStarted || Status == ProducerRecoveryStatus.Error) && newStatus != ProducerRecoveryStatus.Started && DateTime.Now - SdkInfo.FromEpochTime(_timestampTracker.SystemAliveTimestamp) > TimeSpan.FromSeconds(60) && SdkInfo.FromEpochTime(_timestampTracker.SystemAliveTimestamp) > _connectionDownTimestamp) { ExecutionLog.LogWarning($"Producer id={Producer.Id}: no alive messages arrived since {SdkInfo.FromEpochTime(_timestampTracker.SystemAliveTimestamp)}. New recovery will be done."); var recoveryStarted = StartRecovery(); if (recoveryStarted.HasValue && recoveryStarted.Value) { newStatus = ProducerRecoveryStatus.Started; } } // recovery is called and we check if any recovery message arrived in last X time; or restart recovery if (Status == ProducerRecoveryStatus.Started && _recoveryOperation.IsRunning && DateTime.Now - _lastRecoveryMessage > TimeSpan.FromSeconds(300)) { ExecutionLog.LogWarning($"Producer id={Producer.Id}: no recovery message arrived since {_lastRecoveryMessage}. New recovery will be done."); _recoveryOperation.Interrupt(SdkInfo.FromEpochTime(_timestampTracker.SystemAliveTimestamp)); _recoveryOperation.Reset(); var recoveryStarted = StartRecovery(); if (recoveryStarted.HasValue && recoveryStarted.Value) { newStatus = ProducerRecoveryStatus.Started; } } ExecutionLog.LogInformation($"Status check: Producer={_producer}({Enum.GetName(typeof(ProducerRecoveryStatus), Status)}), Timing Info={_timestampTracker}"); } catch (Exception ex) { ExecutionLog.LogError(ex, $"An unexpected exception occurred while checking status. Producer={_producer.Id}. Status={Status}, IsRunning={_recoveryOperation.IsRunning}"); } if (newStatus != null && newStatus.Value != Status) { SetStatusAndRaiseEvent(null, newStatus.Value); } } }
/// <summary> /// Processes a message received on the system's session /// </summary> /// <remarks> /// This method does: /// - starts recovery operations if needed /// - interrupt running recoveries on non-subscribed alive(s) and alive violation(s) /// - set LastTimestampBeforeDisconnect property on the producer. /// This method does not: /// - determine if the user is behind (or not) with message processing - this is done in CheckStatus(..) method /// - attempt to determine whether the recovery has timed-out - this is done in CheckStatus(..) method /// </remarks> /// <param name="message">A <see cref="FeedMessage"/> received on the system session</param> public void ProcessSystemMessage(FeedMessage message) { Guard.Argument(message, nameof(message)).NotNull(); var alive = message as alive; if (alive?.ProducerId != Producer.Id) { return; } if (Status == ProducerRecoveryStatus.FatalError || _producer.IgnoreRecovery) { return; } lock (_syncLock) { ProducerRecoveryStatus?newStatus = null; try { // store the timestamp of most recent system alive before it is overridden by // _timestampTracker.ProcessSystemAlive(alive); in case the current alive // is not subscribed and ongoing recovery operation must be interrupted var previousAliveTimestamp = _timestampTracker.SystemAliveTimestamp; _timestampTracker.ProcessSystemAlive(alive); // if current status is NotStarted or Error just start the recovery if (Status == ProducerRecoveryStatus.NotStarted || Status == ProducerRecoveryStatus.Error) { try { var started = StartRecovery(); if (started.HasValue && started.Value) { ExecutionLog.LogInformation($"Producer={_producer.Id}: Recovery operation started. Current status={Enum.GetName(typeof(ProducerRecoveryStatus), Status)}, After={SdkInfo.ToEpochTime(_producer.LastTimestampBeforeDisconnect)}."); newStatus = ProducerRecoveryStatus.Started; } else if (started.HasValue) { ExecutionLog.LogWarning($"Producer={_producer.Id}: An error occurred while starting recovery operation with after={SdkInfo.ToEpochTime(_producer.LastTimestampBeforeDisconnect)}. Retry will be made at next system alive."); } } catch (RecoveryInitiationException ex) { ExecutionLog.LogCritical($"Producer id={Producer.Id} failed to execute recovery because 'after' is to much in the past. After={ex.After}. Stopping the feed."); newStatus = ProducerRecoveryStatus.FatalError; } } else { Debug.Assert(Status == ProducerRecoveryStatus.Started || Status == ProducerRecoveryStatus.Completed || Status == ProducerRecoveryStatus.Delayed); // we are no longer in sync with the feed if (alive.subscribed == 0) { if (_recoveryOperation.IsRunning) { Debug.Assert(Status == ProducerRecoveryStatus.Started); var timestamp = SdkInfo.FromEpochTime(previousAliveTimestamp); ExecutionLog.LogInformation($"Producer={_producer.Id}: Recovery operation interrupted. Current status={Enum.GetName(typeof(ProducerRecoveryStatus), Status)}, Timestamp={timestamp}."); _recoveryOperation.Interrupt(timestamp); } else { Debug.Assert(Status == ProducerRecoveryStatus.Completed || Status == ProducerRecoveryStatus.Delayed); try { var started = StartRecovery(); if (started.HasValue && started.Value) { ExecutionLog.LogInformation($"Producer={_producer.Id}: Recovery operation started due to unsubscribed alive. Current status={Enum.GetName(typeof(ProducerRecoveryStatus), Status)}, After={SdkInfo.ToEpochTime(_producer.LastTimestampBeforeDisconnect)}."); newStatus = ProducerRecoveryStatus.Started; } else if (started.HasValue) { ExecutionLog.LogWarning($"Producer={_producer.Id}: An error occurred while starting recovery operation with after={SdkInfo.ToEpochTime(_producer.LastTimestampBeforeDisconnect)}. Retry will be made at next system alive."); newStatus = ProducerRecoveryStatus.Error; } } catch (RecoveryInitiationException ex) { ExecutionLog.LogCritical($"Producer id={Producer.Id} failed to execute recovery because 'after' is to much in the past. After={ex.After}. Stopping the feed."); newStatus = ProducerRecoveryStatus.FatalError; } } } } } catch (Exception ex) { ExecutionLog.LogError(ex, $"An unexpected exception occurred while processing system message. Producer={_producer.Id}, message={message}"); } if (newStatus != null && newStatus.Value != Status) { SetStatusAndRaiseEvent(null, newStatus.Value); } } }
/// <summary> /// Checks the status of the current recovery manager /// </summary> /// <remarks> /// The method must: /// - Check whether current recovery is running and has expired /// - Whether there is an alive violation /// - Whether the user is behind with processing /// The method should not: /// - Update the processing delay - this is done on a message from a user's session /// - Start the recovery - this is done on alive from the system session /// - Complete non timed-out recovery - this is done on the snapshot_complete from user's session /// </remarks> public void CheckStatus() { //if the producer is disabled in SDK or not available for current user - nothing to do if (!Producer.IsAvailable || Producer.IsDisabled) { return; } // recovery must not be done (replay server) if (_producer.IgnoreRecovery) { return; } // multiple class fields can be accessed from multiple threads(messages from user session(s), system session, here, ...) lock (_syncLock) { var newStatus = Status; try { // check whether the user is falling behind with processing if (Status == ProducerRecoveryStatus.Completed && _timestampTracker.IsBehind) { newStatus = ProducerRecoveryStatus.Delayed; } // check whether the user was behind with processing but is no longer if (Status == ProducerRecoveryStatus.Delayed && !_timestampTracker.IsBehind) { newStatus = ProducerRecoveryStatus.Completed; } // Check whether there is an alive violation during normal processing if ((Status == ProducerRecoveryStatus.Completed || Status == ProducerRecoveryStatus.Delayed) && _timestampTracker.IsAliveViolated) { ExecutionLog.Warn($"Producer id={Producer.Id}: alive violation detected. Recovery will be done on next system alive."); newStatus = ProducerRecoveryStatus.Error; } // Check whether there is an alive violation during recovery if (Status == ProducerRecoveryStatus.Started && _timestampTracker.IsAliveViolated) { Debug.Assert(_recoveryOperation.IsRunning); ExecutionLog.Warn($"Producer id={Producer.Id}: alive violation detected during recovery. Additional recovery from {_timestampTracker.SystemAliveTimestamp} will be done once the current is completed."); _recoveryOperation.Interrupt(SdkInfo.FromEpochTime(_timestampTracker.SystemAliveTimestamp)); } if (Status == ProducerRecoveryStatus.Started && !_recoveryOperation.IsRunning || Status != ProducerRecoveryStatus.Started && _recoveryOperation.IsRunning) { ExecutionLog.Warn($"Producer id={Producer.Id}: internal recovery status problem ({Status}-{_recoveryOperation.IsRunning}). Recovery will be done on next system alive."); newStatus = ProducerRecoveryStatus.Error; } // Check whether the recovery is running and has timed-out if (Status == ProducerRecoveryStatus.Started && _recoveryOperation.HasTimedOut()) { Debug.Assert(_recoveryOperation.IsRunning); _recoveryOperation.CompleteTimedOut(); ExecutionLog.Warn($"Producer id={Producer.Id}: recovery timeout. New recovery from {_timestampTracker.SystemAliveTimestamp} will be done."); newStatus = ProducerRecoveryStatus.Error; } ExecutionLog.Info($"Status check: Producer={_producer}({Enum.GetName(typeof(ProducerRecoveryStatus), Status)}), Timing Info={_timestampTracker}"); } catch (Exception ex) { ExecutionLog.Error($"An unexpected exception occurred while checking status. Producer={_producer.Id}. Status={Status}, IsRunning={_recoveryOperation.IsRunning}", ex); } if (newStatus != Status) { SetStatusAndRaiseEvent(null, newStatus); } } }
/// <inheritdoc /> public override string ToString() { return($"After={After}/{SdkInfo.FromEpochTime(After)}, NodeId={NodeId}, Timestamp={Timestamp}/{SdkInfo.FromEpochTime(Timestamp)}, RequestId={RequestId}, Response={ResponseCode}-{ResponseMessage}"); }