/// <summary>
/// Reads a <see cref="REEFMessage"/> out of its serialized byte form.
/// </summary>
/// <param name="bytes">The serialized message. It is wrapped in a <see cref="MemoryStream"/>,
/// whose constructor rejects a null buffer.</param>
/// <returns>Whatever <c>Serializer.Deserialize&lt;REEFMessage&gt;</c> produces for the stream.</returns>
public static REEFMessage Deserialize(byte[] bytes)
{
    // The stream only lives for the duration of the call; the using block
    // disposes it on both the normal and the exceptional path.
    using (var stream = new MemoryStream(bytes))
    {
        return Serializer.Deserialize<REEFMessage>(stream);
    }
}
/// <summary>
/// Tries to deliver a heartbeat to the driver through <c>_observer</c>; queues it instead
/// when the evaluator is in RECOVERY mode or when delivery throws.
/// </summary>
/// <param name="evaluatorHeartbeatProto">The heartbeat to send or queue.</param>
/// <remarks>
/// The whole operation runs under a lock on <c>_queuedHeartbeats</c>.
/// <list type="bullet">
/// <item>In RECOVERY mode the heartbeat is enqueued and nothing is sent.</item>
/// <item>A successful send resets <c>_heartbeatFailures</c> to zero.</item>
/// <item>A failed send while no task is RUNNING is handed to <c>Exceptions.Throw</c>
/// (presumably rethrowing; confirm against that helper).</item>
/// <item>A failed send while a task is RUNNING increments the failure counter and enqueues
/// the heartbeat. Once the counter reaches <c>_maxHeartbeatRetries</c> the driver is treated
/// as unreachable: a Disconnected message is dispatched, an <c>IDriverConnection</c> is
/// obtained from the evaluator injector, the operation state becomes RECOVERY and the
/// counter is cleared.</item>
/// </list>
/// </remarks>
public void Send(EvaluatorHeartbeatProto evaluatorHeartbeatProto)
{
    lock (_queuedHeartbeats)
    {
        if (_evaluatorSettings.OperationState == EvaluatorOperationState.RECOVERY)
        {
            LOGGER.Log(Level.Warning, string.Format(CultureInfo.InvariantCulture, "In RECOVERY mode, heartbeat queued as [{0}]. ", evaluatorHeartbeatProto));
            _queuedHeartbeats.Enqueue(evaluatorHeartbeatProto);
            return;
        }

        // NOT during recovery, try to send
        REEFMessage payload = new REEFMessage(evaluatorHeartbeatProto);
        try
        {
            _observer.OnNext(payload);
            _heartbeatFailures = 0; // reset failure counts if we are having intermittent (not continuous) failures
        }
        catch (Exception e)
        {
            // Recovery is only attempted while a task is running; otherwise the failure is escalated.
            if (evaluatorHeartbeatProto.task_status == null || evaluatorHeartbeatProto.task_status.state != State.RUNNING)
            {
                Utilities.Diagnostics.Exceptions.Throw(e, "Lost communications to driver when no task is running, recovery NOT supported for such scenario", LOGGER);
            }

            // Keep the heartbeat so it is not lost, and count the failure.
            _heartbeatFailures++;
            _queuedHeartbeats.Enqueue(evaluatorHeartbeatProto);
            LOGGER.Log(Level.Error, string.Format(CultureInfo.InvariantCulture, "Sending heartbeat to driver experienced #{0} failure. Hearbeat queued as: [{1}]. ", _heartbeatFailures, evaluatorHeartbeatProto), e);

            if (_heartbeatFailures >= _maxHeartbeatRetries)
            {
                LOGGER.Log(Level.Warning, "Heartbeat communications to driver reached max of {0} failures. Driver is considered dead/unreachable", _heartbeatFailures);

                // The literal must stay on one physical line: a regular string cannot contain a raw newline.
                LOGGER.Log(Level.Info, "=========== Entering RECOVERY mode. ===========");
                ContextManager.HandleDriverConnectionMessage(new DriverConnectionMessageImpl(DriverConnectionState.Disconnected));

                try
                {
                    _driverConnection = _evaluatorSettings.EvaluatorInjector.GetInstance<IDriverConnection>();
                }
                catch (Exception ex)
                {
                    Utilities.Diagnostics.Exceptions.CaughtAndThrow(ex, Level.Error, "Failed to inject the driver reconnect implementation", LOGGER);
                }

                LOGGER.Log(Level.Info, "instantiate driver reconnect implementation: " + _driverConnection);
                _evaluatorSettings.OperationState = EvaluatorOperationState.RECOVERY;

                // clean heartbeat failure
                _heartbeatFailures = 0;
            }
        }
    }
}
/// <summary>
/// Tries to deliver a heartbeat to the driver through <c>_observer</c>; queues it instead
/// when the evaluator is in RECOVERY mode or when delivery throws, and drops it when a
/// completed heartbeat has already been scheduled.
/// </summary>
/// <param name="evaluatorHeartbeatProto">The heartbeat to send or queue. Its
/// <c>evaluator_status</c> is dereferenced unconditionally.</param>
/// <exception cref="ReefRuntimeException">
/// Thrown when sending fails while a task is RUNNING, <c>_driverConnection.Get()</c> is a
/// <c>MissingDriverConnection</c>, and the failure count has reached
/// <c>_maxHeartbeatRetriesForNonRecoveryMode</c>.
/// </exception>
/// <remarks>
/// The whole operation runs under a lock on <c>_queuedHeartbeats</c>.
/// <list type="bullet">
/// <item>Once <c>_isCompletedHeartbeatQueued</c> is set, later calls log a warning and return.</item>
/// <item>The flag is set when <c>IsEvaluatorStateCompleted</c> accepts the heartbeat's evaluator
/// state, before any send attempt.</item>
/// <item>In RECOVERY mode the heartbeat is enqueued and nothing is sent.</item>
/// <item>A successful send resets <c>_heartbeatFailures</c> to zero.</item>
/// <item>A failed send while no task is RUNNING is handed to <c>Exceptions.Throw</c>
/// (presumably rethrowing; confirm against that helper).</item>
/// <item>A failed send while a task is RUNNING increments the failure counter and enqueues the
/// heartbeat. With a real driver connection, reaching <c>_maxHeartbeatRetries</c> dispatches a
/// Disconnected message, switches the operation state to RECOVERY and clears the counter.</item>
/// </list>
/// </remarks>
public void Send(EvaluatorHeartbeatProto evaluatorHeartbeatProto)
{
    lock (_queuedHeartbeats)
    {
        // Do not send a heartbeat if Evaluator has already signaled that it was done.
        if (_isCompletedHeartbeatQueued)
        {
            LOGGER.Log(Level.Warning, "Evaluator trying to schedule a heartbeat after a completed heartbeat has already been scheduled or sent.");
            return;
        }

        if (IsEvaluatorStateCompleted(evaluatorHeartbeatProto.evaluator_status.state))
        {
            _isCompletedHeartbeatQueued = true;
        }

        if (_evaluatorSettings.OperationState == EvaluatorOperationState.RECOVERY)
        {
            LOGGER.Log(Level.Warning, string.Format(CultureInfo.InvariantCulture, "In RECOVERY mode, heartbeat queued as [{0}]. ", evaluatorHeartbeatProto));
            _queuedHeartbeats.Enqueue(evaluatorHeartbeatProto);
            return;
        }

        // NOT during recovery, try to send
        REEFMessage payload = new REEFMessage(evaluatorHeartbeatProto);
        try
        {
            _observer.OnNext(payload);
            _heartbeatFailures = 0; // reset failure counts if we are having intermittent (not continuous) failures
        }
        catch (Exception e)
        {
            // Recovery is only attempted while a task is running; otherwise the failure is escalated.
            if (evaluatorHeartbeatProto.task_status == null || evaluatorHeartbeatProto.task_status.state != State.RUNNING)
            {
                Utilities.Diagnostics.Exceptions.Throw(e, "Lost communications to driver when no task is running, recovery NOT supported for such scenario", LOGGER);
            }

            // Keep the heartbeat so it is not lost, and count the failure.
            _heartbeatFailures++;
            _queuedHeartbeats.Enqueue(evaluatorHeartbeatProto);
            LOGGER.Log(Level.Error, string.Format(CultureInfo.InvariantCulture, "Sending heartbeat to driver experienced #{0} failure. Hearbeat queued as: [{1}]. ", _heartbeatFailures, evaluatorHeartbeatProto), e);

            if (_driverConnection.Get() is MissingDriverConnection)
            {
                // No reconnect implementation is available, so the only option past the limit is to fail.
                if (_heartbeatFailures >= _maxHeartbeatRetriesForNonRecoveryMode)
                {
                    // Split as a compile-time concatenation: a regular string literal cannot
                    // contain a raw newline, and the message is meant to be one line of text.
                    var msg = string.Format(
                        CultureInfo.InvariantCulture,
                        "Have encountered {0} heartbeat failures. Limit of heartbeat sending failures exceeded. " +
                        "Driver reconnect logic is not implemented, failing evaluator.",
                        _heartbeatFailures);
                    LOGGER.Log(Level.Error, msg);
                    throw new ReefRuntimeException(msg, e);
                }
            }
            else
            {
                if (_heartbeatFailures >= _maxHeartbeatRetries)
                {
                    LOGGER.Log(Level.Warning, "Heartbeat communications to driver reached max of {0} failures. Driver is considered dead/unreachable", _heartbeatFailures);
                    LOGGER.Log(Level.Info, "Entering RECOVERY mode!!!");
                    ContextManager.HandleDriverConnectionMessage(
                        new DriverConnectionMessageImpl(DriverConnectionState.Disconnected));

                    // NOTE(review): this logs the _driverConnection holder itself rather than
                    // _driverConnection.Get() — confirm which one is intended.
                    LOGGER.Log(Level.Info, "instantiate driver reconnect implementation: " + _driverConnection);
                    _evaluatorSettings.OperationState = EvaluatorOperationState.RECOVERY;

                    // clean heartbeat failure
                    _heartbeatFailures = 0;
                }
            }
        }
    }
}