Exemplo n.º 1
0
        public static REEFMessage Deserialize(byte[] bytes)
        {
            REEFMessage pbuf = null;

            using (var s = new MemoryStream(bytes))
            {
                pbuf = Serializer.Deserialize <REEFMessage>(s);
            }
            return(pbuf);
        }
        public void Send(EvaluatorHeartbeatProto evaluatorHeartbeatProto)
        {
            lock (_queuedHeartbeats)
            {
                if (_evaluatorSettings.OperationState == EvaluatorOperationState.RECOVERY)
                {
                    LOGGER.Log(Level.Warning, string.Format(CultureInfo.InvariantCulture, "In RECOVERY mode, heartbeat queued as [{0}]. ", evaluatorHeartbeatProto));
                    _queuedHeartbeats.Enqueue(evaluatorHeartbeatProto);
                    return;
                }

                // NOT during recovery, try to send
                REEFMessage payload = new REEFMessage(evaluatorHeartbeatProto);
                try
                {
                    _observer.OnNext(payload);
                    _heartbeatFailures = 0; // reset failure counts if we are having intermidtten (not continuous) failures
                }
                catch (Exception e)
                {
                    if (evaluatorHeartbeatProto.task_status == null || evaluatorHeartbeatProto.task_status.state != State.RUNNING)
                    {
                        Utilities.Diagnostics.Exceptions.Throw(e, "Lost communications to driver when no task is running, recovery NOT supported for such scenario", LOGGER);
                    }

                    _heartbeatFailures++;

                    _queuedHeartbeats.Enqueue(evaluatorHeartbeatProto);
                    LOGGER.Log(Level.Error, string.Format(CultureInfo.InvariantCulture, "Sending heartbeat to driver experienced #{0} failure. Hearbeat queued as: [{1}]. ", _heartbeatFailures, evaluatorHeartbeatProto), e);

                    if (_heartbeatFailures >= _maxHeartbeatRetries)
                    {
                        LOGGER.Log(Level.Warning, "Heartbeat communications to driver reached max of {0} failures. Driver is considered dead/unreachable", _heartbeatFailures);
                        LOGGER.Log(Level.Info, "=========== Entering RECOVERY mode. ===========");
                        ContextManager.HandleDriverConnectionMessage(new DriverConnectionMessageImpl(DriverConnectionState.Disconnected));

                        try
                        {
                            _driverConnection = _evaluatorSettings.EvaluatorInjector.GetInstance<IDriverConnection>();
                        }
                        catch (Exception ex)
                        {
                            Utilities.Diagnostics.Exceptions.CaughtAndThrow(ex, Level.Error, "Failed to inject the driver reconnect implementation", LOGGER);
                        }
                        LOGGER.Log(Level.Info, "instantiate driver reconnect implementation: " + _driverConnection);
                        _evaluatorSettings.OperationState = EvaluatorOperationState.RECOVERY;

                        // clean heartbeat failure
                        _heartbeatFailures = 0;
                    }
                }
            }     
        }
Exemplo n.º 3
0
        public void Send(EvaluatorHeartbeatProto evaluatorHeartbeatProto)
        {
            lock (_queuedHeartbeats)
            {
                // Do not send a heartbeat if Evaluator has already signaled that it was done.
                if (_isCompletedHeartbeatQueued)
                {
                    LOGGER.Log(Level.Warning, "Evaluator trying to schedule a heartbeat after a completed heartbeat has already been scheduled or sent.");
                    return;
                }

                if (IsEvaluatorStateCompleted(evaluatorHeartbeatProto.evaluator_status.state))
                {
                    _isCompletedHeartbeatQueued = true;
                }

                if (_evaluatorSettings.OperationState == EvaluatorOperationState.RECOVERY)
                {
                    LOGGER.Log(Level.Warning, string.Format(CultureInfo.InvariantCulture, "In RECOVERY mode, heartbeat queued as [{0}]. ", evaluatorHeartbeatProto));
                    _queuedHeartbeats.Enqueue(evaluatorHeartbeatProto);
                    return;
                }

                // NOT during recovery, try to send
                REEFMessage payload = new REEFMessage(evaluatorHeartbeatProto);
                try
                {
                    _observer.OnNext(payload);
                    _heartbeatFailures = 0; // reset failure counts if we are having intermittent (not continuous) failures
                }
                catch (Exception e)
                {
                    if (evaluatorHeartbeatProto.task_status == null || evaluatorHeartbeatProto.task_status.state != State.RUNNING)
                    {
                        Utilities.Diagnostics.Exceptions.Throw(e, "Lost communications to driver when no task is running, recovery NOT supported for such scenario", LOGGER);
                    }

                    _heartbeatFailures++;

                    _queuedHeartbeats.Enqueue(evaluatorHeartbeatProto);
                    LOGGER.Log(Level.Error, string.Format(CultureInfo.InvariantCulture, "Sending heartbeat to driver experienced #{0} failure. Hearbeat queued as: [{1}]. ", _heartbeatFailures, evaluatorHeartbeatProto), e);

                    if (_driverConnection.Get() is MissingDriverConnection)
                    {
                        if (_heartbeatFailures >= _maxHeartbeatRetriesForNonRecoveryMode)
                        {
                            var msg =
                                string.Format(CultureInfo.InvariantCulture,
                                    "Have encountered {0} heartbeat failures. Limit of heartbeat sending failures exceeded. Driver reconnect logic is not implemented, failing evaluator.",
                                    _heartbeatFailures);
                            LOGGER.Log(Level.Error, msg);
                            throw new ReefRuntimeException(msg, e);
                        }
                    }
                    else
                    {
                        if (_heartbeatFailures >= _maxHeartbeatRetries)
                        {
                            LOGGER.Log(Level.Warning,
                                "Heartbeat communications to driver reached max of {0} failures. Driver is considered dead/unreachable",
                                _heartbeatFailures);
                            LOGGER.Log(Level.Info, "Entering RECOVERY mode!!!");
                            ContextManager.HandleDriverConnectionMessage(
                                new DriverConnectionMessageImpl(DriverConnectionState.Disconnected));

                            LOGGER.Log(Level.Info, "instantiate driver reconnect implementation: " + _driverConnection);
                            _evaluatorSettings.OperationState = EvaluatorOperationState.RECOVERY;

                            // clean heartbeat failure
                            _heartbeatFailures = 0;
                        }
                    }
                }
            }     
        }