Пример #1
0
        /// <summary>
        /// Decodes an <c>EvaluatorHeartbeatProto</c> from its serialized byte form.
        /// </summary>
        /// <param name="bytes">Buffer holding the serialized heartbeat; it is wrapped in a <c>MemoryStream</c> for reading.</param>
        /// <returns>The heartbeat produced by <c>Serializer.Deserialize</c>.</returns>
        public static EvaluatorHeartbeatProto Deserialize(byte[] bytes)
        {
            using (MemoryStream input = new MemoryStream(bytes))
            {
                // Returning from inside the using block still disposes the stream before the caller resumes.
                return Serializer.Deserialize<EvaluatorHeartbeatProto>(input);
            }
        }
Пример #2
0
 /// <summary>
 /// Creates a message that carries an evaluator heartbeat.
 /// </summary>
 /// <param name="evaluatorHeartbeatProto">Heartbeat stored in this message.</param>
 public REEFMessage(EvaluatorHeartbeatProto evaluatorHeartbeatProto)
 {
     this._evaluatorHeartBeat = evaluatorHeartbeatProto;
 }
 /// <summary>
 /// Assembles a heartbeat from an evaluator status, a set of context statuses and an optional task status.
 /// </summary>
 /// <param name="evaluatorStatusProto">Status assigned to the heartbeat's evaluator_status.</param>
 /// <param name="contextStatusProtos">Context statuses appended, in enumeration order, to the heartbeat's context_status.</param>
 /// <param name="taskStatusProto">Task status; copied into the heartbeat only when present.</param>
 /// <returns>The newly created heartbeat, timestamped with CurrentTimeMilliSeconds().</returns>
 private EvaluatorHeartbeatProto GetEvaluatorHeartbeatProto(
     EvaluatorStatusProto evaluatorStatusProto,
     ICollection<ContextStatusProto> contextStatusProtos,
     Optional<TaskStatusProto> taskStatusProto)
 {
     var heartbeat = new EvaluatorHeartbeatProto();
     heartbeat.timestamp = CurrentTimeMilliSeconds();
     heartbeat.evaluator_status = evaluatorStatusProto;

     foreach (ContextStatusProto status in contextStatusProtos)
     {
         heartbeat.context_status.Add(status);
     }

     // task_status is left untouched when no task status was supplied.
     if (taskStatusProto.IsPresent())
     {
         heartbeat.task_status = taskStatusProto.Value;
     }

     return heartbeat;
 }
 /// <summary>
 /// Marks a heartbeat, and every status it carries, as a recovery heartbeat.
 /// </summary>
 /// <param name="heartbeat">Heartbeat to mark; it is modified in place.</param>
 /// <returns>The same heartbeat instance that was passed in.</returns>
 private EvaluatorHeartbeatProto ConstructRecoveryHeartBeat(EvaluatorHeartbeatProto heartbeat)
 {
     heartbeat.recovery = true;
     heartbeat.context_status.ForEach(c => c.recovery = true);

     // task_status is optional: heartbeats built without a task leave it unset,
     // so guard the dereference instead of throwing NullReferenceException.
     if (heartbeat.task_status != null)
     {
         heartbeat.task_status.recovery = true;
     }

     return heartbeat;
 }
 /// <summary>
 /// Called with a specific EvaluatorStatus that must be delivered to the driver.
 /// Wraps the status in a freshly timestamped heartbeat and passes it to Send.
 /// </summary>
 /// <param name="evaluatorStatusProto">Evaluator status placed into the heartbeat's evaluator_status.</param>
 public void OnNext(EvaluatorStatusProto evaluatorStatusProto)
 {
     LOGGER.Log(Level.Verbose, "Before acquiring lock: HeartbeatManager::OnNext(EvaluatorStatusProto)");

     // NOTE(review): locking on `this` lets outside code contend for the same monitor;
     // a private lock object would be safer but needs a field declared outside this method.
     lock (this)
     {
         LOGGER.Log(Level.Verbose, "HeartbeatManager::OnNext(EvaluatorStatusProto)");

         var heartbeat = new EvaluatorHeartbeatProto();
         heartbeat.timestamp = CurrentTimeMilliSeconds();
         heartbeat.evaluator_status = evaluatorStatusProto;

         string announcement = string.Format(CultureInfo.InvariantCulture, "Triggered a heartbeat: {0}.", heartbeat);
         LOGGER.Log(Level.Info, announcement);

         Send(heartbeat);
     }
 }
        /// <summary>
        /// Sends a heartbeat to the driver through the observer, or queues it when it cannot be delivered.
        /// All work happens while holding the lock on _queuedHeartbeats.
        /// </summary>
        /// <remarks>
        /// In RECOVERY state the heartbeat is queued without a send attempt. When a send fails while a task
        /// is RUNNING, the heartbeat is queued and the failure counted; once the count reaches
        /// _maxHeartbeatRetries the evaluator switches to RECOVERY state.
        /// </remarks>
        /// <param name="evaluatorHeartbeatProto">Heartbeat to deliver or queue.</param>
        public void Send(EvaluatorHeartbeatProto evaluatorHeartbeatProto)
        {
            lock (_queuedHeartbeats)
            {
                bool inRecovery = _evaluatorSettings.OperationState == EvaluatorOperationState.RECOVERY;
                if (inRecovery)
                {
                    string queuedNotice = string.Format(CultureInfo.InvariantCulture, "In RECOVERY mode, heartbeat queued as [{0}]. ", evaluatorHeartbeatProto);
                    LOGGER.Log(Level.Warning, queuedNotice);
                    _queuedHeartbeats.Enqueue(evaluatorHeartbeatProto);
                    return;
                }

                // Not in recovery: attempt the actual send.
                REEFMessage message = new REEFMessage(evaluatorHeartbeatProto);
                try
                {
                    _observer.OnNext(message);

                    // A successful send clears the count, so only consecutive failures accumulate.
                    _heartbeatFailures = 0;
                }
                catch (Exception sendError)
                {
                    var taskStatus = evaluatorHeartbeatProto.task_status;
                    bool taskIsRunning = taskStatus != null && taskStatus.state == State.RUNNING;
                    if (!taskIsRunning)
                    {
                        // Presumably Exceptions.Throw raises and does not return; the code below assumes a running task.
                        Utilities.Diagnostics.Exceptions.Throw(sendError, "Lost communications to driver when no task is running, recovery NOT supported for such scenario", LOGGER);
                    }

                    _heartbeatFailures++;
                    _queuedHeartbeats.Enqueue(evaluatorHeartbeatProto);

                    string failureNotice = string.Format(CultureInfo.InvariantCulture, "Sending heartbeat to driver experienced #{0} failure. Hearbeat queued as: [{1}]. ", _heartbeatFailures, evaluatorHeartbeatProto);
                    LOGGER.Log(Level.Error, failureNotice, sendError);

                    // Below the retry limit there is nothing more to do for this heartbeat.
                    if (_heartbeatFailures < _maxHeartbeatRetries)
                    {
                        return;
                    }

                    LOGGER.Log(Level.Warning, "Heartbeat communications to driver reached max of {0} failures. Driver is considered dead/unreachable", _heartbeatFailures);
                    LOGGER.Log(Level.Info, "=========== Entering RECOVERY mode. ===========");
                    ContextManager.HandleDriverConnectionMessage(new DriverConnectionMessageImpl(DriverConnectionState.Disconnected));

                    try
                    {
                        _driverConnection = _evaluatorSettings.EvaluatorInjector.GetInstance<IDriverConnection>();
                    }
                    catch (Exception injectionError)
                    {
                        Utilities.Diagnostics.Exceptions.CaughtAndThrow(injectionError, Level.Error, "Failed to inject the driver reconnect implementation", LOGGER);
                    }

                    LOGGER.Log(Level.Info, "instantiate driver reconnect implementation: " + _driverConnection);
                    _evaluatorSettings.OperationState = EvaluatorOperationState.RECOVERY;

                    // Start counting failures afresh once recovery has begun.
                    _heartbeatFailures = 0;
                }
            }
        }
Пример #7
0
 /// <summary>
 /// Creates a message that carries an evaluator heartbeat.
 /// </summary>
 /// <param name="evaluatorHeartbeatProto">Heartbeat stored in this message.</param>
 public REEFMessage(EvaluatorHeartbeatProto evaluatorHeartbeatProto)
 {
     this._evaluatorHeartBeat = evaluatorHeartbeatProto;
 }
Пример #8
0
        /// <summary>
        /// Sends a heartbeat to the driver through the observer, or queues it when it cannot be delivered.
        /// All work happens while holding the lock on _queuedHeartbeats.
        /// </summary>
        /// <remarks>
        /// Nothing is sent or queued once a heartbeat with a completed evaluator state has been accepted.
        /// In RECOVERY state the heartbeat is queued without a send attempt. When a send fails while a task
        /// is RUNNING, the heartbeat is queued and the failure counted; what happens at the retry limit
        /// depends on whether the driver connection is a MissingDriverConnection.
        /// </remarks>
        /// <param name="evaluatorHeartbeatProto">Heartbeat to deliver or queue; its evaluator_status is read.</param>
        /// <exception cref="ReefRuntimeException">
        /// The driver connection is a MissingDriverConnection and the failure count has reached
        /// _maxHeartbeatRetriesForNonRecoveryMode.
        /// </exception>
        public void Send(EvaluatorHeartbeatProto evaluatorHeartbeatProto)
        {
            lock (_queuedHeartbeats)
            {
                // Do not send a heartbeat if Evaluator has already signaled that it was done.
                if (_isCompletedHeartbeatQueued)
                {
                    LOGGER.Log(Level.Warning, "Evaluator trying to schedule a heartbeat after a completed heartbeat has already been scheduled or sent.");
                    return;
                }

                // Record the completed state before sending or queuing, so later calls are rejected above.
                bool announcesCompletion = IsEvaluatorStateCompleted(evaluatorHeartbeatProto.evaluator_status.state);
                if (announcesCompletion)
                {
                    _isCompletedHeartbeatQueued = true;
                }

                bool inRecovery = _evaluatorSettings.OperationState == EvaluatorOperationState.RECOVERY;
                if (inRecovery)
                {
                    string queuedNotice = string.Format(CultureInfo.InvariantCulture, "In RECOVERY mode, heartbeat queued as [{0}]. ", evaluatorHeartbeatProto);
                    LOGGER.Log(Level.Warning, queuedNotice);
                    _queuedHeartbeats.Enqueue(evaluatorHeartbeatProto);
                    return;
                }

                // Not in recovery: attempt the actual send.
                REEFMessage message = new REEFMessage(evaluatorHeartbeatProto);
                try
                {
                    _observer.OnNext(message);

                    // A successful send clears the count, so only consecutive failures accumulate.
                    _heartbeatFailures = 0;
                }
                catch (Exception sendError)
                {
                    var taskStatus = evaluatorHeartbeatProto.task_status;
                    bool taskIsRunning = taskStatus != null && taskStatus.state == State.RUNNING;
                    if (!taskIsRunning)
                    {
                        // Presumably Exceptions.Throw raises and does not return; the code below assumes a running task.
                        Utilities.Diagnostics.Exceptions.Throw(sendError, "Lost communications to driver when no task is running, recovery NOT supported for such scenario", LOGGER);
                    }

                    _heartbeatFailures++;
                    _queuedHeartbeats.Enqueue(evaluatorHeartbeatProto);

                    string failureNotice = string.Format(CultureInfo.InvariantCulture, "Sending heartbeat to driver experienced #{0} failure. Hearbeat queued as: [{1}]. ", _heartbeatFailures, evaluatorHeartbeatProto);
                    LOGGER.Log(Level.Error, failureNotice, sendError);

                    bool reconnectUnavailable = _driverConnection.Get() is MissingDriverConnection;
                    if (reconnectUnavailable)
                    {
                        // No reconnect implementation: tolerate failures up to the limit, then fail the evaluator.
                        if (_heartbeatFailures < _maxHeartbeatRetriesForNonRecoveryMode)
                        {
                            return;
                        }

                        string fatalNotice =
                            string.Format(CultureInfo.InvariantCulture,
                                "Have encountered {0} heartbeat failures. Limit of heartbeat sending failures exceeded. Driver reconnect logic is not implemented, failing evaluator.",
                                _heartbeatFailures);
                        LOGGER.Log(Level.Error, fatalNotice);
                        throw new ReefRuntimeException(fatalNotice, sendError);
                    }

                    // A reconnect implementation exists: below the retry limit there is nothing more to do.
                    if (_heartbeatFailures < _maxHeartbeatRetries)
                    {
                        return;
                    }

                    LOGGER.Log(Level.Warning,
                        "Heartbeat communications to driver reached max of {0} failures. Driver is considered dead/unreachable",
                        _heartbeatFailures);
                    LOGGER.Log(Level.Info, "Entering RECOVERY mode!!!");
                    ContextManager.HandleDriverConnectionMessage(
                        new DriverConnectionMessageImpl(DriverConnectionState.Disconnected));

                    LOGGER.Log(Level.Info, "instantiate driver reconnect implementation: " + _driverConnection);
                    _evaluatorSettings.OperationState = EvaluatorOperationState.RECOVERY;

                    // Start counting failures afresh once recovery has begun.
                    _heartbeatFailures = 0;
                }
            }
        }