Exemplo n.º 1
0
        /// <summary>
        /// Attempts to deliver a heartbeat to the driver, queuing it when it cannot be sent.
        /// </summary>
        /// <remarks>
        /// The whole operation runs while holding a lock on the queued-heartbeat collection.
        /// In RECOVERY state the heartbeat is only queued. Otherwise it is wrapped in a
        /// <c>REEFMessage</c> and passed to the observer. When that call fails, the exception is
        /// handed to <c>Exceptions.Throw</c> unless the heartbeat reports a RUNNING task; after
        /// that the failure counter is incremented and the heartbeat is queued. Once the counter
        /// reaches the configured maximum, a disconnect message is raised, an
        /// <c>IDriverConnection</c> is obtained from the evaluator injector, the operation state
        /// becomes RECOVERY and the counter is reset.
        /// </remarks>
        /// <param name="evaluatorHeartbeatProto">The heartbeat to send or queue.</param>
        public void Send(EvaluatorHeartbeatProto evaluatorHeartbeatProto)
        {
            lock (_queuedHeartbeats)
            {
                // While recovering nothing is sent; the heartbeat is just parked in the queue.
                if (_evaluatorSettings.OperationState == EvaluatorOperationState.RECOVERY)
                {
                    string queuedNotice = string.Format(CultureInfo.InvariantCulture, "In RECOVERY mode, heartbeat queued as [{0}]. ", evaluatorHeartbeatProto);
                    LOGGER.Log(Level.Warning, queuedNotice);
                    _queuedHeartbeats.Enqueue(evaluatorHeartbeatProto);
                    return;
                }

                // Built outside the try block so that a construction failure is not treated as a send failure.
                REEFMessage message = new REEFMessage(evaluatorHeartbeatProto);
                try
                {
                    _observer.OnNext(message);

                    // A successful send clears the count, so only consecutive failures accumulate.
                    _heartbeatFailures = 0;
                }
                catch (Exception sendFailure)
                {
                    bool taskIsRunning = evaluatorHeartbeatProto.task_status != null
                        && evaluatorHeartbeatProto.task_status.state == State.RUNNING;
                    if (!taskIsRunning)
                    {
                        Utilities.Diagnostics.Exceptions.Throw(sendFailure, "Lost communications to driver when no task is running, recovery NOT supported for such scenario", LOGGER);
                    }

                    _heartbeatFailures++;
                    _queuedHeartbeats.Enqueue(evaluatorHeartbeatProto);

                    string failureNotice = string.Format(CultureInfo.InvariantCulture, "Sending heartbeat to driver experienced #{0} failure. Hearbeat queued as: [{1}]. ", _heartbeatFailures, evaluatorHeartbeatProto);
                    LOGGER.Log(Level.Error, failureNotice, sendFailure);

                    // Below the retry limit: keep the heartbeat queued and stay in the current state.
                    if (_heartbeatFailures < _maxHeartbeatRetries)
                    {
                        return;
                    }

                    LOGGER.Log(Level.Warning, "Heartbeat communications to driver reached max of {0} failures. Driver is considered dead/unreachable", _heartbeatFailures);
                    LOGGER.Log(Level.Info, "=========== Entering RECOVERY mode. ===========");
                    ContextManager.HandleDriverConnectionMessage(new DriverConnectionMessageImpl(DriverConnectionState.Disconnected));

                    try
                    {
                        _driverConnection = _evaluatorSettings.EvaluatorInjector.GetInstance<IDriverConnection>();
                    }
                    catch (Exception injectionFailure)
                    {
                        Utilities.Diagnostics.Exceptions.CaughtAndThrow(injectionFailure, Level.Error, "Failed to inject the driver reconnect implementation", LOGGER);
                    }

                    LOGGER.Log(Level.Info, "instantiate driver reconnect implementation: " + _driverConnection);
                    _evaluatorSettings.OperationState = EvaluatorOperationState.RECOVERY;

                    // Start counting from zero again now that recovery has begun.
                    _heartbeatFailures = 0;
                }
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Chooses the driver reconnection configuration provider to use, based on which of the
        /// two injected objects is something other than its default/placeholder type.
        /// </summary>
        /// <param name="driverReconnConfigProvider">The injected reconnection configuration provider.</param>
        /// <param name="driverConnection">The injected driver connection.</param>
        /// <returns>
        /// <paramref name="driverReconnConfigProvider"/> when it is not a
        /// <c>DefaultDriverReconnConfigProvider</c>; otherwise a
        /// <c>DynamicDriverReconnConfigProvider</c> built from the runtime type of
        /// <paramref name="driverConnection"/> when that is not a <c>MissingDriverConnection</c>;
        /// otherwise a freshly injected <c>DefaultDriverReconnConfigProvider</c>.
        /// </returns>
        private static IDriverReconnConfigProvider GetDriverReconnectionProvider(
            IDriverReconnConfigProvider driverReconnConfigProvider,
            IDriverConnection driverConnection)
        {
            if (driverReconnConfigProvider is DefaultDriverReconnConfigProvider)
            {
                if (driverConnection is MissingDriverConnection)
                {
                    // Neither the newer nor the older configuration was bound by the user.
                    // This is a stop gap for deprecation: an IDriverConnection implementation cannot
                    // be bound to the driver CLRBridgeConfiguration if the user already bound one,
                    // because the driver and Evaluator configurations are combined at the Evaluator.
                    // The provider returned here does not bind IDriverConnection, so no TANG
                    // conflict occurs.
                    return TangFactory.GetTang().NewInjector().GetInstance<DefaultDriverReconnConfigProvider>();
                }

                // The user bound the old-style configuration; wrap its concrete type in the
                // dynamic configuration provider.
                return new DynamicDriverReconnConfigProvider(driverConnection.GetType());
            }

            // A non-default provider means the user bound the newer configuration; use it as is.
            return driverReconnConfigProvider;
        }
        /// <summary>
        /// Sends the given heartbeat to the driver, or queues it if sending is not possible.
        /// </summary>
        /// <remarks>
        /// Runs under a lock on the queue of pending heartbeats. If the evaluator is already in
        /// RECOVERY state, the heartbeat is queued and nothing is sent. If passing the message to
        /// the observer throws, the exception goes to <c>Exceptions.Throw</c> when the heartbeat
        /// carries no task status or a task state other than RUNNING; the heartbeat is then
        /// queued and the failure counter incremented. Reaching the maximum number of failures
        /// raises a disconnect message, obtains an <c>IDriverConnection</c> from the evaluator
        /// injector, switches the operation state to RECOVERY and resets the counter.
        /// </remarks>
        /// <param name="evaluatorHeartbeatProto">The heartbeat to deliver or queue.</param>
        public void Send(EvaluatorHeartbeatProto evaluatorHeartbeatProto)
        {
            lock (_queuedHeartbeats)
            {
                bool recovering = _evaluatorSettings.OperationState == EvaluatorOperationState.RECOVERY;
                if (recovering)
                {
                    // Sending is suspended during recovery; hold the heartbeat for later.
                    LOGGER.Log(
                        Level.Warning,
                        string.Format(CultureInfo.InvariantCulture, "In RECOVERY mode, heartbeat queued as [{0}]. ", evaluatorHeartbeatProto));
                    _queuedHeartbeats.Enqueue(evaluatorHeartbeatProto);
                    return;
                }

                // Normal operation: wrap the heartbeat and try to push it to the observer.
                REEFMessage outgoing = new REEFMessage(evaluatorHeartbeatProto);
                try
                {
                    _observer.OnNext(outgoing);

                    // Any success resets the counter, so intermittent failures do not add up.
                    _heartbeatFailures = 0;
                }
                catch (Exception error)
                {
                    bool noRunningTask = evaluatorHeartbeatProto.task_status == null
                        || evaluatorHeartbeatProto.task_status.state != State.RUNNING;
                    if (noRunningTask)
                    {
                        Utilities.Diagnostics.Exceptions.Throw(
                            error,
                            "Lost communications to driver when no task is running, recovery NOT supported for such scenario",
                            LOGGER);
                    }

                    _heartbeatFailures++;
                    _queuedHeartbeats.Enqueue(evaluatorHeartbeatProto);
                    LOGGER.Log(
                        Level.Error,
                        string.Format(CultureInfo.InvariantCulture, "Sending heartbeat to driver experienced #{0} failure. Hearbeat queued as: [{1}]. ", _heartbeatFailures, evaluatorHeartbeatProto),
                        error);

                    bool retriesExhausted = _heartbeatFailures >= _maxHeartbeatRetries;
                    if (retriesExhausted)
                    {
                        LOGGER.Log(Level.Warning, "Heartbeat communications to driver reached max of {0} failures. Driver is considered dead/unreachable", _heartbeatFailures);
                        LOGGER.Log(Level.Info, "=========== Entering RECOVERY mode. ===========");

                        DriverConnectionMessageImpl disconnected = new DriverConnectionMessageImpl(DriverConnectionState.Disconnected);
                        ContextManager.HandleDriverConnectionMessage(disconnected);

                        try
                        {
                            _driverConnection = _evaluatorSettings.EvaluatorInjector.GetInstance<IDriverConnection>();
                        }
                        catch (Exception injectionError)
                        {
                            Utilities.Diagnostics.Exceptions.CaughtAndThrow(injectionError, Level.Error, "Failed to inject the driver reconnect implementation", LOGGER);
                        }

                        LOGGER.Log(Level.Info, "instantiate driver reconnect implementation: " + _driverConnection);
                        _evaluatorSettings.OperationState = EvaluatorOperationState.RECOVERY;

                        // The counter restarts once the evaluator has switched to recovery.
                        _heartbeatFailures = 0;
                    }
                }
            }
        }
Exemplo n.º 4
0
 /// <summary>
 /// Creates a RemoteLearnMode that keeps a reference to the supplied driver connection.
 /// </summary>
 /// <param name="connection">The driver connection stored by this instance; no validation is performed.</param>
 public RemoteLearnMode(IDriverConnection connection)
 {
     this._connection = connection;
 }
Exemplo n.º 5
0
        /// <summary>
        /// Builds the driver bridge from its injected handler sets and collaborators.
        /// </summary>
        /// <remarks>
        /// In order, the constructor: registers every supplied trace listener with the logger,
        /// obtains the logger for this class, applies <paramref name="traceLevel"/> as the custom
        /// log level when it parses as a <c>Level</c> (logging a warning otherwise), stores the
        /// handler sets, serializes the merged configuration of all configuration providers
        /// (including <paramref name="driverReconnConfigProvider"/>) into a string, and creates
        /// one <c>ClrSystemHandler</c> subscriber per event type.
        /// The <paramref name="driverConnection"/> argument is not read by this constructor body.
        /// </remarks>
        /// <param name="traceListeners">Trace listeners added to the logger before anything is logged.</param>
        /// <param name="configurationProviders">Providers whose configurations are merged and serialized.</param>
        /// <param name="traceLevel">Name of the log level to apply; must be non-null.</param>
        /// <param name="driverReconnConfigProvider">Additional provider merged with <paramref name="configurationProviders"/>.</param>
        /// <param name="driverConnection">Injected driver connection; unused here.</param>
        /// <param name="httpServerHandler">HTTP server handler stored on the instance.</param>
        /// <param name="progressProvider">Progress provider stored on the instance.</param>
        /// <param name="serializer">Serializer used to turn the merged configuration into a string.</param>
        public DriverBridge(
            [Parameter(Value = typeof(DriverBridgeConfigurationOptions.DriverStartedHandlers))] ISet<IObserver<IDriverStarted>> driverStartHandlers,
            [Parameter(Value = typeof(DriverBridgeConfigurationOptions.DriverRestartedHandlers))] ISet<IObserver<IDriverRestarted>> driverRestartedHandlers,
            [Parameter(Value = typeof(DriverBridgeConfigurationOptions.EvaluatorRequestHandlers))] ISet<IObserver<IEvaluatorRequestor>> evaluatorRequestHandlers,
            [Parameter(Value = typeof(DriverBridgeConfigurationOptions.AllocatedEvaluatorHandlers))] ISet<IObserver<IAllocatedEvaluator>> allocatedEvaluatorHandlers,
            [Parameter(Value = typeof(DriverBridgeConfigurationOptions.ActiveContextHandlers))] ISet<IObserver<IActiveContext>> activeContextHandlers,
            [Parameter(Value = typeof(DriverBridgeConfigurationOptions.TaskMessageHandlers))] ISet<IObserver<ITaskMessage>> taskMessageHandlers,
            [Parameter(Value = typeof(DriverBridgeConfigurationOptions.FailedTaskHandlers))] ISet<IObserver<IFailedTask>> failedTaskHandlers,
            [Parameter(Value = typeof(DriverBridgeConfigurationOptions.FailedEvaluatorHandlers))] ISet<IObserver<IFailedEvaluator>> failedEvaluatorHandlers,
            [Parameter(Value = typeof(DriverBridgeConfigurationOptions.CompletedEvaluatorHandlers))] ISet<IObserver<ICompletedEvaluator>> completedEvaluatorHandlers,
            [Parameter(Value = typeof(DriverBridgeConfigurationOptions.RunningTaskHandlers))] ISet<IObserver<IRunningTask>> runningTaskHandlers,
            [Parameter(Value = typeof(DriverBridgeConfigurationOptions.CompletedTaskHandlers))] ISet<IObserver<ICompletedTask>> completedTaskHandlers,
            [Parameter(Value = typeof(DriverBridgeConfigurationOptions.SuspendedTaskHandlers))] ISet<IObserver<ISuspendedTask>> suspendedTaskHandlers,
            [Parameter(Value = typeof(DriverBridgeConfigurationOptions.ClosedContextHandlers))] ISet<IObserver<IClosedContext>> closedContextHandlers,
            [Parameter(Value = typeof(DriverBridgeConfigurationOptions.FailedContextHandlers))] ISet<IObserver<IFailedContext>> failedContextHandlers,
            [Parameter(Value = typeof(DriverBridgeConfigurationOptions.ContextMessageHandlers))] ISet<IObserver<IContextMessage>> contextMessageHandlers,
            [Parameter(Value = typeof(DriverBridgeConfigurationOptions.DriverRestartActiveContextHandlers))] ISet<IObserver<IActiveContext>> driverRestartActiveContextHandlers,
            [Parameter(Value = typeof(DriverBridgeConfigurationOptions.DriverRestartRunningTaskHandlers))] ISet<IObserver<IRunningTask>> driverRestartRunningTaskHandlers,
            [Parameter(Value = typeof(DriverBridgeConfigurationOptions.DriverRestartCompletedHandlers))] ISet<IObserver<IDriverRestartCompleted>> driverRestartCompletedHandlers,
            [Parameter(Value = typeof(DriverBridgeConfigurationOptions.DriverRestartFailedEvaluatorHandlers))] ISet<IObserver<IFailedEvaluator>> driverRestartFailedEvaluatorHandlers,
            [Parameter(Value = typeof(DriverBridgeConfigurationOptions.TraceListenersSet))] ISet<TraceListener> traceListeners,
            [Parameter(Value = typeof(EvaluatorConfigurationProviders))] ISet<IConfigurationProvider> configurationProviders,
            [Parameter(Value = typeof(DriverBridgeConfigurationOptions.TraceLevel))] string traceLevel,
            IDriverReconnConfigProvider driverReconnConfigProvider,
            IDriverConnection driverConnection,
            HttpServerHandler httpServerHandler,
            IProgressProvider progressProvider,
            AvroConfigurationSerializer serializer)
        {
            // Listeners are attached first so that they are in place before the first log call below.
            foreach (TraceListener traceListener in traceListeners)
            {
                Logger.AddTraceListener(traceListener);
            }

            _logger = Logger.GetLogger(typeof(DriverBridge));
            _logger.Log(Level.Info, "Constructing DriverBridge");

            // Apply the requested level only when the text names a valid Level value.
            Level parsedLevel;
            if (Enum.TryParse(traceLevel.ToString(CultureInfo.InvariantCulture), out parsedLevel))
            {
                Logger.SetCustomLevel(parsedLevel);
            }
            else
            {
                string invalidLevelNotice = string.Format(CultureInfo.InvariantCulture, "Invalid trace level {0} provided, will by default use verbose level", traceLevel);
                _logger.Log(Level.Warning, invalidLevelNotice);
            }

            // Handler sets supplied through injection.
            _driverStartHandlers = driverStartHandlers;
            _evaluatorRequestHandlers = evaluatorRequestHandlers;
            _allocatedEvaluatorHandlers = allocatedEvaluatorHandlers;
            _activeContextHandlers = activeContextHandlers;
            _taskMessageHandlers = taskMessageHandlers;
            _failedEvaluatorHandlers = failedEvaluatorHandlers;
            _failedTaskHandlers = failedTaskHandlers;
            _completedTaskHandlers = completedTaskHandlers;
            _runningTaskHandlers = runningTaskHandlers;
            _suspendedTaskHandlers = suspendedTaskHandlers;
            _completedEvaluatorHandlers = completedEvaluatorHandlers;
            _closedContextHandlers = closedContextHandlers;
            _failedContextHandlers = failedContextHandlers;
            _contextMessageHandlers = contextMessageHandlers;
            _driverRestartedHandlers = driverRestartedHandlers;
            _driverRestartActiveContextHandlers = driverRestartActiveContextHandlers;
            _driverRestartRunningTaskHandlers = driverRestartRunningTaskHandlers;
            _driverRestartCompletedHandlers = driverRestartCompletedHandlers;
            _driverRestartFailedEvaluatorHandlers = driverRestartFailedEvaluatorHandlers;
            _httpServerHandler = httpServerHandler;

            // Merge the configuration of every provider, including the reconnection provider,
            // and keep the serialized form.
            var allProviders = new HashSet<IConfigurationProvider>(configurationProviders);
            allProviders.Add(driverReconnConfigProvider);
            var providerConfigurations = allProviders.Select(provider => provider.GetConfiguration()).ToArray();
            _configurationProviderString = serializer.ToString(Configurations.Merge(providerConfigurations));
            _progressProvider = progressProvider;

            // One CLR system handler per event type.
            _allocatedEvaluatorSubscriber = new ClrSystemHandler<IAllocatedEvaluator>();
            _completedEvaluatorSubscriber = new ClrSystemHandler<ICompletedEvaluator>();
            _taskMessageSubscriber = new ClrSystemHandler<ITaskMessage>();
            _activeContextSubscriber = new ClrSystemHandler<IActiveContext>();
            _failedTaskSubscriber = new ClrSystemHandler<IFailedTask>();
            _failedEvaluatorSubscriber = new ClrSystemHandler<IFailedEvaluator>();
            _httpServerEventSubscriber = new ClrSystemHandler<IHttpMessage>();
            _completedTaskSubscriber = new ClrSystemHandler<ICompletedTask>();
            _runningTaskSubscriber = new ClrSystemHandler<IRunningTask>();
            _suspendedTaskSubscriber = new ClrSystemHandler<ISuspendedTask>();
            _closedContextSubscriber = new ClrSystemHandler<IClosedContext>();
            _failedContextSubscriber = new ClrSystemHandler<IFailedContext>();
            _contextMessageSubscriber = new ClrSystemHandler<IContextMessage>();
            _driverRestartedSubscriber = new ClrSystemHandler<IDriverRestarted>();
            _driverRestartActiveContextSubscriber = new ClrSystemHandler<IActiveContext>();
            _driverRestartRunningTaskSubscriber = new ClrSystemHandler<IRunningTask>();
            _driverRestartCompletedSubscriber = new ClrSystemHandler<IDriverRestartCompleted>();
            _driverRestartFailedEvaluatorSubscriber = new ClrSystemHandler<IFailedEvaluator>();
        }