/// <summary>
/// Records a task failure in response to an IFailedEvaluator event.
/// If the evaluator reports a failed task, that task is removed from the running tasks when its
/// state is TaskRunning, and the FailedTaskEvaluatorError event is then applied to it.
/// If no failed task is reported, the task associated with the evaluator is looked up and the
/// FailedTaskEvaluatorError event is applied only when that task is in TaskSubmitted state.
/// An IMRUSystemException is raised when a task in TaskRunning state is missing from the running tasks.
/// </summary>
/// <param name="failedEvaluator">The failed evaluator event.</param>
internal void RecordTaskFailWhenReceivingFailedEvaluator(IFailedEvaluator failedEvaluator)
{
    if (!failedEvaluator.FailedTask.IsPresent())
    {
        // No task was reported with the failure: fall back to the evaluator-to-task association.
        var associatedTaskId = FindTaskAssociatedWithTheEvalutor(failedEvaluator.Id);
        if (GetTaskState(associatedTaskId) == StateMachine.TaskState.TaskSubmitted)
        {
            UpdateState(associatedTaskId, TaskStateEvent.FailedTaskEvaluatorError);
        }

        return;
    }

    var failedTaskId = failedEvaluator.FailedTask.Value.Id;
    var wasRunning = GetTaskState(failedTaskId) == StateMachine.TaskState.TaskRunning;

    // Remove returns false when the key is absent, which is the inconsistent-state case.
    if (wasRunning && !_runningTasks.Remove(failedTaskId))
    {
        var error = string.Format(
            CultureInfo.InvariantCulture,
            "The task [{0}] doesn't exist in Running Tasks.",
            failedTaskId);
        Exceptions.Throw(new IMRUSystemException(error), Logger);
    }

    UpdateState(failedTaskId, TaskStateEvent.FailedTaskEvaluatorError);
}
/// <summary>
/// Removes from the active contexts collection the context that the given IFailedEvaluator reports as failed.
/// Nothing happens when the evaluator reports no failed contexts (null or empty list).
/// An IMRUSystemException is raised when more than one failed context is reported, since the IMRU driver
/// assumes a single context per evaluator, or when the reported context is not in the collection.
/// </summary>
/// <param name="value">The failed evaluator event.</param>
internal void RemoveFailedContextInFailedEvaluator(IFailedEvaluator value)
{
    var reportedContexts = value.FailedContexts;
    if (reportedContexts == null || reportedContexts.Count == 0)
    {
        return;
    }

    if (reportedContexts.Count != 1)
    {
        var tooManyMsg = string.Format(
            CultureInfo.InvariantCulture,
            "There are [{0}] contexts attached in the failed evaluator. Expected number is 1.",
            reportedContexts.Count);
        Exceptions.Throw(new IMRUSystemException(tooManyMsg), Logger);
    }
    else
    {
        var contextId = reportedContexts[0].Id;
        var removed = _activeContexts.Remove(contextId);
        if (!removed)
        {
            var missingMsg = string.Format(
                CultureInfo.InvariantCulture,
                "The active context [{0}] attached in IFailedEvaluator [{1}] is not in the Active Contexts collection.",
                contextId,
                value.Id);
            Exceptions.Throw(new IMRUSystemException(missingMsg), Logger);
        }
    }
}
/// <summary>
/// Verifies that the failed evaluator carries no failed contexts, then logs the receipt message.
/// </summary>
/// <param name="value">The failed evaluator event.</param>
public void OnNext(IFailedEvaluator value)
{
    // The Evaluator fails to instantiate the RootContext, so 0 failed contexts are expected.
    var failedContextCount = value.FailedContexts.Count;
    Assert.Equal(0, failedContextCount);

    Logger.Log(Level.Info, FailedEvaluatorReceived);
}
/// <summary>
/// Verifies that the failed evaluator reports the expected failed task and that the evaluator
/// exception wraps a TestSerializableException carrying the expected message.
/// Throws an Exception describing the mismatch when the inner exception is missing, has the wrong
/// type, or has the wrong message.
/// </summary>
/// <param name="value">The failed evaluator event.</param>
public void OnNext(IFailedEvaluator value)
{
    Assert.True(value.FailedTask.IsPresent());
    var failedTask = value.FailedTask.Value;
    Assert.Equal(TaskId, failedTask.Id);

    // Check that Exceptions are deserialized correctly.
    var ex = value.EvaluatorException.InnerException;
    if (ex == null)
    {
        throw new Exception("Exception was not expected to be null.");
    }

    var taskCloseEx = ex as TestSerializableException;
    if (taskCloseEx == null)
    {
        // Report the type that is actually checked for, so the message matches the cast above.
        throw new Exception("Expected Exception to be of type " + typeof(TestSerializableException).Name +
            ", but instead got type " + ex.GetType().Name);
    }

    if (taskCloseEx.Message != TaskCloseExceptionMessage)
    {
        throw new Exception(
            "Expected message to be " + TaskCloseExceptionMessage + " but instead got " + taskCloseEx.Message + ".");
    }

    Logger.Log(Level.Info, FailedEvaluatorReceived);
}
/// <summary>
/// Reacts to an evaluator failure.
/// When all IMRU tasks are already completed the failure is logged and ignored.
/// Otherwise the failure is counted; exceeding the allowed number of failed evaluators raises a
/// MaximumNumberOfEvaluatorFailuresExceededException. The failure is then recorded with the
/// configuration provider and a replacement evaluator of the same role (master or mapper) is requested.
/// </summary>
/// <param name="value">The failed evaluator event.</param>
public void OnNext(IFailedEvaluator value)
{
    if (AreIMRUTasksCompleted())
    {
        var ignoredMsg = string.Format("Evaluator with Id: {0} failed but IMRU task is completed. So ignoring.", value.Id);
        Logger.Log(Level.Info, ignoredMsg);
        return;
    }

    var failureMsg = string.Format("Evaluator with Id: {0} failed with Exception: {1}", value.Id, value.EvaluatorException);
    Logger.Log(Level.Info, failureMsg);

    int failuresSoFar = Interlocked.Increment(ref _currentFailedEvaluators);
    if (failuresSoFar > _allowedFailedEvaluators)
    {
        Exceptions.Throw(new MaximumNumberOfEvaluatorFailuresExceededException(_allowedFailedEvaluators), Logger);
    }

    _serviceAndContextConfigurationProvider.RecordEvaluatorFailureById(value.Id);

    // Replace the failed evaluator with one of the same role.
    if (_serviceAndContextConfigurationProvider.IsMasterEvaluatorId(value.Id))
    {
        Logger.Log(Level.Info, string.Format("Requesting a replacement master Evaluator for {0}", value.Id));
        RequestUpdateEvaluator();
    }
    else
    {
        Logger.Log(Level.Info, string.Format("Requesting a replacement map Evaluator for {0}", value.Id));
        RequestMapEvaluators(1);
    }
}
/// <summary>
/// Reacts to an evaluator failure.
/// The failure is counted first. If the evaluator reports any failed contexts, or the number of
/// failed evaluators exceeds the allowed limit, an Exception is raised through Exceptions.Throw.
/// Otherwise the failure is reported to the configuration provider and a replacement evaluator is requested.
/// </summary>
/// <param name="value">The failed evaluator event.</param>
public void OnNext(IFailedEvaluator value)
{
    Logger.Log(Level.Info, "An evaluator failed, checking if it failed before context and service was submitted");
    int currFailedEvaluators = Interlocked.Increment(ref _currentFailedEvaluators);

    if (value.FailedContexts != null && value.FailedContexts.Count != 0)
    {
        const string contextFailedMessage = "Some active context failed, cannot continue IMRU task";
        Logger.Log(Level.Info, contextFailedMessage);

        // Carry the reason in the exception itself instead of throwing a message-less Exception.
        Exceptions.Throw(new Exception(contextFailedMessage), Logger);
    }

    if (currFailedEvaluators > _allowedFailedEvaluators)
    {
        Exceptions.Throw(new Exception("Cannot continue IMRU job, Failed evaluators reach maximum limit"), Logger);
    }

    Logger.Log(Level.Info, "Requesting for the failed evaluator again");
    _serviceAndContextConfigurationProvider.EvaluatorFailed(value.Id);

    // if active context stage is reached for Update Task then assume that failed
    // evaluator belongs to mapper
    if (_reachedUpdateTaskActiveContext)
    {
        RequestMapEvaluators(1);
    }
    else
    {
        RequestUpdateEvaluator();
    }
}
/// <summary>
/// Validates a failed evaluator: it must carry an exception containing the expected failure message
/// and a failed task with the expected task ID.
/// While a serializable exception is still expected, the inner exception must be a
/// TestSerializableException with exactly the expected message; the flag is then cleared and one
/// more evaluator is requested. Otherwise the inner exception must be a
/// NonSerializableEvaluatorException whose message contains the expected message.
/// Any violated expectation results in an Exception being thrown.
/// </summary>
/// <param name="value">The failed evaluator event.</param>
public void OnNext(IFailedEvaluator value)
{
    var evaluatorException = value.EvaluatorException;
    if (evaluatorException == null)
    {
        throw new Exception("Evaluator should contain a valid Exception.");
    }

    if (!evaluatorException.Message.Contains(ExpectedEvaluatorFailureMessage))
    {
        throw new Exception("Evaluator expected to contain the message " + ExpectedEvaluatorFailureMessage);
    }

    if (!value.FailedTask.IsPresent())
    {
        throw new Exception("Failed task should be present.");
    }

    if (value.FailedTask.Value.Id != ExpectedTaskId)
    {
        throw new Exception("Failed Task does not have the right Task ID.");
    }

    if (!_shouldReceiveSerializableException)
    {
        var nonSerializable = evaluatorException.InnerException as NonSerializableEvaluatorException;
        if (nonSerializable == null)
        {
            throw new Exception("Evaluator Exception expected to be of type " + typeof(NonSerializableEvaluatorException));
        }

        if (!nonSerializable.Message.Contains(ExpectedEvaluatorFailureMessage))
        {
            throw new Exception("Evaluator InnerException.Message expected to contain the message " + ExpectedEvaluatorFailureMessage);
        }

        Logger.Log(Level.Info, NonSerializableSuccessMessage);
        return;
    }

    var serializable = evaluatorException.InnerException as TestSerializableException;
    if (serializable == null)
    {
        throw new Exception("Evaluator InnerException expected to be of type " + typeof(TestSerializableException).Name);
    }

    if (!serializable.Message.Equals(ExpectedEvaluatorFailureMessage))
    {
        throw new Exception("Evaluator InnerException.Message expected to be " + ExpectedEvaluatorFailureMessage);
    }

    // The serializable case has been seen; the next failure is validated by the other branch.
    _shouldReceiveSerializableException = false;
    Logger.Log(Level.Info, SerializableSuccessMessage);

    var request = _evaluatorRequestor.NewBuilder()
        .SetCores(1)
        .SetNumber(1)
        .Build();
    _evaluatorRequestor.Submit(request);
}
/// <summary>
/// Verifies that the failed evaluator reports the expected failed task and that the evaluator
/// exception wraps a TestSerializableException with the expected message, then logs the receipt message.
/// </summary>
/// <param name="value">The failed evaluator event.</param>
public void OnNext(IFailedEvaluator value)
{
    Assert.True(value.FailedTask.IsPresent());
    Assert.Equal(TaskId, value.FailedTask.Value.Id);

    var inner = value.EvaluatorException.InnerException;
    Assert.True(inner is TestSerializableException);
    Assert.Equal(TaskSuspendExceptionMessage, inner.Message);

    Logger.Log(Level.Info, FailedEvaluatorReceived);
}
/// <summary>
/// Verifies that the failed evaluator carries no failed contexts and that the evaluator exception
/// wraps a TestSerializableException with the expected message, then logs the receipt message.
/// </summary>
/// <param name="value">The failed evaluator event.</param>
public void OnNext(IFailedEvaluator value)
{
    // We should not have any failed contexts since the context has never become active.
    var failedContextCount = value.FailedContexts.Count;
    Assert.Equal(0, failedContextCount);

    var inner = value.EvaluatorException.InnerException;
    Assert.NotNull(inner);
    Assert.True(inner is TestSerializableException);
    Assert.Equal(ExpectedException, inner.Message);

    Logger.Log(Level.Info, FailedEvaluatorReceived);
}
/// <summary>
/// Logs the failed-evaluator message, then verifies that the event reports the expected failed task,
/// exactly one failed context, and an evaluator exception whose evaluator ID matches the event's ID.
/// Logs the success message once all checks pass.
/// </summary>
/// <param name="value">The failed evaluator event.</param>
public void OnNext(IFailedEvaluator value)
{
    Logger.Log(Level.Error, FailedEvaluatorMessage);

    // Assert.Equal takes (expected, actual); keeping that order makes failure messages accurate.
    Assert.True(value.FailedTask.IsPresent());
    Assert.Equal(TaskId, value.FailedTask.Value.Id);
    Assert.Equal(1, value.FailedContexts.Count);
    Assert.Equal(value.Id, value.EvaluatorException.EvaluatorId);

    Logger.Log(Level.Error, RightFailedTaskMessage);
}
/// <summary>
/// Verifies a FailedEvaluator expected to be generated by the Context with ID FailEvaluatorContextId:
/// exactly one failed context with that ID, and an evaluator exception wrapping a
/// TestSerializableException with the expected message. Logs the receipt message afterwards.
/// </summary>
/// <param name="value">The failed evaluator event.</param>
public void OnNext(IFailedEvaluator value)
{
    var failedContexts = value.FailedContexts;
    Assert.Equal(1, failedContexts.Count);
    Assert.Equal(FailEvaluatorContextId, failedContexts.First().Id);

    var inner = value.EvaluatorException.InnerException;
    Assert.NotNull(inner);
    Assert.True(inner is TestSerializableException);
    Assert.Equal(ExpectedException, inner.Message);

    Logger.Log(Level.Info, FailedEvaluatorReceived);
}
/// <summary>
/// Throwing an Exception in a task message handler will result in a Failed Evaluator.
/// Verifies that exactly one context failed and that the evaluator exception wraps a
/// TestSerializableException with the expected message, then logs the receipt message.
/// </summary>
/// <param name="value">The failed evaluator event.</param>
public void OnNext(IFailedEvaluator value)
{
    Assert.Equal(1, value.FailedContexts.Count);

    var inner = value.EvaluatorException.InnerException;
    Assert.NotNull(inner);

    var unexpectedTypeMessage = "Unexpected type of evaluator exception: " + inner.GetType();
    Assert.True(inner is TestSerializableException, unexpectedTypeMessage);
    Assert.Equal(ExpectedExceptionMessage, inner.Message);

    Logger.Log(Level.Info, ReceivedFailedEvaluator);
}
/// <summary>
/// Throwing an Exception in a Context message handler will result in a Failed Evaluator.
/// Verifies the single failed context's ID and that the evaluator exception wraps a
/// ReceiveContextMessageExceptionTestException with the expected message, then logs the receipt message.
/// </summary>
/// <param name="value">The failed evaluator event.</param>
public void OnNext(IFailedEvaluator value)
{
    var failedContexts = value.FailedContexts;
    Assert.Equal(1, failedContexts.Count);
    Assert.Equal(ContextId, failedContexts.Single().Id);

    var inner = value.EvaluatorException.InnerException;
    Assert.NotNull(inner);
    Assert.True(inner is ReceiveContextMessageExceptionTestException);
    Assert.Equal(ExpectedExceptionMessage, inner.Message);

    Logger.Log(Level.Info, ReceivedFailedEvaluator);
}
/// <summary>
/// Logs the failed-evaluator message, then verifies that the event reports the expected failed task,
/// exactly one failed context, and an evaluator exception whose evaluator ID matches the event's ID.
/// Logs the failure details and the success message once all checks pass.
/// </summary>
/// <param name="value">The failed evaluator event.</param>
public void OnNext(IFailedEvaluator value)
{
    Logger.Log(Level.Error, FailedEvaluatorMessage);

    // Assert.Equal takes (expected, actual); keeping that order makes failure messages accurate.
    Assert.True(value.FailedTask.IsPresent());
    Assert.Equal(TaskId, value.FailedTask.Value.Id);
    Assert.Equal(1, value.FailedContexts.Count);
    Assert.Equal(value.Id, value.EvaluatorException.EvaluatorId);

    Logger.Log(
        Level.Error,
        string.Format(
            CultureInfo.CurrentCulture,
            "Failed task id:{0}, failed Evaluator id: {1}, Failed Exception msg: {2},",
            value.FailedTask.Value.Id,
            value.EvaluatorException.EvaluatorId,
            value.EvaluatorException.Message));
    Logger.Log(Level.Error, RightFailedTaskMessage);
}
/// <summary>
/// Verifies that, when an exception is thrown in the TaskCloseHandler, the IFailedEvaluator received here
/// reports a failed task and wraps an exception whose message contains TaskKilledByDriver.
/// Also checks the mapping between the failed task and the single failed context.
/// </summary>
/// <param name="value">The failed evaluator event.</param>
public void OnNext(IFailedEvaluator value)
{
    Assert.True(value.FailedTask.IsPresent());

    var innerException = value.EvaluatorException.InnerException;
    Assert.Contains(TaskKilledByDriver, innerException.Message);
    Logger.Log(Level.Error, "In IFailedEvaluator: " + innerException);

    var failedTaskId = value.FailedTask.Value.Id;
    var failedContextId = value.FailedContexts.Single().Id;
    VerifyContextTaskMapping(failedTaskId, failedContextId);
}
/// <summary>
/// Throwing an Exception in a Driver message handler will result in a Failed Evaluator.
/// Verifies that exactly one context failed, that the expected task is reported as failed, and that
/// the evaluator exception wraps a TestSerializableException with the expected message.
/// Logs the receipt message afterwards.
/// </summary>
/// <param name="value">The failed evaluator event.</param>
public void OnNext(IFailedEvaluator value)
{
    Assert.Equal(1, value.FailedContexts.Count);

    var failedTask = value.FailedTask;
    Assert.True(failedTask.IsPresent());
    Assert.Equal(TaskId, failedTask.Value.Id);

    var inner = value.EvaluatorException.InnerException;
    Assert.NotNull(inner);
    Assert.True(inner is TestSerializableException);
    Assert.Equal(ExpectedExceptionMessage, inner.Message);

    Logger.Log(Level.Info, ReceivedFailedEvaluator);
}
/// <summary>
/// Verifies the IFailedEvaluator received here: a failed task must be present with the ID
/// TaskId + "2", and the evaluator exception must wrap an exception of exactly type
/// TestSerializableException whose message equals TaskKilledByDriver.
/// </summary>
/// <param name="value">The failed evaluator event.</param>
public void OnNext(IFailedEvaluator value)
{
    var failedTask = value.FailedTask;
    Assert.True(failedTask.IsPresent());
    Assert.Equal(TaskId + "2", failedTask.Value.Id);

    var inner = value.EvaluatorException.InnerException;
    var innerType = inner.GetType();
    Logger.Log(Level.Error, "In IFailedTask: e.type: {0}, e.message: {1}.", innerType, inner.Message);

    // Exact type match is required here, not just assignability.
    Assert.Equal(typeof(TestSerializableException), inner.GetType());
    Assert.Equal(TaskKilledByDriver, inner.Message);
}
/// <summary>
/// Verifies that the failed evaluator reports the expected failed task and exactly one failed
/// context with the expected ID, logs the failure, and then submits a new evaluator request.
/// </summary>
/// <param name="value">The failed evaluator event.</param>
public void OnNext(IFailedEvaluator value)
{
    // Run the null checks before anything dereferences the failed task, so a missing task is
    // reported as an assertion failure rather than a NullReferenceException from the log line.
    Assert.NotNull(value.FailedTask);
    Assert.NotNull(value.FailedTask.Value);
    Assert.NotNull(value.FailedTask.Value.Id);

    Logger.Log(Level.Info, FailedEvaluatorMessage + ". Evaluator failed: " + value.Id + " FailedTaskId: " + value.FailedTask.Value.Id);

    Assert.Equal(_failedTaskId, value.FailedTask.Value.Id);
    Assert.Equal(1, value.FailedContexts.Count);
    Assert.Equal(_failedContextId, value.FailedContexts[0].Id);

    _requestor.Submit(_requestor.NewBuilder().Build());
}
/// <summary>
/// Verifies a Failed Evaluator that is expected to carry no failed contexts and an evaluator exception
/// wrapping a TestSerializableException with the expected message, then logs the receipt message.
/// </summary>
/// <param name="value">The failed evaluator event.</param>
public void OnNext(IFailedEvaluator value)
{
    // We will not be expecting any failed contexts here, this is because the Exception
    // is thrown on the heartbeat to the Driver, and will thus fail before the initial heartbeat
    // to the Driver is sent.
    var failedContextCount = value.FailedContexts.Count;
    Assert.Equal(0, failedContextCount);

    var inner = value.EvaluatorException.InnerException;
    Assert.NotNull(inner);
    Assert.True(inner is TestSerializableException);
    Assert.Equal(ExpectedExceptionMessage, inner.Message);

    Logger.Log(Level.Info, ReceivedFailedEvaluator);
}
/// <summary>
/// Logs the failed-evaluator message followed by either the ID of the failed task
/// or a note that no failed task is associated with the evaluator.
/// </summary>
/// <param name="value">The failed evaluator event.</param>
public void OnNext(IFailedEvaluator value)
{
    Logger.Log(Level.Error, FailedEvaluatorMessage);

    var failedTask = value.FailedTask.Value;
    if (failedTask != null)
    {
        Logger.Log(Level.Error, "Failed task id '" + failedTask.Id + "'");
        return;
    }

    // TODO[JIRA REEF-1343]: fail the test if there's no failed task
    Logger.Log(Level.Error, "No failed task associated with failed evaluator");
}
/// <summary>
/// Handles a failed evaluator by counting the failure and, while the count stays below the
/// maximum number of trials, requesting one replacement evaluator with the configured cores and memory.
/// Throws an Exception once the failure count reaches the maximum.
/// </summary>
/// <param name="failedEvaluator">The failed evaluator event.</param>
public void OnNext(IFailedEvaluator failedEvaluator)
{
    Console.WriteLine("Receive a failed evaluator: " + failedEvaluator.Id);

    var failuresSoFar = ++_failureCount;
    if (failuresSoFar >= _maxTrial)
    {
        Console.WriteLine("Exceed max retries number");
        throw new Exception("Unrecoverable evaluator failure.");
    }

    Console.WriteLine("Requesting another evaluator");
    var replacementRequest = _evaluatorRequestor.NewBuilder()
        .SetNumber(1)
        .SetCores(_numOfvCoresPerNode)
        .SetMegabytes(_memoryPerNode)
        .Build();
    _evaluatorRequestor.Submit(replacementRequest);
}
/// <summary>
/// Creates a mock IFailedEvaluator whose FailedContexts returns one mock failed context per given ID.
/// </summary>
/// <param name="ids">IDs of the failed contexts to attach; when null, FailedContexts returns null.</param>
/// <returns>The configured mock IFailedEvaluator.</returns>
private static IFailedEvaluator CreateMockFailedEvaluator(IList<int> ids)
{
    IList<IFailedContext> contexts = null;
    if (ids != null)
    {
        contexts = new List<IFailedContext>();
        foreach (var contextId in ids)
        {
            var mockContext = CreateMockFailedContext(contextId);
            contexts.Add(mockContext);
        }
    }

    var evaluator = Substitute.For<IFailedEvaluator>();
    evaluator.FailedContexts.Returns(contexts);
    return evaluator;
}
/// <summary>
/// Updates the tracked state of a failed evaluator, under the lock, and logs the outcome:
/// an unknown evaluator is recorded as FailedAtRestartInit; an Expected one becomes Expired;
/// RecoveredFinished and NewFinished are left unchanged; any other state becomes UnexpectedFailed.
/// Calls CheckSuccess after logging.
/// </summary>
/// <param name="value">The failed evaluator event.</param>
public void OnNext(IFailedEvaluator value)
{
    var failedId = value.Id;
    string description;

    lock (_lockObj)
    {
        EvaluatorState previousState;
        if (!_evaluators.TryGetValue(failedId, out previousState))
        {
            _evaluators[failedId] = EvaluatorState.FailedAtRestartInit;
            description = "Restart initialization ";
        }
        else if (previousState == EvaluatorState.Expected)
        {
            _evaluators[failedId] = EvaluatorState.Expired;
            description = "Expired on restart ";
        }
        else if (previousState == EvaluatorState.RecoveredFinished || previousState == EvaluatorState.NewFinished)
        {
            // Note: this can be a result of REEF-61 as well, so we ignore Finished tasks and don't mark them as UnexpectedFailed.
            description = "Finished (REEF-61) ";
        }
        else
        {
            _evaluators[failedId] = EvaluatorState.UnexpectedFailed;
            description = "Unexpectedly failed (with original state " + previousState + ") ";
        }
    }

    Logger.Log(Level.Info, description + "Evaluator [" + failedId + "] has failed!");
    CheckSuccess();
}
/// <summary>
/// Verifies that the failed evaluator reports the failed task "TaskID-EnforceToClose" and that the
/// evaluator exception wraps an exception whose message contains TaskManager.TaskKilledByDriver.
/// </summary>
/// <param name="value">The failed evaluator event.</param>
public void OnNext(IFailedEvaluator value)
{
    Assert.True(value.FailedTask.IsPresent());

    // Assert.Equal takes (expected, actual); keeping that order makes failure messages accurate.
    Assert.Equal("TaskID-EnforceToClose", value.FailedTask.Value.Id);
    Assert.Contains(TaskManager.TaskKilledByDriver, value.EvaluatorException.InnerException.Message);
}
/// <summary>
/// Handles an IFailedEvaluator event by logging the ID of the failed evaluator.
/// </summary>
/// <param name="value">The failed evaluator event.</param>
void IObserver<IFailedEvaluator>.OnNext(IFailedEvaluator value)
{
    var failedEvaluatorId = value.Id;
    Logger.Log(Level.Info, "Received IFailedEvaluator: {0}.", failedEvaluatorId);
}
/// <summary>
/// Handles a failed evaluator event by doing nothing.
/// </summary>
/// <param name="value">The failed evaluator event; not used.</param>
public void OnNext(IFailedEvaluator value) { }
/// <summary>
/// Handles a failed evaluator event by logging the failed-evaluator message at Error level.
/// </summary>
/// <param name="value">The failed evaluator event; not inspected.</param>
public void OnNext(IFailedEvaluator value)
{
    Logger.Log(
        Level.Error,
        FailedEvaluatorMessage);
}
/// <summary>
/// Handles a failed evaluator: logs a warning with the evaluator's exception, runs the message-order
/// check, and then always throws an IllegalStateException wrapping the evaluator's exception.
/// </summary>
/// <param name="eval">The failed evaluator event.</param>
public void OnNext(IFailedEvaluator eval)
{
    var warning = "Evaluator failed: " + eval.Id;
    Log.Log(Level.Warning, warning, eval.EvaluatorException);

    CheckMsgOrder(eval);

    var cause = eval.EvaluatorException;
    throw new IllegalStateException("failed evaluator illegal state", cause);
}
/// <summary>
/// Records a task failure in response to an IFailedEvaluator event.
/// If the evaluator reports a failed task, that task is removed from the running tasks when its
/// state is TaskRunning, and the FailedTaskEvaluatorError event is then applied to it.
/// If no failed task is reported, the task associated with the evaluator is looked up and the
/// FailedTaskEvaluatorError event is applied only when that task is in TaskSubmitted state.
/// An IMRUSystemException is raised when a task in TaskRunning state is missing from the running tasks.
/// </summary>
/// <param name="failedEvaluator">The failed evaluator event.</param>
internal void RecordTaskFailWhenReceivingFailedEvaluator(IFailedEvaluator failedEvaluator)
{
    if (!failedEvaluator.FailedTask.IsPresent())
    {
        // No task was reported with the failure: fall back to the evaluator-to-task association.
        var associatedTaskId = FindTaskAssociatedWithTheEvalutor(failedEvaluator.Id);
        if (GetTaskState(associatedTaskId) == StateMachine.TaskState.TaskSubmitted)
        {
            UpdateState(associatedTaskId, TaskStateEvent.FailedTaskEvaluatorError);
        }

        return;
    }

    var failedTaskId = failedEvaluator.FailedTask.Value.Id;
    var wasRunning = GetTaskState(failedTaskId) == StateMachine.TaskState.TaskRunning;

    // Remove returns false when the key is absent, which is the inconsistent-state case.
    if (wasRunning && !_runningTasks.Remove(failedTaskId))
    {
        var error = string.Format(
            CultureInfo.InvariantCulture,
            "The task [{0}] doesn't exist in Running Tasks.",
            failedTaskId);
        Exceptions.Throw(new IMRUSystemException(error), Logger);
    }

    UpdateState(failedTaskId, TaskStateEvent.FailedTaskEvaluatorError);
}
/// <summary>
/// No Failed Evaluator is expected, so receiving one always throws an Exception
/// carrying the failed-evaluator message.
/// </summary>
/// <param name="value">The failed evaluator event; not inspected.</param>
public void OnNext(IFailedEvaluator value)
{
    var unexpectedFailure = new Exception(FailedEvaluatorMessage);
    throw unexpectedFailure;
}
/// <summary>
/// Logs the failed-evaluator message together with the evaluator ID and the ID of the failed task,
/// or "no task" when the evaluator reports no failed task.
/// </summary>
/// <param name="value">The failed evaluator event.</param>
public void OnNext(IFailedEvaluator value)
{
    var evaluatorId = value.Id;

    string taskDescription;
    if (value.FailedTask.IsPresent())
    {
        taskDescription = value.FailedTask.Value.Id;
    }
    else
    {
        taskDescription = "no task";
    }

    Logger.Log(Level.Info, FailedEvaluatorMessage + " " + evaluatorId + " " + taskDescription);
}