/// <summary>
/// Checks that GetMapCompletionEvents surfaces only map-attempt completion
/// events and respects the requested event window (fromEventId / maxEvents).
/// </summary>
/// <exception cref="System.IO.IOException"/>
public virtual void TestGetMapCompletionEvents()
{
    // Four task-level events; only index 0 and index 2 are map events.
    TaskAttemptCompletionEvent[] noEvents = new TaskAttemptCompletionEvent[0];
    TaskAttemptCompletionEvent[] allEvents = new TaskAttemptCompletionEvent[]
    {
        CreateTce(0, true, TaskAttemptCompletionEventStatus.Obsolete),
        CreateTce(1, false, TaskAttemptCompletionEventStatus.Failed),
        CreateTce(2, true, TaskAttemptCompletionEventStatus.Succeeded),
        CreateTce(3, false, TaskAttemptCompletionEventStatus.Failed)
    };
    TaskAttemptCompletionEvent[] mapOnlyEvents = new TaskAttemptCompletionEvent[]
    {
        allEvents[0],
        allEvents[2]
    };
    // Stub a job that serves exactly the windows requested further below.
    Org.Apache.Hadoop.Mapreduce.V2.App.Job.Job stubbedJob =
        Org.Mockito.Mockito.Mock<Org.Apache.Hadoop.Mapreduce.V2.App.Job.Job>();
    Org.Mockito.Mockito.When(stubbedJob.GetTaskAttemptCompletionEvents(0, 100))
        .ThenReturn(allEvents);
    Org.Mockito.Mockito.When(stubbedJob.GetTaskAttemptCompletionEvents(0, 2))
        .ThenReturn(Arrays.CopyOfRange(allEvents, 0, 2));
    Org.Mockito.Mockito.When(stubbedJob.GetTaskAttemptCompletionEvents(2, 100))
        .ThenReturn(Arrays.CopyOfRange(allEvents, 2, 4));
    Org.Mockito.Mockito.When(stubbedJob.GetMapAttemptCompletionEvents(0, 100))
        .ThenReturn(TypeConverter.FromYarn(mapOnlyEvents));
    Org.Mockito.Mockito.When(stubbedJob.GetMapAttemptCompletionEvents(0, 2))
        .ThenReturn(TypeConverter.FromYarn(mapOnlyEvents));
    Org.Mockito.Mockito.When(stubbedJob.GetMapAttemptCompletionEvents(2, 100))
        .ThenReturn(TypeConverter.FromYarn(noEvents));
    AppContext context = Org.Mockito.Mockito.Mock<AppContext>();
    Org.Mockito.Mockito.When(context.GetJob(Matchers.Any<JobId>())).ThenReturn(stubbedJob);
    // Collaborators the listener requires but which this test never inspects.
    JobTokenSecretManager tokenSecret = Org.Mockito.Mockito.Mock<JobTokenSecretManager>();
    RMHeartbeatHandler rmHeartbeat = Org.Mockito.Mockito.Mock<RMHeartbeatHandler>();
    TaskHeartbeatHandler taskHeartbeat = Org.Mockito.Mockito.Mock<TaskHeartbeatHandler>();
    TaskAttemptListenerImpl listener = new _MockTaskAttemptListenerImpl_200(taskHeartbeat,
        context, tokenSecret, rmHeartbeat);
    Configuration conf = new Configuration();
    listener.Init(conf);
    listener.Start();
    JobID jobId = new JobID("12345", 1);
    TaskAttemptID reduceAttemptId = new TaskAttemptID("12345", 1, TaskType.Reduce, 1, 0);
    // Full window: both map events are returned.
    MapTaskCompletionEventsUpdate update =
        listener.GetMapCompletionEvents(jobId, 0, 100, reduceAttemptId);
    NUnit.Framework.Assert.AreEqual(2, update.events.Length);
    // Narrow window starting at 0: the stub still yields both map events.
    update = listener.GetMapCompletionEvents(jobId, 0, 2, reduceAttemptId);
    NUnit.Framework.Assert.AreEqual(2, update.events.Length);
    // Window past both map events: nothing remains.
    update = listener.GetMapCompletionEvents(jobId, 2, 100, reduceAttemptId);
    NUnit.Framework.Assert.AreEqual(0, update.events.Length);
}
/// <summary>
/// Builds a response containing the job's task-attempt completion events for
/// the window described by the request (starting event id and maximum count).
/// </summary>
/// <exception cref="System.IO.IOException"/>
public virtual GetTaskAttemptCompletionEventsResponse GetTaskAttemptCompletionEvents(GetTaskAttemptCompletionEventsRequest request)
{
    JobId requestedJobId = request.GetJobId();
    int firstEventId = request.GetFromEventId();
    int eventLimit = request.GetMaxEvents();
    // Read-only job access is sufficient here (second argument = true).
    Org.Apache.Hadoop.Mapreduce.V2.App.Job.Job targetJob =
        this.VerifyAndGetJob(requestedJobId, true);
    GetTaskAttemptCompletionEventsResponse result =
        this.recordFactory.NewRecordInstance<GetTaskAttemptCompletionEventsResponse>();
    result.AddAllCompletionEvents(
        Arrays.AsList(targetJob.GetTaskAttemptCompletionEvents(firstEventId, eventLimit)));
    return result;
}
/// <summary>
/// Exercises the fetch-failure path with three reducers: the map is re-run
/// only after three fetch failures are reported, and the completion-event
/// stream then shows the first map attempt as OBSOLETE/FAILED with the retry
/// SUCCEEDED.
/// </summary>
public virtual void TestFetchFailureMultipleReduces()
{
    MRApp app = new MRApp(1, 3, false, this.GetType().FullName, true);
    Configuration conf = new Configuration();
    // map -> reduce -> fetch-failure -> map retry is incompatible with
    // sequential, single-task-attempt approach in uber-AM, so disable:
    conf.SetBoolean(MRJobConfig.JobUbertaskEnable, false);
    Org.Apache.Hadoop.Mapreduce.V2.App.Job.Job job = app.Submit(conf);
    app.WaitForState(job, JobState.Running);
    // One map plus three reduces.
    NUnit.Framework.Assert.AreEqual("Num tasks not correct", 4, job.GetTasks().Count);
    IEnumerator<Task> taskIterator = job.GetTasks().Values.GetEnumerator();
    Task mapTask = taskIterator.Next();
    Task reduceTask = taskIterator.Next();
    Task reduceTask2 = taskIterator.Next();
    Task reduceTask3 = taskIterator.Next();
    // Drive the single map attempt to completion.
    app.WaitForState(mapTask, TaskState.Running);
    TaskAttempt mapAttempt1 = mapTask.GetAttempts().Values.GetEnumerator().Next();
    app.WaitForState(mapAttempt1, TaskAttemptState.Running);
    app.GetContext().GetEventHandler().Handle(
        new TaskAttemptEvent(mapAttempt1.GetID(), TaskAttemptEventType.TaDone));
    app.WaitForState(mapTask, TaskState.Succeeded);
    TaskAttemptCompletionEvent[] completionEvents = job.GetTaskAttemptCompletionEvents(0, 100);
    NUnit.Framework.Assert.AreEqual("Num completion events not correct", 1,
        completionEvents.Length);
    NUnit.Framework.Assert.AreEqual("Event status not correct",
        TaskAttemptCompletionEventStatus.Succeeded, completionEvents[0].GetStatus());
    // Bring all three reduce attempts into the shuffle phase.
    app.WaitForState(reduceTask, TaskState.Running);
    app.WaitForState(reduceTask2, TaskState.Running);
    app.WaitForState(reduceTask3, TaskState.Running);
    TaskAttempt reduceAttempt = reduceTask.GetAttempts().Values.GetEnumerator().Next();
    app.WaitForState(reduceAttempt, TaskAttemptState.Running);
    UpdateStatus(app, reduceAttempt, Phase.Shuffle);
    TaskAttempt reduceAttempt2 = reduceTask2.GetAttempts().Values.GetEnumerator().Next();
    app.WaitForState(reduceAttempt2, TaskAttemptState.Running);
    UpdateStatus(app, reduceAttempt2, Phase.Shuffle);
    TaskAttempt reduceAttempt3 = reduceTask3.GetAttempts().Values.GetEnumerator().Next();
    app.WaitForState(reduceAttempt3, TaskAttemptState.Running);
    UpdateStatus(app, reduceAttempt3, Phase.Shuffle);
    // Two fetch failures are not enough to re-execute the map.
    SendFetchFailure(app, reduceAttempt, mapAttempt1);
    SendFetchFailure(app, reduceAttempt, mapAttempt1);
    NUnit.Framework.Assert.AreEqual(TaskState.Succeeded, mapTask.GetState());
    // Move the other two reducers past shuffle before the third failure.
    UpdateStatus(app, reduceAttempt2, Phase.Reduce);
    UpdateStatus(app, reduceAttempt3, Phase.Reduce);
    // Third fetch failure triggers map re-execution.
    SendFetchFailure(app, reduceAttempt, mapAttempt1);
    app.WaitForState(mapTask, TaskState.Running);
    // The original attempt must now be failed, and a second attempt created.
    NUnit.Framework.Assert.AreEqual("Map TaskAttempt state not correct",
        TaskAttemptState.Failed, mapAttempt1.GetState());
    NUnit.Framework.Assert.AreEqual("Num attempts in Map Task not correct", 2,
        mapTask.GetAttempts().Count);
    IEnumerator<TaskAttempt> attemptIterator = mapTask.GetAttempts().Values.GetEnumerator();
    attemptIterator.Next();
    TaskAttempt mapAttempt2 = attemptIterator.Next();
    app.WaitForState(mapAttempt2, TaskAttemptState.Running);
    // Finish the retried map attempt.
    app.GetContext().GetEventHandler().Handle(
        new TaskAttemptEvent(mapAttempt2.GetID(), TaskAttemptEventType.TaDone));
    app.WaitForState(mapTask, TaskState.Succeeded);
    // Finish all three reducers so the job can complete.
    app.GetContext().GetEventHandler().Handle(
        new TaskAttemptEvent(reduceAttempt.GetID(), TaskAttemptEventType.TaDone));
    app.GetContext().GetEventHandler().Handle(
        new TaskAttemptEvent(reduceAttempt2.GetID(), TaskAttemptEventType.TaDone));
    app.GetContext().GetEventHandler().Handle(
        new TaskAttemptEvent(reduceAttempt3.GetID(), TaskAttemptEventType.TaDone));
    app.WaitForState(job, JobState.Succeeded);
    // The previously fetched SUCCEEDED event object is mutated to OBSOLETE.
    NUnit.Framework.Assert.AreEqual("Event status not correct",
        TaskAttemptCompletionEventStatus.Obsolete, completionEvents[0].GetStatus());
    completionEvents = job.GetTaskAttemptCompletionEvents(0, 100);
    NUnit.Framework.Assert.AreEqual("Num completion events not correct", 6,
        completionEvents.Length);
    NUnit.Framework.Assert.AreEqual("Event map attempt id not correct",
        mapAttempt1.GetID(), completionEvents[0].GetAttemptId());
    NUnit.Framework.Assert.AreEqual("Event map attempt id not correct",
        mapAttempt1.GetID(), completionEvents[1].GetAttemptId());
    NUnit.Framework.Assert.AreEqual("Event map attempt id not correct",
        mapAttempt2.GetID(), completionEvents[2].GetAttemptId());
    NUnit.Framework.Assert.AreEqual("Event reduce attempt id not correct",
        reduceAttempt.GetID(), completionEvents[3].GetAttemptId());
    NUnit.Framework.Assert.AreEqual("Event status not correct for map attempt1",
        TaskAttemptCompletionEventStatus.Obsolete, completionEvents[0].GetStatus());
    NUnit.Framework.Assert.AreEqual("Event status not correct for map attempt1",
        TaskAttemptCompletionEventStatus.Failed, completionEvents[1].GetStatus());
    NUnit.Framework.Assert.AreEqual("Event status not correct for map attempt2",
        TaskAttemptCompletionEventStatus.Succeeded, completionEvents[2].GetStatus());
    NUnit.Framework.Assert.AreEqual("Event status not correct for reduce attempt1",
        TaskAttemptCompletionEventStatus.Succeeded, completionEvents[3].GetStatus());
    // Map-only view must line up with the corresponding converted task events.
    TaskCompletionEvent[] mapCompletionEvents = job.GetMapAttemptCompletionEvents(0, 2);
    TaskCompletionEvent[] convertedEvents = TypeConverter.FromYarn(completionEvents);
    NUnit.Framework.Assert.AreEqual("Incorrect number of map events", 2,
        mapCompletionEvents.Length);
    Assert.AssertArrayEquals("Unexpected map events",
        Arrays.CopyOfRange(convertedEvents, 0, 2), mapCompletionEvents);
    mapCompletionEvents = job.GetMapAttemptCompletionEvents(2, 200);
    NUnit.Framework.Assert.AreEqual("Incorrect number of map events", 1,
        mapCompletionEvents.Length);
    NUnit.Framework.Assert.AreEqual("Unexpected map event", convertedEvents[2],
        mapCompletionEvents[0]);
}
/// <summary>
/// Verifies fetch-failure handling across an AM restart: the first AM run is
/// stopped mid map re-execution, and the recovered run must re-run the map
/// (it is not SUCCEEDED after restart) before the job can finish.
/// </summary>
public virtual void TestFetchFailureWithRecovery()
{
    int runCount = 0;
    MRApp app = new TestFetchFailure.MRAppWithHistory(1, 1, false,
        this.GetType().FullName, true, ++runCount);
    Configuration conf = new Configuration();
    // map -> reduce -> fetch-failure -> map retry is incompatible with
    // sequential, single-task-attempt approach in uber-AM, so disable:
    conf.SetBoolean(MRJobConfig.JobUbertaskEnable, false);
    Org.Apache.Hadoop.Mapreduce.V2.App.Job.Job job = app.Submit(conf);
    app.WaitForState(job, JobState.Running);
    // One map plus one reduce.
    NUnit.Framework.Assert.AreEqual("Num tasks not correct", 2, job.GetTasks().Count);
    IEnumerator<Task> taskIterator = job.GetTasks().Values.GetEnumerator();
    Task mapTask = taskIterator.Next();
    Task reduceTask = taskIterator.Next();
    // Drive the map attempt to completion.
    app.WaitForState(mapTask, TaskState.Running);
    TaskAttempt mapAttempt1 = mapTask.GetAttempts().Values.GetEnumerator().Next();
    app.WaitForState(mapAttempt1, TaskAttemptState.Running);
    app.GetContext().GetEventHandler().Handle(
        new TaskAttemptEvent(mapAttempt1.GetID(), TaskAttemptEventType.TaDone));
    app.WaitForState(mapTask, TaskState.Succeeded);
    TaskAttemptCompletionEvent[] completionEvents = job.GetTaskAttemptCompletionEvents(0, 100);
    NUnit.Framework.Assert.AreEqual("Num completion events not correct", 1,
        completionEvents.Length);
    NUnit.Framework.Assert.AreEqual("Event status not correct",
        TaskAttemptCompletionEventStatus.Succeeded, completionEvents[0].GetStatus());
    // Report three fetch failures from the reducer to trigger map re-execution.
    app.WaitForState(reduceTask, TaskState.Running);
    TaskAttempt reduceAttempt = reduceTask.GetAttempts().Values.GetEnumerator().Next();
    app.WaitForState(reduceAttempt, TaskAttemptState.Running);
    SendFetchFailure(app, reduceAttempt, mapAttempt1);
    SendFetchFailure(app, reduceAttempt, mapAttempt1);
    SendFetchFailure(app, reduceAttempt, mapAttempt1);
    app.WaitForState(mapTask, TaskState.Running);
    // Crash the app again.
    app.Stop();
    // Rerun with recovery enabled.
    app = new TestFetchFailure.MRAppWithHistory(1, 1, false, this.GetType().FullName,
        false, ++runCount);
    conf = new Configuration();
    conf.SetBoolean(MRJobConfig.MrAmJobRecoveryEnable, true);
    conf.SetBoolean(MRJobConfig.JobUbertaskEnable, false);
    job = app.Submit(conf);
    app.WaitForState(job, JobState.Running);
    NUnit.Framework.Assert.AreEqual("Num tasks not correct", 2, job.GetTasks().Count);
    taskIterator = job.GetTasks().Values.GetEnumerator();
    mapTask = taskIterator.Next();
    reduceTask = taskIterator.Next();
    // The map is not in a SUCCEEDED state after restart of AM.
    app.WaitForState(mapTask, TaskState.Running);
    mapAttempt1 = mapTask.GetAttempts().Values.GetEnumerator().Next();
    app.WaitForState(mapAttempt1, TaskAttemptState.Running);
    app.GetContext().GetEventHandler().Handle(
        new TaskAttemptEvent(mapAttempt1.GetID(), TaskAttemptEventType.TaDone));
    app.WaitForState(mapTask, TaskState.Succeeded);
    // Finish the reducer and the job.
    reduceAttempt = reduceTask.GetAttempts().Values.GetEnumerator().Next();
    app.GetContext().GetEventHandler().Handle(
        new TaskAttemptEvent(reduceAttempt.GetID(), TaskAttemptEventType.TaDone));
    app.WaitForState(job, JobState.Succeeded);
    completionEvents = job.GetTaskAttemptCompletionEvents(0, 100);
    NUnit.Framework.Assert.AreEqual("Num completion events not correct", 2,
        completionEvents.Length);
}
/// <summary>
/// Pass-through: fetches the requested completion-event window straight from
/// the wrapped job without any filtering or transformation.
/// </summary>
public override TaskAttemptCompletionEvent[] GetTaskAttemptCompletionEvents(int fromEventId, int maxEvents)
{
    return job.GetTaskAttemptCompletionEvents(fromEventId, maxEvents);
}
/// <summary>
/// Verifies that an unhealthy-node report kills the succeeded map attempts on
/// that node and re-runs them, and that after an AM restart with recovery the
/// completion-event history is rebuilt consistently.
/// </summary>
public virtual void TestUpdatedNodes()
{
    int runCount = 0;
    MRApp app = new TestMRApp.MRAppWithHistory(this, 2, 2, false,
        this.GetType().FullName, true, ++runCount);
    Configuration conf = new Configuration();
    // after half of the map completion, reduce will start
    conf.SetFloat(MRJobConfig.CompletedMapsForReduceSlowstart, 0.5f);
    // uberization forces full slowstart (1.0), so disable that
    conf.SetBoolean(MRJobConfig.JobUbertaskEnable, false);
    Org.Apache.Hadoop.Mapreduce.V2.App.Job.Job job = app.Submit(conf);
    app.WaitForState(job, JobState.Running);
    NUnit.Framework.Assert.AreEqual("Num tasks not correct", 4, job.GetTasks().Count);
    IEnumerator<Task> taskIterator = job.GetTasks().Values.GetEnumerator();
    Task mapTask1 = taskIterator.Next();
    Task mapTask2 = taskIterator.Next();
    // all maps must be running
    app.WaitForState(mapTask1, TaskState.Running);
    app.WaitForState(mapTask2, TaskState.Running);
    TaskAttempt task1Attempt = mapTask1.GetAttempts().Values.GetEnumerator().Next();
    TaskAttempt task2Attempt = mapTask2.GetAttempts().Values.GetEnumerator().Next();
    // Both attempts run on the same node so one node report affects both.
    NodeId node1 = task1Attempt.GetNodeId();
    NodeId node2 = task2Attempt.GetNodeId();
    NUnit.Framework.Assert.AreEqual(node1, node2);
    // send the done signal to the task
    app.GetContext().GetEventHandler().Handle(
        new TaskAttemptEvent(task1Attempt.GetID(), TaskAttemptEventType.TaDone));
    app.GetContext().GetEventHandler().Handle(
        new TaskAttemptEvent(task2Attempt.GetID(), TaskAttemptEventType.TaDone));
    // all maps must be succeeded
    app.WaitForState(mapTask1, TaskState.Succeeded);
    app.WaitForState(mapTask2, TaskState.Succeeded);
    TaskAttemptCompletionEvent[] events = job.GetTaskAttemptCompletionEvents(0, 100);
    NUnit.Framework.Assert.AreEqual("Expecting 2 completion events for success", 2,
        events.Length);
    // Report the shared node as unhealthy.
    AList<NodeReport> updatedNodes = new AList<NodeReport>();
    NodeReport nodeReport =
        RecordFactoryProvider.GetRecordFactory(null).NewRecordInstance<NodeReport>();
    nodeReport.SetNodeId(node1);
    nodeReport.SetNodeState(NodeState.Unhealthy);
    updatedNodes.AddItem(nodeReport);
    app.GetContext().GetEventHandler().Handle(
        new JobUpdatedNodesEvent(job.GetID(), updatedNodes));
    // Both succeeded attempts on the bad node are killed.
    app.WaitForState(task1Attempt, TaskAttemptState.Killed);
    app.WaitForState(task2Attempt, TaskAttemptState.Killed);
    events = job.GetTaskAttemptCompletionEvents(0, 100);
    NUnit.Framework.Assert.AreEqual("Expecting 2 more completion events for killed", 4,
        events.Length);
    // all maps must be back to running
    app.WaitForState(mapTask1, TaskState.Running);
    app.WaitForState(mapTask2, TaskState.Running);
    IEnumerator<TaskAttempt> attemptIterator = mapTask1.GetAttempts().Values.GetEnumerator();
    attemptIterator.Next();
    task1Attempt = attemptIterator.Next();
    // send the done signal to the task
    app.GetContext().GetEventHandler().Handle(
        new TaskAttemptEvent(task1Attempt.GetID(), TaskAttemptEventType.TaDone));
    // map1 must be succeeded. map2 must be running
    app.WaitForState(mapTask1, TaskState.Succeeded);
    app.WaitForState(mapTask2, TaskState.Running);
    events = job.GetTaskAttemptCompletionEvents(0, 100);
    NUnit.Framework.Assert.AreEqual("Expecting 1 more completion events for success", 5,
        events.Length);
    // Crash the app again.
    app.Stop();
    // rerun
    // in rerun the 1st map will be recovered from previous run
    app = new TestMRApp.MRAppWithHistory(this, 2, 2, false, this.GetType().FullName,
        false, ++runCount);
    conf = new Configuration();
    conf.SetBoolean(MRJobConfig.MrAmJobRecoveryEnable, true);
    conf.SetBoolean(MRJobConfig.JobUbertaskEnable, false);
    job = app.Submit(conf);
    app.WaitForState(job, JobState.Running);
    NUnit.Framework.Assert.AreEqual("No of tasks not correct", 4, job.GetTasks().Count);
    taskIterator = job.GetTasks().Values.GetEnumerator();
    mapTask1 = taskIterator.Next();
    mapTask2 = taskIterator.Next();
    Task reduceTask1 = taskIterator.Next();
    Task reduceTask2 = taskIterator.Next();
    // map 1 will be recovered, no need to send done
    app.WaitForState(mapTask1, TaskState.Succeeded);
    app.WaitForState(mapTask2, TaskState.Running);
    events = job.GetTaskAttemptCompletionEvents(0, 100);
    NUnit.Framework.Assert.AreEqual(
        "Expecting 2 completion events for killed & success of map1", 2, events.Length);
    // Finish the second map.
    task2Attempt = mapTask2.GetAttempts().Values.GetEnumerator().Next();
    app.GetContext().GetEventHandler().Handle(
        new TaskAttemptEvent(task2Attempt.GetID(), TaskAttemptEventType.TaDone));
    app.WaitForState(mapTask2, TaskState.Succeeded);
    events = job.GetTaskAttemptCompletionEvents(0, 100);
    NUnit.Framework.Assert.AreEqual("Expecting 1 more completion events for success", 3,
        events.Length);
    app.WaitForState(reduceTask1, TaskState.Running);
    app.WaitForState(reduceTask2, TaskState.Running);
    // Finish reduce 1; a late kill on the same attempt must not undo success.
    TaskAttempt task3Attempt = reduceTask1.GetAttempts().Values.GetEnumerator().Next();
    app.GetContext().GetEventHandler().Handle(
        new TaskAttemptEvent(task3Attempt.GetID(), TaskAttemptEventType.TaDone));
    app.WaitForState(reduceTask1, TaskState.Succeeded);
    app.GetContext().GetEventHandler().Handle(
        new TaskAttemptEvent(task3Attempt.GetID(), TaskAttemptEventType.TaKill));
    app.WaitForState(reduceTask1, TaskState.Succeeded);
    // Finish reduce 2.
    TaskAttempt task4Attempt = reduceTask2.GetAttempts().Values.GetEnumerator().Next();
    app.GetContext().GetEventHandler().Handle(
        new TaskAttemptEvent(task4Attempt.GetID(), TaskAttemptEventType.TaDone));
    app.WaitForState(reduceTask2, TaskState.Succeeded);
    events = job.GetTaskAttemptCompletionEvents(0, 100);
    NUnit.Framework.Assert.AreEqual("Expecting 2 more completion events for reduce success",
        5, events.Length);
    // job succeeds
    app.WaitForState(job, JobState.Succeeded);
}
/// <summary>
/// Pass-through: delegates the completion-event query to the mocked job
/// unchanged.
/// </summary>
public virtual TaskAttemptCompletionEvent[] GetTaskAttemptCompletionEvents(int fromEventId, int maxEvents)
{
    return mockJob.GetTaskAttemptCompletionEvents(fromEventId, maxEvents);
}