/// <exception cref="System.IO.IOException"/>
        /// <summary>
        /// Stubs a mocked job with four task attempt completion events and checks
        /// that the listener's <c>GetMapCompletionEvents</c> reports the expected
        /// number of events for three (start index, max events) requests.
        /// </summary>
        public virtual void TestGetMapCompletionEvents()
        {
            // Fixture data: an empty result, the full event list, and the two
            // entries (indices 0 and 2) that the mocked job hands back as map events.
            TaskAttemptCompletionEvent[] noEvents = new TaskAttemptCompletionEvent[0];
            TaskAttemptCompletionEvent[] allEvents = new TaskAttemptCompletionEvent[]
            {
                CreateTce(0, true, TaskAttemptCompletionEventStatus.Obsolete),
                CreateTce(1, false, TaskAttemptCompletionEventStatus.Failed),
                CreateTce(2, true, TaskAttemptCompletionEventStatus.Succeeded),
                CreateTce(3, false, TaskAttemptCompletionEventStatus.Failed)
            };
            TaskAttemptCompletionEvent[] mapOnlyEvents = new TaskAttemptCompletionEvent[]
            {
                allEvents[0],
                allEvents[2]
            };

            // Mocked job: canned answers for each window the test queries.
            Org.Apache.Hadoop.Mapreduce.V2.App.Job.Job jobMock =
                Org.Mockito.Mockito.Mock<Org.Apache.Hadoop.Mapreduce.V2.App.Job.Job>();
            Org.Mockito.Mockito.When(jobMock.GetTaskAttemptCompletionEvents(0, 100))
                .ThenReturn(allEvents);
            Org.Mockito.Mockito.When(jobMock.GetTaskAttemptCompletionEvents(0, 2))
                .ThenReturn(Arrays.CopyOfRange(allEvents, 0, 2));
            Org.Mockito.Mockito.When(jobMock.GetTaskAttemptCompletionEvents(2, 100))
                .ThenReturn(Arrays.CopyOfRange(allEvents, 2, 4));
            Org.Mockito.Mockito.When(jobMock.GetMapAttemptCompletionEvents(0, 100))
                .ThenReturn(TypeConverter.FromYarn(mapOnlyEvents));
            Org.Mockito.Mockito.When(jobMock.GetMapAttemptCompletionEvents(0, 2))
                .ThenReturn(TypeConverter.FromYarn(mapOnlyEvents));
            Org.Mockito.Mockito.When(jobMock.GetMapAttemptCompletionEvents(2, 100))
                .ThenReturn(TypeConverter.FromYarn(noEvents));

            // Application context that resolves every job id to the mocked job.
            AppContext contextMock = Org.Mockito.Mockito.Mock<AppContext>();
            Org.Mockito.Mockito.When(contextMock.GetJob(Matchers.Any<JobId>()))
                .ThenReturn(jobMock);

            // Remaining collaborators are plain mocks with no stubbing.
            JobTokenSecretManager secretManager = Org.Mockito.Mockito.Mock<JobTokenSecretManager>();
            RMHeartbeatHandler rmHeartbeats = Org.Mockito.Mockito.Mock<RMHeartbeatHandler>();
            TaskHeartbeatHandler taskHeartbeats = Org.Mockito.Mockito.Mock<TaskHeartbeatHandler>();
            TaskAttemptListenerImpl listener = new _MockTaskAttemptListenerImpl_200(
                taskHeartbeats, contextMock, secretManager, rmHeartbeats);
            Configuration configuration = new Configuration();

            listener.Init(configuration);
            listener.Start();

            JobID jobId = new JobID("12345", 1);
            TaskAttemptID reduceAttemptId = new TaskAttemptID("12345", 1, TaskType.Reduce, 1, 0);

            // Window (0, 100): both map events are expected.
            MapTaskCompletionEventsUpdate result =
                listener.GetMapCompletionEvents(jobId, 0, 100, reduceAttemptId);
            NUnit.Framework.Assert.AreEqual(2, result.events.Length);

            // Window (0, 2): still both map events.
            result = listener.GetMapCompletionEvents(jobId, 0, 2, reduceAttemptId);
            NUnit.Framework.Assert.AreEqual(2, result.events.Length);

            // Window (2, 100): nothing left to report.
            result = listener.GetMapCompletionEvents(jobId, 2, 100, reduceAttemptId);
            NUnit.Framework.Assert.AreEqual(0, result.events.Length);
        }
Example #2
0
            /// <summary>
            /// Looks up the job named in the request and returns a response holding
            /// the task attempt completion events that job reports for the
            /// requested starting event id and maximum count.
            /// </summary>
            /// <param name="request">Supplies the job id, the first event id and the maximum number of events.</param>
            /// <returns>A new response record populated with the job's completion events.</returns>
            /// <exception cref="System.IO.IOException"/>
            public virtual GetTaskAttemptCompletionEventsResponse GetTaskAttemptCompletionEvents(
                GetTaskAttemptCompletionEventsRequest request)
            {
                // Read all request fields up front, before the job lookup.
                JobId requestedJobId = request.GetJobId();
                int   firstEventId   = request.GetFromEventId();
                int   eventLimit     = request.GetMaxEvents();

                Org.Apache.Hadoop.Mapreduce.V2.App.Job.Job targetJob =
                    this.VerifyAndGetJob(requestedJobId, true);

                GetTaskAttemptCompletionEventsResponse result =
                    this.recordFactory.NewRecordInstance<GetTaskAttemptCompletionEventsResponse>();

                result.AddAllCompletionEvents(
                    Arrays.AsList(targetJob.GetTaskAttemptCompletionEvents(firstEventId, eventLimit)));

                return result;
            }
Example #3
0
        /// <summary>
        /// Runs a job with four tasks (treated here as one map followed by three
        /// reduces), lets the map succeed, then has the first reduce attempt report
        /// three fetch failures against the map attempt. Verifies that the map task
        /// goes back to RUNNING with a second attempt, that the job still succeeds,
        /// and that the task attempt and map attempt completion events have the
        /// expected count, attempt ids and statuses.
        /// </summary>
        public virtual void TestFetchFailureMultipleReduces()
        {
            // NOTE(review): the arguments (1, 3) presumably mean 1 map and 3 reduces;
            // the task-count assertion below expects 4 tasks - confirm against MRApp.
            MRApp         app  = new MRApp(1, 3, false, this.GetType().FullName, true);
            Configuration conf = new Configuration();

            // map -> reduce -> fetch-failure -> map retry is incompatible with
            // sequential, single-task-attempt approach in uber-AM, so disable:
            conf.SetBoolean(MRJobConfig.JobUbertaskEnable, false);
            Org.Apache.Hadoop.Mapreduce.V2.App.Job.Job job = app.Submit(conf);
            app.WaitForState(job, JobState.Running);
            //all maps would be running
            NUnit.Framework.Assert.AreEqual("Num tasks not correct", 4, job.GetTasks().Count);
            // The test relies on the enumeration order of job.GetTasks().Values:
            // first the map task, then the three reduce tasks.
            IEnumerator <Task> it = job.GetTasks().Values.GetEnumerator();
            Task mapTask          = it.Next();
            Task reduceTask       = it.Next();
            Task reduceTask2      = it.Next();
            Task reduceTask3      = it.Next();

            //wait for Task state move to RUNNING
            app.WaitForState(mapTask, TaskState.Running);
            TaskAttempt mapAttempt1 = mapTask.GetAttempts().Values.GetEnumerator().Next();

            app.WaitForState(mapAttempt1, TaskAttemptState.Running);
            //send the done signal to the map attempt
            app.GetContext().GetEventHandler().Handle(new TaskAttemptEvent(mapAttempt1.GetID(
                                                                               ), TaskAttemptEventType.TaDone));
            // wait for map success
            app.WaitForState(mapTask, TaskState.Succeeded);
            // Snapshot of the completion events after the first map success; this
            // same array is inspected again later, before it is re-fetched.
            TaskAttemptCompletionEvent[] events = job.GetTaskAttemptCompletionEvents(0, 100);
            NUnit.Framework.Assert.AreEqual("Num completion events not correct", 1, events.Length
                                            );
            NUnit.Framework.Assert.AreEqual("Event status not correct", TaskAttemptCompletionEventStatus
                                            .Succeeded, events[0].GetStatus());
            // wait for reduce to start running
            app.WaitForState(reduceTask, TaskState.Running);
            app.WaitForState(reduceTask2, TaskState.Running);
            app.WaitForState(reduceTask3, TaskState.Running);
            // Put the first attempt of each reduce task into the shuffle phase.
            TaskAttempt reduceAttempt = reduceTask.GetAttempts().Values.GetEnumerator().Next(
                );

            app.WaitForState(reduceAttempt, TaskAttemptState.Running);
            UpdateStatus(app, reduceAttempt, Phase.Shuffle);
            TaskAttempt reduceAttempt2 = reduceTask2.GetAttempts().Values.GetEnumerator().Next
                                             ();

            app.WaitForState(reduceAttempt2, TaskAttemptState.Running);
            UpdateStatus(app, reduceAttempt2, Phase.Shuffle);
            TaskAttempt reduceAttempt3 = reduceTask3.GetAttempts().Values.GetEnumerator().Next
                                             ();

            app.WaitForState(reduceAttempt3, TaskAttemptState.Running);
            UpdateStatus(app, reduceAttempt3, Phase.Shuffle);
            //send 2 fetch failures from reduce to prepare for map re execution
            SendFetchFailure(app, reduceAttempt, mapAttempt1);
            SendFetchFailure(app, reduceAttempt, mapAttempt1);
            //We should not re-launch the map task yet
            NUnit.Framework.Assert.AreEqual(TaskState.Succeeded, mapTask.GetState());
            // Move the other two reduce attempts out of the shuffle phase before the
            // third failure is reported.
            // NOTE(review): presumably the re-launch decision depends on how many
            // reduces are still shuffling - confirm against the job implementation.
            UpdateStatus(app, reduceAttempt2, Phase.Reduce);
            UpdateStatus(app, reduceAttempt3, Phase.Reduce);
            //send 3rd fetch failures from reduce to trigger map re execution
            SendFetchFailure(app, reduceAttempt, mapAttempt1);
            //wait for map Task state move back to RUNNING
            app.WaitForState(mapTask, TaskState.Running);
            //map attempt must have become FAILED
            NUnit.Framework.Assert.AreEqual("Map TaskAttempt state not correct", TaskAttemptState
                                            .Failed, mapAttempt1.GetState());
            NUnit.Framework.Assert.AreEqual("Num attempts in Map Task not correct", 2, mapTask
                                            .GetAttempts().Count);
            IEnumerator <TaskAttempt> atIt = mapTask.GetAttempts().Values.GetEnumerator();

            // Skip the first (failed) attempt and take the second one.
            atIt.Next();
            TaskAttempt mapAttempt2 = atIt.Next();

            app.WaitForState(mapAttempt2, TaskAttemptState.Running);
            //send the done signal to the second map attempt
            app.GetContext().GetEventHandler().Handle(new TaskAttemptEvent(mapAttempt2.GetID(
                                                                               ), TaskAttemptEventType.TaDone));
            // wait for map success
            app.WaitForState(mapTask, TaskState.Succeeded);
            //send done to reduce
            app.GetContext().GetEventHandler().Handle(new TaskAttemptEvent(reduceAttempt.GetID
                                                                               (), TaskAttemptEventType.TaDone));
            //send done to reduce
            app.GetContext().GetEventHandler().Handle(new TaskAttemptEvent(reduceAttempt2.GetID
                                                                               (), TaskAttemptEventType.TaDone));
            //send done to reduce
            app.GetContext().GetEventHandler().Handle(new TaskAttemptEvent(reduceAttempt3.GetID
                                                                               (), TaskAttemptEventType.TaDone));
            app.WaitForState(job, JobState.Succeeded);
            //previous completion event now becomes obsolete
            // (checked on the array fetched earlier, i.e. before the re-fetch below)
            NUnit.Framework.Assert.AreEqual("Event status not correct", TaskAttemptCompletionEventStatus
                                            .Obsolete, events[0].GetStatus());
            events = job.GetTaskAttemptCompletionEvents(0, 100);
            NUnit.Framework.Assert.AreEqual("Num completion events not correct", 6, events.Length
                                            );
            // Expected order of the first four events: map attempt 1 (twice),
            // map attempt 2, then the first reduce attempt.
            NUnit.Framework.Assert.AreEqual("Event map attempt id not correct", mapAttempt1.GetID
                                                (), events[0].GetAttemptId());
            NUnit.Framework.Assert.AreEqual("Event map attempt id not correct", mapAttempt1.GetID
                                                (), events[1].GetAttemptId());
            NUnit.Framework.Assert.AreEqual("Event map attempt id not correct", mapAttempt2.GetID
                                                (), events[2].GetAttemptId());
            NUnit.Framework.Assert.AreEqual("Event reduce attempt id not correct", reduceAttempt
                                            .GetID(), events[3].GetAttemptId());
            NUnit.Framework.Assert.AreEqual("Event status not correct for map attempt1", TaskAttemptCompletionEventStatus
                                            .Obsolete, events[0].GetStatus());
            NUnit.Framework.Assert.AreEqual("Event status not correct for map attempt1", TaskAttemptCompletionEventStatus
                                            .Failed, events[1].GetStatus());
            NUnit.Framework.Assert.AreEqual("Event status not correct for map attempt2", TaskAttemptCompletionEventStatus
                                            .Succeeded, events[2].GetStatus());
            NUnit.Framework.Assert.AreEqual("Event status not correct for reduce attempt1", TaskAttemptCompletionEventStatus
                                            .Succeeded, events[3].GetStatus());
            // The map-only view must match the first three converted events,
            // fetched here in two windows: (0, 2) and (2, 200).
            TaskCompletionEvent[] mapEvents       = job.GetMapAttemptCompletionEvents(0, 2);
            TaskCompletionEvent[] convertedEvents = TypeConverter.FromYarn(events);
            NUnit.Framework.Assert.AreEqual("Incorrect number of map events", 2, mapEvents.Length
                                            );
            Assert.AssertArrayEquals("Unexpected map events", Arrays.CopyOfRange(convertedEvents
                                                                                 , 0, 2), mapEvents);
            mapEvents = job.GetMapAttemptCompletionEvents(2, 200);
            NUnit.Framework.Assert.AreEqual("Incorrect number of map events", 1, mapEvents.Length
                                            );
            NUnit.Framework.Assert.AreEqual("Unexpected map event", convertedEvents[2], mapEvents
                                            [0]);
        }
Example #4
0
        /// <summary>
        /// Runs a two-task job (used here as one map and one reduce), reports three
        /// fetch failures so the map task returns to RUNNING, stops the app, then
        /// resubmits with job recovery enabled and checks that the rerun finishes
        /// with exactly two task attempt completion events.
        /// </summary>
        public virtual void TestFetchFailureWithRecovery()
        {
            int launchCount = 0;
            MRApp app = new TestFetchFailure.MRAppWithHistory(
                1, 1, false, this.GetType().FullName, true, ++launchCount);
            Configuration conf = new Configuration();

            // The uber-AM runs tasks sequentially with a single attempt each, which
            // cannot express map -> reduce -> fetch-failure -> map retry; turn it off.
            conf.SetBoolean(MRJobConfig.JobUbertaskEnable, false);
            Org.Apache.Hadoop.Mapreduce.V2.App.Job.Job job = app.Submit(conf);
            app.WaitForState(job, JobState.Running);

            // First run: expect two tasks, enumerated as map then reduce.
            NUnit.Framework.Assert.AreEqual("Num tasks not correct", 2, job.GetTasks().Count);
            IEnumerator<Task> taskIter = job.GetTasks().Values.GetEnumerator();
            Task map = taskIter.Next();
            Task reduce = taskIter.Next();

            // Let the map task and its first attempt reach RUNNING.
            app.WaitForState(map, TaskState.Running);
            TaskAttempt mapAttempt = map.GetAttempts().Values.GetEnumerator().Next();
            app.WaitForState(mapAttempt, TaskAttemptState.Running);

            // Finish the map attempt and wait for the task to succeed.
            app.GetContext().GetEventHandler().Handle(
                new TaskAttemptEvent(mapAttempt.GetID(), TaskAttemptEventType.TaDone));
            app.WaitForState(map, TaskState.Succeeded);

            // Exactly one completion event so far, and it is a success.
            TaskAttemptCompletionEvent[] completionEvents = job.GetTaskAttemptCompletionEvents(0, 100);
            NUnit.Framework.Assert.AreEqual(
                "Num completion events not correct", 1, completionEvents.Length);
            NUnit.Framework.Assert.AreEqual(
                "Event status not correct",
                TaskAttemptCompletionEventStatus.Succeeded,
                completionEvents[0].GetStatus());

            // Bring the reduce task and its first attempt to RUNNING.
            app.WaitForState(reduce, TaskState.Running);
            TaskAttempt reducerAttempt = reduce.GetAttempts().Values.GetEnumerator().Next();
            app.WaitForState(reducerAttempt, TaskAttemptState.Running);

            // Three fetch failures from the reduce attempt trigger map re-execution.
            SendFetchFailure(app, reducerAttempt, mapAttempt);
            SendFetchFailure(app, reducerAttempt, mapAttempt);
            SendFetchFailure(app, reducerAttempt, mapAttempt);

            // The map task must drop back to RUNNING.
            app.WaitForState(map, TaskState.Running);

            // Simulate an application crash.
            app.Stop();

            // Second run, this time with recovery switched on.
            app = new TestFetchFailure.MRAppWithHistory(
                1, 1, false, this.GetType().FullName, false, ++launchCount);
            conf = new Configuration();
            conf.SetBoolean(MRJobConfig.MrAmJobRecoveryEnable, true);
            conf.SetBoolean(MRJobConfig.JobUbertaskEnable, false);
            job = app.Submit(conf);
            app.WaitForState(job, JobState.Running);

            // Same two tasks are expected after the restart.
            NUnit.Framework.Assert.AreEqual("Num tasks not correct", 2, job.GetTasks().Count);
            taskIter = job.GetTasks().Values.GetEnumerator();
            map = taskIter.Next();
            reduce = taskIter.Next();

            // After the restart the map is RUNNING again rather than SUCCEEDED.
            app.WaitForState(map, TaskState.Running);
            mapAttempt = map.GetAttempts().Values.GetEnumerator().Next();
            app.WaitForState(mapAttempt, TaskAttemptState.Running);

            // Finish the map attempt and wait for the task to succeed.
            app.GetContext().GetEventHandler().Handle(
                new TaskAttemptEvent(mapAttempt.GetID(), TaskAttemptEventType.TaDone));
            app.WaitForState(map, TaskState.Succeeded);

            // Finish the reduce attempt and wait for the whole job.
            reducerAttempt = reduce.GetAttempts().Values.GetEnumerator().Next();
            app.GetContext().GetEventHandler().Handle(
                new TaskAttemptEvent(reducerAttempt.GetID(), TaskAttemptEventType.TaDone));
            app.WaitForState(job, JobState.Succeeded);

            // The rerun must report two completion events in total.
            completionEvents = job.GetTaskAttemptCompletionEvents(0, 100);
            NUnit.Framework.Assert.AreEqual(
                "Num completion events not correct", 2, completionEvents.Length);
        }
Example #5
0
 /// <summary>
 /// Forwards the request to the wrapped <c>job</c> and returns its answer unchanged.
 /// </summary>
 /// <param name="fromEventId">Passed through as the first argument of the delegate call.</param>
 /// <param name="maxEvents">Passed through as the second argument of the delegate call.</param>
 /// <returns>Whatever the wrapped job returns for the same arguments.</returns>
 public override TaskAttemptCompletionEvent[] GetTaskAttemptCompletionEvents(
     int fromEventId, int maxEvents)
 {
     TaskAttemptCompletionEvent[] delegated =
         job.GetTaskAttemptCompletionEvents(fromEventId, maxEvents);

     return delegated;
 }
Example #6
0
        /// <summary>
        /// Runs a job with four tasks (used here as two maps and two reduces),
        /// completes both maps, then reports their node as unhealthy and checks that
        /// both map attempts are killed and the map tasks return to RUNNING. After
        /// completing one map again, the app is stopped and resubmitted with job
        /// recovery enabled; the test then drives the remaining tasks to success and
        /// checks the number of task attempt completion events at every step.
        /// </summary>
        public virtual void TestUpdatedNodes()
        {
            int   runCount = 0;
            // NOTE(review): the arguments (2, 2) presumably mean 2 maps and 2 reduces;
            // the task-count assertion below expects 4 tasks - confirm against MRApp.
            MRApp app      = new TestMRApp.MRAppWithHistory(this, 2, 2, false, this.GetType().FullName
                                                            , true, ++runCount);
            Configuration conf = new Configuration();

            // after half of the map completion, reduce will start
            conf.SetFloat(MRJobConfig.CompletedMapsForReduceSlowstart, 0.5f);
            // uberization forces full slowstart (1.0), so disable that
            conf.SetBoolean(MRJobConfig.JobUbertaskEnable, false);
            Org.Apache.Hadoop.Mapreduce.V2.App.Job.Job job = app.Submit(conf);
            app.WaitForState(job, JobState.Running);
            NUnit.Framework.Assert.AreEqual("Num tasks not correct", 4, job.GetTasks().Count);
            // The test relies on the enumeration order of job.GetTasks().Values:
            // the first two entries are used as the map tasks.
            IEnumerator <Task> it = job.GetTasks().Values.GetEnumerator();
            Task mapTask1         = it.Next();
            Task mapTask2         = it.Next();

            // all maps must be running
            app.WaitForState(mapTask1, TaskState.Running);
            app.WaitForState(mapTask2, TaskState.Running);
            TaskAttempt task1Attempt = mapTask1.GetAttempts().Values.GetEnumerator().Next();
            TaskAttempt task2Attempt = mapTask2.GetAttempts().Values.GetEnumerator().Next();
            NodeId      node1        = task1Attempt.GetNodeId();
            NodeId      node2        = task2Attempt.GetNodeId();

            // Both attempts must be on the same node, so that a single unhealthy-node
            // report (sent below for node1 only) affects both of them.
            NUnit.Framework.Assert.AreEqual(node1, node2);
            // send the done signal to the task
            app.GetContext().GetEventHandler().Handle(new TaskAttemptEvent(task1Attempt.GetID
                                                                               (), TaskAttemptEventType.TaDone));
            app.GetContext().GetEventHandler().Handle(new TaskAttemptEvent(task2Attempt.GetID
                                                                               (), TaskAttemptEventType.TaDone));
            // all maps must be succeeded
            app.WaitForState(mapTask1, TaskState.Succeeded);
            app.WaitForState(mapTask2, TaskState.Succeeded);
            TaskAttemptCompletionEvent[] events = job.GetTaskAttemptCompletionEvents(0, 100);
            NUnit.Framework.Assert.AreEqual("Expecting 2 completion events for success", 2, events
                                            .Length);
            // send updated nodes info
            AList <NodeReport> updatedNodes = new AList <NodeReport>();
            NodeReport         nr           = RecordFactoryProvider.GetRecordFactory(null).NewRecordInstance <NodeReport
                                                                                                              >();

            nr.SetNodeId(node1);
            nr.SetNodeState(NodeState.Unhealthy);
            updatedNodes.AddItem(nr);
            app.GetContext().GetEventHandler().Handle(new JobUpdatedNodesEvent(job.GetID(), updatedNodes
                                                                               ));
            // The already-succeeded attempts on the unhealthy node are expected to
            // end up KILLED.
            app.WaitForState(task1Attempt, TaskAttemptState.Killed);
            app.WaitForState(task2Attempt, TaskAttemptState.Killed);
            events = job.GetTaskAttemptCompletionEvents(0, 100);
            NUnit.Framework.Assert.AreEqual("Expecting 2 more completion events for killed",
                                            4, events.Length);
            // all maps must be back to running
            app.WaitForState(mapTask1, TaskState.Running);
            app.WaitForState(mapTask2, TaskState.Running);
            IEnumerator <TaskAttempt> itr = mapTask1.GetAttempts().Values.GetEnumerator();

            // Skip the first (killed) attempt of map 1 and take the second one.
            itr.Next();
            task1Attempt = itr.Next();
            // send the done signal to the task
            app.GetContext().GetEventHandler().Handle(new TaskAttemptEvent(task1Attempt.GetID
                                                                               (), TaskAttemptEventType.TaDone));
            // map1 must be succeeded. map2 must be running
            app.WaitForState(mapTask1, TaskState.Succeeded);
            app.WaitForState(mapTask2, TaskState.Running);
            events = job.GetTaskAttemptCompletionEvents(0, 100);
            NUnit.Framework.Assert.AreEqual("Expecting 1 more completion events for success",
                                            5, events.Length);
            // Crash the app again.
            app.Stop();
            // rerun
            // in rerun the 1st map will be recovered from previous run
            app = new TestMRApp.MRAppWithHistory(this, 2, 2, false, this.GetType().FullName,
                                                 false, ++runCount);
            conf = new Configuration();
            conf.SetBoolean(MRJobConfig.MrAmJobRecoveryEnable, true);
            conf.SetBoolean(MRJobConfig.JobUbertaskEnable, false);
            job = app.Submit(conf);
            app.WaitForState(job, JobState.Running);
            NUnit.Framework.Assert.AreEqual("No of tasks not correct", 4, job.GetTasks().Count
                                            );
            // Same enumeration-order assumption as above: two maps, then two reduces.
            it       = job.GetTasks().Values.GetEnumerator();
            mapTask1 = it.Next();
            mapTask2 = it.Next();
            Task reduceTask1 = it.Next();
            Task reduceTask2 = it.Next();

            // map 1 will be recovered, no need to send done
            app.WaitForState(mapTask1, TaskState.Succeeded);
            app.WaitForState(mapTask2, TaskState.Running);
            events = job.GetTaskAttemptCompletionEvents(0, 100);
            NUnit.Framework.Assert.AreEqual("Expecting 2 completion events for killed & success of map1"
                                            , 2, events.Length);
            // Complete map 2 in the rerun.
            task2Attempt = mapTask2.GetAttempts().Values.GetEnumerator().Next();
            app.GetContext().GetEventHandler().Handle(new TaskAttemptEvent(task2Attempt.GetID
                                                                               (), TaskAttemptEventType.TaDone));
            app.WaitForState(mapTask2, TaskState.Succeeded);
            events = job.GetTaskAttemptCompletionEvents(0, 100);
            NUnit.Framework.Assert.AreEqual("Expecting 1 more completion events for success",
                                            3, events.Length);
            app.WaitForState(reduceTask1, TaskState.Running);
            app.WaitForState(reduceTask2, TaskState.Running);
            TaskAttempt task3Attempt = reduceTask1.GetAttempts().Values.GetEnumerator().Next(
                );

            app.GetContext().GetEventHandler().Handle(new TaskAttemptEvent(task3Attempt.GetID
                                                                               (), TaskAttemptEventType.TaDone));
            app.WaitForState(reduceTask1, TaskState.Succeeded);
            // Send a kill to the reduce attempt that has already finished; the task
            // is then expected to still be in the SUCCEEDED state.
            app.GetContext().GetEventHandler().Handle(new TaskAttemptEvent(task3Attempt.GetID
                                                                               (), TaskAttemptEventType.TaKill));
            app.WaitForState(reduceTask1, TaskState.Succeeded);
            TaskAttempt task4Attempt = reduceTask2.GetAttempts().Values.GetEnumerator().Next(
                );

            app.GetContext().GetEventHandler().Handle(new TaskAttemptEvent(task4Attempt.GetID
                                                                               (), TaskAttemptEventType.TaDone));
            app.WaitForState(reduceTask2, TaskState.Succeeded);
            events = job.GetTaskAttemptCompletionEvents(0, 100);
            NUnit.Framework.Assert.AreEqual("Expecting 2 more completion events for reduce success"
                                            , 5, events.Length);
            // job succeeds
            app.WaitForState(job, JobState.Succeeded);
        }
 /// <summary>
 /// Forwards the request to <c>mockJob</c> and returns its answer unchanged.
 /// </summary>
 /// <param name="fromEventId">Passed through as the first argument of the delegate call.</param>
 /// <param name="maxEvents">Passed through as the second argument of the delegate call.</param>
 /// <returns>Whatever <c>mockJob</c> returns for the same arguments.</returns>
 public virtual TaskAttemptCompletionEvent[] GetTaskAttemptCompletionEvents(
     int fromEventId, int maxEvents)
 {
     TaskAttemptCompletionEvent[] delegated =
         mockJob.GetTaskAttemptCompletionEvents(fromEventId, maxEvents);

     return delegated;
 }