示例#1
0
        /// <summary>
        /// Drives a single-task job through the commit-pending path: the task's first
        /// attempt is sent <c>TaCommitPending</c> twice and is expected to remain in
        /// <c>CommitPending</c>; after <c>TaDone</c> the job is expected to succeed.
        /// </summary>
        public virtual void TestCommitPending()
        {
            MRApp app = new MRApp(1, 0, false, this.GetType().FullName, true);

            Org.Apache.Hadoop.Mapreduce.V2.App.Job.Job job = app.Submit(new Configuration());
            app.WaitForState(job, JobState.Running);
            // NUnit's AreEqual takes (expected, actual, message); the message goes last.
            NUnit.Framework.Assert.AreEqual(1, job.GetTasks().Count, "Num tasks not correct");
            IEnumerator <Task> it = job.GetTasks().Values.GetEnumerator();
            Task task             = it.Next();

            app.WaitForState(task, TaskState.Running);
            TaskAttempt attempt = task.GetAttempts().Values.GetEnumerator().Next();

            app.WaitForState(attempt, TaskAttemptState.Running);
            //send the commit pending signal to the task
            app.GetContext().GetEventHandler().Handle(
                new TaskAttemptEvent(attempt.GetID(), TaskAttemptEventType.TaCommitPending));
            //wait for first attempt to commit pending
            app.WaitForState(attempt, TaskAttemptState.CommitPending);
            //re-send the commit pending signal to the task
            app.GetContext().GetEventHandler().Handle(
                new TaskAttemptEvent(attempt.GetID(), TaskAttemptEventType.TaCommitPending));
            //the task attempt should be still at COMMIT_PENDING
            app.WaitForState(attempt, TaskAttemptState.CommitPending);
            //send the done signal to the task
            app.GetContext().GetEventHandler().Handle(
                new TaskAttemptEvent(task.GetAttempts().Values.GetEnumerator().Next().GetID(),
                                     TaskAttemptEventType.TaDone));
            app.WaitForState(job, JobState.Succeeded);
        }
示例#2
0
        /// <summary>
        /// Forces <c>isLastAMRetry</c> to true on a spied application, reboots the job,
        /// waits for the service to stop, and then asserts that <c>IsLastAMRetry()</c>
        /// reports false and that the job-end servlet recorded no call.
        /// </summary>
        public virtual void TestNotificationOnLastRetryUnregistrationFailure()
        {
            HttpServer2 server = StartHttpServer();

            try
            {
                MRApp app = Org.Mockito.Mockito.Spy(new TestJobEndNotifier.MRAppWithCustomContainerAllocator
                                                        (this, 2, 2, false, this.GetType().FullName, true, 2, false));

                // Currently, we will have isLastRetry always equals to false at beginning
                // of MRAppMaster, except staging area exists or commit already started at
                // the beginning.
                // Now manually set isLastRetry to true and this should reset to false when
                // unregister failed.
                app.isLastAMRetry = true;
                Org.Mockito.Mockito.DoNothing().When(app).Sysexit();
                JobConf conf = new JobConf();

                conf.Set(JobContext.MrJobEndNotificationUrl, TestJobEndNotifier.JobEndServlet.baseUrl
                         + "jobend?jobid=$jobId&status=$jobStatus");
                JobImpl job = (JobImpl)app.Submit(conf);

                app.WaitForState(job, JobState.Running);
                app.GetContext().GetEventHandler().Handle(
                    new JobEvent(app.GetJobId(), JobEventType.JobAmReboot));
                app.WaitForInternalState(job, JobStateInternal.Reboot);
                // Unregistration fails, so isLastAMRetry is recalculated.
                // The reboot stops the service internally, so no explicit shutdown is
                // issued here; we only wait for the stop to complete.
                // NOTE(review): 10000 is presumably a timeout in milliseconds - confirm.
                app.WaitForServiceToStop(10000);
                NUnit.Framework.Assert.IsFalse(app.IsLastAMRetry());
                // Since it is not the last retry, JobEndServlet was not called
                NUnit.Framework.Assert.AreEqual(0, TestJobEndNotifier.JobEndServlet.calledTimes);
                NUnit.Framework.Assert.IsNull(TestJobEndNotifier.JobEndServlet.requestUri);
                NUnit.Framework.Assert.IsNull(TestJobEndNotifier.JobEndServlet.foundJobState);
            }
            finally
            {
                // Stop the HTTP server even when a wait or an assertion above throws,
                // so a failing run does not leave the server running.
                server.Stop();
            }
        }
示例#3
0
        /// <summary>
        /// Reboots a job on a spied application, shuts the job down, and then asserts
        /// that the job still reports <c>Running</c>, that <c>IsLastAMRetry()</c> is
        /// false, and that the job-end servlet recorded no call.
        /// </summary>
        public virtual void TestAbsentNotificationOnNotLastRetryUnregistrationFailure()
        {
            HttpServer2 server = StartHttpServer();

            try
            {
                MRApp app = Org.Mockito.Mockito.Spy(new TestJobEndNotifier.MRAppWithCustomContainerAllocator
                                                        (this, 2, 2, false, this.GetType().FullName, true, 1, false));

                Org.Mockito.Mockito.DoNothing().When(app).Sysexit();
                JobConf conf = new JobConf();

                conf.Set(JobContext.MrJobEndNotificationUrl, TestJobEndNotifier.JobEndServlet.baseUrl
                         + "jobend?jobid=$jobId&status=$jobStatus");
                JobImpl job = (JobImpl)app.Submit(conf);

                app.WaitForState(job, JobState.Running);
                app.GetContext().GetEventHandler().Handle(
                    new JobEvent(app.GetJobId(), JobEventType.JobAmReboot));
                app.WaitForInternalState(job, JobStateInternal.Reboot);
                // Now shutdown.
                // Unregistration fails: isLastAMRetry is recalculated, this is not
                app.ShutDownJob();
                // Not the last AM attempt, so the user should see that the job is still running.
                app.WaitForState(job, JobState.Running);
                NUnit.Framework.Assert.IsFalse(app.IsLastAMRetry());
                NUnit.Framework.Assert.AreEqual(0, TestJobEndNotifier.JobEndServlet.calledTimes);
                NUnit.Framework.Assert.IsNull(TestJobEndNotifier.JobEndServlet.requestUri);
                NUnit.Framework.Assert.IsNull(TestJobEndNotifier.JobEndServlet.foundJobState);
            }
            finally
            {
                // Stop the HTTP server even when a wait or an assertion above throws,
                // so a failing run does not leave the server running.
                server.Stop();
            }
        }
示例#4
0
        //@Test
        /// <summary>
        /// Sets the completed-maps slow-start threshold to 0.5f on a job with two map
        /// tasks and one reduce task, and checks that the reduce task stays in
        /// <c>New</c> while both maps run and reaches <c>Running</c> once the first map
        /// has succeeded.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestCompletedMapsForReduceSlowstart()
        {
            MRApp         app  = new MRApp(2, 1, false, this.GetType().FullName, true);
            Configuration conf = new Configuration();

            //after half of the map completion, reduce will start
            conf.SetFloat(MRJobConfig.CompletedMapsForReduceSlowstart, 0.5f);
            //uberization forces full slowstart (1.0), so disable that
            conf.SetBoolean(MRJobConfig.JobUbertaskEnable, false);
            Org.Apache.Hadoop.Mapreduce.V2.App.Job.Job job = app.Submit(conf);
            app.WaitForState(job, JobState.Running);
            //all maps would be running
            // NUnit's AreEqual takes (expected, actual, message); the message goes last.
            NUnit.Framework.Assert.AreEqual(3, job.GetTasks().Count, "Num tasks not correct");
            IEnumerator <Task> it = job.GetTasks().Values.GetEnumerator();
            Task mapTask1         = it.Next();
            Task mapTask2         = it.Next();
            Task reduceTask       = it.Next();

            // all maps must be running
            app.WaitForState(mapTask1, TaskState.Running);
            app.WaitForState(mapTask2, TaskState.Running);
            TaskAttempt task1Attempt = mapTask1.GetAttempts().Values.GetEnumerator().Next();
            TaskAttempt task2Attempt = mapTask2.GetAttempts().Values.GetEnumerator().Next();

            //before sending the TA_DONE, event make sure attempt has come to
            //RUNNING state
            app.WaitForState(task1Attempt, TaskAttemptState.Running);
            app.WaitForState(task2Attempt, TaskAttemptState.Running);
            // reduces must be in NEW state
            NUnit.Framework.Assert.AreEqual(TaskState.New, reduceTask.GetReport().GetTaskState(),
                                            "Reduce Task state not correct");
            //send the done signal to the 1st map task
            app.GetContext().GetEventHandler().Handle(
                new TaskAttemptEvent(mapTask1.GetAttempts().Values.GetEnumerator().Next().GetID(),
                                     TaskAttemptEventType.TaDone));
            //wait for first map task to complete
            app.WaitForState(mapTask1, TaskState.Succeeded);
            //Once the first map completes, it will schedule the reduces
            //now reduce must be running
            app.WaitForState(reduceTask, TaskState.Running);
            //send the done signal to 2nd map and the reduce to complete the job
            app.GetContext().GetEventHandler().Handle(
                new TaskAttemptEvent(mapTask2.GetAttempts().Values.GetEnumerator().Next().GetID(),
                                     TaskAttemptEventType.TaDone));
            app.GetContext().GetEventHandler().Handle(
                new TaskAttemptEvent(reduceTask.GetAttempts().Values.GetEnumerator().Next().GetID(),
                                     TaskAttemptEventType.TaDone));
            app.WaitForState(job, JobState.Succeeded);
        }
示例#5
0
        /// <summary>
        /// Sends a <c>TSchedule</c> task event to a task that is already
        /// <c>Running</c> and waits for the job to reach the <c>Error</c> state.
        /// </summary>
        public virtual void TestJobError()
        {
            MRApp app = new MRApp(1, 0, false, this.GetType().FullName, true);

            Org.Apache.Hadoop.Mapreduce.V2.App.Job.Job job = app.Submit(new Configuration());
            app.WaitForState(job, JobState.Running);
            // NUnit's AreEqual takes (expected, actual, message); the message goes last.
            NUnit.Framework.Assert.AreEqual(1, job.GetTasks().Count, "Num tasks not correct");
            IEnumerator <Task> it = job.GetTasks().Values.GetEnumerator();
            Task task             = it.Next();

            app.WaitForState(task, TaskState.Running);
            //send an invalid event on task at current state
            app.GetContext().GetEventHandler().Handle(
                new TaskEvent(task.GetID(), TaskEventType.TSchedule));
            //this must lead to job error
            app.WaitForState(job, JobState.Error);
        }
示例#6
0
        /// <summary>
        /// Sends a <c>JobAmReboot</c> event to a running single-task job and waits for
        /// the job's externally visible state to be <c>Running</c>.
        /// </summary>
        public virtual void TestJobRebootNotLastRetryOnUnregistrationFailure()
        {
            MRApp app = new MRApp(1, 0, false, this.GetType().FullName, true);

            Org.Apache.Hadoop.Mapreduce.V2.App.Job.Job job = app.Submit(new Configuration());
            app.WaitForState(job, JobState.Running);
            // NUnit's AreEqual takes (expected, actual, message); the message goes last.
            NUnit.Framework.Assert.AreEqual(1, job.GetTasks().Count, "Num tasks not correct");
            IEnumerator <Task> it = job.GetTasks().Values.GetEnumerator();
            Task task             = it.Next();

            app.WaitForState(task, TaskState.Running);
            //send a reboot event
            app.GetContext().GetEventHandler().Handle(
                new JobEvent(job.GetID(), JobEventType.JobAmReboot));
            // return external state as RUNNING since otherwise the JobClient will
            // prematurely exit.
            app.WaitForState(job, JobState.Running);
        }
示例#7
0
        /// <summary>
        /// Builds a status report for <paramref name="attempt"/> with the given
        /// <paramref name="phase"/>, a fixed progress of 0.5f, zeroed finish times and
        /// the state string "OK", then passes it to the application's event handler
        /// wrapped in a <c>TaskAttemptStatusUpdateEvent</c>.
        /// </summary>
        /// <param name="app">Application whose event handler receives the update.</param>
        /// <param name="attempt">Attempt that supplies the id and the reported task state.</param>
        /// <param name="phase">Phase recorded in the report.</param>
        private void UpdateStatus(MRApp app, TaskAttempt attempt, Phase phase)
        {
            // Members are initialised in the same order the values were originally assigned.
            TaskAttemptStatusUpdateEvent.TaskAttemptStatus report =
                new TaskAttemptStatusUpdateEvent.TaskAttemptStatus
                {
                    counters          = new Counters(),
                    fetchFailedMaps   = new AList <TaskAttemptId>(),
                    id                = attempt.GetID(),
                    mapFinishTime     = 0,
                    phase             = phase,
                    progress          = 0.5f,
                    shuffleFinishTime = 0,
                    sortFinishTime    = 0,
                    stateString       = "OK",
                    taskState         = attempt.GetState()
                };

            TaskAttemptStatusUpdateEvent statusUpdate =
                new TaskAttemptStatusUpdateEvent(attempt.GetID(), report);

            app.GetContext().GetEventHandler().Handle(statusUpdate);
        }
示例#8
0
        /// <summary>
        /// Sends a <c>JobAmReboot</c> event to a running single-task job on an
        /// application constructed with a start count of 2, waits for the internal
        /// state <c>Reboot</c>, and then waits for the externally visible state to be
        /// <c>Running</c>.
        /// </summary>
        public virtual void TestJobRebootOnLastRetryOnUnregistrationFailure()
        {
            // make startCount as 2 since this is last retry which equals to
            // DEFAULT_MAX_AM_RETRY
            // The last param mocks the unregistration failure
            MRApp         app  = new MRApp(1, 0, false, this.GetType().FullName, true, 2, false);
            Configuration conf = new Configuration();

            Org.Apache.Hadoop.Mapreduce.V2.App.Job.Job job = app.Submit(conf);
            app.WaitForState(job, JobState.Running);
            // NUnit's AreEqual takes (expected, actual, message); the message goes last.
            NUnit.Framework.Assert.AreEqual(1, job.GetTasks().Count, "Num tasks not correct");
            IEnumerator <Task> it = job.GetTasks().Values.GetEnumerator();
            Task task             = it.Next();

            app.WaitForState(task, TaskState.Running);
            //send a reboot event
            app.GetContext().GetEventHandler().Handle(
                new JobEvent(job.GetID(), JobEventType.JobAmReboot));
            app.WaitForInternalState((JobImpl)job, JobStateInternal.Reboot);
            // return external state as RUNNING if this is the last retry while
            // unregistration fails
            app.WaitForState(job, JobState.Running);
        }
示例#9
0
 /// <summary>
 /// Passes a <c>JobTaskAttemptFetchFailureEvent</c> to the application's event
 /// handler, built from the id of <paramref name="reduceAttempt"/> and a
 /// one-element list holding the id of <paramref name="mapAttempt"/>.
 /// </summary>
 /// <param name="app">Application whose event handler receives the event.</param>
 /// <param name="reduceAttempt">Attempt whose id is the event's first argument.</param>
 /// <param name="mapAttempt">Attempt whose id is the only entry in the list.</param>
 private void SendFetchFailure(MRApp app, TaskAttempt reduceAttempt, TaskAttempt mapAttempt)
 {
     // The handler is resolved first, then the ids, matching the original evaluation order.
     var handler   = app.GetContext().GetEventHandler();
     var reducerId = reduceAttempt.GetID();
     var failedMaps = Arrays.AsList(new TaskAttemptId[] { mapAttempt.GetID() });

     handler.Handle(new JobTaskAttemptFetchFailureEvent(reducerId, failedMaps));
 }
示例#10
0
        /// <summary>
        /// Runs a job with one map task and three reduce tasks, completes the map, and
        /// then reports fetch failures from the first reduce attempt against it. After
        /// the third failure the map task is expected to return to <c>Running</c> with a
        /// second attempt. Once all attempts finish, the test checks the order and
        /// status of the recorded task-attempt completion events and the map-only
        /// completion events.
        /// </summary>
        /// <remarks>
        /// NUnit's <c>Assert.AreEqual</c> takes (expected, actual, message), so every
        /// message argument below is passed last.
        /// </remarks>
        public virtual void TestFetchFailureMultipleReduces()
        {
            MRApp         app  = new MRApp(1, 3, false, this.GetType().FullName, true);
            Configuration conf = new Configuration();

            // map -> reduce -> fetch-failure -> map retry is incompatible with
            // sequential, single-task-attempt approach in uber-AM, so disable:
            conf.SetBoolean(MRJobConfig.JobUbertaskEnable, false);
            Org.Apache.Hadoop.Mapreduce.V2.App.Job.Job job = app.Submit(conf);
            app.WaitForState(job, JobState.Running);
            //all maps would be running
            NUnit.Framework.Assert.AreEqual(4, job.GetTasks().Count, "Num tasks not correct");
            IEnumerator <Task> it = job.GetTasks().Values.GetEnumerator();
            Task mapTask          = it.Next();
            Task reduceTask       = it.Next();
            Task reduceTask2      = it.Next();
            Task reduceTask3      = it.Next();

            //wait for Task state move to RUNNING
            app.WaitForState(mapTask, TaskState.Running);
            TaskAttempt mapAttempt1 = mapTask.GetAttempts().Values.GetEnumerator().Next();

            app.WaitForState(mapAttempt1, TaskAttemptState.Running);
            //send the done signal to the map attempt
            app.GetContext().GetEventHandler().Handle(
                new TaskAttemptEvent(mapAttempt1.GetID(), TaskAttemptEventType.TaDone));
            // wait for map success
            app.WaitForState(mapTask, TaskState.Succeeded);
            TaskAttemptCompletionEvent[] events = job.GetTaskAttemptCompletionEvents(0, 100);
            NUnit.Framework.Assert.AreEqual(1, events.Length, "Num completion events not correct");
            NUnit.Framework.Assert.AreEqual(TaskAttemptCompletionEventStatus.Succeeded,
                                            events[0].GetStatus(), "Event status not correct");
            // wait for reduce to start running
            app.WaitForState(reduceTask, TaskState.Running);
            app.WaitForState(reduceTask2, TaskState.Running);
            app.WaitForState(reduceTask3, TaskState.Running);
            TaskAttempt reduceAttempt = reduceTask.GetAttempts().Values.GetEnumerator().Next();

            app.WaitForState(reduceAttempt, TaskAttemptState.Running);
            UpdateStatus(app, reduceAttempt, Phase.Shuffle);
            TaskAttempt reduceAttempt2 = reduceTask2.GetAttempts().Values.GetEnumerator().Next();

            app.WaitForState(reduceAttempt2, TaskAttemptState.Running);
            UpdateStatus(app, reduceAttempt2, Phase.Shuffle);
            TaskAttempt reduceAttempt3 = reduceTask3.GetAttempts().Values.GetEnumerator().Next();

            app.WaitForState(reduceAttempt3, TaskAttemptState.Running);
            UpdateStatus(app, reduceAttempt3, Phase.Shuffle);
            //send 2 fetch failures from reduce to prepare for map re execution
            SendFetchFailure(app, reduceAttempt, mapAttempt1);
            SendFetchFailure(app, reduceAttempt, mapAttempt1);
            //We should not re-launch the map task yet
            NUnit.Framework.Assert.AreEqual(TaskState.Succeeded, mapTask.GetState());
            UpdateStatus(app, reduceAttempt2, Phase.Reduce);
            UpdateStatus(app, reduceAttempt3, Phase.Reduce);
            //send 3rd fetch failures from reduce to trigger map re execution
            SendFetchFailure(app, reduceAttempt, mapAttempt1);
            //wait for map Task state move back to RUNNING
            app.WaitForState(mapTask, TaskState.Running);
            //map attempt must have become FAILED
            NUnit.Framework.Assert.AreEqual(TaskAttemptState.Failed, mapAttempt1.GetState(),
                                            "Map TaskAttempt state not correct");
            NUnit.Framework.Assert.AreEqual(2, mapTask.GetAttempts().Count,
                                            "Num attempts in Map Task not correct");
            IEnumerator <TaskAttempt> atIt = mapTask.GetAttempts().Values.GetEnumerator();

            // Skip the first (failed) attempt; the second one is the retry.
            atIt.Next();
            TaskAttempt mapAttempt2 = atIt.Next();

            app.WaitForState(mapAttempt2, TaskAttemptState.Running);
            //send the done signal to the second map attempt
            app.GetContext().GetEventHandler().Handle(
                new TaskAttemptEvent(mapAttempt2.GetID(), TaskAttemptEventType.TaDone));
            // wait for map success
            app.WaitForState(mapTask, TaskState.Succeeded);
            //send done to reduce
            app.GetContext().GetEventHandler().Handle(
                new TaskAttemptEvent(reduceAttempt.GetID(), TaskAttemptEventType.TaDone));
            //send done to reduce
            app.GetContext().GetEventHandler().Handle(
                new TaskAttemptEvent(reduceAttempt2.GetID(), TaskAttemptEventType.TaDone));
            //send done to reduce
            app.GetContext().GetEventHandler().Handle(
                new TaskAttemptEvent(reduceAttempt3.GetID(), TaskAttemptEventType.TaDone));
            app.WaitForState(job, JobState.Succeeded);
            //previous completion event now becomes obsolete
            // (checked on the array fetched before the map was re-run)
            NUnit.Framework.Assert.AreEqual(TaskAttemptCompletionEventStatus.Obsolete,
                                            events[0].GetStatus(), "Event status not correct");
            events = job.GetTaskAttemptCompletionEvents(0, 100);
            NUnit.Framework.Assert.AreEqual(6, events.Length, "Num completion events not correct");
            NUnit.Framework.Assert.AreEqual(mapAttempt1.GetID(), events[0].GetAttemptId(),
                                            "Event map attempt id not correct");
            NUnit.Framework.Assert.AreEqual(mapAttempt1.GetID(), events[1].GetAttemptId(),
                                            "Event map attempt id not correct");
            NUnit.Framework.Assert.AreEqual(mapAttempt2.GetID(), events[2].GetAttemptId(),
                                            "Event map attempt id not correct");
            NUnit.Framework.Assert.AreEqual(reduceAttempt.GetID(), events[3].GetAttemptId(),
                                            "Event reduce attempt id not correct");
            NUnit.Framework.Assert.AreEqual(TaskAttemptCompletionEventStatus.Obsolete,
                                            events[0].GetStatus(),
                                            "Event status not correct for map attempt1");
            NUnit.Framework.Assert.AreEqual(TaskAttemptCompletionEventStatus.Failed,
                                            events[1].GetStatus(),
                                            "Event status not correct for map attempt1");
            NUnit.Framework.Assert.AreEqual(TaskAttemptCompletionEventStatus.Succeeded,
                                            events[2].GetStatus(),
                                            "Event status not correct for map attempt2");
            NUnit.Framework.Assert.AreEqual(TaskAttemptCompletionEventStatus.Succeeded,
                                            events[3].GetStatus(),
                                            "Event status not correct for reduce attempt1");
            TaskCompletionEvent[] mapEvents       = job.GetMapAttemptCompletionEvents(0, 2);
            TaskCompletionEvent[] convertedEvents = TypeConverter.FromYarn(events);
            NUnit.Framework.Assert.AreEqual(2, mapEvents.Length, "Incorrect number of map events");
            // NOTE(review): this unqualified Assert.AssertArrayEquals is not an NUnit API and
            // is left unchanged - confirm which Assert type it resolves to and its argument order.
            Assert.AssertArrayEquals("Unexpected map events", Arrays.CopyOfRange(convertedEvents
                                                                                 , 0, 2), mapEvents);
            mapEvents = job.GetMapAttemptCompletionEvents(2, 200);
            NUnit.Framework.Assert.AreEqual(1, mapEvents.Length, "Incorrect number of map events");
            NUnit.Framework.Assert.AreEqual(convertedEvents[2], mapEvents[0], "Unexpected map event");
        }