예제 #1
0
        /// <summary>
        /// Processes one task attempt status, stamping it with the current reading
        /// of <c>clock</c> before passing it to <c>StatusUpdate</c>.
        /// </summary>
        /// <param name="status">the task attempt status to process</param>
        public override void HandleAttempt(TaskAttemptStatusUpdateEvent.TaskAttemptStatus
                                           status)
        {
            // The clock is read first; the reading becomes the status' timestamp.
            StatusUpdate(status, clock.GetTime());
        }
예제 #2
0
        /// <summary>Folds a single TaskAttemptStatus report into the speculation data.</summary>
        /// <param name="reportedStatus">
        /// the status report received from a task attempt that should be
        /// merged into the speculation data for its job
        /// </param>
        /// <param name="timestamp">
        /// the time the status corresponds to; it matters because a status
        /// carries progress
        /// </param>
        protected internal virtual void StatusUpdate(TaskAttemptStatusUpdateEvent.TaskAttemptStatus
                                                     reportedStatus, long timestamp)
        {
            string        reportedState = reportedStatus.taskState.ToString();
            TaskAttemptId attemptID     = reportedStatus.id;
            TaskId        taskID        = attemptID.GetTaskId();

            // Reports for a job or task the context does not know are dropped.
            Org.Apache.Hadoop.Mapreduce.V2.App.Job.Job owningJob = context.GetJob(taskID.GetJobId());
            if (owningJob == null || owningJob.GetTask(taskID) == null)
            {
                return;
            }

            estimator.UpdateAttempt(reportedStatus, timestamp);

            bool isRunning = reportedState.Equals(TaskAttemptState.Running.ToString());
            if (isRunning)
            {
                runningTasks.PutIfAbsent(taskID, true);
                return;
            }

            // Not running: drop the task from the running set, and forget the
            // attempt's statistics unless the attempt is still starting.
            runningTasks.Remove(taskID, true);
            bool isStarting = reportedState.Equals(TaskAttemptState.Starting.ToString());
            if (!isStarting)
            {
                Sharpen.Collections.Remove(runningTaskAttemptStatistics, attemptID);
            }
        }
예제 #3
0
 /// <summary>
 /// Creates an <c>AttemptStart</c> speculator event for the given attempt.
 /// </summary>
 /// <param name="attemptID">the attempt the event refers to</param>
 /// <param name="flag">not read by this constructor</param>
 /// <param name="timestamp">time passed on to the base event</param>
 public SpeculatorEvent(TaskAttemptId attemptID, bool flag, long timestamp)
     : base(Speculator.EventType.AttemptStart, timestamp)
 {
     // A fresh status that carries only the attempt id.
     TaskAttemptStatusUpdateEvent.TaskAttemptStatus startStatus =
         new TaskAttemptStatusUpdateEvent.TaskAttemptStatus();
     this.reportedStatus    = startStatus;
     this.reportedStatus.id = attemptID;
     this.taskID            = attemptID.GetTaskId();
 }
예제 #4
0
        /// <summary>
        /// Translates a status report received from a task into a
        /// <c>TaskAttemptStatusUpdateEvent.TaskAttemptStatus</c> and dispatches it
        /// through the context's event handler.
        /// </summary>
        /// <param name="taskAttemptID">id of the attempt the report belongs to</param>
        /// <param name="taskStatus">status values supplied by the task</param>
        /// <returns>always <c>true</c></returns>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        public virtual bool StatusUpdate(TaskAttemptID taskAttemptID, TaskStatus taskStatus
                                         )
        {
            TaskAttemptId convertedID = TypeConverter.ToYarn(taskAttemptID);

            // Tell the heartbeat handler about the attempt before building the update.
            taskHeartbeatHandler.Progressing(convertedID);

            TaskAttemptStatusUpdateEvent.TaskAttemptStatus update =
                new TaskAttemptStatusUpdateEvent.TaskAttemptStatus();
            update.id = convertedID;

            // Progress, state string and phase are copied from the task's report.
            update.progress = taskStatus.GetProgress();
            Log.Info("Progress of TaskAttempt " + taskAttemptID + " is : " + taskStatus.GetProgress());
            update.stateString = taskStatus.GetStateString();
            update.phase       = TypeConverter.ToYarn(taskStatus.GetPhase());

            // Counters are converted into the new format here because that is the
            // primary storage format inside the AM; this avoids repeated
            // conversions and unnecessary heap usage.
            update.counters = new Counters(taskStatus.GetCounters());

            bool reportedByMap = taskStatus.GetIsMap();
            if (reportedByMap)
            {
                // Map finish time is set by the task (map only).
                if (taskStatus.GetMapFinishTime() != 0)
                {
                    update.mapFinishTime = taskStatus.GetMapFinishTime();
                }
            }
            else
            {
                // Shuffle finish time is set by the task (reduce only).
                if (taskStatus.GetShuffleFinishTime() != 0)
                {
                    update.shuffleFinishTime = taskStatus.GetShuffleFinishTime();
                }
                // Sort finish time is set by the task (reduce only).
                if (taskStatus.GetSortFinishTime() != 0)
                {
                    update.sortFinishTime = taskStatus.GetSortFinishTime();
                }
            }

            // The task state is deliberately not set here. It is used by
            // speculation and will be set in TaskAttemptImpl.

            // Copy the fetch failures, converting each id on the way.
            if (taskStatus.GetFetchFailedMaps() != null && taskStatus.GetFetchFailedMaps().Count > 0)
            {
                update.fetchFailedMaps = new AList <TaskAttemptId>();
                foreach (TaskAttemptID failedMapId in taskStatus.GetFetchFailedMaps())
                {
                    update.fetchFailedMaps.AddItem(TypeConverter.ToYarn(failedMapId));
                }
            }

            // TODO: the following are not needed here, but need to be set somewhere
            // inside the AppMaster:
            //   - run state (set by the TT/JT; transform into a state)
            //   - start time (used to be set by the TaskTracker; should be set by getTask())
            //   - finish time (used to be set by TT/JT; should be set when the task finishes)
            //   - include-counters flag (was used by the TT to do counter updates only
            //     once every minute, so it is never changed by the task itself)
            context.GetEventHandler().Handle(
                new TaskAttemptStatusUpdateEvent(update.id, update));
            return true;
        }
예제 #5
0
        /// <summary>
        /// Runs the base update and then feeds the attempt's reported progress into
        /// <c>IncorporateReading</c>.
        /// </summary>
        /// <param name="status">the task attempt status being processed</param>
        /// <param name="timestamp">the time associated with the status</param>
        public override void UpdateAttempt(TaskAttemptStatusUpdateEvent.TaskAttemptStatus
                                           status, long timestamp)
        {
            base.UpdateAttempt(status, timestamp);

            // Attempt id and progress are read only after the base update has run.
            IncorporateReading(status.id, status.progress, timestamp);
        }
예제 #6
0
 // The speculative attempt may be not killed before the MR job succeeds.
 /// <summary>
 /// Builds a TaskAttemptStatus carrying only an id, a progress value and a state.
 /// </summary>
 /// <param name="id">attempt id stored in the status</param>
 /// <param name="progress">progress stored in the status</param>
 /// <param name="state">attempt state stored in the status</param>
 /// <returns>the newly created status</returns>
 private TaskAttemptStatusUpdateEvent.TaskAttemptStatus CreateTaskAttemptStatus(TaskAttemptId
                                                                                id, float progress, TaskAttemptState state)
 {
     return new TaskAttemptStatusUpdateEvent.TaskAttemptStatus
     {
         id        = id,
         progress  = progress,
         taskState = state
     };
 }
예제 #7
0
        /// <summary>
        /// Records the run time of a task the first time one of its attempts is
        /// observed in the <c>Succeeded</c> state.
        /// </summary>
        /// <remarks>
        /// The update is ignored when the job, the task or the attempt cannot be
        /// resolved through <c>context</c>. A task is counted only once: it is
        /// added to <c>doneTasks</c> on its first success and later successes are
        /// skipped.
        /// </remarks>
        /// <param name="status">the status whose attempt id identifies the attempt</param>
        /// <param name="timestamp">the time used as the attempt's finish time</param>
        public virtual void UpdateAttempt(TaskAttemptStatusUpdateEvent.TaskAttemptStatus
                                          status, long timestamp)
        {
            TaskAttemptId attemptID = status.id;
            TaskId        taskID    = attemptID.GetTaskId();
            JobId         jobID     = taskID.GetJobId();

            Org.Apache.Hadoop.Mapreduce.V2.App.Job.Job job = context.GetJob(jobID);
            if (job == null)
            {
                return;
            }
            Task task = job.GetTask(taskID);

            if (task == null)
            {
                return;
            }

            // A long can never compare equal to null, so the former
            // "boxedStart == null" fallback was dead code. TryGetValue restores the
            // intended "no start time recorded" case explicitly.
            long start;
            if (!startTimes.TryGetValue(attemptID, out start))
            {
                start = long.MinValue;
            }

            TaskAttempt taskAttempt = task.GetAttempt(attemptID);
            if (taskAttempt == null)
            {
                // The task does not know this attempt; nothing to record.
                return;
            }
            if (taskAttempt.GetState() != TaskAttemptState.Succeeded)
            {
                return;
            }

            // Is this a new success?
            bool isNew = false;
            lock (doneTasks)
            {
                if (!doneTasks.Contains(task))
                {
                    doneTasks.AddItem(task);
                    isNew = true;
                }
            }
            // Note that if a task completes twice [because of a previous speculation
            //  and a race, or a success followed by loss of the machine with the
            //  local data] we only count the first one.
            if (!isNew)
            {
                return;
            }

            long finish = timestamp;
            // Only a plausible start/finish pair contributes a duration.
            if (start > 1L && finish > 1L && start <= finish)
            {
                long           duration   = finish - start;
                DataStatistics statistics = DataStatisticsForTask(taskID);
                if (statistics != null)
                {
                    statistics.Add(duration);
                }
            }
        }
예제 #8
0
        /// <summary>
        /// Sends a TaskAttemptStatusUpdateEvent for <paramref name="attempt"/> with
        /// fixed test values (progress 0.5, state string "OK", zero finish times)
        /// and the given phase.
        /// </summary>
        /// <param name="app">application whose event handler receives the event</param>
        /// <param name="attempt">attempt supplying the id and the task state</param>
        /// <param name="phase">phase stored in the status</param>
        private void UpdateStatus(MRApp app, TaskAttempt attempt, Phase phase)
        {
            TaskAttemptStatusUpdateEvent.TaskAttemptStatus report =
                new TaskAttemptStatusUpdateEvent.TaskAttemptStatus
                {
                    counters          = new Counters(),
                    fetchFailedMaps   = new AList <TaskAttemptId>(),
                    id                = attempt.GetID(),
                    mapFinishTime     = 0,
                    phase             = phase,
                    progress          = 0.5f,
                    shuffleFinishTime = 0,
                    sortFinishTime    = 0,
                    stateString       = "OK",
                    taskState         = attempt.GetState()
                };

            TaskAttemptStatusUpdateEvent updateEvent =
                new TaskAttemptStatusUpdateEvent(attempt.GetID(), report);
            app.GetContext().GetEventHandler().Handle(updateEvent);
        }
예제 #9
0
        /// <summary>
        /// End-to-end check of the client service: submits a job, pushes a
        /// diagnostics update and a status update for its single attempt, then
        /// queries the job through an <c>MRClientProtocol</c> proxy and asserts
        /// that every report comes back populated. Finally verifies that asking
        /// for task reports of an unknown job id raises an IOException.
        /// </summary>
        public virtual void Test()
        {
            TestMRClientService.MRAppWithClientService app = new TestMRClientService.MRAppWithClientService
                                                                 (this, 1, 0, false);
            Configuration conf = new Configuration();

            Org.Apache.Hadoop.Mapreduce.V2.App.Job.Job job = app.Submit(conf);
            app.WaitForState(job, JobState.Running);
            NUnit.Framework.Assert.AreEqual("Num tasks not correct", 1, job.GetTasks().Count);
            // Take the first task of the job and wait until it and its first attempt run.
            IEnumerator <Task> it = job.GetTasks().Values.GetEnumerator();
            Task task             = it.Next();

            app.WaitForState(task, TaskState.Running);
            TaskAttempt attempt = task.GetAttempts().Values.GetEnumerator().Next();

            app.WaitForState(attempt, TaskAttemptState.Running);
            // send the diagnostic (only diagnostic1 is sent; diagnostic2 is not used below)
            string diagnostic1 = "Diagnostic1";
            string diagnostic2 = "Diagnostic2";

            app.GetContext().GetEventHandler().Handle(new TaskAttemptDiagnosticsUpdateEvent(attempt
                                                                                            .GetID(), diagnostic1));
            // build the status update
            TaskAttemptStatusUpdateEvent.TaskAttemptStatus taskAttemptStatus = new TaskAttemptStatusUpdateEvent.TaskAttemptStatus
                                                                                   ();
            taskAttemptStatus.id          = attempt.GetID();
            taskAttemptStatus.progress    = 0.5f;
            taskAttemptStatus.stateString = "RUNNING";
            taskAttemptStatus.taskState   = TaskAttemptState.Running;
            taskAttemptStatus.phase       = Phase.Map;
            // send the status update
            app.GetContext().GetEventHandler().Handle(new TaskAttemptStatusUpdateEvent(attempt
                                                                                       .GetID(), taskAttemptStatus));
            // verify that all objects are fully populated by invoking RPCs.
            YarnRPC          rpc   = YarnRPC.Create(conf);
            MRClientProtocol proxy = (MRClientProtocol)rpc.GetProxy(typeof(MRClientProtocol),
                                                                    app.clientService.GetBindAddress(), conf);
            // counters of the job
            GetCountersRequest gcRequest = recordFactory.NewRecordInstance <GetCountersRequest
                                                                            >();

            gcRequest.SetJobId(job.GetID());
            NUnit.Framework.Assert.IsNotNull("Counters is null", proxy.GetCounters(gcRequest)
                                             .GetCounters());
            // job report
            GetJobReportRequest gjrRequest = recordFactory.NewRecordInstance <GetJobReportRequest
                                                                              >();

            gjrRequest.SetJobId(job.GetID());
            JobReport jr = proxy.GetJobReport(gjrRequest).GetJobReport();

            VerifyJobReport(jr);
            // task attempt completion events, starting at event 0, at most 10
            GetTaskAttemptCompletionEventsRequest gtaceRequest = recordFactory.NewRecordInstance
                                                                 <GetTaskAttemptCompletionEventsRequest>();

            gtaceRequest.SetJobId(job.GetID());
            gtaceRequest.SetFromEventId(0);
            gtaceRequest.SetMaxEvents(10);
            NUnit.Framework.Assert.IsNotNull("TaskCompletionEvents is null", proxy.GetTaskAttemptCompletionEvents
                                                 (gtaceRequest).GetCompletionEventList());
            // diagnostics of the attempt
            GetDiagnosticsRequest gdRequest = recordFactory.NewRecordInstance <GetDiagnosticsRequest
                                                                               >();

            gdRequest.SetTaskAttemptId(attempt.GetID());
            NUnit.Framework.Assert.IsNotNull("Diagnostics is null", proxy.GetDiagnostics(gdRequest
                                                                                         ).GetDiagnosticsList());
            // task attempt report
            GetTaskAttemptReportRequest gtarRequest = recordFactory.NewRecordInstance <GetTaskAttemptReportRequest
                                                                                       >();

            gtarRequest.SetTaskAttemptId(attempt.GetID());
            TaskAttemptReport tar = proxy.GetTaskAttemptReport(gtarRequest).GetTaskAttemptReport
                                        ();

            VerifyTaskAttemptReport(tar);
            // task report
            GetTaskReportRequest gtrRequest = recordFactory.NewRecordInstance <GetTaskReportRequest
                                                                               >();

            gtrRequest.SetTaskId(task.GetID());
            NUnit.Framework.Assert.IsNotNull("TaskReport is null", proxy.GetTaskReport(gtrRequest
                                                                                       ).GetTaskReport());
            // task reports per task type: first map, then reduce
            GetTaskReportsRequest gtreportsRequest = recordFactory.NewRecordInstance <GetTaskReportsRequest
                                                                                      >();

            gtreportsRequest.SetJobId(job.GetID());
            gtreportsRequest.SetTaskType(TaskType.Map);
            NUnit.Framework.Assert.IsNotNull("TaskReports for map is null", proxy.GetTaskReports
                                                 (gtreportsRequest).GetTaskReportList());
            gtreportsRequest = recordFactory.NewRecordInstance <GetTaskReportsRequest>();
            gtreportsRequest.SetJobId(job.GetID());
            gtreportsRequest.SetTaskType(TaskType.Reduce);
            NUnit.Framework.Assert.IsNotNull("TaskReports for reduce is null", proxy.GetTaskReports
                                                 (gtreportsRequest).GetTaskReportList());
            // exactly the one diagnostic sent above must be reported for the attempt
            IList <string> diag = proxy.GetDiagnostics(gdRequest).GetDiagnosticsList();

            NUnit.Framework.Assert.AreEqual("Num diagnostics not correct", 1, diag.Count);
            NUnit.Framework.Assert.AreEqual("Diag 1 not correct", diagnostic1, diag[0].ToString
                                                ());
            // the task report must carry one diagnostic as well
            TaskReport taskReport = proxy.GetTaskReport(gtrRequest).GetTaskReport();

            NUnit.Framework.Assert.AreEqual("Num diagnostics not correct", 1, taskReport.GetDiagnosticsCount
                                                ());
            //send the done signal to the task
            app.GetContext().GetEventHandler().Handle(new TaskAttemptEvent(task.GetAttempts()
                                                                           .Values.GetEnumerator().Next().GetID(), TaskAttemptEventType.TaDone));
            app.WaitForState(job, JobState.Succeeded);
            // For invalid jobid, throw IOException
            gtreportsRequest = recordFactory.NewRecordInstance <GetTaskReportsRequest>();
            gtreportsRequest.SetJobId(TypeConverter.ToYarn(JobID.ForName("job_1415730144495_0001"
                                                                         )));
            gtreportsRequest.SetTaskType(TaskType.Reduce);
            try
            {
                proxy.GetTaskReports(gtreportsRequest);
                NUnit.Framework.Assert.Fail("IOException not thrown for invalid job id");
            }
            catch (IOException)
            {
                // Expected path: the request above is meant to fail with an IOException.
            }
        }
예제 #10
0
        /*   *************************************************************    */
        // This is the code section that runs periodically and adds speculations for
        //  those jobs that need them.
        // This can return a few magic values for tasks that shouldn't speculate:
        //  returns ON_SCHEDULE if thresholdRuntime(taskID) says that we should not
        //     consider speculating this task
        //  returns ALREADY_SPECULATING if that is true.  This has priority.
        //  returns TOO_NEW if our companion task hasn't gotten any information
        //  returns PROGRESS_IS_GOOD if the task is sailing through
        //  returns TOO_LATE_TO_SPECULATE if a replacement attempt started now is
        //     not estimated to finish before the running attempt
        //  returns NOT_RUNNING if the task is not running
        //
        // All of these values are negative.  Any value that should be allowed to
        //  speculate is 0 or positive.
        //
        // For a task that may speculate, the result is the estimated end time of
        //  the running attempt minus the estimated end time of a replacement.
        private long SpeculationValue(TaskId taskID, long now)
        {
            Org.Apache.Hadoop.Mapreduce.V2.App.Job.Job job = context.GetJob(taskID.GetJobId()
                                                                            );
            Task task = job.GetTask(taskID);
            IDictionary <TaskAttemptId, TaskAttempt> attempts = task.GetAttempts();
            // long.MinValue marks "threshold not looked up yet"; see the check near the end.
            long acceptableRuntime = long.MinValue;
            long result            = long.MinValue;

            // For a task not recorded in mayHaveSpeculated, consult the threshold up front.
            if (!mayHaveSpeculated.Contains(taskID))
            {
                acceptableRuntime = estimator.ThresholdRuntime(taskID);
                if (acceptableRuntime == long.MaxValue)
                {
                    return(OnSchedule);
                }
            }
            TaskAttemptId runningTaskAttemptID  = null;
            int           numberRunningAttempts = 0;

            foreach (TaskAttempt taskAttempt in attempts.Values)
            {
                // Only attempts that are running or starting are considered.
                if (taskAttempt.GetState() == TaskAttemptState.Running || taskAttempt.GetState()
                    == TaskAttemptState.Starting)
                {
                    // A second live attempt means the task is already being speculated.
                    if (++numberRunningAttempts > 1)
                    {
                        return(AlreadySpeculating);
                    }
                    runningTaskAttemptID = taskAttempt.GetID();
                    long estimatedRunTime     = estimator.EstimatedRuntime(runningTaskAttemptID);
                    long taskAttemptStartTime = estimator.AttemptEnrolledTime(runningTaskAttemptID);
                    if (taskAttemptStartTime > now)
                    {
                        // This background process ran before we could process the task
                        //  attempt status change that chronicles the attempt start
                        return(TooNew);
                    }
                    long estimatedEndTime            = estimatedRunTime + taskAttemptStartTime;
                    long estimatedReplacementEndTime = now + estimator.EstimatedNewAttemptRuntime(taskID
                                                                                                  );
                    float progress = taskAttempt.GetProgress();
                    DefaultSpeculator.TaskAttemptHistoryStatistics data = runningTaskAttemptStatistics
                                                                          [runningTaskAttemptID];
                    if (data == null)
                    {
                        // First observation of this attempt: start tracking its statistics.
                        runningTaskAttemptStatistics[runningTaskAttemptID] = new DefaultSpeculator.TaskAttemptHistoryStatistics
                                                                                 (estimatedRunTime, progress, now);
                    }
                    else
                    {
                        if (estimatedRunTime == data.GetEstimatedRunTime() && progress == data.GetProgress
                                ())
                        {
                            // Previous stats are the same as the current stats
                            if (data.NotHeartbeatedInAWhile(now))
                            {
                                // Stats have stagnated for a while, simulate heart-beat.
                                TaskAttemptStatusUpdateEvent.TaskAttemptStatus taskAttemptStatus = new TaskAttemptStatusUpdateEvent.TaskAttemptStatus
                                                                                                       ();
                                taskAttemptStatus.id        = runningTaskAttemptID;
                                taskAttemptStatus.progress  = progress;
                                taskAttemptStatus.taskState = taskAttempt.GetState();
                                // Now simulate the heart-beat
                                HandleAttempt(taskAttemptStatus);
                            }
                        }
                        else
                        {
                            // Stats have changed - update our data structure
                            data.SetEstimatedRunTime(estimatedRunTime);
                            data.SetProgress(progress);
                            data.ResetHeartBeatTime(now);
                        }
                    }
                    // The estimated end time already lies in the past.
                    if (estimatedEndTime < now)
                    {
                        return(ProgressIsGood);
                    }
                    // A replacement started now would not finish earlier.
                    if (estimatedReplacementEndTime >= estimatedEndTime)
                    {
                        return(TooLateToSpeculate);
                    }
                    result = estimatedEndTime - estimatedReplacementEndTime;
                }
            }
            // If we are here, there's at most one task attempt.
            if (numberRunningAttempts == 0)
            {
                return(NotRunning);
            }
            // The threshold was not consulted above; do it now before returning.
            if (acceptableRuntime == long.MinValue)
            {
                acceptableRuntime = estimator.ThresholdRuntime(taskID);
                if (acceptableRuntime == long.MaxValue)
                {
                    return(OnSchedule);
                }
            }
            return(result);
        }
예제 #11
0
 // XXXX This class design assumes that the contents of AppContext.getAllJobs
 //   never change.  Is that right?
 //
 // This assumption shows up in several places, mostly in data structures that
 //   can grow without limit if an AppContext gets new Jobs when the old ones
 //   run out.  Also, these mapper statistics blocks won't cover the Jobs
 //   we don't know about.
 /// <summary>
 /// Stores <paramref name="timestamp"/> in <c>startTimes</c> under the id of
 /// the attempt described by <paramref name="status"/>.
 /// </summary>
 /// <param name="status">status whose attempt id is used as the key</param>
 /// <param name="timestamp">value recorded for the attempt</param>
 public virtual void EnrollAttempt(TaskAttemptStatusUpdateEvent.TaskAttemptStatus
                                   status, long timestamp)
 {
     TaskAttemptId enrolledAttempt = status.id;

     startTimes[enrolledAttempt] = timestamp;
 }
예제 #12
0
 /// <summary>
 /// Receives a task attempt status together with its timestamp. This
 /// implementation does nothing; the method is virtual so that subclasses can
 /// override it.
 /// </summary>
 /// <param name="status">the task attempt status</param>
 /// <param name="timestamp">the time associated with the status</param>
 public virtual void UpdateAttempt(TaskAttemptStatusUpdateEvent.TaskAttemptStatus
                                   status, long timestamp)
 {
 }
        /// <summary>
        /// Runs the base update and, for an attempt that is currently
        /// <c>Running</c>, stores a fresh run-time estimate and variance estimate
        /// for it.
        /// </summary>
        /// <remarks>
        /// The estimate is <c>(timestamp - start) / progress</c>, with progress
        /// clamped to at least 0.0001. The variance estimate is
        /// <c>estimate * progress / 10</c>. Both are stored as -1 when no usable
        /// start time is known. The update is ignored when the job, the task or
        /// the attempt cannot be resolved through <c>context</c>.
        /// </remarks>
        /// <param name="status">status supplying the attempt id and progress</param>
        /// <param name="timestamp">the time the status corresponds to</param>
        public override void UpdateAttempt(TaskAttemptStatusUpdateEvent.TaskAttemptStatus
                                           status, long timestamp)
        {
            base.UpdateAttempt(status, timestamp);
            TaskAttemptId attemptID = status.id;
            TaskId        taskID    = attemptID.GetTaskId();
            JobId         jobID     = taskID.GetJobId();

            Org.Apache.Hadoop.Mapreduce.V2.App.Job.Job job = context.GetJob(jobID);
            if (job == null)
            {
                return;
            }
            Task task = job.GetTask(taskID);

            if (task == null)
            {
                return;
            }
            TaskAttempt taskAttempt = task.GetAttempt(attemptID);

            if (taskAttempt == null)
            {
                return;
            }

            // A long can never compare equal to null, so the former
            // "boxedStart == null" fallback was dead code. TryGetValue restores the
            // intended "no start time recorded" case explicitly.
            long start;
            if (!startTimes.TryGetValue(attemptID, out start))
            {
                start = long.MinValue;
            }

            // Completions are accumulated by the base class; only a running
            // attempt gets a new estimate here.
            if (taskAttempt.GetState() != TaskAttemptState.Running)
            {
                return;
            }

            // See if this attempt is already in the registry.
            AtomicLong estimateContainer         = attemptRuntimeEstimates[taskAttempt];
            AtomicLong estimateVarianceContainer = attemptRuntimeEstimateVariances[taskAttempt];

            if (estimateContainer == null)
            {
                // Re-read before inserting. If an entry appeared in the meantime,
                // use it; previously the container stayed null in that case and
                // the estimate computed below was silently dropped.
                estimateContainer = attemptRuntimeEstimates[taskAttempt];
                if (estimateContainer == null)
                {
                    attemptRuntimeEstimates[taskAttempt] = new AtomicLong();
                    estimateContainer = attemptRuntimeEstimates[taskAttempt];
                }
            }
            if (estimateVarianceContainer == null)
            {
                attemptRuntimeEstimateVariances.PutIfAbsent(taskAttempt, new AtomicLong());
                estimateVarianceContainer = attemptRuntimeEstimateVariances[taskAttempt];
            }

            long estimate         = -1;
            long varianceEstimate = -1;
            // This code assumes that we'll never consider starting a third
            //  speculative task attempt if two are already running for this task
            if (start > 0 && timestamp > start)
            {
                // Clamp progress so that a report of zero cannot divide by zero.
                estimate         = (long)((timestamp - start) / Math.Max(0.0001, status.progress));
                varianceEstimate = (long)(estimate * status.progress / 10);
            }
            if (estimateContainer != null)
            {
                estimateContainer.Set(estimate);
            }
            if (estimateVarianceContainer != null)
            {
                estimateVarianceContainer.Set(varianceEstimate);
            }
        }
예제 #14
0
 // This will be implemented if we go to a model where the events are
 //  processed within the TaskAttempts' state transitions' code.
 /// <summary>
 /// Abstract entry point that receives a single task attempt status;
 /// concrete subclasses supply the behavior.
 /// </summary>
 /// <param name="status">the task attempt status to handle</param>
 public abstract void HandleAttempt(TaskAttemptStatusUpdateEvent.TaskAttemptStatus
                                    status);
예제 #15
0
        /// <summary>
        /// Runs a job on a controlled clock, reports 0.8 progress for every
        /// attempt, finishes every task except one randomly chosen task, waits on
        /// a condition for that remaining task, then lets its first attempt win
        /// and verifies the speculation message.
        /// </summary>
        public virtual void TestSpeculateSuccessfulWithoutUpdateEvents()
        {
            Clock           actualClock = new SystemClock();
            ControlledClock clock       = new ControlledClock(actualClock);

            clock.SetTime(Runtime.CurrentTimeMillis());
            MRApp app = new MRApp(NumMappers, NumReducers, false, "test", true, clock);

            Org.Apache.Hadoop.Mapreduce.V2.App.Job.Job job = app.Submit(new Configuration(),
                                                                        true, true);
            app.WaitForState(job, JobState.Running);
            IDictionary <TaskId, Task> tasks = job.GetTasks();

            NUnit.Framework.Assert.AreEqual("Num tasks is not correct", NumMappers + NumReducers
                                            , tasks.Count);
            // Wait until every task of the job is running.
            IEnumerator <Task> taskIter = tasks.Values.GetEnumerator();

            while (taskIter.HasNext())
            {
                app.WaitForState(taskIter.Next(), TaskState.Running);
            }
            // Process the update events, with the controlled clock moved 2000 ahead
            //  of the current system time
            clock.SetTime(Runtime.CurrentTimeMillis() + 2000);
            EventHandler appEventHandler = app.GetContext().GetEventHandler();

            // Report 0.8 progress in the Running state for every attempt of every task.
            foreach (KeyValuePair <TaskId, Task> mapTask in tasks)
            {
                foreach (KeyValuePair <TaskAttemptId, TaskAttempt> taskAttempt in mapTask.Value.GetAttempts
                             ())
                {
                    TaskAttemptStatusUpdateEvent.TaskAttemptStatus status = CreateTaskAttemptStatus(taskAttempt
                                                                                                    .Key, (float)0.8, TaskAttemptState.Running);
                    TaskAttemptStatusUpdateEvent @event = new TaskAttemptStatusUpdateEvent(taskAttempt
                                                                                           .Key, status);
                    appEventHandler.Handle(@event);
                }
            }
            // Pick the task that is left unfinished at random.
            Random generator = new Random();

            object[] taskValues         = Sharpen.Collections.ToArray(tasks.Values);
            Task     taskToBeSpeculated = (Task)taskValues[generator.Next(taskValues.Length)];

            // Other than one random task, finish every other task.
            foreach (KeyValuePair <TaskId, Task> mapTask_1 in tasks)
            {
                foreach (KeyValuePair <TaskAttemptId, TaskAttempt> taskAttempt in mapTask_1.Value.
                         GetAttempts())
                {
                    // NOTE(review): != on TaskId is a reference comparison unless the
                    //  type overloads the operator - confirm this is intended.
                    if (mapTask_1.Key != taskToBeSpeculated.GetID())
                    {
                        appEventHandler.Handle(new TaskAttemptEvent(taskAttempt.Key, TaskAttemptEventType
                                                                    .TaDone));
                        appEventHandler.Handle(new TaskAttemptEvent(taskAttempt.Key, TaskAttemptEventType
                                                                    .TaContainerCleaned));
                        app.WaitForState(taskAttempt.Value, TaskAttemptState.Succeeded);
                    }
                }
            }
            // Wait on the _Supplier_111 condition for the remaining task (arguments
            //  1000 and 60000 are presumably poll interval and timeout in ms - confirm
            //  against GenericTestUtils.WaitFor).
            GenericTestUtils.WaitFor(new _Supplier_111(taskToBeSpeculated, clock), 1000, 60000
                                     );
            // finish 1st TA, 2nd will be killed
            TaskAttempt[] ta = MakeFirstAttemptWin(appEventHandler, taskToBeSpeculated);
            VerifySpeculationMessage(app, ta);
            app.WaitForState(Service.STATE.Stopped);
        }
예제 #16
0
        /// <summary>
        /// Checks that the one task left running behind its finished peers gets speculated
        /// when attempt progress is reported through status update events.
        /// </summary>
        /// <remarks>
        /// Sequence: start the app on a controlled clock, report 0.5 progress for every
        /// attempt, finish all but the last attempt encountered, report 0.75 progress for
        /// whatever is still unfinished after moving the clock forward, then wait on the
        /// supplier, let the first attempt win and verify the speculation message.
        /// </remarks>
        public virtual void TestSepculateSuccessfulWithUpdateEvents()
        {
            Clock           wallClock = new SystemClock();
            ControlledClock clock     = new ControlledClock(wallClock);
            clock.SetTime(Runtime.CurrentTimeMillis());

            MRApp app = new MRApp(NumMappers, NumReducers, false, "test", true, clock);
            Org.Apache.Hadoop.Mapreduce.V2.App.Job.Job job = app.Submit(new Configuration(), true, true);
            app.WaitForState(job, JobState.Running);

            IDictionary<TaskId, Task> tasks = job.GetTasks();
            NUnit.Framework.Assert.AreEqual("Num tasks is not correct", NumMappers + NumReducers,
                                            tasks.Count);

            // Every task must be running before any progress is reported.
            foreach (Task runningTask in tasks.Values)
            {
                app.WaitForState(runningTask, TaskState.Running);
            }

            // Phase 1: report 50% progress for every attempt.
            clock.SetTime(Runtime.CurrentTimeMillis() + 1000);
            EventHandler appEventHandler = app.GetContext().GetEventHandler();
            foreach (KeyValuePair<TaskId, Task> taskEntry in tasks)
            {
                foreach (KeyValuePair<TaskAttemptId, TaskAttempt> attemptEntry in taskEntry.Value.GetAttempts())
                {
                    TaskAttemptId attemptId = attemptEntry.Key;
                    appEventHandler.Handle(new TaskAttemptStatusUpdateEvent(
                        attemptId,
                        CreateTaskAttemptStatus(attemptId, 0.5f, TaskAttemptState.Running)));
                }
            }

            // Phase 2: complete all attempts except the last one, which only advances to 75%.
            Task laggingTask       = null;
            int  remainingToFinish = NumMappers + NumReducers - 1;
            clock.SetTime(Runtime.CurrentTimeMillis() + 1000);
            foreach (KeyValuePair<TaskId, Task> taskEntry in tasks)
            {
                foreach (KeyValuePair<TaskAttemptId, TaskAttempt> attemptEntry in taskEntry.Value.GetAttempts())
                {
                    TaskAttemptId attemptId = attemptEntry.Key;
                    if (remainingToFinish <= 0)
                    {
                        // The last task is chosen for speculation
                        laggingTask = taskEntry.Value;
                        appEventHandler.Handle(new TaskAttemptStatusUpdateEvent(
                            attemptId,
                            CreateTaskAttemptStatus(attemptId, 0.75f, TaskAttemptState.Running)));
                        continue;
                    }
                    appEventHandler.Handle(new TaskAttemptEvent(attemptId, TaskAttemptEventType.TaDone));
                    appEventHandler.Handle(new TaskAttemptEvent(attemptId, TaskAttemptEventType.TaContainerCleaned));
                    remainingToFinish--;
                    app.WaitForState(attemptEntry.Value, TaskAttemptState.Succeeded);
                }
            }

            // Phase 3: 15 s later, attempts that have not succeeded still report 75%.
            clock.SetTime(Runtime.CurrentTimeMillis() + 15000);
            foreach (KeyValuePair<TaskId, Task> taskEntry in tasks)
            {
                foreach (KeyValuePair<TaskAttemptId, TaskAttempt> attemptEntry in taskEntry.Value.GetAttempts())
                {
                    if (attemptEntry.Value.GetState() == TaskAttemptState.Succeeded)
                    {
                        continue;
                    }
                    TaskAttemptId attemptId = attemptEntry.Key;
                    appEventHandler.Handle(new TaskAttemptStatusUpdateEvent(
                        attemptId,
                        CreateTaskAttemptStatus(attemptId, 0.75f, TaskAttemptState.Running)));
                }
            }

            // Poll the supplier every 1000 ms, for at most 60000 ms.
            GenericTestUtils.WaitFor(new _Supplier_205(laggingTask, clock), 1000, 60000);
            TaskAttempt[] ta = MakeFirstAttemptWin(appEventHandler, laggingTask);
            VerifySpeculationMessage(app, ta);
            app.WaitForState(Service.STATE.Stopped);
        }
예제 #17
0
        // this has to be at least as much as map slot requirement
        // This is a huge kluge.  The real implementations have a decent approach
        /// <summary>
        /// Runs a simulated job against <paramref name="testedEstimator"/> and asserts the
        /// number of successful speculations recorded in <c>successfulSpeculations</c>.
        /// </summary>
        /// <remarks>
        /// The method resets the shared state it relies on (estimator, clock, dispatcher,
        /// job and counters), wires a <c>DefaultSpeculator</c> to an <c>AsyncDispatcher</c>,
        /// and then repeats a simulation round until no task reports itself unfinished.
        /// Each round starts attempts in state <c>New</c> when enough slots are free, sends a
        /// status event for every other attempt, waits for the speculator's queue to empty,
        /// advances the mock clock by 1000, and calls <c>ScanForSpeculations</c> whenever the
        /// clock time is a multiple of 10000.
        /// </remarks>
        /// <param name="testedEstimator">estimator under test; stored in the <c>estimator</c> field</param>
        /// <param name="expectedSpeculations">
        /// value <c>successfulSpeculations</c> is expected to hold once the simulation ends
        /// </param>
        private void CoreTestEstimator(TaskRuntimeEstimator testedEstimator, int expectedSpeculations
                                       )
        {
            // Reset the state shared with the rest of the class so this run starts clean.
            estimator  = testedEstimator;
            clock      = new TestRuntimeEstimators.MockClock();
            dispatcher = new AsyncDispatcher();
            myJob      = null;
            slotsInUse.Set(0);
            completedMaps.Set(0);
            completedReduces.Set(0);
            successfulSpeculations.Set(0);
            taskTimeSavedBySpeculation.Set(0);
            clock.AdvanceTime(1000);
            Configuration conf = new Configuration();

            myAppContext = new TestRuntimeEstimators.MyAppContext(this, MapTasks, ReduceTasks
                                                                  );
            // The job under test is the first one the context's job collection yields.
            myJob = myAppContext.GetAllJobs().Values.GetEnumerator().Next();
            // NOTE(review): the estimator is contextualized before the speculation settings
            // below are written to conf; whether it sees them depends on how Contextualize
            // uses conf -- confirm.
            estimator.Contextualize(conf, myAppContext);
            conf.SetLong(MRJobConfig.SpeculativeRetryAfterNoSpeculate, 500L);
            conf.SetLong(MRJobConfig.SpeculativeRetryAfterSpeculate, 5000L);
            conf.SetDouble(MRJobConfig.SpeculativecapRunningTasks, 0.1);
            conf.SetDouble(MRJobConfig.SpeculativecapTotalTasks, 0.001);
            conf.SetInt(MRJobConfig.SpeculativeMinimumAllowedTasks, 5);
            speculator = new DefaultSpeculator(conf, myAppContext, estimator, clock);
            // Sanity-check that the speculator reports the values just written to conf.
            NUnit.Framework.Assert.AreEqual("wrong SPECULATIVE_RETRY_AFTER_NO_SPECULATE value"
                                            , 500L, speculator.GetSoonestRetryAfterNoSpeculate());
            NUnit.Framework.Assert.AreEqual("wrong SPECULATIVE_RETRY_AFTER_SPECULATE value",
                                            5000L, speculator.GetSoonestRetryAfterSpeculate());
            NUnit.Framework.Assert.AreEqual(speculator.GetProportionRunningTasksSpeculatable(
                                                ), 0.1, 0.00001);
            NUnit.Framework.Assert.AreEqual(speculator.GetProportionTotalTasksSpeculatable(),
                                            0.001, 0.00001);
            NUnit.Framework.Assert.AreEqual("wrong SPECULATIVE_MINIMUM_ALLOWED_TASKS value",
                                            5, speculator.GetMinimumAllowedSpeculativeTasks());
            // Register the speculator for speculator events and a request handler for task
            // events, then start the dispatcher followed by the speculator.
            dispatcher.Register(typeof(Speculator.EventType), speculator);
            dispatcher.Register(typeof(TaskEventType), new TestRuntimeEstimators.SpeculationRequestEventHandler
                                    (this));
            dispatcher.Init(conf);
            dispatcher.Start();
            speculator.Init(conf);
            speculator.Start();
            // Now that the plumbing is hooked up, we do the following:
            //  do until all tasks are finished, ...
            //  1: If we have spare capacity, assign as many map tasks as we can, then
            //     assign as many reduce tasks as we can.  Note that an odd reduce
            //     task might be started while there are still map tasks, because
            //     map tasks take 3 slots and reduce tasks 2 slots.
            //  2: Send a speculation event for every task attempt that's running
            //  note that new attempts might get started by the speculator
            // discover undone tasks
            // Seeded with the full task counts so the loop below runs at least once.
            int undoneMaps    = MapTasks;
            int undoneReduces = ReduceTasks;
            // build a task sequence where all the maps precede any of the reduces
            IList <Task> allTasksSequence = new List <Task>();

            Sharpen.Collections.AddAll(allTasksSequence, myJob.GetTasks(TaskType.Map).Values);
            Sharpen.Collections.AddAll(allTasksSequence, myJob.GetTasks(TaskType.Reduce).Values
                                       );
            while (undoneMaps + undoneReduces > 0)
            {
                // Recount the unfinished tasks from scratch on every round.
                undoneMaps    = 0;
                undoneReduces = 0;
                // start all attempts which are new but for which there is enough slots
                foreach (Task task in allTasksSequence)
                {
                    if (!task.IsFinished())
                    {
                        if (task.GetType() == TaskType.Map)
                        {
                            ++undoneMaps;
                        }
                        else
                        {
                            ++undoneReduces;
                        }
                    }
                    foreach (TaskAttempt attempt in task.GetAttempts().Values)
                    {
                        // A New attempt is started only when the free slots
                        // (InitialNumberFreeSlots - slotsInUse) cover what its task type needs.
                        if (attempt.GetState() == TaskAttemptState.New && InitialNumberFreeSlots - slotsInUse
                            .Get() >= TaskTypeSlots(task.GetType()))
                        {
                            TestRuntimeEstimators.MyTaskAttemptImpl attemptImpl = (TestRuntimeEstimators.MyTaskAttemptImpl
                                                                                   )attempt;
                            // The speculator is told about the attempt before it is started.
                            SpeculatorEvent @event = new SpeculatorEvent(attempt.GetID(), false, clock.GetTime
                                                                             ());
                            speculator.Handle(@event);
                            attemptImpl.StartUp();
                        }
                        else
                        {
                            // If a task attempt is in progress we should send the news to
                            // the Speculator.
                            // Note: this branch runs for every attempt not started above,
                            // whatever its current state.
                            TaskAttemptStatusUpdateEvent.TaskAttemptStatus status = new TaskAttemptStatusUpdateEvent.TaskAttemptStatus
                                                                                        ();
                            status.id          = attempt.GetID();
                            status.progress    = attempt.GetProgress();
                            status.stateString = attempt.GetState().ToString();
                            status.taskState   = attempt.GetState();
                            SpeculatorEvent @event = new SpeculatorEvent(status, clock.GetTime());
                            speculator.Handle(@event);
                        }
                    }
                }
                // Wall-clock start of the drain, used for the timeout below (not the mock clock).
                long startTime = Runtime.CurrentTimeMillis();
                // drain the speculator event queue
                while (!speculator.EventQueueEmpty())
                {
                    Sharpen.Thread.Yield();
                    // NOTE(review): after 130000 ms of wall-clock time the method returns
                    // without reaching the final assertion, so a queue that never empties
                    // ends the method without a failure -- confirm this is intended.
                    if (Runtime.CurrentTimeMillis() > startTime + 130000)
                    {
                        return;
                    }
                }
                // Advance simulated time by 1000 and scan for speculations whenever the
                // mock clock reads a multiple of 10000.
                clock.AdvanceTime(1000L);
                if (clock.GetTime() % 10000L == 0L)
                {
                    speculator.ScanForSpeculations();
                }
            }
            NUnit.Framework.Assert.AreEqual("We got the wrong number of successful speculations."
                                            , expectedSpeculations, successfulSpeculations.Get());
        }
예제 #18
0
 /// <summary>
 /// Creates a speculator event of type <c>AttemptStatusUpdate</c> that carries a task
 /// attempt's status report.
 /// </summary>
 /// <param name="reportedStatus">status report stored on this event</param>
 /// <param name="timestamp">timestamp passed through to the base event constructor</param>
 public SpeculatorEvent(
     TaskAttemptStatusUpdateEvent.TaskAttemptStatus reportedStatus,
     long timestamp)
     : base(Speculator.EventType.AttemptStatusUpdate, timestamp)
 {
     // Keep the report so the event's consumer can read it back.
     this.reportedStatus = reportedStatus;
 }