/// <summary>
/// Folds one task attempt status report into the speculation data, stamped
/// with the current reading of <c>clock</c>.
/// </summary>
/// <param name="status">the status report to absorb</param>
public override void HandleAttempt(TaskAttemptStatusUpdateEvent.TaskAttemptStatus status)
{
    StatusUpdate(status, clock.GetTime());
}
/// <summary>Absorbs one TaskAttemptStatus.</summary>
/// <param name="reportedStatus">
/// the status report received from a task attempt, to be folded into the
/// speculation data for this job
/// </param>
/// <param name="timestamp">
/// the time this status corresponds to; this matters because statuses
/// contain progress
/// </param>
protected internal virtual void StatusUpdate(TaskAttemptStatusUpdateEvent.TaskAttemptStatus reportedStatus, long timestamp)
{
    string reportedState = reportedStatus.taskState.ToString();
    TaskAttemptId attemptID = reportedStatus.id;
    TaskId taskID = attemptID.GetTaskId();

    // Reports for a job or task the context does not know are ignored.
    Org.Apache.Hadoop.Mapreduce.V2.App.Job.Job job = context.GetJob(taskID.GetJobId());
    if (job == null)
    {
        return;
    }
    if (job.GetTask(taskID) == null)
    {
        return;
    }

    estimator.UpdateAttempt(reportedStatus, timestamp);

    bool isRunning = reportedState.Equals(TaskAttemptState.Running.ToString());
    if (isRunning)
    {
        runningTasks.PutIfAbsent(taskID, true);
        return;
    }

    // Not running any more: the task leaves the running set.
    runningTasks.Remove(taskID, true);

    // The attempt's history statistics are kept only while it is still starting.
    bool isStarting = reportedState.Equals(TaskAttemptState.Starting.ToString());
    if (isStarting)
    {
        return;
    }
    Sharpen.Collections.Remove(runningTaskAttemptStatistics, attemptID);
}
/// <summary>
/// Creates an <c>AttemptStart</c> event for the given attempt. The event
/// carries a fresh status object that has only its id filled in.
/// </summary>
/// <param name="attemptID">the attempt that is starting</param>
/// <param name="flag">
/// not read by this constructor (presumably present only to distinguish
/// this overload from the others; confirm against callers)
/// </param>
/// <param name="timestamp">the time passed on to the base event</param>
public SpeculatorEvent(TaskAttemptId attemptID, bool flag, long timestamp)
    : base(Speculator.EventType.AttemptStart, timestamp)
{
    TaskAttemptStatusUpdateEvent.TaskAttemptStatus startStatus =
        new TaskAttemptStatusUpdateEvent.TaskAttemptStatus();
    startStatus.id = attemptID;

    this.reportedStatus = startStatus;
    this.taskID = attemptID.GetTaskId();
}
/// <summary>
/// Converts a status report sent by a task into a
/// <c>TaskAttemptStatusUpdateEvent.TaskAttemptStatus</c>, marks the attempt
/// as progressing with the heartbeat handler, and posts the converted status
/// to the event handler as a <c>TaskAttemptStatusUpdateEvent</c>.
/// </summary>
/// <param name="taskAttemptID">the reporting attempt</param>
/// <param name="taskStatus">the status reported by the task</param>
/// <returns>always <c>true</c></returns>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
public virtual bool StatusUpdate(TaskAttemptID taskAttemptID, TaskStatus taskStatus)
{
    TaskAttemptId yarnAttemptID = TypeConverter.ToYarn(taskAttemptID);
    taskHeartbeatHandler.Progressing(yarnAttemptID);

    TaskAttemptStatusUpdateEvent.TaskAttemptStatus update =
        new TaskAttemptStatusUpdateEvent.TaskAttemptStatus();
    update.id = yarnAttemptID;

    // Task sends the updated progress to the TT.
    update.progress = taskStatus.GetProgress();
    Log.Info("Progress of TaskAttempt " + taskAttemptID + " is : " + taskStatus.GetProgress());

    // Task sends the updated state-string to the TT.
    update.stateString = taskStatus.GetStateString();

    // Task sends the updated phase to the TT.
    update.phase = TypeConverter.ToYarn(taskStatus.GetPhase());

    // Counters are updated by the task. They are converted into the new
    // format because that is the primary storage format inside the AM; this
    // avoids multiple conversions and unnecessary heap usage.
    update.counters = new Counters(taskStatus.GetCounters());

    bool reportedByMap = taskStatus.GetIsMap();
    if (reportedByMap)
    {
        // Map finish time set by the task (map only).
        if (taskStatus.GetMapFinishTime() != 0)
        {
            update.mapFinishTime = taskStatus.GetMapFinishTime();
        }
    }
    else
    {
        // Shuffle finish time set by the task (reduce only).
        if (taskStatus.GetShuffleFinishTime() != 0)
        {
            update.shuffleFinishTime = taskStatus.GetShuffleFinishTime();
        }

        // Sort finish time set by the task (reduce only).
        if (taskStatus.GetSortFinishTime() != 0)
        {
            update.sortFinishTime = taskStatus.GetSortFinishTime();
        }
    }

    // Not setting the task state. It is used by speculation and will be set
    // in TaskAttemptImpl.
    //   taskAttemptStatus.taskState = TypeConverter.toYarn(taskStatus.getRunState());

    // Set the fetch failures.
    if (taskStatus.GetFetchFailedMaps() != null && taskStatus.GetFetchFailedMaps().Count > 0)
    {
        AList<TaskAttemptId> convertedFailures = new AList<TaskAttemptId>();
        foreach (TaskAttemptID failedMapId in taskStatus.GetFetchFailedMaps())
        {
            convertedFailures.AddItem(TypeConverter.ToYarn(failedMapId));
        }
        update.fetchFailedMaps = convertedFailures;
    }

    // Task sends the information about the nextRecordRange to the TT.
    //
    // TODO: The following are not needed here, but need to be set somewhere
    // inside the AppMaster.
    //   taskStatus.getRunState();        // Set by the TT/JT. Transform into a state. TODO
    //   taskStatus.getStartTime();       // Used to be set by the TaskTracker. Should be set by getTask().
    //   taskStatus.getFinishTime();      // Used to be set by TT/JT. Should be set when the task finishes.
    //   taskStatus.getIncludeCounters(); // Was used by the TT to do counter updates only once every
    //                                    // minute, so it is never changed by the task itself.
    context.GetEventHandler().Handle(new TaskAttemptStatusUpdateEvent(update.id, update));
    return true;
}
/// <summary>
/// Runs the base-class update, then incorporates the reported progress of
/// the attempt as a new reading taken at <paramref name="timestamp"/>.
/// </summary>
/// <param name="status">the status report; its id and progress are read</param>
/// <param name="timestamp">the time the report corresponds to</param>
public override void UpdateAttempt(TaskAttemptStatusUpdateEvent.TaskAttemptStatus status, long timestamp)
{
    base.UpdateAttempt(status, timestamp);
    IncorporateReading(status.id, status.progress, timestamp);
}
// The speculative attempt may be not killed before the MR job succeeds.

/// <summary>
/// Builds a status object holding only the given id, progress and state.
/// </summary>
/// <param name="id">the attempt the status belongs to</param>
/// <param name="progress">the progress value to report</param>
/// <param name="state">the attempt state to report</param>
/// <returns>the newly created status</returns>
private TaskAttemptStatusUpdateEvent.TaskAttemptStatus CreateTaskAttemptStatus(TaskAttemptId id, float progress, TaskAttemptState state)
{
    return new TaskAttemptStatusUpdateEvent.TaskAttemptStatus
    {
        id = id,
        progress = progress,
        taskState = state
    };
}
/// <summary>
/// Records the runtime of a task the first time one of its attempts is seen
/// in the <c>Succeeded</c> state.
/// </summary>
/// <remarks>
/// Reports for an unknown job, task or attempt are ignored. If a task
/// completes twice (because of a previous speculation and a race, or a
/// success followed by loss of the machine with the local data), only the
/// first completion is counted.
/// </remarks>
/// <param name="status">the status report; only its id is read here</param>
/// <param name="timestamp">
/// the time the report corresponds to, used as the finish time of a newly
/// succeeded attempt
/// </param>
public virtual void UpdateAttempt(TaskAttemptStatusUpdateEvent.TaskAttemptStatus status, long timestamp)
{
    TaskAttemptId attemptID = status.id;
    TaskId taskID = attemptID.GetTaskId();
    JobId jobID = taskID.GetJobId();

    Org.Apache.Hadoop.Mapreduce.V2.App.Job.Job job = context.GetJob(jobID);
    if (job == null)
    {
        return;
    }

    Task task = job.GetTask(taskID);
    if (task == null)
    {
        return;
    }

    // A `long` can never be null, so the former `boxedStart == null` test
    // was dead code. TryGetValue expresses "no recorded start time" directly
    // and cannot throw for an attempt that was never enrolled.
    long start;
    if (!startTimes.TryGetValue(attemptID, out start))
    {
        start = long.MinValue;
    }

    TaskAttempt taskAttempt = task.GetAttempt(attemptID);
    if (taskAttempt == null || taskAttempt.GetState() != TaskAttemptState.Succeeded)
    {
        return;
    }

    // Is this a new success?
    bool isNew = false;
    lock (doneTasks)
    {
        if (!doneTasks.Contains(task))
        {
            doneTasks.AddItem(task);
            isNew = true;
        }
    }
    if (!isNew)
    {
        return;
    }

    // It's a new completion: record its duration if both ends are plausible.
    long finish = timestamp;
    if (start > 1L && finish > 1L && start <= finish)
    {
        long duration = finish - start;
        DataStatistics statistics = DataStatisticsForTask(taskID);
        if (statistics != null)
        {
            statistics.Add(duration);
        }
    }
}
/// <summary>
/// Posts a fixed, half-done status report for the attempt to the app's
/// event handler, using the supplied phase and the attempt's current state.
/// </summary>
/// <param name="app">the app whose event handler receives the update</param>
/// <param name="attempt">the attempt the report is about</param>
/// <param name="phase">the phase to report</param>
private void UpdateStatus(MRApp app, TaskAttempt attempt, Phase phase)
{
    TaskAttemptStatusUpdateEvent.TaskAttemptStatus report =
        new TaskAttemptStatusUpdateEvent.TaskAttemptStatus
        {
            counters = new Counters(),
            fetchFailedMaps = new AList<TaskAttemptId>(),
            id = attempt.GetID(),
            mapFinishTime = 0,
            phase = phase,
            progress = 0.5f,
            shuffleFinishTime = 0,
            sortFinishTime = 0,
            stateString = "OK",
            taskState = attempt.GetState()
        };

    app.GetContext().GetEventHandler().Handle(
        new TaskAttemptStatusUpdateEvent(attempt.GetID(), report));
}
/// <summary>
/// Runs a one-task job, sends a diagnostic and a status update for its
/// attempt, and then checks through an <c>MRClientProtocol</c> proxy that
/// every query returns populated data. Finally checks that asking for the
/// task reports of an unknown job id raises an <c>IOException</c>.
/// </summary>
public virtual void Test()
{
    TestMRClientService.MRAppWithClientService app =
        new TestMRClientService.MRAppWithClientService(this, 1, 0, false);
    Configuration conf = new Configuration();
    Org.Apache.Hadoop.Mapreduce.V2.App.Job.Job job = app.Submit(conf);
    app.WaitForState(job, JobState.Running);
    NUnit.Framework.Assert.AreEqual("Num tasks not correct", 1, job.GetTasks().Count);

    IEnumerator<Task> taskIterator = job.GetTasks().Values.GetEnumerator();
    Task task = taskIterator.Next();
    app.WaitForState(task, TaskState.Running);
    TaskAttempt attempt = task.GetAttempts().Values.GetEnumerator().Next();
    app.WaitForState(attempt, TaskAttemptState.Running);

    // Send the diagnostic.
    string diagnostic1 = "Diagnostic1";
    string diagnostic2 = "Diagnostic2";
    app.GetContext().GetEventHandler().Handle(
        new TaskAttemptDiagnosticsUpdateEvent(attempt.GetID(), diagnostic1));

    // Send the status update.
    TaskAttemptStatusUpdateEvent.TaskAttemptStatus runningStatus =
        new TaskAttemptStatusUpdateEvent.TaskAttemptStatus();
    runningStatus.id = attempt.GetID();
    runningStatus.progress = 0.5f;
    runningStatus.stateString = "RUNNING";
    runningStatus.taskState = TaskAttemptState.Running;
    runningStatus.phase = Phase.Map;
    app.GetContext().GetEventHandler().Handle(
        new TaskAttemptStatusUpdateEvent(attempt.GetID(), runningStatus));

    // Verify that all objects are fully populated by invoking RPCs.
    YarnRPC rpc = YarnRPC.Create(conf);
    MRClientProtocol proxy = (MRClientProtocol)rpc.GetProxy(
        typeof(MRClientProtocol), app.clientService.GetBindAddress(), conf);

    GetCountersRequest countersRequest = recordFactory.NewRecordInstance<GetCountersRequest>();
    countersRequest.SetJobId(job.GetID());
    NUnit.Framework.Assert.IsNotNull("Counters is null",
        proxy.GetCounters(countersRequest).GetCounters());

    GetJobReportRequest jobReportRequest = recordFactory.NewRecordInstance<GetJobReportRequest>();
    jobReportRequest.SetJobId(job.GetID());
    JobReport jobReport = proxy.GetJobReport(jobReportRequest).GetJobReport();
    VerifyJobReport(jobReport);

    GetTaskAttemptCompletionEventsRequest completionEventsRequest =
        recordFactory.NewRecordInstance<GetTaskAttemptCompletionEventsRequest>();
    completionEventsRequest.SetJobId(job.GetID());
    completionEventsRequest.SetFromEventId(0);
    completionEventsRequest.SetMaxEvents(10);
    NUnit.Framework.Assert.IsNotNull("TaskCompletionEvents is null",
        proxy.GetTaskAttemptCompletionEvents(completionEventsRequest).GetCompletionEventList());

    GetDiagnosticsRequest diagnosticsRequest = recordFactory.NewRecordInstance<GetDiagnosticsRequest>();
    diagnosticsRequest.SetTaskAttemptId(attempt.GetID());
    NUnit.Framework.Assert.IsNotNull("Diagnostics is null",
        proxy.GetDiagnostics(diagnosticsRequest).GetDiagnosticsList());

    GetTaskAttemptReportRequest attemptReportRequest =
        recordFactory.NewRecordInstance<GetTaskAttemptReportRequest>();
    attemptReportRequest.SetTaskAttemptId(attempt.GetID());
    TaskAttemptReport attemptReport =
        proxy.GetTaskAttemptReport(attemptReportRequest).GetTaskAttemptReport();
    VerifyTaskAttemptReport(attemptReport);

    GetTaskReportRequest taskReportRequest = recordFactory.NewRecordInstance<GetTaskReportRequest>();
    taskReportRequest.SetTaskId(task.GetID());
    NUnit.Framework.Assert.IsNotNull("TaskReport is null",
        proxy.GetTaskReport(taskReportRequest).GetTaskReport());

    GetTaskReportsRequest mapReportsRequest = recordFactory.NewRecordInstance<GetTaskReportsRequest>();
    mapReportsRequest.SetJobId(job.GetID());
    mapReportsRequest.SetTaskType(TaskType.Map);
    NUnit.Framework.Assert.IsNotNull("TaskReports for map is null",
        proxy.GetTaskReports(mapReportsRequest).GetTaskReportList());

    GetTaskReportsRequest reduceReportsRequest = recordFactory.NewRecordInstance<GetTaskReportsRequest>();
    reduceReportsRequest.SetJobId(job.GetID());
    reduceReportsRequest.SetTaskType(TaskType.Reduce);
    NUnit.Framework.Assert.IsNotNull("TaskReports for reduce is null",
        proxy.GetTaskReports(reduceReportsRequest).GetTaskReportList());

    IList<string> reportedDiagnostics = proxy.GetDiagnostics(diagnosticsRequest).GetDiagnosticsList();
    NUnit.Framework.Assert.AreEqual("Num diagnostics not correct", 1, reportedDiagnostics.Count);
    NUnit.Framework.Assert.AreEqual("Diag 1 not correct", diagnostic1, reportedDiagnostics[0].ToString());

    TaskReport taskReport = proxy.GetTaskReport(taskReportRequest).GetTaskReport();
    NUnit.Framework.Assert.AreEqual("Num diagnostics not correct", 1, taskReport.GetDiagnosticsCount());

    // Send the done signal to the task.
    app.GetContext().GetEventHandler().Handle(
        new TaskAttemptEvent(
            task.GetAttempts().Values.GetEnumerator().Next().GetID(),
            TaskAttemptEventType.TaDone));
    app.WaitForState(job, JobState.Succeeded);

    // For an invalid job id, an IOException must be thrown.
    GetTaskReportsRequest unknownJobRequest = recordFactory.NewRecordInstance<GetTaskReportsRequest>();
    unknownJobRequest.SetJobId(TypeConverter.ToYarn(JobID.ForName("job_1415730144495_0001")));
    unknownJobRequest.SetTaskType(TaskType.Reduce);
    try
    {
        proxy.GetTaskReports(unknownJobRequest);
        NUnit.Framework.Assert.Fail("IOException not thrown for invalid job id");
    }
    catch (IOException)
    {
        // Expected: the job id is unknown to the service.
    }
}
/*   *************************************************************    */
// This is the code section that runs periodically and adds speculations for
// those jobs that need them.

/// <summary>
/// Computes how much a task would gain from a speculative attempt: the
/// estimated end time of its single running attempt minus the estimated end
/// time of a replacement attempt started at <paramref name="now"/>.
/// </summary>
/// <remarks>
/// This can return a few magic values for tasks that shouldn't speculate:
/// <list type="bullet">
/// <item>ON_SCHEDULE if ThresholdRuntime(taskID) says that we should not
/// consider speculating this task</item>
/// <item>ALREADY_SPECULATING if more than one attempt is running or
/// starting. This has priority.</item>
/// <item>TOO_NEW if our companion task hasn't gotten any information</item>
/// <item>PROGRESS_IS_GOOD if the task is sailing through</item>
/// <item>TOO_LATE_TO_SPECULATE if a replacement attempt is not estimated to
/// end before the running attempt does</item>
/// <item>NOT_RUNNING if the task is not running</item>
/// </list>
/// All of these values are negative. Any value that should be allowed to
/// speculate is 0 or positive.
/// As a side effect, this keeps the per-attempt history statistics up to
/// date and simulates a heartbeat for an attempt whose statistics have not
/// changed for a while.
/// </remarks>
/// <param name="taskID">the task to evaluate</param>
/// <param name="now">the current time, on the same clock as the estimator</param>
/// <returns>the speculation value, or one of the magic values above</returns>
private long SpeculationValue(TaskId taskID, long now)
{
    Org.Apache.Hadoop.Mapreduce.V2.App.Job.Job job = context.GetJob(taskID.GetJobId());
    Task task = job.GetTask(taskID);
    IDictionary<TaskAttemptId, TaskAttempt> attempts = task.GetAttempts();
    long acceptableRuntime = long.MinValue;
    long result = long.MinValue;

    if (!mayHaveSpeculated.Contains(taskID))
    {
        acceptableRuntime = estimator.ThresholdRuntime(taskID);
        if (acceptableRuntime == long.MaxValue)
        {
            return OnSchedule;
        }
    }

    TaskAttemptId runningTaskAttemptID = null;
    int numberRunningAttempts = 0;

    foreach (TaskAttempt taskAttempt in attempts.Values)
    {
        if (taskAttempt.GetState() == TaskAttemptState.Running
            || taskAttempt.GetState() == TaskAttemptState.Starting)
        {
            if (++numberRunningAttempts > 1)
            {
                return AlreadySpeculating;
            }
            runningTaskAttemptID = taskAttempt.GetID();

            long estimatedRunTime = estimator.EstimatedRuntime(runningTaskAttemptID);
            long taskAttemptStartTime = estimator.AttemptEnrolledTime(runningTaskAttemptID);
            if (taskAttemptStartTime > now)
            {
                // This background process ran before we could process the task
                // attempt status change that chronicles the attempt start.
                return TooNew;
            }

            long estimatedEndTime = estimatedRunTime + taskAttemptStartTime;
            long estimatedReplacementEndTime = now + estimator.EstimatedNewAttemptRuntime(taskID);
            float progress = taskAttempt.GetProgress();

            // Look the history up with TryGetValue: an attempt seen for the
            // first time has no entry yet, and the indexer is not guaranteed
            // to return null for a missing key.
            DefaultSpeculator.TaskAttemptHistoryStatistics data;
            runningTaskAttemptStatistics.TryGetValue(runningTaskAttemptID, out data);

            if (data == null)
            {
                runningTaskAttemptStatistics[runningTaskAttemptID] =
                    new DefaultSpeculator.TaskAttemptHistoryStatistics(estimatedRunTime, progress, now);
            }
            else if (estimatedRunTime == data.GetEstimatedRunTime() && progress == data.GetProgress())
            {
                // Previous stats are the same as the current stats.
                if (data.NotHeartbeatedInAWhile(now))
                {
                    // Stats have stagnated for a while, simulate a heart-beat.
                    TaskAttemptStatusUpdateEvent.TaskAttemptStatus taskAttemptStatus =
                        new TaskAttemptStatusUpdateEvent.TaskAttemptStatus();
                    taskAttemptStatus.id = runningTaskAttemptID;
                    taskAttemptStatus.progress = progress;
                    taskAttemptStatus.taskState = taskAttempt.GetState();
                    HandleAttempt(taskAttemptStatus);
                }
            }
            else
            {
                // Stats have changed - update our data structure.
                data.SetEstimatedRunTime(estimatedRunTime);
                data.SetProgress(progress);
                data.ResetHeartBeatTime(now);
            }

            if (estimatedEndTime < now)
            {
                return ProgressIsGood;
            }
            if (estimatedReplacementEndTime >= estimatedEndTime)
            {
                return TooLateToSpeculate;
            }
            result = estimatedEndTime - estimatedReplacementEndTime;
        }
    }

    // If we are here, there's at most one running task attempt.
    if (numberRunningAttempts == 0)
    {
        return NotRunning;
    }

    if (acceptableRuntime == long.MinValue)
    {
        acceptableRuntime = estimator.ThresholdRuntime(taskID);
        if (acceptableRuntime == long.MaxValue)
        {
            return OnSchedule;
        }
    }

    return result;
}
// XXXX This class design assumes that the contents of AppContext.getAllJobs
//   never change.  Is that right?
//
// This assumption shows up in several places, mostly in data structures that
//   can grow without limit if an AppContext gets new Jobs when the old ones
//   run out.  Also, these mapper statistics blocks won't cover the Jobs
//   we don't know about.

/// <summary>
/// Records <paramref name="timestamp"/> as the start time of the attempt
/// named by the status report, replacing any earlier entry for that attempt.
/// </summary>
/// <param name="status">the status report; only its id is read</param>
/// <param name="timestamp">the start time to record</param>
public virtual void EnrollAttempt(TaskAttemptStatusUpdateEvent.TaskAttemptStatus status, long timestamp)
{
    TaskAttemptId enrolledAttempt = status.id;
    startTimes[enrolledAttempt] = timestamp;
}
/// <summary>
/// Receives a status report for a task attempt. This implementation does
/// nothing with it; subclasses may override.
/// </summary>
/// <param name="status">the status report (unused here)</param>
/// <param name="timestamp">the time the report corresponds to (unused here)</param>
public virtual void UpdateAttempt(TaskAttemptStatusUpdateEvent.TaskAttemptStatus status, long timestamp) { }
/// <summary>
/// Runs the base-class update and then, if the attempt is running,
/// refreshes its runtime estimate and the variance of that estimate.
/// </summary>
/// <remarks>
/// We need to do two things:
/// 1: If this is a completion, we accumulate statistics in the superclass.
/// 2: If this is not a completion, we learn more about it.
/// The estimate is elapsed time divided by progress (progress is floored at
/// 0.0001), and the variance estimate is <c>estimate * progress / 10</c>.
/// Both are stored as -1 when the attempt has no usable start time.
/// Reports for an unknown job, task or attempt are ignored.
/// </remarks>
/// <param name="status">the status report; its id and progress are read</param>
/// <param name="timestamp">the time the report corresponds to</param>
public override void UpdateAttempt(TaskAttemptStatusUpdateEvent.TaskAttemptStatus status, long timestamp)
{
    base.UpdateAttempt(status, timestamp);

    TaskAttemptId attemptID = status.id;
    TaskId taskID = attemptID.GetTaskId();
    JobId jobID = taskID.GetJobId();

    Org.Apache.Hadoop.Mapreduce.V2.App.Job.Job job = context.GetJob(jobID);
    if (job == null)
    {
        return;
    }

    Task task = job.GetTask(taskID);
    if (task == null)
    {
        return;
    }

    TaskAttempt taskAttempt = task.GetAttempt(attemptID);
    if (taskAttempt == null)
    {
        return;
    }

    // A `long` can never be null, so the former `boxedStart == null` test
    // was dead code. TryGetValue expresses "no recorded start time" directly.
    long start;
    if (!startTimes.TryGetValue(attemptID, out start))
    {
        start = long.MinValue;
    }

    // This is not a completion, but we're cooking.
    if (taskAttempt.GetState() != TaskAttemptState.Running)
    {
        return;
    }

    // See if this attempt is already in the registry. If the entry shows up
    // between the two lookups, use it instead of dropping this estimate.
    AtomicLong estimateContainer;
    attemptRuntimeEstimates.TryGetValue(taskAttempt, out estimateContainer);
    if (estimateContainer == null)
    {
        AtomicLong registered;
        attemptRuntimeEstimates.TryGetValue(taskAttempt, out registered);
        if (registered == null)
        {
            registered = new AtomicLong();
            attemptRuntimeEstimates[taskAttempt] = registered;
        }
        estimateContainer = registered;
    }

    AtomicLong estimateVarianceContainer;
    attemptRuntimeEstimateVariances.TryGetValue(taskAttempt, out estimateVarianceContainer);
    if (estimateVarianceContainer == null)
    {
        attemptRuntimeEstimateVariances.PutIfAbsent(taskAttempt, new AtomicLong());
        attemptRuntimeEstimateVariances.TryGetValue(taskAttempt, out estimateVarianceContainer);
    }

    long estimate = -1;
    long varianceEstimate = -1;

    // This code assumes that we'll never consider starting a third
    // speculative task attempt if two are already running for this task.
    if (start > 0 && timestamp > start)
    {
        estimate = (long)((timestamp - start) / Math.Max(0.0001, status.progress));
        varianceEstimate = (long)(estimate * status.progress / 10);
    }

    if (estimateContainer != null)
    {
        estimateContainer.Set(estimate);
    }
    if (estimateVarianceContainer != null)
    {
        estimateVarianceContainer.Set(varianceEstimate);
    }
}
// This will be implemented if we go to a model where the events are
// processed within the TaskAttempts' state transitions' code.

/// <summary>Handles one status report from a task attempt.</summary>
/// <param name="status">the status report to handle</param>
public abstract void HandleAttempt(TaskAttemptStatusUpdateEvent.TaskAttemptStatus status);
/// <summary>
/// Reports 80% progress for every attempt, finishes every task except one
/// chosen at random, waits on the <c>_Supplier_111</c> condition for that
/// remaining task (at most 60 seconds), then lets its first attempt win and
/// verifies the speculation message.
/// </summary>
public virtual void TestSpeculateSuccessfulWithoutUpdateEvents()
{
    Clock actualClock = new SystemClock();
    ControlledClock clock = new ControlledClock(actualClock);
    clock.SetTime(Runtime.CurrentTimeMillis());

    MRApp app = new MRApp(NumMappers, NumReducers, false, "test", true, clock);
    Org.Apache.Hadoop.Mapreduce.V2.App.Job.Job job = app.Submit(new Configuration(), true, true);
    app.WaitForState(job, JobState.Running);

    IDictionary<TaskId, Task> tasks = job.GetTasks();
    NUnit.Framework.Assert.AreEqual("Num tasks is not correct", NumMappers + NumReducers, tasks.Count);
    IEnumerator<Task> taskIter = tasks.Values.GetEnumerator();
    while (taskIter.HasNext())
    {
        app.WaitForState(taskIter.Next(), TaskState.Running);
    }

    // Process the update events.
    clock.SetTime(Runtime.CurrentTimeMillis() + 2000);
    EventHandler appEventHandler = app.GetContext().GetEventHandler();
    foreach (KeyValuePair<TaskId, Task> mapTask in tasks)
    {
        foreach (KeyValuePair<TaskAttemptId, TaskAttempt> taskAttempt in mapTask.Value.GetAttempts())
        {
            TaskAttemptStatusUpdateEvent.TaskAttemptStatus status =
                CreateTaskAttemptStatus(taskAttempt.Key, (float)0.8, TaskAttemptState.Running);
            TaskAttemptStatusUpdateEvent @event = new TaskAttemptStatusUpdateEvent(taskAttempt.Key, status);
            appEventHandler.Handle(@event);
        }
    }

    Random generator = new Random();
    object[] taskValues = Sharpen.Collections.ToArray(tasks.Values);
    Task taskToBeSpeculated = (Task)taskValues[generator.Next(taskValues.Length)];
    TaskId speculatedTaskId = taskToBeSpeculated.GetID();

    // Other than one random task, finish every other task.
    foreach (KeyValuePair<TaskId, Task> mapTask in tasks)
    {
        // Compare ids by value: `!=` on two id objects only tests reference
        // identity, which would wrongly finish the chosen task if the map key
        // and GetID() ever returned distinct but equal instances.
        if (mapTask.Key.Equals(speculatedTaskId))
        {
            continue;
        }

        foreach (KeyValuePair<TaskAttemptId, TaskAttempt> taskAttempt in mapTask.Value.GetAttempts())
        {
            appEventHandler.Handle(new TaskAttemptEvent(taskAttempt.Key, TaskAttemptEventType.TaDone));
            appEventHandler.Handle(new TaskAttemptEvent(taskAttempt.Key, TaskAttemptEventType.TaContainerCleaned));
            app.WaitForState(taskAttempt.Value, TaskAttemptState.Succeeded);
        }
    }

    GenericTestUtils.WaitFor(new _Supplier_111(taskToBeSpeculated, clock), 1000, 60000);

    // Finish the 1st TA; the 2nd will be killed.
    TaskAttempt[] ta = MakeFirstAttemptWin(appEventHandler, taskToBeSpeculated);
    VerifySpeculationMessage(app, ta);
    app.WaitForState(Service.STATE.Stopped);
}
/// <summary>
/// Reports 50% progress for every attempt, finishes all attempts but the
/// last one iterated, reports 75% progress for the remaining attempt twice
/// (the second time after moving the clock forward by 15 seconds), waits on
/// the <c>_Supplier_205</c> condition for that task (at most 60 seconds),
/// then lets its first attempt win and verifies the speculation message.
/// </summary>
public virtual void TestSepculateSuccessfulWithUpdateEvents()
{
    Clock systemClock = new SystemClock();
    ControlledClock clock = new ControlledClock(systemClock);
    clock.SetTime(Runtime.CurrentTimeMillis());

    MRApp app = new MRApp(NumMappers, NumReducers, false, "test", true, clock);
    Org.Apache.Hadoop.Mapreduce.V2.App.Job.Job job = app.Submit(new Configuration(), true, true);
    app.WaitForState(job, JobState.Running);

    IDictionary<TaskId, Task> tasks = job.GetTasks();
    NUnit.Framework.Assert.AreEqual("Num tasks is not correct", NumMappers + NumReducers, tasks.Count);
    IEnumerator<Task> pending = tasks.Values.GetEnumerator();
    while (pending.HasNext())
    {
        app.WaitForState(pending.Next(), TaskState.Running);
    }

    // Process the update events.
    clock.SetTime(Runtime.CurrentTimeMillis() + 1000);
    EventHandler appEventHandler = app.GetContext().GetEventHandler();
    foreach (KeyValuePair<TaskId, Task> taskEntry in tasks)
    {
        foreach (KeyValuePair<TaskAttemptId, TaskAttempt> attemptEntry in taskEntry.Value.GetAttempts())
        {
            TaskAttemptId attemptId = attemptEntry.Key;
            TaskAttemptStatusUpdateEvent.TaskAttemptStatus halfDone =
                CreateTaskAttemptStatus(attemptId, 0.5f, TaskAttemptState.Running);
            appEventHandler.Handle(new TaskAttemptStatusUpdateEvent(attemptId, halfDone));
        }
    }

    Task speculatedTask = null;
    int remainingToFinish = NumMappers + NumReducers - 1;
    clock.SetTime(Runtime.CurrentTimeMillis() + 1000);
    foreach (KeyValuePair<TaskId, Task> taskEntry in tasks)
    {
        foreach (KeyValuePair<TaskAttemptId, TaskAttempt> attemptEntry in taskEntry.Value.GetAttempts())
        {
            TaskAttemptId attemptId = attemptEntry.Key;
            if (remainingToFinish > 0)
            {
                appEventHandler.Handle(new TaskAttemptEvent(attemptId, TaskAttemptEventType.TaDone));
                appEventHandler.Handle(new TaskAttemptEvent(attemptId, TaskAttemptEventType.TaContainerCleaned));
                remainingToFinish--;
                app.WaitForState(attemptEntry.Value, TaskAttemptState.Succeeded);
                continue;
            }

            // The last task is chosen for speculation.
            speculatedTask = taskEntry.Value;
            TaskAttemptStatusUpdateEvent.TaskAttemptStatus stillRunning =
                CreateTaskAttemptStatus(attemptId, 0.75f, TaskAttemptState.Running);
            appEventHandler.Handle(new TaskAttemptStatusUpdateEvent(attemptId, stillRunning));
        }
    }

    clock.SetTime(Runtime.CurrentTimeMillis() + 15000);
    foreach (KeyValuePair<TaskId, Task> taskEntry in tasks)
    {
        foreach (KeyValuePair<TaskAttemptId, TaskAttempt> attemptEntry in taskEntry.Value.GetAttempts())
        {
            if (attemptEntry.Value.GetState() == TaskAttemptState.Succeeded)
            {
                continue;
            }
            TaskAttemptStatusUpdateEvent.TaskAttemptStatus stillRunning =
                CreateTaskAttemptStatus(attemptEntry.Key, 0.75f, TaskAttemptState.Running);
            appEventHandler.Handle(new TaskAttemptStatusUpdateEvent(attemptEntry.Key, stillRunning));
        }
    }

    Task speculatedTaskConst = speculatedTask;
    GenericTestUtils.WaitFor(new _Supplier_205(speculatedTaskConst, clock), 1000, 60000);

    TaskAttempt[] winners = MakeFirstAttemptWin(appEventHandler, speculatedTask);
    VerifySpeculationMessage(app, winners);
    app.WaitForState(Service.STATE.Stopped);
}
// this has to be at least as much as map slot requirement
// This is a huge kluge.  The real implementations have a decent approach

/// <summary>
/// Drives a simulated job through a <c>DefaultSpeculator</c> built around
/// the given estimator and checks the number of successful speculations.
/// </summary>
/// <remarks>
/// Each round starts every new attempt for which enough slots are free,
/// reports the status of all other attempts to the speculator, waits for
/// the speculator's event queue to drain, and advances the mock clock by
/// one second. The speculator scans for speculations whenever the clock
/// reads a multiple of ten seconds.
/// NOTE(review): if the event queue does not drain within 130 seconds the
/// method returns without asserting anything, so the test passes silently.
/// </remarks>
/// <param name="testedEstimator">the estimator under test</param>
/// <param name="expectedSpeculations">the expected number of successful speculations</param>
private void CoreTestEstimator(TaskRuntimeEstimator testedEstimator, int expectedSpeculations)
{
    estimator = testedEstimator;
    clock = new TestRuntimeEstimators.MockClock();
    dispatcher = new AsyncDispatcher();
    myJob = null;
    slotsInUse.Set(0);
    completedMaps.Set(0);
    completedReduces.Set(0);
    successfulSpeculations.Set(0);
    taskTimeSavedBySpeculation.Set(0);
    clock.AdvanceTime(1000);

    Configuration conf = new Configuration();
    myAppContext = new TestRuntimeEstimators.MyAppContext(this, MapTasks, ReduceTasks);
    myJob = myAppContext.GetAllJobs().Values.GetEnumerator().Next();
    estimator.Contextualize(conf, myAppContext);

    conf.SetLong(MRJobConfig.SpeculativeRetryAfterNoSpeculate, 500L);
    conf.SetLong(MRJobConfig.SpeculativeRetryAfterSpeculate, 5000L);
    conf.SetDouble(MRJobConfig.SpeculativecapRunningTasks, 0.1);
    conf.SetDouble(MRJobConfig.SpeculativecapTotalTasks, 0.001);
    conf.SetInt(MRJobConfig.SpeculativeMinimumAllowedTasks, 5);

    speculator = new DefaultSpeculator(conf, myAppContext, estimator, clock);
    NUnit.Framework.Assert.AreEqual("wrong SPECULATIVE_RETRY_AFTER_NO_SPECULATE value",
        500L, speculator.GetSoonestRetryAfterNoSpeculate());
    NUnit.Framework.Assert.AreEqual("wrong SPECULATIVE_RETRY_AFTER_SPECULATE value",
        5000L, speculator.GetSoonestRetryAfterSpeculate());
    // Expected value first, then actual, matching the other assertions here
    // so that a failure message reports the two the right way round.
    NUnit.Framework.Assert.AreEqual(0.1, speculator.GetProportionRunningTasksSpeculatable(), 0.00001);
    NUnit.Framework.Assert.AreEqual(0.001, speculator.GetProportionTotalTasksSpeculatable(), 0.00001);
    NUnit.Framework.Assert.AreEqual("wrong SPECULATIVE_MINIMUM_ALLOWED_TASKS value",
        5, speculator.GetMinimumAllowedSpeculativeTasks());

    dispatcher.Register(typeof(Speculator.EventType), speculator);
    dispatcher.Register(typeof(TaskEventType), new TestRuntimeEstimators.SpeculationRequestEventHandler(this));
    dispatcher.Init(conf);
    dispatcher.Start();
    speculator.Init(conf);
    speculator.Start();

    // Now that the plumbing is hooked up, we do the following:
    //  do until all tasks are finished, ...
    //  1: If we have spare capacity, assign as many map tasks as we can, then
    //     assign as many reduce tasks as we can.  Note that an odd reduce
    //     task might be started while there are still map tasks, because
    //     map tasks take 3 slots and reduce tasks 2 slots.
    //  2: Send a speculation event for every task attempt that's running.
    //     Note that new attempts might get started by the speculator.

    // Discover undone tasks.
    int undoneMaps = MapTasks;
    int undoneReduces = ReduceTasks;

    // Build a task sequence where all the maps precede any of the reduces.
    IList<Task> allTasksSequence = new List<Task>();
    Sharpen.Collections.AddAll(allTasksSequence, myJob.GetTasks(TaskType.Map).Values);
    Sharpen.Collections.AddAll(allTasksSequence, myJob.GetTasks(TaskType.Reduce).Values);

    while (undoneMaps + undoneReduces > 0)
    {
        undoneMaps = 0;
        undoneReduces = 0;

        // Start all attempts which are new but for which there are enough slots.
        foreach (Task task in allTasksSequence)
        {
            if (!task.IsFinished())
            {
                if (task.GetType() == TaskType.Map)
                {
                    ++undoneMaps;
                }
                else
                {
                    ++undoneReduces;
                }
            }

            foreach (TaskAttempt attempt in task.GetAttempts().Values)
            {
                if (attempt.GetState() == TaskAttemptState.New
                    && InitialNumberFreeSlots - slotsInUse.Get() >= TaskTypeSlots(task.GetType()))
                {
                    TestRuntimeEstimators.MyTaskAttemptImpl attemptImpl =
                        (TestRuntimeEstimators.MyTaskAttemptImpl)attempt;
                    SpeculatorEvent @event = new SpeculatorEvent(attempt.GetID(), false, clock.GetTime());
                    speculator.Handle(@event);
                    attemptImpl.StartUp();
                }
                else
                {
                    // If a task attempt is in progress we should send the news to
                    // the Speculator.
                    TaskAttemptStatusUpdateEvent.TaskAttemptStatus status =
                        new TaskAttemptStatusUpdateEvent.TaskAttemptStatus();
                    status.id = attempt.GetID();
                    status.progress = attempt.GetProgress();
                    status.stateString = attempt.GetState().ToString();
                    status.taskState = attempt.GetState();
                    SpeculatorEvent @event = new SpeculatorEvent(status, clock.GetTime());
                    speculator.Handle(@event);
                }
            }
        }

        long startTime = Runtime.CurrentTimeMillis();

        // Drain the speculator event queue.
        while (!speculator.EventQueueEmpty())
        {
            Sharpen.Thread.Yield();
            if (Runtime.CurrentTimeMillis() > startTime + 130000)
            {
                return;
            }
        }

        clock.AdvanceTime(1000L);
        if (clock.GetTime() % 10000L == 0L)
        {
            speculator.ScanForSpeculations();
        }
    }

    NUnit.Framework.Assert.AreEqual("We got the wrong number of successful speculations.",
        expectedSpeculations, successfulSpeculations.Get());
}
/// <summary>
/// Creates an <c>AttemptStatusUpdate</c> event that carries the given
/// status report.
/// </summary>
/// <param name="reportedStatus">the status report stored on the event</param>
/// <param name="timestamp">the time passed on to the base event</param>
public SpeculatorEvent(TaskAttemptStatusUpdateEvent.TaskAttemptStatus reportedStatus , long timestamp) : base(Speculator.EventType.AttemptStatusUpdate, timestamp) { this.reportedStatus = reportedStatus; }