/// <summary>
/// Applies a single speculator event to this instance's bookkeeping.
/// </summary>
/// <remarks>
/// The whole dispatch runs under <c>lock (this)</c>. Event types that have no
/// case below fall through the switch and are ignored.
/// </remarks>
/// <param name="event">The event to process.</param>
private void ProcessSpeculatorEvent(SpeculatorEvent @event)
{
    // NOTE(review): locking on `this` is kept on purpose; other members of the
    // class (not visible here) may rely on the same monitor.
    lock (this)
    {
        switch (@event.GetType())
        {
            case Speculator.EventType.AttemptStatusUpdate:
                // Forward the reported status together with the event's timestamp.
                StatusUpdate(@event.GetReportedStatus(), @event.GetTimestamp());
                break;

            case Speculator.EventType.TaskContainerNeedUpdate:
                // Adjust the container-need counter of the event's task by the
                // change the event carries.
                ContainerNeed(@event.GetTaskID()).AddAndGet(@event.ContainersNeededChange());
                break;

            case Speculator.EventType.AttemptStart:
                Log.Info("ATTEMPT_START " + @event.GetTaskID());
                estimator.EnrollAttempt(@event.GetReportedStatus(), @event.GetTimestamp());
                break;

            case Speculator.EventType.JobCreate:
                Log.Info("JOB_CREATE " + @event.GetJobID());
                estimator.Contextualize(GetConfig(), context);
                break;
        }
    }
}
// Used to track any TaskAttempts that aren't heart-beating for a while, so
// that we can aggressively speculate instead of waiting for task-timeout.

// Regular heartbeat from tasks is every 3 secs. So if we don't get a
// heartbeat in 9 secs (3 heartbeats), we simulate a heartbeat with no change
// in progress.

// These are the current needs, not the initial needs.  For each job, these
// record the number of attempts that exist and that are actively
// waiting for a container [as opposed to running or finished]

/// <summary>
/// Creates the <c>TaskRuntimeEstimator</c> selected by the configuration and
/// contextualizes it before returning it.
/// </summary>
/// <remarks>
/// The estimator type is looked up under <c>MRJobConfig.MrAmTaskEstimator</c>;
/// <c>LegacyTaskRuntimeEstimator</c> is passed alongside the key (presumably as
/// the fallback when the key is unset; confirm against <c>GetClass</c>). The
/// type is instantiated through its parameterless constructor.
/// </remarks>
/// <param name="conf">Configuration the estimator type is read from; also passed to <c>Contextualize</c>.</param>
/// <param name="context">Application context passed to <c>Contextualize</c>.</param>
/// <returns>The newly created, contextualized estimator.</returns>
/// <exception cref="YarnRuntimeException">
/// Wraps any instantiation, member-access, target-invocation or missing-method
/// failure raised while creating the estimator; the failure is logged first.
/// </exception>
private static TaskRuntimeEstimator GetEstimator(Configuration conf, AppContext context)
{
    // One message shared by every failure path below.
    const string creationFailed = "Can't make a speculation runtime estimator";

    try
    {
        // "yarn.mapreduce.job.task.runtime.estimator.class"
        Type implementationType = conf.GetClass<TaskRuntimeEstimator>(
            MRJobConfig.MrAmTaskEstimator, typeof(LegacyTaskRuntimeEstimator));

        TaskRuntimeEstimator created = implementationType.GetConstructor().NewInstance();
        created.Contextualize(conf, context);
        return created;
    }
    catch (InstantiationException failure)
    {
        Log.Error(creationFailed, failure);
        throw new YarnRuntimeException(failure);
    }
    catch (MemberAccessException failure)
    {
        Log.Error(creationFailed, failure);
        throw new YarnRuntimeException(failure);
    }
    catch (TargetInvocationException failure)
    {
        Log.Error(creationFailed, failure);
        throw new YarnRuntimeException(failure);
    }
    catch (MissingMethodException failure)
    {
        Log.Error(creationFailed, failure);
        throw new YarnRuntimeException(failure);
    }
}
// this has to be at least as much as map slot requirement

// This is a huge kluge.  The real implementations have a decent approach

/// <summary>
/// Runs a simulated job against <paramref name="testedEstimator"/> with a fresh
/// <c>DefaultSpeculator</c> and asserts on the number of successful
/// speculations that were counted.
/// </summary>
/// <remarks>
/// If draining the speculator's event queue ever takes longer than 130000 ms,
/// the method returns early and the final assertion is NOT executed.
/// </remarks>
/// <param name="testedEstimator">Estimator under test; stored in the <c>estimator</c> field and handed to the speculator.</param>
/// <param name="expectedSpeculations">Value <c>successfulSpeculations</c> must hold once no undone tasks remain.</param>
private void CoreTestEstimator(TaskRuntimeEstimator testedEstimator, int expectedSpeculations)
{
    // Reset the shared fixture state.
    estimator = testedEstimator;
    clock = new TestRuntimeEstimators.MockClock();
    dispatcher = new AsyncDispatcher();
    myJob = null;
    slotsInUse.Set(0);
    completedMaps.Set(0);
    completedReduces.Set(0);
    successfulSpeculations.Set(0);
    taskTimeSavedBySpeculation.Set(0);
    clock.AdvanceTime(1000);

    Configuration conf = new Configuration();
    myAppContext = new TestRuntimeEstimators.MyAppContext(this, MapTasks, ReduceTasks);
    myJob = myAppContext.GetAllJobs().Values.GetEnumerator().Next();

    // The estimator is contextualized before the speculation settings are
    // written into conf; that ordering is kept from the original.
    estimator.Contextualize(conf, myAppContext);
    ApplySpeculationSettings(conf);

    speculator = new DefaultSpeculator(conf, myAppContext, estimator, clock);
    AssertSpeculatorSettings();
    StartEventPlumbing(conf);

    // Now that the plumbing is hooked up, we do the following:
    //  do until all tasks are finished, ...
    //  1: If we have spare capacity, assign as many map tasks as we can, then
    //     assign as many reduce tasks as we can.  Note that an odd reduce
    //     task might be started while there are still map tasks, because
    //     map tasks take 3 slots and reduce tasks 2 slots.
    //  2: Send a speculation event for every task attempt that's running
    //  note that new attempts might get started by the speculator

    // discover undone tasks
    int undoneMaps = MapTasks;
    int undoneReduces = ReduceTasks;

    // build a task sequence where all the maps precede any of the reduces
    IList<Task> allTasksSequence = new List<Task>();
    Sharpen.Collections.AddAll(allTasksSequence, myJob.GetTasks(TaskType.Map).Values);
    Sharpen.Collections.AddAll(allTasksSequence, myJob.GetTasks(TaskType.Reduce).Values);

    while (undoneMaps + undoneReduces > 0)
    {
        undoneMaps = 0;
        undoneReduces = 0;

        // start all attempts which are new but for which there is enough slots
        foreach (Task task in allTasksSequence)
        {
            if (!task.IsFinished())
            {
                if (task.GetType() == TaskType.Map)
                {
                    ++undoneMaps;
                }
                else
                {
                    ++undoneReduces;
                }
            }

            foreach (TaskAttempt attempt in task.GetAttempts().Values)
            {
                ReportAttemptToSpeculator(task, attempt);
            }
        }

        if (!DrainSpeculatorEventQueue())
        {
            // NOTE(review): on timeout the test ends here WITHOUT running the
            // final assertion, so a stalled speculator does not fail the test.
            // Behaviour kept from the original; consider failing explicitly.
            return;
        }

        clock.AdvanceTime(1000L);

        // Trigger a scan whenever the mock clock's time is a multiple of 10000.
        if (clock.GetTime() % 10000L == 0L)
        {
            speculator.ScanForSpeculations();
        }
    }

    NUnit.Framework.Assert.AreEqual("We got the wrong number of successful speculations.",
        expectedSpeculations, successfulSpeculations.Get());
}

/// <summary>Writes the speculation settings used by this test into <paramref name="conf"/>.</summary>
private void ApplySpeculationSettings(Configuration conf)
{
    conf.SetLong(MRJobConfig.SpeculativeRetryAfterNoSpeculate, 500L);
    conf.SetLong(MRJobConfig.SpeculativeRetryAfterSpeculate, 5000L);
    conf.SetDouble(MRJobConfig.SpeculativecapRunningTasks, 0.1);
    conf.SetDouble(MRJobConfig.SpeculativecapTotalTasks, 0.001);
    conf.SetInt(MRJobConfig.SpeculativeMinimumAllowedTasks, 5);
}

/// <summary>Asserts that the <c>speculator</c> field reports the values written by <c>ApplySpeculationSettings</c>.</summary>
private void AssertSpeculatorSettings()
{
    NUnit.Framework.Assert.AreEqual("wrong SPECULATIVE_RETRY_AFTER_NO_SPECULATE value",
        500L, speculator.GetSoonestRetryAfterNoSpeculate());
    NUnit.Framework.Assert.AreEqual("wrong SPECULATIVE_RETRY_AFTER_SPECULATE value",
        5000L, speculator.GetSoonestRetryAfterSpeculate());
    NUnit.Framework.Assert.AreEqual(speculator.GetProportionRunningTasksSpeculatable(), 0.1, 0.00001);
    NUnit.Framework.Assert.AreEqual(speculator.GetProportionTotalTasksSpeculatable(), 0.001, 0.00001);
    NUnit.Framework.Assert.AreEqual("wrong SPECULATIVE_MINIMUM_ALLOWED_TASKS value",
        5, speculator.GetMinimumAllowedSpeculativeTasks());
}

/// <summary>Registers the event handlers on the dispatcher, then initializes and starts the dispatcher followed by the speculator.</summary>
private void StartEventPlumbing(Configuration conf)
{
    dispatcher.Register(typeof(Speculator.EventType), speculator);
    dispatcher.Register(typeof(TaskEventType),
        new TestRuntimeEstimators.SpeculationRequestEventHandler(this));
    dispatcher.Init(conf);
    dispatcher.Start();
    speculator.Init(conf);
    speculator.Start();
}

/// <summary>
/// Starts <paramref name="attempt"/> when it is new and enough slots are free for
/// its task type; otherwise sends the speculator a status update for it.
/// </summary>
private void ReportAttemptToSpeculator(Task task, TaskAttempt attempt)
{
    if (attempt.GetState() == TaskAttemptState.New
        && InitialNumberFreeSlots - slotsInUse.Get() >= TaskTypeSlots(task.GetType()))
    {
        // The cast happens before the event is sent, as in the original.
        TestRuntimeEstimators.MyTaskAttemptImpl attemptImpl =
            (TestRuntimeEstimators.MyTaskAttemptImpl)attempt;
        SpeculatorEvent @event = new SpeculatorEvent(attempt.GetID(), false, clock.GetTime());
        speculator.Handle(@event);
        attemptImpl.StartUp();
    }
    else
    {
        // If a task attempt is in progress we should send the news to
        // the Speculator.  (This branch runs for every attempt that is not
        // started above, whatever its state.)
        TaskAttemptStatusUpdateEvent.TaskAttemptStatus status =
            new TaskAttemptStatusUpdateEvent.TaskAttemptStatus();
        status.id = attempt.GetID();
        status.progress = attempt.GetProgress();
        status.stateString = attempt.GetState().ToString();
        status.taskState = attempt.GetState();
        SpeculatorEvent @event = new SpeculatorEvent(status, clock.GetTime());
        speculator.Handle(@event);
    }
}

/// <summary>
/// Yields until the speculator reports an empty event queue.
/// </summary>
/// <returns>
/// <c>true</c> once the queue is empty; <c>false</c> if more than 130000 ms of
/// wall-clock time passed while waiting.
/// </returns>
private bool DrainSpeculatorEventQueue()
{
    long startTime = Runtime.CurrentTimeMillis();

    // drain the speculator event queue
    while (!speculator.EventQueueEmpty())
    {
        Sharpen.Thread.Yield();
        if (Runtime.CurrentTimeMillis() > startTime + 130000)
        {
            return false;
        }
    }

    return true;
}