// Used to track any TaskAttempts that aren't heart-beating for a while, so
// that we can aggressively speculate instead of waiting for task-timeout.
// Regular heartbeat from tasks is every 3 secs. So if we don't get a
// heartbeat in 9 secs (3 heartbeats), we simulate a heartbeat with no change
// in progress.

// These are the current needs, not the initial needs. For each job, these
// record the number of attempts that exist and that are actively
// waiting for a container [as opposed to running or finished]

// NOTE(review): the two comments above read like field documentation, but the
// declarations they belong to are not visible at this point -- confirm where
// they should live.

/// <summary>
/// Instantiates the <c>TaskRuntimeEstimator</c> implementation selected by the
/// <c>MRJobConfig.MrAmTaskEstimator</c> setting (falling back to
/// <c>LegacyTaskRuntimeEstimator</c>) and contextualizes it with the supplied
/// configuration and application context.
/// </summary>
/// <param name="conf">Configuration the estimator class is looked up in.</param>
/// <param name="context">Application context handed to the new estimator.</param>
/// <returns>The newly created, contextualized estimator.</returns>
/// <exception cref="YarnRuntimeException">
/// Wraps any of the reflection failures caught below; the failure is logged first.
/// </exception>
private static TaskRuntimeEstimator GetEstimator(Configuration conf, AppContext context)
{
    try
    {
        // Per the original note, the key is
        // "yarn.mapreduce.job.task.runtime.estimator.class".
        Type implementation = conf.GetClass<TaskRuntimeEstimator>(
            MRJobConfig.MrAmTaskEstimator, typeof(LegacyTaskRuntimeEstimator));
        TaskRuntimeEstimator created = implementation.GetConstructor().NewInstance();
        created.Contextualize(conf, context);
        return created;
    }
    catch (InstantiationException ex)
    {
        throw EstimatorCreationFailed(ex);
    }
    catch (MemberAccessException ex)
    {
        throw EstimatorCreationFailed(ex);
    }
    catch (TargetInvocationException ex)
    {
        throw EstimatorCreationFailed(ex);
    }
    catch (MissingMethodException ex)
    {
        throw EstimatorCreationFailed(ex);
    }
}

// Logs an estimator construction failure and builds the exception to throw for it.
private static YarnRuntimeException EstimatorCreationFailed(Exception ex)
{
    Log.Error("Can't make a speculation runtime estimator", ex);
    return new YarnRuntimeException(ex);
}
/// <summary>
/// Creates a speculator that uses an explicitly supplied runtime estimator and clock.
/// </summary>
/// <remarks>
/// This constructor is designed to be called by other constructors, which
/// normally figure out their own estimator. It is public only because the
/// test cases call it directly.
/// </remarks>
/// <param name="conf">Configuration the speculation settings are read from.</param>
/// <param name="context">Application context; also supplies the event handler.</param>
/// <param name="estimator">Runtime estimator to use.</param>
/// <param name="clock">Clock to use.</param>
public DefaultSpeculator(
    Configuration conf,
    AppContext context,
    TaskRuntimeEstimator estimator,
    Clock clock)
    : base(typeof(Org.Apache.Hadoop.Mapreduce.V2.App.Speculate.DefaultSpeculator).FullName)
{
    // Collaborators handed in by the caller.
    this.conf = conf;
    this.context = context;
    this.estimator = estimator;
    this.clock = clock;
    eventHandler = context.GetEventHandler();

    // Retry settings, each with its MRJobConfig default.
    soonestRetryAfterNoSpeculate = conf.GetLong(
        MRJobConfig.SpeculativeRetryAfterNoSpeculate,
        MRJobConfig.DefaultSpeculativeRetryAfterNoSpeculate);
    soonestRetryAfterSpeculate = conf.GetLong(
        MRJobConfig.SpeculativeRetryAfterSpeculate,
        MRJobConfig.DefaultSpeculativeRetryAfterSpeculate);

    // Speculation caps, each with its MRJobConfig default.
    proportionRunningTasksSpeculatable = conf.GetDouble(
        MRJobConfig.SpeculativecapRunningTasks,
        MRJobConfig.DefaultSpeculativecapRunningTasks);
    proportionTotalTasksSpeculatable = conf.GetDouble(
        MRJobConfig.SpeculativecapTotalTasks,
        MRJobConfig.DefaultSpeculativecapTotalTasks);
    minimumAllowedSpeculativeTasks = conf.GetInt(
        MRJobConfig.SpeculativeMinimumAllowedTasks,
        MRJobConfig.DefaultSpeculativeMinimumAllowedTasks);
}
// this has to be at least as much as map slot requirement

// This is a huge kluge. The real implementations have a decent approach

/// <summary>
/// Drives a simulated job through a <c>DefaultSpeculator</c> backed by the given
/// estimator and checks how many speculations ended up successful.
/// </summary>
/// <remarks>
/// The method resets the shared counters, builds a mock clock, dispatcher and
/// application context, configures and verifies the speculator's settings, and
/// then loops until no map or reduce task is left unfinished. Each pass starts
/// new attempts that fit into the free slots, reports status for every other
/// attempt, waits for the speculator's event queue to drain and advances the
/// mock clock by 1000.
/// </remarks>
/// <param name="testedEstimator">Estimator implementation under test.</param>
/// <param name="expectedSpeculations">Expected value of the successful-speculation counter.</param>
private void CoreTestEstimator(TaskRuntimeEstimator testedEstimator, int expectedSpeculations)
{
    estimator = testedEstimator;
    clock = new TestRuntimeEstimators.MockClock();
    dispatcher = new AsyncDispatcher();
    myJob = null;

    // Reset the shared counters.
    slotsInUse.Set(0);
    completedMaps.Set(0);
    completedReduces.Set(0);
    successfulSpeculations.Set(0);
    taskTimeSavedBySpeculation.Set(0);

    clock.AdvanceTime(1000);

    Configuration conf = new Configuration();
    myAppContext = new TestRuntimeEstimators.MyAppContext(this, MapTasks, ReduceTasks);
    myJob = myAppContext.GetAllJobs().Values.GetEnumerator().Next();

    // The estimator is contextualized before the speculation settings below
    // are written into conf.
    estimator.Contextualize(conf, myAppContext);

    conf.SetLong(MRJobConfig.SpeculativeRetryAfterNoSpeculate, 500L);
    conf.SetLong(MRJobConfig.SpeculativeRetryAfterSpeculate, 5000L);
    conf.SetDouble(MRJobConfig.SpeculativecapRunningTasks, 0.1);
    conf.SetDouble(MRJobConfig.SpeculativecapTotalTasks, 0.001);
    conf.SetInt(MRJobConfig.SpeculativeMinimumAllowedTasks, 5);

    speculator = new DefaultSpeculator(conf, myAppContext, estimator, clock);

    // The speculator must have picked up every setting written above.
    NUnit.Framework.Assert.AreEqual("wrong SPECULATIVE_RETRY_AFTER_NO_SPECULATE value",
        500L, speculator.GetSoonestRetryAfterNoSpeculate());
    NUnit.Framework.Assert.AreEqual("wrong SPECULATIVE_RETRY_AFTER_SPECULATE value",
        5000L, speculator.GetSoonestRetryAfterSpeculate());
    // Expected value first, then actual, then tolerance -- consistent with the
    // other assertions here, so a failure reports the values the right way round.
    NUnit.Framework.Assert.AreEqual(0.1,
        speculator.GetProportionRunningTasksSpeculatable(), 0.00001);
    NUnit.Framework.Assert.AreEqual(0.001,
        speculator.GetProportionTotalTasksSpeculatable(), 0.00001);
    NUnit.Framework.Assert.AreEqual("wrong SPECULATIVE_MINIMUM_ALLOWED_TASKS value",
        5, speculator.GetMinimumAllowedSpeculativeTasks());

    dispatcher.Register(typeof(Speculator.EventType), speculator);
    dispatcher.Register(typeof(TaskEventType),
        new TestRuntimeEstimators.SpeculationRequestEventHandler(this));
    dispatcher.Init(conf);
    dispatcher.Start();
    speculator.Init(conf);
    speculator.Start();

    // Now that the plumbing is hooked up, we do the following:
    // do until all tasks are finished, ...
    // 1: If we have spare capacity, assign as many map tasks as we can, then
    //    assign as many reduce tasks as we can. Note that an odd reduce
    //    task might be started while there are still map tasks, because
    //    map tasks take 3 slots and reduce tasks 2 slots.
    // 2: Send a speculation event for every task attempt that's running
    // note that new attempts might get started by the speculator

    // discover undone tasks
    int undoneMaps = MapTasks;
    int undoneReduces = ReduceTasks;

    // build a task sequence where all the maps precede any of the reduces
    IList<Task> allTasksSequence = new List<Task>();
    Sharpen.Collections.AddAll(allTasksSequence, myJob.GetTasks(TaskType.Map).Values);
    Sharpen.Collections.AddAll(allTasksSequence, myJob.GetTasks(TaskType.Reduce).Values);

    while (undoneMaps + undoneReduces > 0)
    {
        // Recount the unfinished tasks on every pass.
        undoneMaps = 0;
        undoneReduces = 0;

        // start all attempts which are new but for which there is enough slots
        foreach (Task task in allTasksSequence)
        {
            if (!task.IsFinished())
            {
                if (task.GetType() == TaskType.Map)
                {
                    ++undoneMaps;
                }
                else
                {
                    ++undoneReduces;
                }
            }

            foreach (TaskAttempt attempt in task.GetAttempts().Values)
            {
                if (attempt.GetState() == TaskAttemptState.New
                    && InitialNumberFreeSlots - slotsInUse.Get() >= TaskTypeSlots(task.GetType()))
                {
                    // A new attempt that fits into the free slots: tell the
                    // speculator about it, then start it.
                    TestRuntimeEstimators.MyTaskAttemptImpl attemptImpl =
                        (TestRuntimeEstimators.MyTaskAttemptImpl)attempt;
                    SpeculatorEvent @event =
                        new SpeculatorEvent(attempt.GetID(), false, clock.GetTime());
                    speculator.Handle(@event);
                    attemptImpl.StartUp();
                }
                else
                {
                    // If a task attempt is in progress we should send the news to
                    // the Speculator.
                    // NOTE(review): this branch also runs for attempts in any other
                    // state, including new attempts that did not fit into the free
                    // slots -- confirm that is intended.
                    TaskAttemptStatusUpdateEvent.TaskAttemptStatus status =
                        new TaskAttemptStatusUpdateEvent.TaskAttemptStatus();
                    status.id = attempt.GetID();
                    status.progress = attempt.GetProgress();
                    status.stateString = attempt.GetState().ToString();
                    status.taskState = attempt.GetState();
                    SpeculatorEvent @event = new SpeculatorEvent(status, clock.GetTime());
                    speculator.Handle(@event);
                }
            }
        }

        long startTime = Runtime.CurrentTimeMillis();

        // drain the speculator event queue
        while (!speculator.EventQueueEmpty())
        {
            Sharpen.Thread.Yield();
            if (Runtime.CurrentTimeMillis() > startTime + 130000)
            {
                // NOTE(review): giving up here returns without reaching the final
                // assertion, so a drain timeout is not reported as a failure --
                // confirm this is deliberate.
                return;
            }
        }

        clock.AdvanceTime(1000L);

        // Trigger a scan whenever the mock clock reaches a multiple of 10000.
        if (clock.GetTime() % 10000L == 0L)
        {
            speculator.ScanForSpeculations();
        }
    }

    NUnit.Framework.Assert.AreEqual("We got the wrong number of successful speculations.",
        expectedSpeculations, successfulSpeculations.Get());
}