/// <summary>
/// Intercepts dispatch so that a TA_KILL aimed at the first reduce attempt
/// (reduce task 0, attempt 0) is deferred: the attempt is first driven through
/// DONE / CONTAINER_CLEANED / succeeded, and the kill is re-dispatched only
/// once the task-level success event is seen.
/// </summary>
protected override void Dispatch(Org.Apache.Hadoop.Yarn.Event.Event @event)
{
    if (@event is TaskAttemptEvent)
    {
        TaskAttemptEvent killEvent = (TaskAttemptEvent)@event;
        if (killEvent.GetType() == TaskAttemptEventType.TaKill)
        {
            TaskAttemptId taID = killEvent.GetTaskAttemptID();
            bool firstReduceAttempt = taID.GetTaskId().GetTaskType() == TaskType.Reduce
                && taID.GetTaskId().GetId() == 0
                && taID.GetId() == 0;
            if (firstReduceAttempt)
            {
                // Drive the attempt to a successful finish first...
                base.Dispatch(new TaskAttemptEvent(taID, TaskAttemptEventType.TaDone));
                base.Dispatch(new TaskAttemptEvent(taID, TaskAttemptEventType.TaContainerCleaned));
                base.Dispatch(new TaskTAttemptEvent(taID, TaskEventType.TAttemptSucceeded));
                // ...and park the kill until the success is observed below.
                this.cachedKillEvent = killEvent;
                return;
            }
        }
    }
    else if (@event is TaskEvent)
    {
        TaskEvent taskEvent = (TaskEvent)@event;
        if (taskEvent.GetType() == TaskEventType.TAttemptSucceeded && this.cachedKillEvent != null)
        {
            // Success has arrived: release the deferred kill now.
            base.Dispatch(this.cachedKillEvent);
            return;
        }
    }
    base.Dispatch(@event);
}
/// <summary>Builds a synthetic TaskAttemptReport for the given attempt id.</summary>
/// <remarks>
/// Start time is randomized slightly into the past and finish time slightly
/// into the future (offset bounded by Dt). For reduce attempts, shuffle-finish
/// is the midpoint of start/finish and sort-finish the midpoint of
/// shuffle-finish/finish. Phase, state, diagnostics, counters and progress
/// come from the surrounding mock generators.
/// </remarks>
public static TaskAttemptReport NewTaskAttemptReport(TaskAttemptId id)
{
    ApplicationAttemptId appAttemptId =
        ApplicationAttemptId.NewInstance(id.GetTaskId().GetJobId().GetAppId(), 0);
    ContainerId containerId = ContainerId.NewContainerId(appAttemptId, 0);
    TaskAttemptReport attemptReport =
        Org.Apache.Hadoop.Yarn.Util.Records.NewRecord<TaskAttemptReport>();
    attemptReport.SetTaskAttemptId(id);
    // Randomized window around "now"; +1 guarantees finish > start's base time.
    attemptReport.SetStartTime(Runtime.CurrentTimeMillis() - (int)(Math.Random() * Dt));
    attemptReport.SetFinishTime(Runtime.CurrentTimeMillis() + (int)(Math.Random() * Dt) + 1);
    if (id.GetTaskId().GetTaskType() == TaskType.Reduce)
    {
        // Shuffle ends halfway through the run; sort halfway between shuffle and finish.
        attemptReport.SetShuffleFinishTime(
            (attemptReport.GetFinishTime() + attemptReport.GetStartTime()) / 2);
        attemptReport.SetSortFinishTime(
            (attemptReport.GetFinishTime() + attemptReport.GetShuffleFinishTime()) / 2);
    }
    attemptReport.SetPhase(Phases.Next());
    attemptReport.SetTaskAttemptState(TaskAttemptStates.Next());
    attemptReport.SetProgress((float)Math.Random());
    attemptReport.SetCounters(TypeConverter.ToYarn(NewCounters()));
    attemptReport.SetContainerId(containerId);
    attemptReport.SetDiagnosticInfo(Diags.Next());
    attemptReport.SetStateString("Moving average " + Math.Random());
    return attemptReport;
}
/// <summary>
/// Verifies MRApps.ToTaskAttemptID parses each numeric field of
/// "attempt_&lt;clusterTimestamp&gt;_&lt;appId&gt;_&lt;type&gt;_&lt;taskId&gt;_&lt;attemptId&gt;".
/// </summary>
public virtual void TestToTaskAttemptID()
{
    TaskAttemptId parsed = MRApps.ToTaskAttemptID("attempt_0_1_m_2_3");
    NUnit.Framework.Assert.AreEqual(0, parsed.GetTaskId().GetJobId().GetAppId().GetClusterTimestamp());
    NUnit.Framework.Assert.AreEqual(1, parsed.GetTaskId().GetJobId().GetAppId().GetId());
    // The job id mirrors the application id.
    NUnit.Framework.Assert.AreEqual(1, parsed.GetTaskId().GetJobId().GetId());
    NUnit.Framework.Assert.AreEqual(2, parsed.GetTaskId().GetId());
    NUnit.Framework.Assert.AreEqual(3, parsed.GetId());
}
/// <summary>
/// When the first task (task id 0) launches, kills the whole job; every other
/// attempt is simply marked done.
/// </summary>
protected override void AttemptLaunched(TaskAttemptId attemptID)
{
    bool isFirstTask = attemptID.GetTaskId().GetId() == 0;
    if (isFirstTask)
    {
        GetContext().GetEventHandler().Handle(
            new JobEvent(attemptID.GetTaskId().GetJobId(), JobEventType.JobKill));
    }
    else
    {
        GetContext().GetEventHandler().Handle(
            new TaskAttemptEvent(attemptID, TaskAttemptEventType.TaDone));
    }
}
//TODO_get.set
/// <summary>
/// Builds a map TaskAttemptId with all-zero components and checks its
/// canonical string form.
/// </summary>
public virtual void TestTaskAttemptIDtoString()
{
    TaskAttemptId attemptId =
        RecordFactoryProvider.GetRecordFactory(null).NewRecordInstance<TaskAttemptId>();
    attemptId.SetTaskId(
        RecordFactoryProvider.GetRecordFactory(null).NewRecordInstance<TaskId>());
    attemptId.GetTaskId().SetTaskType(TaskType.Map);
    attemptId.GetTaskId().SetJobId(
        RecordFactoryProvider.GetRecordFactory(null).NewRecordInstance<JobId>());
    attemptId.GetTaskId().GetJobId().SetAppId(ApplicationId.NewInstance(0, 0));
    NUnit.Framework.Assert.AreEqual("attempt_0_0000_m_000000_0", MRApps.ToString(attemptId));
}
/// <summary>
/// Creates an ATTEMPT_START speculator event at the given timestamp, with a
/// fresh status record carrying only the attempt id.
/// </summary>
public SpeculatorEvent(TaskAttemptId attemptID, bool flag, long timestamp)
    : base(Speculator.EventType.AttemptStart, timestamp)
{
    // NOTE(review): 'flag' is not read in this body; kept for signature
    // compatibility with callers — confirm its purpose at the call sites.
    TaskAttemptStatusUpdateEvent.TaskAttemptStatus freshStatus =
        new TaskAttemptStatusUpdateEvent.TaskAttemptStatus();
    freshStatus.id = attemptID;
    this.reportedStatus = freshStatus;
    this.taskID = attemptID.GetTaskId();
}
/// <summary>Absorbs one TaskAttemptStatus</summary>
/// <param name="reportedStatus">
/// the status report that we got from a task attempt
/// that we want to fold into the speculation data for this job
/// </param>
/// <param name="timestamp">
/// the time this status corresponds to. This matters
/// because statuses contain progress.
/// </param>
protected internal virtual void StatusUpdate(TaskAttemptStatusUpdateEvent.TaskAttemptStatus
    reportedStatus, long timestamp)
{
    string stateName = reportedStatus.taskState.ToString();
    TaskAttemptId attemptID = reportedStatus.id;
    TaskId taskID = attemptID.GetTaskId();
    Org.Apache.Hadoop.Mapreduce.V2.App.Job.Job job = context.GetJob(taskID.GetJobId());
    if (job == null)
    {
        return;
    }
    Task task = job.GetTask(taskID);
    if (task == null)
    {
        return;
    }
    // Let the estimator learn from this report before updating bookkeeping.
    estimator.UpdateAttempt(reportedStatus, timestamp);
    bool isRunning = stateName.Equals(TaskAttemptState.Running.ToString());
    if (isRunning)
    {
        runningTasks.PutIfAbsent(taskID, true);
    }
    else
    {
        runningTasks.Remove(taskID, true);
        if (!stateName.Equals(TaskAttemptState.Starting.ToString()))
        {
            // Attempt is past STARTING and not RUNNING: drop its cached statistics.
            Sharpen.Collections.Remove(runningTaskAttemptStatistics, attemptID);
        }
    }
}
/// <summary>
/// Constructs a JOB_TASK_ATTEMPT_FETCH_FAILURE job event from a reduce attempt
/// id and the list of map attempt ids it reports; the event targets the
/// reduce attempt's owning job.
/// </summary>
public JobTaskAttemptFetchFailureEvent(TaskAttemptId reduce, IList<TaskAttemptId> maps)
    : base(reduce.GetTaskId().GetJobId(), JobEventType.JobTaskAttemptFetchFailure)
{
    this.reduce = reduce;
    this.maps = maps;
}
/*
 * This class is used to control when speculative execution happens.
 */
/// <summary>
/// Called only when speculative execution is turned on (for mappers and/or
/// reducers). Forces speculation to engage for the first attempt of the first
/// task (attempt id "*_m_000000_0" or "*_r_000000_0"). A retry of that attempt
/// has attempt id 1, so it falls through to the default estimate and does not
/// engage speculation again.
/// </summary>
public override long EstimatedRuntime(TaskAttemptId id)
{
    bool firstAttemptOfFirstTask = id.GetTaskId().GetId() == 0 && id.GetId() == 0;
    return firstAttemptOfFirstTask ? SpeculateThis : base.EstimatedRuntime(id);
}
/// <summary>
/// Records a status update for a task attempt; on the first observed success
/// of a task, folds the attempt's wall-clock duration into the per-task
/// DataStatistics.
/// </summary>
public virtual void UpdateAttempt(TaskAttemptStatusUpdateEvent.TaskAttemptStatus status
    , long timestamp)
{
    TaskAttemptId attemptID = status.id;
    TaskId taskID = attemptID.GetTaskId();
    JobId jobID = taskID.GetJobId();
    Org.Apache.Hadoop.Mapreduce.V2.App.Job.Job job = context.GetJob(jobID);
    if (job == null)
    {
        return;
    }
    Task task = job.GetTask(taskID);
    if (task == null)
    {
        return;
    }
    // NOTE(review): 'boxedStart' is a non-nullable long here, so the == null
    // comparison below is always false (a leftover of the Java Long-boxing
    // idiom) — confirm the value type of 'startTimes'.
    long boxedStart = startTimes[attemptID];
    long start = boxedStart == null ? long.MinValue : boxedStart;
    TaskAttempt taskAttempt = task.GetAttempt(attemptID);
    if (taskAttempt.GetState() == TaskAttemptState.Succeeded)
    {
        bool isNew = false;
        // is this a new success?
        lock (doneTasks)
        {
            if (!doneTasks.Contains(task))
            {
                doneTasks.AddItem(task);
                isNew = true;
            }
        }
        // It's a new completion
        // Note that if a task completes twice [because of a previous speculation
        // and a race, or a success followed by loss of the machine with the
        // local data] we only count the first one.
        if (isNew)
        {
            long finish = timestamp;
            // Only accumulate when both endpoints look like real timestamps.
            if (start > 1L && finish > 1L && start <= finish)
            {
                long duration = finish - start;
                DataStatistics statistics = DataStatisticsForTask(taskID);
                if (statistics != null)
                {
                    statistics.Add(duration);
                }
            }
        }
    }
}
/// <summary>
/// Verifies ACL access for the attempt's task, then resolves the attempt itself.
/// </summary>
/// <exception cref="System.IO.IOException">if the attempt does not exist.</exception>
private TaskAttempt VerifyAndGetAttempt(TaskAttemptId attemptID, JobACL accessType)
{
    // Access check happens inside VerifyAndGetTask before any lookup.
    TaskAttempt attempt =
        this.VerifyAndGetTask(attemptID.GetTaskId(), accessType).GetAttempt(attemptID);
    if (attempt == null)
    {
        throw new IOException("Unknown TaskAttempt " + attemptID);
    }
    return attempt;
}
/// <summary>Child checking whether it can commit.</summary>
/// <remarks>
/// Child checking whether it can commit.
/// <br />
/// Commit is a two-phased protocol: the attempt first reports
/// <see cref="CommitPending(TaskAttemptID, TaskStatus)"/> to the
/// ApplicationMaster, then repeatedly polls
/// <see cref="CanCommit(TaskAttemptID)"/> for permission. This is a legacy of
/// the centralized commit protocol handled by the JobTracker. The decision is
/// delegated to the Task managing all attempts; if the AM has not heard from
/// the RM within the commit window, the attempt is told to retry later to
/// avoid double-committing in a split-brain situation.
/// </remarks>
/// <exception cref="System.IO.IOException"/>
public virtual bool CanCommit(TaskAttemptID taskAttemptID)
{
    Log.Info("Commit go/no-go request from " + taskAttemptID.ToString());
    TaskAttemptId attemptID = TypeConverter.ToYarn(taskAttemptID);
    // Record liveness with the heartbeat handler before deciding.
    taskHeartbeatHandler.Progressing(attemptID);
    // Deny (caller retries later) if the RM heartbeat is stale.
    long now = context.GetClock().GetTime();
    if (now - rmHeartbeatHandler.GetLastHeartbeatTime() > commitWindowMs)
    {
        return false;
    }
    // Only the task managing the multiple attempts can decide; ask it.
    Org.Apache.Hadoop.Mapreduce.V2.App.Job.Job job =
        context.GetJob(attemptID.GetTaskId().GetJobId());
    Task task = job.GetTask(attemptID.GetTaskId());
    return task.CanCommit(attemptID);
}
/// <summary>
/// Resolves the requested task attempt (with job access verification) and
/// returns a response containing all of its diagnostics.
/// </summary>
/// <exception cref="System.IO.IOException"/>
public virtual GetDiagnosticsResponse GetDiagnostics(GetDiagnosticsRequest request)
{
    TaskAttemptId taskAttemptId = request.GetTaskAttemptId();
    // Job lookup also performs the access check.
    Org.Apache.Hadoop.Mapreduce.V2.App.Job.Job job =
        this.VerifyAndGetJob(taskAttemptId.GetTaskId().GetJobId(), true);
    TaskAttempt attempt = job.GetTask(taskAttemptId.GetTaskId()).GetAttempt(taskAttemptId);
    GetDiagnosticsResponse response =
        this.recordFactory.NewRecordInstance<GetDiagnosticsResponse>();
    response.AddAllDiagnostics(attempt.GetDiagnostics());
    return response;
}
// TODO: This isn't really used in any MR code. Ask for removal.
/// <summary>
/// Returns a window of map-attempt completion events for the caller's job,
/// and records the caller as progressing with the heartbeat handler.
/// </summary>
/// <exception cref="System.IO.IOException"/>
public virtual MapTaskCompletionEventsUpdate GetMapCompletionEvents(JobID jobIdentifier
    , int startIndex, int maxEvents, TaskAttemptID taskAttemptID)
{
    Log.Info("MapCompletionEvents request from " + taskAttemptID.ToString() + ". startIndex "
        + startIndex + " maxEvents " + maxEvents);
    // TODO: shouldReset is never used. See TT. Ask for Removal.
    bool shouldReset = false;
    TaskAttemptId attemptID = TypeConverter.ToYarn(taskAttemptID);
    TaskCompletionEvent[] completionEvents =
        context.GetJob(attemptID.GetTaskId().GetJobId())
            .GetMapAttemptCompletionEvents(startIndex, maxEvents);
    taskHeartbeatHandler.Progressing(attemptID);
    return new MapTaskCompletionEventsUpdate(completionEvents, shouldReset);
}
//First attempt is failed
/// <summary>
/// Fails the very first attempt of the first task (task 0, attempt 0);
/// all other attempts are completed normally.
/// </summary>
protected internal override void AttemptLaunched(TaskAttemptId attemptID)
{
    bool firstAttemptOfFirstTask =
        attemptID.GetTaskId().GetId() == 0 && attemptID.GetId() == 0;
    if (firstAttemptOfFirstTask)
    {
        // send the Fail event
        GetContext().GetEventHandler().Handle(
            new TaskAttemptEvent(attemptID, TaskAttemptEventType.TaFailmsg));
    }
    else
    {
        GetContext().GetEventHandler().Handle(
            new TaskAttemptEvent(attemptID, TaskAttemptEventType.TaDone));
    }
}
/// <summary>
/// Blocks the first task's first attempt on the latch until the test releases
/// it; every subsequent attempt is completed immediately.
/// </summary>
protected internal override void AttemptLaunched(TaskAttemptId attemptID)
{
    bool firstAttemptOfFirstTask =
        attemptID.GetTaskId().GetId() == 0 && attemptID.GetId() == 0;
    if (firstAttemptOfFirstTask)
    {
        // Hold this attempt until the latch is counted down elsewhere.
        try
        {
            latch.Await();
        }
        catch (Exception e)
        {
            Sharpen.Runtime.PrintStackTrace(e);
        }
    }
    else
    {
        GetContext().GetEventHandler().Handle(
            new TaskAttemptEvent(attemptID, TaskAttemptEventType.TaDone));
    }
}
/// <summary>
/// Delays dispatch of the configured event type for the first task's first
/// attempt until the latch is released; everything else passes straight
/// through to the base dispatcher.
/// </summary>
protected override void Dispatch(Org.Apache.Hadoop.Yarn.Event.Event @event)
{
    if (@event is TaskAttemptEvent)
    {
        TaskAttemptEvent attemptEvent = (TaskAttemptEvent)@event;
        TaskAttemptId attemptID = attemptEvent.GetTaskAttemptID();
        bool isWatchedEvent = attemptEvent.GetType() == this.attemptEventTypeToWait
            && attemptID.GetTaskId().GetId() == 0
            && attemptID.GetId() == 0;
        if (isWatchedEvent)
        {
            // Park this dispatch until the test counts the latch down.
            try
            {
                latch.Await();
            }
            catch (Exception e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
        }
    }
    base.Dispatch(@event);
}
/// <summary>
/// Looks up the AtomicLong stored for the given attempt in the supplied map
/// and returns its value, or -1 when the job, task, attempt, or stored value
/// cannot be resolved.
/// </summary>
/// <param name="data">per-attempt estimate map to read from.</param>
/// <param name="attemptID">the attempt whose stored value is wanted.</param>
/// <returns>the stored value, or -1L as a "not available" sentinel.</returns>
private long StoredPerAttemptValue(IDictionary<TaskAttempt, AtomicLong> data,
    TaskAttemptId attemptID)
{
    TaskId taskID = attemptID.GetTaskId();
    JobId jobID = taskID.GetJobId();
    Org.Apache.Hadoop.Mapreduce.V2.App.Job.Job job = context.GetJob(jobID);
    // Guard against an unknown job, consistent with the other estimator
    // paths in this file that bail out when context.GetJob returns null.
    if (job == null)
    {
        return -1L;
    }
    Task task = job.GetTask(taskID);
    if (task == null)
    {
        return -1L;
    }
    TaskAttempt taskAttempt = task.GetAttempt(attemptID);
    if (taskAttempt == null)
    {
        return -1L;
    }
    AtomicLong estimate = data[taskAttempt];
    return estimate == null ? -1L : estimate.Get();
}
/// <summary>
/// Runs a subtask in-process for the local (uber-task) container launcher:
/// resolves the YARN job and task for the launch event, reports the container
/// as launched (port -1, since shuffle goes through local files rather than a
/// task tracker), updates uber-task counters, and executes the classic mapred
/// task. A RuntimeException simulates a failed container; an IOException from
/// the umbilical terminates the process.
/// </summary>
private void RunTask(ContainerRemoteLaunchEvent launchEv, IDictionary<TaskAttemptID
    , MapOutputFile> localMapFiles)
{
    TaskAttemptId attemptID = launchEv.GetTaskAttemptID();
    Org.Apache.Hadoop.Mapreduce.V2.App.Job.Job job =
        this._enclosing.context.GetAllJobs()[attemptID.GetTaskId().GetJobId()];
    int numMapTasks = job.GetTotalMaps();
    int numReduceTasks = job.GetTotalReduces();
    // YARN (tracking) Task:
    Task ytask = job.GetTask(attemptID.GetTaskId());
    // classic mapred Task:
    Task remoteTask = launchEv.GetRemoteTask();
    // after "launching," send launched event to task attempt to move
    // state from ASSIGNED to RUNNING (also nukes "remoteTask", so must
    // do getRemoteTask() call first)
    // There is no port number because we are not really talking to a task
    // tracker. The shuffle is just done through local files. So the
    // port number is set to -1 in this case.
    this._enclosing.context.GetEventHandler().Handle(
        new TaskAttemptContainerLaunchedEvent(attemptID, -1));
    if (numMapTasks == 0)
    {
        this.doneWithMaps = true;
    }
    try
    {
        if (remoteTask.IsMapOrReduce())
        {
            JobCounterUpdateEvent jce =
                new JobCounterUpdateEvent(attemptID.GetTaskId().GetJobId());
            jce.AddCounterUpdate(JobCounter.TotalLaunchedUbertasks, 1);
            if (remoteTask.IsMapTask())
            {
                jce.AddCounterUpdate(JobCounter.NumUberSubmaps, 1);
            }
            else
            {
                jce.AddCounterUpdate(JobCounter.NumUberSubreduces, 1);
            }
            this._enclosing.context.GetEventHandler().Handle(jce);
        }
        this.RunSubtask(remoteTask, ytask.GetType(), attemptID, numMapTasks,
            (numReduceTasks > 0), localMapFiles);
    }
    catch (RuntimeException)
    {
        JobCounterUpdateEvent jce =
            new JobCounterUpdateEvent(attemptID.GetTaskId().GetJobId());
        jce.AddCounterUpdate(JobCounter.NumFailedUbertasks, 1);
        this._enclosing.context.GetEventHandler().Handle(jce);
        // this is our signal that the subtask failed in some way, so
        // simulate a failed JVM/container and send a container-completed
        // event to task attempt (i.e., move state machine from RUNNING
        // to FAIL_CONTAINER_CLEANUP [and ultimately to FAILED])
        this._enclosing.context.GetEventHandler().Handle(new TaskAttemptEvent(attemptID,
            TaskAttemptEventType.TaContainerCompleted));
    }
    catch (IOException ioe)
    {
        // if umbilical itself barfs (in error-handler of runSubMap()),
        // we're pretty much hosed, so do what YarnChild main() does
        // (i.e., exit clumsily--but can never happen, so no worries!)
        LocalContainerLauncher.Log.Fatal("oopsie... this can never happen: " + StringUtils
            .StringifyException(ioe));
        ExitUtil.Terminate(-1);
    }
    finally
    {
        // remove my future
        if (Sharpen.Collections.Remove(this.futures, attemptID) != null)
        {
            LocalContainerLauncher.Log.Info("removed attempt " + attemptID + " from the futures to keep track of"
                );
        }
    }
}
/// <summary>
/// Task-level event that also carries the attempt id which triggered it; the
/// base TaskEvent is addressed to the attempt's owning task.
/// </summary>
public TaskTAttemptEvent(TaskAttemptId id, TaskEventType type)
    : base(id.GetTaskId(), type)
{
    this.attemptID = id;
}
//Test reports of JobHistoryServer. History server should get log files from MRApp and read them
/// <summary>
/// End-to-end check of the history server's client protocol: runs a one-map,
/// one-reduce MRApp to success, starts a JobHistoryServer over its history,
/// then exercises getTaskAttemptReport, getTaskReport, getTaskReports (invalid
/// job id must raise IOException), getTaskAttemptCompletionEvents, and
/// getDiagnostics.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestReports()
{
    Configuration config = new Configuration();
    config.SetClass(CommonConfigurationKeysPublic.NetTopologyNodeSwitchMappingImplKey
        , typeof(TestJobHistoryParsing.MyResolver), typeof(DNSToSwitchMapping));
    RackResolver.Init(config);
    // 1 map, 1 reduce, with history recording enabled.
    MRApp app = new TestJobHistoryEvents.MRAppWithHistory(1, 1, true, this.GetType().
        FullName, true);
    app.Submit(config);
    Org.Apache.Hadoop.Mapreduce.V2.App.Job.Job job = app.GetContext().GetAllJobs().Values
        .GetEnumerator().Next();
    app.WaitForState(job, JobState.Succeeded);
    historyServer = new JobHistoryServer();
    historyServer.Init(config);
    historyServer.Start();
    // search JobHistory service
    JobHistory jobHistory = null;
    foreach (Org.Apache.Hadoop.Service.Service service in historyServer.GetServices())
    {
        if (service is JobHistory)
        {
            jobHistory = (JobHistory)service;
        }
    }
    IDictionary<JobId, Org.Apache.Hadoop.Mapreduce.V2.App.Job.Job> jobs = jobHistory.
        GetAllJobs();
    NUnit.Framework.Assert.AreEqual(1, jobs.Count);
    NUnit.Framework.Assert.AreEqual("job_0_0000", jobs.Keys.GetEnumerator().Next().ToString
        ());
    Task task = job.GetTasks().Values.GetEnumerator().Next();
    TaskAttempt attempt = task.GetAttempts().Values.GetEnumerator().Next();
    HistoryClientService historyService = historyServer.GetClientService();
    MRClientProtocol protocol = historyService.GetClientHandler();
    GetTaskAttemptReportRequest gtarRequest = recordFactory.NewRecordInstance<GetTaskAttemptReportRequest
        >();
    // test getTaskAttemptReport
    TaskAttemptId taId = attempt.GetID();
    taId.SetTaskId(task.GetID());
    taId.GetTaskId().SetJobId(job.GetID());
    gtarRequest.SetTaskAttemptId(taId);
    GetTaskAttemptReportResponse response = protocol.GetTaskAttemptReport(gtarRequest
        );
    NUnit.Framework.Assert.AreEqual("container_0_0000_01_000000", response.GetTaskAttemptReport
        ().GetContainerId().ToString());
    NUnit.Framework.Assert.IsTrue(response.GetTaskAttemptReport().GetDiagnosticInfo()
        .IsEmpty());
    // counters
    NUnit.Framework.Assert.IsNotNull(response.GetTaskAttemptReport().GetCounters().GetCounter
        (TaskCounter.PhysicalMemoryBytes));
    NUnit.Framework.Assert.AreEqual(taId.ToString(), response.GetTaskAttemptReport().
        GetTaskAttemptId().ToString());
    // test getTaskReport
    GetTaskReportRequest request = recordFactory.NewRecordInstance<GetTaskReportRequest
        >();
    TaskId taskId = task.GetID();
    taskId.SetJobId(job.GetID());
    request.SetTaskId(taskId);
    GetTaskReportResponse reportResponse = protocol.GetTaskReport(request);
    NUnit.Framework.Assert.AreEqual(string.Empty, reportResponse.GetTaskReport().GetDiagnosticsList
        ().GetEnumerator().Next());
    // progress
    NUnit.Framework.Assert.AreEqual(1.0f, reportResponse.GetTaskReport().GetProgress(
        ), 0.01);
    // report has corrected taskId
    NUnit.Framework.Assert.AreEqual(taskId.ToString(), reportResponse.GetTaskReport()
        .GetTaskId().ToString());
    // Task state should be SUCCEEDED
    NUnit.Framework.Assert.AreEqual(TaskState.Succeeded, reportResponse.GetTaskReport
        ().GetTaskState());
    // For invalid jobid, throw IOException
    GetTaskReportsRequest gtreportsRequest = recordFactory.NewRecordInstance<GetTaskReportsRequest
        >();
    gtreportsRequest.SetJobId(TypeConverter.ToYarn(JobID.ForName("job_1415730144495_0001"
        )));
    gtreportsRequest.SetTaskType(TaskType.Reduce);
    try
    {
        protocol.GetTaskReports(gtreportsRequest);
        NUnit.Framework.Assert.Fail("IOException not thrown for invalid job id");
    }
    catch (IOException)
    {
        // Expected
    }
    // test getTaskAttemptCompletionEvents
    GetTaskAttemptCompletionEventsRequest taskAttemptRequest = recordFactory.NewRecordInstance
        <GetTaskAttemptCompletionEventsRequest>();
    taskAttemptRequest.SetJobId(job.GetID());
    GetTaskAttemptCompletionEventsResponse taskAttemptCompletionEventsResponse = protocol
        .GetTaskAttemptCompletionEvents(taskAttemptRequest);
    NUnit.Framework.Assert.AreEqual(0, taskAttemptCompletionEventsResponse.GetCompletionEventCount
        ());
    // test getDiagnostics
    GetDiagnosticsRequest diagnosticRequest = recordFactory.NewRecordInstance<GetDiagnosticsRequest
        >();
    diagnosticRequest.SetTaskAttemptId(taId);
    GetDiagnosticsResponse diagnosticResponse = protocol.GetDiagnostics(diagnosticRequest
        );
    // it is strange : why one empty string ?
    NUnit.Framework.Assert.AreEqual(1, diagnosticResponse.GetDiagnosticsCount());
    NUnit.Framework.Assert.AreEqual(string.Empty, diagnosticResponse.GetDiagnostics(0
        ));
}
/// <summary>
/// Converts a YARN TaskAttemptId into its classic mapred TaskAttemptID form by
/// converting the enclosing task id and appending the attempt number.
/// </summary>
public static TaskAttemptID FromYarn(TaskAttemptId id)
{
    return new TaskAttemptID(FromYarn(id.GetTaskId()), id.GetId());
}
/// <summary>
/// Feeds the status update into the superclass statistics, then — while the
/// attempt is RUNNING — refreshes its projected runtime (elapsed / progress)
/// and a variance estimate in the per-attempt registries.
/// </summary>
public override void UpdateAttempt(TaskAttemptStatusUpdateEvent.TaskAttemptStatus
    status, long timestamp)
{
    base.UpdateAttempt(status, timestamp);
    TaskAttemptId attemptID = status.id;
    TaskId taskID = attemptID.GetTaskId();
    JobId jobID = taskID.GetJobId();
    Org.Apache.Hadoop.Mapreduce.V2.App.Job.Job job = context.GetJob(jobID);
    if (job == null)
    {
        return;
    }
    Task task = job.GetTask(taskID);
    if (task == null)
    {
        return;
    }
    TaskAttempt taskAttempt = task.GetAttempt(attemptID);
    if (taskAttempt == null)
    {
        return;
    }
    // NOTE(review): 'boxedStart' is a non-nullable long, so the == null check
    // below is always false (Java Long-boxing leftover) — confirm the value
    // type of 'startTimes'.
    long boxedStart = startTimes[attemptID];
    long start = boxedStart == null ? long.MinValue : boxedStart;
    // We need to do two things.
    // 1: If this is a completion, we accumulate statistics in the superclass
    // 2: If this is not a completion, we learn more about it.
    // This is not a completion, but we're cooking.
    if (taskAttempt.GetState() == TaskAttemptState.Running)
    {
        // See if this task is already in the registry
        AtomicLong estimateContainer = attemptRuntimeEstimates[taskAttempt];
        AtomicLong estimateVarianceContainer = attemptRuntimeEstimateVariances[taskAttempt
            ];
        if (estimateContainer == null)
        {
            if (attemptRuntimeEstimates[taskAttempt] == null)
            {
                attemptRuntimeEstimates[taskAttempt] = new AtomicLong();
                estimateContainer = attemptRuntimeEstimates[taskAttempt];
            }
        }
        if (estimateVarianceContainer == null)
        {
            attemptRuntimeEstimateVariances.PutIfAbsent(taskAttempt, new AtomicLong());
            estimateVarianceContainer = attemptRuntimeEstimateVariances[taskAttempt];
        }
        long estimate = -1;
        long varianceEstimate = -1;
        // This code assumes that we'll never consider starting a third
        // speculative task attempt if two are already running for this task
        if (start > 0 && timestamp > start)
        {
            estimate = (long)((timestamp - start) / Math.Max(0.0001, status.progress));
            varianceEstimate = (long)(estimate * status.progress / 10);
        }
        if (estimateContainer != null)
        {
            estimateContainer.Set(estimate);
        }
        if (estimateVarianceContainer != null)
        {
            estimateVarianceContainer.Set(varianceEstimate);
        }
    }
}