/*
 * Uber-AM lifecycle/ordering ("normal" case):
 *
 * - [somebody] sends TA_ASSIGNED
 *   - handled by ContainerAssignedTransition (TaskAttemptImpl.java)
 *     - creates "remoteTask" for us == real Task
 *     - sends CONTAINER_REMOTE_LAUNCH
 *   - TA: UNASSIGNED -> ASSIGNED
 * - CONTAINER_REMOTE_LAUNCH handled by LocalContainerLauncher (us)
 *   - sucks "remoteTask" out of TaskAttemptImpl via getRemoteTask()
 *   - sends TA_CONTAINER_LAUNCHED
 *     [[ elsewhere...
 *       - TA_CONTAINER_LAUNCHED handled by LaunchedContainerTransition
 *         - registers "remoteTask" with TaskAttemptListener (== umbilical)
 *         - NUKES "remoteTask"
 *         - sends T_ATTEMPT_LAUNCHED (Task: SCHEDULED -> RUNNING)
 *         - TA: ASSIGNED -> RUNNING
 *     ]]
 *   - runs Task (runSubMap() or runSubReduce())
 *     - TA can safely send TA_UPDATE since in RUNNING state
 */
// doneWithMaps and finishedSubMaps are accessed from only
// one thread. Therefore, no need to make them volatile.
/// <summary>
/// Event loop: takes events off the enclosing launcher's queue until the
/// current thread is interrupted (or the take itself fails), submitting
/// launch events to the task runner and cancelling the tracked future on
/// cleanup events.
/// </summary>
public virtual void Run()
{
    // Collect locations of map outputs to give to reduces
    IDictionary<TaskAttemptID, MapOutputFile> localMapFiles = new Dictionary<TaskAttemptID, MapOutputFile>();

    // _must_ either run subtasks sequentially or accept expense of new JVMs
    // (i.e., fork()), else will get weird failures when maps try to create/
    // write same dirname or filename: no chdir() in Java
    while (!Sharpen.Thread.CurrentThread().IsInterrupted())
    {
        ContainerLauncherEvent incoming;
        try
        {
            incoming = this._enclosing.eventQueue.Take();
        }
        catch (Exception takeFailure)
        {
            // mostly via T_KILL? JOB_KILL?
            LocalContainerLauncher.Log.Error("Returning, interrupted : " + takeFailure);
            break;
        }

        LocalContainerLauncher.Log.Info("Processing the event " + incoming.ToString());

        if (incoming.GetType() == ContainerLauncher.EventType.ContainerRemoteLaunch)
        {
            ContainerRemoteLaunchEvent launchRequest = (ContainerRemoteLaunchEvent)incoming;
            // execute the task on a separate thread
            Future<object> launched = this._enclosing.taskRunner.Submit(
                new _Runnable_228(this, launchRequest, localMapFiles));
            // remember the current attempt
            this.futures[incoming.GetTaskAttemptID()] = launched;
            continue;
        }

        if (incoming.GetType() != ContainerLauncher.EventType.ContainerRemoteCleanup)
        {
            LocalContainerLauncher.Log.Warn("Ignoring unexpected event " + incoming.ToString());
            continue;
        }

        // cancel (and interrupt) the current running task associated with the
        // event
        TaskAttemptId cleanedAttempt = incoming.GetTaskAttemptID();
        Future<object> cancelled = Sharpen.Collections.Remove(this.futures, cleanedAttempt);
        if (cancelled != null)
        {
            LocalContainerLauncher.Log.Info("canceling the task attempt " + cleanedAttempt);
            cancelled.Cancel(true);
        }

        // send "cleaned" event to task attempt to move us from
        // SUCCESS_CONTAINER_CLEANUP to SUCCEEDED state (or
        // {FAIL|KILL}_CONTAINER_CLEANUP to {FAIL|KILL}_TASK_CLEANUP)
        this._enclosing.context.GetEventHandler().Handle(
            new TaskAttemptEvent(cleanedAttempt, TaskAttemptEventType.TaContainerCleaned));
    }
}
/// <summary>
/// Stores the successful attempt id on this record. Passing <c>null</c>
/// additionally clears the corresponding value on the builder.
/// </summary>
/// <param name="successfulAttempt">The attempt id to store, or <c>null</c> to clear it.</param>
public virtual void SetSuccessfulAttempt(TaskAttemptId successfulAttempt)
{
    MaybeInitBuilder();
    if (successfulAttempt != null)
    {
        this.successfulAttemptId = successfulAttempt;
        return;
    }

    // Null input: wipe the builder's copy as well as the cached field.
    builder.ClearSuccessfulAttempt();
    this.successfulAttemptId = null;
}
/// <summary>
/// Creates a new <c>TaskAttemptId</c> record and populates it with the
/// given task id and attempt number.
/// </summary>
/// <param name="taskId">Task id assigned to the new record.</param>
/// <param name="attemptId">Attempt number assigned to the new record.</param>
/// <returns>The populated record.</returns>
public static TaskAttemptId NewTaskAttemptId(TaskId taskId, int attemptId)
{
    TaskAttemptId created = Org.Apache.Hadoop.Yarn.Util.Records.NewRecord<TaskAttemptId>();
    created.SetTaskId(taskId);
    created.SetId(attemptId);
    return created;
}
/// <summary>
/// Handles a "done" acknowledgement for an attempt: logs it, converts the id
/// via <c>TypeConverter.ToYarn</c>, reports progress to the heartbeat
/// handler, and dispatches a <c>TaDone</c> task-attempt event.
/// </summary>
/// <param name="taskAttemptID">Id of the attempt that reported completion.</param>
/// <exception cref="System.IO.IOException"/>
public virtual void Done(TaskAttemptID taskAttemptID)
{
    Log.Info("Done acknowledgement from " + taskAttemptID.ToString());

    TaskAttemptId yarnAttemptId = TypeConverter.ToYarn(taskAttemptID);
    taskHeartbeatHandler.Progressing(yarnAttemptId);

    TaskAttemptEvent doneEvent = new TaskAttemptEvent(yarnAttemptId, TaskAttemptEventType.TaDone);
    context.GetEventHandler().Handle(doneEvent);
}
/// <summary>
/// Reports a container launch failure: logs the message as an error, then
/// dispatches a diagnostics-update event followed by a
/// <c>TaContainerLaunchFailed</c> event for the attempt.
/// </summary>
/// <param name="taskAttemptID">Attempt the failure is reported for.</param>
/// <param name="message">Text that is logged and attached as diagnostics.</param>
internal virtual void SendContainerLaunchFailedMsg(TaskAttemptId taskAttemptID, string message)
{
    Log.Error(message);

    // Diagnostics go out first, then the failure event itself.
    TaskAttemptDiagnosticsUpdateEvent diagnostics =
        new TaskAttemptDiagnosticsUpdateEvent(taskAttemptID, message);
    context.GetEventHandler().Handle(diagnostics);

    TaskAttemptEvent launchFailed =
        new TaskAttemptEvent(taskAttemptID, TaskAttemptEventType.TaContainerLaunchFailed);
    context.GetEventHandler().Handle(launchFailed);
}
/// <summary>
/// Hook called when an attempt has been launched. When <c>autoComplete</c>
/// is set, a <c>TaDone</c> event is dispatched for the attempt; otherwise
/// nothing happens.
/// </summary>
/// <param name="attemptID">The attempt that was launched.</param>
protected internal virtual void AttemptLaunched(TaskAttemptId attemptID)
{
    if (!autoComplete)
    {
        return;
    }

    // send the done event
    TaskAttemptEvent doneEvent = new TaskAttemptEvent(attemptID, TaskAttemptEventType.TaDone);
    GetContext().GetEventHandler().Handle(doneEvent);
}
/// <summary>
/// Stores the task attempt id on this record. Passing <c>null</c>
/// additionally clears the corresponding value on the builder.
/// </summary>
/// <param name="taskAttemptId">The attempt id to store, or <c>null</c> to clear it.</param>
public virtual void SetTaskAttemptId(TaskAttemptId taskAttemptId)
{
    MaybeInitBuilder();

    bool clearing = taskAttemptId == null;
    if (clearing)
    {
        builder.ClearTaskAttemptId();
    }

    this.taskAttemptId = taskAttemptId;
}
/// <summary>
/// Runs the base-class update, then feeds the attempt's id and progress
/// from <paramref name="status"/> together with the timestamp into
/// <c>IncorporateReading</c>.
/// </summary>
/// <param name="status">Status carrying the attempt id and progress value.</param>
/// <param name="timestamp">Timestamp passed through to both calls.</param>
public override void UpdateAttempt(TaskAttemptStatusUpdateEvent.TaskAttemptStatus status, long timestamp)
{
    base.UpdateAttempt(status, timestamp);
    IncorporateReading(status.id, status.progress, timestamp);
}
/// <summary>
/// Creates a launcher event of the given type and records the attempt id,
/// container id, container manager address, and container token on it.
/// </summary>
/// <param name="taskAttemptID">Attempt the event refers to.</param>
/// <param name="containerID">Container the event refers to.</param>
/// <param name="containerMgrAddress">Container manager address stored on the event.</param>
/// <param name="containerToken">Container token stored on the event.</param>
/// <param name="type">Event type forwarded to the base constructor.</param>
public ContainerLauncherEvent(
    TaskAttemptId taskAttemptID,
    ContainerId containerID,
    string containerMgrAddress,
    Token containerToken,
    ContainerLauncher.EventType type)
    : base(type)
{
    this.containerID = containerID;
    this.containerToken = containerToken;
    this.containerMgrAddress = containerMgrAddress;
    this.taskAttemptID = taskAttemptID;
}
/// <summary>
/// Handles a file-system error reported for an attempt: logs it at fatal
/// level, records it as diagnostic info, and dispatches a
/// <c>TaFailmsg</c> event for the converted attempt id.
/// </summary>
/// <param name="taskAttemptID">Attempt that reported the error.</param>
/// <param name="message">Error text included in the log line and diagnostics.</param>
/// <exception cref="System.IO.IOException"/>
public virtual void FsError(TaskAttemptID taskAttemptID, string message)
{
    // This happens only in Child.
    string logLine = "Task: " + taskAttemptID + " - failed due to FSError: " + message;
    Log.Fatal(logLine);
    ReportDiagnosticInfo(taskAttemptID, "FSError: " + message);

    TaskAttemptId yarnAttemptId = TypeConverter.ToYarn(taskAttemptID);
    TaskAttemptEvent failure = new TaskAttemptEvent(yarnAttemptId, TaskAttemptEventType.TaFailmsg);
    context.GetEventHandler().Handle(failure);
}
/// <summary>
/// Handles a fatal error reported for an attempt: logs it at fatal level,
/// records it as diagnostic info, and dispatches a <c>TaFailmsg</c> event
/// for the converted attempt id.
/// </summary>
/// <param name="taskAttemptID">Attempt that reported the error.</param>
/// <param name="msg">Error text included in the log line and diagnostics.</param>
/// <exception cref="System.IO.IOException"/>
public virtual void FatalError(TaskAttemptID taskAttemptID, string msg)
{
    // This happens only in Child and in the Task.
    string logLine = "Task: " + taskAttemptID + " - exited : " + msg;
    Log.Fatal(logLine);
    ReportDiagnosticInfo(taskAttemptID, "Error: " + msg);

    TaskAttemptId yarnAttemptId = TypeConverter.ToYarn(taskAttemptID);
    TaskAttemptEvent failure = new TaskAttemptEvent(yarnAttemptId, TaskAttemptEventType.TaFailmsg);
    context.GetEventHandler().Handle(failure);
}
/// <summary>
/// Records "now" (per <c>clock.GetTime()</c>) as the last-progress time of
/// the given attempt, if the attempt is present in <c>runningAttempts</c>.
/// Attempts that are not present are silently ignored.
/// </summary>
/// <param name="attemptID">Attempt whose progress timestamp is refreshed.</param>
public virtual void Progressing(TaskAttemptId attemptID)
{
    //only put for the registered attempts
    //TODO throw an exception if the task isn't registered.
    // Use TryGetValue rather than the indexer: a .NET dictionary indexer
    // throws KeyNotFoundException for a missing key, which would make the
    // "not registered" case crash instead of being skipped as the null
    // check intends.
    TaskHeartbeatHandler.ReportTime time;
    if (runningAttempts.TryGetValue(attemptID, out time) && time != null)
    {
        time.SetLastProgress(clock.GetTime());
    }
}
/// <summary>
/// Creates a <c>ContainerRemoteLaunch</c> event. The base event receives
/// the container's id, the interned string form of its node id, and its
/// container token; the container, launch context, and task are stored on
/// this instance.
/// </summary>
/// <param name="taskAttemptID">Attempt to be launched.</param>
/// <param name="containerLaunchContext">Launch context stored on the event.</param>
/// <param name="allocatedContainer">Container supplying id, node id, and token.</param>
/// <param name="remoteTask">Task stored on the event.</param>
public ContainerRemoteLaunchEvent(
    TaskAttemptId taskAttemptID,
    ContainerLaunchContext containerLaunchContext,
    Container allocatedContainer,
    Task remoteTask)
    : base(
        taskAttemptID,
        allocatedContainer.GetId(),
        StringInterner.WeakIntern(allocatedContainer.GetNodeId().ToString()),
        allocatedContainer.GetContainerToken(),
        ContainerLauncher.EventType.ContainerRemoteLaunch)
{
    this.task = remoteTask;
    this.containerLaunchContext = containerLaunchContext;
    this.allocatedContainer = allocatedContainer;
}
/// <summary>
/// Creates a container record in the <c>Prep</c> state, keeping the
/// attempt id, container id, and container manager address.
/// </summary>
/// <param name="_enclosing">The owning launcher instance.</param>
/// <param name="taId">Attempt associated with the container.</param>
/// <param name="containerID">Id of the container.</param>
/// <param name="containerMgrAddress">Address of the container manager.</param>
public Container(
    ContainerLauncherImpl _enclosing,
    TaskAttemptId taId,
    ContainerId containerID,
    string containerMgrAddress)
{
    this._enclosing = _enclosing;

    // store enough information to be able to cleanup the container
    this.taskAttemptID = taId;
    this.containerID = containerID;
    this.containerMgrAddress = containerMgrAddress;
    this.state = ContainerLauncherImpl.ContainerState.Prep;
}
/// <summary>
/// Builds a <c>TaskAttempt</c> for the given task: creates a new attempt id
/// record from <paramref name="tid"/> and <paramref name="i"/>, builds a
/// report for it via <c>NewTaskAttemptReport</c>, and wraps both in a
/// <c>_TaskAttempt_248</c>.
/// </summary>
/// <param name="tid">Task id set on the attempt id.</param>
/// <param name="i">Attempt number set on the attempt id.</param>
/// <returns>The newly created attempt object.</returns>
public static TaskAttempt NewTaskAttempt(TaskId tid, int i)
{
    TaskAttemptId attemptId = Org.Apache.Hadoop.Yarn.Util.Records.NewRecord<TaskAttemptId>();
    attemptId.SetTaskId(tid);
    attemptId.SetId(i);

    TaskAttemptReport attemptReport = NewTaskAttemptReport(attemptId);
    return new _TaskAttempt_248(attemptId, attemptReport);
}
/// <summary>
/// Returns the diagnostics of the attempt named in the request. The attempt
/// is resolved through <c>VerifyAndGetAttempt</c> with
/// <c>JobACL.ViewJob</c> access.
/// </summary>
/// <param name="request">Request carrying the task attempt id.</param>
/// <returns>A response populated with the attempt's diagnostics.</returns>
/// <exception cref="System.IO.IOException"/>
public virtual GetDiagnosticsResponse GetDiagnostics(GetDiagnosticsRequest request)
{
    TaskAttemptId requestedAttempt = request.GetTaskAttemptId();
    GetDiagnosticsResponse reply = this.recordFactory.NewRecordInstance<GetDiagnosticsResponse>();

    TaskAttempt attempt = this.VerifyAndGetAttempt(requestedAttempt, JobACL.ViewJob);
    reply.AddAllDiagnostics(attempt.GetDiagnostics());
    return reply;
}
/// <summary>
/// Returns the report of the attempt named in the request. The attempt is
/// resolved through <c>VerifyAndGetAttempt</c> with <c>JobACL.ViewJob</c>
/// access.
/// </summary>
/// <param name="request">Request carrying the task attempt id.</param>
/// <returns>A response carrying the attempt's report.</returns>
/// <exception cref="System.IO.IOException"/>
public virtual GetTaskAttemptReportResponse GetTaskAttemptReport(GetTaskAttemptReportRequest request)
{
    TaskAttemptId requestedAttempt = request.GetTaskAttemptId();
    GetTaskAttemptReportResponse reply =
        this.recordFactory.NewRecordInstance<GetTaskAttemptReportResponse>();

    TaskAttempt attempt = this.VerifyAndGetAttempt(requestedAttempt, JobACL.ViewJob);
    reply.SetTaskAttemptReport(attempt.GetReport());
    return reply;
}
/// <summary>
/// Looks up a task attempt by id after resolving its task through
/// <c>VerifyAndGetTask</c> with the requested access type.
/// </summary>
/// <param name="attemptID">Id of the attempt to look up.</param>
/// <param name="accessType">Access type passed to <c>VerifyAndGetTask</c>.</param>
/// <returns>The matching attempt; never <c>null</c>.</returns>
/// <exception cref="System.IO.IOException">The task has no attempt with this id.</exception>
private TaskAttempt VerifyAndGetAttempt(TaskAttemptId attemptID, JobACL accessType)
{
    TaskAttempt found = this.VerifyAndGetTask(attemptID.GetTaskId(), accessType).GetAttempt(attemptID);
    if (found != null)
    {
        return found;
    }

    throw new IOException("Unknown TaskAttempt " + attemptID);
}
/// <summary>
/// Processes a status update for a task attempt. When the attempt is in the
/// <c>Succeeded</c> state and its task has not been counted before, the
/// elapsed time between the attempt's recorded start and
/// <paramref name="timestamp"/> is added to the task's statistics.
/// Returns without doing anything when the job, the task, or the attempt
/// cannot be resolved.
/// </summary>
/// <param name="status">Status carrying the attempt id.</param>
/// <param name="timestamp">Time of the update; used as the finish time on success.</param>
public virtual void UpdateAttempt(TaskAttemptStatusUpdateEvent.TaskAttemptStatus status, long timestamp)
{
    TaskAttemptId attemptID = status.id;
    TaskId taskID = attemptID.GetTaskId();
    JobId jobID = taskID.GetJobId();

    Org.Apache.Hadoop.Mapreduce.V2.App.Job.Job job = context.GetJob(jobID);
    if (job == null)
    {
        return;
    }

    Task task = job.GetTask(taskID);
    if (task == null)
    {
        return;
    }

    // A missing start time must fall back to long.MinValue. The indexer
    // cannot express that: a .NET dictionary throws on a missing key, and a
    // plain long can never compare equal to null, so use TryGetValue.
    long recordedStart;
    long start = startTimes.TryGetValue(attemptID, out recordedStart) ? recordedStart : long.MinValue;

    TaskAttempt taskAttempt = task.GetAttempt(attemptID);
    // Treat an unknown attempt like an unknown job or task: nothing to record.
    if (taskAttempt == null || taskAttempt.GetState() != TaskAttemptState.Succeeded)
    {
        return;
    }

    // is this a new success?
    bool isNew = false;
    lock (doneTasks)
    {
        if (!doneTasks.Contains(task))
        {
            doneTasks.AddItem(task);
            isNew = true;
        }
    }

    // It's a new completion
    // Note that if a task completes twice [because of a previous speculation
    // and a race, or a success followed by loss of the machine with the
    // local data] we only count the first one.
    if (!isNew)
    {
        return;
    }

    long finish = timestamp;
    if (start > 1L && finish > 1L && start <= finish)
    {
        long duration = finish - start;
        DataStatistics statistics = DataStatisticsForTask(taskID);
        if (statistics != null)
        {
            statistics.Add(duration);
        }
    }
}
/// <summary>
/// Parses "attempt_0_1_m_2_3" with <c>MRApps.ToTaskAttemptID</c> and checks
/// every numeric component of the resulting id.
/// </summary>
public virtual void TestToTaskAttemptID()
{
    TaskAttemptId parsed = MRApps.ToTaskAttemptID("attempt_0_1_m_2_3");

    // Application id: cluster timestamp and sequence number.
    NUnit.Framework.Assert.AreEqual(
        0, parsed.GetTaskId().GetJobId().GetAppId().GetClusterTimestamp());
    NUnit.Framework.Assert.AreEqual(
        1, parsed.GetTaskId().GetJobId().GetAppId().GetId());

    // Job, task, and attempt numbers.
    NUnit.Framework.Assert.AreEqual(1, parsed.GetTaskId().GetJobId().GetId());
    NUnit.Framework.Assert.AreEqual(2, parsed.GetTaskId().GetId());
    NUnit.Framework.Assert.AreEqual(3, parsed.GetId());
}
/// <summary>
/// Creates a new <c>MyTaskAttemptImpl</c> numbered by the current attempt
/// count, stores it in <c>attempts</c> under its id, prints a trace line,
/// and dispatches a <c>SpeculatorEvent</c> with a delta of +1 for this task.
/// </summary>
internal virtual void AddAttempt()
{
    TaskAttempt created = new TestRuntimeEstimators.MyTaskAttemptImpl(
        this, this.taskID, this.attempts.Count, this._enclosing.clock);
    TaskAttemptId createdId = created.GetID();
    this.attempts[createdId] = created;

    System.Console.Out.WriteLine("TLTRE.MyTaskImpl.addAttempt " + this.GetID());

    SpeculatorEvent attemptAdded = new SpeculatorEvent(this.taskID, +1);
    this._enclosing.dispatcher.GetEventHandler().Handle(attemptAdded);
}
/// <summary>
/// Returns the current estimate vector stored for the given attempt.
/// </summary>
/// <param name="attemptID">Attempt whose estimate is requested.</param>
/// <returns>
/// The value held by the attempt's reference in <c>estimates</c>, or
/// <c>null</c> when no reference is stored for the attempt.
/// </returns>
private ExponentiallySmoothedTaskRuntimeEstimator.EstimateVector GetEstimateVector(TaskAttemptId attemptID)
{
    // TryGetValue instead of the indexer: a .NET dictionary indexer throws
    // KeyNotFoundException for an unknown attempt, which would bypass the
    // intended "return null" path.
    AtomicReference<ExponentiallySmoothedTaskRuntimeEstimator.EstimateVector> vectorRef;
    if (!estimates.TryGetValue(attemptID, out vectorRef) || vectorRef == null)
    {
        return null;
    }

    return vectorRef.Get();
}
/// <summary>
/// Launch hook: for an attempt whose task id is 0 a <c>JobKill</c> event is
/// dispatched for the owning job; for every other attempt a <c>TaDone</c>
/// event is dispatched.
/// </summary>
/// <param name="attemptID">The attempt that was launched.</param>
protected override void AttemptLaunched(TaskAttemptId attemptID)
{
    bool isFirstTask = attemptID.GetTaskId().GetId() == 0;
    if (!isFirstTask)
    {
        GetContext().GetEventHandler().Handle(
            new TaskAttemptEvent(attemptID, TaskAttemptEventType.TaDone));
        return;
    }

    GetContext().GetEventHandler().Handle(
        new JobEvent(attemptID.GetTaskId().GetJobId(), JobEventType.JobKill));
}
/// <summary>
/// Starts a launcher backed by mocks, feeds it one mocked launch event, and
/// verifies that <c>StartContainers</c> is called; then stops the launcher
/// without sending a cleanup event and verifies that <c>StopContainers</c>
/// is called.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestMyShutdown()
{
    Log.Info("in test Shutdown");

    // Mocked application context that hands out a mocked event handler.
    AppContext appContext = Org.Mockito.Mockito.Mock<AppContext>();
    EventHandler eventSink = Org.Mockito.Mockito.Mock<EventHandler>();
    Org.Mockito.Mockito.When(appContext.GetEventHandler()).ThenReturn(eventSink);

    TestContainerLauncherImpl.ContainerManagementProtocolClient containerManager =
        Org.Mockito.Mockito.Mock<TestContainerLauncherImpl.ContainerManagementProtocolClient>();
    TestContainerLauncherImpl.ContainerLauncherImplUnderTest launcher =
        new TestContainerLauncherImpl.ContainerLauncherImplUnderTest(appContext, containerManager);

    Configuration conf = new Configuration();
    launcher.Init(conf);
    launcher.Start();
    try
    {
        ContainerId containerId = MakeContainerId(0L, 0, 0, 1);
        TaskAttemptId attemptId = MakeTaskAttemptId(0L, 0, 0, TaskType.Map, 0);
        string managerAddress = "127.0.0.1:8000";

        StartContainersResponse startReply =
            recordFactory.NewRecordInstance<StartContainersResponse>();
        startReply.SetAllServicesMetaData(serviceResponse);

        Log.Info("inserting launch event");
        ContainerRemoteLaunchEvent launchEvent =
            Org.Mockito.Mockito.Mock<ContainerRemoteLaunchEvent>();
        Org.Mockito.Mockito.When(launchEvent.GetType())
            .ThenReturn(ContainerLauncher.EventType.ContainerRemoteLaunch);
        Org.Mockito.Mockito.When(launchEvent.GetContainerID()).ThenReturn(containerId);
        Org.Mockito.Mockito.When(launchEvent.GetTaskAttemptID()).ThenReturn(attemptId);
        Org.Mockito.Mockito.When(launchEvent.GetContainerMgrAddress()).ThenReturn(managerAddress);
        Org.Mockito.Mockito.When(containerManager.StartContainers(Matchers.Any<StartContainersRequest>()))
            .ThenReturn(startReply);
        Org.Mockito.Mockito.When(launchEvent.GetContainerToken())
            .ThenReturn(CreateNewContainerToken(containerId, managerAddress));

        launcher.Handle(launchEvent);
        launcher.WaitForPoolToIdle();

        Org.Mockito.Mockito.Verify(containerManager)
            .StartContainers(Matchers.Any<StartContainersRequest>());
    }
    finally
    {
        // skip cleanup and make sure stop kills the container
        launcher.Stop();
        Org.Mockito.Mockito.Verify(containerManager)
            .StopContainers(Matchers.Any<StopContainersRequest>());
    }
}
// TODO: This isn't really used in any MR code. Ask for removal.
/// <summary>
/// Returns a window of map-attempt completion events for the job that owns
/// the requesting attempt, and marks the attempt as progressing with the
/// heartbeat handler.
/// </summary>
/// <param name="jobIdentifier">Not read by this method; the job is derived from <paramref name="taskAttemptID"/>.</param>
/// <param name="startIndex">Start index passed to the job's event lookup.</param>
/// <param name="maxEvents">Maximum event count passed to the job's event lookup.</param>
/// <param name="taskAttemptID">Attempt making the request.</param>
/// <returns>An update wrapping the events, with the reset flag set to <c>false</c>.</returns>
/// <exception cref="System.IO.IOException"/>
public virtual MapTaskCompletionEventsUpdate GetMapCompletionEvents(
    JobID jobIdentifier, int startIndex, int maxEvents, TaskAttemptID taskAttemptID)
{
    Log.Info("MapCompletionEvents request from " + taskAttemptID.ToString() + ". startIndex "
        + startIndex + " maxEvents " + maxEvents);

    // TODO: shouldReset is never used. See TT. Ask for Removal.
    bool shouldReset = false;

    TaskAttemptId yarnAttemptId = TypeConverter.ToYarn(taskAttemptID);
    JobId owningJobId = yarnAttemptId.GetTaskId().GetJobId();
    TaskCompletionEvent[] completions =
        context.GetJob(owningJobId).GetMapAttemptCompletionEvents(startIndex, maxEvents);

    taskHeartbeatHandler.Progressing(yarnAttemptId);
    return new MapTaskCompletionEventsUpdate(completions, shouldReset);
}
/// <summary>
/// Removes the JVM's entries from <c>launchedJVMs</c> and
/// <c>jvmIDToActiveAttemptMap</c>, then unregisters the attempt from the
/// heartbeat handler.
/// </summary>
/// <param name="attemptID">Attempt to unregister from the heartbeat handler.</param>
/// <param name="jvmID">JVM id whose mappings are removed.</param>
public virtual void Unregister(TaskAttemptId attemptID, WrappedJvmID jvmID)
{
    // Unregistration comes from the same TaskAttempt that performed the
    // registration. Events are ordered at TaskAttempt, so unregistration
    // always arrives after registration.

    // Order matters: drop the launchedJVMs entry before the
    // jvmIDToActiveAttemptMap entry to avoid a synchronization issue with
    // getTask(), which should check jvmIDToActiveAttemptMap before
    // launchedJVMs.

    // remove the mappings if not already removed
    launchedJVMs.Remove(jvmID);
    Sharpen.Collections.Remove(jvmIDToActiveAttemptMap, jvmID);

    //unregister this attempt
    taskHeartbeatHandler.Unregister(attemptID);
}
//TODO_get.set
/// <summary>
/// Builds a map-type task attempt id from freshly created records with an
/// application id of (0, 0) and checks its string form as produced by
/// <c>MRApps.ToString</c>.
/// </summary>
public virtual void TestTaskAttemptIDtoString()
{
    TaskAttemptId attempt =
        RecordFactoryProvider.GetRecordFactory(null).NewRecordInstance<TaskAttemptId>();

    TaskId emptyTaskId = RecordFactoryProvider.GetRecordFactory(null).NewRecordInstance<TaskId>();
    attempt.SetTaskId(emptyTaskId);
    attempt.GetTaskId().SetTaskType(TaskType.Map);

    JobId emptyJobId = RecordFactoryProvider.GetRecordFactory(null).NewRecordInstance<JobId>();
    attempt.GetTaskId().SetJobId(emptyJobId);
    attempt.GetTaskId().GetJobId().SetAppId(ApplicationId.NewInstance(0, 0));

    NUnit.Framework.Assert.AreEqual("attempt_0_0000_m_000000_0", MRApps.ToString(attempt));
}
/// <summary>
/// Returns the successful attempt id. A previously cached value is returned
/// as is; otherwise the value is read from the proto (or the builder when
/// <c>viaProto</c> is false), converted, cached, and returned.
/// </summary>
/// <returns>The attempt id, or <c>null</c> when neither a cached value nor a stored one exists.</returns>
public virtual TaskAttemptId GetSuccessfulAttempt()
{
    if (this.successfulAttemptId == null)
    {
        MRProtos.TaskReportProtoOrBuilder source = viaProto ? proto : builder;
        if (!source.HasSuccessfulAttempt())
        {
            return null;
        }

        this.successfulAttemptId = ConvertFromProtoFormat(source.GetSuccessfulAttempt());
    }

    return this.successfulAttemptId;
}
/// <summary>
/// Returns the diagnostics of the attempt named in the request. The owning
/// job is resolved through <c>VerifyAndGetJob</c>, and the attempt is looked
/// up through the job's task.
/// </summary>
/// <param name="request">Request carrying the task attempt id.</param>
/// <returns>A response populated with the attempt's diagnostics.</returns>
/// <exception cref="System.IO.IOException"/>
public virtual GetDiagnosticsResponse GetDiagnostics(GetDiagnosticsRequest request)
{
    TaskAttemptId requestedAttempt = request.GetTaskAttemptId();
    Org.Apache.Hadoop.Mapreduce.V2.App.Job.Job owningJob =
        this.VerifyAndGetJob(requestedAttempt.GetTaskId().GetJobId(), true);

    GetDiagnosticsResponse reply = this.recordFactory.NewRecordInstance<GetDiagnosticsResponse>();

    TaskAttempt attempt = owningJob.GetTask(requestedAttempt.GetTaskId()).GetAttempt(requestedAttempt);
    reply.AddAllDiagnostics(attempt.GetDiagnostics());
    return reply;
}
/// <summary>
/// Returns the task attempt id of this request. A previously cached value
/// is returned as is; otherwise the value is read from the proto (or the
/// builder when <c>viaProto</c> is false), converted, cached, and returned.
/// </summary>
/// <returns>The attempt id, or <c>null</c> when neither a cached value nor a stored one exists.</returns>
public virtual TaskAttemptId GetTaskAttemptId()
{
    TaskAttemptId cached = this.taskAttemptId;
    if (cached != null)
    {
        return cached;
    }

    MRServiceProtos.KillTaskAttemptRequestProtoOrBuilder source = viaProto ? proto : builder;
    if (source.HasTaskAttemptId())
    {
        this.taskAttemptId = ConvertFromProtoFormat(source.GetTaskAttemptId());
        return this.taskAttemptId;
    }

    return null;
}