/// <summary>
/// Queues the given container ids for removal and then sweeps the whole pending
/// queue: ids that the NM context no longer knows are dropped from the queue, and
/// ids whose container has reached <c>ContainerState.Done</c> are removed from
/// both the NM context and the queue. Ids whose container is in any other state
/// stay queued for a later call.
/// </summary>
/// <param name="containerIds">Container ids to add to the pending-removal queue.</param>
public virtual void RemoveOrTrackCompletedContainersFromContext(IList<ContainerId> containerIds)
{
    Sharpen.Collections.AddAll(pendingContainersToRemove, containerIds);

    // Collected only so that the removals can be reported in a single log line.
    ICollection<ContainerId> purged = new HashSet<ContainerId>();

    for (IEnumerator<ContainerId> cursor = pendingContainersToRemove.GetEnumerator(); cursor.HasNext(); )
    {
        ContainerId candidate = cursor.Next();
        Org.Apache.Hadoop.Yarn.Server.Nodemanager.Containermanager.Container.Container tracked =
            context.GetContainers()[candidate];

        if (tracked == null)
        {
            // The context has already forgotten this container; stop tracking it.
            cursor.Remove();
            continue;
        }

        if (!tracked.GetContainerState().Equals(ContainerState.Done))
        {
            // Not finished yet: leave it queued.
            continue;
        }

        Sharpen.Collections.Remove(context.GetContainers(), candidate);
        purged.AddItem(candidate);
        cursor.Remove();
    }

    if (!purged.IsEmpty())
    {
        Log.Info("Removed completed containers from NM context: " + purged);
    }

    pendingCompletedContainers.Clear();
}
/// <summary>
/// Resolves the on-disk log file named <paramref name="fileName"/> for a container,
/// after verifying that <paramref name="remoteUser"/> may access the owning application.
/// </summary>
/// <param name="containerId">Container whose log is requested.</param>
/// <param name="fileName">Log file name, appended to the container's relative log directory.</param>
/// <param name="remoteUser">User on whose behalf the access check is performed.</param>
/// <param name="context">Node manager context supplying the container map and dirs handler.</param>
/// <returns>The resolved log file.</returns>
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
public static FilePath GetContainerLogFile(ContainerId containerId, string fileName, string remoteUser, Context context)
{
    Org.Apache.Hadoop.Yarn.Server.Nodemanager.Containermanager.Container.Container trackedContainer =
        context.GetContainers()[containerId];
    Org.Apache.Hadoop.Yarn.Server.Nodemanager.Containermanager.Application.Application owningApp =
        GetApplicationForContainer(containerId, context);

    CheckAccess(remoteUser, owningApp, context);

    // The state check only applies while the context still tracks the container.
    if (trackedContainer != null)
    {
        CheckState(trackedContainer.GetContainerState());
    }

    try
    {
        LocalDirsHandlerService logDirsService = context.GetLocalDirsHandler();
        string appIdText = owningApp.GetAppId().ToString();
        string containerIdText = containerId.ToString();
        string logDirSuffix = ContainerLaunch.GetRelativeContainerLogDir(appIdText, containerIdText);
        Path readablePath = logDirsService.GetLogPathToRead(logDirSuffix + Path.Separator + fileName);

        // Round-trip through a URI and hand back the file at the URI's path component.
        URI asUri = new FilePath(readablePath.ToString()).ToURI();
        return new FilePath(asUri.GetPath());
    }
    catch (IOException e)
    {
        Log.Warn("Failed to find log file", e);
        throw new NotFoundException("Cannot find this log on the local disk.");
    }
}
/// <summary>
/// Returns the local directories that hold the logs of the given container,
/// after verifying that <paramref name="remoteUser"/> may access the owning application.
/// </summary>
/// <param name="containerId">Container whose log directories are requested.</param>
/// <param name="remoteUser">User on whose behalf the access check is performed.</param>
/// <param name="context">Node manager context supplying the container map and dirs handler.</param>
/// <returns>The container's log directories.</returns>
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
public static IList<FilePath> GetContainerLogDirs(ContainerId containerId, string remoteUser, Context context)
{
    Org.Apache.Hadoop.Yarn.Server.Nodemanager.Containermanager.Container.Container trackedContainer =
        context.GetContainers()[containerId];
    Org.Apache.Hadoop.Yarn.Server.Nodemanager.Containermanager.Application.Application owningApp =
        GetApplicationForContainer(containerId, context);

    CheckAccess(remoteUser, owningApp, context);

    // A missing container is deliberately not an error. Once a container completes,
    // the NodeManager drops it from the NM context, yet with log aggregation
    // disabled the log-view request is still forwarded to this NM, which must
    // keep serving the logs. Only a container that is still tracked gets its
    // state validated.
    if (trackedContainer != null)
    {
        CheckState(trackedContainer.GetContainerState());
    }

    LocalDirsHandlerService logDirsService = context.GetLocalDirsHandler();
    return GetContainerLogDirs(containerId, logDirsService);
}
/// <summary>
/// Prepares and launches the container: expands variables in the launch commands
/// and environment, writes the launch script and the token file into the
/// NM-private directories, then hands the container to the executor and reports
/// the outcome through the dispatcher.
/// </summary>
/// <returns>
/// <c>0</c> if the container was already in the <c>Killing</c> state before launch
/// or exited successfully; otherwise the exit code obtained from the executor, or
/// the value <c>ret</c> held when a failure interrupted the launch (initially
/// <c>-1</c>, or <c>ContainerExitStatus.DisksFailed</c> when the disks are unhealthy).
/// </returns>
public virtual int Call()
{
    ContainerLaunchContext launchContext = container.GetLaunchContext();
    IDictionary<Path, IList<string>> localResources = null;
    ContainerId containerID = container.GetContainerId();
    string containerIdStr = ConverterUtils.ToString(containerID);
    IList<string> command = launchContext.GetCommands();
    int ret = -1;

    // CONTAINER_KILLED_ON_REQUEST should not be missed if the container
    // is already at KILLING
    if (container.GetContainerState() == ContainerState.Killing)
    {
        dispatcher.GetEventHandler().Handle(new ContainerExitEvent(
            containerID,
            ContainerEventType.ContainerKilledOnRequest,
            Shell.Windows
                ? ContainerExecutor.ExitCode.ForceKilled.GetExitCode()
                : ContainerExecutor.ExitCode.Terminated.GetExitCode(),
            "Container terminated before launch."));
        return 0;
    }

    try
    {
        localResources = container.GetLocalizedResources();
        if (localResources == null)
        {
            throw RPCUtil.GetRemoteException("Unable to get local resources when Container "
                + containerID + " is at " + container.GetContainerState());
        }

        string user = container.GetUser();

        // /////////////////////////// Variable expansion
        // Before the container script gets written out.
        IList<string> newCmds = new AList<string>(command.Count);
        string appIdStr = app.GetAppId().ToString();
        string relativeContainerLogDir =
            Org.Apache.Hadoop.Yarn.Server.Nodemanager.Containermanager.Launcher.ContainerLaunch
                .GetRelativeContainerLogDir(appIdStr, containerIdStr);
        Path containerLogDir = dirsHandler.GetLogPathForWrite(relativeContainerLogDir, false);
        foreach (string str in command)
        {
            // TODO: Should we instead work via symlinks without this grammar?
            newCmds.AddItem(ExpandEnvironment(str, containerLogDir));
        }
        launchContext.SetCommands(newCmds);

        IDictionary<string, string> environment = launchContext.GetEnvironment();
        // Expand every value in place. The keys are snapshotted first and each
        // result is written back through the indexer: a KeyValuePair obtained from
        // enumeration is only a copy, so assigning to it would never reach the
        // dictionary, and the dictionary must not be modified while it is being
        // enumerated.
        string[] envNames = new string[environment.Count];
        environment.Keys.CopyTo(envNames, 0);
        foreach (string envName in envNames)
        {
            environment[envName] = ExpandEnvironment(environment[envName], containerLogDir);
        }
        // /////////////////////////// End of variable expansion

        FileContext lfs = FileContext.GetLocalFSFileContext();
        Path nmPrivateContainerScriptPath = dirsHandler.GetLocalPathForWrite(
            GetContainerPrivateDir(appIdStr, containerIdStr) + Path.Separator + ContainerScript);
        Path nmPrivateTokensPath = dirsHandler.GetLocalPathForWrite(
            GetContainerPrivateDir(appIdStr, containerIdStr) + Path.Separator
            + string.Format(ContainerLocalizer.TokenFileNameFmt, containerIdStr));
        Path nmPrivateClasspathJarDir = dirsHandler.GetLocalPathForWrite(
            GetContainerPrivateDir(appIdStr, containerIdStr));
        DataOutputStream containerScriptOutStream = null;
        DataOutputStream tokensOutStream = null;

        // Select the working directory for the container
        Path containerWorkDir = dirsHandler.GetLocalPathForWrite(
            ContainerLocalizer.Usercache + Path.Separator + user + Path.Separator
            + ContainerLocalizer.Appcache + Path.Separator + appIdStr + Path.Separator
            + containerIdStr, LocalDirAllocator.SizeUnknown, false);
        string pidFileSubpath = GetPidFileSubpath(appIdStr, containerIdStr);

        // pid file should be in nm private dir so that it is not
        // accessible by users
        pidFilePath = dirsHandler.GetLocalPathForWrite(pidFileSubpath);
        IList<string> localDirs = dirsHandler.GetLocalDirs();
        IList<string> logDirs = dirsHandler.GetLogDirs();
        IList<string> containerLogDirs = new AList<string>();
        foreach (string logDir in logDirs)
        {
            containerLogDirs.AddItem(logDir + Path.Separator + relativeContainerLogDir);
        }

        if (!dirsHandler.AreDisksHealthy())
        {
            // Record the disk-failure status first so the catch block below reports it.
            ret = ContainerExitStatus.DisksFailed;
            throw new IOException("Most of the disks failed. " + dirsHandler.GetDisksHealthReport(false));
        }

        try
        {
            // /////////// Write out the container-script in the nmPrivate space.
            IList<Path> appDirs = new AList<Path>(localDirs.Count);
            foreach (string localDir in localDirs)
            {
                Path usersdir = new Path(localDir, ContainerLocalizer.Usercache);
                Path userdir = new Path(usersdir, user);
                Path appsdir = new Path(userdir, ContainerLocalizer.Appcache);
                appDirs.AddItem(new Path(appsdir, appIdStr));
            }
            containerScriptOutStream = lfs.Create(nmPrivateContainerScriptPath,
                EnumSet.Of(CreateFlag.Create, CreateFlag.Overwrite));

            // Set the token location too.
            environment[ApplicationConstants.ContainerTokenFileEnvName] =
                new Path(containerWorkDir, FinalContainerTokensFile).ToUri().GetPath();

            // Sanitize the container's environment
            SanitizeEnv(environment, containerWorkDir, appDirs, containerLogDirs, localResources,
                nmPrivateClasspathJarDir);

            // Write out the environment
            exec.WriteLaunchEnv(containerScriptOutStream, environment, localResources,
                launchContext.GetCommands());
            // /////////// End of writing out container-script

            // /////////// Write out the container-tokens in the nmPrivate space.
            tokensOutStream = lfs.Create(nmPrivateTokensPath,
                EnumSet.Of(CreateFlag.Create, CreateFlag.Overwrite));
            Credentials creds = container.GetCredentials();
            creds.WriteTokenStorageToStream(tokensOutStream);
            // /////////// End of writing out container-tokens
        }
        finally
        {
            IOUtils.Cleanup(Log, containerScriptOutStream, tokensOutStream);
        }

        // LaunchContainer is a blocking call. We are here almost means the
        // container is launched, so send out the event.
        dispatcher.GetEventHandler().Handle(new ContainerEvent(containerID,
            ContainerEventType.ContainerLaunched));
        context.GetNMStateStore().StoreContainerLaunched(containerID);

        // Check if the container is signalled to be killed.
        if (!shouldLaunchContainer.CompareAndSet(false, true))
        {
            Log.Info("Container " + containerIdStr + " not launched as " + "cleanup already called");
            ret = ContainerExecutor.ExitCode.Terminated.GetExitCode();
        }
        else
        {
            exec.ActivateContainer(containerID, pidFilePath);
            ret = exec.LaunchContainer(container, nmPrivateContainerScriptPath, nmPrivateTokensPath,
                user, appIdStr, containerWorkDir, localDirs, logDirs);
        }
    }
    catch (Exception e)
    {
        Log.Warn("Failed to launch container.", e);
        dispatcher.GetEventHandler().Handle(new ContainerExitEvent(containerID,
            ContainerEventType.ContainerExitedWithFailure, ret, e.Message));
        return ret;
    }
    finally
    {
        // Runs on success and on failure alike: mark completion, deactivate the
        // container in the executor and persist the exit code.
        completed.Set(true);
        exec.DeactivateContainer(containerID);
        try
        {
            context.GetNMStateStore().StoreContainerCompleted(containerID, ret);
        }
        catch (IOException)
        {
            Log.Error("Unable to set exit code for container " + containerID);
        }
    }

    if (Log.IsDebugEnabled())
    {
        Log.Debug("Container " + containerIdStr + " completed with exit code " + ret);
    }

    if (ret == ContainerExecutor.ExitCode.ForceKilled.GetExitCode()
        || ret == ContainerExecutor.ExitCode.Terminated.GetExitCode())
    {
        // If the process was killed, Send container_cleanedup_after_kill and
        // just break out of this method.
        dispatcher.GetEventHandler().Handle(new ContainerExitEvent(containerID,
            ContainerEventType.ContainerKilledOnRequest, ret,
            "Container exited with a non-zero exit code " + ret));
        return ret;
    }

    if (ret != 0)
    {
        Log.Warn("Container exited with a non-zero exit code " + ret);
        this.dispatcher.GetEventHandler().Handle(new ContainerExitEvent(containerID,
            ContainerEventType.ContainerExitedWithFailure, ret,
            "Container exited with a non-zero exit code " + ret));
        return ret;
    }

    Log.Info("Container " + containerIdStr + " succeeded ");
    dispatcher.GetEventHandler().Handle(new ContainerEvent(containerID,
        ContainerEventType.ContainerExitedWithSuccess));
    return 0;
}
/// <summary>
/// Starts a NodeManager, runs one container with an application-visibility local
/// resource until it reaches <c>ContainerState.Done</c>, then restarts the
/// NodeManager twice, checking the local directories after each restart and
/// verifying the deletions requested from the mocked deletion service after the
/// first restart.
/// </summary>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
/// <exception cref="System.Exception"/>
public virtual void TestClearLocalDirWhenNodeReboot()
{
    nm = new TestNodeManagerReboot.MyNodeManager(this);
    nm.Start();
    ContainerManagementProtocol containerManager = nm.GetContainerManager();

    // create files under fileCache
    CreateFiles(nmLocalDir.GetAbsolutePath(), ContainerLocalizer.Filecache, 100);
    localResourceDir.Mkdirs();
    ContainerLaunchContext containerLaunchContext = Records.NewRecord<ContainerLaunchContext>();

    // Construct the Container-id
    ContainerId cId = CreateContainerId();
    URL localResourceUri = ConverterUtils.GetYarnUrlFromPath(
        localFS.MakeQualified(new Path(localResourceDir.GetAbsolutePath())));
    LocalResource localResource = LocalResource.NewInstance(localResourceUri, LocalResourceType.File,
        LocalResourceVisibility.Application, -1, localResourceDir.LastModified());
    string destinationFile = "dest_file";
    IDictionary<string, LocalResource> localResources = new Dictionary<string, LocalResource>();
    localResources[destinationFile] = localResource;
    containerLaunchContext.SetLocalResources(localResources);
    IList<string> commands = new AList<string>();
    containerLaunchContext.SetCommands(commands);

    NodeId nodeId = nm.GetNMContext().GetNodeId();
    StartContainerRequest scRequest = StartContainerRequest.NewInstance(containerLaunchContext,
        TestContainerManager.CreateContainerToken(cId, 0, nodeId, destinationFile,
            nm.GetNMContext().GetContainerTokenSecretManager()));
    IList<StartContainerRequest> list = new AList<StartContainerRequest>();
    list.AddItem(scRequest);
    StartContainersRequest allRequests = StartContainersRequest.NewInstance(list);

    UserGroupInformation currentUser = UserGroupInformation.CreateRemoteUser(
        cId.GetApplicationAttemptId().ToString());
    NMTokenIdentifier nmIdentifier = new NMTokenIdentifier(cId.GetApplicationAttemptId(), nodeId, user, 123);
    currentUser.AddTokenIdentifier(nmIdentifier);
    currentUser.DoAs(new _PrivilegedExceptionAction_152(this, allRequests));

    IList<ContainerId> containerIds = new AList<ContainerId>();
    containerIds.AddItem(cId);
    GetContainerStatusesRequest request = GetContainerStatusesRequest.NewInstance(containerIds);
    Org.Apache.Hadoop.Yarn.Server.Nodemanager.Containermanager.Container.Container container =
        nm.GetNMContext().GetContainers()[request.GetContainerIds()[0]];

    // Fail with a clear assertion instead of a null dereference in the polling
    // loop if the container was never registered in the NM context.
    NUnit.Framework.Assert.IsNotNull(container);

    // Poll every 500 ms until the container is Done or the retry budget is used up.
    int MaxTries = 20;
    int numTries = 0;
    while (!container.GetContainerState().Equals(ContainerState.Done) && numTries <= MaxTries)
    {
        try
        {
            Sharpen.Thread.Sleep(500);
        }
        catch (Exception)
        {
            // Do nothing
        }
        numTries++;
    }

    NUnit.Framework.Assert.AreEqual(ContainerState.Done, container.GetContainerState());
    NUnit.Framework.Assert.IsTrue("The container should create a subDir named currentUser: " + user
        + "under localDir/usercache",
        NumOfLocalDirs(nmLocalDir.GetAbsolutePath(), ContainerLocalizer.Usercache) > 0);
    NUnit.Framework.Assert.IsTrue("There should be files or Dirs under nm_private when "
        + "container is launched",
        NumOfLocalDirs(nmLocalDir.GetAbsolutePath(), ResourceLocalizationService.NmPrivateDir) > 0);

    // restart the NodeManager
    RestartNM(MaxTries);
    CheckNumOfLocalDirs();

    Org.Mockito.Mockito.Verify(delService, Org.Mockito.Mockito.Times(1)).Delete(
        (string)Matchers.IsNull(),
        Matchers.ArgThat(new TestNodeManagerReboot.PathInclude(this,
            ResourceLocalizationService.NmPrivateDir + "_DEL_")));
    Org.Mockito.Mockito.Verify(delService, Org.Mockito.Mockito.Times(1)).Delete(
        (string)Matchers.IsNull(),
        Matchers.ArgThat(new TestNodeManagerReboot.PathInclude(this,
            ContainerLocalizer.Filecache + "_DEL_")));
    Org.Mockito.Mockito.Verify(delService, Org.Mockito.Mockito.Times(1)).ScheduleFileDeletionTask(
        Matchers.ArgThat(new TestNodeManagerReboot.FileDeletionInclude(this, user, null,
            new string[] { destinationFile })));
    Org.Mockito.Mockito.Verify(delService, Org.Mockito.Mockito.Times(1)).ScheduleFileDeletionTask(
        Matchers.ArgThat(new TestNodeManagerReboot.FileDeletionInclude(this, null,
            ContainerLocalizer.Usercache + "_DEL_", new string[] { })));

    // restart the NodeManager again
    // this time usercache directory should be empty
    RestartNM(MaxTries);
    CheckNumOfLocalDirs();
}