Exemple #1
0
        /// <summary>
        /// Adds the given container ids to the pending-removal set, then walks that set and
        /// stops tracking every id whose container is either absent from the NM context or
        /// has reached the DONE state (the latter is also removed from the context).
        /// Ids whose container is still in another state stay in the pending set.
        /// </summary>
        /// <param name="containerIds">Container ids to queue for removal.</param>
        public virtual void RemoveOrTrackCompletedContainersFromContext(IList<ContainerId> containerIds)
        {
            ICollection<ContainerId> purged = new HashSet<ContainerId>();

            Sharpen.Collections.AddAll(pendingContainersToRemove, containerIds);

            for (IEnumerator<ContainerId> cursor = pendingContainersToRemove.GetEnumerator(); cursor.HasNext();)
            {
                ContainerId candidate = cursor.Next();
                Org.Apache.Hadoop.Yarn.Server.Nodemanager.Containermanager.Container.Container tracked
                    = context.GetContainers()[candidate];

                if (tracked == null)
                {
                    // The context holds no container for this id: just stop tracking it.
                    cursor.Remove();
                    continue;
                }
                if (!tracked.GetContainerState().Equals(ContainerState.Done))
                {
                    // Only containers at DONE state are removed; leave this one pending.
                    continue;
                }

                Sharpen.Collections.Remove(context.GetContainers(), candidate);
                purged.AddItem(candidate);
                cursor.Remove();
            }

            if (!purged.IsEmpty())
            {
                Log.Info("Removed completed containers from NM context: " + purged);
            }
            pendingCompletedContainers.Clear();
        }
 /// <summary>
 /// Locates the log file called <paramref name="fileName"/> that belongs to the given
 /// container.
 /// </summary>
 /// <remarks>
 /// Access is checked for <paramref name="remoteUser"/> against the container's
 /// application; the container state is checked only when the container is still
 /// present in the context. An IOException raised while resolving the path is logged
 /// and replaced by a NotFoundException.
 /// </remarks>
 /// <param name="containerId">Id of the container whose log is requested.</param>
 /// <param name="fileName">Name of the log file inside the container's log directory.</param>
 /// <param name="remoteUser">User on whose behalf the log is read.</param>
 /// <param name="context">NodeManager context used for container and directory lookups.</param>
 /// <returns>The resolved log file.</returns>
 /// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
 public static FilePath GetContainerLogFile(ContainerId containerId, string fileName,
                                            string remoteUser, Context context)
 {
     Org.Apache.Hadoop.Yarn.Server.Nodemanager.Containermanager.Container.Container nmContainer
         = context.GetContainers()[containerId];
     Org.Apache.Hadoop.Yarn.Server.Nodemanager.Containermanager.Application.Application
         owningApp = GetApplicationForContainer(containerId, context);

     CheckAccess(remoteUser, owningApp, context);
     if (nmContainer != null)
     {
         CheckState(nmContainer.GetContainerState());
     }

     try
     {
         LocalDirsHandlerService logDirsService = context.GetLocalDirsHandler();
         string appIdText       = owningApp.GetAppId().ToString();
         string containerIdText = containerId.ToString();
         string relativeLogDir  = ContainerLaunch.GetRelativeContainerLogDir(appIdText, containerIdText);
         Path   resolvedPath    = logDirsService.GetLogPathToRead(relativeLogDir + Path.Separator + fileName);

         // Round-trip through a URI before building the FilePath that is returned.
         URI asUri = new FilePath(resolvedPath.ToString()).ToURI();
         return new FilePath(asUri.GetPath());
     }
     catch (IOException ioe)
     {
         Log.Warn("Failed to find log file", ioe);
         throw new NotFoundException("Cannot find this log on the local disk.");
     }
 }
 /// <summary>
 /// Returns the local directories in which the logs of the given container are kept.
 /// </summary>
 /// <param name="containerId">Id of the container whose log directories are requested.</param>
 /// <param name="remoteUser">User on whose behalf the directories are read.</param>
 /// <param name="context">NodeManager context used for container and directory lookups.</param>
 /// <returns>The container's log directories.</returns>
 /// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
 public static IList<FilePath> GetContainerLogDirs(ContainerId containerId, string remoteUser,
                                                   Context context)
 {
     Org.Apache.Hadoop.Yarn.Server.Nodemanager.Containermanager.Container.Container nmContainer
         = context.GetContainers()[containerId];
     Org.Apache.Hadoop.Yarn.Server.Nodemanager.Containermanager.Application.Application
         owningApp = GetApplicationForContainer(containerId, context);

     CheckAccess(remoteUser, owningApp, context);

     // A missing container (nmContainer == null) is deliberately not treated as an
     // error. Once a container completes, the NodeManager drops its information from
     // the NMContext. With log aggregation configured to false, container log view
     // requests are forwarded to the NM, which must still serve them even though it no
     // longer has the completed container's information.
     if (nmContainer != null)
     {
         CheckState(nmContainer.GetContainerState());
     }

     LocalDirsHandlerService logDirsService = context.GetLocalDirsHandler();
     return GetContainerLogDirs(containerId, logDirsService);
 }
        /// <summary>
        /// Prepares and launches the container: expands variables in the launch commands
        /// and environment, writes the launch script and the token file, then hands the
        /// container to the executor and reports the outcome through the dispatcher.
        /// </summary>
        /// <returns>
        /// 0 when the container was already at KILLING before launch or exited with code 0;
        /// otherwise the exit code held in <c>ret</c> (initially -1, set to
        /// <c>ContainerExitStatus.DisksFailed</c> on unhealthy disks, or to the value
        /// returned by the executor).
        /// </returns>
        public virtual int Call()
        {
            // dispatcher not typed
            ContainerLaunchContext launchContext = container.GetLaunchContext();
            IDictionary <Path, IList <string> > localResources = null;
            ContainerId    containerID    = container.GetContainerId();
            string         containerIdStr = ConverterUtils.ToString(containerID);
            IList <string> command        = launchContext.GetCommands();
            // Stays -1 unless a more specific exit status is assigned below.
            int            ret            = -1;

            // CONTAINER_KILLED_ON_REQUEST should not be missed if the container
            // is already at KILLING
            if (container.GetContainerState() == ContainerState.Killing)
            {
                dispatcher.GetEventHandler().Handle(new ContainerExitEvent(
                    containerID,
                    ContainerEventType.ContainerKilledOnRequest,
                    Shell.Windows
                        ? ContainerExecutor.ExitCode.ForceKilled.GetExitCode()
                        : ContainerExecutor.ExitCode.Terminated.GetExitCode(),
                    "Container terminated before launch."));
                return(0);
            }
            try
            {
                localResources = container.GetLocalizedResources();
                if (localResources == null)
                {
                    throw RPCUtil.GetRemoteException("Unable to get local resources when Container "
                                                     + containerID + " is at " + container.GetContainerState());
                }
                string user = container.GetUser();
                // /////////////////////////// Variable expansion
                // Before the container script gets written out.
                IList <string> newCmds  = new AList <string>(command.Count);
                string         appIdStr = app.GetAppId().ToString();
                string         relativeContainerLogDir = Org.Apache.Hadoop.Yarn.Server.Nodemanager.Containermanager.Launcher.ContainerLaunch
                                                         .GetRelativeContainerLogDir(appIdStr, containerIdStr);
                Path containerLogDir = dirsHandler.GetLogPathForWrite(relativeContainerLogDir, false);
                foreach (string str in command)
                {
                    // TODO: Should we instead work via symlinks without this grammar?
                    newCmds.AddItem(ExpandEnvironment(str, containerLogDir));
                }
                launchContext.SetCommands(newCmds);
                IDictionary <string, string> environment = launchContext.GetEnvironment();
                // Expand variables in every environment value. Iterate over a snapshot of
                // the keys and write back through the indexer: KeyValuePair is an immutable
                // struct (it has no setter), and a dictionary must not be modified while it
                // is being enumerated.
                foreach (string envName in new List <string>(environment.Keys))
                {
                    environment[envName] = ExpandEnvironment(environment[envName], containerLogDir);
                }
                // /////////////////////////// End of variable expansion
                FileContext lfs = FileContext.GetLocalFSFileContext();
                Path        nmPrivateContainerScriptPath = dirsHandler.GetLocalPathForWrite(
                    GetContainerPrivateDir(appIdStr, containerIdStr) + Path.Separator + ContainerScript);
                Path nmPrivateTokensPath = dirsHandler.GetLocalPathForWrite(
                    GetContainerPrivateDir(appIdStr, containerIdStr) + Path.Separator
                    + string.Format(ContainerLocalizer.TokenFileNameFmt, containerIdStr));
                Path nmPrivateClasspathJarDir = dirsHandler.GetLocalPathForWrite(
                    GetContainerPrivateDir(appIdStr, containerIdStr));
                DataOutputStream containerScriptOutStream = null;
                DataOutputStream tokensOutStream          = null;
                // Select the working directory for the container
                Path containerWorkDir = dirsHandler.GetLocalPathForWrite(
                    ContainerLocalizer.Usercache + Path.Separator + user + Path.Separator
                    + ContainerLocalizer.Appcache + Path.Separator + appIdStr + Path.Separator
                    + containerIdStr, LocalDirAllocator.SizeUnknown, false);
                string pidFileSubpath = GetPidFileSubpath(appIdStr, containerIdStr);
                // pid file should be in nm private dir so that it is not
                // accessible by users
                pidFilePath = dirsHandler.GetLocalPathForWrite(pidFileSubpath);
                IList <string> localDirs        = dirsHandler.GetLocalDirs();
                IList <string> logDirs          = dirsHandler.GetLogDirs();
                IList <string> containerLogDirs = new AList <string>();
                foreach (string logDir in logDirs)
                {
                    containerLogDirs.AddItem(logDir + Path.Separator + relativeContainerLogDir);
                }
                if (!dirsHandler.AreDisksHealthy())
                {
                    // Record the disk-failure status first so the catch block below
                    // reports it in the exit event.
                    ret = ContainerExitStatus.DisksFailed;
                    throw new IOException("Most of the disks failed. " + dirsHandler.GetDisksHealthReport
                                              (false));
                }
                try
                {
                    // /////////// Write out the container-script in the nmPrivate space.
                    IList <Path> appDirs = new AList <Path>(localDirs.Count);
                    foreach (string localDir in localDirs)
                    {
                        Path usersdir = new Path(localDir, ContainerLocalizer.Usercache);
                        Path userdir  = new Path(usersdir, user);
                        Path appsdir  = new Path(userdir, ContainerLocalizer.Appcache);
                        appDirs.AddItem(new Path(appsdir, appIdStr));
                    }
                    containerScriptOutStream = lfs.Create(nmPrivateContainerScriptPath,
                                                          EnumSet.Of(CreateFlag.Create, CreateFlag.Overwrite));
                    // Set the token location too.
                    environment[ApplicationConstants.ContainerTokenFileEnvName] =
                        new Path(containerWorkDir, FinalContainerTokensFile).ToUri().GetPath();
                    // Sanitize the container's environment
                    SanitizeEnv(environment, containerWorkDir, appDirs, containerLogDirs, localResources
                                , nmPrivateClasspathJarDir);
                    // Write out the environment
                    exec.WriteLaunchEnv(containerScriptOutStream, environment, localResources,
                                        launchContext.GetCommands());
                    // /////////// End of writing out container-script
                    // /////////// Write out the container-tokens in the nmPrivate space.
                    tokensOutStream = lfs.Create(nmPrivateTokensPath,
                                                 EnumSet.Of(CreateFlag.Create, CreateFlag.Overwrite));
                    Credentials creds = container.GetCredentials();
                    creds.WriteTokenStorageToStream(tokensOutStream);
                }
                finally
                {
                    // /////////// End of writing out container-tokens
                    // Both streams are cleaned up whether or not writing succeeded.
                    IOUtils.Cleanup(Log, containerScriptOutStream, tokensOutStream);
                }
                // LaunchContainer is a blocking call. We are here almost means the
                // container is launched, so send out the event.
                dispatcher.GetEventHandler().Handle(new ContainerEvent(
                    containerID, ContainerEventType.ContainerLaunched));
                context.GetNMStateStore().StoreContainerLaunched(containerID);
                // Check if the container is signalled to be killed.
                if (!shouldLaunchContainer.CompareAndSet(false, true))
                {
                    Log.Info("Container " + containerIdStr + " not launched as " + "cleanup already called"
                             );
                    ret = ContainerExecutor.ExitCode.Terminated.GetExitCode();
                }
                else
                {
                    exec.ActivateContainer(containerID, pidFilePath);
                    ret = exec.LaunchContainer(container, nmPrivateContainerScriptPath, nmPrivateTokensPath
                                               , user, appIdStr, containerWorkDir, localDirs, logDirs);
                }
            }
            catch (Exception e)
            {
                // Any failure during preparation or launch is reported as an exit with
                // failure, carrying whatever exit code was recorded so far.
                Log.Warn("Failed to launch container.", e);
                dispatcher.GetEventHandler().Handle(new ContainerExitEvent(
                    containerID, ContainerEventType.ContainerExitedWithFailure, ret, e.Message));
                return(ret);
            }
            finally
            {
                // Runs on both the normal and the failure path.
                completed.Set(true);
                exec.DeactivateContainer(containerID);
                try
                {
                    context.GetNMStateStore().StoreContainerCompleted(containerID, ret);
                }
                catch (IOException)
                {
                    Log.Error("Unable to set exit code for container " + containerID);
                }
            }
            if (Log.IsDebugEnabled())
            {
                Log.Debug("Container " + containerIdStr + " completed with exit code " + ret);
            }
            if (ret == ContainerExecutor.ExitCode.ForceKilled.GetExitCode() ||
                ret == ContainerExecutor.ExitCode.Terminated.GetExitCode())
            {
                // If the process was killed, Send container_cleanedup_after_kill and
                // just break out of this method.
                dispatcher.GetEventHandler().Handle(new ContainerExitEvent(
                    containerID, ContainerEventType.ContainerKilledOnRequest, ret,
                    "Container exited with a non-zero exit code " + ret));
                return(ret);
            }
            if (ret != 0)
            {
                Log.Warn("Container exited with a non-zero exit code " + ret);
                this.dispatcher.GetEventHandler().Handle(new ContainerExitEvent(
                    containerID, ContainerEventType.ContainerExitedWithFailure, ret,
                    "Container exited with a non-zero exit code " + ret));
                return(ret);
            }
            Log.Info("Container " + containerIdStr + " succeeded ");
            dispatcher.GetEventHandler().Handle(new ContainerEvent(
                containerID, ContainerEventType.ContainerExitedWithSuccess));
            return(0);
        }
Exemple #5
0
        /// <summary>
        /// Starts a NodeManager, runs one container with a local resource until it reaches
        /// the DONE state, checks that usercache and nm_private directories were populated,
        /// then restarts the NodeManager (twice) and verifies the expected calls on the
        /// deletion service and the local directory counts.
        /// </summary>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
        /// <exception cref="System.Exception"/>
        public virtual void TestClearLocalDirWhenNodeReboot()
        {
            nm = new TestNodeManagerReboot.MyNodeManager(this);
            nm.Start();
            ContainerManagementProtocol containerManager = nm.GetContainerManager();

            // create files under fileCache
            CreateFiles(nmLocalDir.GetAbsolutePath(), ContainerLocalizer.Filecache, 100);
            localResourceDir.Mkdirs();
            ContainerLaunchContext containerLaunchContext = Records.NewRecord <ContainerLaunchContext>();
            // Construct the Container-id
            ContainerId cId = CreateContainerId();
            URL         localResourceUri = ConverterUtils.GetYarnUrlFromPath(
                localFS.MakeQualified(new Path(localResourceDir.GetAbsolutePath())));
            LocalResource localResource = LocalResource.NewInstance(
                localResourceUri, LocalResourceType.File, LocalResourceVisibility.Application, -1,
                localResourceDir.LastModified());
            string destinationFile = "dest_file";
            IDictionary <string, LocalResource> localResources = new Dictionary <string, LocalResource>();

            localResources[destinationFile] = localResource;
            containerLaunchContext.SetLocalResources(localResources);
            IList <string> commands = new AList <string>();

            containerLaunchContext.SetCommands(commands);
            NodeId nodeId = nm.GetNMContext().GetNodeId();
            StartContainerRequest scRequest = StartContainerRequest.NewInstance(
                containerLaunchContext,
                TestContainerManager.CreateContainerToken(cId, 0, nodeId, destinationFile,
                                                          nm.GetNMContext().GetContainerTokenSecretManager()));
            IList <StartContainerRequest> list = new AList <StartContainerRequest>();

            list.AddItem(scRequest);
            StartContainersRequest allRequests = StartContainersRequest.NewInstance(list);
            UserGroupInformation   currentUser = UserGroupInformation.CreateRemoteUser(
                cId.GetApplicationAttemptId().ToString());
            NMTokenIdentifier nmIdentifier = new NMTokenIdentifier(
                cId.GetApplicationAttemptId(), nodeId, user, 123);

            currentUser.AddTokenIdentifier(nmIdentifier);
            currentUser.DoAs(new _PrivilegedExceptionAction_152(this, allRequests));
            IList <ContainerId> containerIds = new AList <ContainerId>();

            containerIds.AddItem(cId);
            GetContainerStatusesRequest request = GetContainerStatusesRequest.NewInstance(containerIds);

            Org.Apache.Hadoop.Yarn.Server.Nodemanager.Containermanager.Container.Container container
                = nm.GetNMContext().GetContainers()[request.GetContainerIds()[0]];
            int MaxTries = 20;
            int numTries = 0;

            // Poll every 500 ms until the container reaches DONE or the retry budget is used up.
            while (!container.GetContainerState().Equals(ContainerState.Done) && numTries <=
                   MaxTries)
            {
                try
                {
                    Sharpen.Thread.Sleep(500);
                }
                catch (Exception)
                {
                    // Do nothing
                }
                numTries++;
            }
            NUnit.Framework.Assert.AreEqual(ContainerState.Done, container.GetContainerState());
            NUnit.Framework.Assert.IsTrue("The container should create a subDir named currentUser: "
                                          + user + "under localDir/usercache",
                                          NumOfLocalDirs(nmLocalDir.GetAbsolutePath(), ContainerLocalizer.Usercache) > 0);
            NUnit.Framework.Assert.IsTrue("There should be files or Dirs under nm_private when "
                                          + "container is launched",
                                          NumOfLocalDirs(nmLocalDir.GetAbsolutePath(), ResourceLocalizationService.NmPrivateDir) > 0);
            // restart the NodeManager
            RestartNM(MaxTries);
            CheckNumOfLocalDirs();
            Org.Mockito.Mockito.Verify(delService, Org.Mockito.Mockito.Times(1)).Delete(
                (string)Matchers.IsNull(),
                Matchers.ArgThat(new TestNodeManagerReboot.PathInclude(
                                     this, ResourceLocalizationService.NmPrivateDir + "_DEL_")));
            Org.Mockito.Mockito.Verify(delService, Org.Mockito.Mockito.Times(1)).Delete(
                (string)Matchers.IsNull(),
                Matchers.ArgThat(new TestNodeManagerReboot.PathInclude(
                                     this, ContainerLocalizer.Filecache + "_DEL_")));
            Org.Mockito.Mockito.Verify(delService, Org.Mockito.Mockito.Times(1)).ScheduleFileDeletionTask
                (Matchers.ArgThat(new TestNodeManagerReboot.FileDeletionInclude(
                                      this, user, null, new string[] { destinationFile })));
            Org.Mockito.Mockito.Verify(delService, Org.Mockito.Mockito.Times(1)).ScheduleFileDeletionTask
                (Matchers.ArgThat(new TestNodeManagerReboot.FileDeletionInclude(
                                      this, null, ContainerLocalizer.Usercache + "_DEL_", new string[] {  })));
            // restart the NodeManager again
            // this time usercache directory should be empty
            RestartNM(MaxTries);
            CheckNumOfLocalDirs();
        }