/// <summary>
/// Calls <c>Connect()</c>, builds the launch context for the master container and
/// submits a single start request for it through <c>containerMgrProxy</c>.
/// If the response lists the master container among the failed requests, the
/// deserialized failure is handed to <c>ParseAndThrowException</c>.
/// </summary>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
private void Launch()
{
    Connect();
    ContainerId amContainerId = masterContainer.GetId();
    ApplicationSubmissionContext submissionContext = application.GetSubmissionContext();
    Log.Info("Setting up container " + masterContainer + " for AM " + application.GetAppAttemptId());

    // Wrap the single AM start request in the batch request type the protocol expects.
    ContainerLaunchContext amLaunchContext = CreateAMContainerLaunchContext(submissionContext, amContainerId);
    StartContainerRequest amRequest = StartContainerRequest.NewInstance(amLaunchContext, masterContainer.GetContainerToken());
    IList<StartContainerRequest> batch = new AList<StartContainerRequest>();
    batch.AddItem(amRequest);
    StartContainersResponse response = containerMgrProxy.StartContainers(StartContainersRequest.NewInstance(batch));

    bool amStartFailed = response.GetFailedRequests() != null
        && response.GetFailedRequests().Contains(amContainerId);
    if (!amStartFailed)
    {
        Log.Info("Done launching container " + masterContainer + " for AM " + application.GetAppAttemptId());
        return;
    }

    Exception cause = response.GetFailedRequests()[amContainerId].DeSerialize();
    ParseAndThrowException(cause);
}
/// <summary>
/// Sends a single start-container request, built from an empty launch context and
/// <paramref name="containerToken"/>, through a proxy obtained from
/// <c>GetContainerManagementProtocolProxy</c>. Every failure reported in the
/// response is deserialized and passed to <c>ParseAndThrowException</c>.
/// The proxy, if one was obtained, is always stopped afterwards.
/// </summary>
/// <param name="rpc">RPC layer used to create and later stop the proxy.</param>
/// <param name="nmToken">Token forwarded to the proxy factory.</param>
/// <param name="containerToken">Token attached to the start request.</param>
/// <param name="nodeId">Node forwarded to the proxy factory.</param>
/// <param name="user">User forwarded to the proxy factory.</param>
/// <exception cref="System.Exception"/>
private void StartContainer(YarnRPC rpc, Token nmToken, Token containerToken, NodeId nodeId, string user)
{
    ContainerLaunchContext context = Org.Apache.Hadoop.Yarn.Util.Records.NewRecord<ContainerLaunchContext>();
    StartContainerRequest scRequest = StartContainerRequest.NewInstance(context, containerToken);
    IList<StartContainerRequest> list = new AList<StartContainerRequest>();
    list.AddItem(scRequest);
    StartContainersRequest allRequests = StartContainersRequest.NewInstance(list);
    ContainerManagementProtocol proxy = null;
    try
    {
        proxy = GetContainerManagementProtocolProxy(rpc, nmToken, nodeId, user);
        StartContainersResponse response = proxy.StartContainers(allRequests);
        // Other callers of StartContainers in this code base treat a null
        // failed-request map as "no failures"; do the same here instead of
        // dereferencing it unconditionally.
        if (response.GetFailedRequests() != null)
        {
            foreach (SerializedException ex in response.GetFailedRequests().Values)
            {
                ParseAndThrowException(ex.DeSerialize());
            }
        }
    }
    finally
    {
        if (proxy != null)
        {
            rpc.StopProxy(proxy, conf);
        }
    }
}
/// <summary>
/// Starts ten containers in one batch. Containers with an even id carry the invalid
/// RM identifier, the odd ones carry <c>DummyRmIdentifier</c>; the test expects the
/// five odd ones to be reported as started and the five even ones as failed.
/// </summary>
public virtual void TestMultipleContainersLaunch()
{
    containerManager.Start();
    IList<StartContainerRequest> startRequests = new AList<StartContainerRequest>();
    ContainerLaunchContext launchContext = recordFactory.NewRecordInstance<ContainerLaunchContext>();

    for (int index = 0; index < 10; index++)
    {
        ContainerId containerId = CreateContainerId(index);
        // container with even id fail
        bool hasEvenId = (index & 1) == 0;
        long rmIdentifier = hasEvenId ? ResourceManagerConstants.RmInvalidIdentifier : DummyRmIdentifier;
        Token token = CreateContainerToken(containerId, rmIdentifier, context.GetNodeId(), user,
            context.GetContainerTokenSecretManager());
        startRequests.AddItem(StartContainerRequest.NewInstance(launchContext, token));
    }

    StartContainersResponse response =
        containerManager.StartContainers(StartContainersRequest.NewInstance(startRequests));

    // Containers with odd id should succeed.
    NUnit.Framework.Assert.AreEqual(5, response.GetSuccessfullyStartedContainers().Count);
    foreach (ContainerId started in response.GetSuccessfullyStartedContainers())
    {
        NUnit.Framework.Assert.AreEqual(1, started.GetContainerId() & 1);
    }

    // Containers with even id should fail.
    NUnit.Framework.Assert.AreEqual(5, response.GetFailedRequests().Count);
    foreach (KeyValuePair<ContainerId, SerializedException> failure in response.GetFailedRequests())
    {
        NUnit.Framework.Assert.AreEqual(0, failure.Key.GetContainerId() & 1);
        string expectedFragment = "Container " + failure.Key + " rejected as it is allocated by a previous RM";
        NUnit.Framework.Assert.IsTrue(failure.Value.GetMessage().Contains(expectedFragment));
    }
}
/// <summary>
/// Configures a single aux service ("existService"), then starts a container whose
/// service data names a different service and expects the start request to be
/// reported as failed with a "does not exist" message.
/// </summary>
public virtual void TestStartContainerFailureWithUnknownAuxService()
{
    string[] configuredServices = new string[] { "existService" };
    conf.SetStrings(YarnConfiguration.NmAuxServices, configuredServices);
    conf.SetClass(string.Format(YarnConfiguration.NmAuxServiceFmt, "existService"),
        typeof(TestAuxServices.ServiceA), typeof(Org.Apache.Hadoop.Service.Service));
    containerManager.Start();

    IList<StartContainerRequest> pendingRequests = new AList<StartContainerRequest>();
    ContainerLaunchContext launchContext = recordFactory.NewRecordInstance<ContainerLaunchContext>();

    // Attach service data for a service that was never configured above.
    string serviceName = "non_exist_auxService";
    IDictionary<string, ByteBuffer> auxData = new Dictionary<string, ByteBuffer>();
    auxData[serviceName] = ByteBuffer.Wrap(Sharpen.Runtime.GetBytesForString(serviceName));
    launchContext.SetServiceData(auxData);

    ContainerId containerId = CreateContainerId(0);
    string user = "******";
    Token token = CreateContainerToken(containerId, DummyRmIdentifier, context.GetNodeId(), user,
        context.GetContainerTokenSecretManager());

    // start containers
    pendingRequests.AddItem(StartContainerRequest.NewInstance(launchContext, token));
    StartContainersResponse response =
        containerManager.StartContainers(StartContainersRequest.NewInstance(pendingRequests));

    NUnit.Framework.Assert.IsTrue(response.GetFailedRequests().Count == 1);
    NUnit.Framework.Assert.IsTrue(response.GetSuccessfullyStartedContainers().Count == 0);
    NUnit.Framework.Assert.IsTrue(response.GetFailedRequests().Contains(containerId));
    string expectedFragment = "The auxService:" + serviceName + " does not exist";
    NUnit.Framework.Assert.IsTrue(response.GetFailedRequests()[containerId].GetMessage().Contains(expectedFragment));
}
/// <summary>
/// Starts a container so that an application is registered, then repeatedly stops
/// the container manager and creates a new one over the same state store, checking
/// after each restart that the application, its ACLs and its log aggregation
/// context are what the test expects. Finally drives the application to completion
/// and verifies that no application is present after one more restart.
/// </summary>
public virtual void TestApplicationRecovery()
{
    YarnConfiguration conf = new YarnConfiguration();
    conf.SetBoolean(YarnConfiguration.NmRecoveryEnabled, true);
    conf.Set(YarnConfiguration.NmAddress, "localhost:1234");
    conf.SetBoolean(YarnConfiguration.YarnAclEnable, true);
    conf.Set(YarnConfiguration.YarnAdminAcl, "yarn_admin_user");
    NMStateStoreService stateStore = new NMMemoryStateStoreService();
    stateStore.Init(conf);
    stateStore.Start();
    Context context = new NodeManager.NMContext(new NMContainerTokenSecretManager(conf),
        new NMTokenSecretManagerInNM(), null, new ApplicationACLsManager(conf), stateStore);
    ContainerManagerImpl cm = CreateContainerManager(context);
    cm.Init(conf);
    cm.Start();

    // simulate registration with RM
    MasterKey masterKey = new MasterKeyPBImpl();
    masterKey.SetKeyId(123);
    masterKey.SetBytes(ByteBuffer.Wrap(new byte[] { 123 }));
    context.GetContainerTokenSecretManager().SetMasterKey(masterKey);
    context.GetNMTokenSecretManager().SetMasterKey(masterKey);

    // add an application by starting a container
    // The four users must be distinct (and differ from the admin ACL user above):
    // the assertions below expect modUser to pass and viewUser to fail the
    // ModifyApp check, and viewUser to pass and enemyUser to fail the ViewApp
    // check, which is impossible when they share one name.
    string appUser = "app_user1";
    string modUser = "modify_user1";
    string viewUser = "view_user1";
    string enemyUser = "enemy_user";
    ApplicationId appId = ApplicationId.NewInstance(0, 1);
    ApplicationAttemptId attemptId = ApplicationAttemptId.NewInstance(appId, 1);
    ContainerId cid = ContainerId.NewContainerId(attemptId, 1);
    IDictionary<string, LocalResource> localResources = Collections.EmptyMap();
    IDictionary<string, string> containerEnv = Sharpen.Collections.EmptyMap();
    IList<string> containerCmds = Sharpen.Collections.EmptyList();
    IDictionary<string, ByteBuffer> serviceData = Sharpen.Collections.EmptyMap();
    Credentials containerCreds = new Credentials();
    DataOutputBuffer dob = new DataOutputBuffer();
    containerCreds.WriteTokenStorageToStream(dob);
    ByteBuffer containerTokens = ByteBuffer.Wrap(dob.GetData(), 0, dob.GetLength());
    IDictionary<ApplicationAccessType, string> acls = new Dictionary<ApplicationAccessType, string>();
    acls[ApplicationAccessType.ModifyApp] = modUser;
    acls[ApplicationAccessType.ViewApp] = viewUser;
    ContainerLaunchContext clc = ContainerLaunchContext.NewInstance(localResources, containerEnv,
        containerCmds, serviceData, containerTokens, acls);

    // create the logAggregationContext
    LogAggregationContext logAggregationContext = LogAggregationContext.NewInstance("includePattern",
        "excludePattern", "includePatternInRollingAggregation", "excludePatternInRollingAggregation");
    StartContainersResponse startResponse = StartContainer(context, cm, cid, clc, logAggregationContext);
    NUnit.Framework.Assert.IsTrue(startResponse.GetFailedRequests().IsEmpty());
    NUnit.Framework.Assert.AreEqual(1, context.GetApplications().Count);
    Org.Apache.Hadoop.Yarn.Server.Nodemanager.Containermanager.Application.Application app =
        context.GetApplications()[appId];
    NUnit.Framework.Assert.IsNotNull(app);
    WaitForAppState(app, ApplicationState.Initing);
    AssertExpectedAppAccess(context, appId, appUser, modUser, viewUser, enemyUser);

    // reset container manager and verify app recovered with proper acls
    cm.Stop();
    context = new NodeManager.NMContext(new NMContainerTokenSecretManager(conf),
        new NMTokenSecretManagerInNM(), null, new ApplicationACLsManager(conf), stateStore);
    cm = CreateContainerManager(context);
    cm.Init(conf);
    cm.Start();
    NUnit.Framework.Assert.AreEqual(1, context.GetApplications().Count);
    app = context.GetApplications()[appId];
    NUnit.Framework.Assert.IsNotNull(app);

    // check whether LogAggregationContext is recovered correctly
    LogAggregationContext recovered = ((ApplicationImpl)app).GetLogAggregationContext();
    NUnit.Framework.Assert.IsNotNull(recovered);
    NUnit.Framework.Assert.AreEqual(logAggregationContext.GetIncludePattern(), recovered.GetIncludePattern());
    NUnit.Framework.Assert.AreEqual(logAggregationContext.GetExcludePattern(), recovered.GetExcludePattern());
    NUnit.Framework.Assert.AreEqual(logAggregationContext.GetRolledLogsIncludePattern(),
        recovered.GetRolledLogsIncludePattern());
    NUnit.Framework.Assert.AreEqual(logAggregationContext.GetRolledLogsExcludePattern(),
        recovered.GetRolledLogsExcludePattern());
    WaitForAppState(app, ApplicationState.Initing);
    AssertExpectedAppAccess(context, appId, appUser, modUser, viewUser, enemyUser);

    // simulate application completion
    IList<ApplicationId> finishedApps = new AList<ApplicationId>();
    finishedApps.AddItem(appId);
    cm.Handle(new CMgrCompletedAppsEvent(finishedApps, CMgrCompletedAppsEvent.Reason.ByResourcemanager));
    WaitForAppState(app, ApplicationState.ApplicationResourcesCleaningup);

    // restart and verify app is marked for finishing
    cm.Stop();
    context = new NodeManager.NMContext(new NMContainerTokenSecretManager(conf),
        new NMTokenSecretManagerInNM(), null, new ApplicationACLsManager(conf), stateStore);
    cm = CreateContainerManager(context);
    cm.Init(conf);
    cm.Start();
    NUnit.Framework.Assert.AreEqual(1, context.GetApplications().Count);
    app = context.GetApplications()[appId];
    NUnit.Framework.Assert.IsNotNull(app);
    WaitForAppState(app, ApplicationState.ApplicationResourcesCleaningup);
    AssertExpectedAppAccess(context, appId, appUser, modUser, viewUser, enemyUser);

    // simulate log aggregation completion
    app.Handle(new ApplicationEvent(app.GetAppId(), ApplicationEventType.ApplicationResourcesCleanedup));
    // Expected value goes first so that a failure message reads correctly.
    NUnit.Framework.Assert.AreEqual(ApplicationState.Finished, app.GetApplicationState());
    app.Handle(new ApplicationEvent(app.GetAppId(), ApplicationEventType.ApplicationLogHandlingFinished));

    // restart and verify app is no longer present after recovery
    cm.Stop();
    context = new NodeManager.NMContext(new NMContainerTokenSecretManager(conf),
        new NMTokenSecretManagerInNM(), null, new ApplicationACLsManager(conf), stateStore);
    cm = CreateContainerManager(context);
    cm.Init(conf);
    cm.Start();
    NUnit.Framework.Assert.IsTrue(context.GetApplications().IsEmpty());
    cm.Stop();
}

/// <summary>
/// Asserts the access outcome the recovery test expects for each user: modUser may
/// modify, viewUser may view but not modify, and enemyUser may not view.
/// </summary>
private static void AssertExpectedAppAccess(Context context, ApplicationId appId, string appUser,
    string modUser, string viewUser, string enemyUser)
{
    NUnit.Framework.Assert.IsTrue(context.GetApplicationACLsManager().CheckAccess(
        UserGroupInformation.CreateRemoteUser(modUser), ApplicationAccessType.ModifyApp, appUser, appId));
    NUnit.Framework.Assert.IsFalse(context.GetApplicationACLsManager().CheckAccess(
        UserGroupInformation.CreateRemoteUser(viewUser), ApplicationAccessType.ModifyApp, appUser, appId));
    NUnit.Framework.Assert.IsTrue(context.GetApplicationACLsManager().CheckAccess(
        UserGroupInformation.CreateRemoteUser(viewUser), ApplicationAccessType.ViewApp, appUser, appId));
    NUnit.Framework.Assert.IsFalse(context.GetApplicationACLsManager().CheckAccess(
        UserGroupInformation.CreateRemoteUser(enemyUser), ApplicationAccessType.ViewApp, appUser, appId));
}
/// <summary>
/// Sends a start request for <paramref name="container"/> and returns the services
/// metadata from the response. The container is tracked as starting for the duration
/// of the call; on any failure it is marked complete and removed again before the
/// exception propagates.
/// </summary>
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
/// <exception cref="System.IO.IOException"/>
public override IDictionary<string, ByteBuffer> StartContainer(Container container, ContainerLaunchContext containerLaunchContext)
{
    // Do synchronization on StartedContainer to prevent race condition
    // between startContainer and stopContainer only when startContainer is
    // in progress for a given container.
    NMClientImpl.StartedContainer tracked = CreateStartedContainer(container);
    lock (tracked)
    {
        AddStartingContainer(tracked);
        IDictionary<string, ByteBuffer> servicesMetaData;
        ContainerManagementProtocolProxy.ContainerManagementProtocolProxyData proxyData = null;
        try
        {
            proxyData = cmProxy.GetProxy(container.GetNodeId().ToString(), container.GetId());
            StartContainerRequest single = StartContainerRequest.NewInstance(containerLaunchContext,
                container.GetContainerToken());
            IList<StartContainerRequest> batch = new AList<StartContainerRequest>();
            batch.AddItem(single);
            StartContainersResponse response = proxyData.GetContainerManagementProtocol()
                .StartContainers(StartContainersRequest.NewInstance(batch));

            bool startFailed = response.GetFailedRequests() != null
                && response.GetFailedRequests().Contains(container.GetId());
            if (startFailed)
            {
                Exception cause = response.GetFailedRequests()[container.GetId()].DeSerialize();
                ParseAndThrowException(cause);
            }
            servicesMetaData = response.GetAllServicesMetaData();
            tracked.state = ContainerState.Running;
        }
        catch (Exception failure)
        {
            // Remove the started container if it failed to start
            tracked.state = ContainerState.Complete;
            RemoveStartedContainer(tracked);
            // YarnException and IOException propagate unchanged; anything else
            // is converted through RPCUtil first.
            if (failure is YarnException || failure is IOException)
            {
                throw;
            }
            throw RPCUtil.GetRemoteException(failure);
        }
        finally
        {
            if (proxyData != null)
            {
                cmProxy.MayBeCloseProxy(proxyData);
            }
        }
        return servicesMetaData;
    }
}
/// <summary>
/// Verifies that a start request whose token carries the invalid RM identifier is
/// reported as failed with an <c>InvalidContainerException</c>, and that a start
/// request carrying <c>DummyRmIdentifier</c> does not raise a <c>YarnException</c>.
/// </summary>
public virtual void TestContainerLaunchFromPreviousRM()
{
    containerManager.Start();
    ContainerLaunchContext containerLaunchContext = recordFactory.NewRecordInstance<ContainerLaunchContext>();
    ContainerId cId1 = CreateContainerId(0);
    ContainerId cId2 = CreateContainerId(0);
    containerLaunchContext.SetLocalResources(new Dictionary<string, LocalResource>());

    // Construct the Container with Invalid RMIdentifier
    StartContainerRequest startRequest1 = StartContainerRequest.NewInstance(containerLaunchContext,
        CreateContainerToken(cId1, ResourceManagerConstants.RmInvalidIdentifier, context.GetNodeId(), user,
            context.GetContainerTokenSecretManager()));
    IList<StartContainerRequest> list = new AList<StartContainerRequest>();
    list.AddItem(startRequest1);
    StartContainersRequest allRequests = StartContainersRequest.NewInstance(list);
    containerManager.StartContainers(allRequests);
    bool catchException = false;
    try
    {
        StartContainersResponse response = containerManager.StartContainers(allRequests);
        if (response.GetFailedRequests().Contains(cId1))
        {
            throw response.GetFailedRequests()[cId1].DeSerialize();
        }
    }
    catch (Exception e)
    {
        Sharpen.Runtime.PrintStackTrace(e);
        catchException = true;
        NUnit.Framework.Assert.IsTrue(e.Message.Contains("Container " + cId1
            + " rejected as it is allocated by a previous RM"));
        NUnit.Framework.Assert.IsTrue(Sharpen.Runtime.EqualsIgnoreCase(e.GetType().FullName,
            typeof(InvalidContainerException).FullName));
    }

    // Verify that startContainer fail because of invalid container request
    NUnit.Framework.Assert.IsTrue(catchException);

    // Construct the Container with a RMIdentifier within current RM
    StartContainerRequest startRequest2 = StartContainerRequest.NewInstance(containerLaunchContext,
        CreateContainerToken(cId2, DummyRmIdentifier, context.GetNodeId(), user,
            context.GetContainerTokenSecretManager()));
    IList<StartContainerRequest> list2 = new AList<StartContainerRequest>();
    // The request must go into list2 (not list); otherwise allRequests2 is empty
    // and the valid-identifier path is never exercised.
    list2.AddItem(startRequest2);
    StartContainersRequest allRequests2 = StartContainersRequest.NewInstance(list2);
    containerManager.StartContainers(allRequests2);
    bool noException = true;
    try
    {
        containerManager.StartContainers(allRequests2);
    }
    catch (YarnException)
    {
        noException = false;
    }

    // Verify that startContainer get no YarnException
    NUnit.Framework.Assert.IsTrue(noException);
}
/// <summary>
/// Starts the container described by <paramref name="event"/> and, on success,
/// reports the shuffle port to the task attempt and moves this container to the
/// Running state. If the container was killed before launch, or if anything in the
/// launch sequence throws, a launch-failed message is sent instead; no exception
/// escapes this method from the launch sequence itself.
/// </summary>
/// <param name="event">Supplies the launch context and container token.</param>
public virtual void Launch(ContainerRemoteLaunchEvent @event)
{
    // NOTE(review): lock (this) is kept as-is; other members of this type may
    // rely on the same monitor - confirm before switching to a private lock object.
    lock (this)
    {
        ContainerLauncherImpl.Log.Info("Launching " + this.taskAttemptID);
        if (this.state == ContainerLauncherImpl.ContainerState.KilledBeforeLaunch)
        {
            this.state = ContainerLauncherImpl.ContainerState.Done;
            this._enclosing.SendContainerLaunchFailedMsg(this.taskAttemptID,
                "Container was killed before it was launched");
            return;
        }
        ContainerManagementProtocolProxy.ContainerManagementProtocolProxyData proxy = null;
        try
        {
            proxy = this._enclosing.GetCMProxy(this.containerMgrAddress, this.containerID);
            // Construct the actual Container
            ContainerLaunchContext containerLaunchContext = @event.GetContainerLaunchContext();
            // Now launch the actual container
            StartContainerRequest startRequest = StartContainerRequest.NewInstance(containerLaunchContext,
                @event.GetContainerToken());
            IList<StartContainerRequest> list = new AList<StartContainerRequest>();
            list.AddItem(startRequest);
            StartContainersRequest requestList = StartContainersRequest.NewInstance(list);
            StartContainersResponse response = proxy.GetContainerManagementProtocol().StartContainers(requestList);
            if (response.GetFailedRequests() != null && response.GetFailedRequests().Contains(this.containerID))
            {
                throw response.GetFailedRequests()[this.containerID].DeSerialize();
            }
            // Use TryGetValue rather than the indexer: a dictionary indexer throws
            // when the shuffle service key is absent, which would bypass the null
            // check and the "Invalid shuffle port number" diagnostic below.
            ByteBuffer portInfo;
            response.GetAllServicesMetaData().TryGetValue(ShuffleHandler.MapreduceShuffleServiceid, out portInfo);
            int port = -1;
            if (portInfo != null)
            {
                port = ShuffleHandler.DeserializeMetaData(portInfo);
            }
            ContainerLauncherImpl.Log.Info("Shuffle port returned by ContainerManager for "
                + this.taskAttemptID + " : " + port);
            if (port < 0)
            {
                this.state = ContainerLauncherImpl.ContainerState.Failed;
                throw new InvalidOperationException("Invalid shuffle port number " + port
                    + " returned for " + this.taskAttemptID);
            }
            // after launching, send launched event to task attempt to move
            // it from ASSIGNED to RUNNING state
            this._enclosing.context.GetEventHandler().Handle(
                new TaskAttemptContainerLaunchedEvent(this.taskAttemptID, port));
            this.state = ContainerLauncherImpl.ContainerState.Running;
        }
        catch (Exception t)
        {
            // Every failure in the launch sequence is reported to the task attempt
            // as a launch-failed message rather than rethrown.
            string message = "Container launch failed for " + this.containerID + " : "
                + StringUtils.StringifyException(t);
            this.state = ContainerLauncherImpl.ContainerState.Failed;
            this._enclosing.SendContainerLaunchFailedMsg(this.taskAttemptID, message);
        }
        finally
        {
            if (proxy != null)
            {
                this._enclosing.cmProxy.MayBeCloseProxy(proxy);
            }
        }
    }
}