/// <summary>
/// Sends an allocate request with empty ask, release and blacklist lists
/// through <c>amClient</c> and verifies that the response equals the fake
/// allocate response created by <c>cluster</c>.
/// </summary>
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
/// <exception cref="System.IO.IOException"/>
public virtual void TestAllocateOnHA()
{
    ResourceBlacklistRequest emptyBlacklist = ResourceBlacklistRequest.NewInstance(
        new AList<string>(), new AList<string>());
    AllocateRequest request = AllocateRequest.NewInstance(
        0, 50f, new AList<ResourceRequest>(), new AList<ContainerId>(), emptyBlacklist);

    AllocateResponse response = amClient.Allocate(request);

    // NUnit's AreEqual takes (expected, actual); keeping that order makes the
    // failure message label the two values correctly.
    NUnit.Framework.Assert.AreEqual(this.cluster.CreateFakeAllocateResponse(), response);
}
/// <summary>
/// Registers an application master against a mock RM and then sends an
/// allocate request whose blacklist additions contain
/// <c>ResourceRequest.Any</c>. The test passes only if the allocate call
/// throws <c>InvalidResourceBlacklistRequestException</c>.
/// </summary>
public virtual void TestValidateResourceBlacklistRequest()
{
    TestAMAuthorization.MyContainerManager containerManager =
        new TestAMAuthorization.MyContainerManager();
    TestAMAuthorization.MockRMWithAMS rm =
        new TestAMAuthorization.MockRMWithAMS(new YarnConfiguration(), containerManager);
    rm.Start();

    bool error = false;
    try
    {
        MockNM nm1 = rm.RegisterNode("localhost:1234", 5120);
        IDictionary<ApplicationAccessType, string> acls =
            new Dictionary<ApplicationAccessType, string>(2);
        acls[ApplicationAccessType.ViewApp] = "*";
        RMApp app = rm.SubmitApp(1024, "appname", "appuser", acls);
        nm1.NodeHeartbeat(true);
        RMAppAttempt attempt = app.GetCurrentAppAttempt();
        ApplicationAttemptId applicationAttemptId = attempt.GetAppAttemptId();
        WaitForLaunchedState(attempt);

        // Create a client to the RM.
        Configuration conf = rm.GetConfig();
        YarnRPC rpc = YarnRPC.Create(conf);
        UserGroupInformation currentUser =
            UserGroupInformation.CreateRemoteUser(applicationAttemptId.ToString());
        Credentials credentials = containerManager.GetContainerCredentials();
        IPEndPoint rmBindAddress = rm.GetApplicationMasterService().GetBindAddress();
        Org.Apache.Hadoop.Security.Token.Token<TokenIdentifier> amRMToken =
            TestAMAuthorization.MockRMWithAMS.SetupAndReturnAMRMToken(
                rmBindAddress, credentials.GetAllTokens());
        currentUser.AddToken(amRMToken);
        ApplicationMasterProtocol client =
            currentUser.DoAs(new _PrivilegedAction_626(rpc, rmBindAddress, conf));

        RegisterApplicationMasterRequest request =
            Org.Apache.Hadoop.Yarn.Util.Records.NewRecord<RegisterApplicationMasterRequest>();
        client.RegisterApplicationMaster(request);

        // Blacklisting ResourceRequest.Any is the invalid input under test.
        ResourceBlacklistRequest blacklistRequest = ResourceBlacklistRequest.NewInstance(
            Sharpen.Collections.SingletonList(ResourceRequest.Any), null);
        AllocateRequest allocateRequest =
            AllocateRequest.NewInstance(0, 0.0f, null, null, blacklistRequest);
        try
        {
            client.Allocate(allocateRequest);
        }
        catch (InvalidResourceBlacklistRequestException)
        {
            error = true;
        }
    }
    finally
    {
        // Stop the mock RM even when setup or the RPC fails unexpectedly, so
        // a failing run does not leave it running.
        rm.Stop();
    }

    NUnit.Framework.Assert.IsTrue("Did not catch InvalidResourceBlacklistRequestException", error);
}
/// <summary>
/// Checks how <c>LocalContainerAllocator.Heartbeat</c> reports a scheduler
/// whose allocate call always fails: a <c>YarnException</c> with the default
/// configuration, and a <c>YarnRuntimeException</c> once
/// <c>MRJobConfig.MrAmToRmWaitIntervalMs</c> is set to 0.
/// </summary>
public virtual void TestRMConnectionRetry()
{
    // Scheduler mock whose Allocate always throws the remote form of an
    // IOException("forcefail").
    ApplicationMasterProtocol failingScheduler =
        Org.Mockito.Mockito.Mock<ApplicationMasterProtocol>();
    Org.Mockito.Mockito
        .When(failingScheduler.Allocate(Matchers.IsA<AllocateRequest>()))
        .ThenThrow(RPCUtil.GetRemoteException(new IOException("forcefail")));
    Configuration allocatorConf = new Configuration();

    // Phase 1: verify the connection exception is thrown if we haven't
    // exhausted the retry interval.
    LocalContainerAllocator retryingAllocator =
        new TestLocalContainerAllocator.StubbedLocalContainerAllocator(failingScheduler);
    retryingAllocator.Init(allocatorConf);
    retryingAllocator.Start();
    try
    {
        retryingAllocator.Heartbeat();
        NUnit.Framework.Assert.Fail("heartbeat was supposed to throw");
    }
    catch (YarnException)
    {
        // YarnException is expected
    }
    finally
    {
        retryingAllocator.Stop();
    }

    // Phase 2: verify YarnRuntimeException is thrown when the retry interval
    // has expired.
    allocatorConf.SetLong(MRJobConfig.MrAmToRmWaitIntervalMs, 0);
    LocalContainerAllocator expiredAllocator =
        new TestLocalContainerAllocator.StubbedLocalContainerAllocator(failingScheduler);
    expiredAllocator.Init(allocatorConf);
    expiredAllocator.Start();
    try
    {
        expiredAllocator.Heartbeat();
        NUnit.Framework.Assert.Fail("heartbeat was supposed to throw");
    }
    catch (YarnRuntimeException)
    {
        // YarnRuntimeException is expected
    }
    finally
    {
        expiredAllocator.Stop();
    }
}
/// <summary>
/// Protobuf-facing allocate entry point: wraps the incoming request proto in
/// an <c>AllocateRequestPBImpl</c>, forwards it to <c>real.Allocate</c> and
/// returns the proto held by the resulting <c>AllocateResponsePBImpl</c>.
/// </summary>
/// <param name="arg0">RPC controller; not read by this method.</param>
/// <param name="proto">The allocate request in protobuf form.</param>
/// <returns>The protobuf form of the allocate response.</returns>
/// <exception cref="Com.Google.Protobuf.ServiceException">
/// Wraps any <c>YarnException</c> or <c>IOException</c> thrown by the
/// underlying call.
/// </exception>
public virtual YarnServiceProtos.AllocateResponseProto Allocate(
    RpcController arg0, YarnServiceProtos.AllocateRequestProto proto)
{
    AllocateRequestPBImpl allocateRequest = new AllocateRequestPBImpl(proto);
    try
    {
        AllocateResponsePBImpl allocateResponse =
            (AllocateResponsePBImpl)real.Allocate(allocateRequest);
        return allocateResponse.GetProto();
    }
    catch (YarnException yarnFailure)
    {
        throw new ServiceException(yarnFailure);
    }
    catch (IOException ioFailure)
    {
        throw new ServiceException(ioFailure);
    }
}
/// <summary>
/// Registers and then finishes an application master against a mock RM,
/// drives the attempt to the Finished state, and verifies that a subsequent
/// allocate call made with the attempt's AMRMToken fails with a message saying
/// the attempt was not found in the AMRMTokenSecretManager.
/// </summary>
public virtual void TestTokenExpiry()
{
    TestAMAuthorization.MyContainerManager containerManager =
        new TestAMAuthorization.MyContainerManager();
    TestAMAuthorization.MockRMWithAMS rm =
        new TestAMAuthorization.MockRMWithAMS(conf, containerManager);
    rm.Start();

    // Deliberately not named "conf": the constructor call above already uses
    // the member "conf", and C# rejects using a simple name before a local of
    // the same name is declared in the same block.
    Configuration rmConf = rm.GetConfig();
    YarnRPC rpc = YarnRPC.Create(rmConf);
    ApplicationMasterProtocol rmClient = null;
    try
    {
        MockNM nm1 = rm.RegisterNode("localhost:1234", 5120);
        RMApp app = rm.SubmitApp(1024);
        nm1.NodeHeartbeat(true);

        int waitCount = 0;
        while (containerManager.containerTokens == null && waitCount++ < 20)
        {
            Log.Info("Waiting for AM Launch to happen..");
            Sharpen.Thread.Sleep(1000);
        }
        NUnit.Framework.Assert.IsNotNull(containerManager.containerTokens);

        RMAppAttempt attempt = app.GetCurrentAppAttempt();
        ApplicationAttemptId applicationAttemptId = attempt.GetAppAttemptId();

        // Create a client to the RM.
        UserGroupInformation currentUser =
            UserGroupInformation.CreateRemoteUser(applicationAttemptId.ToString());
        Credentials credentials = containerManager.GetContainerCredentials();
        IPEndPoint rmBindAddress = rm.GetApplicationMasterService().GetBindAddress();
        Org.Apache.Hadoop.Security.Token.Token<TokenIdentifier> amRMToken =
            TestAMAuthorization.MockRMWithAMS.SetupAndReturnAMRMToken(
                rmBindAddress, credentials.GetAllTokens());
        currentUser.AddToken(amRMToken);
        rmClient = CreateRMClient(rm, rmConf, rpc, currentUser);

        RegisterApplicationMasterRequest request =
            Org.Apache.Hadoop.Yarn.Util.Records.NewRecord<RegisterApplicationMasterRequest>();
        rmClient.RegisterApplicationMaster(request);

        FinishApplicationMasterRequest finishAMRequest =
            Org.Apache.Hadoop.Yarn.Util.Records.NewRecord<FinishApplicationMasterRequest>();
        finishAMRequest.SetFinalApplicationStatus(FinalApplicationStatus.Succeeded);
        finishAMRequest.SetDiagnostics("diagnostics");
        finishAMRequest.SetTrackingUrl("url");
        rmClient.FinishApplicationMaster(finishAMRequest);

        // Send RMAppAttemptEventType.CONTAINER_FINISHED to transit RMAppAttempt
        // from Finishing state to Finished State. Both AMRMToken and
        // ClientToAMToken will be removed.
        ContainerStatus containerStatus = BuilderUtils.NewContainerStatus(
            attempt.GetMasterContainer().GetId(), ContainerState.Complete,
            "AM Container Finished", 0);
        rm.GetRMContext().GetDispatcher().GetEventHandler().Handle(
            new RMAppAttemptContainerFinishedEvent(
                applicationAttemptId, containerStatus, nm1.GetNodeId()));

        // Make sure the RMAppAttempt is at Finished State.
        // Both AMRMToken and ClientToAMToken have been removed.
        int count = 0;
        while (attempt.GetState() != RMAppAttemptState.Finished && count < maxWaitAttempts)
        {
            Sharpen.Thread.Sleep(100);
            count++;
        }
        NUnit.Framework.Assert.IsTrue(attempt.GetState() == RMAppAttemptState.Finished);

        // Now simulate trying to allocate. RPC call itself should throw auth
        // exception.
        rpc.StopProxy(rmClient, rmConf);
        // To avoid using cached client
        rmClient = CreateRMClient(rm, rmConf, rpc, currentUser);
        AllocateRequest allocateRequest =
            Org.Apache.Hadoop.Yarn.Util.Records.NewRecord<AllocateRequest>();

        // Capture the failure and assert outside the try block: NUnit reports
        // Assert.Fail by throwing an exception, which a catch (Exception)
        // around the call would intercept and then mis-handle as if it were
        // the RPC failure.
        Exception allocateFailure = null;
        try
        {
            rmClient.Allocate(allocateRequest);
        }
        catch (Exception t)
        {
            allocateFailure = t;
        }
        if (allocateFailure == null)
        {
            NUnit.Framework.Assert.Fail("You got to be kidding me! " + "Using App tokens after app-finish should fail!");
        }
        Log.Info("Exception found is ", allocateFailure);
        // The exception will still have the earlier appAttemptId as it picks it
        // up from the token.
        NUnit.Framework.Assert.IsTrue(allocateFailure.InnerException.Message.Contains(
            applicationAttemptId.ToString() + " not found in AMRMTokenSecretManager."));
    }
    finally
    {
        rm.Stop();
        if (rmClient != null)
        {
            rpc.StopProxy(rmClient, rmConf);
        }
    }
}
/// <summary>
/// Verifies AMRMToken master-key roll-over against a mock RM: the old token
/// keeps working while the key is rolling, a token created after the new key
/// is activated works, and an allocate call made after re-adding the old token
/// is rejected.
/// </summary>
public virtual void TestMasterKeyRollOver()
{
    conf.SetLong(YarnConfiguration.RmAmrmTokenMasterKeyRollingIntervalSecs, rolling_interval_sec);
    conf.SetLong(YarnConfiguration.RmAmExpiryIntervalMs, am_expire_ms);
    TestAMAuthorization.MyContainerManager containerManager =
        new TestAMAuthorization.MyContainerManager();
    TestAMAuthorization.MockRMWithAMS rm =
        new TestAMAuthorization.MockRMWithAMS(conf, containerManager);
    rm.Start();
    long startTime = Runtime.CurrentTimeMillis();

    // Deliberately not named "conf": the statements above already use the
    // member "conf", and C# rejects using a simple name before a local of the
    // same name is declared in the same block.
    Configuration rmConf = rm.GetConfig();
    YarnRPC rpc = YarnRPC.Create(rmConf);
    ApplicationMasterProtocol rmClient = null;
    AMRMTokenSecretManager appTokenSecretManager =
        rm.GetRMContext().GetAMRMTokenSecretManager();
    MasterKeyData oldKey = appTokenSecretManager.GetMasterKey();
    NUnit.Framework.Assert.IsNotNull(oldKey);
    try
    {
        MockNM nm1 = rm.RegisterNode("localhost:1234", 5120);
        RMApp app = rm.SubmitApp(1024);
        nm1.NodeHeartbeat(true);

        int waitCount = 0;
        while (containerManager.containerTokens == null && waitCount++ < maxWaitAttempts)
        {
            Log.Info("Waiting for AM Launch to happen..");
            Sharpen.Thread.Sleep(1000);
        }
        NUnit.Framework.Assert.IsNotNull(containerManager.containerTokens);

        RMAppAttempt attempt = app.GetCurrentAppAttempt();
        ApplicationAttemptId applicationAttemptId = attempt.GetAppAttemptId();

        // Create a client to the RM.
        UserGroupInformation currentUser =
            UserGroupInformation.CreateRemoteUser(applicationAttemptId.ToString());
        Credentials credentials = containerManager.GetContainerCredentials();
        IPEndPoint rmBindAddress = rm.GetApplicationMasterService().GetBindAddress();
        Org.Apache.Hadoop.Security.Token.Token<TokenIdentifier> amRMToken =
            TestAMAuthorization.MockRMWithAMS.SetupAndReturnAMRMToken(
                rmBindAddress, credentials.GetAllTokens());
        currentUser.AddToken(amRMToken);
        rmClient = CreateRMClient(rm, rmConf, rpc, currentUser);

        RegisterApplicationMasterRequest request =
            Org.Apache.Hadoop.Yarn.Util.Records.NewRecord<RegisterApplicationMasterRequest>();
        rmClient.RegisterApplicationMaster(request);

        // One allocate call.
        AllocateRequest allocateRequest =
            Org.Apache.Hadoop.Yarn.Util.Records.NewRecord<AllocateRequest>();
        NUnit.Framework.Assert.IsTrue(rmClient.Allocate(allocateRequest).GetAMCommand() == null);

        // Wait for enough time and make sure the roll_over happens.
        // In the meantime, the old AMRMToken should continue to work.
        while (Runtime.CurrentTimeMillis() - startTime < rolling_interval_sec * 1000)
        {
            rmClient.Allocate(allocateRequest);
            Sharpen.Thread.Sleep(500);
        }
        MasterKeyData newKey = appTokenSecretManager.GetMasterKey();
        NUnit.Framework.Assert.IsNotNull(newKey);
        NUnit.Framework.Assert.IsFalse("Master key should have changed!", oldKey.Equals(newKey));

        // Another allocate call with old AMRMToken. Should continue to work.
        rpc.StopProxy(rmClient, rmConf);
        // To avoid using cached client
        rmClient = CreateRMClient(rm, rmConf, rpc, currentUser);
        NUnit.Framework.Assert.IsTrue(rmClient.Allocate(allocateRequest).GetAMCommand() == null);

        // Keep allocating until the secret manager's current master key is no
        // longer the old key, an allocate call fails, or the attempts run out.
        waitCount = 0;
        while (waitCount++ <= maxWaitAttempts)
        {
            if (appTokenSecretManager.GetCurrnetMasterKeyData() != oldKey)
            {
                break;
            }
            try
            {
                rmClient.Allocate(allocateRequest);
            }
            catch (Exception)
            {
                break;
            }
            Sharpen.Thread.Sleep(200);
        }

        // The next master key should now be active and have replaced the
        // current master key.
        NUnit.Framework.Assert.IsTrue(appTokenSecretManager.GetCurrnetMasterKeyData().Equals(newKey));
        NUnit.Framework.Assert.IsTrue(appTokenSecretManager.GetMasterKey().Equals(newKey));
        NUnit.Framework.Assert.IsTrue(appTokenSecretManager.GetNextMasterKeyData() == null);

        // Create a new Token
        Org.Apache.Hadoop.Security.Token.Token<AMRMTokenIdentifier> newToken =
            appTokenSecretManager.CreateAndGetAMRMToken(applicationAttemptId);
        SecurityUtil.SetTokenService(newToken, rmBindAddress);
        currentUser.AddToken(newToken);

        // Another allocate call. Should continue to work.
        rpc.StopProxy(rmClient, rmConf);
        // To avoid using cached client
        rmClient = CreateRMClient(rm, rmConf, rpc, currentUser);
        allocateRequest = Org.Apache.Hadoop.Yarn.Util.Records.NewRecord<AllocateRequest>();
        NUnit.Framework.Assert.IsTrue(rmClient.Allocate(allocateRequest).GetAMCommand() == null);

        // Should not work by using the old AMRMToken.
        rpc.StopProxy(rmClient, rmConf);
        // To avoid using cached client

        // Record the rejection and assert after the try block: NUnit reports
        // Assert.Fail by throwing an exception, so calling it inside a
        // try/catch (Exception) would let the catch swallow the failure.
        bool oldTokenRejected = false;
        try
        {
            currentUser.AddToken(amRMToken);
            rmClient = CreateRMClient(rm, rmConf, rpc, currentUser);
            allocateRequest = Org.Apache.Hadoop.Yarn.Util.Records.NewRecord<AllocateRequest>();
            rmClient.Allocate(allocateRequest);
        }
        catch (Exception)
        {
            // expect exception
            oldTokenRejected = true;
        }
        if (!oldTokenRejected)
        {
            NUnit.Framework.Assert.Fail("The old Token should not work");
        }
    }
    finally
    {
        rm.Stop();
        if (rmClient != null)
        {
            rpc.StopProxy(rmClient, rmConf);
        }
    }
}
/// <summary>
/// Sends the pending asks, releases and blacklist changes to the RM in a
/// single allocate call and records the cluster state reported in the
/// response.
/// </summary>
/// <remarks>
/// The pending collections are snapshotted and cleared under the lock before
/// the RPC, which runs outside the lock. If the call finishes without a
/// response, the snapshots are merged back so the next call retries them. If
/// the RM reports that the application master is not registered, the
/// outstanding requests are re-queued, the AM re-registers, and allocate is
/// retried by calling this method again.
/// </remarks>
/// <param name="progressIndicator">
/// Progress value forwarded to the RM; checked to be non-negative via
/// <c>Preconditions.CheckArgument</c>.
/// </param>
/// <returns>
/// The response from the RM, or the response of the retried call after a
/// re-registration.
/// </returns>
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
/// <exception cref="System.IO.IOException"/>
public override AllocateResponse Allocate(float progressIndicator)
{
    Preconditions.CheckArgument(progressIndicator >= 0, "Progress indicator should not be negative");
    AllocateResponse allocateResponse = null;
    IList<ResourceRequest> askList = null;
    IList<ContainerId> releaseList = null;
    AllocateRequest allocateRequest = null;
    IList<string> blacklistToAdd = new AList<string>();
    IList<string> blacklistToRemove = new AList<string>();
    try
    {
        lock (this)
        {
            askList = new AList<ResourceRequest>(ask.Count);
            foreach (ResourceRequest r in ask)
            {
                // create a copy of ResourceRequest as we might change it while the
                // RPC layer is using it to send info across
                askList.AddItem(ResourceRequest.NewInstance(
                    r.GetPriority(), r.GetResourceName(), r.GetCapability(),
                    r.GetNumContainers(), r.GetRelaxLocality(), r.GetNodeLabelExpression()));
            }
            releaseList = new AList<ContainerId>(release);

            // optimistically clear this collection assuming no RPC failure
            ask.Clear();
            release.Clear();

            Sharpen.Collections.AddAll(blacklistToAdd, blacklistAdditions);
            Sharpen.Collections.AddAll(blacklistToRemove, blacklistRemovals);
            ResourceBlacklistRequest blacklistRequest =
                ResourceBlacklistRequest.NewInstance(blacklistToAdd, blacklistToRemove);
            allocateRequest = AllocateRequest.NewInstance(
                lastResponseId, progressIndicator, askList, releaseList, blacklistRequest);

            // clear blacklistAdditions and blacklistRemovals before
            // unsynchronized part
            blacklistAdditions.Clear();
            blacklistRemovals.Clear();
        }

        try
        {
            allocateResponse = rmClient.Allocate(allocateRequest);
        }
        catch (ApplicationMasterNotRegisteredException)
        {
            Log.Warn("ApplicationMaster is out of sync with ResourceManager," + " hence resyncing.");
            lock (this)
            {
                // Re-queue the pending releases, the blacklisted nodes and
                // every request in remoteRequestsTable so the retried
                // allocate call sends them again.
                Sharpen.Collections.AddAll(release, this.pendingRelease);
                Sharpen.Collections.AddAll(blacklistAdditions, this.blacklistedNodes);
                foreach (IDictionary<string, SortedDictionary<Resource, AMRMClientImpl.ResourceRequestInfo>> rr in remoteRequestsTable.Values)
                {
                    foreach (IDictionary<Resource, AMRMClientImpl.ResourceRequestInfo> capabilities in rr.Values)
                    {
                        foreach (AMRMClientImpl.ResourceRequestInfo request in capabilities.Values)
                        {
                            AddResourceRequestToAsk(request.remoteRequest);
                        }
                    }
                }
            }
            // re register with RM
            RegisterApplicationMaster();
            allocateResponse = Allocate(progressIndicator);
            return allocateResponse;
        }

        lock (this)
        {
            // update these on successful RPC
            clusterNodeCount = allocateResponse.GetNumClusterNodes();
            lastResponseId = allocateResponse.GetResponseId();
            clusterAvailableResources = allocateResponse.GetAvailableResources();
            if (!allocateResponse.GetNMTokens().IsEmpty())
            {
                PopulateNMTokens(allocateResponse.GetNMTokens());
            }
            if (allocateResponse.GetAMRMToken() != null)
            {
                UpdateAMRMToken(allocateResponse.GetAMRMToken());
            }
            if (!pendingRelease.IsEmpty() && !allocateResponse.GetCompletedContainersStatuses().IsEmpty())
            {
                RemovePendingReleaseRequests(allocateResponse.GetCompletedContainersStatuses());
            }
        }
    }
    finally
    {
        // TODO how to differentiate remote yarn exception vs error in rpc
        if (allocateResponse == null)
        {
            // we hit an exception in allocate()
            // preserve ask and release for next call to allocate()
            lock (this)
            {
                // The snapshots are still null when the failure happened
                // before they were taken; skip them in that case so the
                // original exception is not replaced by a null dereference.
                if (releaseList != null)
                {
                    Sharpen.Collections.AddAll(release, releaseList);
                }

                // requests could have been added or deleted during call to allocate
                // If requests were added/removed then there is nothing to do since
                // the ResourceRequest object in ask would have the actual new value.
                // If ask does not have this ResourceRequest then it was unchanged and
                // so we can add the value back safely.
                // This assumes that there will be no concurrent calls to allocate() and
                // so we don't have to worry about ask being changed in the
                // synchronized block at the beginning of this method.
                if (askList != null)
                {
                    foreach (ResourceRequest oldAsk in askList)
                    {
                        if (!ask.Contains(oldAsk))
                        {
                            ask.AddItem(oldAsk);
                        }
                    }
                }

                Sharpen.Collections.AddAll(blacklistAdditions, blacklistToAdd);
                Sharpen.Collections.AddAll(blacklistRemovals, blacklistToRemove);
            }
        }
    }
    return allocateResponse;
}