/// <summary>
/// Checks that a pending log deletion is rescheduled when the log handler is
/// recreated on top of the same state store, and that nothing is scheduled by a
/// further restart once the rescheduled task has been run.
/// </summary>
public virtual void TestRecovery()
{
    // Point the NodeManager at two local log directories and turn log
    // aggregation off.
    FilePath[] logDirs = GetLocalLogDirFiles(this.GetType().FullName, 2);
    string firstLogDir = logDirs[0].GetAbsolutePath();
    string secondLogDir = logDirs[1].GetAbsolutePath();
    conf.Set(YarnConfiguration.NmLogDirs, firstLogDir + "," + secondLogDir);
    conf.SetBoolean(YarnConfiguration.LogAggregationEnabled, false);
    conf.SetLong(YarnConfiguration.NmLogRetainSeconds,
        YarnConfiguration.DefaultNmLogRetainSeconds);
    dirsHandler.Init(conf);

    // The same in-memory store instance is handed to every handler below; it
    // is what carries state from one simulated restart to the next.
    NMStateStoreService recoveryStore = new NMMemoryStateStoreService();
    recoveryStore.Init(conf);
    recoveryStore.Start();

    TestNonAggregatingLogHandler.NonAggregatingLogHandlerWithMockExecutor handler =
        new TestNonAggregatingLogHandler.NonAggregatingLogHandlerWithMockExecutor(
            this, dispatcher, mockDelService, dirsHandler, recoveryStore);
    handler.Init(conf);
    handler.Start();

    // Drive one application through start, container finish and app finish.
    handler.Handle(new LogHandlerAppStartedEvent(
        appId, user, null, ContainerLogsRetentionPolicy.AllContainers, null));
    handler.Handle(new LogHandlerContainerFinishedEvent(container11, 0));
    handler.Handle(new LogHandlerAppFinishedEvent(appId));

    // First restart: the new handler must schedule exactly one task on its
    // mock scheduler, with a millisecond time unit.
    handler.Close();
    handler = new TestNonAggregatingLogHandler.NonAggregatingLogHandlerWithMockExecutor(
        this, dispatcher, mockDelService, dirsHandler, recoveryStore);
    handler.Init(conf);
    handler.Start();
    ArgumentCaptor<Runnable> scheduledTask = ArgumentCaptor.ForClass<Runnable>();
    Org.Mockito.Mockito.Verify(handler.mockSched).Schedule(
        scheduledTask.Capture(),
        Matchers.AnyLong(),
        Matchers.Eq(TimeUnit.Milliseconds));

    // Run the captured task by hand, then restart again: this time the mock
    // scheduler must never be asked to schedule anything.
    scheduledTask.GetValue().Run();
    handler.Close();
    handler = new TestNonAggregatingLogHandler.NonAggregatingLogHandlerWithMockExecutor(
        this, dispatcher, mockDelService, dirsHandler, recoveryStore);
    handler.Init(conf);
    handler.Start();
    Org.Mockito.Mockito.Verify(handler.mockSched, Org.Mockito.Mockito.Never()).Schedule(
        Matchers.Any<Runnable>(),
        Matchers.AnyLong(),
        Matchers.Any<TimeUnit>());
    handler.Close();
}
/// <summary>
/// Checks that a freshly constructed <c>NMContainerTokenSecretManager</c> that
/// calls <c>Recover()</c> against the same state store sees the current master
/// key, the start-container validity of each token, and the retrievability of
/// token passwords expected after zero, one and two master key rolls.
/// </summary>
/// <remarks>
/// The state store is closed in a <c>finally</c> block so that it is released
/// even when one of the assertions fails part-way through the test.
/// </remarks>
public virtual void TestRecovery()
{
    YarnConfiguration conf = new YarnConfiguration();
    conf.SetBoolean(YarnConfiguration.NmRecoveryEnabled, true);
    NodeId nodeId = NodeId.NewInstance("somehost", 1234);
    ContainerId cid1 = BuilderUtils.NewContainerId(1, 1, 1, 1);
    ContainerId cid2 = BuilderUtils.NewContainerId(2, 2, 2, 2);
    TestNMContainerTokenSecretManager.ContainerTokenKeyGeneratorForTest keygen =
        new TestNMContainerTokenSecretManager.ContainerTokenKeyGeneratorForTest(conf);
    NMMemoryStateStoreService stateStore = new NMMemoryStateStoreService();
    stateStore.Init(conf);
    stateStore.Start();
    try
    {
        NMContainerTokenSecretManager secretMgr =
            new NMContainerTokenSecretManager(conf, stateStore);
        secretMgr.SetNodeId(nodeId);
        MasterKey currentKey = keygen.GenerateKey();
        secretMgr.SetMasterKey(currentKey);
        ContainerTokenIdentifier tokenId1 =
            CreateContainerTokenId(cid1, nodeId, "user1", secretMgr);
        ContainerTokenIdentifier tokenId2 =
            CreateContainerTokenId(cid2, nodeId, "user2", secretMgr);
        NUnit.Framework.Assert.IsNotNull(secretMgr.RetrievePassword(tokenId1));
        NUnit.Framework.Assert.IsNotNull(secretMgr.RetrievePassword(tokenId2));

        // restart and verify tokens still valid
        secretMgr = new NMContainerTokenSecretManager(conf, stateStore);
        secretMgr.SetNodeId(nodeId);
        secretMgr.Recover();
        NUnit.Framework.Assert.AreEqual(currentKey, secretMgr.GetCurrentKey());
        NUnit.Framework.Assert.IsTrue(secretMgr.IsValidStartContainerRequest(tokenId1));
        NUnit.Framework.Assert.IsTrue(secretMgr.IsValidStartContainerRequest(tokenId2));
        NUnit.Framework.Assert.IsNotNull(secretMgr.RetrievePassword(tokenId1));
        NUnit.Framework.Assert.IsNotNull(secretMgr.RetrievePassword(tokenId2));

        // roll master key and start a container
        secretMgr.StartContainerSuccessful(tokenId2);
        currentKey = keygen.GenerateKey();
        secretMgr.SetMasterKey(currentKey);

        // restart and verify tokens still valid due to prev key persist;
        // tokenId2 is no longer a valid start request because its container
        // was reported as started above
        secretMgr = new NMContainerTokenSecretManager(conf, stateStore);
        secretMgr.SetNodeId(nodeId);
        secretMgr.Recover();
        NUnit.Framework.Assert.AreEqual(currentKey, secretMgr.GetCurrentKey());
        NUnit.Framework.Assert.IsTrue(secretMgr.IsValidStartContainerRequest(tokenId1));
        NUnit.Framework.Assert.IsFalse(secretMgr.IsValidStartContainerRequest(tokenId2));
        NUnit.Framework.Assert.IsNotNull(secretMgr.RetrievePassword(tokenId1));
        NUnit.Framework.Assert.IsNotNull(secretMgr.RetrievePassword(tokenId2));

        // roll master key again, restart, and verify that passwords can no
        // longer be retrieved for either token
        currentKey = keygen.GenerateKey();
        secretMgr.SetMasterKey(currentKey);
        secretMgr = new NMContainerTokenSecretManager(conf, stateStore);
        secretMgr.SetNodeId(nodeId);
        secretMgr.Recover();
        NUnit.Framework.Assert.AreEqual(currentKey, secretMgr.GetCurrentKey());
        NUnit.Framework.Assert.IsTrue(secretMgr.IsValidStartContainerRequest(tokenId1));
        NUnit.Framework.Assert.IsFalse(secretMgr.IsValidStartContainerRequest(tokenId2));
        try
        {
            secretMgr.RetrievePassword(tokenId1);
            NUnit.Framework.Assert.Fail("token should not be valid");
        }
        catch (SecretManager.InvalidToken)
        {
            // expected
        }
        try
        {
            secretMgr.RetrievePassword(tokenId2);
            NUnit.Framework.Assert.Fail("token should not be valid");
        }
        catch (SecretManager.InvalidToken)
        {
            // expected
        }
    }
    finally
    {
        stateStore.Close();
    }
}
/// <summary>
/// Checks that an application started through the container manager is still
/// present, with the same ACL behaviour and the same LogAggregationContext
/// patterns, after the container manager is recreated on top of the same state
/// store, and that it is no longer present once it has finished and log
/// handling has completed.
/// </summary>
public virtual void TestApplicationRecovery()
{
    YarnConfiguration conf = new YarnConfiguration();
    conf.SetBoolean(YarnConfiguration.NmRecoveryEnabled, true);
    conf.Set(YarnConfiguration.NmAddress, "localhost:1234");
    conf.SetBoolean(YarnConfiguration.YarnAclEnable, true);
    conf.Set(YarnConfiguration.YarnAdminAcl, "yarn_admin_user");
    NMStateStoreService stateStore = new NMMemoryStateStoreService();
    stateStore.Init(conf);
    stateStore.Start();
    Context context = NewRecoveryContext(conf, stateStore);
    ContainerManagerImpl cm = CreateContainerManager(context);
    cm.Init(conf);
    cm.Start();

    // simulate registration with RM
    MasterKey masterKey = new MasterKeyPBImpl();
    masterKey.SetKeyId(123);
    masterKey.SetBytes(ByteBuffer.Wrap(new byte[] { 123 }));
    context.GetContainerTokenSecretManager().SetMasterKey(masterKey);
    context.GetNMTokenSecretManager().SetMasterKey(masterKey);

    // add an application by starting a container
    // The four users must be distinct: the ACL assertions below expect each of
    // them to be granted a different level of access.
    string appUser = "app_user1";
    string modUser = "modify_user1";
    string viewUser = "view_user1";
    string enemyUser = "enemy_user";
    ApplicationId appId = ApplicationId.NewInstance(0, 1);
    ApplicationAttemptId attemptId = ApplicationAttemptId.NewInstance(appId, 1);
    ContainerId cid = ContainerId.NewContainerId(attemptId, 1);
    IDictionary<string, LocalResource> localResources = Sharpen.Collections.EmptyMap();
    IDictionary<string, string> containerEnv = Sharpen.Collections.EmptyMap();
    IList<string> containerCmds = Sharpen.Collections.EmptyList();
    IDictionary<string, ByteBuffer> serviceData = Sharpen.Collections.EmptyMap();
    Credentials containerCreds = new Credentials();
    DataOutputBuffer dob = new DataOutputBuffer();
    containerCreds.WriteTokenStorageToStream(dob);
    ByteBuffer containerTokens = ByteBuffer.Wrap(dob.GetData(), 0, dob.GetLength());
    IDictionary<ApplicationAccessType, string> acls =
        new Dictionary<ApplicationAccessType, string>();
    acls[ApplicationAccessType.ModifyApp] = modUser;
    acls[ApplicationAccessType.ViewApp] = viewUser;
    ContainerLaunchContext clc = ContainerLaunchContext.NewInstance(
        localResources, containerEnv, containerCmds, serviceData, containerTokens, acls);

    // create the logAggregationContext
    LogAggregationContext logAggregationContext = LogAggregationContext.NewInstance(
        "includePattern",
        "excludePattern",
        "includePatternInRollingAggregation",
        "excludePatternInRollingAggregation");
    StartContainersResponse startResponse =
        StartContainer(context, cm, cid, clc, logAggregationContext);
    NUnit.Framework.Assert.IsTrue(startResponse.GetFailedRequests().IsEmpty());
    NUnit.Framework.Assert.AreEqual(1, context.GetApplications().Count);
    Org.Apache.Hadoop.Yarn.Server.Nodemanager.Containermanager.Application.Application app =
        context.GetApplications()[appId];
    NUnit.Framework.Assert.IsNotNull(app);
    WaitForAppState(app, ApplicationState.Initing);
    AssertAppAclsEnforced(context, appId, appUser, modUser, viewUser, enemyUser);

    // reset container manager and verify app recovered with proper acls
    cm.Stop();
    context = NewRecoveryContext(conf, stateStore);
    cm = CreateContainerManager(context);
    cm.Init(conf);
    cm.Start();
    NUnit.Framework.Assert.AreEqual(1, context.GetApplications().Count);
    app = context.GetApplications()[appId];
    NUnit.Framework.Assert.IsNotNull(app);

    // check whether LogAggregationContext is recovered correctly
    LogAggregationContext recovered = ((ApplicationImpl)app).GetLogAggregationContext();
    NUnit.Framework.Assert.IsNotNull(recovered);
    NUnit.Framework.Assert.AreEqual(
        logAggregationContext.GetIncludePattern(), recovered.GetIncludePattern());
    NUnit.Framework.Assert.AreEqual(
        logAggregationContext.GetExcludePattern(), recovered.GetExcludePattern());
    NUnit.Framework.Assert.AreEqual(
        logAggregationContext.GetRolledLogsIncludePattern(),
        recovered.GetRolledLogsIncludePattern());
    NUnit.Framework.Assert.AreEqual(
        logAggregationContext.GetRolledLogsExcludePattern(),
        recovered.GetRolledLogsExcludePattern());
    WaitForAppState(app, ApplicationState.Initing);
    AssertAppAclsEnforced(context, appId, appUser, modUser, viewUser, enemyUser);

    // simulate application completion
    IList<ApplicationId> finishedApps = new AList<ApplicationId>();
    finishedApps.AddItem(appId);
    cm.Handle(new CMgrCompletedAppsEvent(
        finishedApps, CMgrCompletedAppsEvent.Reason.ByResourcemanager));
    WaitForAppState(app, ApplicationState.ApplicationResourcesCleaningup);

    // restart and verify app is marked for finishing
    cm.Stop();
    context = NewRecoveryContext(conf, stateStore);
    cm = CreateContainerManager(context);
    cm.Init(conf);
    cm.Start();
    NUnit.Framework.Assert.AreEqual(1, context.GetApplications().Count);
    app = context.GetApplications()[appId];
    NUnit.Framework.Assert.IsNotNull(app);
    WaitForAppState(app, ApplicationState.ApplicationResourcesCleaningup);
    AssertAppAclsEnforced(context, appId, appUser, modUser, viewUser, enemyUser);

    // simulate log aggregation completion
    app.Handle(new ApplicationEvent(
        app.GetAppId(), ApplicationEventType.ApplicationResourcesCleanedup));
    // expected value first, so a failure message reports the values the right way round
    NUnit.Framework.Assert.AreEqual(ApplicationState.Finished, app.GetApplicationState());
    app.Handle(new ApplicationEvent(
        app.GetAppId(), ApplicationEventType.ApplicationLogHandlingFinished));

    // restart and verify app is no longer present after recovery
    cm.Stop();
    context = NewRecoveryContext(conf, stateStore);
    cm = CreateContainerManager(context);
    cm.Init(conf);
    cm.Start();
    NUnit.Framework.Assert.IsTrue(context.GetApplications().IsEmpty());
    cm.Stop();
}

/// <summary>
/// Builds a new NodeManager context with fresh secret managers and a fresh ACLs
/// manager, all wired to the given state store.
/// </summary>
/// <param name="conf">Configuration passed to the container token secret manager and the ACLs manager.</param>
/// <param name="stateStore">State store shared between the simulated restarts.</param>
/// <returns>The newly created context.</returns>
private static Context NewRecoveryContext(YarnConfiguration conf, NMStateStoreService stateStore)
{
    return new NodeManager.NMContext(
        new NMContainerTokenSecretManager(conf),
        new NMTokenSecretManagerInNM(),
        null,
        new ApplicationACLsManager(conf),
        stateStore);
}

/// <summary>
/// Asserts the access each test user is expected to have on the application:
/// <paramref name="modUser"/> may modify it, <paramref name="viewUser"/> may view
/// but not modify it, and <paramref name="enemyUser"/> may not view it.
/// </summary>
/// <param name="context">Context whose ACLs manager is queried.</param>
/// <param name="appId">Application the access checks are made against.</param>
/// <param name="appUser">Application owner passed to every access check.</param>
/// <param name="modUser">User listed in the application's modify ACL.</param>
/// <param name="viewUser">User listed in the application's view ACL.</param>
/// <param name="enemyUser">User listed in neither ACL.</param>
private static void AssertAppAclsEnforced(Context context, ApplicationId appId, string appUser,
    string modUser, string viewUser, string enemyUser)
{
    NUnit.Framework.Assert.IsTrue(context.GetApplicationACLsManager().CheckAccess(
        UserGroupInformation.CreateRemoteUser(modUser),
        ApplicationAccessType.ModifyApp, appUser, appId));
    NUnit.Framework.Assert.IsFalse(context.GetApplicationACLsManager().CheckAccess(
        UserGroupInformation.CreateRemoteUser(viewUser),
        ApplicationAccessType.ModifyApp, appUser, appId));
    NUnit.Framework.Assert.IsTrue(context.GetApplicationACLsManager().CheckAccess(
        UserGroupInformation.CreateRemoteUser(viewUser),
        ApplicationAccessType.ViewApp, appUser, appId));
    NUnit.Framework.Assert.IsFalse(context.GetApplicationACLsManager().CheckAccess(
        UserGroupInformation.CreateRemoteUser(enemyUser),
        ApplicationAccessType.ViewApp, appUser, appId));
}
/// <summary>
/// Checks that a freshly constructed <c>NMTokenSecretManagerInNM</c> that calls
/// <c>Recover()</c> against the same state store sees the current master key,
/// the per-attempt key presence, and the retrievability of NM token passwords
/// expected after master key rolls and after applications finish.
/// </summary>
/// <remarks>
/// The state store is closed in a <c>finally</c> block so that it is released
/// even when one of the assertions fails part-way through the test.
/// </remarks>
public virtual void TestRecovery()
{
    YarnConfiguration conf = new YarnConfiguration();
    conf.SetBoolean(YarnConfiguration.NmRecoveryEnabled, true);
    NodeId nodeId = NodeId.NewInstance("somehost", 1234);
    ApplicationAttemptId attempt1 =
        ApplicationAttemptId.NewInstance(ApplicationId.NewInstance(1, 1), 1);
    ApplicationAttemptId attempt2 =
        ApplicationAttemptId.NewInstance(ApplicationId.NewInstance(2, 2), 2);
    TestNMTokenSecretManagerInNM.NMTokenKeyGeneratorForTest keygen =
        new TestNMTokenSecretManagerInNM.NMTokenKeyGeneratorForTest();
    NMMemoryStateStoreService stateStore = new NMMemoryStateStoreService();
    stateStore.Init(conf);
    stateStore.Start();
    try
    {
        NMTokenSecretManagerInNM secretMgr = new NMTokenSecretManagerInNM(stateStore);
        secretMgr.SetNodeId(nodeId);
        MasterKey currentKey = keygen.GenerateKey();
        secretMgr.SetMasterKey(currentKey);
        NMTokenIdentifier attemptToken1 =
            GetNMTokenId(secretMgr.CreateNMToken(attempt1, nodeId, "user1"));
        NMTokenIdentifier attemptToken2 =
            GetNMTokenId(secretMgr.CreateNMToken(attempt2, nodeId, "user2"));
        secretMgr.AppAttemptStartContainer(attemptToken1);
        secretMgr.AppAttemptStartContainer(attemptToken2);
        NUnit.Framework.Assert.IsTrue(secretMgr.IsAppAttemptNMTokenKeyPresent(attempt1));
        NUnit.Framework.Assert.IsTrue(secretMgr.IsAppAttemptNMTokenKeyPresent(attempt2));
        NUnit.Framework.Assert.IsNotNull(secretMgr.RetrievePassword(attemptToken1));
        NUnit.Framework.Assert.IsNotNull(secretMgr.RetrievePassword(attemptToken2));

        // restart and verify key is still there and token still valid
        secretMgr = new NMTokenSecretManagerInNM(stateStore);
        secretMgr.Recover();
        secretMgr.SetNodeId(nodeId);
        NUnit.Framework.Assert.AreEqual(currentKey, secretMgr.GetCurrentKey());
        NUnit.Framework.Assert.IsTrue(secretMgr.IsAppAttemptNMTokenKeyPresent(attempt1));
        NUnit.Framework.Assert.IsTrue(secretMgr.IsAppAttemptNMTokenKeyPresent(attempt2));
        NUnit.Framework.Assert.IsNotNull(secretMgr.RetrievePassword(attemptToken1));
        NUnit.Framework.Assert.IsNotNull(secretMgr.RetrievePassword(attemptToken2));

        // roll master key and remove an app
        currentKey = keygen.GenerateKey();
        secretMgr.SetMasterKey(currentKey);
        secretMgr.AppFinished(attempt1.GetApplicationId());

        // restart and verify attempt1 key is still valid due to prev key persist
        secretMgr = new NMTokenSecretManagerInNM(stateStore);
        secretMgr.Recover();
        secretMgr.SetNodeId(nodeId);
        NUnit.Framework.Assert.AreEqual(currentKey, secretMgr.GetCurrentKey());
        NUnit.Framework.Assert.IsFalse(secretMgr.IsAppAttemptNMTokenKeyPresent(attempt1));
        NUnit.Framework.Assert.IsTrue(secretMgr.IsAppAttemptNMTokenKeyPresent(attempt2));
        NUnit.Framework.Assert.IsNotNull(secretMgr.RetrievePassword(attemptToken1));
        NUnit.Framework.Assert.IsNotNull(secretMgr.RetrievePassword(attemptToken2));

        // roll master key again, restart, and verify attempt1 key is bad but
        // attempt2 is still good due to app key persist
        currentKey = keygen.GenerateKey();
        secretMgr.SetMasterKey(currentKey);
        secretMgr = new NMTokenSecretManagerInNM(stateStore);
        secretMgr.Recover();
        secretMgr.SetNodeId(nodeId);
        NUnit.Framework.Assert.AreEqual(currentKey, secretMgr.GetCurrentKey());
        NUnit.Framework.Assert.IsFalse(secretMgr.IsAppAttemptNMTokenKeyPresent(attempt1));
        NUnit.Framework.Assert.IsTrue(secretMgr.IsAppAttemptNMTokenKeyPresent(attempt2));
        try
        {
            secretMgr.RetrievePassword(attemptToken1);
            NUnit.Framework.Assert.Fail("attempt token should not still be valid");
        }
        catch (SecretManager.InvalidToken)
        {
            // expected
        }
        NUnit.Framework.Assert.IsNotNull(secretMgr.RetrievePassword(attemptToken2));

        // remove last attempt, restart, verify both tokens are now bad
        secretMgr.AppFinished(attempt2.GetApplicationId());
        secretMgr = new NMTokenSecretManagerInNM(stateStore);
        secretMgr.Recover();
        secretMgr.SetNodeId(nodeId);
        NUnit.Framework.Assert.AreEqual(currentKey, secretMgr.GetCurrentKey());
        NUnit.Framework.Assert.IsFalse(secretMgr.IsAppAttemptNMTokenKeyPresent(attempt1));
        NUnit.Framework.Assert.IsFalse(secretMgr.IsAppAttemptNMTokenKeyPresent(attempt2));
        try
        {
            secretMgr.RetrievePassword(attemptToken1);
            NUnit.Framework.Assert.Fail("attempt token should not still be valid");
        }
        catch (SecretManager.InvalidToken)
        {
            // expected
        }
        try
        {
            secretMgr.RetrievePassword(attemptToken2);
            NUnit.Framework.Assert.Fail("attempt token should not still be valid");
        }
        catch (SecretManager.InvalidToken)
        {
            // expected
        }
    }
    finally
    {
        stateStore.Close();
    }
}
/// <summary>
/// Checks that deletions requested from one <c>DeletionService</c> are still
/// carried out after that service is stopped and a new one is started on top of
/// the same state store.
/// </summary>
public virtual void TestRecovery()
{
    // Print the seed so the generated directory layout can be identified from
    // the test output.
    Random rng = new Random();
    long seed = rng.NextLong();
    rng.SetSeed(seed);
    System.Console.Out.WriteLine("SEED: " + seed);

    // Four base directories, each populated with the same ten entries.
    IList<Path> roots = BuildDirs(rng, @base, 4);
    CreateDirs(new Path("."), roots);
    IList<Path> entries = BuildDirs(rng, new Path("."), 10);
    foreach (Path root in roots)
    {
        CreateDirs(root, entries);
    }

    Configuration conf = new YarnConfiguration();
    conf.SetBoolean(YarnConfiguration.NmRecoveryEnabled, true);
    conf.SetInt(YarnConfiguration.DebugNmDeleteDelaySec, 1);
    NMMemoryStateStoreService stateStore = new NMMemoryStateStoreService();
    stateStore.Init(conf);
    stateStore.Start();

    DeletionService deleter = new DeletionService(
        new TestDeletionService.FakeDefaultContainerExecutor(), stateStore);
    try
    {
        deleter.Init(conf);
        deleter.Start();
        foreach (Path entry in entries)
        {
            NUnit.Framework.Assert.IsTrue(lfs.Util().Exists(new Path(roots[0], entry)));

            // Entries whose numeric name is even pass null as the first Delete
            // argument; odd ones pass "dingo".
            string owner;
            if ((long.Parse(entry.GetName()) % 2) == 0)
            {
                owner = null;
            }
            else
            {
                owner = "dingo";
            }
            deleter.Delete(owner, entry, Sharpen.Collections.ToArray(roots, new Path[4]));
        }

        // restart the deletion service
        deleter.Stop();
        deleter = new DeletionService(
            new TestDeletionService.FakeDefaultContainerExecutor(), stateStore);
        deleter.Init(conf);
        deleter.Start();

        // verify paths are still eventually deleted; the ten-second budget is
        // shared by all paths rather than granted to each one
        int remainingMsec = 10 * 1000;
        foreach (Path root in roots)
        {
            foreach (Path entry in entries)
            {
                Path target = new Path(root, entry);
                while (remainingMsec > 0 && lfs.Util().Exists(target))
                {
                    Sharpen.Thread.Sleep(100);
                    remainingMsec -= 100;
                }
                NUnit.Framework.Assert.IsFalse(lfs.Util().Exists(target));
            }
        }
    }
    finally
    {
        deleter.Close();
        stateStore.Close();
    }
}