/// <summary>
/// Rebuilds a <c>Container</c> record from the status reported by a node and wraps it in a
/// new <c>RMContainerImpl</c>.
/// </summary>
/// <param name="status">Container status reported by the node; supplies the container id,
/// allocated resource, priority and creation time.</param>
/// <param name="node">Node reporting the container; supplies the node id and HTTP address.</param>
/// <returns>The newly created RM-side container object.</returns>
private RMContainer RecoverAndCreateContainer(NMContainerStatus status, RMNode node)
        {
            // Re-create the container record; the last argument (null) is passed through
            // exactly as in the factory call this replaces.
            Container recovered = Container.NewInstance(
                status.GetContainerId(),
                node.GetNodeID(),
                node.GetHttpAddress(),
                status.GetAllocatedResource(),
                status.GetPriority(),
                null);

            // The owning attempt is derived from the container id itself.
            ApplicationAttemptId owner = recovered.GetId().GetApplicationAttemptId();

            // The user is looked up from the application the attempt belongs to.
            var user = applications[owner.GetApplicationId()].GetUser();

            return new RMContainerImpl(recovered, owner, node.GetNodeID(), user, rmContext,
                                       status.GetCreationTime());
        }
        /// <summary>
        /// Starts an RM with two node managers, launches an app, restarts the RM on the same
        /// state store, re-registers both nodes (the first one reporting a completed container),
        /// then waits for the app to reach Failed and for both nodes to receive the
        /// application cleanup message.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestAppCleanupWhenRMRestartedBeforeAppFinished()
        {
            // A single AM attempt is allowed.
            conf.SetInt(YarnConfiguration.RmAmMaxAttempts, 1);
            MemoryRMStateStore stateStore = new MemoryRMStateStore();
            stateStore.Init(conf);

            // Bring up the first RM and register two node managers with it.
            MockRM firstRm = new MockRM(conf, stateStore);
            firstRm.Start();
            MockNM firstNode = new MockNM("127.0.0.1:1234", 1024, firstRm.GetResourceTrackerService());
            firstNode.RegisterNode();
            MockNM secondNode = new MockNM("127.0.0.1:5678", 1024, firstRm.GetResourceTrackerService());
            secondNode.RegisterNode();

            // Submit the app and launch its AM via the first node.
            RMApp submittedApp = firstRm.SubmitApp(200);
            MockAM appMaster = LaunchAM(submittedApp, firstRm, firstNode);

            // Ask for one more container and keep heartbeating the second node
            // until the allocate response actually carries a container.
            var containerAsk = ResourceRequest.NewInstance(Priority.NewInstance(1), "*",
                                                           Resource.NewInstance(1024, 0), 1);
            AllocateResponse response = appMaster.Allocate(Arrays.AsList(containerAsk), null);
            while (response.GetAllocatedContainers() == null
                   || response.GetAllocatedContainers().IsEmpty())
            {
                secondNode.NodeHeartbeat(true);
                response = appMaster.Allocate(null, null);
                Sharpen.Thread.Sleep(1000);
            }

            // Start a second RM on the same state store.
            MockRM secondRm = new MockRM(conf, stateStore);
            secondRm.Start();

            // First node re-registers with the new RM, reporting container #1 of the
            // attempt as Complete together with the running application.
            firstNode.SetResourceTrackerService(secondRm.GetResourceTrackerService());
            ContainerId reportedId = ContainerId.NewContainerId(appMaster.GetApplicationAttemptId(), 1);
            NMContainerStatus reportedStatus = NMContainerStatus.NewInstance(
                reportedId, ContainerState.Complete, Resource.NewInstance(1024, 1),
                string.Empty, 0, Priority.NewInstance(0), 1234);
            firstNode.RegisterNode(Arrays.AsList(reportedStatus),
                                   Arrays.AsList(submittedApp.GetApplicationId()));

            // Second node re-registers with only the running application.
            secondNode.SetResourceTrackerService(secondRm.GetResourceTrackerService());
            secondNode.RegisterNode(Arrays.AsList(submittedApp.GetApplicationId()));

            // The app must end up Failed on the new RM.
            secondRm.WaitForState(submittedApp.GetApplicationId(), RMAppState.Failed);

            // Both nodes must be told to clean up the application.
            WaitForAppCleanupMessageRecved(firstNode, submittedApp.GetApplicationId());
            WaitForAppCleanupMessageRecved(secondNode, submittedApp.GetApplicationId());

            firstRm.Stop();
            secondRm.Stop();
        }
Example #3
0
 /// <summary>
 /// Polls the attempt's just-finished containers every 200 ms until one of them has the
 /// same container id as <paramref name="completedContainer"/>. There is no timeout: the
 /// method only returns once a match is seen.
 /// </summary>
 /// <param name="attempt">Attempt whose just-finished containers are polled.</param>
 /// <param name="completedContainer">Status whose container id is being waited for.</param>
 /// <exception cref="System.Exception"/>
 public virtual void WaitForContainerToComplete(RMAppAttempt attempt, NMContainerStatus
                                                completedContainer)
 {
     bool seen = false;

     while (!seen)
     {
         IList <ContainerStatus> finished = attempt.GetJustFinishedContainers();
         System.Console.Out.WriteLine("Received completed containers " + finished);

         foreach (ContainerStatus candidate in finished)
         {
             if (candidate.GetContainerId().Equals(completedContainer.GetContainerId()))
             {
                 seen = true;
                 break;
             }
         }

         // Only back off when the container has not shown up yet.
         if (!seen)
         {
             Sharpen.Thread.Sleep(200);
         }
     }
 }
        /// <summary>
        /// Handles a container status reported by a node. When the status describes the
        /// completed master container of a known, managed application attempt, a
        /// <c>RMAppAttemptContainerFinishedEvent</c> is dispatched for that attempt.
        /// Statuses for unknown applications, unmanaged AMs, unknown attempts, or attempts
        /// without a master container are skipped.
        /// </summary>
        /// <param name="containerStatus">Container status reported by the node.</param>
        /// <param name="nodeId">Id of the reporting node; forwarded in the dispatched event.</param>
        internal virtual void HandleNMContainerStatus(NMContainerStatus containerStatus,
                                                      NodeId nodeId)
        {
            ApplicationAttemptId appAttemptId = containerStatus.GetContainerId().GetApplicationAttemptId
                                                    ();
            RMApp rmApp = rmContext.GetRMApps()[appAttemptId.GetApplicationId()];

            if (rmApp == null)
            {
                Log.Error("Received finished container : " + containerStatus.GetContainerId() + " for unknown application "
                          + appAttemptId.GetApplicationId() + " Skipping.");
                return;
            }
            if (rmApp.GetApplicationSubmissionContext().GetUnmanagedAM())
            {
                if (Log.IsDebugEnabled())
                {
                    Log.Debug("Ignoring container completion status for unmanaged AM " + rmApp.GetApplicationId
                                  ());
                }
                return;
            }

            RMAppAttempt rmAppAttempt = rmApp.GetRMAppAttempt(appAttemptId);

            // The application is known but the reported attempt is not: there is nothing to
            // notify, and dereferencing the missing attempt would throw.
            if (rmAppAttempt == null)
            {
                Log.Error("Ignoring not found attempt " + appAttemptId);
                return;
            }

            Container masterContainer = rmAppAttempt.GetMasterContainer();

            // Without a master container the reported container cannot be the master
            // container, so there is no event to send.
            if (masterContainer == null)
            {
                return;
            }

            if (masterContainer.GetId().Equals(containerStatus.GetContainerId()) && containerStatus
                .GetContainerState() == ContainerState.Complete)
            {
                ContainerStatus status = ContainerStatus.NewInstance(containerStatus.GetContainerId
                                                                         (), containerStatus.GetContainerState(), containerStatus.GetDiagnostics(), containerStatus
                                                                     .GetContainerExitStatus());
                // sending master container finished event.
                RMAppAttemptContainerFinishedEvent evt = new RMAppAttemptContainerFinishedEvent(appAttemptId
                                                                                                , status, nodeId);
                rmContext.GetDispatcher().GetEventHandler().Handle(evt);
            }
        }
Example #5
0
        /// <summary>
        /// Exercises AM attempt failures with a max-attempt count of 2 and two different
        /// values passed as the second argument to <c>SubmitApp</c> (20000 and 6000, described
        /// by the inline comments as the failure window in milliseconds). The first app is
        /// expected to fail after two consecutive attempt failures; the second app, driven by
        /// a <c>ControlledClock</c>, is expected to keep launching new attempts when the clock
        /// is moved forward between failures, including after an RM restart.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestRMAppAttemptFailuresValidityInterval()
        {
            YarnConfiguration conf = new YarnConfiguration();

            conf.SetClass(YarnConfiguration.RmScheduler, typeof(CapacityScheduler), typeof(ResourceScheduler
                                                                                           ));
            conf.SetBoolean(YarnConfiguration.RecoveryEnabled, true);
            conf.SetBoolean(YarnConfiguration.RmWorkPreservingRecoveryEnabled, false);
            conf.Set(YarnConfiguration.RmStore, typeof(MemoryRMStateStore).FullName);
            // explicitly set max-am-retry count as 2.
            conf.SetInt(YarnConfiguration.RmAmMaxAttempts, 2);
            MemoryRMStateStore memStore = new MemoryRMStateStore();

            memStore.Init(conf);
            MockRM rm1 = new MockRM(conf, memStore);

            rm1.Start();
            MockNM nm1 = new MockNM("127.0.0.1:1234", 8000, rm1.GetResourceTrackerService());

            nm1.RegisterNode();
            // set window size to a larger number : 20s
            // we will verify the app should be failed if
            // two continuous attempts failed in 20s.
            RMApp  app = rm1.SubmitApp(200, 20000);
            MockAM am  = MockRM.LaunchAM(app, rm1, nm1);

            // Fail current attempt normally
            nm1.NodeHeartbeat(am.GetApplicationAttemptId(), 1, ContainerState.Complete);
            am.WaitForState(RMAppAttemptState.Failed);
            // launch the second attempt
            rm1.WaitForState(app.GetApplicationId(), RMAppState.Accepted);
            NUnit.Framework.Assert.AreEqual(2, app.GetAppAttempts().Count);
            NUnit.Framework.Assert.IsTrue(((RMAppAttemptImpl)app.GetCurrentAppAttempt()).MayBeLastAttempt
                                              ());
            MockAM am_2 = MockRM.LaunchAndRegisterAM(app, rm1, nm1);

            am_2.WaitForState(RMAppAttemptState.Running);
            nm1.NodeHeartbeat(am_2.GetApplicationAttemptId(), 1, ContainerState.Complete);
            am_2.WaitForState(RMAppAttemptState.Failed);
            // current app should be failed.
            rm1.WaitForState(app.GetApplicationId(), RMAppState.Failed);
            // Second scenario: a controllable clock is installed on the app so the test
            // can move time forward without really sleeping.
            ControlledClock clock = new ControlledClock(new SystemClock());
            // set window size to 6s
            RMAppImpl app1 = (RMAppImpl)rm1.SubmitApp(200, 6000);

            app1.SetSystemClock(clock);
            MockAM am1 = MockRM.LaunchAndRegisterAM(app1, rm1, nm1);

            // Fail attempt1 normally
            nm1.NodeHeartbeat(am1.GetApplicationAttemptId(), 1, ContainerState.Complete);
            am1.WaitForState(RMAppAttemptState.Failed);
            // launch the second attempt
            rm1.WaitForState(app1.GetApplicationId(), RMAppState.Accepted);
            NUnit.Framework.Assert.AreEqual(2, app1.GetAppAttempts().Count);
            RMAppAttempt attempt2 = app1.GetCurrentAppAttempt();

            NUnit.Framework.Assert.IsTrue(((RMAppAttemptImpl)attempt2).MayBeLastAttempt());
            MockAM am2 = MockRM.LaunchAndRegisterAM(app1, rm1, nm1);

            am2.WaitForState(RMAppAttemptState.Running);
            // move the controlled clock 6 seconds past the current time (no real waiting)
            clock.SetTime(Runtime.CurrentTimeMillis() + 6 * 1000);
            // Fail attempt2 normally
            nm1.NodeHeartbeat(am2.GetApplicationAttemptId(), 1, ContainerState.Complete);
            am2.WaitForState(RMAppAttemptState.Failed);
            // can launch the third attempt successfully
            rm1.WaitForState(app1.GetApplicationId(), RMAppState.Accepted);
            NUnit.Framework.Assert.AreEqual(3, app1.GetAppAttempts().Count);
            RMAppAttempt attempt3 = app1.GetCurrentAppAttempt();

            clock.Reset();
            MockAM am3 = MockRM.LaunchAndRegisterAM(app1, rm1, nm1);

            am3.WaitForState(RMAppAttemptState.Running);
            // Restart rm.
            MockRM rm2 = new MockRM(conf, memStore);

            rm2.Start();
            // re-register the NM with the new RM, reporting attempt3's master container
            // as Complete with exit status KilledByResourcemanager
            nm1.SetResourceTrackerService(rm2.GetResourceTrackerService());
            NMContainerStatus status = Org.Apache.Hadoop.Yarn.Util.Records.NewRecord <NMContainerStatus
                                                                                      >();

            status.SetContainerExitStatus(ContainerExitStatus.KilledByResourcemanager);
            status.SetContainerId(attempt3.GetMasterContainer().GetId());
            status.SetContainerState(ContainerState.Complete);
            status.SetDiagnostics(string.Empty);
            nm1.RegisterNode(Sharpen.Collections.SingletonList(status), null);
            rm2.WaitForState(attempt3.GetAppAttemptId(), RMAppAttemptState.Failed);
            rm2.WaitForState(app1.GetApplicationId(), RMAppState.Accepted);
            // Launch Attempt 4
            MockAM am4 = rm2.WaitForNewAMToLaunchAndRegister(app1.GetApplicationId(), 4, nm1);

            // move the controlled clock 6 seconds past the current time (no real waiting)
            // NOTE(review): the clock was installed on the app1 object obtained from rm1;
            // whether the app instance held by rm2 reads this clock is not visible here - confirm.
            clock.SetTime(Runtime.CurrentTimeMillis() + 6 * 1000);
            // Fail attempt4 normally
            nm1.NodeHeartbeat(am4.GetApplicationAttemptId(), 1, ContainerState.Complete);
            am4.WaitForState(RMAppAttemptState.Failed);
            // can launch the 5th attempt successfully
            rm2.WaitForState(app1.GetApplicationId(), RMAppState.Accepted);
            MockAM am5 = rm2.WaitForNewAMToLaunchAndRegister(app1.GetApplicationId(), 5, nm1);

            clock.Reset();
            am5.WaitForState(RMAppAttemptState.Running);
            // Fail attempt5 normally
            nm1.NodeHeartbeat(am5.GetApplicationAttemptId(), 1, ContainerState.Complete);
            am5.WaitForState(RMAppAttemptState.Failed);
            // after attempt5 fails the app itself is expected to reach Failed
            rm2.WaitForState(app1.GetApplicationId(), RMAppState.Failed);
            rm1.Stop();
            rm2.Stop();
        }
 /// <summary>
 /// Returns the protobuf message backing the given container status.
 /// </summary>
 /// <param name="c">Container status; it is cast to <c>NMContainerStatusPBImpl</c>, so any
 /// other implementation makes the cast fail.</param>
 /// <returns>The proto obtained from the implementation's <c>GetProto()</c>.</returns>
 private YarnServerCommonServiceProtos.NMContainerStatusProto ConvertToProtoFormat
     (NMContainerStatus c)
 {
     NMContainerStatusPBImpl pbBacked = (NMContainerStatusPBImpl)c;

     return pbBacked.GetProto();
 }
Example #7
0
 /// <summary>
 /// Creates a container event of type <c>RMContainerEventType.Recover</c> that carries the
 /// container status supplied by the caller.
 /// </summary>
 /// <param name="containerId">Id of the container the event refers to; passed to the base event.</param>
 /// <param name="containerReport">Container status stored on this event.</param>
 public RMContainerRecoverEvent(ContainerId containerId, NMContainerStatus containerReport)
     : base(containerId, RMContainerEventType.Recover)
 {
     // Keep the reported status on the event.
     this.containerReport = containerReport;
 }
        // This test verifies:
        // 1. An AM trying to unregister without registering
        // 2. An AM that registers with the RM and tries to unregister immediately after an RM restart
        // In the code below the AM client registers while pointed at rm1, is then re-pointed
        // at rm2 via UpdateRMProxy, and unregisters through rm2.
        /// <exception cref="System.Exception"/>
        public virtual void TestAMRMClientForUnregisterAMOnRMRestart()
        {
            MemoryRMStateStore memStore = new MemoryRMStateStore();

            memStore.Init(conf);
            // Phase-1 Start 1st RM
            TestAMRMClientOnRMRestart.MyResourceManager rm1 = new TestAMRMClientOnRMRestart.MyResourceManager
                                                                  (conf, memStore);
            rm1.Start();
            DrainDispatcher dispatcher = (DrainDispatcher)rm1.GetRMContext().GetDispatcher();
            // Submit the application
            RMApp app = rm1.SubmitApp(1024);

            dispatcher.Await();
            MockNM nm1 = new MockNM("h1:1234", 15120, rm1.GetResourceTrackerService());

            nm1.RegisterNode();
            nm1.NodeHeartbeat(true);
            // Wait for the dispatcher after the node heartbeat
            dispatcher.Await();
            ApplicationAttemptId appAttemptId = app.GetCurrentAppAttempt().GetAppAttemptId();

            rm1.SendAMLaunched(appAttemptId);
            dispatcher.Await();
            // Add the attempt's AMRM token identifier to the current user
            Org.Apache.Hadoop.Security.Token.Token <AMRMTokenIdentifier> token = rm1.GetRMContext
                                                                                     ().GetRMApps()[appAttemptId.GetApplicationId()].GetRMAppAttempt(appAttemptId).GetAMRMToken
                                                                                     ();
            UserGroupInformation ugi = UserGroupInformation.GetCurrentUser();

            ugi.AddTokenIdentifier(token.DecodeIdentifier());
            // Create the AM client against rm1, register the AM and issue one allocate call
            AMRMClient <AMRMClient.ContainerRequest> amClient = new TestAMRMClientOnRMRestart.MyAMRMClientImpl
                                                                    (rm1);

            amClient.Init(conf);
            amClient.Start();
            amClient.RegisterApplicationMaster("h1", 10000, string.Empty);
            amClient.Allocate(0.1f);
            // Phase-2: start the 2nd RM on the same state store and point the NM and the
            // AM client at it
            TestAMRMClientOnRMRestart.MyResourceManager rm2 = new TestAMRMClientOnRMRestart.MyResourceManager
                                                                  (conf, memStore);
            rm2.Start();
            nm1.SetResourceTrackerService(rm2.GetResourceTrackerService());
            ((TestAMRMClientOnRMRestart.MyAMRMClientImpl)amClient).UpdateRMProxy(rm2);
            dispatcher = (DrainDispatcher)rm2.GetRMContext().GetDispatcher();
            // rm2 is expected to answer nm1's first heartbeat with a Resync action
            NodeHeartbeatResponse hbResponse = nm1.NodeHeartbeat(true);

            NUnit.Framework.Assert.AreEqual(NodeAction.Resync, hbResponse.GetNodeAction());
            // new NM to represent NM re-register; it reports container #1 of the attempt
            // as Running
            nm1 = new MockNM("h1:1234", 10240, rm2.GetResourceTrackerService());
            ContainerId       containerId     = ContainerId.NewContainerId(appAttemptId, 1);
            NMContainerStatus containerReport = NMContainerStatus.NewInstance(containerId, ContainerState
                                                                              .Running, Resource.NewInstance(1024, 1), "recover container", 0, Priority.NewInstance
                                                                                  (0), 0);

            nm1.RegisterNode(Arrays.AsList(containerReport), null);
            nm1.NodeHeartbeat(true);
            dispatcher.Await();
            // Unregister through rm2, then complete container #1 so the attempt and the
            // app can move from Finishing to Finished
            amClient.UnregisterApplicationMaster(FinalApplicationStatus.Succeeded, null, null
                                                 );
            rm2.WaitForState(appAttemptId, RMAppAttemptState.Finishing);
            nm1.NodeHeartbeat(appAttemptId, 1, ContainerState.Complete);
            rm2.WaitForState(appAttemptId, RMAppAttemptState.Finished);
            rm2.WaitForState(app.GetApplicationId(), RMAppState.Finished);
            amClient.Stop();
            rm1.Stop();
            rm2.Stop();
        }