Exemple #1
0
 /// <summary>
 /// Populates this info object from an application attempt.
 /// Start time and the string fields are first reset to 0 / empty and are
 /// only overwritten for the parts of the attempt that are available;
 /// <c>id</c> is assigned only when <paramref name="attempt"/> is non-null.
 /// </summary>
 /// <param name="rm">Resource manager whose scheduler is queried for blacklisted nodes.</param>
 /// <param name="attempt">Attempt to describe; may be null, in which case only the defaults are set.</param>
 /// <param name="user">User passed through to the running-log URL builder.</param>
 /// <param name="schemePrefix">Prefix prepended to the master container's node HTTP address for the logs link.</param>
 public AppAttemptInfo(ResourceManager rm, RMAppAttempt attempt, string user,
                       string schemePrefix)
 {
     // Defaults that remain in place when nothing better is known.
     this.startTime        = 0;
     this.containerId      = string.Empty;
     this.nodeHttpAddress  = string.Empty;
     this.nodeId           = string.Empty;
     this.logsLink         = string.Empty;
     this.blacklistedNodes = string.Empty;

     if (attempt == null)
     {
         return;
     }
     this.id        = attempt.GetAppAttemptId().GetAttemptId();
     this.startTime = attempt.GetStartTime();

     Container amContainer = attempt.GetMasterContainer();
     if (amContainer == null)
     {
         return;
     }
     this.containerId     = amContainer.GetId().ToString();
     this.nodeHttpAddress = amContainer.GetNodeHttpAddress();
     this.nodeId          = amContainer.GetNodeId().ToString();

     string nodeUrlBase     = schemePrefix + amContainer.GetNodeHttpAddress();
     string containerIdText = ConverterUtils.ToString(amContainer.GetId());
     this.logsLink = WebAppUtils.GetRunningLogURL(nodeUrlBase, containerIdText, user);

     // Blacklist information is only looked up for AbstractYarnScheduler-based schedulers.
     if (!(rm.GetResourceScheduler() is AbstractYarnScheduler))
     {
         return;
     }
     AbstractYarnScheduler yarnScheduler = (AbstractYarnScheduler)rm.GetResourceScheduler();
     SchedulerApplicationAttempt schedulerAttempt =
         yarnScheduler.GetApplicationAttempt(attempt.GetAppAttemptId());
     if (schedulerAttempt != null)
     {
         this.blacklistedNodes = StringUtils.Join(schedulerAttempt.GetBlacklistedNodes(), ", ");
     }
 }
 /// <summary>
 /// Carries state over from a previous attempt: delegates to the base
 /// implementation and then adopts the previous attempt's headroom provider.
 /// </summary>
 /// <param name="appAttempt">
 /// Previous attempt; it is cast to <c>FiCaSchedulerApp</c> after the base call.
 /// </param>
 public override void TransferStateFromPreviousAttempt(SchedulerApplicationAttempt appAttempt)
 {
     lock (this)
     {
         base.TransferStateFromPreviousAttempt(appAttempt);

         // The cast is deliberately performed after the base call has run.
         Org.Apache.Hadoop.Yarn.Server.Resourcemanager.Scheduler.Common.Fica.FiCaSchedulerApp previousApp =
             (Org.Apache.Hadoop.Yarn.Server.Resourcemanager.Scheduler.Common.Fica.FiCaSchedulerApp)appAttempt;
         this.headroomProvider = previousApp.GetHeadroomProvider();
     }
 }
Exemple #3
0
 /// <summary>
 /// Re-accounts a recovered container on the enclosing object: increases
 /// used resources, refreshes the attempt's headroom and updates the
 /// available-resources metrics. Containers in the Completed state are ignored.
 /// </summary>
 /// <param name="clusterResource">Not read by this implementation.</param>
 /// <param name="schedulerAttempt">Attempt whose headroom is refreshed.</param>
 /// <param name="rmContainer">Container being recovered.</param>
 public void RecoverContainer(Org.Apache.Hadoop.Yarn.Api.Records.Resource clusterResource,
                              SchedulerApplicationAttempt schedulerAttempt,
                              RMContainer rmContainer)
 {
     bool alreadyCompleted = rmContainer.GetState().Equals(RMContainerState.Completed);
     if (!alreadyCompleted)
     {
         this._enclosing.IncreaseUsedResources(rmContainer);
         this._enclosing.UpdateAppHeadRoom(schedulerAttempt);
         this._enclosing.UpdateAvailableResourcesMetrics();
     }
 }
Exemple #4
0
 /// <summary>
 /// Looks up the blacklisted nodes recorded for an application attempt.
 /// </summary>
 /// <param name="rm">Resource manager whose scheduler is queried.</param>
 /// <param name="appid">Identifier of the attempt to look up.</param>
 /// <returns>
 /// The attempt's blacklisted nodes, or null when the scheduler is not an
 /// <c>AbstractYarnScheduler</c> or it has no attempt for <paramref name="appid"/>.
 /// </returns>
 public static ICollection <string> GetBlacklistedNodes(ResourceManager rm,
                                                        ApplicationAttemptId appid)
 {
     if (!(rm.GetResourceScheduler() is AbstractYarnScheduler))
     {
         return(null);
     }
     AbstractYarnScheduler yarnScheduler = (AbstractYarnScheduler)rm.GetResourceScheduler();
     SchedulerApplicationAttempt schedulerAttempt = yarnScheduler.GetApplicationAttempt(appid);
     return(schedulerAttempt != null ? schedulerAttempt.GetBlacklistedNodes() : null);
 }
 /// <summary>
 /// Clears this node's reservation (reserved container and reserved app
 /// schedulable) on behalf of <paramref name="application"/>.
 /// </summary>
 /// <param name="application">Attempt requesting the unreserve.</param>
 /// <exception cref="System.InvalidOperationException">
 /// The current reservation belongs to a different application attempt.
 /// </exception>
 public override void UnreserveResource(SchedulerApplicationAttempt application)
 {
     lock (this)
     {
         // Only the attempt that holds the reservation may release it.
         RMContainer currentReservation = GetReservedContainer();
         ApplicationAttemptId reservedAttemptId =
             currentReservation.GetContainer().GetId().GetApplicationAttemptId();
         bool ownsReservation = reservedAttemptId.Equals(application.GetApplicationAttemptId());
         if (!ownsReservation)
         {
             string message = "Trying to unreserve " + " for application "
                              + application.GetApplicationId() + " when currently reserved " + " for application "
                              + reservedAttemptId.GetApplicationId() + " on node " + this;
             throw new InvalidOperationException(message);
         }

         SetReservedContainer(null);
         this.reservedAppSchedulable = null;
     }
 }
 /// <summary>
 /// Records <paramref name="container"/> as this node's reserved container.
 /// When a reservation already exists it is validated first and then replaced.
 /// </summary>
 /// <param name="application">Attempt the reservation is made for (used in messages).</param>
 /// <param name="priority">Not read by this implementation.</param>
 /// <param name="container">Container to reserve.</param>
 /// <exception cref="System.InvalidOperationException">
 /// A reservation already exists and either the new container's node id differs
 /// from this node's id, or the existing reservation belongs to a different
 /// application attempt.
 /// </exception>
 public override void ReserveResource(SchedulerApplicationAttempt application,
                                      Priority priority, RMContainer container)
 {
     lock (this)
     {
         RMContainer existing = GetReservedContainer();
         bool replacingReservation = existing != null;

         if (replacingReservation)
         {
             // Sanity check: the new container must target this node.
             bool targetsThisNode = container.GetContainer().GetNodeId().Equals(GetNodeID());
             if (!targetsThisNode)
             {
                 throw new InvalidOperationException("Trying to reserve" + " container " + container
                                                     + " on node " + container.GetReservedNode() + " when currently" + " reserved resource "
                                                     + existing + " on node " + existing.GetReservedNode());
             }

             // A node holds a reservation for at most one application attempt.
             bool sameAttempt = existing.GetContainer().GetId().GetApplicationAttemptId()
                                .Equals(container.GetContainer().GetId().GetApplicationAttemptId());
             if (!sameAttempt)
             {
                 throw new InvalidOperationException("Trying to reserve" + " container " + container
                                                     + " for application " + application.GetApplicationAttemptId() + " when currently"
                                                     + " reserved container " + existing + " on node " + this);
             }
         }

         if (Log.IsDebugEnabled())
         {
             string prefix = replacingReservation ? "Updated reserved container " : "Reserved container ";
             Log.Debug(prefix + container.GetContainer().GetId() + " on node "
                       + this + " for application attempt " + application.GetApplicationAttemptId());
         }

         SetReservedContainer(container);
     }
 }
Exemple #7
0
 /// <summary>
 /// Re-accounts a recovered container: allocates its resource on this object
 /// using the labels of the container's node, then forwards the recovery to
 /// <c>parent</c> when one is set. Containers in the Completed state are ignored.
 /// </summary>
 /// <param name="clusterResource">Passed through to the allocation and to the parent.</param>
 /// <param name="attempt">Passed through to the parent.</param>
 /// <param name="rmContainer">Container being recovered.</param>
 public override void RecoverContainer(Org.Apache.Hadoop.Yarn.Api.Records.Resource clusterResource,
                                       SchedulerApplicationAttempt attempt, RMContainer rmContainer)
 {
     bool alreadyCompleted = rmContainer.GetState().Equals(RMContainerState.Completed);
     if (!alreadyCompleted)
     {
         // Careful! Locking order is important: the parent is called only
         // after this object's lock has been released.
         lock (this)
         {
             FiCaSchedulerNode hostNode = scheduler.GetNode(rmContainer.GetContainer().GetNodeId());
             var recoveredResource = rmContainer.GetContainer().GetResource();
             var nodeLabels = hostNode.GetLabels();
             base.AllocateResource(clusterResource, recoveredResource, nodeLabels);
         }

         if (parent != null)
         {
             parent.RecoverContainer(clusterResource, attempt, rmContainer);
         }
     }
 }
 /// <summary>
 /// Clears this node's reserved container on behalf of
 /// <paramref name="application"/>. The ownership check is skipped when the
 /// current reservation (or any part of its container/attempt id chain) is null.
 /// </summary>
 /// <param name="application">Attempt requesting the unreserve.</param>
 /// <exception cref="System.InvalidOperationException">
 /// An identifiable reservation exists and belongs to a different application attempt.
 /// </exception>
 public override void UnreserveResource(SchedulerApplicationAttempt application)
 {
     lock (this)
     {
         // Null checks are needed because this can now be called for preemption.
         bool reservationIdentifiable =
             GetReservedContainer() != null
             && GetReservedContainer().GetContainer() != null
             && GetReservedContainer().GetContainer().GetId() != null
             && GetReservedContainer().GetContainer().GetId().GetApplicationAttemptId() != null;

         if (reservationIdentifiable)
         {
             // Only the attempt that holds the reservation may release it.
             ApplicationAttemptId reservedAttemptId =
                 GetReservedContainer().GetContainer().GetId().GetApplicationAttemptId();
             bool ownsReservation = reservedAttemptId.Equals(application.GetApplicationAttemptId());
             if (!ownsReservation)
             {
                 string message = "Trying to unreserve " + " for application "
                                  + application.GetApplicationAttemptId() + " when currently reserved " + " for application "
                                  + reservedAttemptId.GetApplicationId() + " on node " + this;
                 throw new InvalidOperationException(message);
             }
         }

         SetReservedContainer(null);
     }
 }
 /// <summary>
 /// Records <paramref name="container"/> as this node's reserved container and
 /// remembers <paramref name="application"/> (cast to <c>FSAppAttempt</c>) as the
 /// reserved app schedulable. An existing reservation is validated before
 /// being replaced.
 /// </summary>
 /// <param name="application">Attempt the reservation is made for.</param>
 /// <param name="priority">Not read by this implementation.</param>
 /// <param name="container">Container to reserve.</param>
 /// <exception cref="System.InvalidOperationException">
 /// A reservation already exists and either the new container's node id differs
 /// from this node's id, or the existing reservation belongs to a different
 /// application attempt.
 /// </exception>
 public override void ReserveResource(SchedulerApplicationAttempt application,
                                      Priority priority, RMContainer container)
 {
     lock (this)
     {
         RMContainer existing = GetReservedContainer();
         bool replacingReservation = existing != null;

         if (replacingReservation)
         {
             // Sanity check: the new container must target this node.
             bool targetsThisNode = container.GetContainer().GetNodeId().Equals(GetNodeID());
             if (!targetsThisNode)
             {
                 throw new InvalidOperationException("Trying to reserve" + " container " + container
                                                     + " on node " + container.GetReservedNode() + " when currently" + " reserved resource "
                                                     + existing + " on node " + existing.GetReservedNode());
             }

             // Cannot reserve more than one application on a given node!
             bool sameAttempt = existing.GetContainer().GetId().GetApplicationAttemptId()
                                .Equals(container.GetContainer().GetId().GetApplicationAttemptId());
             if (!sameAttempt)
             {
                 throw new InvalidOperationException("Trying to reserve" + " container " + container
                                                     + " for application " + application.GetApplicationId() + " when currently" + " reserved container "
                                                     + existing + " on node " + this);
             }
         }

         string prefix = replacingReservation ? "Updated reserved container " : "Reserved container ";
         Log.Info(prefix + container.GetContainer().GetId() + " on node "
                  + this + " for application " + application);

         SetReservedContainer(container);
         this.reservedAppSchedulable = (FSAppAttempt)application;
     }
 }
Exemple #10
0
        /// <summary>
        /// Exercises an AM restart while containers from the first attempt exist in
        /// several states. It asserts that the two Running containers are reported
        /// to the second attempt on registration, and that the containers that were
        /// acquired, allocated, reserved, or later completed show up in the second
        /// attempt's just-finished container list.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestAMRestartWithExistingContainers()
        {
            YarnConfiguration conf = new YarnConfiguration();

            // allow a second AM attempt so the application can restart after the first fails
            conf.SetInt(YarnConfiguration.RmAmMaxAttempts, 2);
            MockRM rm1 = new MockRM(conf);

            rm1.Start();
            RMApp app1 = rm1.SubmitApp(200, "name", "user", new Dictionary <ApplicationAccessType
                                                                            , string>(), false, "default", -1, null, "MAPREDUCE", false, true);
            MockNM nm1 = new MockNM("127.0.0.1:1234", 10240, rm1.GetResourceTrackerService());

            nm1.RegisterNode();
            MockNM nm2 = new MockNM("127.0.0.1:2351", 4089, rm1.GetResourceTrackerService());

            nm2.RegisterNode();
            MockAM am1           = MockRM.LaunchAndRegisterAM(app1, rm1, nm1);
            int    NumContainers = 3;

            // allocate NumContainers containers
            am1.Allocate("127.0.0.1", 1024, NumContainers, new AList <ContainerId>());
            nm1.NodeHeartbeat(true);
            // wait for containers to be allocated.
            IList <Container> containers = am1.Allocate(new AList <ResourceRequest>(), new AList
                                                        <ContainerId>()).GetAllocatedContainers();

            while (containers.Count != NumContainers)
            {
                nm1.NodeHeartbeat(true);
                Sharpen.Collections.AddAll(containers, am1.Allocate(new AList <ResourceRequest>(),
                                                                    new AList <ContainerId>()).GetAllocatedContainers());
                Sharpen.Thread.Sleep(200);
            }
            // launch the 2nd container, for testing that a running container is transferred.
            nm1.NodeHeartbeat(am1.GetApplicationAttemptId(), 2, ContainerState.Running);
            ContainerId containerId2 = ContainerId.NewContainerId(am1.GetApplicationAttemptId
                                                                      (), 2);

            rm1.WaitForState(nm1, containerId2, RMContainerState.Running);
            // launch the 3rd container, for testing that a container allocated by the
            // previous attempt can be completed by the next new attempt.
            nm1.NodeHeartbeat(am1.GetApplicationAttemptId(), 3, ContainerState.Running);
            ContainerId containerId3 = ContainerId.NewContainerId(am1.GetApplicationAttemptId
                                                                      (), 3);

            rm1.WaitForState(nm1, containerId3, RMContainerState.Running);
            // 4th container is still in ACQUIRED state, for testing that an Acquired
            // container is always killed.
            ContainerId containerId4 = ContainerId.NewContainerId(am1.GetApplicationAttemptId
                                                                      (), 4);

            rm1.WaitForState(nm1, containerId4, RMContainerState.Acquired);
            // 5th container is in Allocated state, for testing that an allocated
            // container is always killed.
            am1.Allocate("127.0.0.1", 1024, 1, new AList <ContainerId>());
            nm1.NodeHeartbeat(true);
            ContainerId containerId5 = ContainerId.NewContainerId(am1.GetApplicationAttemptId
                                                                      (), 5);

            rm1.WaitForContainerAllocated(nm1, containerId5);
            rm1.WaitForState(nm1, containerId5, RMContainerState.Allocated);
            // 6th container is in Reserved state.
            am1.Allocate("127.0.0.1", 6000, 1, new AList <ContainerId>());
            ContainerId containerId6 = ContainerId.NewContainerId(am1.GetApplicationAttemptId
                                                                      (), 6);

            nm1.NodeHeartbeat(true);
            SchedulerApplicationAttempt schedulerAttempt = ((AbstractYarnScheduler)rm1.GetResourceScheduler
                                                                ()).GetCurrentAttemptForContainer(containerId6);

            // keep heartbeating until the scheduler attempt reports a reserved container
            while (schedulerAttempt.GetReservedContainers().IsEmpty())
            {
                System.Console.Out.WriteLine("Waiting for container " + containerId6 + " to be reserved."
                                             );
                nm1.NodeHeartbeat(true);
                Sharpen.Thread.Sleep(200);
            }
            // assert containerId6 is reserved.
            NUnit.Framework.Assert.AreEqual(containerId6, schedulerAttempt.GetReservedContainers
                                                ()[0].GetContainerId());
            // fail the AM by sending CONTAINER_FINISHED event without registering.
            nm1.NodeHeartbeat(am1.GetApplicationAttemptId(), 1, ContainerState.Complete);
            am1.WaitForState(RMAppAttemptState.Failed);
            // wait for some time. The previous AM's running containers should still
            // remain in the scheduler even though the AM failed.
            Sharpen.Thread.Sleep(3000);
            rm1.WaitForState(nm1, containerId2, RMContainerState.Running);
            // acquired/allocated containers are cleaned up.
            NUnit.Framework.Assert.IsNull(rm1.GetResourceScheduler().GetRMContainer(containerId4
                                                                                    ));
            NUnit.Framework.Assert.IsNull(rm1.GetResourceScheduler().GetRMContainer(containerId5
                                                                                    ));
            // wait for app to start a new attempt.
            rm1.WaitForState(app1.GetApplicationId(), RMAppState.Accepted);
            // assert this is a new AM.
            ApplicationAttemptId newAttemptId = app1.GetCurrentAppAttempt().GetAppAttemptId();

            NUnit.Framework.Assert.IsFalse(newAttemptId.Equals(am1.GetApplicationAttemptId())
                                           );
            // launch the new AM
            RMAppAttempt attempt2 = app1.GetCurrentAppAttempt();

            nm1.NodeHeartbeat(true);
            MockAM am2 = rm1.SendAMLaunched(attempt2.GetAppAttemptId());
            RegisterApplicationMasterResponse registerResponse = am2.RegisterAppAttempt();

            // Assert two containers are running: container2 and container3.
            NUnit.Framework.Assert.AreEqual(2, registerResponse.GetContainersFromPreviousAttempts
                                                ().Count);
            bool containerId2Exists = false;
            bool containerId3Exists = false;

            foreach (Container container in registerResponse.GetContainersFromPreviousAttempts
                         ())
            {
                if (container.GetId().Equals(containerId2))
                {
                    containerId2Exists = true;
                }
                if (container.GetId().Equals(containerId3))
                {
                    containerId3Exists = true;
                }
            }
            NUnit.Framework.Assert.IsTrue(containerId2Exists && containerId3Exists);
            rm1.WaitForState(app1.GetApplicationId(), RMAppState.Running);
            // complete the container by sending a container-complete event that carries
            // the earlier attempt's attemptId
            nm1.NodeHeartbeat(am1.GetApplicationAttemptId(), 3, ContainerState.Complete);
            // Even though the completed-container event for containerId3 was sent for
            // the earlier failed attempt, the new RMAppAttempt can also capture this
            // container info.
            // The completed containerId4 is also transferred to the new attempt.
            RMAppAttempt newAttempt = app1.GetRMAppAttempt(am2.GetApplicationAttemptId());

            // 4 containers finished: acquired/allocated/reserved/completed.
            WaitForContainersToFinish(4, newAttempt);
            bool container3Exists = false;
            bool container4Exists = false;
            bool container5Exists = false;
            bool container6Exists = false;

            foreach (ContainerStatus status in newAttempt.GetJustFinishedContainers())
            {
                if (status.GetContainerId().Equals(containerId3))
                {
                    // containerId3 is the container run by the previous attempt but
                    // finished by the new attempt.
                    container3Exists = true;
                }
                if (status.GetContainerId().Equals(containerId4))
                {
                    // containerId4 is the Acquired container killed by the previous attempt;
                    // it's now inside the new attempt's finished container list.
                    container4Exists = true;
                }
                if (status.GetContainerId().Equals(containerId5))
                {
                    // containerId5 is the Allocated container killed by the previous failed attempt.
                    container5Exists = true;
                }
                if (status.GetContainerId().Equals(containerId6))
                {
                    // containerId6 is the reserved container killed by the previous failed attempt.
                    container6Exists = true;
                }
            }
            NUnit.Framework.Assert.IsTrue(container3Exists && container4Exists && container5Exists &&
                                          container6Exists);
            // New SchedulerApplicationAttempt also has the containers info.
            rm1.WaitForState(nm1, containerId2, RMContainerState.Running);
            // record the scheduler attempt for testing.
            SchedulerApplicationAttempt schedulerNewAttempt = ((AbstractYarnScheduler)rm1.GetResourceScheduler
                                                                   ()).GetCurrentAttemptForContainer(containerId2);

            // finish this application
            MockRM.FinishAMAndVerifyAppState(app1, rm1, nm1, am2);
            // the 2nd attempt released the 1st attempt's running container, when the
            // 2nd attempt finishes.
            // NOTE(review): Contains is given a ContainerId; confirm that
            // GetLiveContainers() holds ContainerId elements, otherwise this
            // assertion cannot fail.
            NUnit.Framework.Assert.IsFalse(schedulerNewAttempt.GetLiveContainers().Contains(containerId2
                                                                                            ));
            // all 4 normal containers finished.
            // NOTE(review): the wait below uses 5, not 4 - presumably the four
            // containers checked above plus containerId2 released at finish; confirm.
            System.Console.Out.WriteLine("New attempt's just finished containers: " + newAttempt
                                         .GetJustFinishedContainers());
            WaitForContainersToFinish(5, newAttempt);
            rm1.Stop();
        }
Exemple #11
0
 /// <summary>
 /// Container recovery hook; this override performs no work.
 /// </summary>
 /// <param name="clusterResource">Unused.</param>
 /// <param name="schedulerAttempt">Unused.</param>
 /// <param name="rmContainer">Unused.</param>
 public override void RecoverContainer(Org.Apache.Hadoop.Yarn.Api.Records.Resource clusterResource,
                                       SchedulerApplicationAttempt schedulerAttempt,
                                       RMContainer rmContainer)
 {
     // No-op: nothing is recovered by this implementation.
 }
Exemple #12
0
 /// <summary>
 /// Sets the attempt's headroom to <c>clusterResource</c> minus <c>usedResource</c>.
 /// </summary>
 /// <param name="schedulerAttempt">Attempt whose headroom is updated.</param>
 private void UpdateAppHeadRoom(SchedulerApplicationAttempt schedulerAttempt)
 {
     var remaining = Resources.Subtract(clusterResource, usedResource);
     schedulerAttempt.SetHeadroom(remaining);
 }
Exemple #13
0
 /// <summary>
 /// Container recovery hook. Abstract: the behaviour is supplied entirely by
 /// the implementing class.
 /// </summary>
 /// <param name="arg1">
 /// A resource value. NOTE(review): presumably the cluster resource, going by
 /// the parameter name used in implementations of this signature - confirm.
 /// </param>
 /// <param name="arg2">
 /// A scheduler application attempt. NOTE(review): presumably the attempt that
 /// owns the container - confirm.
 /// </param>
 /// <param name="arg3">
 /// An RM container. NOTE(review): presumably the container being recovered - confirm.
 /// </param>
 public abstract void RecoverContainer(Org.Apache.Hadoop.Yarn.Api.Records.Resource
                                       arg1, SchedulerApplicationAttempt arg2, RMContainer arg3);