/// <summary>
/// Checks Equals, CompareTo, GetHashCode and ToString on ContainerId values
/// produced by NewContainerId, including ids whose value does not fit in the
/// low 40 bits.
/// </summary>
public virtual void TestContainerId()
{
    // The assertions below split GetContainerId() into its low 40 bits and
    // whatever remains above them.
    const long lowFortyBits = 0xffffffffffL;
    const int highBitsShift = 40;

    ContainerId reference = NewContainerId(1, 1, 10L, 1);
    ContainerId differentLastArg = NewContainerId(1, 1, 10L, 2);
    ContainerId twin = NewContainerId(1, 1, 10L, 1);
    ContainerId differentSecondArg = NewContainerId(1, 3, 10L, 1);
    ContainerId differentSecondAndThirdArg = NewContainerId(1, 3, 8L, 1);

    // Equality.
    NUnit.Framework.Assert.IsTrue(reference.Equals(twin));
    NUnit.Framework.Assert.IsFalse(reference.Equals(differentLastArg));
    NUnit.Framework.Assert.IsFalse(reference.Equals(differentSecondArg));
    NUnit.Framework.Assert.IsFalse(reference.Equals(differentSecondAndThirdArg));

    // Ordering.
    NUnit.Framework.Assert.IsTrue(reference.CompareTo(twin) == 0);
    NUnit.Framework.Assert.IsTrue(reference.CompareTo(differentLastArg) < 0);
    NUnit.Framework.Assert.IsTrue(reference.CompareTo(differentSecondArg) < 0);
    NUnit.Framework.Assert.IsTrue(reference.CompareTo(differentSecondAndThirdArg) > 0);

    // Hash codes.
    NUnit.Framework.Assert.IsTrue(reference.GetHashCode() == twin.GetHashCode());
    NUnit.Framework.Assert.IsFalse(reference.GetHashCode() == differentLastArg.GetHashCode());
    NUnit.Framework.Assert.IsFalse(reference.GetHashCode() == differentSecondArg.GetHashCode());
    NUnit.Framework.Assert.IsFalse(reference.GetHashCode() == differentSecondAndThirdArg.GetHashCode());

    // Id that fits entirely in the low 40 bits.
    long ts = Runtime.CurrentTimeMillis();
    ContainerId smallId = NewContainerId(36473, 4365472, ts, 25645811);
    NUnit.Framework.Assert.AreEqual("container_10_0001_01_000001", reference.ToString());
    NUnit.Framework.Assert.AreEqual(25645811, lowFortyBits & smallId.GetContainerId());
    NUnit.Framework.Assert.AreEqual(0, smallId.GetContainerId() >> highBitsShift);
    NUnit.Framework.Assert.AreEqual("container_" + ts + "_36473_4365472_25645811", smallId.ToString());

    // Id whose bits above the low 40 equal 3; ToString is expected to carry an "e03" part.
    ContainerId highBitsThree = NewContainerId(36473, 4365472, ts, 4298334883325L);
    NUnit.Framework.Assert.AreEqual(999799999997L, lowFortyBits & highBitsThree.GetContainerId());
    NUnit.Framework.Assert.AreEqual(3, highBitsThree.GetContainerId() >> highBitsShift);
    NUnit.Framework.Assert.AreEqual("container_e03_" + ts + "_36473_4365472_999799999997", highBitsThree.ToString());

    // Id whose bits above the low 40 equal 767; ToString is expected to carry an "e767" part.
    ContainerId highBits767 = NewContainerId(36473, 4365472, ts, 844424930131965L);
    NUnit.Framework.Assert.AreEqual(1099511627773L, lowFortyBits & highBits767.GetContainerId());
    NUnit.Framework.Assert.AreEqual(767, highBits767.GetContainerId() >> highBitsShift);
    NUnit.Framework.Assert.AreEqual("container_e767_" + ts + "_36473_4365472_1099511627773", highBits767.ToString());
}
// TODO: The condition: containerId.getId() == 1 to determine an AM container
// is not always true.
/// <summary>
/// Decides, from the configured retention policy, whether the logs of the
/// given container should be uploaded.
/// </summary>
/// <param name="containerId">Container whose logs are being considered.</param>
/// <param name="wasContainerSuccessful">Whether the container finished successfully.</param>
/// <returns>
/// true for the all-containers policy; for the AM-only policy, true when the
/// masked container id equals 1; for the AM-and-failed policy, true when the
/// masked container id equals 1 or the container was not successful;
/// false for any other policy.
/// </returns>
private bool ShouldUploadLogs(ContainerId containerId, bool wasContainerSuccessful)
{
    // All containers: nothing else to look at.
    if (this.retentionPolicy.Equals(ContainerLogsRetentionPolicy.AllContainers))
    {
        return true;
    }

    bool amOnlyPolicy = this.retentionPolicy.Equals(ContainerLogsRetentionPolicy.ApplicationMasterOnly);
    bool amAndFailedPolicy = !amOnlyPolicy
        && this.retentionPolicy.Equals(ContainerLogsRetentionPolicy.AmAndFailedContainersOnly);
    if (!amOnlyPolicy && !amAndFailedPolicy)
    {
        return false;
    }

    // Both remaining policies treat a masked id of 1 as the AM container.
    bool looksLikeAmContainer =
        (containerId.GetContainerId() & ContainerId.ContainerIdBitmask) == 1;

    // AM only -> AM container; AM + failing -> AM container or any unsuccessful one.
    return amOnlyPolicy
        ? looksLikeAmContainer
        : (looksLikeAmContainer || !wasContainerSuccessful);
}
/// <summary>
/// Builds a mocked ContainerId that reports the given id and belongs to a
/// mocked attempt (attempt id 0) of a mocked application (id 3, cluster
/// timestamp 314159265).
/// </summary>
/// <param name="id">Value the mock returns from GetContainerId().</param>
/// <returns>The stubbed ContainerId mock.</returns>
internal static ContainerId GetMockContainer(long id)
{
    ApplicationId mockedApp = Org.Mockito.Mockito.Mock<ApplicationId>();
    ApplicationAttemptId mockedAttempt = Org.Mockito.Mockito.Mock<ApplicationAttemptId>();
    ContainerId mockedContainer = Org.Mockito.Mockito.Mock<ContainerId>();

    // Application-level stubs.
    Org.Mockito.Mockito.When(mockedApp.GetClusterTimestamp()).ThenReturn(314159265L);
    Org.Mockito.Mockito.When(mockedApp.GetId()).ThenReturn(3);

    // Attempt-level stubs.
    Org.Mockito.Mockito.When(mockedAttempt.GetApplicationId()).ThenReturn(mockedApp);
    Org.Mockito.Mockito.When(mockedAttempt.GetAttemptId()).ThenReturn(0);

    // Container-level stubs.
    Org.Mockito.Mockito.When(mockedContainer.GetContainerId()).ThenReturn(id);
    Org.Mockito.Mockito.When(mockedContainer.GetApplicationAttemptId()).ThenReturn(mockedAttempt);

    return mockedContainer;
}
/// <summary>
/// Submits an application, fails its first attempt by completing the AM
/// container, lets a second attempt run to completion, and checks that the
/// memory-seconds and vcore-seconds reported by the app's RMAppMetrics equal
/// the sums computed from the captured containers.
/// </summary>
/// <param name="keepRunningContainers">
/// Passed as the last argument to SubmitApp. When true, only the AM container
/// of the first attempt is counted and the other captured container is
/// asserted to still be running; when false, every captured container of the
/// first attempt is counted.
/// </param>
/// <exception cref="System.Exception"/>
private void AmRestartTests(bool keepRunningContainers)
{
    MockRM rm = new MockRM(conf);
    rm.Start();
    RMApp app = rm.SubmitApp(200, "name", "user", new Dictionary<ApplicationAccessType, string>(),
        false, "default", -1, null, "MAPREDUCE", false, keepRunningContainers);
    MockNM nm = new MockNM("127.0.0.1:1234", 10240, rm.GetResourceTrackerService());
    nm.RegisterNode();
    MockAM am0 = MockRM.LaunchAndRegisterAM(app, rm, nm);
    int numContainers = 1;

    // allocate NUM_CONTAINERS containers
    am0.Allocate("127.0.0.1", 1024, numContainers, new AList<ContainerId>());
    nm.NodeHeartbeat(true);

    // wait for containers to be allocated.
    IList<Container> containers = am0.Allocate(new AList<ResourceRequest>(), new AList<ContainerId>())
        .GetAllocatedContainers();
    while (containers.Count != numContainers)
    {
        nm.NodeHeartbeat(true);
        Sharpen.Collections.AddAll(containers,
            am0.Allocate(new AList<ResourceRequest>(), new AList<ContainerId>()).GetAllocatedContainers());
        Sharpen.Thread.Sleep(200);
    }

    // launch the 2nd container.
    ContainerId containerId2 = ContainerId.NewContainerId(am0.GetApplicationAttemptId(), 2);
    nm.NodeHeartbeat(am0.GetApplicationAttemptId(), containerId2.GetContainerId(), ContainerState.Running);
    rm.WaitForState(nm, containerId2, RMContainerState.Running);

    // Capture the containers here so the metrics can be calculated after the
    // app has completed.
    ICollection<RMContainer> rmContainers =
        rm.scheduler.GetSchedulerAppInfo(am0.GetApplicationAttemptId()).GetLiveContainers();

    // fail the first app attempt by sending CONTAINER_FINISHED event without
    // registering.
    ContainerId amContainerId = app.GetCurrentAppAttempt().GetMasterContainer().GetId();
    nm.NodeHeartbeat(am0.GetApplicationAttemptId(), amContainerId.GetContainerId(), ContainerState.Complete);
    am0.WaitForState(RMAppAttemptState.Failed);

    long memorySeconds = 0;
    long vcoreSeconds = 0;

    // Calculate container usage metrics for first attempt.
    if (keepRunningContainers)
    {
        // Only calculate the usage for the one container that has completed.
        foreach (RMContainer c in rmContainers)
        {
            if (c.GetContainerId().Equals(amContainerId))
            {
                AggregateAppResourceUsage ru = CalculateContainerResourceMetrics(c);
                memorySeconds += ru.GetMemorySeconds();
                vcoreSeconds += ru.GetVcoreSeconds();
            }
            else
            {
                // The remaining container should be RUNNING.
                // NUnit takes the condition first and the message last.
                NUnit.Framework.Assert.IsTrue(
                    c.GetContainerState().Equals(ContainerState.Running),
                    "After first attempt failed, remaining container " + "should still be running. ");
            }
        }
    }
    else
    {
        // If keepRunningContainers is false, all live containers should now
        // be completed. Calculate the resource usage metrics for all of them.
        foreach (RMContainer c in rmContainers)
        {
            AggregateAppResourceUsage ru = CalculateContainerResourceMetrics(c);
            memorySeconds += ru.GetMemorySeconds();
            vcoreSeconds += ru.GetVcoreSeconds();
        }
    }

    // wait for app to start a new attempt.
    rm.WaitForState(app.GetApplicationId(), RMAppState.Accepted);

    // assert this is a new AM.
    RMAppAttempt attempt2 = app.GetCurrentAppAttempt();
    NUnit.Framework.Assert.IsFalse(attempt2.GetAppAttemptId().Equals(am0.GetApplicationAttemptId()));

    // launch the new AM
    nm.NodeHeartbeat(true);
    MockAM am1 = rm.SendAMLaunched(attempt2.GetAppAttemptId());
    am1.RegisterAppAttempt();

    // allocate NUM_CONTAINERS containers
    am1.Allocate("127.0.0.1", 1024, numContainers, new AList<ContainerId>());
    nm.NodeHeartbeat(true);

    // wait for containers to be allocated.
    containers = am1.Allocate(new AList<ResourceRequest>(), new AList<ContainerId>())
        .GetAllocatedContainers();
    while (containers.Count != numContainers)
    {
        nm.NodeHeartbeat(true);
        Sharpen.Collections.AddAll(containers,
            am1.Allocate(new AList<ResourceRequest>(), new AList<ContainerId>()).GetAllocatedContainers());
        Sharpen.Thread.Sleep(200);
    }
    rm.WaitForState(app.GetApplicationId(), RMAppState.Running);

    // Capture running containers for later use by metrics calculations.
    rmContainers = rm.scheduler.GetSchedulerAppInfo(attempt2.GetAppAttemptId()).GetLiveContainers();

    // complete container by sending the container complete event which has
    // earlier attempt's attemptId
    amContainerId = app.GetCurrentAppAttempt().GetMasterContainer().GetId();
    nm.NodeHeartbeat(am0.GetApplicationAttemptId(), amContainerId.GetContainerId(), ContainerState.Complete);
    MockRM.FinishAMAndVerifyAppState(app, rm, nm, am1);

    // Calculate container usage metrics for second attempt.
    foreach (RMContainer c in rmContainers)
    {
        AggregateAppResourceUsage ru = CalculateContainerResourceMetrics(c);
        memorySeconds += ru.GetMemorySeconds();
        vcoreSeconds += ru.GetVcoreSeconds();
    }

    // NUnit order: expected, actual, message.
    RMAppMetrics rmAppMetrics = app.GetRMAppMetrics();
    NUnit.Framework.Assert.AreEqual(memorySeconds, rmAppMetrics.GetMemorySeconds(),
        "Unexcpected MemorySeconds value");
    NUnit.Framework.Assert.AreEqual(vcoreSeconds, rmAppMetrics.GetVcoreSeconds(),
        "Unexpected VcoreSeconds value");

    rm.Stop();
}
/// <summary>
/// Runs an application with two extra containers on a recovery-enabled RM
/// backed by an in-memory state store, completes all containers, checks that
/// the app's usage metrics equal the sums computed from the captured
/// containers, then starts a second RM on the same store and checks that the
/// metrics it reports for the app are unchanged.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestUsageWithMultipleContainersAndRMRestart()
{
    // Set max attempts to 1 so that when the first attempt fails, the app
    // won't try to start a new one.
    conf.SetInt(YarnConfiguration.RmAmMaxAttempts, 1);
    conf.SetBoolean(YarnConfiguration.RecoveryEnabled, true);
    conf.SetBoolean(YarnConfiguration.RmWorkPreservingRecoveryEnabled, false);
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.Init(conf);

    MockRM rm0 = new MockRM(conf, memStore);
    rm0.Start();
    MockNM nm = new MockNM("127.0.0.1:1234", 65536, rm0.GetResourceTrackerService());
    nm.RegisterNode();

    RMApp app0 = rm0.SubmitApp(200);
    rm0.WaitForState(app0.GetApplicationId(), RMAppState.Accepted);
    RMAppAttempt attempt0 = app0.GetCurrentAppAttempt();
    ApplicationAttemptId attemptId0 = attempt0.GetAppAttemptId();
    rm0.WaitForState(attemptId0, RMAppAttemptState.Scheduled);
    nm.NodeHeartbeat(true);
    rm0.WaitForState(attemptId0, RMAppAttemptState.Allocated);
    MockAM am0 = rm0.SendAMLaunched(attempt0.GetAppAttemptId());
    am0.RegisterAppAttempt();

    int numContainers = 2;
    am0.Allocate("127.0.0.1", 1000, numContainers, new AList<ContainerId>());
    nm.NodeHeartbeat(true);
    IList<Container> conts = am0.Allocate(new AList<ResourceRequest>(), new AList<ContainerId>())
        .GetAllocatedContainers();
    while (conts.Count != numContainers)
    {
        nm.NodeHeartbeat(true);
        Sharpen.Collections.AddAll(conts,
            am0.Allocate(new AList<ResourceRequest>(), new AList<ContainerId>()).GetAllocatedContainers());
        Sharpen.Thread.Sleep(500);
    }

    // launch the 2nd and 3rd containers.
    foreach (Container c in conts)
    {
        nm.NodeHeartbeat(attempt0.GetAppAttemptId(), c.GetId().GetContainerId(), ContainerState.Running);
        rm0.WaitForState(nm, c.GetId(), RMContainerState.Running);
    }

    // Get the RMContainers for all of the live containers, to be used later
    // for metrics calculations and comparisons.
    ICollection<RMContainer> rmContainers =
        rm0.scheduler.GetSchedulerAppInfo(attempt0.GetAppAttemptId()).GetLiveContainers();

    // Allow metrics to accumulate (bounded to 5 seconds of sleeping).
    int sleepInterval = 1000;
    int cumulativeSleepTime = 0;
    while (app0.GetRMAppMetrics().GetMemorySeconds() <= 0 && cumulativeSleepTime < 5000)
    {
        Sharpen.Thread.Sleep(sleepInterval);
        cumulativeSleepTime += sleepInterval;
    }

    // Stop all non-AM containers
    foreach (Container c in conts)
    {
        if (c.GetId().GetContainerId() == 1)
        {
            continue;
        }
        nm.NodeHeartbeat(attempt0.GetAppAttemptId(), c.GetId().GetContainerId(), ContainerState.Complete);
        rm0.WaitForState(nm, c.GetId(), RMContainerState.Completed);
    }

    // After all other containers have completed, manually complete the master
    // container in order to trigger a save to the state store of the resource
    // usage metrics. This will cause the attempt to fail, and, since the max
    // attempt retries is 1, the app will also fail. This is intentional so
    // that all containers will complete prior to saving.
    ContainerId cId = ContainerId.NewContainerId(attempt0.GetAppAttemptId(), 1);
    nm.NodeHeartbeat(attempt0.GetAppAttemptId(), cId.GetContainerId(), ContainerState.Complete);
    rm0.WaitForState(nm, cId, RMContainerState.Completed);

    // Check that the container metrics match those from the app usage report.
    long memorySeconds = 0;
    long vcoreSeconds = 0;
    foreach (RMContainer c in rmContainers)
    {
        AggregateAppResourceUsage ru = CalculateContainerResourceMetrics(c);
        memorySeconds += ru.GetMemorySeconds();
        vcoreSeconds += ru.GetVcoreSeconds();
    }

    // NUnit order: expected, actual, message.
    RMAppMetrics metricsBefore = app0.GetRMAppMetrics();
    NUnit.Framework.Assert.AreEqual(memorySeconds, metricsBefore.GetMemorySeconds(),
        "Unexcpected MemorySeconds value");
    NUnit.Framework.Assert.AreEqual(vcoreSeconds, metricsBefore.GetVcoreSeconds(),
        "Unexpected VcoreSeconds value");

    // create new RM to represent RM restart. Load up the state store.
    MockRM rm1 = new MockRM(conf, memStore);
    rm1.Start();
    RMApp app0After = rm1.GetRMContext().GetRMApps()[app0.GetApplicationId()];

    // Compare container resource usage metrics from before and after restart.
    RMAppMetrics metricsAfter = app0After.GetRMAppMetrics();
    NUnit.Framework.Assert.AreEqual(metricsBefore.GetVcoreSeconds(), metricsAfter.GetVcoreSeconds(),
        "Vcore seconds were not the same after RM Restart");
    NUnit.Framework.Assert.AreEqual(metricsBefore.GetMemorySeconds(), metricsAfter.GetMemorySeconds(),
        "Memory seconds were not the same after RM Restart");

    rm0.Stop();
    rm0.Close();
    rm1.Stop();
    rm1.Close();
}
/// <summary>
/// Answers an allocate request by fabricating one container per requested
/// container of every "any location" ask; asks for other resource names are
/// ignored.
/// </summary>
/// <param name="request">Request whose ask list and response id are read.</param>
/// <returns>
/// A response carrying the fabricated containers, a response id one greater
/// than the request's, and a cluster node count of 350.
/// </returns>
/// <exception cref="System.IO.IOException"/>
public AllocateResponse Allocate(AllocateRequest request)
{
    AllocateResponse reply = Org.Apache.Hadoop.Yarn.Util.Records.NewRecord<AllocateResponse>();
    IList<ResourceRequest> asks = request.GetAskList();
    IList<Container> granted = new AList<Container>();

    foreach (ResourceRequest ask in asks)
    {
        if (ResourceRequest.IsAnyLocation(ask.GetResourceName()))
        {
            int wanted = ask.GetNumContainers();
            for (int offset = 0; offset < wanted; offset++)
            {
                // Container ids are derived from the request's response id plus the loop offset.
                ContainerId newId = ContainerId.NewContainerId(
                    this._enclosing.GetContext().GetApplicationAttemptId(),
                    request.GetResponseId() + offset);

                // The same "host<id>" name is used for the node id and the second address string.
                string hostName = "host" + newId.GetContainerId();
                NodeId node = NodeId.NewInstance(hostName, 2345);
                Container fabricated = Container.NewInstance(
                    newId, node, hostName + ":5678", ask.GetCapability(), ask.GetPriority(), null);
                granted.AddItem(fabricated);
            }
        }
    }

    reply.SetAllocatedContainers(granted);
    reply.SetResponseId(request.GetResponseId() + 1);
    reply.SetNumClusterNodes(350);
    return reply;
}
// TODO later: add explicit "isUber()" checks of some sort
/// <summary>
/// Runs a sleep job with small log-size limits and a fixed number of log
/// backups for both tasks and the AM, waits for the application to reach a
/// terminal state, then inspects the syslog files in every node manager's log
/// directories: each container must have (backups + 1) syslog* files and its
/// second file must be at least the configured size. Finally checks that
/// exactly one AM log set was seen and the expected number of map-task log
/// sets (0 when uber mode is enabled in the job conf, otherwise 1).
/// </summary>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
/// <exception cref="System.TypeLoadException"/>
public virtual void TestContainerRollingLog()
{
    if (!(new FilePath(MiniMRYarnCluster.Appjar)).Exists())
    {
        Log.Info("MRAppJar " + MiniMRYarnCluster.Appjar + " not found. Not running test.");
        return;
    }

    SleepJob sleepJob = new SleepJob();
    JobConf sleepConf = new JobConf(mrCluster.GetConfig());
    sleepConf.Set(MRJobConfig.MapLogLevel, Level.All.ToString());
    long userLogKb = 4;
    sleepConf.SetLong(MRJobConfig.TaskUserlogLimit, userLogKb);
    sleepConf.SetInt(MRJobConfig.TaskLogBackups, 3);
    sleepConf.Set(MRJobConfig.MrAmLogLevel, Level.All.ToString());
    long amLogKb = 7;
    sleepConf.SetLong(MRJobConfig.MrAmLogKb, amLogKb);
    sleepConf.SetInt(MRJobConfig.MrAmLogBackups, 7);
    sleepJob.SetConf(sleepConf);

    Job job = sleepJob.CreateJob(1, 0, 1L, 100, 0L, 0);
    job.SetJarByClass(typeof(SleepJob));
    job.AddFileToClassPath(AppJar);
    // The AppMaster jar itself.
    job.WaitForCompletion(true);
    JobId jobId = TypeConverter.ToYarn(job.GetJobID());
    ApplicationId appID = jobId.GetAppId();

    // Poll once per second, for at most 60 seconds, until the RM reports a terminal state.
    int pollElapsed = 0;
    while (true)
    {
        Sharpen.Thread.Sleep(1000);
        pollElapsed += 1000;
        if (TerminalRmAppStates.Contains(
                mrCluster.GetResourceManager().GetRMContext().GetRMApps()[appID].GetState()))
        {
            break;
        }
        if (pollElapsed >= 60000)
        {
            Log.Warn("application did not reach terminal state within 60 seconds");
            break;
        }
    }
    NUnit.Framework.Assert.AreEqual(RMAppState.Finished,
        mrCluster.GetResourceManager().GetRMContext().GetRMApps()[appID].GetState());

    // Job finished, verify logs
    //
    string appIdStr = appID.ToString();
    string appIdSuffix = Sharpen.Runtime.Substring(appIdStr, "application_".Length, appIdStr.Length);
    string containerGlob = "container_" + appIdSuffix + "_*_*";
    string syslogGlob = appIdStr + Path.Separator + containerGlob + Path.Separator + TaskLog.LogName.Syslog;
    int numAppMasters = 0;
    int numMapTasks = 0;

    for (int i = 0; i < NumNodeMgrs; i++)
    {
        Configuration nmConf = mrCluster.GetNodeManager(i).GetConfig();
        foreach (string logDir in nmConf.GetTrimmedStrings(YarnConfiguration.NmLogDirs))
        {
            Path absSyslogGlob = new Path(logDir + Path.Separator + syslogGlob);
            Log.Info("Checking for glob: " + absSyslogGlob);
            FileStatus[] syslogs = localFs.GlobStatus(absSyslogGlob);
            foreach (FileStatus slog in syslogs)
            {
                // In uber mode every syslog is treated as the AM's; otherwise
                // the masked container id decides.
                bool foundAppMaster = job.IsUber();
                Path containerPathComponent = slog.GetPath().GetParent();
                if (!foundAppMaster)
                {
                    ContainerId cid = ConverterUtils.ToContainerId(containerPathComponent.GetName());
                    foundAppMaster = ((cid.GetContainerId() & ContainerId.ContainerIdBitmask) == 1);
                }

                FileStatus[] sysSiblings = localFs.GlobStatus(
                    new Path(containerPathComponent, TaskLog.LogName.Syslog + "*"));
                // sort to ensure for i > 0 sysSiblings[i] == "syslog.i"
                Arrays.Sort(sysSiblings);

                if (foundAppMaster)
                {
                    numAppMasters++;
                }
                else
                {
                    numMapTasks++;
                }

                // The file counts are ints: compare by value (AreEqual). AreSame
                // would compare two separately boxed objects and could never pass.
                if (foundAppMaster)
                {
                    NUnit.Framework.Assert.AreEqual(
                        sleepConf.GetInt(MRJobConfig.MrAmLogBackups, 0) + 1,
                        sysSiblings.Length,
                        "Unexpected number of AM sylog* files");
                    NUnit.Framework.Assert.IsTrue(
                        sysSiblings[1].GetLen() >= amLogKb * 1024,
                        "AM syslog.1 length kb should be >= " + amLogKb);
                }
                else
                {
                    NUnit.Framework.Assert.AreEqual(
                        sleepConf.GetInt(MRJobConfig.TaskLogBackups, 0) + 1,
                        sysSiblings.Length,
                        "Unexpected number of MR task sylog* files");
                    NUnit.Framework.Assert.IsTrue(
                        sysSiblings[1].GetLen() >= userLogKb * 1024,
                        "MR syslog.1 length kb should be >= " + userLogKb);
                }
            }
        }
    }

    // Make sure we checked non-empty set
    //
    NUnit.Framework.Assert.AreEqual(1, numAppMasters, "No AppMaster log found!");
    if (sleepConf.GetBoolean(MRJobConfig.JobUbertaskEnable, false))
    {
        NUnit.Framework.Assert.AreEqual(0, numMapTasks, "MapTask log with uber found!");
    }
    else
    {
        NUnit.Framework.Assert.AreEqual(1, numMapTasks, "No MapTask log found!");
    }
}